Index: trunk/extensions/BotQuery/query.php |
— | — | @@ -75,35 +75,15 @@ |
76 | 76 | define( 'NS_ALL_NAMESPACES', -10123 ); |
77 | 77 | |
78 | 78 | |
79 | | -// Possible way to implement login functionality |
80 | | -//$action = $wgRequest->getVal('action', 'query'); |
81 | | -//switch( $action ) { |
82 | | -// case 'query': |
83 | | - $bqp = new BotQueryProcessor( $startTime ); |
84 | | - $bqp->execute(); |
85 | | - $bqp->output(); |
86 | | -// break; |
87 | | -// case 'login': |
88 | | -// $blp = new BotLoginProcessor(); |
89 | | -// $blp->execute(); |
90 | | -// break; |
91 | | -//} |
| 79 | +$bqp = new BotQueryProcessor( $startTime ); |
| 80 | +$bqp->execute(); |
| 81 | +$bqp->output(); |
92 | 82 | |
93 | 83 | wfProfileOut( 'query.php' ); |
94 | 84 | if ( function_exists( 'wfLogProfilingData' ) ) { |
95 | 85 | wfLogProfilingData(); |
96 | 86 | } |
97 | 87 | |
98 | | -//class BotLoginProcessor { |
99 | | -// function BotLoginProcessor() |
100 | | -// {} |
101 | | -// |
102 | | -// function execute() |
103 | | -// { |
104 | | -// wfSetupSession(); |
105 | | -// } |
106 | | -//} |
107 | | - |
108 | 88 | class BotQueryProcessor { |
109 | 89 | var $classname = 'BotQueryProcessor'; |
110 | 90 | |
— | — | @@ -502,15 +482,18 @@ |
503 | 483 | "ucrbtoken - If logged in as an admin, a rollback tokens for top revisions will be included in the output.", |
504 | 484 | "Example: query.php?what=usercontribs&titles=User:YurikBot&uclimit=20&uccomments", |
505 | 485 | )), |
506 | | - 'contribcounter' => array( |
507 | | - GN_FUNC => 'genContributionsCounter', |
508 | | - GN_ISMETA => false, |
509 | | - GN_PARAMS => array(), |
510 | | - GN_DFLT => array(), |
511 | | - GN_DESC => array( |
512 | | - "User contributions counter", |
513 | | - "Example: query.php?what=contribcounter&titles=User:Yurik", |
514 | | - )), |
| 486 | + |
| 487 | + // Extremely slow query!!! The 'contribcounter' generator below is disabled. |
| 488 | + |
| 489 | +// 'contribcounter' => array( |
| 490 | +// GN_FUNC => 'genContributionsCounter', |
| 491 | +// GN_ISMETA => false, |
| 492 | +// GN_PARAMS => array(), |
| 493 | +// GN_DFLT => array(), |
| 494 | +// GN_DESC => array( |
| 495 | +// "User contributions counter", |
| 496 | +// "Example: query.php?what=contribcounter&titles=User:Yurik", |
| 497 | +// )), |
515 | 498 | 'imageinfo' => array( |
516 | 499 | GN_FUNC => 'genImageInfo', |
517 | 500 | GN_ISMETA => false, |
— | — | @@ -544,6 +527,9 @@ |
545 | 528 | { |
546 | 529 | global $wgRequest, $wgUser; |
547 | 530 | |
| 531 | + // Install the exception handler |
| 532 | + set_exception_handler( array($this, 'ExceptionHandler') ); |
| 533 | + |
548 | 534 | $this->startTime = $startTime; |
549 | 535 | $this->totalDbTime = 0; |
550 | 536 | |
— | — | @@ -581,6 +567,40 @@ |
582 | 568 | } |
583 | 569 | |
584 | 570 | /** |
| 571 | + * Exception handler which simulates the appropriate catch() handling: |
| 572 | + * |
| 573 | + * try { |
| 574 | + * ... |
| 575 | + * } catch ( MWException $e ) { |
| 576 | + * dieUsage() |
| 577 | + * } catch ( Exception $e ) { |
| 578 | + * echo $e->__toString(); |
| 579 | + * } |
| 580 | + */ |
| 581 | + function ExceptionHandler( $e ) { |
| 582 | + global $wgFullyInitialised; |
| 583 | + if ( is_a( $e, 'MWException' ) ) { |
| 584 | + try { |
| 585 | + $this->dieUsage( "Exception Caught: {$e->getMessage()}\n\n{$e->getTraceAsString()}\n\n", 'internal_error' ); |
| 586 | + } catch (Exception $e2) { |
| 587 | + echo $e->__toString(); |
| 588 | + } |
| 589 | + } else { |
| 590 | + echo $e->__toString(); |
| 591 | + } |
| 592 | + |
| 593 | + // Final cleanup, similar to wfErrorExit() |
| 594 | + if ( $wgFullyInitialised ) { |
| 595 | + try { |
| 596 | + wfLogProfilingData(); // uses $wgRequest, hence the $wgFullyInitialised condition |
| 597 | + } catch ( Exception $e ) {} |
| 598 | + } |
| 599 | + |
| 600 | + // Exit value should be nonzero for the benefit of shell jobs |
| 601 | + exit( 1 ); |
| 602 | + } |
| 603 | + |
| 604 | + /** |
585 | 605 | * The core function - executes meta generators, populates basic page info, and then fills in the required additional data for all pages |
586 | 606 | */ |
587 | 607 | function execute() |
— | — | @@ -1141,16 +1161,82 @@ |
1142 | 1162 | // |
1143 | 1163 | // ************************************* PAGE INFO GENERATORS ************************************* |
1144 | 1164 | // |
1145 | | - |
| 1165 | + |
1146 | 1166 | /** |
1147 | | - * Populate redirect data. Redirects may be one of the following: |
1148 | | - * Redir to nonexisting, Existing page, or Existing redirect. |
1149 | | - * Existing redirect may point to yet another nonexisting or existing page( which in turn may also be a redirect) |
1150 | | - */ |
| 1167 | + * Simpler replacement for the double-redirect-resolving genRedirectInfo2() |
| 1168 | + */ |
1151 | 1169 | function genRedirectInfo(&$prop, &$genInfo) |
1152 | 1170 | { |
1153 | 1171 | if( empty( $this->redirectPageIds )) return; |
1154 | 1172 | $this->startProfiling(); |
| 1173 | + |
| 1174 | + $this->startDbProfiling(); |
| 1175 | + $res = $this->db->select( |
| 1176 | + array('page', 'pagelinks'), |
| 1177 | + array('pl_from', 'pl_namespace', 'pl_title', 'page_id', 'page_is_redirect'), |
| 1178 | + array('pl_from' => $this->redirectPageIds, |
| 1179 | + 'pl_namespace = page_namespace', |
| 1180 | + 'pl_title = page_title'), |
| 1181 | + __CLASS__ . '::' . __FUNCTION__ ); |
| 1182 | + $this->endDbProfiling( $prop ); |
| 1183 | + |
| 1184 | + $multiLinkRedirPages = array(); |
| 1185 | + |
| 1186 | + while ( $row = $this->db->fetchObject( $res ) ) { |
| 1187 | + $pageId = intval($row->pl_from); |
| 1188 | + $data = & $this->data['pages'][$pageId]['redirect']; |
| 1189 | + if ( !isset($data['to']) ) { |
| 1190 | + $data['to'] = $this->getLinkInfo( $row->pl_namespace, $row->pl_title, $row->page_id, $row->page_is_redirect ); |
| 1191 | + } else { |
| 1192 | + // More than one link exists from redirect page |
| 1193 | + $multiLinkRedirPages[$pageId] = ''; |
| 1194 | + } |
| 1195 | + } |
| 1196 | + $this->db->freeResult( $res ); |
| 1197 | + |
| 1198 | + if (!empty($multiLinkRedirPages)) { |
| 1199 | + // We found some bad redirect pages. Get their content and resolve the target from the redirect text. |
| 1200 | + $multiLinkRedirPages = array_keys($multiLinkRedirPages); |
| 1201 | + $ids = array(); |
| 1202 | + foreach( $multiLinkRedirPages as $pageId ) { |
| 1203 | + $ids[] = "(rev_page=$pageId AND rev_id={$this->data['pages'][$pageId]['revid']})"; |
| 1204 | + } |
| 1205 | + |
| 1206 | + $this->startDbProfiling(); |
| 1207 | + $res = $this->db->select( |
| 1208 | + array('page', 'revision', 'text'), |
| 1209 | + array('page_id', 'page_is_redirect', 'old_id', 'old_text', 'old_flags'), |
| 1210 | + array('page_id' => $multiLinkRedirPages, 'page_latest=rev_id', 'rev_text_id=old_id' ), |
| 1211 | + $this->classname . '::genPageContent' |
| 1212 | + ); |
| 1213 | + while ( $row = $this->db->fetchObject( $res ) ) { |
| 1214 | + $title = Title :: newFromRedirect(Revision::getRevisionText( $row )); |
| 1215 | + if ($title) { |
| 1216 | + $article = new Article($title); |
| 1217 | + $pageId = $article->getTitle()->getArticleId(); |
| 1218 | + $isRedirect = $pageId > 0 ? !$article->checkTouched() : false; |
| 1219 | + $link = $this->getTitleInfo( $title, $pageId, $isRedirect ); |
| 1220 | + $this->data['pages'][intval($row->page_id)]['redirect']['to'] = $link; |
| 1221 | + } |
| 1222 | + } |
| 1223 | + $this->db->freeResult( $res ); |
| 1224 | + } |
| 1225 | + |
| 1226 | + $this->endProfiling( $prop ); |
| 1227 | + } |
| 1228 | + |
| 1229 | + /** |
| 1230 | + * |
| 1231 | + * This method cannot be used until http://bugzilla.wikipedia.org/show_bug.cgi?id=7304 is fixed |
| 1232 | + * |
| 1233 | + * Populate redirect data. Redirects may be one of the following: |
| 1234 | + * Redir to nonexisting, Existing page, or Existing redirect. |
| 1235 | + * Existing redirect may point to yet another nonexisting or existing page( which in turn may also be a redirect) |
| 1236 | + */ |
| 1237 | + function genRedirectInfo2(&$prop, &$genInfo) |
| 1238 | + { |
| 1239 | + if( empty( $this->redirectPageIds )) return; |
| 1240 | + $this->startProfiling(); |
1155 | 1241 | extract( $this->db->tableNames( 'page', 'pagelinks' ) ); |
1156 | 1242 | |
1157 | 1243 | // |
— | — | @@ -1815,45 +1901,45 @@ |
1816 | 1902 | } |
1817 | 1903 | |
1818 | 1904 | |
1819 | | - /** |
1820 | | - * Add counts of user contributions to the user pages |
1821 | | - */ |
1822 | | - function genContributionsCounter(&$prop, &$genInfo) |
1823 | | - { |
1824 | | - $this->startProfiling(); |
1825 | | - $users = array (); // Users to query |
1826 | | - $userPageIds = array (); // Map of user name to the page ID |
1827 | | - |
1828 | | - // For all valid pages in User namespace query history. Note that the page might not exist. |
1829 | | - foreach ($this->data['pages'] as $pageId => & $page) { |
1830 | | - if (array_key_exists('_obj', $page)) { |
1831 | | - $title = & $page['_obj']; |
1832 | | - if ($title->getNamespace() == NS_USER && !$title->isExternal()) { |
1833 | | - $users[] = $title->getText(); |
1834 | | - $userPageIds[$title->getText()] = $pageId; |
1835 | | - } |
1836 | | - } |
1837 | | - } |
1838 | | - |
1839 | | - $this->validateLimit( 'cc_querytoobig', count($users), 10, 50 ); |
1840 | | - $this->startDbProfiling(); |
1841 | | - $res = $this->db->select('revision', array ( |
1842 | | - 'rev_user_text', |
1843 | | - 'count(*) cnt', |
1844 | | - 'count(DISTINCT rev_page) distcnt' |
1845 | | - ), array ( |
1846 | | - 'rev_user_text' => $users |
1847 | | - ), $this->classname . '::genContributionsCounter', array ( |
1848 | | - 'GROUP BY' => 'rev_user_text' |
1849 | | - )); |
1850 | | - $this->endDbProfiling($prop); |
1851 | | - while ($row = $this->db->fetchObject($res)) { |
1852 | | - $pageId = $userPageIds[$row->rev_user_text]; |
1853 | | - $this->addPageSubElement($pageId, $prop, 'count', $row->cnt, false); |
1854 | | - $this->addPageSubElement($pageId, $prop, 'distcount', $row->distcnt, false); |
1855 | | - } |
1856 | | - $this->endProfiling($prop); |
1857 | | - } |
| 1905 | +// /** |
| 1906 | +// * Add counts of user contributions to the user pages |
| 1907 | +// */ |
| 1908 | +// function genContributionsCounter(&$prop, &$genInfo) |
| 1909 | +// { |
| 1910 | +// $this->startProfiling(); |
| 1911 | +// $users = array (); // Users to query |
| 1912 | +// $userPageIds = array (); // Map of user name to the page ID |
| 1913 | +// |
| 1914 | +// // For all valid pages in User namespace query history. Note that the page might not exist. |
| 1915 | +// foreach ($this->data['pages'] as $pageId => & $page) { |
| 1916 | +// if (array_key_exists('_obj', $page)) { |
| 1917 | +// $title = & $page['_obj']; |
| 1918 | +// if ($title->getNamespace() == NS_USER && !$title->isExternal()) { |
| 1919 | +// $users[] = $title->getText(); |
| 1920 | +// $userPageIds[$title->getText()] = $pageId; |
| 1921 | +// } |
| 1922 | +// } |
| 1923 | +// } |
| 1924 | +// |
| 1925 | +// $this->validateLimit( 'cc_querytoobig', count($users), 10, 50 ); |
| 1926 | +// $this->startDbProfiling(); |
| 1927 | +// $res = $this->db->select('revision', array ( |
| 1928 | +// 'rev_user_text', |
| 1929 | +// 'count(*) cnt', |
| 1930 | +// 'count(DISTINCT rev_page) distcnt' |
| 1931 | +// ), array ( |
| 1932 | +// 'rev_user_text' => $users |
| 1933 | +// ), $this->classname . '::genContributionsCounter', array ( |
| 1934 | +// 'GROUP BY' => 'rev_user_text' |
| 1935 | +// )); |
| 1936 | +// $this->endDbProfiling($prop); |
| 1937 | +// while ($row = $this->db->fetchObject($res)) { |
| 1938 | +// $pageId = $userPageIds[$row->rev_user_text]; |
| 1939 | +// $this->addPageSubElement($pageId, $prop, 'count', $row->cnt, false); |
| 1940 | +// $this->addPageSubElement($pageId, $prop, 'distcount', $row->distcnt, false); |
| 1941 | +// } |
| 1942 | +// $this->endProfiling($prop); |
| 1943 | +// } |
1858 | 1944 | |
1859 | 1945 | /** |
1860 | 1946 | * Add the raw content of the pages |