r14533 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r14532‎ | r14533 | r14534 >
Date:04:13, 2 June 2006
Author:yurik
Status:old
Tags:
Comment:
* old revisions can now be queried for content
* imagelinks - changed ilfilter default to 'all'
* db and overall profiling added
Modified paths:
  • /trunk/extensions/BotQuery/query.php (modified) (history)

Diff [purge]

Index: trunk/extensions/BotQuery/query.php
@@ -111,7 +111,7 @@
112112 * 0) Function to call
113113 * 1) true/false - does this property work on individual pages? (false for site's metadata)
114114 * 2) array of accepted parameters
115 - * 3) array of default parameter values
 115+ * 3) array of default parameter values. If the default value is an array itself, only the listed values are allowed, and the 1st value is taken as default.
116116 * 4) Format description
117117 */
118118 var $propGenerators = array(
@@ -209,7 +209,7 @@
210210 "blfilter - Of all given pages, which should be queried:",
211211 " 'nonredirects', 'existing' (blue links, default), or 'all' (red links)",
212212 "bllimit - how many total links to return",
213 - "blcontfrom - from which point to continue. Use the 'next' value from previous queries.",
 213+ "blcontfrom - from which point to continue. Use the 'next' value from the previous queries.",
214214 "Example: query.php?what=backlinks&titles=Main%20Page&bllimit=10",
215215 )),
216216 'embeddedin' => array( 'genPageBackLinksHelper', false,
@@ -221,35 +221,40 @@
222222 "eifilter - Of all given pages, which should be queried:",
223223 " 'nonredirects', 'existing' (blue links, default), or 'all' (red links)",
224224 "eilimit - how many total links to return",
225 - "eicontfrom - from which point to continue. Use the 'next' value from previous queries.",
 225+ "eicontfrom - from which point to continue. Use the 'next' value from the previous queries.",
226226 "Example: query.php?what=embeddedin&titles=Template:Stub&eilimit=10",
227227 )),
228228 'imagelinks' => array( 'genPageBackLinksHelper', false,
229229 array( 'ilfilter', 'illimit', 'ilcontfrom' ),
230 - array( array('existing', 'nonredirects', 'all'), 50, null ),
 230+ array( array('all', 'existing', 'nonredirects'), 50, null ),
231231 array(
232232 "What pages use this image(s)",
233233 "ilfilter - Of all given images, which should be queried:",
234 - " 'nonredirects', 'existing' (default), or 'all' (including non-existant)",
 234+ " 'nonredirects', 'existing', or 'all' (default, includes non-existant or those stored on Wikimedia Commons)",
235235 "illimit - how many total links to return",
236 - "ilcontfrom - from which point to continue. Use the 'next' value from previous queries.",
237 - "Example: query.php?what=imagelinks&titles=image:test.jpg&illimit=10",
 236+ "ilcontfrom - from which point to continue. Use the 'next' value from the previous queries.",
 237+ "Example: query.php?what=imagelinks&titles=Image:HermitageAcrossNeva.jpg&illimit=10",
238238 )),
239239 'revisions' => array( 'genPageHistory', false,
240 - array( 'rvcomments', 'rvlimit', 'rvoffset', 'rvstart', 'rvend' ),
241 - array( false, 50, 0, null, null ),
 240+ array( 'rvcomments', 'rvcontent', 'rvlimit', 'rvoffset', 'rvstart', 'rvend' ),
 241+ array( false, false, 50, 0, null, null ),
242242 array(
243243 "Revision history - Lists edits performed to the given pages",
244244 "Parameters supported:",
245245 "rvcomments - if specified, the result will include summary strings",
 246+ "rvcontent - if specified, the result will include raw wiki text.",
 247+ " This parameter is *very slow*, use only when needed.",
246248 "rvlimit - how many links to return *for each title*",
247249 "rvoffset - when too many results are found, use this to page",
248250 "rvstart - timestamp of the earliest entry",
249251 "rvend - timestamp of the latest entry",
250252 "Example: query.php?what=revisions&titles=Main%20Page&rvlimit=10&rvcomments",
251253 )),
252 - 'content' => array( 'genPageContent', false, null, null, array(
253 - "Raw page content",
 254+ 'content' => array( 'genPageContent', false, null, null,
 255+ array(
 256+ "Raw page content - Retrieves raw wiki markup for each page.",
 257+ "This query is *very slow*! Please optimize content requests to reduce load on the servers.",
 258+ "Duplicate results may be obtained through revisions+rvcontent request",
254259 "Example: query.php?what=content&titles=Main%20Page",
255260 )),
256261 );
@@ -320,9 +325,7 @@
321326 function output($isError = false) {
322327 global $wgRequest, $wgUser;
323328
324 - // hack: pretend that profiling was started at the begining of the class execution.
325 - $this->startTime = $this->totalStartTime;
326 - $this->endProfiling( 'total' );
 329+ $this->recordProfiling( 'total', 'time', $this->totalStartTime );
327330
328331 $printer = $this->outputGenerators[$this->format][GEN_FUNCTION];
329332 $mime = $this->outputGenerators[$this->format][GEN_MIME];
@@ -404,7 +407,7 @@
405408 */
406409 function genPageInfo() {
407410 global $wgUser, $wgRequest;
408 -
 411+ $this->startProfiling();
409412 $where = array();
410413
411414 //
@@ -414,8 +417,8 @@
415418 if( $titles !== null ) {
416419 $titles = explode( '|', $titles );
417420 $linkBatch = new LinkBatch;
418 - foreach ( $titles as $titleString ) {
419 - $titleObj = Title::newFromText( $titleString );
 421+ foreach ( $titles as &$titleString ) {
 422+ $titleObj = &Title::newFromText( $titleString );
420423 if ( !$titleObj ) {
421424 $this->dieUsage( "bad title $titleString", 'pi_invalidtitle' );
422425 }
@@ -442,7 +445,6 @@
443446 } else {
444447 $nonexistentPages = array(); // empty data to keep unset() happy
445448 }
446 -
447449 //
448450 // List of Page IDs
449451 //
@@ -458,9 +460,10 @@
459461 $where['page_id'] = $pageids;
460462 $this->requestsize += count($pageids);
461463 }
462 -
 464+
463465 // Do we have anything to do?
464466 if( $this->requestsize == 0 ) {
 467+ // Do not end profiling here, as it will introduce an element to the data object, and the usage screen may not be shown.
465468 return false; // Nothing to do for any of the page generators
466469 }
467470
@@ -481,12 +484,12 @@
482485 // Query page information with the given lists of titles & pageIDs
483486 //
484487 $this->redirectPageIds = array();
485 - $this->startProfiling();
 488+ $this->startDbProfiling();
486489 $res = $this->db->select( 'page',
487490 array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_touched', 'page_latest' ),
488491 $this->db->makeList( $where, LIST_OR ),
489492 $this->classname . '::genPageInfo' );
490 - $this->endProfiling('pageInfo');
 493+ $this->endDbProfiling('pageInfo');
491494 while( $row = $this->db->fetchObject( $res ) ) {
492495 $title = Title::makeTitle( $row->page_namespace, $row->page_title );
493496 if ( !$title->userCanRead() ) {
@@ -568,7 +571,7 @@
569572 $data['_element'] = 'title';
570573 $data[] = $givenTitle;
571574 }
572 -
 575+ $this->endProfiling('pageInfo');
573576 return true; // success
574577 }
575578
@@ -583,6 +586,7 @@
584587 */
585588 function genMetaSiteInfo(&$prop, &$genInfo) {
586589 global $wgSitename, $wgVersion, $wgCapitalLinks;
 590+ $this->startProfiling();
587591 $meta = array();
588592 $mainPage = Title::newFromText( wfMsgForContent( 'mainpage' ) );
589593
@@ -593,6 +597,7 @@
594598 $meta['case'] = $wgCapitalLinks ? 'first-letter' : 'case-sensitive'; // "case-insensitive" option is reserved for future
595599
596600 $this->data['meta']['site'] = $meta;
 601+ $this->endProfiling($prop);
597602 }
598603
599604 /**
@@ -600,12 +605,14 @@
601606 */
602607 function genMetaNamespaceInfo(&$prop, &$genInfo) {
603608 global $wgContLang;
 609+ $this->startProfiling();
604610 $meta = array();
605611 $meta['_element'] = 'ns';
606612 foreach( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
607613 $meta[$ns] = array( "id"=>$ns, "*" => $title );
608614 }
609615 $this->data['meta']['namespaces'] = $meta;
 616+ $this->endProfiling($prop);
610617 }
611618
612619 /**
@@ -613,7 +620,7 @@
614621 */
615622 function genMetaUserInfo(&$prop, &$genInfo) {
616623 global $wgUser;
617 -
 624+ $this->startProfiling();
618625 extract( $this->getParams( $prop, $genInfo ));
619626 $meta = array();
620627 $meta['name'] = $wgUser->getName();
@@ -627,13 +634,14 @@
628635 $meta['rights']['_element'] = 'r';
629636 }
630637 $this->data['meta']['user'] = $meta;
 638+ $this->endProfiling($prop);
631639 }
632640
633641 /**
634642 * Add page ids of the most recently modified pages to the output
635643 */
636644 function genMetaRecentChanges(&$prop, &$genInfo) {
637 -
 645+ $this->startProfiling();
638646 extract( $this->getParams( $prop, $genInfo ));
639647 # It makes no sense to hide both anons and logged-in users
640648 if( in_array('anons', $rchide) && in_array('liu', $rchide) ) {
@@ -670,7 +678,7 @@
671679 $options = array( 'USE INDEX' => 'rc_timestamp', 'LIMIT' => $rclimit );
672680 $options['ORDER BY'] = 'rc_timestamp' . ( $rcfrom != '' ? '' : ' DESC' );
673681
674 - $this->startProfiling();
 682+ $this->startDbProfiling();
675683 $res = $this->db->select(
676684 'recentchanges',
677685 'rc_cur_id',
@@ -678,13 +686,14 @@
679687 $this->classname . '::genMetaRecentChanges',
680688 $options
681689 );
682 - $this->endProfiling($prop);
 690+ $this->endDbProfiling($prop);
683691 while ( $row = $this->db->fetchObject( $res ) ) {
684692 if( $row->rc_cur_id != 0 ) {
685693 $this->addRaw( 'pageids', $row->rc_cur_id );
686694 }
687695 }
688696 $this->db->freeResult( $res );
 697+ $this->endProfiling($prop);
689698 }
690699
691700 /**
@@ -692,12 +701,12 @@
693702 */
694703 function genUserPages(&$prop, &$genInfo) {
695704 global $wgContLang;
696 -
 705+ $this->startProfiling();
697706 extract( $this->getParams( $prop, $genInfo ));
698707
699708 $this->validateLimit( 'uslimit', $uslimit, 50, 1000 );
700709
701 - $this->startProfiling();
 710+ $this->startDbProfiling();
702711 $res = $this->db->select(
703712 'user',
704713 'user_name',
@@ -705,7 +714,7 @@
706715 $this->classname . '::genUserPages',
707716 array( 'ORDER BY' => 'user_name', 'LIMIT' => $uslimit )
708717 );
709 - $this->endProfiling($prop);
 718+ $this->endDbProfiling($prop);
710719
711720 $userNS = $wgContLang->getNsText(NS_USER);
712721 if( !$userNS ) $userNS = 'User';
@@ -715,6 +724,7 @@
716725 $this->addRaw( 'titles', $userNS . $row->user_name );
717726 }
718727 $this->db->freeResult( $res );
 728+ $this->endProfiling($prop);
719729 }
720730
721731 /**
@@ -725,6 +735,7 @@
726736 // TODO: This is very inefficient - we can get the actual page information, instead we make two identical query.
727737 //
728738 global $wgContLang;
 739+ $this->startProfiling();
729740 extract( $this->getParams( $prop, $genInfo ));
730741
731742 $this->validateLimit( 'aplimit', $aplimit, 50, 1000 );
@@ -736,14 +747,14 @@
737748 $ns .= ':';
738749 }
739750
740 - $this->startProfiling();
 751+ $this->startDbProfiling();
741752 $res = $this->db->select(
742753 'page',
743754 'page_title',
744755 array( 'page_namespace' => intval($apnamespace), 'page_title>=' . $this->db->addQuotes($apfrom) ),
745756 $this->classname . '::genMetaAllPages',
746 - array( 'FORCE INDEX' => 'name_title', 'LIMIT' => $aplimit+1, 'ORDER BY' => 'page_namespace, page_title' ));
747 - $this->endProfiling($prop);
 757+ array( 'USE INDEX' => 'name_title', 'LIMIT' => $aplimit+1, 'ORDER BY' => 'page_namespace, page_title' ));
 758+ $this->endDbProfiling($prop);
748759
749760 // Add found page ids to the list of requested titles - they will be auto-populated later
750761 $count = 0;
@@ -756,6 +767,7 @@
757768 $this->addRaw( 'titles', $ns . $row->page_title );
758769 }
759770 $this->db->freeResult( $res );
 771+ $this->endProfiling($prop);
760772 }
761773
762774 /**
@@ -766,6 +778,7 @@
767779 // TODO: This is very inefficient - we can get the actual page information, instead we make two identical query.
768780 //
769781 global $wgContLang;
 782+ $this->startProfiling();
770783 extract( $this->getParams( $prop, $genInfo ));
771784 $this->validateLimit( 'nllimit', $nllimit, 50, 1000 );
772785 extract( $this->db->tableNames( 'page', 'langlinks' ) );
@@ -781,9 +794,9 @@
782795 . ' ORDER BY page_namespace, page_title'
783796 . ' LIMIT ' . intval($nllimit+1);
784797
785 - $this->startProfiling();
 798+ $this->startDbProfiling();
786799 $res = $this->db->query( $sql, $this->classname . '::genMetaNoLangLinksPages' );
787 - $this->endProfiling($prop);
 800+ $this->endDbProfiling($prop);
788801
789802 // Add found page ids to the list of requested titles - they will be auto-populated later
790803 $count = 0;
@@ -796,6 +809,7 @@
797810 $this->addRaw( 'pageids', $row->page_id );
798811 }
799812 $this->db->freeResult( $res );
 813+ $this->endProfiling($prop);
800814 }
801815
802816
@@ -812,6 +826,7 @@
813827 if( empty( $this->redirectPageIds ) ) {
814828 return;
815829 }
 830+ $this->startProfiling();
816831 extract( $this->db->tableNames( 'page', 'pagelinks' ) );
817832
818833 //
@@ -843,9 +858,9 @@
844859 "pb.page_is_redirect IS NULL OR pb.page_is_redirect = '1'"
845860 ), LIST_AND );
846861
847 - $this->startProfiling();
 862+ $this->startDbProfiling();
848863 $res = $this->db->query( $sql, $this->classname . '::genRedirectInfo' );
849 - $this->endProfiling('redirects');
 864+ $this->endDbProfiling('redirects');
850865 while ( $row = $this->db->fetchObject( $res ) ) {
851866 $this->addPageSubElement( $row->a_id, 'redirect', 'to', $this->getLinkInfo( $row->b_namespace, $row->b_title, $row->b_id, $row->b_is_redirect ), false);
852867 if( $row->b_is_redirect ) {
@@ -853,6 +868,7 @@
854869 }
855870 }
856871 $this->db->freeResult( $res );
 872+ $this->endProfiling($prop);
857873 }
858874
859875 var $genPageLinksSettings = array( // database column name prefix, output element name
@@ -867,9 +883,10 @@
868884 if( empty($this->nonRedirPageIds) ) {
869885 return;
870886 }
 887+ $this->startProfiling();
871888 extract( $this->genPageLinksSettings[$prop] );
872889
873 - $this->startProfiling();
 890+ $this->startDbProfiling();
874891 $res = $this->db->select(
875892 $linktbl,
876893 array( "{$prefix}_from from_id",
@@ -877,7 +894,7 @@
878895 "{$prefix}_title to_title" ),
879896 array( "{$prefix}_from" => $this->nonRedirPageIds ),
880897 $this->classname . "::genPageLinks_{$code}" );
881 - $this->endProfiling($prop);
 898+ $this->endDbProfiling($prop);
882899
883900 while ( $row = $this->db->fetchObject( $res ) ) {
884901 if( $langlinks ) {
@@ -888,6 +905,7 @@
889906 $this->addPageSubElement( $row->from_id, $prop, $code, $values);
890907 }
891908 $this->db->freeResult( $res );
 909+ $this->endProfiling($prop);
892910 }
893911
894912 var $genPageBackLinksSettings = array( // database column name prefix, output element name
@@ -900,7 +918,7 @@
901919 * $type - either 'template' or 'page'
902920 */
903921 function genPageBackLinksHelper(&$prop, &$genInfo) {
904 -
 922+ $this->startProfiling();
905923 extract( $this->genPageBackLinksSettings[$prop] );
906924
907925 //
@@ -1009,14 +1027,14 @@
10101028 //
10111029 // Execute
10121030 //
1013 - $this->startProfiling();
 1031+ $this->startDbProfiling();
10141032 $res = $this->db->select(
10151033 array( $linktbl, 'page' ),
10161034 $columns,
10171035 $where,
10181036 $this->classname . "::genPageBackLinks_{$code}",
10191037 $options );
1020 - $this->endProfiling($prop);
 1038+ $this->endDbProfiling($prop);
10211039
10221040 $count = 0;
10231041 while ( $row = $this->db->fetchObject( $res ) ) {
@@ -1031,6 +1049,7 @@
10321050 $this->addPageSubElement( $pageId, $prop, $code, $values );
10331051 }
10341052 $this->db->freeResult( $res );
 1053+ $this->endProfiling($prop);
10351054 }
10361055
10371056 /**
@@ -1040,9 +1059,11 @@
10411060 if( empty( $this->existingPageIds ) ) {
10421061 return;
10431062 }
 1063+ $this->startProfiling();
10441064 extract( $this->getParams( $prop, $genInfo ));
10451065
1046 - $fields = array('rev_id', 'rev_timestamp', 'rev_user', 'rev_user_text', 'rev_minor_edit');
 1066+ $tables = array('revision');
 1067+ $fields = array('rev_id', 'rev_text_id', 'rev_timestamp', 'rev_user', 'rev_user_text', 'rev_minor_edit');
10471068 if( isset($rvcomments) ) {
10481069 $fields[] = 'rev_comment';
10491070 }
@@ -1060,15 +1081,30 @@
10611082 if( $rvoffset !== 0 ) {
10621083 $options['OFFSET'] = $rvoffset;
10631084 }
1064 - $this->validateLimit( 'rvlimit * pages', $rvlimit * count($this->existingPageIds), 200, 2000 );
1065 -
1066 - $this->startProfiling();
 1085+ if( isset($rvcontent) ) {
 1086+ $this->validateLimit( 'content + rvlimit * pages', $rvlimit * count($this->existingPageIds), 50, 200 );
 1087+ $tables[] = 'text';
 1088+ $fields[] = 'old_id';
 1089+ $fields[] = 'old_text';
 1090+ $fields[] = 'old_flags';
 1091+ $conds[] = 'rev_text_id=old_id';
 1092+ } else {
 1093+ $this->validateLimit( 'rvlimit * pages', $rvlimit * count($this->existingPageIds), 200, 2000 );
 1094+ }
 1095+
 1096+ $this->startDbProfiling();
10671097 foreach( $this->existingPageIds as $pageId ) {
10681098 $conds['rev_page'] = $pageId;
1069 - $res = $this->db->select( 'revision', $fields, $conds, $this->classname . '::genPageHistory', $options );
 1099+ $res = $this->db->select(
 1100+ $tables,
 1101+ $fields,
 1102+ $conds,
 1103+ $this->classname . '::genPageHistory',
 1104+ $options );
10701105 while ( $row = $this->db->fetchObject( $res ) ) {
10711106 $vals = array(
10721107 'revid' => $row->rev_id,
 1108+ 'oldid' => $row->rev_text_id,
10731109 'timestamp' => wfTimestamp( TS_ISO_8601, $row->rev_timestamp ),
10741110 'user' => $row->rev_user_text,
10751111 );
@@ -1078,11 +1114,17 @@
10791115 if( $row->rev_minor_edit ) {
10801116 $vals['minor'] = '';
10811117 }
1082 - $vals['*'] = $rvcomments ? $row->rev_comment : '';
 1118+ if( $rvcomments ) {
 1119+ $vals['comment'] = $row->rev_comment;
 1120+ }
 1121+ if( isset($rvcontent) ) {
 1122+ $vals['*'] = Revision::getRevisionText( $row );
 1123+ }
10831124 $this->addPageSubElement( $pageId, 'revisions', 'rv', $vals);
10841125 }
10851126 $this->db->freeResult( $res );
10861127 }
 1128+ $this->endDbProfiling($prop);
10871129 $this->endProfiling($prop);
10881130 }
10891131
@@ -1093,23 +1135,33 @@
10941136 if( empty( $this->existingPageIds ) ) {
10951137 return;
10961138 }
1097 - $this->validateLimit( 'co_querytoobig', count($this->existingPageIds), 50, 200 );
10981139 $this->startProfiling();
 1140+
 1141+ // Generate the WHERE clause for pageIds+RevisionIds
 1142+ $ids = array();
 1143+ foreach( $this->data['pages'] as $pageId => &$page ) {
 1144+ if( $pageId > 0 ) {
 1145+ $ids[] = "(rev_page=$pageId AND rev_id={$page['revid']})";
 1146+ }
 1147+ }
 1148+ $this->validateLimit( 'co_querytoobig', count($ids), 50, 200 );
 1149+
 1150+ $this->startDbProfiling();
10991151 $res = $this->db->select(
1100 - array('page', 'revision', 'text'),
1101 - array('page_id', 'old_id', 'old_text', 'old_flags'),
1102 - array('page_id=rev_page', 'page_latest=rev_id', 'rev_text_id=old_id', 'page_id' => $this->existingPageIds),
 1152+ array('revision', 'text'),
 1153+ array('rev_page', 'old_id', 'old_text', 'old_flags'),
 1154+ array('rev_text_id=old_id', implode('OR', $ids)),
11031155 $this->classname . '::genPageContent'
11041156 );
11051157 while ( $row = $this->db->fetchObject( $res ) ) {
1106 - $this->addPageSubElement( $row->page_id, $prop, 'xml:space', 'preserve', false);
1107 - $this->addPageSubElement( $row->page_id, $prop, '*', Revision::getRevisionText( $row ), false);
 1158+ $this->addPageSubElement( $row->rev_page, $prop, 'xml:space', 'preserve', false);
 1159+ $this->addPageSubElement( $row->rev_page, $prop, '*', Revision::getRevisionText( $row ), false);
11081160 }
11091161 $this->db->freeResult( $res );
1110 - $this->endProfiling($prop); // getRevisionText is also a database call
 1162+ $this->endDbProfiling($prop); // Revision::getRevisionText is also a database call, so we include them in this scope
 1163+ $this->endProfiling($prop);
11111164 }
11121165
1113 -
11141166 //
11151167 // ************************************* UTILITIES *************************************
11161168 //
@@ -1385,15 +1437,33 @@
13861438 function startProfiling() {
13871439 $this->startTime = wfTime();
13881440 }
 1441+ /**
 1442+ * Same as startProfiling, but used for DB access only
 1443+ */
 1444+ function startDbProfiling() {
 1445+ $this->startDbTime = wfTime();
 1446+ }
13891447
13901448 /**
1391 - * Records the running time of the given module since last startProfiling() call.
 1449+ * Records the running time of the given module since last startProfiling() call.
13921450 */
13931451 function endProfiling( $module ) {
1394 - $timeDelta = wfTime() - $this->startTime;
1395 - unset($this->startTime);
1396 - $this->addStatusMessage( $module, array( 'time' => sprintf( "%1.2fms", $timeDelta * 1000.0 ) ));
 1452+ $this->recordProfiling( $module, 'time', $this->startTime );
13971453 }
 1454+ /**
 1455+ * Same as endProfiling, but used for DB access only
 1456+ */
 1457+ function endDbProfiling( $module ) {
 1458+ $this->recordProfiling( $module, 'dbtime', $this->startDbTime );
 1459+ }
 1460+ /**
 1461+ * Helper profiling function
 1462+ */
 1463+ function recordProfiling( $module, $type, &$start ) {
 1464+ $timeDelta = wfTime() - $start;
 1465+ unset($start);
 1466+ $this->addStatusMessage( $module, array( $type => sprintf( "%1.2fms", $timeDelta * 1000.0 ) ));
 1467+ }
13981468
13991469 /**
14001470 * Validate the value against the minimum and user/bot maximum limits. Prints usage info on failure.

Status & tagging log