r14130 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r14129‎ | r14130 | r14131 >
Date:07:17, 8 May 2006
Author:yurik
Status:old
Tags:
Comment:
* Enhanced redirects information
* Removed dblRedirects (performance reasons)
* Minor cleanup
Modified paths:
  • /trunk/extensions/BotQuery/query.php (modified) (history)

Diff [purge]

Index: trunk/extensions/BotQuery/query.php
@@ -159,27 +159,26 @@
160160 "uslimit - how many total links to return.",
161161 "Example: query.php?what=users&usfrom=Y",
162162 )),
163 - 'dblredirects' => array( "genMetaDoubleRedirects", true,
164 - array( 'dfoffset', 'drlimit' ),
165 - array( 0, 50 ),
166 - array(
167 - "List of double-redirect pages",
168 - "THIS QUERY IS CURRENTLY DISABLED DUE TO PERFORMANCE REASONS",
169 - "Example: query.php?what=dblredirects",
170 - )),
171163
 164+ //
172165 // Page-specific Generators
173 - 'links' => array( "genPageLinks", false, null, null, array(
 166+ //
 167+ 'redirects' => array( "genRedirectInfo", false, null, null, array(
 168+ "For all given redirects, provides additional information such as pageIds and double-redirection",
 169+ "Example: query.php?what=redirects&titles=Main_page",
 170+ " query.php?what=recentchanges|redirects (Which of the recent changes are redirects?)",
 171+ )),
 172+ 'links' => array( "genPageLinksHelper", false, null, null, array(
174173 "List of regular page links",
175174 "Example: query.php?what=links&titles=MediaWiki|Wikipedia",
176175 )),
177 - 'langlinks' => array( "genPageLangLinks", false, null, null, array(
 176+ 'langlinks' => array( "genPageLinksHelper", false, null, null, array(
178177 "Inter-language links",
179178 "Example: query.php?what=langlinks&titles=MediaWiki|Wikipedia",
180179 )),
181 - 'templates' => array( "genPageTemplates", false, null, null, array(
 180+ 'templates' => array( "genPageLinksHelper", false, null, null, array(
182181 "List of used templates",
183 - "Example: query.php?what=templates&titles=Main%20Page",
 182+ "Example: query.php?what=templates&titles=Main_Page",
184183 )),
185184 'backlinks' => array( "genPageBackLinksHelper", false,
186185 array( 'blfilter', 'bllimit', 'blcontfrom' ),
@@ -387,7 +386,6 @@
388387 if( $titleString !== $titleObj->getPrefixedText() ) {
389388 $this->normalizedTitles[$titleString] = &$titleObj;
390389 }
391 -
392390 }
393391 if ( $linkBatch->isEmpty() ) {
394392 $this->dieUsage( "no titles could be found", 'pi_novalidtitles' );
@@ -439,7 +437,7 @@
440438 //
441439 // Query page information with the given lists of titles & pageIDs
442440 //
443 - $redirects = array();
 441+ $this->redirectPageIds = array();
444442 $this->startProfiling();
445443 $res = $this->db->select( 'page',
446444 array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_touched', 'page_latest' ),
@@ -462,7 +460,7 @@
463461 $data['revid'] = $row->page_latest;
464462 if ( $row->page_is_redirect ) {
465463 $data['redirect'] = '';
466 - $redirects[] = $row->page_id;
 464+ $this->redirectPageIds[] = $row->page_id;
467465 }
468466
469467 // Strike out link
@@ -475,7 +473,7 @@
476474 // Create lists that can later be used to filter other tables by page Id or other useful query strings
477475 //
478476 $this->existingPageIds = array_keys( $this->data['pages'] );
479 - $this->nonRedirPageIds = array_diff_key($this->existingPageIds, $redirects);
 477+ $this->nonRedirPageIds = array_diff($this->existingPageIds, $this->redirectPageIds);
480478
481479 //
482480 // Create records for non-existent page IDs
@@ -512,16 +510,10 @@
513511 }
514512
515513 //
516 - // Process redirects
 514+ // Mark redirects as such. More information can be given with 'redirects' property
517515 //
518 - if( $redirects ) {
519 - // If the user requested links, redirect links will be populated.
520 - // Otherwise, we have to do it manually here by calling links generator with a custom list of IDs
521 - $prop = 'links';
522 - if( !in_array( $prop, $this->properties )) {
523 - $generator = $this->propGenerators[$prop];
524 - $this->{$generator[GEN_FUNCTION]}( $prop, $generator, $redirects );
525 - }
 516+ foreach( $this->redirectPageIds as $pageid ) {
 517+ $this->data['pages'][$pageid]['redirect'] = '';
526518 }
527519
528520 //
@@ -537,6 +529,9 @@
538530 return true; // success
539531 }
540532
 533+ //
 534+ // ************************************* META GENERATORS *************************************
 535+ //
541536 function genMetaSiteInfo(&$prop, &$genInfo) {
542537 global $wgSitename, $wgVersion, $wgCapitalLinks;
543538 $meta = array();
@@ -671,8 +666,8 @@
672667
673668 $this->startProfiling();
674669 $res = $this->db->select(
675 - array( 'page' ),
676 - array( 'page_title' ),
 670+ 'page',
 671+ 'page_title',
677672 array( 'page_namespace=' . $this->db->addQuotes($apnamespace) . ' AND page_title>=' . $this->db->addQuotes($apfrom) ),
678673 $this->classname . '::genMetaAllPages',
679674 array( 'FORCE INDEX' => 'name_title', 'LIMIT' => $aplimit+1, 'ORDER BY' => 'page_title' ));
@@ -696,131 +691,110 @@
697692 $this->db->freeResult( $res );
698693 }
699694
700 - function genMetaDoubleRedirects(&$prop, &$genInfo) {
701 - global $wgUser;
 695+ //
 696+ // ************************************* PAGE INFO GENERATORS *************************************
 697+ //
702698
703 - $this->dieUsage( "DoubleRedirect generator is disabled until caching is implemented", 'dr_disabled' );
704 -
705 - if( !$wgUser->isBot() ) {
706 - $this->dieUsage( "Only bots are allowed to query for double-redirects", 'dr_notbot' );
 699+ /**
 700+ * Populate redirect data. Redirects may be one of the following:
 701+ * Redir to nonexisting, Existing page, or Existing redirect.
 702+ * Existing redirect may point to yet another nonexisting or existing page (which in turn may also be a redirect)
 703+ */
 704+ function genRedirectInfo(&$prop, &$genInfo) {
 705+ if( empty( $this->redirectPageIds ) ) {
 706+ return;
707707 }
708 -
709 - extract( $this->getParams( $prop, $genInfo ));
710708 extract( $this->db->tableNames( 'page', 'pagelinks' ) );
711 -
712 - $sql = "SELECT " .
713 - " pa.page_id id_a," .
714 - " pb.page_id id_b," .
715 - " pc.page_id id_c" .
716 - " FROM $pagelinks AS la, $pagelinks AS lb, $page AS pa, $page AS pb, $page AS pc" .
717 - " WHERE pa.page_is_redirect=1 AND pb.page_is_redirect=1" .
718 - " AND la.pl_from=pa.page_id" .
719 - " AND la.pl_namespace=pb.page_namespace" .
720 - " AND la.pl_title=pb.page_title" .
721 - " AND lb.pl_from=pb.page_id" .
722 - " AND lb.pl_namespace=pc.page_namespace" .
723 - " AND lb.pl_title=pc.page_title";
724 -
725 - $sql = $this->db->limitResult( $sql, $drlimit, $droffset );
726709
727 - // Add found page ids to the list of requested ids - they will be auto-populated later
728 - $this->startProfiling();
729 - $res = $this->db->query( $sql, $this->classname . '::genMetaDoubleRedirects' );
730 - $this->endProfiling($prop);
731 - while ( $row = $this->db->fetchObject( $res ) ) {
732 - $this->addRaw( 'pageids', $row->id_a .'|'. $row->id_b .'|'. $row->id_c );
733 - $this->data['pages'][$row->id_a]['dblredirect'] = $row->id_c;
734 - }
735 - $this->db->freeResult( $res );
736 - }
 710+ //
 711+ // Two part query:
 712+ // first part finds all the redirects whose targets are regular existing pages
 713+ // second part finds targets that either do not exist or are redirects themselves.
 714+ //
 715+ $sql = 'SELECT '
 716+ . 'la.pl_from a_id,'
 717+ . 'la.pl_namespace b_namespace, la.pl_title b_title, pb.page_id b_id, pb.page_is_redirect b_is_redirect, '
 718+ . 'null c_namespace, null c_title, null c_id, null c_is_redirect '
 719+ . "FROM $pagelinks AS la, $page AS pb "
 720+ . ' WHERE ' . $this->db->makeList( array(
 721+ 'la.pl_from' => $this->redirectPageIds,
 722+ 'la.pl_namespace = pb.page_namespace',
 723+ 'la.pl_title = pb.page_title',
 724+ 'pb.page_is_redirect' => 0
 725+ ), LIST_AND )
 726+ . ' UNION SELECT '
 727+ . 'la.pl_from a_id,'
 728+ . 'la.pl_namespace b_namespace, la.pl_title b_title, pb.page_id b_id, pb.page_is_redirect b_is_redirect,'
 729+ . 'lb.pl_namespace c_namespace, lb.pl_title c_title, pc.page_id c_id, pc.page_is_redirect c_is_redirect '
 730+ . 'FROM '
 731+ . "(($pagelinks AS la LEFT JOIN $page AS pb ON la.pl_namespace = pb.page_namespace AND la.pl_title = pb.page_title) LEFT JOIN "
 732+ . "$pagelinks AS lb ON pb.page_id = lb.pl_from) LEFT JOIN "
 733+ . "$page AS pc ON lb.pl_namespace = pc.page_namespace AND lb.pl_title = pc.page_title "
 734+ . ' WHERE ' . $this->db->makeList( array(
 735+ 'la.pl_from' => $this->redirectPageIds,
 736+ "pb.page_is_redirect IS NULL OR pb.page_is_redirect = '1'"
 737+ ), LIST_AND );
737738
738 - function genPageLangLinks(&$prop, &$genInfo) {
739 - if( !$this->nonRedirPageIds ) {
740 - return;
741 - }
742739 $this->startProfiling();
743 - $res = $this->db->select(
744 - array( 'langlinks' ),
745 - array( 'll_from', 'll_lang', 'll_title' ),
746 - array( 'll_from' => $this->nonRedirPageIds ),
747 - $this->classname . '::genPageLangLinks' );
748 - $this->endProfiling($prop);
 740+ $res = $this->db->query( $sql, $this->classname . '::genRedirectInfo' );
 741+ $this->endProfiling('redirects');
749742 while ( $row = $this->db->fetchObject( $res ) ) {
750 - $this->addPageSubElement( $row->ll_from, 'langlinks', 'll', array('lang' => $row->ll_lang, '*' => $row->ll_title));
 743+ $this->addPageSubElement( $row->a_id, 'redirect', 'to', $this->getLinkInfo( $row->b_namespace, $row->b_title, $row->b_id, $row->b_is_redirect ), false);
 744+ if( $row->b_is_redirect ) {
 745+ $this->addPageSubElement( $row->a_id, 'redirect', 'dblredirectto', $this->getLinkInfo( $row->c_namespace, $row->c_title, $row->c_id, $row->c_is_redirect ), false);
 746+ }
751747 }
752748 $this->db->freeResult( $res );
753749 }
754750
755 - function genPageTemplates(&$prop, &$genInfo) {
756 - if( !$this->nonRedirPageIds ) {
757 - return;
758 - }
759 - $this->startProfiling();
760 - $res = $this->db->select(
761 - 'templatelinks',
762 - array( 'tl_from', 'tl_namespace', 'tl_title' ),
763 - array( 'tl_from' => $this->nonRedirPageIds ),
764 - $this->classname . '::genPageTemplates' );
765 - $this->endProfiling($prop);
766 - while ( $row = $this->db->fetchObject( $res ) ) {
767 - $this->addPageSubElement( $row->tl_from, 'templates', 'tl', $this->getLinkInfo( $row->tl_namespace, $row->tl_title ));
768 - }
769 - $this->db->freeResult( $res );
770 - }
 751+ var $genPageLinksSettings = array( // database column name prefix, output element name
 752+ 'links' => array( 'prefix' => 'pl', 'code' => 'l', 'linktbl' => 'pagelinks', 'langlinks' => false ),
 753+ 'langlinks' => array( 'prefix' => 'll', 'code' => 'll', 'linktbl' => 'langlinks', 'langlinks' => true ),
 754+ 'templates' => array( 'prefix' => 'tl', 'code' => 'tl', 'linktbl' => 'templatelinks', 'langlinks' => false ));
771755
772756 /**
773 - * Generates list of links for all pages. Optionally it can be called to populate only a subset of pages by given ids.
 757+ * Generates list of links/langlinks/templates for all non-redirect pages.
774758 */
775 - function genPageLinks(&$prop, &$genInfo, $pageIdsList = null ) {
776 - if( $pageIdsList === null ) {
777 - $pageIdsList = $this->nonRedirPageIds;
778 - }
779 - if( !$pageIdsList ) {
 759+ function genPageLinksHelper(&$prop, &$genInfo) {
 760+ if( empty($this->nonRedirPageIds) ) {
780761 return;
781762 }
 763+ extract( $this->genPageLinksSettings[$prop] );
 764+
782765 $this->startProfiling();
783766 $res = $this->db->select(
784 - 'pagelinks',
785 - array( 'pl_from', 'pl_namespace', 'pl_title' ),
786 - array( 'pl_from' => $pageIdsList ),
787 - $this->classname . '::genPageLinks' );
 767+ $linktbl,
 768+ array( "{$prefix}_from from_id",
 769+ ($langlinks ? 'll_lang' : "{$prefix}_namespace to_namespace"),
 770+ "{$prefix}_title to_title" ),
 771+ array( "{$prefix}_from" => $this->nonRedirPageIds ),
 772+ $this->classname . "::genPageLinks_{$code}" );
788773 $this->endProfiling($prop);
 774+
789775 while ( $row = $this->db->fetchObject( $res ) ) {
790 - $this->addPageSubElement( $row->pl_from, 'links', 'l', $this->getLinkInfo( $row->pl_namespace, $row->pl_title ));
 776+ if( $langlinks ) {
 777+ $values = array('lang' => $row->ll_lang, '*' => $row->to_title);
 778+ } else {
 779+ $values = $this->getLinkInfo( $row->to_namespace, $row->to_title );
 780+ }
 781+ $this->addPageSubElement( $row->from_id, $prop, $code, $values);
791782 }
792783 $this->db->freeResult( $res );
793 - }
 784+ }
794785
 786+ var $genPageBackLinksSettings = array( // database column name prefix, output element name
 787+ 'embeddedin' => array( 'prefix' => 'tl', 'code' => 'ei', 'linktbl' => 'templatelinks', 'isImage' => false ),
 788+ 'backlinks' => array( 'prefix' => 'pl', 'code' => 'bl', 'linktbl' => 'pagelinks', 'isImage' => false ),
 789+ 'imagelinks' => array( 'prefix' => 'il', 'code' => 'il', 'linktbl' => 'imagelinks', 'isImage' => true ));
 790+
795791 /**
796792 * Generate backlinks for either links, templates, or both
797793 * $type - either 'template' or 'page'
798794 */
799795 function genPageBackLinksHelper(&$prop, &$genInfo) {
800 - //
801 - // Determine what is being asked
802 - //
803 - $isImage = false;
804 - switch( $prop ) {
805 - case 'embeddedin' :
806 - $prefix = 'tl'; // database column name prefix
807 - $code = 'ei';
808 - $linktbl = 'templatelinks';
809 - break;
810 - case 'backlinks' :
811 - $prefix = 'pl';
812 - $code = 'bl';
813 - $linktbl = 'pagelinks';
814 - break;
815 - case 'imagelinks' :
816 - $prefix = 'il';
817 - $code = 'il';
818 - $linktbl = 'imagelinks';
819 - $isImage = true;
820 - break;
821 - default :
822 - die("unknown type");
823 - }
824796
 797+ extract( $this->genPageBackLinksSettings[$prop] );
 798+
825799 //
826800 // Parse and validate parameters
827801 //
@@ -833,7 +807,6 @@
834808 } else {
835809 $filter = $filter[0];
836810 }
837 -
838811 //
839812 // Parse contFrom - will be in the format ns|db_key|page_id - determine how to continue
840813 //
@@ -870,7 +843,6 @@
871844 default:
872845 $this->dieUsage( "{$code}filter '$filter' is not one of the allowed: 'all', 'existing' [default], and 'nonredirects'", "{$code}_badfilter" );
873846 }
874 -
875847 //
876848 // Make a list of pages to query
877849 //
@@ -894,12 +866,10 @@
895867 $linkBatch->addObj( $title );
896868 }
897869 }
898 -
899870 if( $linkBatch->isEmpty() ) {
900871 $this->addStatusMessage( $prop, array('error'=>'emptyrequest') );
901872 return; // Nothing to do
902873 }
903 -
904874 //
905875 // Create query parameters
906876 //
@@ -927,7 +897,6 @@
928898 }
929899 }
930900 $options = array( 'ORDER BY' => $orderBy, 'LIMIT' => $limit );
931 -
932901 //
933902 // Execute
934903 //
@@ -961,7 +930,7 @@
962931 }
963932
964933 function genPageHistory(&$prop, &$genInfo) {
965 - if( !$this->existingPageIds ) {
 934+ if( empty( $this->existingPageIds ) ) {
966935 return;
967936 }
968937 extract( $this->getParams( $prop, $genInfo ));
@@ -1078,7 +1047,15 @@
10791048 return $val;
10801049 }
10811050
1082 - function getTitleInfo( $title, $id = 0 ) {
 1051+ function getLinkInfo( $ns, $title, $id = -1, $isRedirect = false ) {
 1052+ return $this->getTitleInfo( Title::makeTitle( $ns, $title ), $id, $isRedirect );
 1053+ }
 1054+
 1055+ /**
 1056+ * Creates an element <$title ns='xx' iw='xx' id='xx'>Prefixed Title</$title>
 1057+ * All attributes are optional.
 1058+ */
 1059+ function getTitleInfo( $title, $id = -1, $isRedirect = false ) {
10831060 $data = array();
10841061 if( $title->getNamespace() != NS_MAIN ) {
10851062 $data['ns'] = $title->getNamespace();
@@ -1086,22 +1063,31 @@
10871064 if( $title->isExternal() ) {
10881065 $data['iw'] = $title->getInterwiki();
10891066 }
1090 - if( $id !== 0 ) {
 1067+ if( $id === null ) {
 1068+ $id = 0;
 1069+ }
 1070+ if( $id >= 0 ) {
10911071 $data['id'] = $id;
10921072 }
 1073+ if( $isRedirect ) {
 1074+ $data['redirect'] = 'true';
 1075+ }
10931076 $data['*'] = $title->getPrefixedText();
10941077
10951078 return $data;
10961079 }
10971080
1098 - function getLinkInfo( $ns, $title, $id = 0 ) {
1099 - return $this->getTitleInfo( Title::makeTitle( $ns, $title ));
1100 - }
1101 -
1102 - function addPageSubElement( $pageId, $mainElem, $itemElem, $params ) {
 1081+ function addPageSubElement( $pageId, $mainElem, $itemElem, $params, $multiItems = true ) {
11031082 $data = & $this->data['pages'][$pageId][$mainElem];
1104 - $data['_element'] = $itemElem;
1105 - $data[] = $params;
 1083+ if( $multiItems ) {
 1084+ $data['_element'] = $itemElem;
 1085+ $data[] = $params;
 1086+ } else {
 1087+ if( !empty($data) && (array_key_exists( $itemElem, $data ) || array_key_exists( '_element', $data ))) {
 1088+ die("Internal error: multiple calls to addPageSubElement($itemElem)");
 1089+ }
 1090+ $data[$itemElem] = $params;
 1091+ }
11061092 }
11071093
11081094 function prepareTimestamp( $value ) {

Status & tagging log