r39467 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r39466‎ | r39467 | r39468 >
Date:10:13, 16 August 2008
Author:krimpet
Status:old
Tags:
Comment:
Fix for problems with r39414; LinkHolderArray::replaceInterwiki() was badly broken
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/LinkCache.php (modified) (history)
  • /trunk/phase3/includes/MessageCache.php (modified) (history)
  • /trunk/phase3/includes/StringUtils.php (modified) (history)
  • /trunk/phase3/includes/Title.php (modified) (history)
  • /trunk/phase3/includes/parser/LinkHolderArray.php (added) (history)
  • /trunk/phase3/includes/parser/LinkHolderArray.php (added) (history)
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/LinkHolderArray.php
@@ -0,0 +1,406 @@
 2+<?php
 3+
 4+class LinkHolderArray {
 5+ var $batchSize = 1000;
 6+
 7+ var $internals = array(), $interwikis = array();
 8+ var $size = 0;
 9+ var $parent;
 10+
 11+ function __construct( $parent ) { // $parent is the owning Parser; used for nextLinkID(), getOptions() and getOutput()
 12+ $this->parent = $parent;
 13+ }
 14+
 15+ /**
 16+ * Merge another LinkHolderArray into this one
 17+ */
 18+ function merge( $other ) {
 19+ foreach ( $other->internals as $ns => $entries ) {
 20+ $this->size += count( $entries );
 21+ if ( !isset( $this->internals[$ns] ) ) {
 22+ $this->internals[$ns] = $entries;
 23+ } else {
 24+ $this->internals[$ns] += $entries;
 25+ }
 26+ }
 27+ $this->interwikis += $other->interwikis;
 28+ }
 29+
 30+ /**
 31+ * Returns true once more than $batchSize holders are stored, i.e. memory use is getting large
 32+ */
 33+ function isBig() {
 34+ return $this->size > $this->batchSize;
 35+ }
 36+
 37+ /**
 38+ * Clear all stored link holders (internal and interwiki) and reset the size counter.
 39+ * Make sure you don't have any text left using these link holders, before you call this
 40+ */
 41+ function clear() {
 42+ $this->internals = array();
 43+ $this->interwikis = array();
 44+ $this->size = 0;
 45+ }
 46+
 47+ /**
 48+ * Make a link placeholder. The text returned can be later resolved to a real link with
 49+ * replace(). This is done for two reasons: firstly to avoid further
 50+ * parsing of interwiki links, and secondly to allow all existence checks and
 51+ * article length checks (for stub links) to be bundled into a single query.
 52+ * Returns the placeholder comment followed by the part of $trail that is not link text.
 53+ */
 54+ function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
 55+ wfProfileIn( __METHOD__ );
 56+ if ( ! is_object($nt) ) {
 57+ # Fail gracefully: no holder is stored, the raw text is emitted after an error marker
 58+ $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
 59+ } else {
 60+ # Separate the link trail from the rest of the link
 61+ list( $inside, $trail ) = Linker::splitTrail( $trail );
 62+
 63+ $entry = array(
 64+ 'title' => $nt,
 65+ 'text' => $prefix.$text.$inside,
 66+ 'pdbk' => $nt->getPrefixedDBkey(),
 67+ );
 68+ if ( $query !== '' ) {
 69+ $entry['query'] = $query;
 70+ }
 71+
 72+ if ( $nt->isExternal() ) {
 73+ // Use a globally unique ID to keep the objects mergable
 74+ $key = $this->parent->nextLinkID();
 75+ $this->interwikis[$key] = $entry;
 76+ $retVal = "<!--IWLINK $key-->{$trail}";
 77+ } else {
 78+ $key = $this->parent->nextLinkID();
 79+ $ns = $nt->getNamespace();
 80+ $this->internals[$ns][$key] = $entry;
 81+ $retVal = "<!--LINK $ns:$key-->{$trail}";
 82+ }
 83+ $this->size++;
 84+ }
 85+ wfProfileOut( __METHOD__ );
 86+ return $retVal;
 87+ }
 88+
 89+ /**
 90+ * Replace <!--LINK--> and <!--IWLINK--> link placeholders with actual links, in the buffer
 91+ * Placeholders are created by makeHolder()
 92+ * Returns the value of replaceInternal(); intended to be link CSS classes, indexed by PDBK.
 93+ */
 94+ function replace( &$text ) {
 95+ wfProfileIn( __METHOD__ );
 96+
 97+ $colours = $this->replaceInternal( $text );
 98+ $this->replaceInterwiki( $text );
 99+
 100+ wfProfileOut( __METHOD__ );
 101+ return $colours;
 102+ }
 103+
 104+ /**
 105+ * Replace internal links
 106+ */
 107+ protected function replaceInternal( &$text ) {
 108+ if ( !$this->internals ) {
 109+ return;
 110+ }
 111+
 112+ wfProfileIn( __METHOD__ );
 113+ global $wgUser, $wgContLang;
 114+
 115+ $pdbks = array();
 116+ $colours = array();
 117+ $linkcolour_ids = array();
 118+ $sk = $this->parent->getOptions()->getSkin();
 119+ $linkCache = LinkCache::singleton();
 120+ $output = $this->parent->getOutput();
 121+
 122+ wfProfileIn( __METHOD__.'-check' );
 123+ $dbr = wfGetDB( DB_SLAVE );
 124+ $page = $dbr->tableName( 'page' );
 125+ $threshold = $wgUser->getOption('stubthreshold');
 126+
 127+ # Sort by namespace
 128+ ksort( $this->internals );
 129+
 130+ # Generate query
 131+ $query = false;
 132+ $current = null;
 133+ foreach ( $this->internals as $ns => $entries ) {
 134+ foreach ( $entries as $index => $entry ) {
 135+ $key = "$ns:$index";
 136+ $title = $entry['title'];
 137+ $pdbk = $entry['pdbk'];
 138+
 139+ # Skip invalid entries.
 140+ # Result will be ugly, but prevents crash.
 141+ if ( is_null( $title ) ) {
 142+ continue;
 143+ }
 144+
 145+ # Check if it's a static known link, e.g. interwiki
 146+ if ( $title->isAlwaysKnown() ) {
 147+ $colours[$pdbk] = '';
 148+ } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
 149+ $colours[$pdbk] = '';
 150+ $output->addLink( $title, $id );
 151+ } elseif ( $linkCache->isBadLink( $pdbk ) ) {
 152+ $colours[$pdbk] = 'new';
 153+ } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) {
 154+ $colours[$pdbk] = 'new';
 155+ } else {
 156+ # Not in the link cache, add it to the query
 157+ if ( !isset( $current ) ) {
 158+ $current = $ns;
 159+ $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
 160+ $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
 161+ } elseif ( $current != $ns ) {
 162+ $current = $ns;
 163+ $query .= ")) OR (page_namespace=$ns AND page_title IN(";
 164+ } else {
 165+ $query .= ', ';
 166+ }
 167+
 168+ $query .= $dbr->addQuotes( $title->getDBkey() );
 169+ }
 170+ }
 171+ }
 172+ if ( $query ) {
 173+ $query .= '))';
 174+
 175+ $res = $dbr->query( $query, __METHOD__ );
 176+
 177+ # Fetch data and form into an associative array
 178+ # non-existent = broken
 179+ while ( $s = $dbr->fetchObject($res) ) {
 180+ $title = Title::makeTitle( $s->page_namespace, $s->page_title );
 181+ $pdbk = $title->getPrefixedDBkey();
 182+ $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect );
 183+ $output->addLink( $title, $s->page_id );
 184+ $colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
 185+ //add id to the extension todolist
 186+ $linkcolour_ids[$s->page_id] = $pdbk;
 187+ }
 188+ unset( $res );
 189+ //pass an array of page_ids to an extension
 190+ wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
 191+ }
 192+ wfProfileOut( __METHOD__.'-check' );
 193+
 194+ # Do a second query for different language variants of links and categories
 195+ if($wgContLang->hasVariants()){
 196+ $linkBatch = new LinkBatch();
 197+ $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
 198+ $categoryMap = array(); // maps $category_variant => $category (dbkeys)
 199+ $varCategories = array(); // category replacements oldDBkey => newDBkey
 200+
 201+ $categories = $output->getCategoryLinks();
 202+
 203+ // Add variants of links to link batch
 204+ foreach ( $this->internals as $ns => $entries ) {
 205+ foreach ( $entries as $index => $entry ) {
 206+ $key = "$ns:$index";
 207+ $pdbk = $entry['pdbk'];
 208+ $title = $entry['title'];
 209+ $titleText = $title->getText();
 210+
 211+ // generate all variants of the link title text
 212+ $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText);
 213+
 214+ // if link was not found (in first query), add all variants to query
 215+ if ( !isset($colours[$pdbk]) ){
 216+ foreach($allTextVariants as $textVariant){
 217+ if($textVariant != $titleText){
 218+ $variantTitle = Title::makeTitle( $ns, $textVariant );
 219+ if(is_null($variantTitle)) continue;
 220+ $linkBatch->addObj( $variantTitle );
 221+ $variantMap[$variantTitle->getPrefixedDBkey()][] = $key;
 222+ }
 223+ }
 224+ }
 225+ }
 226+ }
 227+
 228+ // process categories, check if a category exists in some variant
 229+ foreach( $categories as $category ){
 230+ $variants = $wgContLang->convertLinkToAllVariants($category);
 231+ foreach($variants as $variant){
 232+ if($variant != $category){
 233+ $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) );
 234+ if(is_null($variantTitle)) continue;
 235+ $linkBatch->addObj( $variantTitle );
 236+ $categoryMap[$variant] = $category;
 237+ }
 238+ }
 239+ }
 240+
 241+
 242+ if(!$linkBatch->isEmpty()){
 243+ // construct query
 244+ $titleClause = $linkBatch->constructSet('page', $dbr);
 245+
 246+ $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
 247+
 248+ $variantQuery .= " FROM $page WHERE $titleClause";
 249+
 250+ $varRes = $dbr->query( $variantQuery, __METHOD__ );
 251+
 252+ // for each found variants, figure out link holders and replace
 253+ while ( $s = $dbr->fetchObject($varRes) ) {
 254+
 255+ $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
 256+ $varPdbk = $variantTitle->getPrefixedDBkey();
 257+ $vardbk = $variantTitle->getDBkey();
 258+
 259+ $holderKeys = array();
 260+ if(isset($variantMap[$varPdbk])){
 261+ $holderKeys = $variantMap[$varPdbk];
 262+ $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect );
 263+ $output->addLink( $variantTitle, $s->page_id );
 264+ }
 265+
 266+ // loop over link holders
 267+ foreach($holderKeys as $key){
 268+ list( $ns, $index ) = explode( ':', $key, 2 );
 269+ $entry =& $this->internals[$ns][$index];
 270+ $pdbk = $entry['pdbk'];
 271+
 272+ if(!isset($colours[$pdbk])){
 273+ // found link in some of the variants, replace the link holder data
 274+ $entry['title'] = $variantTitle;
 275+ $entry['pdbk'] = $varPdbk;
 276+
 277+ // set pdbk and colour
 278+ $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
 279+ $linkcolour_ids[$s->page_id] = $pdbk;
 280+ }
 281+ wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
 282+ }
 283+
 284+ // check if the object is a variant of a category
 285+ if(isset($categoryMap[$vardbk])){
 286+ $oldkey = $categoryMap[$vardbk];
 287+ if($oldkey != $vardbk)
 288+ $varCategories[$oldkey]=$vardbk;
 289+ }
 290+ }
 291+
 292+ // rebuild the categories in original order (if there are replacements)
 293+ if(count($varCategories)>0){
 294+ $newCats = array();
 295+ $originalCats = $output->getCategories();
 296+ foreach($originalCats as $cat => $sortkey){
 297+ // make the replacement
 298+ if( array_key_exists($cat,$varCategories) )
 299+ $newCats[$varCategories[$cat]] = $sortkey;
 300+ else $newCats[$cat] = $sortkey;
 301+ }
 302+ $this->mOutput->parent->setCategoryLinks($newCats);
 303+ }
 304+ }
 305+ }
 306+
 307+ # Construct search and replace arrays
 308+ wfProfileIn( __METHOD__.'-construct' );
 309+ $replacePairs = array();
 310+ foreach ( $this->internals as $ns => $entries ) {
 311+ foreach ( $entries as $index => $entry ) {
 312+ $pdbk = $entry['pdbk'];
 313+ $title = $entry['title'];
 314+ $query = isset( $entry['query'] ) ? $entry['query'] : '';
 315+ $key = "$ns:$index";
 316+ $searchkey = "<!--LINK $key-->";
 317+ if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) {
 318+ $linkCache->addBadLinkObj( $title );
 319+ $colours[$pdbk] = 'new';
 320+ $output->addLink( $title, 0 );
 321+ $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title,
 322+ $entry['text'],
 323+ $query );
 324+ } else {
 325+ $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk],
 326+ $entry['text'],
 327+ $query );
 328+ }
 329+ }
 330+ }
 331+ $replacer = new HashtableReplacer( $replacePairs, 1 );
 332+ wfProfileOut( __METHOD__.'-construct' );
 333+
 334+ # Do the thing
 335+ wfProfileIn( __METHOD__.'-replace' );
 336+ $text = preg_replace_callback(
 337+ '/(<!--LINK .*?-->)/',
 338+ $replacer->cb(),
 339+ $text);
 340+
 341+ wfProfileOut( __METHOD__.'-replace' );
 342+ wfProfileOut( __METHOD__ );
 343+ }
 344+
 345+ /**
 346+ * Replace interwiki links
 347+ */
 348+ protected function replaceInterwiki( &$text ) {
 349+ if ( empty( $this->interwikis ) ) {
 350+ return;
 351+ }
 352+
 353+ wfProfileIn( __METHOD__ );
 354+ # Make interwiki link HTML
 355+ $sk = $this->parent->getOptions()->getSkin();
 356+ $replacePairs = array();
 357+ foreach( $this->interwikis as $key => $link ) {
 358+ $replacePairs[$key] = $sk->link( $link['title'], $link['text'] );
 359+ }
 360+ $replacer = new HashtableReplacer( $replacePairs, 1 );
 361+
 362+ $text = preg_replace_callback(
 363+ '/<!--IWLINK (.*?)-->/',
 364+ $replacer->cb(),
 365+ $text );
 366+ wfProfileOut( __METHOD__ );
 367+ }
 368+
 369+ /**
 370+ * Replace <!--LINK--> and <!--IWLINK--> link placeholders with plain text of links
 371+ * (not HTML-formatted).
 372+ * @param string $text
 373+ * @return string
 374+ */
 375+ function replaceText( $text ) {
 376+ wfProfileIn( __METHOD__ );
 377+
 378+ $text = preg_replace_callback(
 379+ '/<!--(LINK|IWLINK) (.*?)-->/',
 380+ array( &$this, 'replaceTextCallback' ),
 381+ $text );
 382+
 383+ wfProfileOut( __METHOD__ );
 384+ return $text;
 385+ }
 386+
 387+ /**
 388+ * @param array $matches preg match: [1] is LINK or IWLINK, [2] is the holder key
 389+ * @return string stored link text, or the unchanged placeholder if the key is unknown
 390+ * @private
 391+ */
 392+ function replaceTextCallback( $matches ) {
 393+ $type = $matches[1];
 394+ $key = $matches[2];
 395+ if( $type == 'LINK' ) {
 396+ list( $ns, $index ) = explode( ':', $key, 2 );
 397+ if( isset( $this->internals[$ns][$index]['text'] ) ) {
 398+ return $this->internals[$ns][$index]['text'];
 399+ }
 400+ } elseif( $type == 'IWLINK' ) {
 401+ if( isset( $this->interwikis[$key]['text'] ) ) {
 402+ return $this->interwikis[$key]['text'];
 403+ }
 404+ }
 405+ return $matches[0];
 406+ }
 407+}
Property changes on: trunk/phase3/includes/parser/LinkHolderArray.php
___________________________________________________________________
Added: svn:eol-style
1408 + native
Index: trunk/phase3/includes/parser/Parser.php
@@ -98,7 +98,7 @@
9999 # Cleared with clearState():
100100 var $mOutput, $mAutonumber, $mDTopen, $mStripState;
101101 var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
102 - var $mInterwikiLinkHolders, $mLinkHolders;
 102+ var $mLinkHolders, $mLinkID;
103103 var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
104104 var $mTplExpandCache; // empty-frame expansion cache
105105 var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
@@ -179,17 +179,8 @@
180180 $this->mStripState = new StripState;
181181 $this->mArgStack = false;
182182 $this->mInPre = false;
183 - $this->mInterwikiLinkHolders = array(
184 - 'texts' => array(),
185 - 'titles' => array()
186 - );
187 - $this->mLinkHolders = array(
188 - 'namespaces' => array(),
189 - 'dbkeys' => array(),
190 - 'queries' => array(),
191 - 'texts' => array(),
192 - 'titles' => array()
193 - );
 183+ $this->mLinkHolders = new LinkHolderArray( $this );
 184+ $this->mLinkID = 0;
194185 $this->mRevisionTimestamp = $this->mRevisionId = null;
195186
196187 /**
@@ -204,7 +195,7 @@
205196 */
206197 #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
207198 # Changed to \x7f to allow XML double-parsing -- TS
208 - $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString();
 199+ $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
209200
210201
211202 # Clear these on every parse, bug 4549
@@ -294,7 +285,7 @@
295286 */
296287
297288 global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
298 - $fname = 'Parser::parse-' . wfGetCaller();
 289+ $fname = __METHOD__.'-' . wfGetCaller();
299290 wfProfileIn( __METHOD__ );
300291 wfProfileIn( $fname );
301292
@@ -328,7 +319,6 @@
329320 );
330321 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
331322
332 - # only once and last
333323 $text = $this->doBlockLevels( $text, $linestart );
334324
335325 $this->replaceLinkHolders( $text );
@@ -348,7 +338,7 @@
349339 $uniq_prefix = $this->mUniqPrefix;
350340 $matches = array();
351341 $elements = array_keys( $this->mTransparentTagHooks );
352 - $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
 342+ $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
353343
354344 foreach( $matches as $marker => $data ) {
355345 list( $element, $content, $params, $tag ) = $data;
@@ -366,7 +356,7 @@
367357 $text = Sanitizer::normalizeCharReferences( $text );
368358
369359 if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
370 - $text = Parser::tidy($text);
 360+ $text = self::tidy($text);
371361 } else {
372362 # attempt to sanitize at least some nesting problems
373363 # (bug #2702 and quite a few others)
@@ -471,6 +461,8 @@
472462 function &getTitle() { return $this->mTitle; }
473463 function getOptions() { return $this->mOptions; }
474464 function getRevisionId() { return $this->mRevisionId; }
 465+ function getOutput() { return $this->mOutput; }
 466+ function nextLinkID() { return $this->mLinkID++; }
475467
476468 function getFunctionLang() {
477469 global $wgLang, $wgContLang;
@@ -658,9 +650,9 @@
659651 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
660652 '<head><title>test</title></head><body>'.$text.'</body></html>';
661653 if( $wgTidyInternal ) {
662 - $correctedtext = Parser::internalTidy( $wrappedtext );
 654+ $correctedtext = self::internalTidy( $wrappedtext );
663655 } else {
664 - $correctedtext = Parser::externalTidy( $wrappedtext );
 656+ $correctedtext = self::externalTidy( $wrappedtext );
665657 }
666658 if( is_null( $correctedtext ) ) {
667659 wfDebug( "Tidy error detected!\n" );
@@ -677,8 +669,7 @@
678670 */
679671 function externalTidy( $text ) {
680672 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
681 - $fname = 'Parser::externalTidy';
682 - wfProfileIn( $fname );
 673+ wfProfileIn( __METHOD__ );
683674
684675 $cleansource = '';
685676 $opts = ' -utf8';
@@ -707,7 +698,7 @@
708699 }
709700 }
710701
711 - wfProfileOut( $fname );
 702+ wfProfileOut( __METHOD__ );
712703
713704 if( $cleansource == '' && $text != '') {
714705 // Some kind of error happened, so we couldn't get the corrected text.
@@ -729,8 +720,7 @@
730721 */
731722 function internalTidy( $text ) {
732723 global $wgTidyConf, $IP, $wgDebugTidy;
733 - $fname = 'Parser::internalTidy';
734 - wfProfileIn( $fname );
 724+ wfProfileIn( __METHOD__ );
735725
736726 $tidy = new tidy;
737727 $tidy->parseString( $text, $wgTidyConf, 'utf8' );
@@ -748,7 +738,7 @@
749739 "\n-->";
750740 }
751741
752 - wfProfileOut( $fname );
 742+ wfProfileOut( __METHOD__ );
753743 return $cleansource;
754744 }
755745
@@ -758,34 +748,35 @@
759749 * @private
760750 */
761751 function doTableStuff ( $text ) {
762 - $fname = 'Parser::doTableStuff';
763 - wfProfileIn( $fname );
 752+ wfProfileIn( __METHOD__ );
764753
765 - $lines = explode ( "\n" , $text );
 754+ $lines = StringUtils::explode( "\n", $text );
 755+ $out = '';
766756 $td_history = array (); // Is currently a td tag open?
767757 $last_tag_history = array (); // Save history of last lag activated (td, th or caption)
768758 $tr_history = array (); // Is currently a tr tag open?
769759 $tr_attributes = array (); // history of tr attributes
770760 $has_opened_tr = array(); // Did this table open a <tr> element?
771761 $indent_level = 0; // indent level of the table
772 - foreach ( $lines as $key => $line )
773 - {
774 - $line = trim ( $line );
775762
 763+ foreach ( $lines as $outLine ) {
 764+ $line = trim( $outLine );
 765+
776766 if( $line == '' ) { // empty line, go to next line
 767+ $out .= "\n";
777768 continue;
778769 }
779 - $first_character = $line{0};
 770+ $first_character = $line[0];
780771 $matches = array();
781772
782 - if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) {
 773+ if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) {
783774 // First check if we are starting a new table
784775 $indent_level = strlen( $matches[1] );
785776
786777 $attributes = $this->mStripState->unstripBoth( $matches[2] );
787778 $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' );
788779
789 - $lines[$key] = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
 780+ $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
790781 array_push ( $td_history , false );
791782 array_push ( $last_tag_history , '' );
792783 array_push ( $tr_history , false );
@@ -793,6 +784,7 @@
794785 array_push ( $has_opened_tr , false );
795786 } else if ( count ( $td_history ) == 0 ) {
796787 // Don't do any of the following
 788+ $out .= $outLine."\n";
797789 continue;
798790 } else if ( substr ( $line , 0 , 2 ) == '|}' ) {
799791 // We are ending a table
@@ -811,7 +803,7 @@
812804 $line = "</{$last_tag}>{$line}";
813805 }
814806 array_pop ( $tr_attributes );
815 - $lines[$key] = $line . str_repeat( '</dd></dl>' , $indent_level );
 807+ $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
816808 } else if ( substr ( $line , 0 , 2 ) == '|-' ) {
817809 // Now we have a table row
818810 $line = preg_replace( '#^\|-+#', '', $line );
@@ -835,7 +827,7 @@
836828 $line = "</{$last_tag}>{$line}";
837829 }
838830
839 - $lines[$key] = $line;
 831+ $outLine = $line;
840832 array_push ( $tr_history , false );
841833 array_push ( $td_history , false );
842834 array_push ( $last_tag_history , '' );
@@ -859,7 +851,7 @@
860852 // attribute values containing literal "||".
861853 $cells = StringUtils::explodeMarkup( '||' , $line );
862854
863 - $lines[$key] = '';
 855+ $outLine = '';
864856
865857 // Loop through each table cell
866858 foreach ( $cells as $cell )
@@ -910,38 +902,42 @@
911903 $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
912904 }
913905
914 - $lines[$key] .= $cell;
 906+ $outLine .= $cell;
915907 array_push ( $td_history , true );
916908 }
917909 }
 910+ $out .= $outLine . "\n";
918911 }
919912
920913 // Closing open td, tr && table
921914 while ( count ( $td_history ) > 0 )
922915 {
923916 if ( array_pop ( $td_history ) ) {
924 - $lines[] = '</td>' ;
 917+ $out .= "</td>\n";
925918 }
926919 if ( array_pop ( $tr_history ) ) {
927 - $lines[] = '</tr>' ;
 920+ $out .= "</tr>\n";
928921 }
929922 if ( !array_pop ( $has_opened_tr ) ) {
930 - $lines[] = "<tr><td></td></tr>" ;
 923+ $out .= "<tr><td></td></tr>\n" ;
931924 }
932925
933 - $lines[] = '</table>' ;
 926+ $out .= "</table>\n";
934927 }
935928
936 - $output = implode ( "\n" , $lines ) ;
 929+ // Remove trailing line-ending (b/c)
 930+ if ( substr( $out, -1 ) == "\n" ) {
 931+ $out = substr( $out, 0, -1 );
 932+ }
937933
938934 // special case: don't return empty table
939 - if( $output == "<table>\n<tr><td></td></tr>\n</table>" ) {
940 - $output = '';
 935+ if( $out == "<table>\n<tr><td></td></tr>\n</table>" ) {
 936+ $out = '';
941937 }
942938
943 - wfProfileOut( $fname );
 939+ wfProfileOut( __METHOD__ );
944940
945 - return $output;
 941+ return $out;
946942 }
947943
948944 /**
@@ -952,12 +948,11 @@
953949 */
954950 function internalParse( $text ) {
955951 $isMain = true;
956 - $fname = 'Parser::internalParse';
957 - wfProfileIn( $fname );
 952+ wfProfileIn( __METHOD__ );
958953
959954 # Hook to suspend the parser in this state
960955 if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
961 - wfProfileOut( $fname );
 956+ wfProfileOut( __METHOD__ );
962957 return $text ;
963958 }
964959
@@ -990,7 +985,7 @@
991986 $text = $this->doMagicLinks( $text );
992987 $text = $this->formatHeadings( $text, $isMain );
993988
994 - wfProfileOut( $fname );
 989+ wfProfileOut( __METHOD__ );
995990 return $text;
996991 }
997992
@@ -1060,14 +1055,13 @@
10611056 * @private
10621057 */
10631058 function doHeadings( $text ) {
1064 - $fname = 'Parser::doHeadings';
1065 - wfProfileIn( $fname );
 1059+ wfProfileIn( __METHOD__ );
10661060 for ( $i = 6; $i >= 1; --$i ) {
10671061 $h = str_repeat( '=', $i );
10681062 $text = preg_replace( "/^$h(.+)$h\\s*$/m",
10691063 "<h$i>\\1</h$i>", $text );
10701064 }
1071 - wfProfileOut( $fname );
 1065+ wfProfileOut( __METHOD__ );
10721066 return $text;
10731067 }
10741068
@@ -1077,15 +1071,14 @@
10781072 * @return string the altered text
10791073 */
10801074 function doAllQuotes( $text ) {
1081 - $fname = 'Parser::doAllQuotes';
1082 - wfProfileIn( $fname );
 1075+ wfProfileIn( __METHOD__ );
10831076 $outtext = '';
1084 - $lines = explode( "\n", $text );
 1077+ $lines = StringUtils::explode( "\n", $text );
10851078 foreach ( $lines as $line ) {
1086 - $outtext .= $this->doQuotes ( $line ) . "\n";
 1079+ $outtext .= $this->doQuotes( $line ) . "\n";
10871080 }
10881081 $outtext = substr($outtext, 0,-1);
1089 - wfProfileOut( $fname );
 1082+ wfProfileOut( __METHOD__ );
10901083 return $outtext;
10911084 }
10921085
@@ -1264,8 +1257,7 @@
12651258 */
12661259 function replaceExternalLinks( $text ) {
12671260 global $wgContLang;
1268 - $fname = 'Parser::replaceExternalLinks';
1269 - wfProfileIn( $fname );
 1261+ wfProfileIn( __METHOD__ );
12701262
12711263 $sk = $this->mOptions->getSkin();
12721264
@@ -1335,11 +1327,11 @@
13361328 # Register link in the output object.
13371329 # Replace unnecessary URL escape codes with the referenced character
13381330 # This prevents spammers from hiding links from the filters
1339 - $pasteurized = Parser::replaceUnusualEscapes( $url );
 1331+ $pasteurized = self::replaceUnusualEscapes( $url );
13401332 $this->mOutput->addExternalLink( $pasteurized );
13411333 }
13421334
1343 - wfProfileOut( $fname );
 1335+ wfProfileOut( __METHOD__ );
13441336 return $s;
13451337 }
13461338
@@ -1349,8 +1341,7 @@
13501342 */
13511343 function replaceFreeExternalLinks( $text ) {
13521344 global $wgContLang;
1353 - $fname = 'Parser::replaceFreeExternalLinks';
1354 - wfProfileIn( $fname );
 1345+ wfProfileIn( __METHOD__ );
13551346
13561347 $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
13571348 $s = array_shift( $bits );
@@ -1412,7 +1403,7 @@
14131404 $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
14141405 # Register it in the output object...
14151406 # Replace unnecessary URL escape codes with their equivalent characters
1416 - $pasteurized = Parser::replaceUnusualEscapes( $url );
 1407+ $pasteurized = self::replaceUnusualEscapes( $url );
14171408 $this->mOutput->addExternalLink( $pasteurized );
14181409 }
14191410 $s .= $text . $trail;
@@ -1420,7 +1411,7 @@
14211412 $s .= $protocol . $remainder;
14221413 }
14231414 }
1424 - wfProfileOut( $fname );
 1415+ wfProfileOut( __METHOD__ );
14251416 return $s;
14261417 }
14271418
@@ -1436,7 +1427,7 @@
14371428 */
14381429 static function replaceUnusualEscapes( $url ) {
14391430 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
1440 - array( 'Parser', 'replaceUnusualEscapesCallback' ), $url );
 1431+ array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
14411432 }
14421433
14431434 /**
@@ -1480,35 +1471,48 @@
14811472
14821473 /**
14831474 * Process [[ ]] wikilinks
 1475+ * @return processed text
14841476 *
14851477 * @private
14861478 */
14871479 function replaceInternalLinks( $s ) {
 1480+ $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) );
 1481+ return $s;
 1482+ }
 1483+
 1484+ /**
 1485+ * Process [[ ]] wikilinks
 1486+ * @return LinkHolderArray
 1487+ *
 1488+ * @private
 1489+ */
 1490+ function replaceInternalLinks2( &$s ) {
14881491 global $wgContLang;
1489 - static $fname = 'Parser::replaceInternalLinks' ;
14901492
1491 - wfProfileIn( $fname );
 1493+ wfProfileIn( __METHOD__ );
14921494
1493 - wfProfileIn( $fname.'-setup' );
1494 - static $tc = FALSE;
 1495+ wfProfileIn( __METHOD__.'-setup' );
 1496+ static $tc = FALSE, $e1, $e1_img;
14951497 # the % is needed to support urlencoded titles as well
1496 - if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
 1498+ if ( !$tc ) {
 1499+ $tc = Title::legalChars() . '#%';
 1500+ # Match a link having the form [[namespace:link|alternate]]trail
 1501+ $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
 1502+ # Match cases where there is no "]]", which might still be images
 1503+ $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
 1504+ }
14971505
14981506 $sk = $this->mOptions->getSkin();
 1507+ $holders = new LinkHolderArray( $this );
14991508
15001509 #split the entire text string on occurences of [[
1501 - $a = explode( '[[', ' ' . $s );
 1510+ $a = StringUtils::explode( '[[', ' ' . $s );
15021511 #get the first element (all text up to first [[), and remove the space we added
1503 - $s = array_shift( $a );
 1512+ $s = $a->current();
 1513+ $a->next();
 1514+ $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
15041515 $s = substr( $s, 1 );
15051516
1506 - # Match a link having the form [[namespace:link|alternate]]trail
1507 - static $e1 = FALSE;
1508 - if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; }
1509 - # Match cases where there is no "]]", which might still be images
1510 - static $e1_img = FALSE;
1511 - if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
1512 -
15131517 $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
15141518 $e2 = null;
15151519 if ( $useLinkPrefixExtension ) {
@@ -1518,8 +1522,8 @@
15191523 }
15201524
15211525 if( is_null( $this->mTitle ) ) {
1522 - wfProfileOut( $fname );
1523 - wfProfileOut( $fname.'-setup' );
 1526+ wfProfileOut( __METHOD__ );
 1527+ wfProfileOut( __METHOD__.'-setup' );
15241528 throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
15251529 }
15261530 $nottalk = !$this->mTitle->isTalkPage();
@@ -1541,13 +1545,20 @@
15421546 $selflink = array($this->mTitle->getPrefixedText());
15431547 }
15441548 $useSubpages = $this->areSubpagesAllowed();
1545 - wfProfileOut( $fname.'-setup' );
 1549+ wfProfileOut( __METHOD__.'-setup' );
15461550
15471551 # Loop for each link
1548 - for ($k = 0; isset( $a[$k] ); $k++) {
1549 - $line = $a[$k];
 1552+ for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) {
 1553+ # Check for excessive memory usage
 1554+ if ( $holders->isBig() ) {
 1555+ # Too big
 1556+ # Do the existence check, replace the link holders and clear the array
 1557+ $holders->replace( $s );
 1558+ $holders->clear();
 1559+ }
 1560+
15501561 if ( $useLinkPrefixExtension ) {
1551 - wfProfileIn( $fname.'-prefixhandling' );
 1562+ wfProfileIn( __METHOD__.'-prefixhandling' );
15521563 if ( preg_match( $e2, $s, $m ) ) {
15531564 $prefix = $m[2];
15541565 $s = $m[1];
@@ -1559,12 +1570,12 @@
15601571 $prefix = $first_prefix;
15611572 $first_prefix = false;
15621573 }
1563 - wfProfileOut( $fname.'-prefixhandling' );
 1574+ wfProfileOut( __METHOD__.'-prefixhandling' );
15641575 }
15651576
15661577 $might_be_img = false;
15671578
1568 - wfProfileIn( "$fname-e1" );
 1579+ wfProfileIn( __METHOD__."-e1" );
15691580 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
15701581 $text = $m[2];
15711582 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
@@ -1598,18 +1609,18 @@
15991610 $trail = "";
16001611 } else { # Invalid form; output directly
16011612 $s .= $prefix . '[[' . $line ;
1602 - wfProfileOut( "$fname-e1" );
 1613+ wfProfileOut( __METHOD__."-e1" );
16031614 continue;
16041615 }
1605 - wfProfileOut( "$fname-e1" );
1606 - wfProfileIn( "$fname-misc" );
 1616+ wfProfileOut( __METHOD__."-e1" );
 1617+ wfProfileIn( __METHOD__."-misc" );
16071618
16081619 # Don't allow internal links to pages containing
16091620 # PROTO: where PROTO is a valid URL protocol; these
16101621 # should be external links.
16111622 if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) {
16121623 $s .= $prefix . '[[' . $line ;
1613 - wfProfileOut( "$fname-misc" );
 1624+ wfProfileOut( __METHOD__."-misc" );
16141625 continue;
16151626 }
16161627
@@ -1626,27 +1637,30 @@
16271638 $link = substr($link, 1);
16281639 }
16291640
1630 - wfProfileOut( "$fname-misc" );
1631 - wfProfileIn( "$fname-title" );
 1641+ wfProfileOut( __METHOD__."-misc" );
 1642+ wfProfileIn( __METHOD__."-title" );
16321643 $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) );
16331644 if( !$nt ) {
16341645 $s .= $prefix . '[[' . $line;
1635 - wfProfileOut( "$fname-title" );
 1646+ wfProfileOut( __METHOD__."-title" );
16361647 continue;
16371648 }
16381649
16391650 $ns = $nt->getNamespace();
16401651 $iw = $nt->getInterWiki();
1641 - wfProfileOut( "$fname-title" );
 1652+ wfProfileOut( __METHOD__."-title" );
16421653
16431654 if ($might_be_img) { # if this is actually an invalid link
1644 - wfProfileIn( "$fname-might_be_img" );
 1655+ wfProfileIn( __METHOD__."-might_be_img" );
16451656 if ($ns == NS_IMAGE && $noforce) { #but might be an image
16461657 $found = false;
1647 - while (isset ($a[$k+1]) ) {
 1658+ while ( true ) {
16481659 #look at the next 'line' to see if we can close it there
1649 - $spliced = array_splice( $a, $k + 1, 1 );
1650 - $next_line = array_shift( $spliced );
 1660+ $a->next();
 1661+ $next_line = $a->current();
 1662+ if ( $next_line === false || $next_line === null ) {
 1663+ break;
 1664+ }
16511665 $m = explode( ']]', $next_line, 3 );
16521666 if ( count( $m ) == 3 ) {
16531667 # the first ]] closes the inner link, the second the image
@@ -1666,19 +1680,19 @@
16671681 if ( !$found ) {
16681682 # we couldn't find the end of this imageLink, so output it raw
16691683 #but don't ignore what might be perfectly normal links in the text we've examined
1670 - $text = $this->replaceInternalLinks($text);
 1684+ $holders->merge( $this->replaceInternalLinks2( $text ) );
16711685 $s .= "{$prefix}[[$link|$text";
16721686 # note: no $trail, because without an end, there *is* no trail
1673 - wfProfileOut( "$fname-might_be_img" );
 1687+ wfProfileOut( __METHOD__."-might_be_img" );
16741688 continue;
16751689 }
16761690 } else { #it's not an image, so output it raw
16771691 $s .= "{$prefix}[[$link|$text";
16781692 # note: no $trail, because without an end, there *is* no trail
1679 - wfProfileOut( "$fname-might_be_img" );
 1693+ wfProfileOut( __METHOD__."-might_be_img" );
16801694 continue;
16811695 }
1682 - wfProfileOut( "$fname-might_be_img" );
 1696+ wfProfileOut( __METHOD__."-might_be_img" );
16831697 }
16841698
16851699 $wasblank = ( '' == $text );
@@ -1688,41 +1702,38 @@
16891703 if( $noforce ) {
16901704
16911705 # Interwikis
1692 - wfProfileIn( "$fname-interwiki" );
 1706+ wfProfileIn( __METHOD__."-interwiki" );
16931707 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
16941708 $this->mOutput->addLanguageLink( $nt->getFullText() );
16951709 $s = rtrim($s . $prefix);
16961710 $s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
1697 - wfProfileOut( "$fname-interwiki" );
 1711+ wfProfileOut( __METHOD__."-interwiki" );
16981712 continue;
16991713 }
1700 - wfProfileOut( "$fname-interwiki" );
 1714+ wfProfileOut( __METHOD__."-interwiki" );
17011715
17021716 if ( $ns == NS_IMAGE ) {
1703 - wfProfileIn( "$fname-image" );
 1717+ wfProfileIn( __METHOD__."-image" );
17041718 if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
17051719 # recursively parse links inside the image caption
17061720 # actually, this will parse them in any other parameters, too,
17071721 # but it might be hard to fix that, and it doesn't matter ATM
17081722 $text = $this->replaceExternalLinks($text);
1709 - $text = $this->replaceInternalLinks($text);
 1723+ $holders->merge( $this->replaceInternalLinks2( $text ) );
17101724
17111725 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
1712 - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail;
1713 - $this->mOutput->addImage( $nt->getDBkey() );
 1726+ $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail;
17141727
1715 - wfProfileOut( "$fname-image" );
 1728+ wfProfileOut( __METHOD__."-image" );
17161729 continue;
1717 - } else {
1718 - # We still need to record the image's presence on the page
1719 - $this->mOutput->addImage( $nt->getDBkey() );
17201730 }
1721 - wfProfileOut( "$fname-image" );
 1731+ $this->mOutput->addImage( $nt->getDBkey() );
 1732+ wfProfileOut( __METHOD__."-image" );
17221733
17231734 }
17241735
17251736 if ( $ns == NS_CATEGORY ) {
1726 - wfProfileIn( "$fname-category" );
 1737+ wfProfileIn( __METHOD__."-category" );
17271738 $s = rtrim($s . "\n"); # bug 87
17281739
17291740 if ( $wasblank ) {
@@ -1741,7 +1752,7 @@
17421753 */
17431754 $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;
17441755
1745 - wfProfileOut( "$fname-category" );
 1756+ wfProfileOut( __METHOD__."-category" );
17461757 continue;
17471758 }
17481759 }
@@ -1772,7 +1783,7 @@
17731784 if( SpecialPage::exists( $nt->getDBkey() ) ) {
17741785 $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
17751786 } else {
1776 - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
 1787+ $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix );
17771788 }
17781789 continue;
17791790 } elseif( $ns == NS_IMAGE ) {
@@ -1786,10 +1797,10 @@
17871798 continue;
17881799 }
17891800 }
1790 - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
 1801+ $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix );
17911802 }
1792 - wfProfileOut( $fname );
1793 - return $s;
 1803+ wfProfileOut( __METHOD__ );
 1804+ return $holders;
17941805 }
17951806
17961807 /**
@@ -1798,32 +1809,10 @@
17991810 * parsing of interwiki links, and secondly to allow all existence checks and
18001811 * article length checks (for stub links) to be bundled into a single query.
18011812 *
 1813+ * @deprecated
18021814 */
18031815 function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
1804 - wfProfileIn( __METHOD__ );
1805 - if ( ! is_object($nt) ) {
1806 - # Fail gracefully
1807 - $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
1808 - } else {
1809 - # Separate the link trail from the rest of the link
1810 - list( $inside, $trail ) = Linker::splitTrail( $trail );
1811 -
1812 - if ( $nt->isExternal() ) {
1813 - $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside );
1814 - $this->mInterwikiLinkHolders['titles'][] = $nt;
1815 - $retVal = '<!--IWLINK '. ($nr-1) ."-->{$trail}";
1816 - } else {
1817 - $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );
1818 - $this->mLinkHolders['dbkeys'][] = $nt->getDBkey();
1819 - $this->mLinkHolders['queries'][] = $query;
1820 - $this->mLinkHolders['texts'][] = $prefix.$text.$inside;
1821 - $this->mLinkHolders['titles'][] = $nt;
1822 -
1823 - $retVal = '<!--LINK '. ($nr-1) ."-->{$trail}";
1824 - }
1825 - }
1826 - wfProfileOut( __METHOD__ );
1827 - return $retVal;
 1816+ return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix );
18281817 }
18291818
18301819 /**
@@ -1889,8 +1878,7 @@
18901879 # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage
18911880 # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage
18921881
1893 - $fname = 'Parser::maybeDoSubpageLink';
1894 - wfProfileIn( $fname );
 1882+ wfProfileIn( __METHOD__ );
18951883 $ret = $target; # default return value is no change
18961884
18971885 # Some namespaces don't allow subpages,
@@ -1949,7 +1937,7 @@
19501938 }
19511939 }
19521940
1953 - wfProfileOut( $fname );
 1941+ wfProfileOut( __METHOD__ );
19541942 return $ret;
19551943 }
19561944
@@ -2036,50 +2024,53 @@
20372025 * @return string the lists rendered as HTML
20382026 */
20392027 function doBlockLevels( $text, $linestart ) {
2040 - $fname = 'Parser::doBlockLevels';
2041 - wfProfileIn( $fname );
 2028+ wfProfileIn( __METHOD__ );
20422029
20432030 # Parsing through the text line by line. The main thing
20442031 # happening here is handling of block-level elements p, pre,
20452032 # and making lists from lines starting with * # : etc.
20462033 #
2047 - $textLines = explode( "\n", $text );
 2034+ $textLines = StringUtils::explode( "\n", $text );
20482035
20492036 $lastPrefix = $output = '';
20502037 $this->mDTopen = $inBlockElem = false;
20512038 $prefixLength = 0;
20522039 $paragraphStack = false;
20532040
2054 - if ( !$linestart ) {
2055 - $output .= array_shift( $textLines );
2056 - }
20572041 foreach ( $textLines as $oLine ) {
 2042+ # Fix up $linestart
 2043+ if ( !$linestart ) {
 2044+ $output .= $oLine;
 2045+ $linestart = true;
 2046+ continue;
 2047+ }
 2048+
20582049 $lastPrefixLength = strlen( $lastPrefix );
20592050 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
20602051 $preOpenMatch = preg_match('/<pre/i', $oLine );
20612052 if ( !$this->mInPre ) {
20622053 # Multiple prefixes may abut each other for nested lists.
20632054 $prefixLength = strspn( $oLine, '*#:;' );
2064 - $pref = substr( $oLine, 0, $prefixLength );
 2055+ $prefix = substr( $oLine, 0, $prefixLength );
20652056
20662057 # eh?
2067 - $pref2 = str_replace( ';', ':', $pref );
 2058+ $prefix2 = str_replace( ';', ':', $prefix );
20682059 $t = substr( $oLine, $prefixLength );
2069 - $this->mInPre = !empty($preOpenMatch);
 2060+ $this->mInPre = (bool)$preOpenMatch;
20702061 } else {
20712062 # Don't interpret any other prefixes in preformatted text
20722063 $prefixLength = 0;
2073 - $pref = $pref2 = '';
 2064+ $prefix = $prefix2 = '';
20742065 $t = $oLine;
20752066 }
20762067
20772068 # List generation
2078 - if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
 2069+ if( $prefixLength && $lastPrefix === $prefix2 ) {
20792070 # Same as the last item, so no need to deal with nesting or opening stuff
2080 - $output .= $this->nextItem( substr( $pref, -1 ) );
 2071+ $output .= $this->nextItem( substr( $prefix, -1 ) );
20812072 $paragraphStack = false;
20822073
2083 - if ( substr( $pref, -1 ) == ';') {
 2074+ if ( substr( $prefix, -1 ) == ';') {
20842075 # The one nasty exception: definition lists work like this:
20852076 # ; title : definition text
20862077 # So we check for : in the remainder text to split up the
@@ -2092,18 +2083,18 @@
20932084 }
20942085 } elseif( $prefixLength || $lastPrefixLength ) {
20952086 # Either open or close a level...
2096 - $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
 2087+ $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
20972088 $paragraphStack = false;
20982089
20992090 while( $commonPrefixLength < $lastPrefixLength ) {
2100 - $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
 2091+ $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
21012092 --$lastPrefixLength;
21022093 }
21032094 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2104 - $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
 2095+ $output .= $this->nextItem( $prefix[$commonPrefixLength-1] );
21052096 }
21062097 while ( $prefixLength > $commonPrefixLength ) {
2107 - $char = substr( $pref, $commonPrefixLength, 1 );
 2098+ $char = substr( $prefix, $commonPrefixLength, 1 );
21082099 $output .= $this->openList( $char );
21092100
21102101 if ( ';' == $char ) {
@@ -2115,10 +2106,10 @@
21162107 }
21172108 ++$commonPrefixLength;
21182109 }
2119 - $lastPrefix = $pref2;
 2110+ $lastPrefix = $prefix2;
21202111 }
21212112 if( 0 == $prefixLength ) {
2122 - wfProfileIn( "$fname-paragraph" );
 2113+ wfProfileIn( __METHOD__."-paragraph" );
21232114 # No prefix (not in list)--go to paragraph mode
21242115 // XXX: use a stack for nestable elements like span, table and div
21252116 $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
@@ -2174,7 +2165,7 @@
21752166 }
21762167 }
21772168 }
2178 - wfProfileOut( "$fname-paragraph" );
 2169+ wfProfileOut( __METHOD__."-paragraph" );
21792170 }
21802171 // somewhere above we forget to get out of pre block (bug 785)
21812172 if($preCloseMatch && $this->mInPre) {
@@ -2185,7 +2176,7 @@
21862177 }
21872178 }
21882179 while ( $prefixLength ) {
2189 - $output .= $this->closeList( $pref2{$prefixLength-1} );
 2180+ $output .= $this->closeList( $prefix2[$prefixLength-1] );
21902181 --$prefixLength;
21912182 }
21922183 if ( '' != $this->mLastSection ) {
@@ -2193,7 +2184,7 @@
21942185 $this->mLastSection = '';
21952186 }
21962187
2197 - wfProfileOut( $fname );
 2188+ wfProfileOut( __METHOD__ );
21982189 return $output;
21992190 }
22002191
@@ -2206,13 +2197,12 @@
22072198 * return string the position of the ':', or false if none found
22082199 */
22092200 function findColonNoLinks($str, &$before, &$after) {
2210 - $fname = 'Parser::findColonNoLinks';
2211 - wfProfileIn( $fname );
 2201+ wfProfileIn( __METHOD__ );
22122202
22132203 $pos = strpos( $str, ':' );
22142204 if( $pos === false ) {
22152205 // Nothing to find!
2216 - wfProfileOut( $fname );
 2206+ wfProfileOut( __METHOD__ );
22172207 return false;
22182208 }
22192209
@@ -2221,7 +2211,7 @@
22222212 // Easy; no tag nesting to worry about
22232213 $before = substr( $str, 0, $pos );
22242214 $after = substr( $str, $pos+1 );
2225 - wfProfileOut( $fname );
 2215+ wfProfileOut( __METHOD__ );
22262216 return $pos;
22272217 }
22282218
@@ -2245,7 +2235,7 @@
22462236 // We found it!
22472237 $before = substr( $str, 0, $i );
22482238 $after = substr( $str, $i + 1 );
2249 - wfProfileOut( $fname );
 2239+ wfProfileOut( __METHOD__ );
22502240 return $i;
22512241 }
22522242 // Embedded in a tag; don't break it.
@@ -2255,7 +2245,7 @@
22562246 $colon = strpos( $str, ':', $i );
22572247 if( $colon === false ) {
22582248 // Nothing else interesting
2259 - wfProfileOut( $fname );
 2249+ wfProfileOut( __METHOD__ );
22602250 return false;
22612251 }
22622252 $lt = strpos( $str, '<', $i );
@@ -2264,7 +2254,7 @@
22652255 // We found it!
22662256 $before = substr( $str, 0, $colon );
22672257 $after = substr( $str, $colon + 1 );
2268 - wfProfileOut( $fname );
 2258+ wfProfileOut( __METHOD__ );
22692259 return $i;
22702260 }
22712261 }
@@ -2314,8 +2304,8 @@
23152305 if( $c == ">" ) {
23162306 $stack--;
23172307 if( $stack < 0 ) {
2318 - wfDebug( "Invalid input in $fname; too many close tags\n" );
2319 - wfProfileOut( $fname );
 2308+ wfDebug( __METHOD__.": Invalid input; too many close tags\n" );
 2309+ wfProfileOut( __METHOD__ );
23202310 return false;
23212311 }
23222312 $state = self::COLON_STATE_TEXT;
@@ -2350,14 +2340,14 @@
23512341 }
23522342 break;
23532343 default:
2354 - throw new MWException( "State machine error in $fname" );
 2344+ throw new MWException( "State machine error in " . __METHOD__ );
23552345 }
23562346 }
23572347 if( $stack > 0 ) {
2358 - wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" );
 2348+ wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" );
23592349 return false;
23602350 }
2361 - wfProfileOut( $fname );
 2351+ wfProfileOut( __METHOD__ );
23622352 return false;
23632353 }
23642354
@@ -2587,12 +2577,11 @@
25882578 * @private
25892579 */
25902580 function initialiseVariables() {
2591 - $fname = 'Parser::initialiseVariables';
2592 - wfProfileIn( $fname );
 2581+ wfProfileIn( __METHOD__ );
25932582 $variableIDs = MagicWord::getVariableIDs();
25942583
25952584 $this->mVariables = new MagicWordArray( $variableIDs );
2596 - wfProfileOut( $fname );
 2585+ wfProfileOut( __METHOD__ );
25972586 }
25982587
25992588 /**
@@ -2661,8 +2650,7 @@
26622651 return $text;
26632652 }
26642653
2665 - $fname = __METHOD__;
2666 - wfProfileIn( $fname );
 2654+ wfProfileIn( __METHOD__ );
26672655
26682656 if ( $frame === false ) {
26692657 $frame = $this->getPreprocessor()->newFrame();
@@ -2675,7 +2663,7 @@
26762664 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
26772665 $text = $frame->expand( $dom, $flags );
26782666
2679 - wfProfileOut( $fname );
 2667+ wfProfileOut( __METHOD__ );
26802668 return $text;
26812669 }
26822670
@@ -2738,8 +2726,7 @@
27392727 */
27402728 function braceSubstitution( $piece, $frame ) {
27412729 global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces;
2742 - $fname = __METHOD__;
2743 - wfProfileIn( $fname );
 2730+ wfProfileIn( __METHOD__ );
27442731 wfProfileIn( __METHOD__.'-setup' );
27452732
27462733 # Flags
@@ -2926,7 +2913,7 @@
29272914 }
29282915 } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
29292916 $found = false; //access denied
2930 - wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() );
 2917+ wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() );
29312918 } else {
29322919 list( $text, $title ) = $this->getTemplateDom( $title );
29332920 if ( $text !== false ) {
@@ -2960,7 +2947,7 @@
29612948 # Recover the source wikitext and return it
29622949 if ( !$found ) {
29632950 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
2964 - wfProfileOut( $fname );
 2951+ wfProfileOut( __METHOD__ );
29652952 return array( 'object' => $text );
29662953 }
29672954
@@ -3019,7 +3006,7 @@
30203007 $ret = array( 'text' => $text );
30213008 }
30223009
3023 - wfProfileOut( $fname );
 3010+ wfProfileOut( __METHOD__ );
30243011 return $ret;
30253012 }
30263013
@@ -3562,12 +3549,7 @@
35633550 # <!--LINK number-->
35643551 # turns into
35653552 # link text with suffix
3566 - $safeHeadline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
3567 - "\$this->mLinkHolders['texts'][\$1]",
3568 - $safeHeadline );
3569 - $safeHeadline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e',
3570 - "\$this->mInterwikiLinkHolders['texts'][\$1]",
3571 - $safeHeadline );
 3553+ $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
35723554
35733555 # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
35743556 $tocline = preg_replace(
@@ -3795,7 +3777,7 @@
37963778 } else {
37973779 # Failed to validate; fall back to the default
37983780 $nickname = $username;
3799 - wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" );
 3781+ wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" );
38003782 }
38013783 }
38023784
@@ -3901,19 +3883,17 @@
39023884 global $wgTitle;
39033885 static $executing = false;
39043886
3905 - $fname = "Parser::transformMsg";
3906 -
39073887 # Guard against infinite recursion
39083888 if ( $executing ) {
39093889 return $text;
39103890 }
39113891 $executing = true;
39123892
3913 - wfProfileIn($fname);
 3893+ wfProfileIn(__METHOD__);
39143894 $text = $this->preprocess( $text, $wgTitle, $options );
39153895
39163896 $executing = false;
3917 - wfProfileOut($fname);
 3897+ wfProfileOut(__METHOD__);
39183898 return $text;
39193899 }
39203900
@@ -4010,7 +3990,7 @@
40113991 # Add to function cache
40123992 $mw = MagicWord::get( $id );
40133993 if( !$mw )
4014 - throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' );
 3994+ throw new MWException( __METHOD__.'() expecting a magic word identifier.' );
40153995
40163996 $synonyms = $mw->getSynonyms();
40173997 $sensitive = intval( $mw->isCaseSensitive() );
@@ -4046,266 +4026,9 @@
40474027 * Replace <!--LINK--> link placeholders with actual links, in the buffer
40484028 * Placeholders created in Skin::makeLinkObj()
40494029 * Returns an array of link CSS classes, indexed by PDBK.
4050 - * $options is a bit field, RLH_FOR_UPDATE to select for update
40514030 */
40524031 function replaceLinkHolders( &$text, $options = 0 ) {
4053 - global $wgUser;
4054 - global $wgContLang;
4055 -
4056 - $fname = 'Parser::replaceLinkHolders';
4057 - wfProfileIn( $fname );
4058 -
4059 - $pdbks = array();
4060 - $colours = array();
4061 - $linkcolour_ids = array();
4062 - $sk = $this->mOptions->getSkin();
4063 - $linkCache = LinkCache::singleton();
4064 -
4065 - if ( !empty( $this->mLinkHolders['namespaces'] ) ) {
4066 - wfProfileIn( $fname.'-check' );
4067 - $dbr = wfGetDB( DB_SLAVE );
4068 - $page = $dbr->tableName( 'page' );
4069 - $threshold = $wgUser->getOption('stubthreshold');
4070 -
4071 - # Sort by namespace
4072 - asort( $this->mLinkHolders['namespaces'] );
4073 -
4074 - # Generate query
4075 - $query = false;
4076 - $current = null;
4077 - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4078 - # Make title object
4079 - $title = $this->mLinkHolders['titles'][$key];
4080 -
4081 - # Skip invalid entries.
4082 - # Result will be ugly, but prevents crash.
4083 - if ( is_null( $title ) ) {
4084 - continue;
4085 - }
4086 - $pdbk = $pdbks[$key] = $title->getPrefixedDBkey();
4087 -
4088 - # Check if it's a static known link, e.g. interwiki
4089 - if ( $title->isAlwaysKnown() ) {
4090 - $colours[$pdbk] = '';
4091 - } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
4092 - $colours[$pdbk] = '';
4093 - $this->mOutput->addLink( $title, $id );
4094 - } elseif ( $linkCache->isBadLink( $pdbk ) ) {
4095 - $colours[$pdbk] = 'new';
4096 - } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) {
4097 - $colours[$pdbk] = 'new';
4098 - } else {
4099 - # Not in the link cache, add it to the query
4100 - if ( !isset( $current ) ) {
4101 - $current = $ns;
4102 - $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
4103 - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
4104 - } elseif ( $current != $ns ) {
4105 - $current = $ns;
4106 - $query .= ")) OR (page_namespace=$ns AND page_title IN(";
4107 - } else {
4108 - $query .= ', ';
4109 - }
4110 -
4111 - $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] );
4112 - }
4113 - }
4114 - if ( $query ) {
4115 - $query .= '))';
4116 - if ( $options & RLH_FOR_UPDATE ) {
4117 - $query .= ' FOR UPDATE';
4118 - }
4119 -
4120 - $res = $dbr->query( $query, $fname );
4121 -
4122 - # Fetch data and form into an associative array
4123 - # non-existent = broken
4124 - while ( $s = $dbr->fetchObject($res) ) {
4125 - $title = Title::makeTitle( $s->page_namespace, $s->page_title );
4126 - $pdbk = $title->getPrefixedDBkey();
4127 - $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect );
4128 - $this->mOutput->addLink( $title, $s->page_id );
4129 - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
4130 - //add id to the extension todolist
4131 - $linkcolour_ids[$s->page_id] = $pdbk;
4132 - }
4133 - //pass an array of page_ids to an extension
4134 - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
4135 - }
4136 - wfProfileOut( $fname.'-check' );
4137 -
4138 - # Do a second query for different language variants of links and categories
4139 - if($wgContLang->hasVariants()){
4140 - $linkBatch = new LinkBatch();
4141 - $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
4142 - $categoryMap = array(); // maps $category_variant => $category (dbkeys)
4143 - $varCategories = array(); // category replacements oldDBkey => newDBkey
4144 -
4145 - $categories = $this->mOutput->getCategoryLinks();
4146 -
4147 - // Add variants of links to link batch
4148 - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4149 - $title = $this->mLinkHolders['titles'][$key];
4150 - if ( is_null( $title ) )
4151 - continue;
4152 -
4153 - $pdbk = $title->getPrefixedDBkey();
4154 - $titleText = $title->getText();
4155 -
4156 - // generate all variants of the link title text
4157 - $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText);
4158 -
4159 - // if link was not found (in first query), add all variants to query
4160 - if ( !isset($colours[$pdbk]) ){
4161 - foreach($allTextVariants as $textVariant){
4162 - if($textVariant != $titleText){
4163 - $variantTitle = Title::makeTitle( $ns, $textVariant );
4164 - if(is_null($variantTitle)) continue;
4165 - $linkBatch->addObj( $variantTitle );
4166 - $variantMap[$variantTitle->getPrefixedDBkey()][] = $key;
4167 - }
4168 - }
4169 - }
4170 - }
4171 -
4172 - // process categories, check if a category exists in some variant
4173 - foreach( $categories as $category ){
4174 - $variants = $wgContLang->convertLinkToAllVariants($category);
4175 - foreach($variants as $variant){
4176 - if($variant != $category){
4177 - $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) );
4178 - if(is_null($variantTitle)) continue;
4179 - $linkBatch->addObj( $variantTitle );
4180 - $categoryMap[$variant] = $category;
4181 - }
4182 - }
4183 - }
4184 -
4185 -
4186 - if(!$linkBatch->isEmpty()){
4187 - // construct query
4188 - $titleClause = $linkBatch->constructSet('page', $dbr);
4189 -
4190 - $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
4191 -
4192 - $variantQuery .= " FROM $page WHERE $titleClause";
4193 - if ( $options & RLH_FOR_UPDATE ) {
4194 - $variantQuery .= ' FOR UPDATE';
4195 - }
4196 -
4197 - $varRes = $dbr->query( $variantQuery, $fname );
4198 -
4199 - // for each found variants, figure out link holders and replace
4200 - while ( $s = $dbr->fetchObject($varRes) ) {
4201 -
4202 - $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
4203 - $varPdbk = $variantTitle->getPrefixedDBkey();
4204 - $vardbk = $variantTitle->getDBkey();
4205 -
4206 - $holderKeys = array();
4207 - if(isset($variantMap[$varPdbk])){
4208 - $holderKeys = $variantMap[$varPdbk];
4209 - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect );
4210 - $this->mOutput->addLink( $variantTitle, $s->page_id );
4211 - }
4212 -
4213 - // loop over link holders
4214 - foreach($holderKeys as $key){
4215 - $title = $this->mLinkHolders['titles'][$key];
4216 - if ( is_null( $title ) ) continue;
4217 -
4218 - $pdbk = $title->getPrefixedDBkey();
4219 -
4220 - if(!isset($colours[$pdbk])){
4221 - // found link in some of the variants, replace the link holder data
4222 - $this->mLinkHolders['titles'][$key] = $variantTitle;
4223 - $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey();
4224 -
4225 - // set pdbk and colour
4226 - $pdbks[$key] = $varPdbk;
4227 - $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
4228 - $linkcolour_ids[$s->page_id] = $pdbk;
4229 - }
4230 - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
4231 - }
4232 -
4233 - // check if the object is a variant of a category
4234 - if(isset($categoryMap[$vardbk])){
4235 - $oldkey = $categoryMap[$vardbk];
4236 - if($oldkey != $vardbk)
4237 - $varCategories[$oldkey]=$vardbk;
4238 - }
4239 - }
4240 -
4241 - // rebuild the categories in original order (if there are replacements)
4242 - if(count($varCategories)>0){
4243 - $newCats = array();
4244 - $originalCats = $this->mOutput->getCategories();
4245 - foreach($originalCats as $cat => $sortkey){
4246 - // make the replacement
4247 - if( array_key_exists($cat,$varCategories) )
4248 - $newCats[$varCategories[$cat]] = $sortkey;
4249 - else $newCats[$cat] = $sortkey;
4250 - }
4251 - $this->mOutput->setCategoryLinks($newCats);
4252 - }
4253 - }
4254 - }
4255 -
4256 - # Construct search and replace arrays
4257 - wfProfileIn( $fname.'-construct' );
4258 - $replacePairs = array();
4259 - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4260 - $pdbk = $pdbks[$key];
4261 - $searchkey = "<!--LINK $key-->";
4262 - $title = $this->mLinkHolders['titles'][$key];
4263 - if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) {
4264 - $linkCache->addBadLinkObj( $title );
4265 - $colours[$pdbk] = 'new';
4266 - $this->mOutput->addLink( $title, 0 );
4267 - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title,
4268 - $this->mLinkHolders['texts'][$key],
4269 - $this->mLinkHolders['queries'][$key] );
4270 - } else {
4271 - $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk],
4272 - $this->mLinkHolders['texts'][$key],
4273 - $this->mLinkHolders['queries'][$key] );
4274 - }
4275 - }
4276 - $replacer = new HashtableReplacer( $replacePairs, 1 );
4277 - wfProfileOut( $fname.'-construct' );
4278 -
4279 - # Do the thing
4280 - wfProfileIn( $fname.'-replace' );
4281 - $text = preg_replace_callback(
4282 - '/(<!--LINK .*?-->)/',
4283 - $replacer->cb(),
4284 - $text);
4285 -
4286 - wfProfileOut( $fname.'-replace' );
4287 - }
4288 -
4289 - # Now process interwiki link holders
4290 - # This is quite a bit simpler than internal links
4291 - if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) {
4292 - wfProfileIn( $fname.'-interwiki' );
4293 - # Make interwiki link HTML
4294 - $replacePairs = array();
4295 - foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) {
4296 - $title = $this->mInterwikiLinkHolders['titles'][$key];
4297 - $replacePairs[$key] = $sk->link( $title, $link );
4298 - }
4299 - $replacer = new HashtableReplacer( $replacePairs, 1 );
4300 -
4301 - $text = preg_replace_callback(
4302 - '/<!--IWLINK (.*?)-->/',
4303 - $replacer->cb(),
4304 - $text );
4305 - wfProfileOut( $fname.'-interwiki' );
4306 - }
4307 -
4308 - wfProfileOut( $fname );
4309 - return $colours;
 4032+ return $this->mLinkHolders->replace( $text );
43104033 }
43114034
43124035 /**
@@ -4315,39 +4038,10 @@
43164039 * @return string
43174040 */
43184041 function replaceLinkHoldersText( $text ) {
4319 - $fname = 'Parser::replaceLinkHoldersText';
4320 - wfProfileIn( $fname );
4321 -
4322 - $text = preg_replace_callback(
4323 - '/<!--(LINK|IWLINK) (.*?)-->/',
4324 - array( &$this, 'replaceLinkHoldersTextCallback' ),
4325 - $text );
4326 -
4327 - wfProfileOut( $fname );
4328 - return $text;
 4042+ return $this->mLinkHolders->replaceText( $text );
43294043 }
43304044
43314045 /**
4332 - * @param array $matches
4333 - * @return string
4334 - * @private
4335 - */
4336 - function replaceLinkHoldersTextCallback( $matches ) {
4337 - $type = $matches[1];
4338 - $key = $matches[2];
4339 - if( $type == 'LINK' ) {
4340 - if( isset( $this->mLinkHolders['texts'][$key] ) ) {
4341 - return $this->mLinkHolders['texts'][$key];
4342 - }
4343 - } elseif( $type == 'IWLINK' ) {
4344 - if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) {
4345 - return $this->mInterwikiLinkHolders['texts'][$key];
4346 - }
4347 - }
4348 - return $matches[0];
4349 - }
4350 -
4351 - /**
43524046 * Tag hook handler for 'pre'.
43534047 */
43544048 function renderPreTag( $text, $attribs ) {
@@ -4398,7 +4092,7 @@
43994093
44004094 wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) );
44014095
4402 - $lines = explode( "\n", $text );
 4096+ $lines = StringUtils::explode( "\n", $text );
44034097 foreach ( $lines as $line ) {
44044098 # match lines like these:
44054099 # Image:someimage.jpg|This is some image
@@ -4411,7 +4105,7 @@
44124106
44134107 if ( strpos( $matches[0], '%' ) !== false )
44144108 $matches[1] = urldecode( $matches[1] );
4415 - $tp = Title::newFromText( $matches[1] );
 4109+ $tp = Title::newFromText( $matches[1], NS_IMAGE );
44164110 $nt =& $tp;
44174111 if( is_null( $nt ) ) {
44184112 # Bogus title. Ignore these so we don't bomb out later.
@@ -4477,8 +4171,11 @@
44784172
44794173 /**
44804174 * Parse image options text and use it to make an image
 4175+ * @param Title $title
 4176+ * @param string $options
 4177+ * @param LinkHolderArray $holders
44814178 */
4482 - function makeImage( $title, $options ) {
 4179+ function makeImage( $title, $options, $holders = false ) {
44834180 # Check if the options text is of the form "options|alt text"
44844181 # Options are:
44854182 # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
@@ -4501,7 +4198,7 @@
45024199 # * bottom
45034200 # * text-bottom
45044201
4505 - $parts = array_map( 'trim', explode( '|', $options) );
 4202+ $parts = StringUtils::explode( "|", $options );
45064203 $sk = $this->mOptions->getSkin();
45074204
45084205 # Give extensions a chance to select the file revision for us
@@ -4588,7 +4285,13 @@
45894286 }
45904287
45914288 # Strip bad stuff out of the alt text
4592 - $alt = $this->replaceLinkHoldersText( $caption );
 4289+ # We can't just use replaceLinkHoldersText() here, because if this function
 4290+ # is called from replaceInternalLinks2(), mLinkHolders won't be up to date.
 4291+ if ( $holders ) {
 4292+ $alt = $holders->replaceText( $caption );
 4293+ } else {
 4294+ $alt = $this->replaceLinkHoldersText( $caption );
 4295+ }
45934296
45944297 # make sure there are no placeholders in thumbnail attributes
45954298 # that are later expanded to html- so expand them now and
Index: trunk/phase3/includes/MessageCache.php
@@ -44,7 +44,6 @@
4545
4646 /**
4747 * ParserOptions is lazy initialised.
48 - * Access should probably be protected.
4948 */
5049 function getParserOptions() {
5150 if ( !$this->mParserOptions ) {
Index: trunk/phase3/includes/AutoLoader.php
@@ -66,6 +66,7 @@
6767 'EnotifNotifyJob' => 'includes/EnotifNotifyJob.php',
6868 'ErrorPageError' => 'includes/Exception.php',
6969 'Exif' => 'includes/Exif.php',
 70+ 'ExplodeIterator' => 'includes/StringUtils.php',
7071 'ExternalEdit' => 'includes/ExternalEdit.php',
7172 'ExternalStoreDB' => 'includes/ExternalStoreDB.php',
7273 'ExternalStoreHttp' => 'includes/ExternalStoreHttp.php',
@@ -356,6 +357,7 @@
357358 # includes/parser
358359 'CoreParserFunctions' => 'includes/parser/CoreParserFunctions.php',
359360 'DateFormatter' => 'includes/parser/DateFormatter.php',
 361+ 'LinkHolderArray' => 'includes/parser/LinkHolderArray.php',
360362 'OnlyIncludeReplacer' => 'includes/parser/Parser.php',
361363 'PPDAccum_Hash' => 'includes/parser/Preprocessor_Hash.php',
362364 'PPDPart' => 'includes/parser/Preprocessor_DOM.php',
Index: trunk/phase3/includes/Title.php
@@ -10,12 +10,6 @@
1111
1212 define ( 'GAID_FOR_UPDATE', 1 );
1313
14 -/**
15 - * Title::newFromText maintains a cache to avoid expensive re-normalization of
16 - * commonly used titles. On a batch operation this can become a memory leak
17 - * if not bounded. After hitting this many titles reset the cache.
18 - */
19 -define( 'MW_TITLECACHE_MAX', 1000 );
2014
2115 /**
2216 * Constants for pr_cascade bitfield
@@ -36,6 +30,14 @@
3731 //@}
3832
3933 /**
 34+ * Title::newFromText maintains a cache to avoid expensive re-normalization of
 35+ * commonly used titles. On a batch operation this can become a memory leak
 36+ * if not bounded. After hitting this many titles reset the cache.
 37+ */
 38+ const CACHE_MAX = 1000;
 39+
 40+
 41+ /**
4042 * @name Private member variables
4143 * Please use the accessor functions instead.
4244 * @private
@@ -131,7 +133,7 @@
132134 static $cachedcount = 0 ;
133135 if( $t->secureAndSplit() ) {
134136 if( $defaultNamespace == NS_MAIN ) {
135 - if( $cachedcount >= MW_TITLECACHE_MAX ) {
 137+ if( $cachedcount >= self::CACHE_MAX ) {
136138 # Avoid memory leaks on mass operations...
137139 Title::$titleCache = array();
138140 $cachedcount=0;
Index: trunk/phase3/includes/LinkCache.php
@@ -9,7 +9,6 @@
1010 // becomes incompatible with the new version.
1111 /* private */ var $mClassVer = 4;
1212
13 - /* private */ var $mPageLinks;
1413 /* private */ var $mGoodLinks, $mBadLinks;
1514 /* private */ var $mForUpdate;
1615
@@ -26,7 +25,6 @@
2726
2827 function __construct() {
2928 $this->mForUpdate = false;
30 - $this->mPageLinks = array();
3129 $this->mGoodLinks = array();
3230 $this->mGoodLinkFields = array();
3331 $this->mBadLinks = array();
@@ -78,14 +76,12 @@
7977 $dbkey = $title->getPrefixedDbKey();
8078 $this->mGoodLinks[$dbkey] = $id;
8179 $this->mGoodLinkFields[$dbkey] = array( 'length' => $len, 'redirect' => $redir );
82 - $this->mPageLinks[$dbkey] = $title;
8380 }
8481
8582 public function addBadLinkObj( $title ) {
8683 $dbkey = $title->getPrefixedDbKey();
8784 if ( ! $this->isBadLink( $dbkey ) ) {
8885 $this->mBadLinks[$dbkey] = 1;
89 - $this->mPageLinks[$dbkey] = $title;
9086 }
9187 }
9288
@@ -96,7 +92,6 @@
9793 /* obsolete, for old $wgLinkCacheMemcached stuff */
9894 public function clearLink( $title ) {}
9995
100 - public function getPageLinks() { return $this->mPageLinks; }
10196 public function getGoodLinks() { return $this->mGoodLinks; }
10297 public function getBadLinks() { return array_keys( $this->mBadLinks ); }
10398
@@ -181,7 +176,6 @@
182177 * Clears cache
183178 */
184179 public function clear() {
185 - $this->mPageLinks = array();
186180 $this->mGoodLinks = array();
187181 $this->mGoodLinkFields = array();
188182 $this->mBadLinks = array();
Index: trunk/phase3/includes/StringUtils.php
@@ -167,6 +167,18 @@
168168 $string = str_replace( '$', '\\$', $string );
169169 return $string;
170170 }
 171+
 172+ /**
 173+ * Workalike for explode() with limited memory usage.
 174+ * Returns an Iterator
 175+ */
 176+ static function explode( $separator, $subject ) {
 177+ if ( substr_count( $subject, $separator ) > 1000 ) {
 178+ return new ExplodeIterator( $separator, $subject );
 179+ } else {
 180+ return new ArrayIterator( explode( $separator, $subject ) );
 181+ }
 182+ }
171183 }
172184
173185 /**
@@ -310,3 +322,90 @@
311323 return $result;
312324 }
313325 }
 326+
 327+/**
 328+ * An iterator which works exactly like:
 329+ *
 330+ * foreach ( explode( $delim, $s ) as $element ) {
 331+ * ...
 332+ * }
 333+ *
 334+ * Except it doesn't use 193 bytes per element
 335+ */
 336+class ExplodeIterator implements Iterator {
 337+ // The subject string
 338+ var $subject, $subjectLength;
 339+
 340+ // The delimiter
 341+ var $delim, $delimLength;
 342+
 343+ // The position of the start of the current token
 344+ var $curPos;
 345+
 346+ // The position of the start of the next delimiter, or false if there is none
 347+ var $endPos;
 348+
 349+ // The current token
 350+ var $current;
 351+
 352+ /**
 353+ * Construct an ExplodeIterator
 354+ */
 355+ function __construct( $delim, $s ) {
 356+ $this->subject = $s;
 357+ $this->delim = $delim;
 358+
 359+ // Micro-optimisation (theoretical)
 360+ $this->subjectLength = strlen( $s );
 361+ $this->delimLength = strlen( $delim );
 362+
 363+ $this->rewind();
 364+ }
 365+
 366+ function rewind() {
 367+ $this->curPos = 0;
 368+ $this->endPos = strpos( $this->subject, $this->delim );
 369+ $this->refreshCurrent();
 370+ }
 371+
 372+
 373+ function refreshCurrent() {
 374+ if ( $this->curPos === false ) {
 375+ $this->current = false;
 376+ } elseif ( $this->curPos >= $this->subjectLength ) {
 377+ $this->current = '';
 378+ } elseif ( $this->endPos === false ) {
 379+ $this->current = substr( $this->subject, $this->curPos );
 380+ } else {
 381+ $this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos );
 382+ }
 383+ }
 384+
 385+ function current() {
 386+ return $this->current;
 387+ }
 388+
 389+ function key() {
 390+ return $this->curPos;
 391+ }
 392+
 393+ function next() {
 394+ if ( $this->endPos === false ) {
 395+ $this->curPos = false;
 396+ } else {
 397+ $this->curPos = $this->endPos + $this->delimLength;
 398+ if ( $this->curPos >= $this->subjectLength ) {
 399+ $this->endPos = false;
 400+ } else {
 401+ $this->endPos = strpos( $this->subject, $this->delim, $this->curPos );
 402+ }
 403+ }
 404+ $this->refreshCurrent();
 405+ return $this->current;
 406+ }
 407+
 408+ function valid() {
 409+ return $this->curPos !== false;
 410+ }
 411+}
 412+
Index: trunk/phase3/RELEASE-NOTES
@@ -130,6 +130,7 @@
131131 gives results
132132 * Avoid recursive crazy expansions in section edit comments for pages which
133133 contain '/*' in the title
 134+* Fix excessive memory usage when parsing pages with lots of links
134135
135136 === API changes in 1.14 ===
136137

Follow-up revisions

Revision | Commit summary | Author | Date
r39663 | 2 new PASSING test(s) :)... | brion | 21:10, 19 August 2008

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r39414 | * In the parser: do link existence tests in batches of 1000. Avoids using exc... | tstarling | 16:35, 15 August 2008

Status & tagging log