r40020 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r40019 | r40020 | r40021 >
Date:14:37, 26 August 2008
Author:tstarling
Status:old
Tags:
Comment:
* Revert back to my parser pseudo-branch again. Note: if you feel like reverting this, reverting Parser.php alone to r40010 will work just fine.
* Merged replaceFreeExternalLinks() into doMagicLinks(). This makes a lot of sense: the two are very similar operations, and the merge doesn't break any parser tests. It also stops free links from interacting with other parser stages, just as ISBN links don't.
* The pass order change fixes Brion's complaint in r39980. Early link expansion, triggered by having more than 1000 links in the page, was outputting URLs which were then destroyed by replaceFreeExternalLinks() (RFEL). Added a parser test.
* Fixed an unrelated bug in LinkHolderArray::replace(): if a link to a redirect appears in two separate replaceLinkHolders() (RLH) calls, the second and subsequent calls do not add the mw-redirect class. Caused by an unmigrated LinkCache fetch.
* Added a parser test for a pass interaction bug that the pass order change fixes.
* The fuzzer told me to tell you that free external links in non-caption image parameters, which are and have always been invisible, are now not registered either.
* Miscellaneous supporting updates to the test infrastructure.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/DefaultSettings.php (modified) (history)
  • /trunk/phase3/includes/Exception.php (modified) (history)
  • /trunk/phase3/includes/MessageCache.php (modified) (history)
  • /trunk/phase3/includes/Title.php (modified) (history)
  • /trunk/phase3/includes/parser/LinkHolderArray.php (modified) (history)
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)
  • /trunk/phase3/includes/parser/Parser_DiffTest.php (modified) (history)
  • /trunk/phase3/languages/Language.php (modified) (history)
  • /trunk/phase3/languages/LanguageConverter.php (modified) (history)
  • /trunk/phase3/maintenance/parserTests.inc (modified) (history)
  • /trunk/phase3/maintenance/parserTests.php (modified) (history)
  • /trunk/phase3/maintenance/parserTests.txt (modified) (history)
  • /trunk/phase3/maintenance/parserTestsStaticParserHook.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.inc
@@ -26,7 +26,7 @@
2727
2828 /** */
2929 $options = array( 'quick', 'color', 'quiet', 'help', 'show-output', 'record' );
30 -$optionsWithArgs = array( 'regex' );
 30+$optionsWithArgs = array( 'regex', 'seed' );
3131
3232 require_once( 'commandLine.inc' );
3333 require_once( "$IP/maintenance/parserTestsParserHook.php" );
@@ -62,6 +62,10 @@
6363 */
6464 private $oldTablePrefix;
6565
 66+ private $maxFuzzTestLength = 300;
 67+ private $fuzzSeed = 0;
 68+ private $memoryLimit = 50;
 69+
6670 /**
6771 * Sets terminal colorization and diff/quick modes depending on OS and
6872 * command-line options (--color and --quick).
@@ -117,6 +121,10 @@
118122 }
119123 $this->keepUploads = isset( $options['keep-uploads'] );
120124
 125+ if ( isset( $options['seed'] ) ) {
 126+ $this->fuzzSeed = intval( $options['seed'] ) - 1;
 127+ }
 128+
121129 $this->hooks = array();
122130 $this->functionHooks = array();
123131 }
@@ -134,6 +142,119 @@
135143 }
136144
137145 /**
 146+ * Run a fuzz test series
 147+ * Draw input from a set of test files
 148+ */
 149+ function fuzzTest( $filenames ) {
 150+ $dict = $this->getFuzzInput( $filenames );
 151+ $dictSize = strlen( $dict );
 152+ $logMaxLength = log( $this->maxFuzzTestLength );
 153+ $this->setupDatabase();
 154+ ini_set( 'memory_limit', $this->memoryLimit * 1048576 );
 155+
 156+ $numTotal = 0;
 157+ $numSuccess = 0;
 158+ $user = new User;
 159+ $opts = ParserOptions::newFromUser( $user );
 160+ $title = Title::makeTitle( NS_MAIN, 'Parser_test' );
 161+
 162+ while ( true ) {
 163+ // Generate test input
 164+ mt_srand( ++$this->fuzzSeed );
 165+ $totalLength = mt_rand( 1, $this->maxFuzzTestLength );
 166+ $input = '';
 167+ while ( strlen( $input ) < $totalLength ) {
 168+ $logHairLength = mt_rand( 0, 1000000 ) / 1000000 * $logMaxLength;
 169+ $hairLength = min( intval( exp( $logHairLength ) ), $dictSize );
 170+ $offset = mt_rand( 0, $dictSize - $hairLength );
 171+ $input .= substr( $dict, $offset, $hairLength );
 172+ }
 173+
 174+ $this->setupGlobals();
 175+ $parser = $this->getParser();
 176+ // Run the test
 177+ try {
 178+ $parser->parse( $input, $title, $opts );
 179+ $fail = false;
 180+ } catch ( Exception $exception ) {
 181+ $fail = true;
 182+ }
 183+
 184+ if ( $fail ) {
 185+ echo "Test failed with seed {$this->fuzzSeed}\n";
 186+ echo "Input:\n";
 187+ var_dump( $input );
 188+ echo "\n\n";
 189+ echo "$exception\n";
 190+ } else {
 191+ $numSuccess++;
 192+ }
 193+ $numTotal++;
 194+ $this->teardownGlobals();
 195+ $parser->__destruct();
 196+
 197+ if ( $numTotal % 100 == 0 ) {
 198+ $usage = intval( memory_get_usage( true ) / $this->memoryLimit / 1048576 * 100 );
 199+ echo "{$this->fuzzSeed}: $numSuccess/$numTotal (mem: $usage%)\n";
 200+ if ( $usage > 90 ) {
 201+ echo "Out of memory:\n";
 202+ $memStats = $this->getMemoryBreakdown();
 203+ foreach ( $memStats as $name => $usage ) {
 204+ echo "$name: $usage\n";
 205+ }
 206+ $this->abort();
 207+ }
 208+ }
 209+ }
 210+ }
 211+
 212+ /**
 213+ * Get an input dictionary from a set of parser test files
 214+ */
 215+ function getFuzzInput( $filenames ) {
 216+ $dict = '';
 217+ foreach( $filenames as $filename ) {
 218+ $contents = file_get_contents( $filename );
 219+ preg_match_all( '/!!\s*input\n(.*?)\n!!\s*result/s', $contents, $matches );
 220+ foreach ( $matches[1] as $match ) {
 221+ $dict .= $match . "\n";
 222+ }
 223+ }
 224+ return $dict;
 225+ }
 226+
 227+ /**
 228+ * Get a memory usage breakdown
 229+ */
 230+ function getMemoryBreakdown() {
 231+ $memStats = array();
 232+ foreach ( $GLOBALS as $name => $value ) {
 233+ $memStats['$'.$name] = strlen( serialize( $value ) );
 234+ }
 235+ $classes = get_declared_classes();
 236+ foreach ( $classes as $class ) {
 237+ $rc = new ReflectionClass( $class );
 238+ $props = $rc->getStaticProperties();
 239+ $memStats[$class] = strlen( serialize( $props ) );
 240+ $methods = $rc->getMethods();
 241+ foreach ( $methods as $method ) {
 242+ $memStats[$class] += strlen( serialize( $method->getStaticVariables() ) );
 243+ }
 244+ }
 245+ $functions = get_defined_functions();
 246+ foreach ( $functions['user'] as $function ) {
 247+ $rf = new ReflectionFunction( $function );
 248+ $memStats["$function()"] = strlen( serialize( $rf->getStaticVariables() ) );
 249+ }
 250+ asort( $memStats );
 251+ return $memStats;
 252+ }
 253+
 254+ function abort() {
 255+ $this->abort();
 256+ }
 257+
 258+ /**
138259 * Run a series of tests listed in the given text files.
139260 * Each test consists of a brief description, wikitext input,
140261 * and the expected HTML output.
@@ -267,6 +388,24 @@
268389 }
269390
270391 /**
 392+ * Get a Parser object
 393+ */
 394+ function getParser() {
 395+ global $wgParserConf;
 396+ $class = $wgParserConf['class'];
 397+ $parser = new $class( $wgParserConf );
 398+ foreach( $this->hooks as $tag => $callback ) {
 399+ $parser->setHook( $tag, $callback );
 400+ }
 401+ foreach( $this->functionHooks as $tag => $bits ) {
 402+ list( $callback, $flags ) = $bits;
 403+ $parser->setFunctionHook( $tag, $callback, $flags );
 404+ }
 405+ wfRunHooks( 'ParserTestParser', array( &$parser ) );
 406+ return $parser;
 407+ }
 408+
 409+ /**
271410 * Run a given wikitext input through a freshly-constructed wiki parser,
272411 * and compare the output against the expected results.
273412 * Prints status and explanatory messages to stdout.
@@ -276,7 +415,6 @@
277416 * @return bool
278417 */
279418 private function runTest( $desc, $input, $result, $opts ) {
280 - global $wgParserConf;
281419 if( $this->showProgress ) {
282420 $this->showTesting( $desc );
283421 }
@@ -300,18 +438,7 @@
301439 }
302440
303441 $noxml = (bool)preg_match( '~\\b noxml \\b~x', $opts );
304 -
305 - $class = $wgParserConf['class'];
306 - $parser = new $class( $wgParserConf );
307 - foreach( $this->hooks as $tag => $callback ) {
308 - $parser->setHook( $tag, $callback );
309 - }
310 - foreach( $this->functionHooks as $tag => $bits ) {
311 - list( $callback, $flags ) = $bits;
312 - $parser->setFunctionHook( $tag, $callback, $flags );
313 - }
314 - wfRunHooks( 'ParserTestParser', array( &$parser ) );
315 -
 442+ $parser = $this->getParser();
316443 $title =& Title::makeTitle( NS_MAIN, $titleText );
317444
318445 $matches = array();
@@ -387,6 +514,8 @@
388515 self::getOptionValue( '/variant=([a-z]+(?:-[a-z]+)?)/', $opts, false );
389516 $maxtoclevel =
390517 self::getOptionValue( '/wgMaxTocLevel=(\d+)/', $opts, 999 );
 518+ $linkHolderBatchSize =
 519+ self::getOptionValue( '/wgLinkHolderBatchSize=(\d+)/', $opts, 1000 );
391520
392521 $settings = array(
393522 'wgServer' => 'http://localhost',
@@ -432,6 +561,7 @@
433562 ) ),
434563 'wgDefaultExternalStore' => array(),
435564 'wgForeignFileRepos' => array(),
 565+ 'wgLinkHolderBatchSize' => $linkHolderBatchSize,
436566 );
437567 $this->savedGlobals = array();
438568 foreach( $settings as $var => $val ) {
@@ -441,6 +571,7 @@
442572 $langObj = Language::factory( $lang );
443573 $GLOBALS['wgLang'] = $langObj;
444574 $GLOBALS['wgContLang'] = $langObj;
 575+ $GLOBALS['wgMemc'] = new FakeMemCachedClient;
445576
446577 //$GLOBALS['wgMessageCache'] = new MessageCache( new BagOStuff(), false, 0, $GLOBALS['wgDBname'] );
447578
@@ -551,10 +682,10 @@
552683 # Hack: insert a few Wikipedia in-project interwiki prefixes,
553684 # for testing inter-language links
554685 $db->insert( 'interwiki', array(
555 - array( 'iw_prefix' => 'Wikipedia',
 686+ array( 'iw_prefix' => 'wikipedia',
556687 'iw_url' => 'http://en.wikipedia.org/wiki/$1',
557688 'iw_local' => 0 ),
558 - array( 'iw_prefix' => 'MeatBall',
 689+ array( 'iw_prefix' => 'meatball',
559690 'iw_url' => 'http://www.usemod.com/cgi-bin/mb.pl?$1',
560691 'iw_local' => 0 ),
561692 array( 'iw_prefix' => 'zh',
@@ -621,11 +752,12 @@
622753 return;
623754 }
624755
 756+ /*
625757 $tables = $this->listTables();
626758 $db = wfGetDB( DB_MASTER );
627759 foreach ( $tables as $table ) {
628760 $db->query( "DROP TABLE `parsertest_$table`" );
629 - }
 761+ }*/
630762 }
631763
632764 /**
@@ -645,6 +777,10 @@
646778 }
647779
648780 wfDebug( "Creating upload directory $dir\n" );
 781+ if ( file_exists( $dir ) ) {
 782+ wfDebug( "Already exists!\n" );
 783+ return $dir;
 784+ }
649785 mkdir( $dir );
650786 mkdir( $dir . '/3' );
651787 mkdir( $dir . '/3/3a' );
@@ -658,6 +794,8 @@
659795 */
660796 private function teardownGlobals() {
661797 RepoGroup::destroySingleton();
 798+ LinkCache::singleton()->clear();
 799+ $GLOBALS['wgLang']->__destruct();
662800 foreach( $this->savedGlobals as $var => $val ) {
663801 $GLOBALS[$var] = $val;
664802 }
Index: trunk/phase3/maintenance/parserTests.php
@@ -28,22 +28,21 @@
2929 if( isset( $options['help'] ) ) {
3030 echo <<<ENDS
3131 MediaWiki $wgVersion parser test suite
32 -Usage: php parserTests.php [--quick] [--quiet] [--show-output]
33 - [--color[=(yes|no)]]
34 - [--regex=<expression>] [--file=<testfile>]
35 - [--record] [--compare]
36 - [--help]
 32+Usage: php parserTests.php [options...]
 33+
3734 Options:
3835 --quick Suppress diff output of failed tests
3936 --quiet Suppress notification of passed tests (shows only failed tests)
4037 --show-output Show expected and actual output
41 - --color Override terminal detection and force color output on or off
 38+ --color[=yes|no] Override terminal detection and force color output on or off
4239 use wgCommandLineDarkBg = true; if your term is dark
4340 --regex Only run tests whose descriptions which match given regex
44 - --file Run test cases from a custom file instead of parserTests.txt
 41+ --file=<testfile> Run test cases from a custom file instead of parserTests.txt
4542 --record Record tests in database
4643 --compare Compare with recorded results, without updating the database.
4744 --keep-uploads Re-use the same upload directory for each test, don't delete it
 45+ --fuzz Do a fuzz test instead of a normal test
 46+ --seed <n> Start the fuzz test from the specified seed
4847 --help Show this help message
4948
5049
@@ -67,7 +66,10 @@
6867 # Print out software version to assist with locating regressions
6968 $version = SpecialVersion::getVersion();
7069 echo( "This is MediaWiki version {$version}.\n\n" );
71 -$ok = $tester->runTestsFromFiles( $files );
7270
73 -exit ($ok ? 0 : -1);
74 -
 71+if ( isset( $options['fuzz'] ) ) {
 72+ $tester->fuzzTest( $files );
 73+} else {
 74+ $ok = $tester->runTestsFromFiles( $files );
 75+ exit ($ok ? 0 : -1);
 76+}
Index: trunk/phase3/maintenance/parserTests.txt
@@ -7066,7 +7066,30 @@
70677067
70687068 !! end
70697069
 7070+!! test
 7071+Interwiki links trounced by replaceExternalLinks after early LinkHolderArray expansion
 7072+!! options
 7073+wgLinkHolderBatchSize=0
 7074+!! input
 7075+[[meatball:1]]
 7076+[[meatball:2]]
 7077+[[meatball:3]]
 7078+!! result
 7079+<p><a href="http://www.usemod.com/cgi-bin/mb.pl?1" class="extiw" title="meatball:1">meatball:1</a>
 7080+<a href="http://www.usemod.com/cgi-bin/mb.pl?2" class="extiw" title="meatball:2">meatball:2</a>
 7081+<a href="http://www.usemod.com/cgi-bin/mb.pl?3" class="extiw" title="meatball:3">meatball:3</a>
 7082+</p>
 7083+!! end
70707084
 7085+!! test
 7086+Free external link invading image caption
 7087+!! input
 7088+[[Image:Foobar.jpg|thumb|http://x|hello]]
 7089+!! result
 7090+<div class="thumb tright"><div class="thumbinner" style="width:182px;"><a href="https://www.mediawiki.org/wiki/Image:Foobar.jpg" class="image" title="hello"><img alt="hello" src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" width="180" height="20" border="0" class="thumbimage" /></a> <div class="thumbcaption"><div class="magnify"><a href="https://www.mediawiki.org/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="" /></a></div>hello</div></div></div>
 7091+
 7092+!! end
 7093+
70717094 #
70727095 #
70737096 #
Index: trunk/phase3/maintenance/parserTestsStaticParserHook.php
@@ -21,24 +21,27 @@
2222 return true;
2323 }
2424
25 -function wfParserTestStaticParserHookHook( $in, $argv ) {
26 - static $buf = null;
27 -
 25+function wfParserTestStaticParserHookHook( $in, $argv, $parser ) {
2826 if ( ! count( $argv ) ) {
29 - $buf = $in;
 27+ $parser->static_tag_buf = $in;
3028 return '';
31 - } else if ( count( $argv ) === 1 && $argv['action'] === 'flush' && $in === null ) {
 29+ } else if ( count( $argv ) === 1 && isset( $argv['action'] )
 30+ && $argv['action'] === 'flush' && $in === null )
 31+ {
3232 // Clear the buffer, we probably don't need to
33 - $tmp = $buf;
34 - $buf = null;
 33+ if ( isset( $parser->static_tag_buf ) ) {
 34+ $tmp = $parser->static_tag_buf;
 35+ } else {
 36+ $tmp = '';
 37+ }
 38+ $parser->static_tag_buf = null;
3539 return $tmp;
3640 } else
3741 // wtf?
38 - die(
 42+ return
3943 "\nCall this extension as <statictag>string</statictag> or as" .
4044 " <statictag action=flush/>, not in any other way.\n" .
4145 "text: " . var_export( $in, true ) . "\n" .
42 - "argv: " . var_export( $argv, true ) . "\n"
43 - );
 46+ "argv: " . var_export( $argv, true ) . "\n";
4447 }
4548
Index: trunk/phase3/includes/parser/LinkHolderArray.php
@@ -1,8 +1,6 @@
22 <?php
33
44 class LinkHolderArray {
5 - var $batchSize = 1000;
6 -
75 var $internals = array(), $interwikis = array();
86 var $size = 0;
97 var $parent;
@@ -12,6 +10,15 @@
1311 }
1412
1513 /**
 14+ * Reduce memory usage to reduce the impact of circular references
 15+ */
 16+ function __destruct() {
 17+ foreach ( $this as $name => $value ) {
 18+ unset( $this->$name );
 19+ }
 20+ }
 21+
 22+ /**
1623 * Merge another LinkHolderArray into this one
1724 */
1825 function merge( $other ) {
@@ -30,7 +37,8 @@
3138 * Returns true if the memory requirements of this object are getting large
3239 */
3340 function isBig() {
34 - return $this->size > $this->batchSize;
 41+ global $wgLinkHolderBatchSize;
 42+ return $this->size > $wgLinkHolderBatchSize;
3543 }
3644
3745 /**
@@ -145,7 +153,7 @@
146154 if ( $title->isAlwaysKnown() ) {
147155 $colours[$pdbk] = '';
148156 } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
149 - $colours[$pdbk] = '';
 157+ $colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
150158 $output->addLink( $title, $id );
151159 } elseif ( $linkCache->isBadLink( $pdbk ) ) {
152160 $colours[$pdbk] = 'new';
@@ -180,6 +188,9 @@
181189 $pdbk = $title->getPrefixedDBkey();
182190 $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect );
183191 $output->addLink( $title, $s->page_id );
 192+ # FIXME: convoluted data flow
 193+ # The redirect status and length is passed to getLinkColour via the LinkCache
 194+ # Use formal parameters instead
184195 $colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
185196 //add id to the extension todolist
186197 $linkcolour_ids[$s->page_id] = $pdbk;
@@ -274,6 +285,9 @@
275286 $entry['pdbk'] = $varPdbk;
276287
277288 // set pdbk and colour
 289+ # FIXME: convoluted data flow
 290+ # The redirect status and length is passed to getLinkColour via the LinkCache
 291+ # Use formal parameters instead
278292 $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
279293 $linkcolour_ids[$s->page_id] = $pdbk;
280294 }
Index: trunk/phase3/includes/parser/Parser.php
@@ -92,13 +92,13 @@
9393 # Persistent:
9494 var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
9595 $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, $mPreprocessor,
96 - $mExtLinkBracketedRegex, $mDefaultStripList, $mVarCache, $mConf;
 96+ $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, $mVarCache, $mConf;
9797
9898
9999 # Cleared with clearState():
100100 var $mOutput, $mAutonumber, $mDTopen, $mStripState;
101101 var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
102 - var $mInterwikiLinkHolders, $mLinkHolders;
 102+ var $mLinkHolders, $mLinkID;
103103 var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
104104 var $mTplExpandCache; // empty-frame expansion cache
105105 var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
@@ -128,6 +128,7 @@
129129 $this->mFunctionHooks = array();
130130 $this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
131131 $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' );
 132+ $this->mUrlProtocols = wfUrlProtocols();
132133 $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
133134 '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
134135 $this->mVarCache = array();
@@ -146,6 +147,9 @@
147148 * Reduce memory usage to reduce the impact of circular references
148149 */
149150 function __destruct() {
 151+ if ( isset( $this->mLinkHolders ) ) {
 152+ $this->mLinkHolders->__destruct();
 153+ }
150154 foreach ( $this as $name => $value ) {
151155 unset( $this->$name );
152156 }
@@ -188,17 +192,8 @@
189193 $this->mStripState = new StripState;
190194 $this->mArgStack = false;
191195 $this->mInPre = false;
192 - $this->mInterwikiLinkHolders = array(
193 - 'texts' => array(),
194 - 'titles' => array()
195 - );
196 - $this->mLinkHolders = array(
197 - 'namespaces' => array(),
198 - 'dbkeys' => array(),
199 - 'queries' => array(),
200 - 'texts' => array(),
201 - 'titles' => array()
202 - );
 196+ $this->mLinkHolders = new LinkHolderArray( $this );
 197+ $this->mLinkID = 0;
203198 $this->mRevisionTimestamp = $this->mRevisionId = null;
204199
205200 /**
@@ -213,7 +208,7 @@
214209 */
215210 #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
216211 # Changed to \x7f to allow XML double-parsing -- TS
217 - $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString();
 212+ $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
218213
219214
220215 # Clear these on every parse, bug 4549
@@ -303,7 +298,7 @@
304299 */
305300
306301 global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
307 - $fname = 'Parser::parse-' . wfGetCaller();
 302+ $fname = __METHOD__.'-' . wfGetCaller();
308303 wfProfileIn( __METHOD__ );
309304 wfProfileIn( $fname );
310305
@@ -337,7 +332,6 @@
338333 );
339334 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
340335
341 - # only once and last
342336 $text = $this->doBlockLevels( $text, $linestart );
343337
344338 $this->replaceLinkHolders( $text );
@@ -357,7 +351,7 @@
358352 $uniq_prefix = $this->mUniqPrefix;
359353 $matches = array();
360354 $elements = array_keys( $this->mTransparentTagHooks );
361 - $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
 355+ $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
362356
363357 foreach( $matches as $marker => $data ) {
364358 list( $element, $content, $params, $tag ) = $data;
@@ -375,7 +369,7 @@
376370 $text = Sanitizer::normalizeCharReferences( $text );
377371
378372 if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
379 - $text = Parser::tidy($text);
 373+ $text = self::tidy($text);
380374 } else {
381375 # attempt to sanitize at least some nesting problems
382376 # (bug #2702 and quite a few others)
@@ -480,6 +474,8 @@
481475 function &getTitle() { return $this->mTitle; }
482476 function getOptions() { return $this->mOptions; }
483477 function getRevisionId() { return $this->mRevisionId; }
 478+ function getOutput() { return $this->mOutput; }
 479+ function nextLinkID() { return $this->mLinkID++; }
484480
485481 function getFunctionLang() {
486482 global $wgLang, $wgContLang;
@@ -558,7 +554,7 @@
559555 $text = $inside;
560556 $tail = null;
561557 } else {
562 - if( $element == '!--' ) {
 558+ if( $element === '!--' ) {
563559 $end = '/(-->)/';
564560 } else {
565561 $end = "/(<\\/$element\\s*>)/i";
@@ -667,9 +663,9 @@
668664 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
669665 '<head><title>test</title></head><body>'.$text.'</body></html>';
670666 if( $wgTidyInternal ) {
671 - $correctedtext = Parser::internalTidy( $wrappedtext );
 667+ $correctedtext = self::internalTidy( $wrappedtext );
672668 } else {
673 - $correctedtext = Parser::externalTidy( $wrappedtext );
 669+ $correctedtext = self::externalTidy( $wrappedtext );
674670 }
675671 if( is_null( $correctedtext ) ) {
676672 wfDebug( "Tidy error detected!\n" );
@@ -686,8 +682,7 @@
687683 */
688684 function externalTidy( $text ) {
689685 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
690 - $fname = 'Parser::externalTidy';
691 - wfProfileIn( $fname );
 686+ wfProfileIn( __METHOD__ );
692687
693688 $cleansource = '';
694689 $opts = ' -utf8';
@@ -716,7 +711,7 @@
717712 }
718713 }
719714
720 - wfProfileOut( $fname );
 715+ wfProfileOut( __METHOD__ );
721716
722717 if( $cleansource == '' && $text != '') {
723718 // Some kind of error happened, so we couldn't get the corrected text.
@@ -738,8 +733,7 @@
739734 */
740735 function internalTidy( $text ) {
741736 global $wgTidyConf, $IP, $wgDebugTidy;
742 - $fname = 'Parser::internalTidy';
743 - wfProfileIn( $fname );
 737+ wfProfileIn( __METHOD__ );
744738
745739 $tidy = new tidy;
746740 $tidy->parseString( $text, $wgTidyConf, 'utf8' );
@@ -757,7 +751,7 @@
758752 "\n-->";
759753 }
760754
761 - wfProfileOut( $fname );
 755+ wfProfileOut( __METHOD__ );
762756 return $cleansource;
763757 }
764758
@@ -767,34 +761,35 @@
768762 * @private
769763 */
770764 function doTableStuff ( $text ) {
771 - $fname = 'Parser::doTableStuff';
772 - wfProfileIn( $fname );
 765+ wfProfileIn( __METHOD__ );
773766
774 - $lines = explode ( "\n" , $text );
 767+ $lines = StringUtils::explode( "\n", $text );
 768+ $out = '';
775769 $td_history = array (); // Is currently a td tag open?
776770 $last_tag_history = array (); // Save history of last lag activated (td, th or caption)
777771 $tr_history = array (); // Is currently a tr tag open?
778772 $tr_attributes = array (); // history of tr attributes
779773 $has_opened_tr = array(); // Did this table open a <tr> element?
780774 $indent_level = 0; // indent level of the table
781 - foreach ( $lines as $key => $line )
782 - {
783 - $line = trim ( $line );
784775
 776+ foreach ( $lines as $outLine ) {
 777+ $line = trim( $outLine );
 778+
785779 if( $line == '' ) { // empty line, go to next line
 780+ $out .= $outLine."\n";
786781 continue;
787782 }
788 - $first_character = $line{0};
 783+ $first_character = $line[0];
789784 $matches = array();
790785
791 - if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) {
 786+ if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) {
792787 // First check if we are starting a new table
793788 $indent_level = strlen( $matches[1] );
794789
795790 $attributes = $this->mStripState->unstripBoth( $matches[2] );
796791 $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' );
797792
798 - $lines[$key] = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
 793+ $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
799794 array_push ( $td_history , false );
800795 array_push ( $last_tag_history , '' );
801796 array_push ( $tr_history , false );
@@ -802,8 +797,9 @@
803798 array_push ( $has_opened_tr , false );
804799 } else if ( count ( $td_history ) == 0 ) {
805800 // Don't do any of the following
 801+ $out .= $outLine."\n";
806802 continue;
807 - } else if ( substr ( $line , 0 , 2 ) == '|}' ) {
 803+ } else if ( substr ( $line , 0 , 2 ) === '|}' ) {
808804 // We are ending a table
809805 $line = '</table>' . substr ( $line , 2 );
810806 $last_tag = array_pop ( $last_tag_history );
@@ -820,8 +816,8 @@
821817 $line = "</{$last_tag}>{$line}";
822818 }
823819 array_pop ( $tr_attributes );
824 - $lines[$key] = $line . str_repeat( '</dd></dl>' , $indent_level );
825 - } else if ( substr ( $line , 0 , 2 ) == '|-' ) {
 820+ $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
 821+ } else if ( substr ( $line , 0 , 2 ) === '|-' ) {
826822 // Now we have a table row
827823 $line = preg_replace( '#^\|-+#', '', $line );
828824
@@ -844,21 +840,21 @@
845841 $line = "</{$last_tag}>{$line}";
846842 }
847843
848 - $lines[$key] = $line;
 844+ $outLine = $line;
849845 array_push ( $tr_history , false );
850846 array_push ( $td_history , false );
851847 array_push ( $last_tag_history , '' );
852848 }
853 - else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) {
 849+ else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) {
854850 // This might be cell elements, td, th or captions
855 - if ( substr ( $line , 0 , 2 ) == '|+' ) {
 851+ if ( substr ( $line , 0 , 2 ) === '|+' ) {
856852 $first_character = '+';
857853 $line = substr ( $line , 1 );
858854 }
859855
860856 $line = substr ( $line , 1 );
861857
862 - if ( $first_character == '!' ) {
 858+ if ( $first_character === '!' ) {
863859 $line = str_replace ( '!!' , '||' , $line );
864860 }
865861
@@ -868,13 +864,13 @@
869865 // attribute values containing literal "||".
870866 $cells = StringUtils::explodeMarkup( '||' , $line );
871867
872 - $lines[$key] = '';
 868+ $outLine = '';
873869
874870 // Loop through each table cell
875871 foreach ( $cells as $cell )
876872 {
877873 $previous = '';
878 - if ( $first_character != '+' )
 874+ if ( $first_character !== '+' )
879875 {
880876 $tr_after = array_pop ( $tr_attributes );
881877 if ( !array_pop ( $tr_history ) ) {
@@ -892,11 +888,11 @@
893889 $previous = "</{$last_tag}>{$previous}";
894890 }
895891
896 - if ( $first_character == '|' ) {
 892+ if ( $first_character === '|' ) {
897893 $last_tag = 'td';
898 - } else if ( $first_character == '!' ) {
 894+ } else if ( $first_character === '!' ) {
899895 $last_tag = 'th';
900 - } else if ( $first_character == '+' ) {
 896+ } else if ( $first_character === '+' ) {
901897 $last_tag = 'caption';
902898 } else {
903899 $last_tag = '';
@@ -919,38 +915,42 @@
920916 $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
921917 }
922918
923 - $lines[$key] .= $cell;
 919+ $outLine .= $cell;
924920 array_push ( $td_history , true );
925921 }
926922 }
 923+ $out .= $outLine . "\n";
927924 }
928925
929926 // Closing open td, tr && table
930927 while ( count ( $td_history ) > 0 )
931928 {
932929 if ( array_pop ( $td_history ) ) {
933 - $lines[] = '</td>' ;
 930+ $out .= "</td>\n";
934931 }
935932 if ( array_pop ( $tr_history ) ) {
936 - $lines[] = '</tr>' ;
 933+ $out .= "</tr>\n";
937934 }
938935 if ( !array_pop ( $has_opened_tr ) ) {
939 - $lines[] = "<tr><td></td></tr>" ;
 936+ $out .= "<tr><td></td></tr>\n" ;
940937 }
941938
942 - $lines[] = '</table>' ;
 939+ $out .= "</table>\n";
943940 }
944941
945 - $output = implode ( "\n" , $lines ) ;
 942+ // Remove trailing line-ending (b/c)
 943+ if ( substr( $out, -1 ) === "\n" ) {
 944+ $out = substr( $out, 0, -1 );
 945+ }
946946
947947 // special case: don't return empty table
948 - if( $output == "<table>\n<tr><td></td></tr>\n</table>" ) {
949 - $output = '';
 948+ if( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
 949+ $out = '';
950950 }
951951
952 - wfProfileOut( $fname );
 952+ wfProfileOut( __METHOD__ );
953953
954 - return $output;
 954+ return $out;
955955 }
956956
957957 /**
@@ -961,12 +961,11 @@
962962 */
963963 function internalParse( $text ) {
964964 $isMain = true;
965 - $fname = 'Parser::internalParse';
966 - wfProfileIn( $fname );
 965+ wfProfileIn( __METHOD__ );
967966
968967 # Hook to suspend the parser in this state
969968 if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
970 - wfProfileOut( $fname );
 969+ wfProfileOut( __METHOD__ );
971970 return $text ;
972971 }
973972
@@ -999,84 +998,146 @@
1000999 $text = $this->doMagicLinks( $text );
10011000 $text = $this->formatHeadings( $text, $isMain );
10021001
1003 - wfProfileOut( $fname );
 1002+ wfProfileOut( __METHOD__ );
10041003 return $text;
10051004 }
10061005
10071006 /**
10081007 * Replace special strings like "ISBN xxx" and "RFC xxx" with
10091008 * magic external links.
1010 - *
 1009+ *
 1010+ * DML
10111011 * @private
10121012 */
10131013 function doMagicLinks( $text ) {
10141014 wfProfileIn( __METHOD__ );
 1015+ $prots = $this->mUrlProtocols;
 1016+ $urlChar = self::EXT_LINK_URL_CLASS;
10151017 $text = preg_replace_callback(
10161018 '!(?: # Start cases
1017 - <a.*?</a> | # Skip link text
1018 - <.*?> | # Skip stuff inside HTML elements
1019 - (?:RFC|PMID)\s+([0-9]+) | # RFC or PMID, capture number as m[1]
1020 - ISBN\s+(\b # ISBN, capture number as m[2]
1021 - (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
1022 - (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
1023 - [0-9Xx] # check digit
1024 - \b)
 1019+ (<a.*?</a>) | # m[1]: Skip link text
 1020+ (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
 1021+ (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
 1022+ (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
 1023+ ISBN\s+(\b # m[5]: ISBN, capture number
 1024+ (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
 1025+ (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
 1026+ [0-9Xx] # check digit
 1027+ \b)
10251028 )!x', array( &$this, 'magicLinkCallback' ), $text );
10261029 wfProfileOut( __METHOD__ );
10271030 return $text;
10281031 }
10291032
10301033 function magicLinkCallback( $m ) {
1031 - if ( substr( $m[0], 0, 1 ) == '<' ) {
 1034+ if ( isset( $m[1] ) && strval( $m[1] ) !== '' ) {
 1035+ # Skip anchor
 1036+ return $m[0];
 1037+ } elseif ( isset( $m[2] ) && strval( $m[2] ) !== '' ) {
10321038 # Skip HTML element
10331039 return $m[0];
1034 - } elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) {
1035 - $isbn = $m[2];
1036 - $num = strtr( $isbn, array(
1037 - '-' => '',
1038 - ' ' => '',
1039 - 'x' => 'X',
1040 - ));
1041 - $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
1042 - $text = '<a href="' .
1043 - $titleObj->escapeLocalUrl() .
1044 - "\" class=\"internal\">ISBN $isbn</a>";
1045 - } else {
1046 - if ( substr( $m[0], 0, 3 ) == 'RFC' ) {
 1040+ } elseif ( isset( $m[3] ) && strval( $m[3] ) !== '' ) {
 1041+ # Free external link
 1042+ return $this->makeFreeExternalLink( $m[0] );
 1043+ } elseif ( isset( $m[4] ) && strval( $m[4] ) !== '' ) {
 1044+ # RFC or PMID
 1045+ if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
10471046 $keyword = 'RFC';
10481047 $urlmsg = 'rfcurl';
1049 - $id = $m[1];
1050 - } elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) {
 1048+ $id = $m[4];
 1049+ } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
10511050 $keyword = 'PMID';
10521051 $urlmsg = 'pubmedurl';
1053 - $id = $m[1];
 1052+ $id = $m[4];
10541053 } else {
10551054 throw new MWException( __METHOD__.': unrecognised match type "' .
10561055 substr($m[0], 0, 20 ) . '"' );
10571056 }
1058 -
10591057 $url = wfMsg( $urlmsg, $id);
10601058 $sk = $this->mOptions->getSkin();
10611059 $la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
1062 - $text = "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
 1060+ return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
 1061+ } elseif ( isset( $m[5] ) && strval( $m[5] ) !== '' ) {
 1062+ # ISBN
 1063+ $isbn = $m[5];
 1064+ $num = strtr( $isbn, array(
 1065+ '-' => '',
 1066+ ' ' => '',
 1067+ 'x' => 'X',
 1068+ ));
 1069+ $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
 1070+ return'<a href="' .
 1071+ $titleObj->escapeLocalUrl() .
 1072+ "\" class=\"internal\">ISBN $isbn</a>";
 1073+ } else {
 1074+ return $m[0];
10631075 }
1064 - return $text;
10651076 }
10661077
10671078 /**
 1079+ * Make a free external link, given a user-supplied URL
 1080+ * @return HTML
 1081+ * @private
 1082+ */
 1083+ function makeFreeExternalLink( $url ) {
 1084+ global $wgContLang;
 1085+ wfProfileIn( __METHOD__ );
 1086+
 1087+ $sk = $this->mOptions->getSkin();
 1088+ $trail = '';
 1089+
 1090+ # The characters '<' and '>' (which were escaped by
 1091+ # removeHTMLtags()) should not be included in
 1092+ # URLs, per RFC 2396.
 1093+ $m2 = array();
 1094+ if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
 1095+ $trail = substr($url, $m2[0][1]) . $trail;
 1096+ $url = substr($url, 0, $m2[0][1]);
 1097+ }
 1098+
 1099+ # Move trailing punctuation to $trail
 1100+ $sep = ',;\.:!?';
 1101+ # If there is no left bracket, then consider right brackets fair game too
 1102+ if ( strpos( $url, '(' ) === false ) {
 1103+ $sep .= ')';
 1104+ }
 1105+
 1106+ $numSepChars = strspn( strrev( $url ), $sep );
 1107+ if ( $numSepChars ) {
 1108+ $trail = substr( $url, -$numSepChars ) . $trail;
 1109+ $url = substr( $url, 0, -$numSepChars );
 1110+ }
 1111+
 1112+ $url = Sanitizer::cleanUrl( $url );
 1113+
 1114+ # Is this an external image?
 1115+ $text = $this->maybeMakeExternalImage( $url );
 1116+ if ( $text === false ) {
 1117+ # Not an image, make a link
 1118+ $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
 1119+ # Register it in the output object...
 1120+ # Replace unnecessary URL escape codes with their equivalent characters
 1121+ $pasteurized = self::replaceUnusualEscapes( $url );
 1122+ $this->mOutput->addExternalLink( $pasteurized );
 1123+ }
 1124+ wfProfileOut( __METHOD__ );
 1125+ return $text . $trail;
 1126+ }
 1127+
 1128+
 1129+ /**
10681130 * Parse headers and return html
10691131 *
10701132 * @private
10711133 */
10721134 function doHeadings( $text ) {
1073 - $fname = 'Parser::doHeadings';
1074 - wfProfileIn( $fname );
 1135+ wfProfileIn( __METHOD__ );
10751136 for ( $i = 6; $i >= 1; --$i ) {
10761137 $h = str_repeat( '=', $i );
10771138 $text = preg_replace( "/^$h(.+)$h\\s*$/m",
10781139 "<h$i>\\1</h$i>", $text );
10791140 }
1080 - wfProfileOut( $fname );
 1141+ wfProfileOut( __METHOD__ );
10811142 return $text;
10821143 }
10831144
@@ -1086,15 +1147,14 @@
10871148 * @return string the altered text
10881149 */
10891150 function doAllQuotes( $text ) {
1090 - $fname = 'Parser::doAllQuotes';
1091 - wfProfileIn( $fname );
 1151+ wfProfileIn( __METHOD__ );
10921152 $outtext = '';
1093 - $lines = explode( "\n", $text );
 1153+ $lines = StringUtils::explode( "\n", $text );
10941154 foreach ( $lines as $line ) {
1095 - $outtext .= $this->doQuotes ( $line ) . "\n";
 1155+ $outtext .= $this->doQuotes( $line ) . "\n";
10961156 }
10971157 $outtext = substr($outtext, 0,-1);
1098 - wfProfileOut( $fname );
 1158+ wfProfileOut( __METHOD__ );
10991159 return $outtext;
11001160 }
11011161
@@ -1156,9 +1216,9 @@
11571217 {
11581218 $x1 = substr ($arr[$i-1], -1);
11591219 $x2 = substr ($arr[$i-1], -2, 1);
1160 - if ($x1 == ' ') {
 1220+ if ($x1 === ' ') {
11611221 if ($firstspace == -1) $firstspace = $i;
1162 - } else if ($x2 == ' ') {
 1222+ } else if ($x2 === ' ') {
11631223 if ($firstsingleletterword == -1) $firstsingleletterword = $i;
11641224 } else {
11651225 if ($firstmultiletterword == -1) $firstmultiletterword = $i;
@@ -1198,7 +1258,7 @@
11991259 {
12001260 if (($i % 2) == 0)
12011261 {
1202 - if ($state == 'both')
 1262+ if ($state === 'both')
12031263 $buffer .= $r;
12041264 else
12051265 $output .= $r;
@@ -1207,41 +1267,41 @@
12081268 {
12091269 if (strlen ($r) == 2)
12101270 {
1211 - if ($state == 'i')
 1271+ if ($state === 'i')
12121272 { $output .= '</i>'; $state = ''; }
1213 - else if ($state == 'bi')
 1273+ else if ($state === 'bi')
12141274 { $output .= '</i>'; $state = 'b'; }
1215 - else if ($state == 'ib')
 1275+ else if ($state === 'ib')
12161276 { $output .= '</b></i><b>'; $state = 'b'; }
1217 - else if ($state == 'both')
 1277+ else if ($state === 'both')
12181278 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
12191279 else # $state can be 'b' or ''
12201280 { $output .= '<i>'; $state .= 'i'; }
12211281 }
12221282 else if (strlen ($r) == 3)
12231283 {
1224 - if ($state == 'b')
 1284+ if ($state === 'b')
12251285 { $output .= '</b>'; $state = ''; }
1226 - else if ($state == 'bi')
 1286+ else if ($state === 'bi')
12271287 { $output .= '</i></b><i>'; $state = 'i'; }
1228 - else if ($state == 'ib')
 1288+ else if ($state === 'ib')
12291289 { $output .= '</b>'; $state = 'i'; }
1230 - else if ($state == 'both')
 1290+ else if ($state === 'both')
12311291 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
12321292 else # $state can be 'i' or ''
12331293 { $output .= '<b>'; $state .= 'b'; }
12341294 }
12351295 else if (strlen ($r) == 5)
12361296 {
1237 - if ($state == 'b')
 1297+ if ($state === 'b')
12381298 { $output .= '</b><i>'; $state = 'i'; }
1239 - else if ($state == 'i')
 1299+ else if ($state === 'i')
12401300 { $output .= '</i><b>'; $state = 'b'; }
1241 - else if ($state == 'bi')
 1301+ else if ($state === 'bi')
12421302 { $output .= '</i></b>'; $state = ''; }
1243 - else if ($state == 'ib')
 1303+ else if ($state === 'ib')
12441304 { $output .= '</b></i>'; $state = ''; }
1245 - else if ($state == 'both')
 1305+ else if ($state === 'both')
12461306 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
12471307 else # ($state == '')
12481308 { $buffer = ''; $state = 'both'; }
@@ -1250,21 +1310,21 @@
12511311 $i++;
12521312 }
12531313 # Now close all remaining tags. Notice that the order is important.
1254 - if ($state == 'b' || $state == 'ib')
 1314+ if ($state === 'b' || $state === 'ib')
12551315 $output .= '</b>';
1256 - if ($state == 'i' || $state == 'bi' || $state == 'ib')
 1316+ if ($state === 'i' || $state === 'bi' || $state === 'ib')
12571317 $output .= '</i>';
1258 - if ($state == 'bi')
 1318+ if ($state === 'bi')
12591319 $output .= '</b>';
12601320 # There might be lonely ''''', so make sure we have a buffer
1261 - if ($state == 'both' && $buffer)
 1321+ if ($state === 'both' && $buffer)
12621322 $output .= '<b><i>'.$buffer.'</i></b>';
12631323 return $output;
12641324 }
12651325 }
12661326
12671327 /**
1268 - * Replace external links
 1328+ * Replace external links (REL)
12691329 *
12701330 * Note: this is all very hackish and the order of execution matters a lot.
12711331 * Make sure to run maintenance/parserTests.php if you change this code.
@@ -1273,15 +1333,13 @@
12741334 */
12751335 function replaceExternalLinks( $text ) {
12761336 global $wgContLang;
1277 - $fname = 'Parser::replaceExternalLinks';
1278 - wfProfileIn( $fname );
 1337+ wfProfileIn( __METHOD__ );
12791338
12801339 $sk = $this->mOptions->getSkin();
12811340
12821341 $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 1342+ $s = array_shift( $bits );
12831343
1284 - $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
1285 -
12861344 $i = 0;
12871345 while ( $i<count( $bits ) ) {
12881346 $url = $bits[$i++];
@@ -1308,7 +1366,7 @@
13091367 $dtrail = '';
13101368
13111369 # Set linktype for CSS - if URL==text, link is essentially free
1312 - $linktype = ($text == $url) ? 'free' : 'text';
 1370+ $linktype = ($text === $url) ? 'free' : 'text';
13131371
13141372 # No link text, e.g. [http://domain.tld/some.link]
13151373 if ( $text == '' ) {
@@ -1331,10 +1389,6 @@
13321390
13331391 $url = Sanitizer::cleanUrl( $url );
13341392
1335 - # Process the trail (i.e. everything after this link up until start of the next link),
1336 - # replacing any non-bracketed links
1337 - $trail = $this->replaceFreeExternalLinks( $trail );
1338 -
13391393 # Use the encoded URL
13401394 # This means that users can paste URLs directly into the text
13411395 # Funny characters like &ouml; aren't valid in URLs anyway
@@ -1344,96 +1398,15 @@
13451399 # Register link in the output object.
13461400 # Replace unnecessary URL escape codes with the referenced character
13471401 # This prevents spammers from hiding links from the filters
1348 - $pasteurized = Parser::replaceUnusualEscapes( $url );
 1402+ $pasteurized = self::replaceUnusualEscapes( $url );
13491403 $this->mOutput->addExternalLink( $pasteurized );
13501404 }
13511405
1352 - wfProfileOut( $fname );
 1406+ wfProfileOut( __METHOD__ );
13531407 return $s;
13541408 }
13551409
13561410 /**
1357 - * Replace anything that looks like a URL with a link
1358 - * @private
1359 - */
1360 - function replaceFreeExternalLinks( $text ) {
1361 - global $wgContLang;
1362 - $fname = 'Parser::replaceFreeExternalLinks';
1363 - wfProfileIn( $fname );
1364 -
1365 - $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1366 - $s = array_shift( $bits );
1367 - $i = 0;
1368 -
1369 - $sk = $this->mOptions->getSkin();
1370 -
1371 - while ( $i < count( $bits ) ){
1372 - $protocol = $bits[$i++];
1373 - $remainder = $bits[$i++];
1374 -
1375 - $m = array();
1376 - if ( preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
1377 - # Found some characters after the protocol that look promising
1378 - $url = $protocol . $m[1];
1379 - $trail = $m[2];
1380 -
1381 - # special case: handle urls as url args:
1382 - # http://www.example.com/foo?=http://www.example.com/bar
1383 - if(strlen($trail) == 0 &&
1384 - isset($bits[$i]) &&
1385 - preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) &&
1386 - preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
1387 - {
1388 - # add protocol, arg
1389 - $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
1390 - $i += 2;
1391 - $trail = $m[2];
1392 - }
1393 -
1394 - # The characters '<' and '>' (which were escaped by
1395 - # removeHTMLtags()) should not be included in
1396 - # URLs, per RFC 2396.
1397 - $m2 = array();
1398 - if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
1399 - $trail = substr($url, $m2[0][1]) . $trail;
1400 - $url = substr($url, 0, $m2[0][1]);
1401 - }
1402 -
1403 - # Move trailing punctuation to $trail
1404 - $sep = ',;\.:!?';
1405 - # If there is no left bracket, then consider right brackets fair game too
1406 - if ( strpos( $url, '(' ) === false ) {
1407 - $sep .= ')';
1408 - }
1409 -
1410 - $numSepChars = strspn( strrev( $url ), $sep );
1411 - if ( $numSepChars ) {
1412 - $trail = substr( $url, -$numSepChars ) . $trail;
1413 - $url = substr( $url, 0, -$numSepChars );
1414 - }
1415 -
1416 - $url = Sanitizer::cleanUrl( $url );
1417 -
1418 - # Is this an external image?
1419 - $text = $this->maybeMakeExternalImage( $url );
1420 - if ( $text === false ) {
1421 - # Not an image, make a link
1422 - $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
1423 - # Register it in the output object...
1424 - # Replace unnecessary URL escape codes with their equivalent characters
1425 - $pasteurized = Parser::replaceUnusualEscapes( $url );
1426 - $this->mOutput->addExternalLink( $pasteurized );
1427 - }
1428 - $s .= $text . $trail;
1429 - } else {
1430 - $s .= $protocol . $remainder;
1431 - }
1432 - }
1433 - wfProfileOut( $fname );
1434 - return $s;
1435 - }
1436 -
1437 - /**
14381411 * Replace unusual URL escape codes with their equivalent characters
14391412 * @param string
14401413 * @return string
@@ -1445,7 +1418,7 @@
14461419 */
14471420 static function replaceUnusualEscapes( $url ) {
14481421 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
1449 - array( 'Parser', 'replaceUnusualEscapesCallback' ), $url );
 1422+ array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
14501423 }
14511424
14521425 /**
@@ -1489,35 +1462,48 @@
14901463
14911464 /**
14921465 * Process [[ ]] wikilinks
 1466+ * @return processed text
14931467 *
14941468 * @private
14951469 */
14961470 function replaceInternalLinks( $s ) {
 1471+ $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) );
 1472+ return $s;
 1473+ }
 1474+
 1475+ /**
 1476+ * Process [[ ]] wikilinks (RIL)
 1477+ * @return LinkHolderArray
 1478+ *
 1479+ * @private
 1480+ */
 1481+ function replaceInternalLinks2( &$s ) {
14971482 global $wgContLang;
1498 - static $fname = 'Parser::replaceInternalLinks' ;
14991483
1500 - wfProfileIn( $fname );
 1484+ wfProfileIn( __METHOD__ );
15011485
1502 - wfProfileIn( $fname.'-setup' );
1503 - static $tc = FALSE;
 1486+ wfProfileIn( __METHOD__.'-setup' );
 1487+ static $tc = FALSE, $e1, $e1_img;
15041488 # the % is needed to support urlencoded titles as well
1505 - if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
 1489+ if ( !$tc ) {
 1490+ $tc = Title::legalChars() . '#%';
 1491+ # Match a link having the form [[namespace:link|alternate]]trail
 1492+ $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
 1493+ # Match cases where there is no "]]", which might still be images
 1494+ $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
 1495+ }
15061496
15071497 $sk = $this->mOptions->getSkin();
 1498+ $holders = new LinkHolderArray( $this );
15081499
15091500 #split the entire text string on occurrences of [[
1510 - $a = explode( '[[', ' ' . $s );
 1501+ $a = StringUtils::explode( '[[', ' ' . $s );
15111502 #get the first element (all text up to first [[), and remove the space we added
1512 - $s = array_shift( $a );
 1503+ $s = $a->current();
 1504+ $a->next();
 1505+ $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
15131506 $s = substr( $s, 1 );
15141507
1515 - # Match a link having the form [[namespace:link|alternate]]trail
1516 - static $e1 = FALSE;
1517 - if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; }
1518 - # Match cases where there is no "]]", which might still be images
1519 - static $e1_img = FALSE;
1520 - if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
1521 -
15221508 $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
15231509 $e2 = null;
15241510 if ( $useLinkPrefixExtension ) {
@@ -1527,8 +1513,8 @@
15281514 }
15291515
15301516 if( is_null( $this->mTitle ) ) {
1531 - wfProfileOut( $fname );
1532 - wfProfileOut( $fname.'-setup' );
 1517+ wfProfileOut( __METHOD__.'-setup' );
 1518+ wfProfileOut( __METHOD__ );
15331519 throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
15341520 }
15351521 $nottalk = !$this->mTitle->isTalkPage();
@@ -1550,13 +1536,20 @@
15511537 $selflink = array($this->mTitle->getPrefixedText());
15521538 }
15531539 $useSubpages = $this->areSubpagesAllowed();
1554 - wfProfileOut( $fname.'-setup' );
 1540+ wfProfileOut( __METHOD__.'-setup' );
15551541
15561542 # Loop for each link
1557 - for ($k = 0; isset( $a[$k] ); $k++) {
1558 - $line = $a[$k];
 1543+ for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) {
 1544+ # Check for excessive memory usage
 1545+ if ( $holders->isBig() ) {
 1546+ # Too big
 1547+ # Do the existence check, replace the link holders and clear the array
 1548+ $holders->replace( $s );
 1549+ $holders->clear();
 1550+ }
 1551+
15591552 if ( $useLinkPrefixExtension ) {
1560 - wfProfileIn( $fname.'-prefixhandling' );
 1553+ wfProfileIn( __METHOD__.'-prefixhandling' );
15611554 if ( preg_match( $e2, $s, $m ) ) {
15621555 $prefix = $m[2];
15631556 $s = $m[1];
@@ -1568,12 +1561,12 @@
15691562 $prefix = $first_prefix;
15701563 $first_prefix = false;
15711564 }
1572 - wfProfileOut( $fname.'-prefixhandling' );
 1565+ wfProfileOut( __METHOD__.'-prefixhandling' );
15731566 }
15741567
15751568 $might_be_img = false;
15761569
1577 - wfProfileIn( "$fname-e1" );
 1570+ wfProfileIn( __METHOD__."-e1" );
15781571 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
15791572 $text = $m[2];
15801573 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
@@ -1607,18 +1600,18 @@
16081601 $trail = "";
16091602 } else { # Invalid form; output directly
16101603 $s .= $prefix . '[[' . $line ;
1611 - wfProfileOut( "$fname-e1" );
 1604+ wfProfileOut( __METHOD__."-e1" );
16121605 continue;
16131606 }
1614 - wfProfileOut( "$fname-e1" );
1615 - wfProfileIn( "$fname-misc" );
 1607+ wfProfileOut( __METHOD__."-e1" );
 1608+ wfProfileIn( __METHOD__."-misc" );
16161609
16171610 # Don't allow internal links to pages containing
16181611 # PROTO: where PROTO is a valid URL protocol; these
16191612 # should be external links.
16201613 if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) {
16211614 $s .= $prefix . '[[' . $line ;
1622 - wfProfileOut( "$fname-misc" );
 1615+ wfProfileOut( __METHOD__."-misc" );
16231616 continue;
16241617 }
16251618
@@ -1629,33 +1622,36 @@
16301623 $link = $m[1];
16311624 }
16321625
1633 - $noforce = (substr($m[1], 0, 1) != ':');
 1626+ $noforce = (substr($m[1], 0, 1) !== ':');
16341627 if (!$noforce) {
16351628 # Strip off leading ':'
16361629 $link = substr($link, 1);
16371630 }
16381631
1639 - wfProfileOut( "$fname-misc" );
1640 - wfProfileIn( "$fname-title" );
 1632+ wfProfileOut( __METHOD__."-misc" );
 1633+ wfProfileIn( __METHOD__."-title" );
16411634 $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) );
16421635 if( !$nt ) {
16431636 $s .= $prefix . '[[' . $line;
1644 - wfProfileOut( "$fname-title" );
 1637+ wfProfileOut( __METHOD__."-title" );
16451638 continue;
16461639 }
16471640
16481641 $ns = $nt->getNamespace();
16491642 $iw = $nt->getInterWiki();
1650 - wfProfileOut( "$fname-title" );
 1643+ wfProfileOut( __METHOD__."-title" );
16511644
16521645 if ($might_be_img) { # if this is actually an invalid link
1653 - wfProfileIn( "$fname-might_be_img" );
 1646+ wfProfileIn( __METHOD__."-might_be_img" );
16541647 if ($ns == NS_IMAGE && $noforce) { #but might be an image
16551648 $found = false;
1656 - while (isset ($a[$k+1]) ) {
 1649+ while ( true ) {
16571650 #look at the next 'line' to see if we can close it there
1658 - $spliced = array_splice( $a, $k + 1, 1 );
1659 - $next_line = array_shift( $spliced );
 1651+ $a->next();
 1652+ $next_line = $a->current();
 1653+ if ( $next_line === false || $next_line === null ) {
 1654+ break;
 1655+ }
16601656 $m = explode( ']]', $next_line, 3 );
16611657 if ( count( $m ) == 3 ) {
16621658 # the first ]] closes the inner link, the second the image
@@ -1675,19 +1671,19 @@
16761672 if ( !$found ) {
16771673 # we couldn't find the end of this imageLink, so output it raw
16781674 #but don't ignore what might be perfectly normal links in the text we've examined
1679 - $text = $this->replaceInternalLinks($text);
 1675+ $holders->merge( $this->replaceInternalLinks2( $text ) );
16801676 $s .= "{$prefix}[[$link|$text";
16811677 # note: no $trail, because without an end, there *is* no trail
1682 - wfProfileOut( "$fname-might_be_img" );
 1678+ wfProfileOut( __METHOD__."-might_be_img" );
16831679 continue;
16841680 }
16851681 } else { #it's not an image, so output it raw
16861682 $s .= "{$prefix}[[$link|$text";
16871683 # note: no $trail, because without an end, there *is* no trail
1688 - wfProfileOut( "$fname-might_be_img" );
 1684+ wfProfileOut( __METHOD__."-might_be_img" );
16891685 continue;
16901686 }
1691 - wfProfileOut( "$fname-might_be_img" );
 1687+ wfProfileOut( __METHOD__."-might_be_img" );
16921688 }
16931689
16941690 $wasblank = ( '' == $text );
@@ -1697,41 +1693,36 @@
16981694 if( $noforce ) {
16991695
17001696 # Interwikis
1701 - wfProfileIn( "$fname-interwiki" );
 1697+ wfProfileIn( __METHOD__."-interwiki" );
17021698 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
17031699 $this->mOutput->addLanguageLink( $nt->getFullText() );
17041700 $s = rtrim($s . $prefix);
17051701 $s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
1706 - wfProfileOut( "$fname-interwiki" );
 1702+ wfProfileOut( __METHOD__."-interwiki" );
17071703 continue;
17081704 }
1709 - wfProfileOut( "$fname-interwiki" );
 1705+ wfProfileOut( __METHOD__."-interwiki" );
17101706
17111707 if ( $ns == NS_IMAGE ) {
1712 - wfProfileIn( "$fname-image" );
 1708+ wfProfileIn( __METHOD__."-image" );
17131709 if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
17141710 # recursively parse links inside the image caption
17151711 # actually, this will parse them in any other parameters, too,
17161712 # but it might be hard to fix that, and it doesn't matter ATM
17171713 $text = $this->replaceExternalLinks($text);
1718 - $text = $this->replaceInternalLinks($text);
 1714+ $holders->merge( $this->replaceInternalLinks2( $text ) );
17191715
17201716 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
1721 - $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail;
1722 - $this->mOutput->addImage( $nt->getDBkey() );
1723 -
1724 - wfProfileOut( "$fname-image" );
1725 - continue;
1726 - } else {
1727 - # We still need to record the image's presence on the page
1728 - $this->mOutput->addImage( $nt->getDBkey() );
 1717+ $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail;
17291718 }
1730 - wfProfileOut( "$fname-image" );
 1719+ $this->mOutput->addImage( $nt->getDBkey() );
 1720+ wfProfileOut( __METHOD__."-image" );
 1721+ continue;
17311722
17321723 }
17331724
17341725 if ( $ns == NS_CATEGORY ) {
1735 - wfProfileIn( "$fname-category" );
 1726+ wfProfileIn( __METHOD__."-category" );
17361727 $s = rtrim($s . "\n"); # bug 87
17371728
17381729 if ( $wasblank ) {
@@ -1750,7 +1741,7 @@
17511742 */
17521743 $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;
17531744
1754 - wfProfileOut( "$fname-category" );
 1745+ wfProfileOut( __METHOD__."-category" );
17551746 continue;
17561747 }
17571748 }
@@ -1781,7 +1772,7 @@
17821773 if( SpecialPage::exists( $nt->getDBkey() ) ) {
17831774 $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
17841775 } else {
1785 - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
 1776+ $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix );
17861777 }
17871778 continue;
17881779 } elseif( $ns == NS_IMAGE ) {
@@ -1795,10 +1786,10 @@
17961787 continue;
17971788 }
17981789 }
1799 - $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
 1790+ $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix );
18001791 }
1801 - wfProfileOut( $fname );
1802 - return $s;
 1792+ wfProfileOut( __METHOD__ );
 1793+ return $holders;
18031794 }
18041795
18051796 /**
@@ -1807,32 +1798,10 @@
18081799 * parsing of interwiki links, and secondly to allow all existence checks and
18091800 * article length checks (for stub links) to be bundled into a single query.
18101801 *
 1802+ * @deprecated
18111803 */
18121804 function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
1813 - wfProfileIn( __METHOD__ );
1814 - if ( ! is_object($nt) ) {
1815 - # Fail gracefully
1816 - $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
1817 - } else {
1818 - # Separate the link trail from the rest of the link
1819 - list( $inside, $trail ) = Linker::splitTrail( $trail );
1820 -
1821 - if ( $nt->isExternal() ) {
1822 - $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside );
1823 - $this->mInterwikiLinkHolders['titles'][] = $nt;
1824 - $retVal = '<!--IWLINK '. ($nr-1) ."-->{$trail}";
1825 - } else {
1826 - $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );
1827 - $this->mLinkHolders['dbkeys'][] = $nt->getDBkey();
1828 - $this->mLinkHolders['queries'][] = $query;
1829 - $this->mLinkHolders['texts'][] = $prefix.$text.$inside;
1830 - $this->mLinkHolders['titles'][] = $nt;
1831 -
1832 - $retVal = '<!--LINK '. ($nr-1) ."-->{$trail}";
1833 - }
1834 - }
1835 - wfProfileOut( __METHOD__ );
1836 - return $retVal;
 1805+ return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix );
18371806 }
18381807
18391808 /**
@@ -1860,11 +1829,9 @@
18611830 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
18621831 * going to go through further parsing steps before inline URL expansion.
18631832 *
1864 - * In particular this is important when using action=render, which causes
1865 - * full URLs to be included.
 1833+ * Not needed quite as much as it used to be since free links are a bit
 1834+ * more sensible these days. But bracketed links are still an issue.
18661835 *
1867 - * Oh man I hate our multi-layer parser!
1868 - *
18691836 * @param string more-or-less HTML
18701837 * @return string less-or-more HTML with NOPARSE bits
18711838 */
@@ -1898,8 +1865,7 @@
18991866 # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage
19001867 # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage
19011868
1902 - $fname = 'Parser::maybeDoSubpageLink';
1903 - wfProfileIn( $fname );
 1869+ wfProfileIn( __METHOD__ );
19041870 $ret = $target; # default return value is no change
19051871
19061872 # Some namespaces don't allow subpages,
@@ -1915,7 +1881,7 @@
19161882 # bug 7425
19171883 $target = trim( $target );
19181884 # Look at the first character
1919 - if( $target != '' && $target{0} == '/' ) {
 1885+ if( $target != '' && $target{0} === '/' ) {
19201886 # / at end means we don't want the slash to be shown
19211887 $m = array();
19221888 $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m );
@@ -1942,7 +1908,7 @@
19431909 if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page
19441910 $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) );
19451911 # / at the end means don't show full path
1946 - if( substr( $nodotdot, -1, 1 ) == '/' ) {
 1912+ if( substr( $nodotdot, -1, 1 ) === '/' ) {
19471913 $nodotdot = substr( $nodotdot, 0, -1 );
19481914 if( '' === $text ) {
19491915 $text = $nodotdot . $suffix;
@@ -1958,7 +1924,7 @@
19591925 }
19601926 }
19611927
1962 - wfProfileOut( $fname );
 1928+ wfProfileOut( __METHOD__ );
19631929 return $ret;
19641930 }
19651931
@@ -1994,10 +1960,10 @@
19951961 /* private */ function openList( $char ) {
19961962 $result = $this->closeParagraph();
19971963
1998 - if ( '*' == $char ) { $result .= '<ul><li>'; }
1999 - else if ( '#' == $char ) { $result .= '<ol><li>'; }
2000 - else if ( ':' == $char ) { $result .= '<dl><dd>'; }
2001 - else if ( ';' == $char ) {
 1964+ if ( '*' === $char ) { $result .= '<ul><li>'; }
 1965+ else if ( '#' === $char ) { $result .= '<ol><li>'; }
 1966+ else if ( ':' === $char ) { $result .= '<dl><dd>'; }
 1967+ else if ( ';' === $char ) {
20021968 $result .= '<dl><dt>';
20031969 $this->mDTopen = true;
20041970 }
@@ -2007,11 +1973,11 @@
20081974 }
20091975
20101976 /* private */ function nextItem( $char ) {
2011 - if ( '*' == $char || '#' == $char ) { return '</li><li>'; }
2012 - else if ( ':' == $char || ';' == $char ) {
 1977+ if ( '*' === $char || '#' === $char ) { return '</li><li>'; }
 1978+ else if ( ':' === $char || ';' === $char ) {
20131979 $close = '</dd>';
20141980 if ( $this->mDTopen ) { $close = '</dt>'; }
2015 - if ( ';' == $char ) {
 1981+ if ( ';' === $char ) {
20161982 $this->mDTopen = true;
20171983 return $close . '<dt>';
20181984 } else {
@@ -2023,9 +1989,9 @@
20241990 }
20251991
20261992 /* private */ function closeList( $char ) {
2027 - if ( '*' == $char ) { $text = '</li></ul>'; }
2028 - else if ( '#' == $char ) { $text = '</li></ol>'; }
2029 - else if ( ':' == $char ) {
 1993+ if ( '*' === $char ) { $text = '</li></ul>'; }
 1994+ else if ( '#' === $char ) { $text = '</li></ol>'; }
 1995+ else if ( ':' === $char ) {
20301996 if ( $this->mDTopen ) {
20311997 $this->mDTopen = false;
20321998 $text = '</dt></dl>';
@@ -2039,56 +2005,59 @@
20402006 /**#@-*/
20412007
20422008 /**
2043 - * Make lists from lines starting with ':', '*', '#', etc.
 2009+ * Make lists from lines starting with ':', '*', '#', etc. (DBL)
20442010 *
20452011 * @private
20462012 * @return string the lists rendered as HTML
20472013 */
20482014 function doBlockLevels( $text, $linestart ) {
2049 - $fname = 'Parser::doBlockLevels';
2050 - wfProfileIn( $fname );
 2015+ wfProfileIn( __METHOD__ );
20512016
20522017 # Parsing through the text line by line. The main thing
20532018 # happening here is handling of block-level elements p, pre,
20542019 # and making lists from lines starting with * # : etc.
20552020 #
2056 - $textLines = explode( "\n", $text );
 2021+ $textLines = StringUtils::explode( "\n", $text );
20572022
20582023 $lastPrefix = $output = '';
20592024 $this->mDTopen = $inBlockElem = false;
20602025 $prefixLength = 0;
20612026 $paragraphStack = false;
20622027
2063 - if ( !$linestart ) {
2064 - $output .= array_shift( $textLines );
2065 - }
20662028 foreach ( $textLines as $oLine ) {
 2029+ # Fix up $linestart
 2030+ if ( !$linestart ) {
 2031+ $output .= $oLine;
 2032+ $linestart = true;
 2033+ continue;
 2034+ }
 2035+
20672036 $lastPrefixLength = strlen( $lastPrefix );
20682037 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
20692038 $preOpenMatch = preg_match('/<pre/i', $oLine );
20702039 if ( !$this->mInPre ) {
20712040 # Multiple prefixes may abut each other for nested lists.
20722041 $prefixLength = strspn( $oLine, '*#:;' );
2073 - $pref = substr( $oLine, 0, $prefixLength );
 2042+ $prefix = substr( $oLine, 0, $prefixLength );
20742043
20752044 # eh?
2076 - $pref2 = str_replace( ';', ':', $pref );
 2045+ $prefix2 = str_replace( ';', ':', $prefix );
20772046 $t = substr( $oLine, $prefixLength );
2078 - $this->mInPre = !empty($preOpenMatch);
 2047+ $this->mInPre = (bool)$preOpenMatch;
20792048 } else {
20802049 # Don't interpret any other prefixes in preformatted text
20812050 $prefixLength = 0;
2082 - $pref = $pref2 = '';
 2051+ $prefix = $prefix2 = '';
20832052 $t = $oLine;
20842053 }
20852054
20862055 # List generation
2087 - if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
 2056+ if( $prefixLength && $lastPrefix === $prefix2 ) {
20882057 # Same as the last item, so no need to deal with nesting or opening stuff
2089 - $output .= $this->nextItem( substr( $pref, -1 ) );
 2058+ $output .= $this->nextItem( substr( $prefix, -1 ) );
20902059 $paragraphStack = false;
20912060
2092 - if ( substr( $pref, -1 ) == ';') {
 2061+ if ( substr( $prefix, -1 ) === ';') {
20932062 # The one nasty exception: definition lists work like this:
20942063 # ; title : definition text
20952064 # So we check for : in the remainder text to split up the
@@ -2101,21 +2070,21 @@
21022071 }
21032072 } elseif( $prefixLength || $lastPrefixLength ) {
21042073 # Either open or close a level...
2105 - $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
 2074+ $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
21062075 $paragraphStack = false;
21072076
21082077 while( $commonPrefixLength < $lastPrefixLength ) {
2109 - $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
 2078+ $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
21102079 --$lastPrefixLength;
21112080 }
21122081 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2113 - $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
 2082+ $output .= $this->nextItem( $prefix[$commonPrefixLength-1] );
21142083 }
21152084 while ( $prefixLength > $commonPrefixLength ) {
2116 - $char = substr( $pref, $commonPrefixLength, 1 );
 2085+ $char = substr( $prefix, $commonPrefixLength, 1 );
21172086 $output .= $this->openList( $char );
21182087
2119 - if ( ';' == $char ) {
 2088+ if ( ';' === $char ) {
21202089 # FIXME: This is dupe of code above
21212090 if ($this->findColonNoLinks($t, $term, $t2) !== false) {
21222091 $t = $t2;
@@ -2124,10 +2093,10 @@
21252094 }
21262095 ++$commonPrefixLength;
21272096 }
2128 - $lastPrefix = $pref2;
 2097+ $lastPrefix = $prefix2;
21292098 }
21302099 if( 0 == $prefixLength ) {
2131 - wfProfileIn( "$fname-paragraph" );
 2100+ wfProfileIn( __METHOD__."-paragraph" );
21322101 # No prefix (not in list)--go to paragraph mode
21332102 // XXX: use a stack for nestable elements like span, table and div
21342103 $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
@@ -2147,9 +2116,9 @@
21482117 $inBlockElem = true;
21492118 }
21502119 } else if ( !$inBlockElem && !$this->mInPre ) {
2151 - if ( ' ' == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
 2120+ if ( ' ' == $t{0} and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
21522121 // pre
2153 - if ($this->mLastSection != 'pre') {
 2122+ if ($this->mLastSection !== 'pre') {
21542123 $paragraphStack = false;
21552124 $output .= $this->closeParagraph().'<pre>';
21562125 $this->mLastSection = 'pre';
@@ -2163,7 +2132,7 @@
21642133 $paragraphStack = false;
21652134 $this->mLastSection = 'p';
21662135 } else {
2167 - if ($this->mLastSection != 'p' ) {
 2136+ if ($this->mLastSection !== 'p' ) {
21682137 $output .= $this->closeParagraph();
21692138 $this->mLastSection = '';
21702139 $paragraphStack = '<p>';
@@ -2176,14 +2145,14 @@
21772146 $output .= $paragraphStack;
21782147 $paragraphStack = false;
21792148 $this->mLastSection = 'p';
2180 - } else if ($this->mLastSection != 'p') {
 2149+ } else if ($this->mLastSection !== 'p') {
21812150 $output .= $this->closeParagraph().'<p>';
21822151 $this->mLastSection = 'p';
21832152 }
21842153 }
21852154 }
21862155 }
2187 - wfProfileOut( "$fname-paragraph" );
 2156+ wfProfileOut( __METHOD__."-paragraph" );
21882157 }
21892158 // somewhere above we forget to get out of pre block (bug 785)
21902159 if($preCloseMatch && $this->mInPre) {
@@ -2194,7 +2163,7 @@
21952164 }
21962165 }
21972166 while ( $prefixLength ) {
2198 - $output .= $this->closeList( $pref2{$prefixLength-1} );
 2167+ $output .= $this->closeList( $prefix2[$prefixLength-1] );
21992168 --$prefixLength;
22002169 }
22012170 if ( '' != $this->mLastSection ) {
@@ -2202,7 +2171,7 @@
22032172 $this->mLastSection = '';
22042173 }
22052174
2206 - wfProfileOut( $fname );
 2175+ wfProfileOut( __METHOD__ );
22072176 return $output;
22082177 }
22092178
@@ -2215,13 +2184,12 @@
22162185 * return string the position of the ':', or false if none found
22172186 */
22182187 function findColonNoLinks($str, &$before, &$after) {
2219 - $fname = 'Parser::findColonNoLinks';
2220 - wfProfileIn( $fname );
 2188+ wfProfileIn( __METHOD__ );
22212189
22222190 $pos = strpos( $str, ':' );
22232191 if( $pos === false ) {
22242192 // Nothing to find!
2225 - wfProfileOut( $fname );
 2193+ wfProfileOut( __METHOD__ );
22262194 return false;
22272195 }
22282196
@@ -2230,7 +2198,7 @@
22312199 // Easy; no tag nesting to worry about
22322200 $before = substr( $str, 0, $pos );
22332201 $after = substr( $str, $pos+1 );
2234 - wfProfileOut( $fname );
 2202+ wfProfileOut( __METHOD__ );
22352203 return $pos;
22362204 }
22372205
@@ -2254,7 +2222,7 @@
22552223 // We found it!
22562224 $before = substr( $str, 0, $i );
22572225 $after = substr( $str, $i + 1 );
2258 - wfProfileOut( $fname );
 2226+ wfProfileOut( __METHOD__ );
22592227 return $i;
22602228 }
22612229 // Embedded in a tag; don't break it.
@@ -2264,7 +2232,7 @@
22652233 $colon = strpos( $str, ':', $i );
22662234 if( $colon === false ) {
22672235 // Nothing else interesting
2268 - wfProfileOut( $fname );
 2236+ wfProfileOut( __METHOD__ );
22692237 return false;
22702238 }
22712239 $lt = strpos( $str, '<', $i );
@@ -2273,7 +2241,7 @@
22742242 // We found it!
22752243 $before = substr( $str, 0, $colon );
22762244 $after = substr( $str, $colon + 1 );
2277 - wfProfileOut( $fname );
 2245+ wfProfileOut( __METHOD__ );
22782246 return $i;
22792247 }
22802248 }
@@ -2320,18 +2288,18 @@
23212289 break;
23222290 case 3: // self::COLON_STATE_CLOSETAG:
23232291 // In a </tag>
2324 - if( $c == ">" ) {
 2292+ if( $c === ">" ) {
23252293 $stack--;
23262294 if( $stack < 0 ) {
2327 - wfDebug( "Invalid input in $fname; too many close tags\n" );
2328 - wfProfileOut( $fname );
 2295+ wfDebug( __METHOD__.": Invalid input; too many close tags\n" );
 2296+ wfProfileOut( __METHOD__ );
23292297 return false;
23302298 }
23312299 $state = self::COLON_STATE_TEXT;
23322300 }
23332301 break;
23342302 case self::COLON_STATE_TAGSLASH:
2335 - if( $c == ">" ) {
 2303+ if( $c === ">" ) {
23362304 // Yes, a self-closed tag <blah/>
23372305 $state = self::COLON_STATE_TEXT;
23382306 } else {
@@ -2340,33 +2308,33 @@
23412309 }
23422310 break;
23432311 case 5: // self::COLON_STATE_COMMENT:
2344 - if( $c == "-" ) {
 2312+ if( $c === "-" ) {
23452313 $state = self::COLON_STATE_COMMENTDASH;
23462314 }
23472315 break;
23482316 case self::COLON_STATE_COMMENTDASH:
2349 - if( $c == "-" ) {
 2317+ if( $c === "-" ) {
23502318 $state = self::COLON_STATE_COMMENTDASHDASH;
23512319 } else {
23522320 $state = self::COLON_STATE_COMMENT;
23532321 }
23542322 break;
23552323 case self::COLON_STATE_COMMENTDASHDASH:
2356 - if( $c == ">" ) {
 2324+ if( $c === ">" ) {
23572325 $state = self::COLON_STATE_TEXT;
23582326 } else {
23592327 $state = self::COLON_STATE_COMMENT;
23602328 }
23612329 break;
23622330 default:
2363 - throw new MWException( "State machine error in $fname" );
 2331+ throw new MWException( "State machine error in " . __METHOD__ );
23642332 }
23652333 }
23662334 if( $stack > 0 ) {
2367 - wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" );
 2335+ wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" );
23682336 return false;
23692337 }
2370 - wfProfileOut( $fname );
 2338+ wfProfileOut( __METHOD__ );
23712339 return false;
23722340 }
23732341
@@ -2596,12 +2564,11 @@
25972565 * @private
25982566 */
25992567 function initialiseVariables() {
2600 - $fname = 'Parser::initialiseVariables';
2601 - wfProfileIn( $fname );
 2568+ wfProfileIn( __METHOD__ );
26022569 $variableIDs = MagicWord::getVariableIDs();
26032570
26042571 $this->mVariables = new MagicWordArray( $variableIDs );
2605 - wfProfileOut( $fname );
 2572+ wfProfileOut( __METHOD__ );
26062573 }
26072574
26082575 /**
@@ -2670,8 +2637,7 @@
26712638 return $text;
26722639 }
26732640
2674 - $fname = __METHOD__;
2675 - wfProfileIn( $fname );
 2641+ wfProfileIn( __METHOD__ );
26762642
26772643 if ( $frame === false ) {
26782644 $frame = $this->getPreprocessor()->newFrame();
@@ -2684,7 +2650,7 @@
26852651 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
26862652 $text = $frame->expand( $dom, $flags );
26872653
2688 - wfProfileOut( $fname );
 2654+ wfProfileOut( __METHOD__ );
26892655 return $text;
26902656 }
26912657
@@ -2747,8 +2713,7 @@
27482714 */
27492715 function braceSubstitution( $piece, $frame ) {
27502716 global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces;
2751 - $fname = __METHOD__;
2752 - wfProfileIn( $fname );
 2717+ wfProfileIn( __METHOD__ );
27532718 wfProfileIn( __METHOD__.'-setup' );
27542719
27552720 # Flags
@@ -2935,7 +2900,7 @@
29362901 }
29372902 } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
29382903 $found = false; //access denied
2939 - wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() );
 2904+ wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() );
29402905 } else {
29412906 list( $text, $title ) = $this->getTemplateDom( $title );
29422907 if ( $text !== false ) {
@@ -2969,7 +2934,7 @@
29702935 # Recover the source wikitext and return it
29712936 if ( !$found ) {
29722937 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
2973 - wfProfileOut( $fname );
 2938+ wfProfileOut( __METHOD__ );
29742939 return array( 'object' => $text );
29752940 }
29762941
@@ -3028,7 +2993,7 @@
30292994 $ret = array( 'text' => $text );
30302995 }
30312996
3032 - wfProfileOut( $fname );
 2997+ wfProfileOut( __METHOD__ );
30332998 return $ret;
30342999 }
30353000
@@ -3315,7 +3280,7 @@
33163281 }
33173282 }
33183283
3319 - if ( $name == 'html' || $name == 'nowiki' ) {
 3284+ if ( $name === 'html' || $name === 'nowiki' ) {
33203285 $this->mStripState->nowiki->setPair( $marker, $output );
33213286 } else {
33223287 $this->mStripState->general->setPair( $marker, $output );
@@ -3571,12 +3536,7 @@
35723537 # <!--LINK number-->
35733538 # turns into
35743539 # link text with suffix
3575 - $safeHeadline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
3576 - "\$this->mLinkHolders['texts'][\$1]",
3577 - $safeHeadline );
3578 - $safeHeadline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e',
3579 - "\$this->mInterwikiLinkHolders['texts'][\$1]",
3580 - $safeHeadline );
 3540+ $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
35813541
35823542 # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
35833543 $tocline = preg_replace(
@@ -3652,7 +3612,7 @@
36533613 $i = 0;
36543614
36553615 foreach( $blocks as $block ) {
3656 - if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
 3616+ if( $showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n" ) {
36573617 # This is the [edit] link that appears for the top block of text when
36583618 # section editing is enabled
36593619
@@ -3804,7 +3764,7 @@
38053765 } else {
38063766 # Failed to validate; fall back to the default
38073767 $nickname = $username;
3808 - wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" );
 3768+ wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" );
38093769 }
38103770 }
38113771
@@ -3910,19 +3870,17 @@
39113871 global $wgTitle;
39123872 static $executing = false;
39133873
3914 - $fname = "Parser::transformMsg";
3915 -
39163874 # Guard against infinite recursion
39173875 if ( $executing ) {
39183876 return $text;
39193877 }
39203878 $executing = true;
39213879
3922 - wfProfileIn($fname);
 3880+ wfProfileIn(__METHOD__);
39233881 $text = $this->preprocess( $text, $wgTitle, $options );
39243882
39253883 $executing = false;
3926 - wfProfileOut($fname);
 3884+ wfProfileOut(__METHOD__);
39273885 return $text;
39283886 }
39293887
@@ -4019,7 +3977,7 @@
40203978 # Add to function cache
40213979 $mw = MagicWord::get( $id );
40223980 if( !$mw )
4023 - throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' );
 3981+ throw new MWException( __METHOD__.'() expecting a magic word identifier.' );
40243982
40253983 $synonyms = $mw->getSynonyms();
40263984 $sensitive = intval( $mw->isCaseSensitive() );
@@ -4034,7 +3992,7 @@
40353993 $syn = '#' . $syn;
40363994 }
40373995 # Remove trailing colon
4038 - if ( substr( $syn, -1, 1 ) == ':' ) {
 3996+ if ( substr( $syn, -1, 1 ) === ':' ) {
40393997 $syn = substr( $syn, 0, -1 );
40403998 }
40413999 $this->mFunctionSynonyms[$sensitive][$syn] = $id;
@@ -4055,266 +4013,9 @@
40564014 * Replace <!--LINK--> link placeholders with actual links, in the buffer
40574015 * Placeholders created in Skin::makeLinkObj()
40584016 * Returns an array of link CSS classes, indexed by PDBK.
4059 - * $options is a bit field, RLH_FOR_UPDATE to select for update
40604017 */
40614018 function replaceLinkHolders( &$text, $options = 0 ) {
4062 - global $wgUser;
4063 - global $wgContLang;
4064 -
4065 - $fname = 'Parser::replaceLinkHolders';
4066 - wfProfileIn( $fname );
4067 -
4068 - $pdbks = array();
4069 - $colours = array();
4070 - $linkcolour_ids = array();
4071 - $sk = $this->mOptions->getSkin();
4072 - $linkCache = LinkCache::singleton();
4073 -
4074 - if ( !empty( $this->mLinkHolders['namespaces'] ) ) {
4075 - wfProfileIn( $fname.'-check' );
4076 - $dbr = wfGetDB( DB_SLAVE );
4077 - $page = $dbr->tableName( 'page' );
4078 - $threshold = $wgUser->getOption('stubthreshold');
4079 -
4080 - # Sort by namespace
4081 - asort( $this->mLinkHolders['namespaces'] );
4082 -
4083 - # Generate query
4084 - $query = false;
4085 - $current = null;
4086 - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4087 - # Make title object
4088 - $title = $this->mLinkHolders['titles'][$key];
4089 -
4090 - # Skip invalid entries.
4091 - # Result will be ugly, but prevents crash.
4092 - if ( is_null( $title ) ) {
4093 - continue;
4094 - }
4095 - $pdbk = $pdbks[$key] = $title->getPrefixedDBkey();
4096 -
4097 - # Check if it's a static known link, e.g. interwiki
4098 - if ( $title->isAlwaysKnown() ) {
4099 - $colours[$pdbk] = '';
4100 - } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
4101 - $colours[$pdbk] = '';
4102 - $this->mOutput->addLink( $title, $id );
4103 - } elseif ( $linkCache->isBadLink( $pdbk ) ) {
4104 - $colours[$pdbk] = 'new';
4105 - } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) {
4106 - $colours[$pdbk] = 'new';
4107 - } else {
4108 - # Not in the link cache, add it to the query
4109 - if ( !isset( $current ) ) {
4110 - $current = $ns;
4111 - $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
4112 - $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
4113 - } elseif ( $current != $ns ) {
4114 - $current = $ns;
4115 - $query .= ")) OR (page_namespace=$ns AND page_title IN(";
4116 - } else {
4117 - $query .= ', ';
4118 - }
4119 -
4120 - $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] );
4121 - }
4122 - }
4123 - if ( $query ) {
4124 - $query .= '))';
4125 - if ( $options & RLH_FOR_UPDATE ) {
4126 - $query .= ' FOR UPDATE';
4127 - }
4128 -
4129 - $res = $dbr->query( $query, $fname );
4130 -
4131 - # Fetch data and form into an associative array
4132 - # non-existent = broken
4133 - while ( $s = $dbr->fetchObject($res) ) {
4134 - $title = Title::makeTitle( $s->page_namespace, $s->page_title );
4135 - $pdbk = $title->getPrefixedDBkey();
4136 - $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect );
4137 - $this->mOutput->addLink( $title, $s->page_id );
4138 - $colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
4139 - //add id to the extension todolist
4140 - $linkcolour_ids[$s->page_id] = $pdbk;
4141 - }
4142 - //pass an array of page_ids to an extension
4143 - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
4144 - }
4145 - wfProfileOut( $fname.'-check' );
4146 -
4147 - # Do a second query for different language variants of links and categories
4148 - if($wgContLang->hasVariants()){
4149 - $linkBatch = new LinkBatch();
4150 - $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
4151 - $categoryMap = array(); // maps $category_variant => $category (dbkeys)
4152 - $varCategories = array(); // category replacements oldDBkey => newDBkey
4153 -
4154 - $categories = $this->mOutput->getCategoryLinks();
4155 -
4156 - // Add variants of links to link batch
4157 - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4158 - $title = $this->mLinkHolders['titles'][$key];
4159 - if ( is_null( $title ) )
4160 - continue;
4161 -
4162 - $pdbk = $title->getPrefixedDBkey();
4163 - $titleText = $title->getText();
4164 -
4165 - // generate all variants of the link title text
4166 - $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText);
4167 -
4168 - // if link was not found (in first query), add all variants to query
4169 - if ( !isset($colours[$pdbk]) ){
4170 - foreach($allTextVariants as $textVariant){
4171 - if($textVariant != $titleText){
4172 - $variantTitle = Title::makeTitle( $ns, $textVariant );
4173 - if(is_null($variantTitle)) continue;
4174 - $linkBatch->addObj( $variantTitle );
4175 - $variantMap[$variantTitle->getPrefixedDBkey()][] = $key;
4176 - }
4177 - }
4178 - }
4179 - }
4180 -
4181 - // process categories, check if a category exists in some variant
4182 - foreach( $categories as $category ){
4183 - $variants = $wgContLang->convertLinkToAllVariants($category);
4184 - foreach($variants as $variant){
4185 - if($variant != $category){
4186 - $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) );
4187 - if(is_null($variantTitle)) continue;
4188 - $linkBatch->addObj( $variantTitle );
4189 - $categoryMap[$variant] = $category;
4190 - }
4191 - }
4192 - }
4193 -
4194 -
4195 - if(!$linkBatch->isEmpty()){
4196 - // construct query
4197 - $titleClause = $linkBatch->constructSet('page', $dbr);
4198 -
4199 - $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
4200 -
4201 - $variantQuery .= " FROM $page WHERE $titleClause";
4202 - if ( $options & RLH_FOR_UPDATE ) {
4203 - $variantQuery .= ' FOR UPDATE';
4204 - }
4205 -
4206 - $varRes = $dbr->query( $variantQuery, $fname );
4207 -
4208 - // for each found variants, figure out link holders and replace
4209 - while ( $s = $dbr->fetchObject($varRes) ) {
4210 -
4211 - $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
4212 - $varPdbk = $variantTitle->getPrefixedDBkey();
4213 - $vardbk = $variantTitle->getDBkey();
4214 -
4215 - $holderKeys = array();
4216 - if(isset($variantMap[$varPdbk])){
4217 - $holderKeys = $variantMap[$varPdbk];
4218 - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect );
4219 - $this->mOutput->addLink( $variantTitle, $s->page_id );
4220 - }
4221 -
4222 - // loop over link holders
4223 - foreach($holderKeys as $key){
4224 - $title = $this->mLinkHolders['titles'][$key];
4225 - if ( is_null( $title ) ) continue;
4226 -
4227 - $pdbk = $title->getPrefixedDBkey();
4228 -
4229 - if(!isset($colours[$pdbk])){
4230 - // found link in some of the variants, replace the link holder data
4231 - $this->mLinkHolders['titles'][$key] = $variantTitle;
4232 - $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey();
4233 -
4234 - // set pdbk and colour
4235 - $pdbks[$key] = $varPdbk;
4236 - $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
4237 - $linkcolour_ids[$s->page_id] = $pdbk;
4238 - }
4239 - wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
4240 - }
4241 -
4242 - // check if the object is a variant of a category
4243 - if(isset($categoryMap[$vardbk])){
4244 - $oldkey = $categoryMap[$vardbk];
4245 - if($oldkey != $vardbk)
4246 - $varCategories[$oldkey]=$vardbk;
4247 - }
4248 - }
4249 -
4250 - // rebuild the categories in original order (if there are replacements)
4251 - if(count($varCategories)>0){
4252 - $newCats = array();
4253 - $originalCats = $this->mOutput->getCategories();
4254 - foreach($originalCats as $cat => $sortkey){
4255 - // make the replacement
4256 - if( array_key_exists($cat,$varCategories) )
4257 - $newCats[$varCategories[$cat]] = $sortkey;
4258 - else $newCats[$cat] = $sortkey;
4259 - }
4260 - $this->mOutput->setCategoryLinks($newCats);
4261 - }
4262 - }
4263 - }
4264 -
4265 - # Construct search and replace arrays
4266 - wfProfileIn( $fname.'-construct' );
4267 - $replacePairs = array();
4268 - foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4269 - $pdbk = $pdbks[$key];
4270 - $searchkey = "<!--LINK $key-->";
4271 - $title = $this->mLinkHolders['titles'][$key];
4272 - if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) {
4273 - $linkCache->addBadLinkObj( $title );
4274 - $colours[$pdbk] = 'new';
4275 - $this->mOutput->addLink( $title, 0 );
4276 - $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title,
4277 - $this->mLinkHolders['texts'][$key],
4278 - $this->mLinkHolders['queries'][$key] );
4279 - } else {
4280 - $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk],
4281 - $this->mLinkHolders['texts'][$key],
4282 - $this->mLinkHolders['queries'][$key] );
4283 - }
4284 - }
4285 - $replacer = new HashtableReplacer( $replacePairs, 1 );
4286 - wfProfileOut( $fname.'-construct' );
4287 -
4288 - # Do the thing
4289 - wfProfileIn( $fname.'-replace' );
4290 - $text = preg_replace_callback(
4291 - '/(<!--LINK .*?-->)/',
4292 - $replacer->cb(),
4293 - $text);
4294 -
4295 - wfProfileOut( $fname.'-replace' );
4296 - }
4297 -
4298 - # Now process interwiki link holders
4299 - # This is quite a bit simpler than internal links
4300 - if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) {
4301 - wfProfileIn( $fname.'-interwiki' );
4302 - # Make interwiki link HTML
4303 - $replacePairs = array();
4304 - foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) {
4305 - $title = $this->mInterwikiLinkHolders['titles'][$key];
4306 - $replacePairs[$key] = $sk->link( $title, $link );
4307 - }
4308 - $replacer = new HashtableReplacer( $replacePairs, 1 );
4309 -
4310 - $text = preg_replace_callback(
4311 - '/<!--IWLINK (.*?)-->/',
4312 - $replacer->cb(),
4313 - $text );
4314 - wfProfileOut( $fname.'-interwiki' );
4315 - }
4316 -
4317 - wfProfileOut( $fname );
4318 - return $colours;
 4019+ return $this->mLinkHolders->replace( $text );
43194020 }
43204021
43214022 /**
@@ -4324,39 +4025,10 @@
43254026 * @return string
43264027 */
43274028 function replaceLinkHoldersText( $text ) {
4328 - $fname = 'Parser::replaceLinkHoldersText';
4329 - wfProfileIn( $fname );
4330 -
4331 - $text = preg_replace_callback(
4332 - '/<!--(LINK|IWLINK) (.*?)-->/',
4333 - array( &$this, 'replaceLinkHoldersTextCallback' ),
4334 - $text );
4335 -
4336 - wfProfileOut( $fname );
4337 - return $text;
 4029+ return $this->mLinkHolders->replaceText( $text );
43384030 }
43394031
43404032 /**
4341 - * @param array $matches
4342 - * @return string
4343 - * @private
4344 - */
4345 - function replaceLinkHoldersTextCallback( $matches ) {
4346 - $type = $matches[1];
4347 - $key = $matches[2];
4348 - if( $type == 'LINK' ) {
4349 - if( isset( $this->mLinkHolders['texts'][$key] ) ) {
4350 - return $this->mLinkHolders['texts'][$key];
4351 - }
4352 - } elseif( $type == 'IWLINK' ) {
4353 - if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) {
4354 - return $this->mInterwikiLinkHolders['texts'][$key];
4355 - }
4356 - }
4357 - return $matches[0];
4358 - }
4359 -
4360 - /**
43614033 * Tag hook handler for 'pre'.
43624034 */
43634035 function renderPreTag( $text, $attribs ) {
@@ -4407,7 +4079,7 @@
44084080
44094081 wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) );
44104082
4411 - $lines = explode( "\n", $text );
 4083+ $lines = StringUtils::explode( "\n", $text );
44124084 foreach ( $lines as $line ) {
44134085 # match lines like these:
44144086 # Image:someimage.jpg|This is some image
@@ -4420,7 +4092,7 @@
44214093
44224094 if ( strpos( $matches[0], '%' ) !== false )
44234095 $matches[1] = urldecode( $matches[1] );
4424 - $tp = Title::newFromText( $matches[1] );
 4096+ $tp = Title::newFromText( $matches[1]/*, NS_IMAGE*/ );
44254097 $nt =& $tp;
44264098 if( is_null( $nt ) ) {
44274099 # Bogus title. Ignore these so we don't bomb out later.
@@ -4486,8 +4158,11 @@
44874159
44884160 /**
44894161 * Parse image options text and use it to make an image
 4162+ * @param Title $title
 4163+ * @param string $options
 4164+ * @param LinkHolderArray $holders
44904165 */
4491 - function makeImage( $title, $options ) {
 4166+ function makeImage( $title, $options, $holders = false ) {
44924167 # Check if the options text is of the form "options|alt text"
44934168 # Options are:
44944169 # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
@@ -4510,7 +4185,7 @@
45114186 # * bottom
45124187 # * text-bottom
45134188
4514 - $parts = array_map( 'trim', explode( '|', $options) );
 4189+ $parts = StringUtils::explode( "|", $options );
45154190 $sk = $this->mOptions->getSkin();
45164191
45174192 # Give extensions a chance to select the file revision for us
@@ -4532,13 +4207,14 @@
45334208 $params = array( 'frame' => array(), 'handler' => array(),
45344209 'horizAlign' => array(), 'vertAlign' => array() );
45354210 foreach( $parts as $part ) {
 4211+ $part = trim( $part );
45364212 list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
45374213 $validated = false;
45384214 if( isset( $paramMap[$magicName] ) ) {
45394215 list( $type, $paramName ) = $paramMap[$magicName];
45404216
45414217 // Special case; width and height come in one variable together
4542 - if( $type == 'handler' && $paramName == 'width' ) {
 4218+ if( $type === 'handler' && $paramName === 'width' ) {
45434219 $m = array();
45444220 # (bug 13500) In both cases (width/height and width only),
45454221 # permit trailing "px" for backward compatibility.
@@ -4561,7 +4237,7 @@
45624238 }
45634239 } // else no validation -- bug 13436
45644240 } else {
4565 - if ( $type == 'handler' ) {
 4241+ if ( $type === 'handler' ) {
45664242 # Validate handler parameter
45674243 $validated = $handler->validateParam( $paramName, $value );
45684244 } else {
@@ -4597,7 +4273,13 @@
45984274 }
45994275
46004276 # Strip bad stuff out of the alt text
4601 - $alt = $this->replaceLinkHoldersText( $caption );
 4277+ # We can't just use replaceLinkHoldersText() here, because if this function
 4278+ # is called from replaceInternalLinks2(), mLinkHolders won't be up to date.
 4279+ if ( $holders ) {
 4280+ $alt = $holders->replaceText( $caption );
 4281+ } else {
 4282+ $alt = $this->replaceLinkHoldersText( $caption );
 4283+ }
46024284
46034285 # make sure there are no placeholders in thumbnail attributes
46044286 # that are later expanded to html- so expand them now and
@@ -4700,7 +4382,7 @@
47014383 $sectionParts = explode( '-', $section );
47024384 $sectionIndex = array_pop( $sectionParts );
47034385 foreach ( $sectionParts as $part ) {
4704 - if ( $part == 'T' ) {
 4386+ if ( $part === 'T' ) {
47054387 $flags |= self::PTD_FOR_INCLUSION;
47064388 }
47074389 }
@@ -4717,14 +4399,14 @@
47184400 $targetLevel = 1000;
47194401 } else {
47204402 while ( $node ) {
4721 - if ( $node->getName() == 'h' ) {
 4403+ if ( $node->getName() === 'h' ) {
47224404 $bits = $node->splitHeading();
47234405 if ( $bits['i'] == $sectionIndex ) {
47244406 $targetLevel = $bits['level'];
47254407 break;
47264408 }
47274409 }
4728 - if ( $mode == 'replace' ) {
 4410+ if ( $mode === 'replace' ) {
47294411 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
47304412 }
47314413 $node = $node->getNextSibling();
@@ -4733,7 +4415,7 @@
47344416
47354417 if ( !$node ) {
47364418 // Not found
4737 - if ( $mode == 'get' ) {
 4419+ if ( $mode === 'get' ) {
47384420 return $newText;
47394421 } else {
47404422 return $text;
@@ -4742,21 +4424,21 @@
47434425
47444426 // Find the end of the section, including nested sections
47454427 do {
4746 - if ( $node->getName() == 'h' ) {
 4428+ if ( $node->getName() === 'h' ) {
47474429 $bits = $node->splitHeading();
47484430 $curLevel = $bits['level'];
47494431 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
47504432 break;
47514433 }
47524434 }
4753 - if ( $mode == 'get' ) {
 4435+ if ( $mode === 'get' ) {
47544436 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
47554437 }
47564438 $node = $node->getNextSibling();
47574439 } while ( $node );
47584440
47594441 // Write out the remainder (in replace mode only)
4760 - if ( $mode == 'replace' ) {
 4442+ if ( $mode === 'replace' ) {
47614443 // Output the replacement text
47624444 // Add two newlines on -- trailing whitespace in $newText is conventionally
47634445 // stripped by the editor, so we need both newlines to restore the paragraph gap
@@ -4986,7 +4668,7 @@
49874669 do {
49884670 $oldText = $text;
49894671 $text = $this->general->replace( $text );
4990 - } while ( $text != $oldText );
 4672+ } while ( $text !== $oldText );
49914673 wfProfileOut( __METHOD__ );
49924674 return $text;
49934675 }
@@ -4996,7 +4678,7 @@
49974679 do {
49984680 $oldText = $text;
49994681 $text = $this->nowiki->replace( $text );
5000 - } while ( $text != $oldText );
 4682+ } while ( $text !== $oldText );
50014683 wfProfileOut( __METHOD__ );
50024684 return $text;
50034685 }
@@ -5007,7 +4689,7 @@
50084690 $oldText = $text;
50094691 $text = $this->general->replace( $text );
50104692 $text = $this->nowiki->replace( $text );
5011 - } while ( $text != $oldText );
 4693+ } while ( $text !== $oldText );
50124694 wfProfileOut( __METHOD__ );
50134695 return $text;
50144696 }
@@ -5021,7 +4703,7 @@
50224704 var $output = '';
50234705
50244706 function replace( $matches ) {
5025 - if ( substr( $matches[1], -1 ) == "\n" ) {
 4707+ if ( substr( $matches[1], -1 ) === "\n" ) {
50264708 $this->output .= substr( $matches[1], 0, -1 );
50274709 } else {
50284710 $this->output .= $matches[1];
Index: trunk/phase3/includes/parser/Parser_DiffTest.php
@@ -69,9 +69,22 @@
7070 $lastResult = $currentResult;
7171 }
7272 if ( $mismatch ) {
73 - throw new MWException( "Parser_DiffTest: results mismatch on call to $name\n" .
74 - 'Arguments: ' . $this->formatArray( $args ) . "\n" .
75 - 'Results: ' . $this->formatArray( $results ) . "\n" );
 73+ if ( count( $results ) == 2 ) {
 74+ $resultsList = array();
 75+ foreach ( $this->parsers as $i => $parser ) {
 76+ $resultsList[] = var_export( $results[$i], true );
 77+ }
 78+ $diff = wfDiff( $resultsList[0], $resultsList[1] );
 79+ } else {
 80+ $diff = '[too many parsers]';
 81+ }
 82+ $msg = "Parser_DiffTest: results mismatch on call to $name\n";
 83+ if ( !$this->shortOutput ) {
 84+ $msg .= 'Arguments: ' . $this->formatArray( $args ) . "\n";
 85+ }
 86+ $msg .= 'Results: ' . $this->formatArray( $results ) . "\n" .
 87+ "Diff: $diff\n";
 88+ throw new MWException( $msg );
7689 }
7790 return $lastResult;
7891 }
Index: trunk/phase3/includes/MessageCache.php
@@ -649,12 +649,18 @@
650650 return $message;
651651 }
652652
653 - global $wgParser;
 653+ global $wgParser, $wgParserConf;
654654 if ( !$this->mParser && isset( $wgParser ) ) {
655655 # Do some initialisation so that we don't have to do it twice
656656 $wgParser->firstCallInit();
657657 # Clone it and store it
658 - $this->mParser = clone $wgParser;
 658+ $class = $wgParserConf['class'];
 659+ if ( $class == 'Parser_DiffTest' ) {
 660+ # Uncloneable
 661+ $this->mParser = new $class( $wgParserConf );
 662+ } else {
 663+ $this->mParser = clone $wgParser;
 664+ }
659665 #wfDebug( __METHOD__ . ": following contents triggered transform: $message\n" );
660666 }
661667 if ( $this->mParser ) {
Index: trunk/phase3/includes/Title.php
@@ -410,6 +410,12 @@
411411 global $wgInterwikiCache, $wgContLang;
412412 $fname = 'Title::getInterwikiLink';
413413
 414+ if ( count( Title::$interwikiCache ) >= self::CACHE_MAX ) {
 415+ // Don't use infinite memory
 416+ reset( Title::$interwikiCache );
 417+ unset( Title::$interwikiCache[ key( Title::$interwikiCache ) ] );
 418+ }
 419+
414420 $key = $wgContLang->lc( $key );
415421
416422 $k = wfMemcKey( 'interwiki', $key );
Index: trunk/phase3/includes/DefaultSettings.php
@@ -3336,6 +3336,12 @@
33373337 );
33383338
33393339 /**
 3340+ * LinkHolderArray batch size
 3341+ * For debugging
 3342+ */
 3343+$wgLinkHolderBatchSize = 1000;
 3344+
 3345+/**
33403346 * Hooks that are used for outputting exceptions. Format is:
33413347 * $wgExceptionHooks[] = $funcname
33423348 * or:
Index: trunk/phase3/includes/Exception.php
@@ -83,7 +83,7 @@
8484 function getHTML() {
8585 global $wgShowExceptionDetails;
8686 if( $wgShowExceptionDetails ) {
87 - return '<p>' . htmlspecialchars( $this->getMessage() ) .
 87+ return '<p>' . nl2br( htmlspecialchars( $this->getMessage() ) ) .
8888 '</p><p>Backtrace:</p><p>' . nl2br( htmlspecialchars( $this->getTraceAsString() ) ) .
8989 "</p>\n";
9090 } else {
Index: trunk/phase3/languages/LanguageConverter.php
@@ -435,8 +435,9 @@
436436 if ($isTitle) return $this->convertTitle($text);
437437
438438 $plang = $this->getPreferredVariant();
439 - $tarray = explode($this->mMarkup['end'], $text);
 439+ $tarray = StringUtils::explode($this->mMarkup['end'], $text);
440440 $text = '';
 441+ $lastDelim = false;
441442 foreach($tarray as $txt) {
442443 $marked = explode($this->mMarkup['begin'], $txt, 2);
443444
@@ -452,8 +453,17 @@
453454
454455 $text .= $crule->getDisplay();
455456 $this->applyManualConv($crule);
 457+ $lastDelim = false;
 458+ } else {
 459+ // Reinsert the }- which wasn't part of anything
 460+ $text .= $this->mMarkup['end'];
 461+ $lastDelim = true;
456462 }
457463 }
 464+ if ( $lastDelim ) {
 465+ // Remove the last delimiter (wasn't real)
 466+ $text = substr( $text, 0, -strlen( $this->mMarkup['end'] ) );
 467+ }
458468
459469 return $text;
460470 }
Index: trunk/phase3/languages/Language.php
@@ -177,6 +177,15 @@
178178 }
179179
180180 /**
 181+ * Reduce memory usage
 182+ */
 183+ function __destruct() {
 184+ foreach ( $this as $name => $value ) {
 185+ unset( $this->$name );
 186+ }
 187+ }
 188+
 189+ /**
181190 * Hook which will be called if this is the content language.
182191 * Descendants can use this to register hook functions or modify globals
183192 */
Index: trunk/phase3/RELEASE-NOTES
@@ -94,6 +94,7 @@
9595 * HTML entities like &nbsp; now work (are not escaped) in edit summaries.
9696 * (bug 13815) In the comment for page moves, use the colon-separator message
9797 instead of a hardcoded colon.
 98+* Allow <gallery> to accept image names without an Image: prefix
9899
99100 === Bug fixes in 1.14 ===
100101

Follow-up revisions

Revision | Commit summary | Author | Date
r69596 | Uncommented the DROP TABLE queries for parsertest_*, accidentally commented o... | tstarling | 10:10, 20 July 2010

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r39980 | Revert r39949 "* Revert revert r39662 of my parser changes."... | brion | 22:19, 25 August 2008
r40010 | Added __destruct(), for stable pseudo-branch | tstarling | 06:48, 26 August 2008

Status & tagging log