r64442 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r64441 | r64442 | r64443 >
Date: 15:46, 31 March 2010
Author: than4213
Status: deferred
Tags:
Comment:
Remove a lot of dead wood in the parser. This change is very broken, I will fix it as quickly as I can.
Modified paths:
  • /branches/parser-work/phase3/includes/AutoLoader.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/ParseEngine.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Parser.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/WikiTextGrammar.xml (modified) (history)
  • /branches/parser-work/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: branches/parser-work/phase3/maintenance/parserTests.txt
@@ -3792,9 +3792,10 @@
37933793 <li class="toclevel-5 tocsection-5"><a href="#Level_5_Heading"><span class="tocnumber">1.1.1.1.1</span> <span class="toctext">Level 5 Heading</span></a>
37943794 <ul>
37953795 <li class="toclevel-6 tocsection-6"><a href="#Level_6_Heading"><span class="tocnumber">1.1.1.1.1.1</span> <span class="toctext">Level 6 Heading</span></a></li>
3796 -<li class="toclevel-6 tocsection-7"><a href="#.3D_Level_7_Heading"><span class="tocnumber">1.1.1.1.1.2</span> <span class="toctext">= Level 7 Heading</span></a></li>
3797 -<li class="toclevel-6 tocsection-8"><a href="#.3D.3D_Level_8_Heading"><span class="tocnumber">1.1.1.1.1.3</span> <span class="toctext">== Level 8 Heading</span></a></li>
3798 -<li class="toclevel-6 tocsection-9"><a href="#.3D.3D.3D_Level_9_Heading"><span class="tocnumber">1.1.1.1.1.4</span> <span class="toctext">=== Level 9 Heading</span></a></li>
 3796+<li class="toclevel-6 tocsection-7"><a href="#.3D_Level_7_Heading.3D"><span class="tocnumber">1.1.1.1.1.2</span> <span class="toctext">= Level 7 Heading=</span></a></li>
 3797+<li class="toclevel-6 tocsection-8"><a href="#.3D.3D_Level_8_Heading.3D.3D"><span class="tocnumber">1.1.1.1.1.3</span> <span class="toctext">== Level 8 Heading==</span></a></li>
 3798+<li class="toclevel-6 tocsection-9"><a href="#.3D.3D.3D_Level_9_Heading.3D.3D.3D"><span class="tocnumber">1.1.1.1.1.4</span> <span class="toctext">=== Level 9 Heading===</span></a></li>
 3799+<li class="toclevel-6 tocsection-10"><a href="#.3D.3D.3D.3D_Level_10_Heading.3D.3D.3D.3D"><span class="tocnumber">1.1.1.1.1.5</span> <span class="toctext">==== Level 10 Heading====</span></a></li>
37993800 </ul>
38003801 </li>
38013802 </ul>
@@ -3805,13 +3806,7 @@
38063807 </li>
38073808 </ul>
38083809 </li>
3809 -<li class="toclevel-1"><a href="#.3D"><span class="tocnumber">2</span> <span class="toctext">=</span></a>
3810 -<ul>
3811 -<li class="toclevel-2 tocsection-10"><a href="#.3D.3D.3D.3D_Level_10_Heading"><span class="tocnumber">2.1</span> <span class="toctext">==== Level 10 Heading</span></a></li>
38123810 </ul>
3813 -</li>
3814 -<li class="toclevel-1"><a href="#.3D.3D"><span class="tocnumber">3</span> <span class="toctext">==</span></a></li>
3815 -</ul>
38163811 </td></tr></table><script>if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
38173812 <h1><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: Level 1 Heading">edit</a>]</span> <span class="mw-headline" id="Level_1_Heading"> Level 1 Heading</span></h1>
38183813 <h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit section: Level 2 Heading">edit</a>]</span> <span class="mw-headline" id="Level_2_Heading"> Level 2 Heading</span></h2>
@@ -3819,16 +3814,10 @@
38203815 <h4><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=4" title="Edit section: Level 4 Heading">edit</a>]</span> <span class="mw-headline" id="Level_4_Heading"> Level 4 Heading</span></h4>
38213816 <h5><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=5" title="Edit section: Level 5 Heading">edit</a>]</span> <span class="mw-headline" id="Level_5_Heading"> Level 5 Heading</span></h5>
38223817 <h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=6" title="Edit section: Level 6 Heading">edit</a>]</span> <span class="mw-headline" id="Level_6_Heading"> Level 6 Heading</span></h6>
3823 -<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=7" title="Edit section: = Level 7 Heading">edit</a>]</span> <span class="mw-headline" id=".3D_Level_7_Heading">= Level 7 Heading</span></h6>
3824 -<p>=
3825 -</p>
3826 -<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=8" title="Edit section: == Level 8 Heading">edit</a>]</span> <span class="mw-headline" id=".3D.3D_Level_8_Heading">== Level 8 Heading</span></h6>
3827 -<p>==
3828 -</p>
3829 -<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=9" title="Edit section: === Level 9 Heading">edit</a>]</span> <span class="mw-headline" id=".3D.3D.3D_Level_9_Heading">=== Level 9 Heading</span></h6>
3830 -<h1> <span class="mw-headline" id=".3D">=</span></h1>
3831 -<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=10" title="Edit section: ==== Level 10 Heading">edit</a>]</span> <span class="mw-headline" id=".3D.3D.3D.3D_Level_10_Heading">==== Level 10 Heading</span></h6>
3832 -<h1> <span class="mw-headline" id=".3D.3D">==</span></h1>
 3818+<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=7" title="Edit section: = Level 7 Heading=">edit</a>]</span> <span class="mw-headline" id=".3D_Level_7_Heading.3D">= Level 7 Heading=</span></h6>
 3819+<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=8" title="Edit section: == Level 8 Heading==">edit</a>]</span> <span class="mw-headline" id=".3D.3D_Level_8_Heading.3D.3D">== Level 8 Heading==</span></h6>
 3820+<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=9" title="Edit section: === Level 9 Heading===">edit</a>]</span> <span class="mw-headline" id=".3D.3D.3D_Level_9_Heading.3D.3D.3D">=== Level 9 Heading===</span></h6>
 3821+<h6><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=10" title="Edit section: ==== Level 10 Heading====">edit</a>]</span> <span class="mw-headline" id=".3D.3D.3D.3D_Level_10_Heading.3D.3D.3D.3D">==== Level 10 Heading====</span></h6>
38333822
38343823 !! end
38353824
@@ -5505,6 +5494,16 @@
55065495 !! end
55075496
55085497 !! test
 5498+Invalid header with following text
 5499+!! input
 5500+= x = y
 5501+!! result
 5502+<p>= x = y
 5503+</p>
 5504+!! end
 5505+
 5506+
 5507+!! test
55095508 Section extraction test (section 0)
55105509 !! options
55115510 section=0
@@ -5753,18 +5752,43 @@
57545753 !! end
57555754
57565755 !! test
 5756+Section extraction test with bogus heading (section 1)
 5757+!! options
 5758+section=1
 5759+!! input
 5760+==a==
 5761+==bogus== not a legal section
 5762+==b==
 5763+!! result
 5764+==a==
 5765+==bogus== not a legal section
 5766+!! end
 5767+
 5768+!! test
57575769 Section extraction test with bogus heading (section 2)
57585770 !! options
57595771 section=2
57605772 !! input
57615773 ==a==
5762 -==b== now a legal section
 5774+==bogus== not a legal section
 5775+==b==
57635776 !! result
57645777 ==b==
5765 - now a legal section
57665778 !! end
57675779
57685780 !! test
 5781+Section extraction test with comment after heading (section 1)
 5782+!! options
 5783+section=1
 5784+!! input
 5785+==a==
 5786+==b== <!-- -->
 5787+==c==
 5788+!! result
 5789+==a==
 5790+!! end
 5791+
 5792+!! test
57695793 Section extraction test with comment after heading (section 2)
57705794 !! options
57715795 section=2
@@ -5773,8 +5797,20 @@
57745798 ==b== <!-- -->
57755799 ==c==
57765800 !! result
 5801+==b== <!-- -->
 5802+!! end
 5803+
 5804+!! test
 5805+Section extraction test with bogus <nowiki> heading (section 1)
 5806+!! options
 5807+section=1
 5808+!! input
 5809+==a==
 5810+==bogus== <nowiki>not a legal section</nowiki>
57775811 ==b==
5778 - <!-- -->
 5812+!! result
 5813+==a==
 5814+==bogus== <nowiki>not a legal section</nowiki>
57795815 !! end
57805816
57815817 !! test
@@ -5783,10 +5819,10 @@
57845820 section=2
57855821 !! input
57865822 ==a==
5787 -==b== <nowiki>now a legal section</nowiki>
 5823+==bogus== <nowiki>not a legal section</nowiki>
 5824+==b==
57885825 !! result
57895826 ==b==
5790 - <nowiki>now a legal section</nowiki>
57915827 !! end
57925828
57935829
Index: branches/parser-work/phase3/includes/parser/Parser.php
@@ -95,7 +95,7 @@
9696 # Persistent:
9797 var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
9898 $mSubsts, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex,
99 - $mParseEngine, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList,
 99+ $mParseEngine, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList,
100100 $mVarCache, $mConf, $mFunctionTagHooks;
101101
102102
@@ -227,11 +227,6 @@
228228 $this->mDoubleUnderscores = array();
229229 $this->mExpensiveFunctionCount = 0;
230230
231 - # Fix cloning
232 - if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
233 - $this->mPreprocessor = null;
234 - }
235 -
236231 wfRunHooks( 'ParserClearState', array( &$this ) );
237232 wfProfileOut( __METHOD__ );
238233 }
@@ -535,16 +530,6 @@
536531 }
537532
538533 /**
539 - * Get a preprocessor object
540 - */
541 - function getPreprocessor() {
542 - if ( !isset( $this->mPreprocessor ) ) {
543 - $this->mPreprocessor = new Preprocessor( $this->mParseEngine );
544 - }
545 - return $this->mPreprocessor;
546 - }
547 -
548 - /**
549534 * Replaces all occurrences of HTML-style comments and the given tags
550535 * in the text with a random marker and returns the next text. The output
551536 * parameter $matches will be an associative array filled with data in
@@ -912,7 +897,7 @@
913898 $flag = 0;
914899 else
915900 $flag = Parser::PTD_FOR_INCLUSION;
916 - $dom = $this->preprocessToDom( $text, $flag );
 901+ $dom = $this->mParseEngine->parse($text);
917902 $text = $frame->expand( $dom, $flag );
918903 }
919904 // if $frame is not provided, then use old-style replaceVariables
@@ -2715,33 +2700,6 @@
27162701 wfProfileOut( __METHOD__ );
27172702 }
27182703
2719 - /**
2720 - * Preprocess some wikitext and return the document tree.
2721 - * This is the ghost of replace_variables().
2722 - *
2723 - * @param string $text The text to parse
2724 - * @param integer flags Bitwise combination of:
2725 - * self::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
2726 - * included. Default is to assume a direct page view.
2727 - *
2728 - * The generated DOM tree must depend only on the input text and the flags.
2729 - * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
2730 - *
2731 - * Any flag added to the $flags parameter here, or any other parameter liable to cause a
2732 - * change in the DOM tree for a given text, must be passed through the section identifier
2733 - * in the section edit link and thus back to extractSections().
2734 - *
2735 - * The output of this function is currently only cached in process memory, but a persistent
2736 - * cache may be implemented at a later date which takes further advantage of these strict
2737 - * dependency requirements.
2738 - *
2739 - * @private
2740 - */
2741 - function preprocessToDom ( $text, $flags = 0 ) {
2742 - $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
2743 - return $dom;
2744 - }
2745 -
27462704 /*
27472705 * Return a three-element array: leading whitespace, string contents, trailing whitespace
27482706 */
@@ -2772,27 +2730,19 @@
27732731 * @param PPFrame $frame Object describing the arguments passed to the template.
27742732 * Arguments may also be provided as an associative array, as was the usual case before MW1.12.
27752733 * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly.
2776 - * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion
27772734 * @private
27782735 */
2779 - function replaceVariables( $text, $frame = false, $argsOnly = false ) {
 2736+ function replaceVariables( $text ) {
27802737 # Is there any text? Also, Prevent too big inclusions!
27812738 if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
27822739 return $text;
27832740 }
27842741 wfProfileIn( __METHOD__ );
27852742
2786 - if ( $frame === false ) {
2787 - $frame = new PPFrame($this);
2788 - } elseif ( !( $frame instanceof PPFrame ) ) {
2789 - wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" );
2790 - $frame = $this->getPreprocessor()->newCustomFrame($frame);
2791 - }
 2743+ $dom = $this->mParseEngine->parse($text);
 2744+ $frame = new PPFrame($this);
 2745+ $text = $frame->expand($dom);
27922746
2793 - $dom = $this->preprocessToDom( $text );
2794 - $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
2795 - $text = $frame->expand( $dom, $flags );
2796 -
27972747 wfProfileOut( __METHOD__ );
27982748 return $text;
27992749 }
@@ -2994,7 +2944,7 @@
29952945 $text = $result;
29962946 }
29972947 if ( !$noparse ) {
2998 - $text = $this->preprocessToDom( $text, $preprocessFlags );
 2948+ $text = $this->mParseEngine->parse($text);
29992949 $isChildObj = true;
30002950 }
30012951 }
@@ -3063,7 +3013,7 @@
30643014 } else {
30653015 $text = $this->interwikiTransclude( $title, 'raw' );
30663016 // Preprocess it like a template
3067 - $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
 3017+ $text = $this->mParseEngine->parse($text);
30683018 $isChildObj = true;
30693019 }
30703020 $found = true;
@@ -3167,7 +3117,7 @@
31683118 return array( false, $title );
31693119 }
31703120
3171 - $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
 3121+ $dom = $this->mParseEngine->parse($text);
31723122 $this->mTplDomCache[ $titleText ] = $dom;
31733123
31743124 if (! $title->equals($cacheTitle)) {
@@ -3600,7 +3550,7 @@
36013551 $oldType = $this->mOutputType;
36023552 $this->setOutputType( self::OT_WIKI );
36033553 $frame = new PPFrame($this);
3604 - $root = $this->preprocessToDom( $origText );
 3554+ $root = $this->mParseEngine->parse($origText);
36053555 $node = $root->firstChild;
36063556 $byteOffset = 0;
36073557 $tocraw = array();
@@ -4163,7 +4113,7 @@
41644114
41654115 $text = preg_replace( $substRegex, $substText, $text );
41664116 $text = $this->cleanSigInSig( $text );
4167 - $dom = $this->preprocessToDom( $text );
 4117+ $dom = $this->mParseEngine->parse($text);
41684118 $frame = new PPFrame($this);
41694119 $text = $frame->expand( $dom );
41704120
@@ -4748,7 +4698,7 @@
47494699 * @private
47504700 */
47514701 function attributeStripCallback( &$text, $frame = false ) {
4752 - $text = $this->replaceVariables( $text, $frame );
 4702+ $text = $this->replaceVariables( $text );
47534703 $text = $this->mStripState->unstripBoth( $text );
47544704 return $text;
47554705 }
@@ -4814,7 +4764,7 @@
48154765 }
48164766 }
48174767 // Preprocess the text
4818 - $root = $this->preprocessToDom( $text, $flags );
 4768+ $root = $this->mParseEngine->parse($text);
48194769 PPFrame::updateIncTags($root, $flags);
48204770
48214771 // <h> nodes indicate section breaks
Index: branches/parser-work/phase3/includes/parser/ParseEngine.php
@@ -22,6 +22,8 @@
2323 }
2424
2525 function parse($text) {
 26+//print("Text: $text\n");
 27+//foreach (debug_backtrace() as $func) print("{$func["function"]}:{$func["file"]}:{$func["line"]}\n");
2628 global $wgDebugParserLog;
2729 if ($wgDebugParserLog != '') {
2830 wfErrorLog("==========Start Parsing==========\n", $wgDebugParserLog);
@@ -43,20 +45,6 @@
4446 return $doc;
4547 }
4648
47 - static function unparse($node) {
48 - $retStr = "";
49 - if ($node instanceof DOMElement) {
50 - $retStr .= $node->getAttribute("startTag");
51 - foreach ($node->childNodes as $child) {
52 - $retStr .= ParseEngine::unparse($child);
53 - }
54 - $retStr .= $node->getAttribute("endTag");
55 - } else {
56 - $retStr .= $node->textContent;
57 - }
58 - return $retStr;
59 - }
60 -
6149 private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) {
6250 global $wgDebugParserLog;
6351 if ($wgDebugParserLog != '') {
@@ -79,23 +67,23 @@
8068 $retCode = FALSE;
8169 if ($rule->nodeName == "Assignment") {
8270 $startPat = $rule->getAttribute("tag");
83 - $startTag = NULL;
 71+ $tag = NULL;
8472 if ($rule->getAttribute("regex") != NULL) {
8573 if (preg_match("/^$startPat/s", $text, $matches)) {
86 - $startTag = $matches[0];
 74+ $tag = $matches[0];
8775 if (isset($matches[1])) {
8876 $replaceStr = $matches[1];
8977 }
9078 }
9179 } elseif ($startPat != NULL && strncmp($startPat, $text, strlen($startPat)) == 0) {
92 - $startTag = $startPat;
 80+ $tag = $startPat;
9381 }
94 - if ($startTag != NULL || $startPat == NULL) {
 82+ if ($tag != NULL || $startPat == NULL) {
9583 $newText = $text;
96 - $newElement = $dom->createElement($rule->getAttribute("name"));
97 - if ($startTag != NULL) {
98 - $newText = substr($newText, strlen($startTag));
99 - $newElement->setAttribute("startTag", $startTag);
 84+ $newElement = $dom->createElement($rule->getAttribute("tagName"));
 85+ if ($tag != NULL) {
 86+ $newText = substr($newText, strlen($tag));
 87+ $newElement->setAttribute("tag", $tag);
10088 }
10189 $retCode = $rule->firstChild == NULL || $this->parseRec($rule->firstChild, $replaceStr, $saveTags, $iter, $newText, $newElement);
10290 if ($retCode) {
@@ -103,14 +91,6 @@
10492 $text = $newText;
10593 }
10694 }
107 - } elseif ($rule->nodeName == "EndTag") {
108 - $tag = str_replace("~r", $replaceStr, $rule->getAttribute("tag"));
109 - $tagLength = strlen($tag);
110 - if (strncmp($tag, $text, $tagLength) == 0) {
111 - $text = substr($text, $tagLength);
112 - $outNode->setAttribute("endTag", $tag);
113 - $retCode = TRUE;
114 - }
11595 } elseif ($rule->nodeName == "Sequence") {
11696 $saveText = $text;
11797 $saveNode = $outNode->cloneNode(TRUE);
@@ -195,7 +175,7 @@
196176 }
197177 $rule->setAttribute("pushInd", $pushInd);
198178 } else {
199 - if ($rule->nodeName != "Choice" && $rule->nodeName != "EndTag") {
 179+ if ($rule->nodeName != "Choice") {
200180 $rule->setAttribute("saveTags", $tagStr);
201181 $tagStr = NULL;
202182 if ($rule->nodeName == "Text") {
@@ -223,7 +203,7 @@
224204 }
225205 $childTags = "";
226206 $failSafe = TRUE;
227 - if ($rule->nodeName == "EndTag" || $rule->nodeName == "Assignment") {
 207+ if ($rule->nodeName == "Assignment") {
228208 $childTags = $rule->getAttribute("tag");
229209 if ($rule->nodeName != "Assignment" || $rule->getAttribute("regex") == NULL) {
230210 $childTags = preg_quote($childTags, "/");
Index: branches/parser-work/phase3/includes/parser/Preprocessor.php
@@ -1,117 +1,5 @@
22 <?php
3 -
43 /**
5 - * @ingroup Parser
6 - */
7 -class Preprocessor {
8 - private $mParser, $memoryLimit;
9 -
10 - const CACHE_VERSION = 1;
11 -
12 - function __construct( $parser ) {
13 - $this->mParser = $parser;
14 - $mem = ini_get( 'memory_limit' );
15 - $this->memoryLimit = false;
16 - if ( strval( $mem ) !== '' && $mem != -1 ) {
17 - if ( preg_match( '/^\d+$/', $mem ) ) {
18 - $this->memoryLimit = $mem;
19 - } elseif ( preg_match( '/^(\d+)M$/i', $mem, $m ) ) {
20 - $this->memoryLimit = $m[1] * 1048576;
21 - }
22 - }
23 - }
24 -
25 - function memCheck() {
26 - if ( $this->memoryLimit === false ) {
27 - return;
28 - }
29 - $usage = memory_get_usage();
30 - if ( $usage > $this->memoryLimit * 0.9 ) {
31 - $limit = intval( $this->memoryLimit * 0.9 / 1048576 + 0.5 );
32 - throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
33 - }
34 - return $usage <= $this->memoryLimit * 0.8;
35 - }
36 -
37 - /**
38 - * Preprocess some wikitext and return the document tree.
39 - * This is the ghost of Parser::replace_variables().
40 - *
41 - * @param string $text The text to parse
42 - * @param integer flags Bitwise combination of:
43 - * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
44 - * included. Default is to assume a direct page view.
45 - *
46 - * The generated DOM tree must depend only on the input text and the flags.
47 - * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
48 - *
49 - * Any flag added to the $flags parameter here, or any other parameter liable to cause a
50 - * change in the DOM tree for a given text, must be passed through the section identifier
51 - * in the section edit link and thus back to extractSections().
52 - *
53 - * The output of this function is currently only cached in process memory, but a persistent
54 - * cache may be implemented at a later date which takes further advantage of these strict
55 - * dependency requirements.
56 - *
57 - * @private
58 - */
59 - function preprocessToObj( $text, $flags = 0 ) {
60 - wfProfileIn( __METHOD__ );
61 - global $wgMemc, $wgPreprocessorCacheThreshold;
62 -
63 - $xml = false;
64 - $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
65 - if ( $cacheable ) {
66 - wfProfileIn( __METHOD__.'-cacheable' );
67 -
68 - $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
69 - $cacheValue = $wgMemc->get( $cacheKey );
70 - if ( $cacheValue ) {
71 - $version = substr( $cacheValue, 0, 8 );
72 - if ( intval( $version ) == self::CACHE_VERSION ) {
73 - $xml = substr( $cacheValue, 8 );
74 - // From the cache
75 - wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
76 - }
77 - }
78 - }
79 - $dom = false;
80 - if ( $xml === false ) {
81 - if ( $cacheable ) {
82 - wfProfileIn( __METHOD__.'-cache-miss' );
83 - }
84 - $dom = $this->mParser->parse($text);
85 - if ( $cacheable ) {
86 - $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $dom->saveXML();
87 - $wgMemc->set( $cacheKey, $cacheValue, 86400 );
88 - wfProfileOut( __METHOD__.'-cache-miss' );
89 - wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
90 - }
91 - } else {
92 - wfProfileIn( __METHOD__.'-loadXML' );
93 - $dom = new DOMDocument;
94 - wfSuppressWarnings();
95 - $result = $dom->loadXML( $xml );
96 - wfRestoreWarnings();
97 - if ( !$result ) {
98 - // Try running the XML through UtfNormal to get rid of invalid characters
99 - $xml = UtfNormal::cleanUp( $xml );
100 - $result = $dom->loadXML( $xml );
101 - if ( !$result ) {
102 - throw new MWException( __METHOD__.' generated invalid XML' );
103 - }
104 - }
105 - wfProfileOut( __METHOD__.'-loadXML' );
106 - }
107 - if ( $cacheable ) {
108 - wfProfileOut( __METHOD__.'-cacheable' );
109 - }
110 - wfProfileOut( __METHOD__ );
111 - return $dom;
112 - }
113 -}
114 -
115 -/**
1164 * An expansion frame, used as a context to expand the result of preprocessToObj()
1175 * @ingroup Parser
1186 */
@@ -141,7 +29,7 @@
14230
14331 /**
14432 * Construct a new preprocessor frame.
145 - * @param Preprocessor $parser The parent parser
 33+ * @param Parser $parser The parent parser
14634 */
14735 function __construct( $parser ) {
14836 $this->parser = $parser;
@@ -216,11 +104,11 @@
217105 //print("UpdIn - {$root->ownerDocument->saveXML()}\n");
218106 PPFrame::updateIncTags($root, $flags);
219107
220 -print("ParseIn - {$root->ownerDocument->saveXML()}\n");
 108+//print("ParseIn - {$root->ownerDocument->saveXML()}\n");
221109 $headingIndex = 1;
222110 $this->expandRec($root->childNodes, $flags, $headingIndex);
223111 $output = $root->textContent;
224 -print("ParseOut - {$output}\n");
 112+//print("ParseOut - {$output}\n");
225113
226114 --$expansionDepth;
227115 wfProfileOut( __METHOD__ );
@@ -228,31 +116,22 @@
229117 }
230118
231119 private function expandRec($contextNode, $flags, &$headingIndex) {
 120+ $xpath = new DOMXPath($contextNode->ownerDocument);
232121 if ($contextNode instanceof DOMNodeList) {
233 - for ($i = 0; $i < $contextNode->length; $i ++) {
234 - $child = $contextNode->item($i);
 122+ $retStr = "";
 123+ foreach ($contextNode as $child) {
235124 if ($child instanceof DOMElement) {
236 - $this->expandRec($child, $flags, $headingIndex);
237 - $i --;
 125+ $retStr .= $this->expandRec($child, $flags, $headingIndex);
 126+ } else {
 127+ $retstr .= $child->data;
238128 }
239129 }
240130 } else {
241 -print("ParseRecIn - {$contextNode->nodeName}\n");
242 - if (($contextNode->nodeName == 'template' || $contextNode->nodeName == 'tplarg') && ! ($flags & self::NO_ARGS)) {
243 - foreach ($contextNode->childNodes as $child) {
244 - if ($child->nodeName == "part") {
245 - foreach ($child->childNodes as $partChild) {
246 - $this->expandRec($partChild->childNodes, $flags, $headingIndex);
247 - }
248 - } else {
249 - $this->expandRec($child->childNodes, $flags, $headingIndex);
250 - }
251 - }
252 - if ( $contextNode->nodeName == 'template' ) {
253 - $this->parser->braceSubstitution($contextNode, $this);
254 - } else {
255 - $this->parser->argSubstitution($contextNode, $this);
256 - }
 131+//print("ParseRecIn - {$contextNode->nodeName}\n");
 132+ if ($contextNode->nodeName == 'template' && ! ($flags & self::NO_TEMPLATES)) {
 133+ $retStr = $this->parser->braceSubstitution($contextNode, $this);
 134+ } elseif ($contextNode->nodeName == 'tplarg' && ! ($flags & self::NO_ARGS)) {
 135+ $retStr = $this->parser->argSubstitution($contextNode, $this);
257136 } elseif ( $contextNode->nodeName == 'comment' ) {
258137 $comment = $contextNode->getAttribute("startTag");
259138 # HTML-style comment
@@ -277,35 +156,32 @@
278157 else {
279158 $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($comment), $contextNode);
280159 }
281 - } elseif ($contextNode->nodeName == 'ignore') {
282 - # Output suppression used by <includeonly> etc.
283 - # OT_WIKI will only respect <ignore> in substed templates.
284 - # The other output types respect it unless NO_IGNORE is set.
285 - # extractSections() sets NO_IGNORE and so never respects it.
286 - if (($this instanceof PPTemplateFrame || ! $this->parser->ot['wiki']) && ! ($flags & self::NO_IGNORE)) {
287 - $contextNode->parentNode->removeChild($contextNode);
 160+ } elseif ($contextNode->nodeName == "xmltag" || $contextNode->nodeName == "onlyinclude") {
 161+ $tagName = $contextNode->nodeName == "xmltag" ? $xpath->query("name", $contextNode)->item(0)->getAttribute("tag") : "noinclude";
 162+ if ($tagName == "noinclude" || $tagName == "includeonly") {
 163+ if (((! $this instanceof PPTemplateFrame && $this->parser->ot['wiki']) || ($flags & self::NO_IGNORE)) &&
 164+ ((($flags & Parser::PTD_FOR_INCLUSION) && $tagName == "includeonly") ||
 165+ (! ($flags & Parser::PTD_FOR_INCLUSION) && $tagName == "noinclude"))) {
 166+ $retStr = "" . $contextNode->getAttribute("tag") . $this->expandRec($contextNode->childNodes, $flags, $headingIndex);
 167+ }
288168 } else {
289 - $outText = ParseEngine::unparse($contextNode);
290 - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
291 - }
292 - } elseif ( $contextNode->nodeName == 'xmltag' ) {
293 - foreach ($contextNode->childNodes as $child) {
294 - $this->expandRec($child->childNodes, $flags, $headingIndex);
295 - }
296 - $tagName = substr($contextNode->getAttribute("startTag"), 1);
297 - $isStripTag = false;
298 - foreach ($this->parser->getStripList() as $stripTag) {
299 - $isStripTag = $tagName == $stripTag;
 169+ foreach ($contextNode->childNodes as $child) {
 170+ $this->expandRec($child->childNodes, $flags, $headingIndex);
 171+ }
 172+ $isStripTag = false;
 173+ foreach ($this->parser->getStripList() as $stripTag) {
 174+ $isStripTag = $tagName == $stripTag;
 175+ if ($isStripTag) {
 176+ break;
 177+ }
 178+ }
300179 if ($isStripTag) {
301 - break;
 180+ $outText = $this->parser->extensionSubstitution($contextNode, $this);
 181+ } else {
 182+ $outText = ParseEngine::unparse($contextNode);
302183 }
 184+ $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
303185 }
304 - if ($isStripTag) {
305 - $outText = $this->parser->extensionSubstitution($contextNode, $this);
306 - } else {
307 - $outText = ParseEngine::unparse($contextNode);
308 - }
309 - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
310186 } elseif ($contextNode->nodeName == 'h' && $contextNode->parentNode->nodeName == 'root' && $this->parser->ot['html']) {
311187 # Insert a heading marker only for <h> children of <root>
312188 # This is to stop extractSections from going over multiple tree levels
@@ -325,70 +201,11 @@
326202 $outText = ParseEngine::unparse($contextNode);
327203 $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
328204 }
329 -print("ParseRecOut - {$contextNode->ownerDocument->saveXML()}\n");
 205+//print("ParseRecOut - {$contextNode->ownerDocument->saveXML()}\n");
 206+ return retStr;
330207 }
331208 }
332209
333 - static function updateIncTags($root, $flags = 0) {
334 - if ( $root instanceof DOMDocument ) {
335 - $root = $root->documentElement;
336 - }
337 - $parent = $root;
338 - if ($parent instanceof DOMNodeList) {
339 - $parent = $parent->item(0)->parentNode;
340 - }
341 - $xpath = new DOMXPath( $parent->ownerDocument );
342 - $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
343 - $ignoreRest = $forInclusion && $xpath->query("xmltag[@startTag='<onlyinclude']", $parent)->length > 0;
344 - $children = array();
345 - $ind = -1;
346 - while ($parent->hasChildNodes()) {
347 - $child = $parent->firstChild;
348 - $parent->removeChild($child);
349 - $tagName = $child instanceof DOMElement ? substr($child->getAttribute("startTag"), 1) : "";
350 - if ($tagName != "onlyinclude" && $ignoreRest) {
351 - if ($ind < 0 || $children[$ind]->nodeName != "ignore") {
352 - $children[] = $parent->ownerDocument->createElement("ignore");
353 - $ind ++;
354 - }
355 - $children[$ind]->appendChild($child);
356 - } elseif ($tagName == "includeonly" || $tagName == "noinclude" || $tagName == "onlyinclude") {
357 - $leftTag = $parent->ownerDocument->createTextNode("<$tagName>");
358 - $rightTag = $parent->ownerDocument->createTextNode("</$tagName>");
359 - $inner = $child->lastChild;
360 - if (($tagName == "includeonly" && ! $forInclusion) || ($tagName == "noinclude" && $forInclusion)) {
361 - $children[] = $parent->ownerDocument->createElement("ignore");
362 - $ind ++;
363 - $children[$ind]->appendChild($leftTag);
364 - while ($inner->hasChildNodes()) {
365 - $gChild = $inner->firstChild;
366 - $inner->removeChild($gChild);
367 - $children[$ind]->appendChild($gChild);
368 - }
369 - $children[$ind]->appendChild($rightTag);
370 - } else {
371 - $children[] = $parent->ownerDocument->createElement("ignore");
372 - $ind ++;
373 - $children[$ind]->appendChild($leftTag);
374 - while ($inner->hasChildNodes()) {
375 - $children[] = $inner->firstChild;
376 - $ind ++;
377 - $inner->removeChild($inner->firstChild);
378 - }
379 - $children[] = $parent->ownerDocument->createElement("ignore");
380 - $ind ++;
381 - $children[$ind]->appendChild($rightTag);
382 - }
383 - } else {
384 - $children[] = $child;
385 - $ind ++;
386 - }
387 - }
388 - foreach ($children as $child) {
389 - $parent->appendChild($child);
390 - }
391 - }
392 -
393210 function __toString() {
394211 return 'frame{}';
395212 }
@@ -551,43 +368,3 @@
552369 }
553370 }
554371
555 -/**
556 - * Expansion frame with custom arguments
557 - * @ingroup Parser
558 - */
559 -class PPCustomFrame extends PPFrame {
560 - private $args;
561 -
562 - function __construct( $args ) {
563 - PPFrame::__construct( );
564 - $this->args = $args;
565 - }
566 -
567 - function __toString() {
568 - $s = 'cstmframe{';
569 - $first = true;
570 - foreach ( $this->args as $name => $value ) {
571 - if ( $first ) {
572 - $first = false;
573 - } else {
574 - $s .= ', ';
575 - }
576 - $s .= "\"$name\":\"" .
577 - str_replace( '"', '\\"', $value->__toString() ) . '"';
578 - }
579 - $s .= '}';
580 - return $s;
581 - }
582 -
583 - function isEmpty() {
584 - return !count( $this->args );
585 - }
586 -
587 - function getArgument( $index ) {
588 - if ( !isset( $this->args[$index] ) ) {
589 - return false;
590 - }
591 - return $this->args[$index];
592 - }
593 -}
594 -
Index: branches/parser-work/phase3/includes/parser/WikiTextGrammar.xml
@@ -2,34 +2,44 @@
33 <Grammar rootTag="root" startRule="start">
44 <Sequence name="start" >
55 <Choice failSafe="true">
6 - <Assignment name="h" tag="(={1,6})" regex="true">
 6+ <Assignment tagName="noinclude" tag="(?=.*(&lt;onlyinclude>))" regex="true">
77 <Reference name="endText" />
88 </Assignment>
 9+ <Reference name="heading" />
910 </Choice>
1011 <Reference name="main" />
1112 </Sequence>
1213 <Text name="main">
13 - <Assignment name="link" tag="[[">
 14+ <Reference name="newLine" />
 15+ <Assignment tagName="link" tag="[[">
1416 <Reference name="endText" var="]]" />
1517 </Assignment>
16 - <Assignment name="h" tag="\n(={1,6})" regex="true">
17 - <Reference name="endText" />
18 - </Assignment>
19 - <Assignment name="tplarg" tag="{{{(?!{)" regex="true">
 18+ <Assignment tagName="tplarg" tag="{{{(?!{)" regex="true">
2019 <Reference name="tplSeq" var="}}}" />
2120 </Assignment>
22 - <Assignment name="template" tag="{{">
 21+ <Assignment tagName="template" tag="{{">
2322 <Reference name="tplSeq" var="}}" />
2423 </Assignment>
25 - <Assignment name="comment" tag="\n?(?:&lt;!--.*?(?:-->\n?|$))+" regex="true" />
26 - <Assignment name="xmltag" tag="&lt;(\w+)(?= |>)" regex="true">
 24+ <Reference name="comment" />
 25+ <Assignment tagName="onlyinclude" tag="&lt;/onlyinclude>">
2726 <Sequence>
28 - <Assignment name="attr">
29 - <Reference name="main" />
30 - </Assignment>
 27+ <Reference name="main" />
 28+ <Choice failSafe="true">
 29+ <Assignment tagName="endtag" tag="&lt;onlyinclude>" />
 30+ </Choice>
 31+ </Sequence>
 32+ </Assignment>
 33+ <Assignment tagName="xmltag" tag="&lt;(?=(\w+)[\s\/>])" regex="true">
 34+ <Sequence>
 35+ <Assignment tagName="name" tag="~r">
 36+ <Choice failSafe="true">
 37+ <Assignment tagName="attr" tag="\s+" regex="true">
 38+ <Reference name="main" />
 39+ </Assignment>
 40+ </Choice>
3141 <Choice>
32 - <EndTag tag="/>" />
33 - <Assignment name="inner" tag=">">
 42+ <Assignment tagName="endtag" tag="/>" />
 43+ <Assignment tagName="inner" tag=">">
3444 <Reference name="endText" var="&lt;/~r>" />
3545 </Assignment>
3646 </Choice>
@@ -38,24 +48,45 @@
3949 </Text>
4050 <Sequence name="endText">
4151 <Reference name="main" />
42 - <EndTag tag="~r" />
 52+ <Assignment tagName="endtag" tag="~r" />
4353 </Sequence>
 54+ <Choice name="ignoreList" failSafe="true">
 55+ <Sequence>
 56+ <Choice>
 57+ <Assignment name="whitespace" tag="[ \t]+" regex="true" />
 58+ <Reference name="comment" />
 59+ </Choice>
 60+ <Reference name="ignoreList" />
 61+ </Sequence>
 62+ </Choice>
 63+ <Assignment name="newLine" tag="\n">
 64+ <Choice failSafe="true">
 65+ <Reference name="heading" />
 66+ </Choice>
 67+ </Assignment>
 68+ <Sequence name="heading">
 69+ <Assignment tagName="h" tag="(={1,6})" regex="true">
 70+ <Reference name="endText" />
 71+ </Assignment>
 72+ <Reference name="ignoreList" />
 73+ <Reference name="newLine" />
 74+ </Sequence>
4475 <Sequence name="tplSeq">
45 - <Assignment name="title">
 76+ <Assignment tagName="title">
4677 <Reference name="main" />
4778 </Assignment>
4879 <Reference name="partList" />
49 - <EndTag tag="~r" />
 80+ <Assignment tagName="endtag" tag="~r" />
5081 </Sequence>
5182 <Choice name="partList" failSafe="true">
5283 <Sequence>
53 - <Assignment name="part" tag="|">
 84+ <Assignment tagName="part" tag="|">
5485 <Sequence>
55 - <Assignment name="first">
 86+ <Assignment tagName="first">
5687 <Reference name="main" />
5788 </Assignment>
5889 <Choice failSafe="true">
59 - <Assignment name="value" tag="=">
 90+ <Assignment tagName="value" tag="=">
6091 <Reference name="main" />
6192 </Assignment>
6293 </Choice>
@@ -64,4 +95,5 @@
6596 <Reference name="partList" />
6697 </Sequence>
6798 </Choice>
 99+ <Assignment name="comment" tagName="comment" tag="&lt;!--.*?(?:-->|$)" regex="true" />
68100 </Grammar>
Index: branches/parser-work/phase3/includes/AutoLoader.php
@@ -443,7 +443,6 @@
444444 'LinkHolderArray' => 'includes/parser/LinkHolderArray.php',
445445 'LinkMarkerReplacer' => 'includes/parser/Parser_LinkHooks.php',
446446 'OnlyIncludeReplacer' => 'includes/parser/Parser.php',
447 - 'PPCustomFrame' => 'includes/parser/Preprocessor.php',
448447 'PPFrame' => 'includes/parser/Preprocessor.php',
449448 'PPTemplateFrame' => 'includes/parser/Preprocessor.php',
450449 'ParseEngine' => 'includes/parser/ParseEngine.php',
@@ -453,7 +452,6 @@
454453 'ParserOutput' => 'includes/parser/ParserOutput.php',
455454 'Parser_DiffTest' => 'includes/parser/Parser_DiffTest.php',
456455 'Parser_LinkHooks' => 'includes/parser/Parser_LinkHooks.php',
457 - 'Preprocessor' => 'includes/parser/Preprocessor.php',
458456 'StripState' => 'includes/parser/Parser.php',
459457 'MWTidy' => 'includes/parser/Tidy.php',
460458

Status & tagging log