Index: branches/parser-work/phase3/includes/parser/Parser.php |
— | — | @@ -76,7 +76,7 @@ |
77 | 77 | const COLON_STATE_COMMENTDASHDASH = 7; |
78 | 78 | |
79 | 79 | // Flags for preprocessToDom |
80 | | - const PTD_FOR_INCLUSION = 1; |
| 80 | + const PTD_FOR_INCLUSION = 1024; |
81 | 81 | |
82 | 82 | // Allowed values for $this->mOutputType |
83 | 83 | // Parameter to startExternalParse(). |
— | — | @@ -95,7 +95,7 @@ |
96 | 96 | # Persistent: |
97 | 97 | var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, |
98 | 98 | $mSubsts, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, |
99 | | - $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, |
| 99 | + $mParseEngine, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, |
100 | 100 | $mVarCache, $mConf, $mFunctionTagHooks; |
101 | 101 | |
102 | 102 | |
— | — | @@ -137,19 +137,10 @@ |
138 | 138 | $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. |
139 | 139 | '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; |
140 | 140 | $this->mVarCache = array(); |
141 | | - if ( isset( $conf['preprocessorClass'] ) ) { |
142 | | - $this->mPreprocessorClass = $conf['preprocessorClass']; |
143 | | - } elseif ( extension_loaded( 'domxml' ) ) { |
144 | | - // PECL extension that conflicts with the core DOM extension (bug 13770) |
145 | | - wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); |
146 | | - $this->mPreprocessorClass = 'Preprocessor_Hash'; |
147 | | - } elseif ( extension_loaded( 'dom' ) ) { |
148 | | - $this->mPreprocessorClass = 'Preprocessor_DOM'; |
149 | | - } else { |
150 | | - $this->mPreprocessorClass = 'Preprocessor_Hash'; |
151 | | - } |
152 | 141 | $this->mMarkerIndex = 0; |
153 | 142 | $this->mFirstCall = true; |
| 143 | + |
| 144 | + $this->mParseEngine = new ParseEngine("includes/parser/WikiTextGrammar.xml"); |
154 | 145 | } |
155 | 146 | |
156 | 147 | /** |
— | — | @@ -512,7 +503,8 @@ |
513 | 504 | |
514 | 505 | list( $text, $title ) = $this->getTemplateDom( $title ); |
515 | 506 | $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES; |
516 | | - return $this->getPreprocessor()->newFrame()->expand( $text, $flags ); |
| 507 | + $frame = new PPFrame($this); |
| 508 | + return $frame->expand( $text, $flags ); |
517 | 509 | } |
518 | 510 | |
519 | 511 | /** |
— | — | @@ -547,8 +539,7 @@ |
548 | 540 | */ |
549 | 541 | function getPreprocessor() { |
550 | 542 | if ( !isset( $this->mPreprocessor ) ) { |
551 | | - $class = $this->mPreprocessorClass; |
552 | | - $this->mPreprocessor = new $class( $this ); |
| 543 | + $this->mPreprocessor = new Preprocessor( $this->mParseEngine ); |
553 | 544 | } |
554 | 545 | return $this->mPreprocessor; |
555 | 546 | } |
— | — | @@ -922,7 +913,7 @@ |
923 | 914 | else |
924 | 915 | $flag = Parser::PTD_FOR_INCLUSION; |
925 | 916 | $dom = $this->preprocessToDom( $text, $flag ); |
926 | | - $text = $frame->expand( $dom ); |
| 917 | + $text = $frame->expand( $dom, $flag ); |
927 | 918 | } |
928 | 919 | // if $frame is not provided, then use old-style replaceVariables |
929 | 920 | else { |
— | — | @@ -2792,7 +2783,7 @@ |
2793 | 2784 | wfProfileIn( __METHOD__ ); |
2794 | 2785 | |
2795 | 2786 | if ( $frame === false ) { |
2796 | | - $frame = $this->getPreprocessor()->newFrame(); |
| 2787 | + $frame = new PPFrame($this); |
2797 | 2788 | } elseif ( !( $frame instanceof PPFrame ) ) { |
2798 | 2789 | wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); |
2799 | 2790 | $frame = $this->getPreprocessor()->newCustomFrame($frame); |
— | — | @@ -2859,7 +2850,7 @@ |
2860 | 2851 | * @return string the text of the template |
2861 | 2852 | * @private |
2862 | 2853 | */ |
2863 | | - function braceSubstitution( $piece, $frame ) { |
| 2854 | + function braceSubstitution( $template, $frame ) { |
2864 | 2855 | global $wgContLang, $wgNonincludableNamespaces; |
2865 | 2856 | wfProfileIn( __METHOD__ ); |
2866 | 2857 | wfProfileIn( __METHOD__.'-setup' ); |
— | — | @@ -2873,11 +2864,12 @@ |
2874 | 2865 | $isLocalObj = false; # $text is a DOM node needing expansion in the current frame |
2875 | 2866 | |
2876 | 2867 | # Title object, where $text came from |
| 2868 | + $xpath = new DOMXPath($template->ownerDocument); |
2877 | 2869 | $title = null; |
2878 | 2870 | |
2879 | 2871 | # $part1 is the bit before the first |, and must contain only title characters. |
2880 | 2872 | # Various prefixes will be stripped from it later. |
2881 | | - $titleWithSpaces = $frame->expand( $piece['title'] ); |
| 2873 | + $titleWithSpaces = $xpath->query("title", $template)->item(0)->textContent; |
2882 | 2874 | $part1 = trim( $titleWithSpaces ); |
2883 | 2875 | $titleText = false; |
2884 | 2876 | |
— | — | @@ -2885,7 +2877,10 @@ |
2886 | 2878 | $originalTitle = $part1; |
2887 | 2879 | |
2888 | 2880 | # $args is a list of argument nodes, starting from index 0, not including $part1 |
2889 | | - $args = (null == $piece['parts']) ? array() : $piece['parts']; |
| 2881 | + $args = array(); |
| 2882 | + foreach ($xpath->query("part", $template) as $part) { |
| 2883 | + $args[] = $part; |
| 2884 | + } |
2890 | 2885 | wfProfileOut( __METHOD__.'-setup' ); |
2891 | 2886 | |
2892 | 2887 | # SUBST |
— | — | @@ -2900,14 +2895,15 @@ |
2901 | 2896 | # safesubst || (subst && PST) || (false && !PST) => transclude (skip the if) |
2902 | 2897 | # (false && PST) || (subst && !PST) => return input (handled by if) |
2903 | 2898 | if ( $substMatch != 'safesubst' && ($substMatch == 'subst' xor $this->ot['wiki']) ) { |
2904 | | - $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
| 2899 | + $outText = ParseEngine::unparse($template); |
| 2900 | + $template->parentNode->replaceChild($template->ownerDocument->createTextNode($outText), $template); |
2905 | 2901 | $isLocalObj = true; |
2906 | 2902 | $found = true; |
2907 | 2903 | } |
2908 | 2904 | } |
2909 | 2905 | |
2910 | 2906 | # Variables |
2911 | | - if ( !$found && $args->getLength() == 0 ) { |
| 2907 | + if ( !$found && $args->length == 0 ) { |
2912 | 2908 | $id = $this->mVariables->matchStartToEnd( $part1 ); |
2913 | 2909 | if ( $id !== false ) { |
2914 | 2910 | $text = $this->getVariableValue( $id, $frame ); |
— | — | @@ -2964,14 +2960,12 @@ |
2965 | 2961 | # Add a frame parameter, and pass the arguments as an array |
2966 | 2962 | $allArgs = $initialArgs; |
2967 | 2963 | $allArgs[] = $frame; |
2968 | | - for ( $i = 0; $i < $args->getLength(); $i++ ) { |
2969 | | - $funcArgs[] = $args->item( $i ); |
2970 | | - } |
| 2964 | + $funcArgs = array_merge( $funcArgs, $args ); |
2971 | 2965 | $allArgs[] = $funcArgs; |
2972 | 2966 | } else { |
2973 | 2967 | # Convert arguments to plain text |
2974 | | - for ( $i = 0; $i < $args->getLength(); $i++ ) { |
2975 | | - $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) ); |
| 2968 | + foreach ($args as $arg) { |
| 2969 | + $funcArgs[] = substr(ParseEngine::unparse($arg), 1); |
2976 | 2970 | } |
2977 | 2971 | $allArgs = array_merge( $initialArgs, $funcArgs ); |
2978 | 2972 | } |
— | — | @@ -3088,9 +3082,9 @@ |
3089 | 3083 | # If we haven't found text to substitute by now, we're done |
3090 | 3084 | # Recover the source wikitext and return it |
3091 | 3085 | if ( !$found ) { |
3092 | | - $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
| 3086 | + $outText = ParseEngine::unparse($template); |
| 3087 | + $template->parentNode->replaceChild($template->ownerDocument->createTextNode($outText), $template); |
3093 | 3088 | wfProfileOut( __METHOD__ ); |
3094 | | - return array( 'object' => $text ); |
3095 | 3089 | } |
3096 | 3090 | |
3097 | 3091 | # Expand DOM-style return values in a child frame |
— | — | @@ -3105,7 +3099,7 @@ |
3106 | 3100 | if ( isset( $this->mTplExpandCache[$titleText] ) ) { |
3107 | 3101 | $text = $this->mTplExpandCache[$titleText]; |
3108 | 3102 | } else { |
3109 | | - $text = $newFrame->expand( $text ); |
| 3103 | + $text = $newFrame->expand( $text, self::PTD_FOR_INCLUSION ); |
3110 | 3104 | $this->mTplExpandCache[$titleText] = $text; |
3111 | 3105 | } |
3112 | 3106 | } else { |
— | — | @@ -3141,15 +3135,11 @@ |
3142 | 3136 | $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' ); |
3143 | 3137 | $this->limitationWarn( 'post-expand-template-inclusion' ); |
3144 | 3138 | } |
3145 | | - |
3146 | | - if ( $isLocalObj ) { |
3147 | | - $ret = array( 'object' => $text ); |
3148 | | - } else { |
3149 | | - $ret = array( 'text' => $text ); |
| 3139 | + if ($template->parentNode != NULL) { |
| 3140 | + $template->parentNode->replaceChild($template->ownerDocument->createTextNode($text), $template); |
3150 | 3141 | } |
3151 | 3142 | |
3152 | 3143 | wfProfileOut( __METHOD__ ); |
3153 | | - return $ret; |
3154 | 3144 | } |
3155 | 3145 | |
3156 | 3146 | /** |
— | — | @@ -3314,16 +3304,16 @@ |
3315 | 3305 | * Triple brace replacement -- used for template arguments |
3316 | 3306 | * @private |
3317 | 3307 | */ |
3318 | | - function argSubstitution( $piece, $frame ) { |
| 3308 | + function argSubstitution( $tplArg, $frame ) { |
3319 | 3309 | wfProfileIn( __METHOD__ ); |
3320 | 3310 | |
3321 | | - $error = false; |
3322 | | - $parts = $piece['parts']; |
3323 | | - $nameWithSpaces = $frame->expand( $piece['title'] ); |
| 3311 | + $xpath = new DOMXPath($tplArg->ownerDocument); |
| 3312 | + $parts = $xpath->query("part", $tplArg); |
| 3313 | + $nameWithSpaces = $xpath->query("title", $tplArg)->item(0)->textContent; |
3324 | 3314 | $argName = trim( $nameWithSpaces ); |
3325 | 3315 | $object = false; |
3326 | 3316 | $text = $frame->getArgument( $argName ); |
3327 | | - if ( $text === false && $parts->getLength() > 0 |
| 3317 | + if ( $text === false && $parts->length > 0 |
3328 | 3318 | && ( |
3329 | 3319 | $this->ot['html'] |
3330 | 3320 | || $this->ot['pre'] |
— | — | @@ -3331,28 +3321,18 @@ |
3332 | 3322 | ) |
3333 | 3323 | ) { |
3334 | 3324 | # No match in frame, use the supplied default |
3335 | | - $object = $parts->item( 0 )->getChildren(); |
| 3325 | + $text = $parts->item( 0 )->firstChild->textContent; |
3336 | 3326 | } |
3337 | 3327 | if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { |
3338 | | - $error = '<!-- WARNING: argument omitted, expansion size too large -->'; |
| 3328 | + $text .= '<!-- WARNING: argument omitted, expansion size too large -->'; |
3339 | 3329 | $this->limitationWarn( 'post-expand-template-argument' ); |
3340 | 3330 | } |
3341 | | - |
3342 | | - if ( $text === false && $object === false ) { |
3343 | | - # No match anywhere |
3344 | | - $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts ); |
| 3331 | + if ($text == NULL) { |
| 3332 | + $text = ParseEngine::unparse($tplArg); |
3345 | 3333 | } |
3346 | | - if ( $error !== false ) { |
3347 | | - $text .= $error; |
3348 | | - } |
3349 | | - if ( $object !== false ) { |
3350 | | - $ret = array( 'object' => $object ); |
3351 | | - } else { |
3352 | | - $ret = array( 'text' => $text ); |
3353 | | - } |
| 3334 | + $tplArg->parentNode->replaceChild($tplArg->ownerDocument->createTextNode($text), $tplArg); |
3354 | 3335 | |
3355 | 3336 | wfProfileOut( __METHOD__ ); |
3356 | | - return $ret; |
3357 | 3337 | } |
3358 | 3338 | |
3359 | 3339 | /** |
— | — | @@ -3367,12 +3347,14 @@ |
3368 | 3348 | * noClose Original text did not have a close tag |
3369 | 3349 | * @param PPFrame $frame |
3370 | 3350 | */ |
3371 | | - function extensionSubstitution( $params, $frame ) { |
| 3351 | + function extensionSubstitution( $xmltag, $frame ) { |
3372 | 3352 | global $wgRawHtml, $wgContLang; |
3373 | 3353 | |
3374 | | - $name = $frame->expand( $params['name'] ); |
3375 | | - $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); |
3376 | | - $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); |
| 3354 | + $xpath = new DOMXPath($xmltag->ownerDocument); |
| 3355 | + $name = substr($xmltag->getAttribute("startTag"), 1); |
| 3356 | + $attrText = $xpath->query("attr", $xmltag)->item(0)->textContent; |
| 3357 | + $inner = $xpath->query("inner", $xmltag); |
| 3358 | + $content = $inner->length == 0 ? NULL : $inner->item(0)->textContent; |
3377 | 3359 | $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX; |
3378 | 3360 | |
3379 | 3361 | $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower($name)] ) && |
— | — | @@ -3416,21 +3398,7 @@ |
3417 | 3399 | extract( $flags ); |
3418 | 3400 | } |
3419 | 3401 | } else { |
3420 | | - if ( is_null( $attrText ) ) { |
3421 | | - $attrText = ''; |
3422 | | - } |
3423 | | - if ( isset( $params['attributes'] ) ) { |
3424 | | - foreach ( $params['attributes'] as $attrName => $attrValue ) { |
3425 | | - $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' . |
3426 | | - htmlspecialchars( $attrValue ) . '"'; |
3427 | | - } |
3428 | | - } |
3429 | | - if ( $content === null ) { |
3430 | | - $output = "<$name$attrText/>"; |
3431 | | - } else { |
3432 | | - $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] ); |
3433 | | - $output = "<$name$attrText>$content$close"; |
3434 | | - } |
| 3402 | + $output = ParseEngine::unparse($xmltag); |
3435 | 3403 | } |
3436 | 3404 | |
3437 | 3405 | if( $markerType === 'none' ) { |
— | — | @@ -3631,9 +3599,9 @@ |
3632 | 3600 | $baseTitleText = $this->mTitle->getPrefixedDBkey(); |
3633 | 3601 | $oldType = $this->mOutputType; |
3634 | 3602 | $this->setOutputType( self::OT_WIKI ); |
3635 | | - $frame = $this->getPreprocessor()->newFrame(); |
| 3603 | + $frame = new PPFrame($this); |
3636 | 3604 | $root = $this->preprocessToDom( $origText ); |
3637 | | - $node = $root->getFirstChild(); |
| 3605 | + $node = $root->firstChild; |
3638 | 3606 | $byteOffset = 0; |
3639 | 3607 | $tocraw = array(); |
3640 | 3608 | |
— | — | @@ -3810,14 +3778,14 @@ |
3811 | 3779 | # Add the section to the section tree |
3812 | 3780 | # Find the DOM node for this header |
3813 | 3781 | while ( $node && !$isTemplate ) { |
3814 | | - if ( $node->getName() === 'h' ) { |
| 3782 | + if ( $node->nodeName === 'h' ) { |
3815 | 3783 | $bits = $node->splitHeading(); |
3816 | 3784 | if ( $bits['i'] == $sectionIndex ) |
3817 | 3785 | break; |
3818 | 3786 | } |
3819 | 3787 | $byteOffset += mb_strlen( $this->mStripState->unstripBoth( |
3820 | 3788 | $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) ); |
3821 | | - $node = $node->getNextSibling(); |
| 3789 | + $node = $node->nextSibling; |
3822 | 3790 | } |
3823 | 3791 | $tocraw[] = array( |
3824 | 3792 | 'toclevel' => $toclevel, |
— | — | @@ -4196,7 +4164,7 @@ |
4197 | 4165 | $text = preg_replace( $substRegex, $substText, $text ); |
4198 | 4166 | $text = $this->cleanSigInSig( $text ); |
4199 | 4167 | $dom = $this->preprocessToDom( $text ); |
4200 | | - $frame = $this->getPreprocessor()->newFrame(); |
| 4168 | + $frame = new PPFrame($this); |
4201 | 4169 | $text = $frame->expand( $dom ); |
4202 | 4170 | |
4203 | 4171 | if ( !$parsing ) { |
— | — | @@ -4834,7 +4802,7 @@ |
4835 | 4803 | $this->mOptions = new ParserOptions; |
4836 | 4804 | $this->setOutputType( self::OT_WIKI ); |
4837 | 4805 | $outText = ''; |
4838 | | - $frame = $this->getPreprocessor()->newFrame(); |
| 4806 | + $frame = new PPFrame($this); |
4839 | 4807 | |
4840 | 4808 | // Process section extraction flags |
4841 | 4809 | $flags = 0; |
— | — | @@ -4847,10 +4815,11 @@ |
4848 | 4816 | } |
4849 | 4817 | // Preprocess the text |
4850 | 4818 | $root = $this->preprocessToDom( $text, $flags ); |
| 4819 | + PPFrame::updateIncTags($root, $flags); |
4851 | 4820 | |
4852 | 4821 | // <h> nodes indicate section breaks |
4853 | 4822 | // They can only occur at the top level, so we can find them by iterating the root's children |
4854 | | - $node = $root->getFirstChild(); |
| 4823 | + $node = $root->firstChild->firstChild; |
4855 | 4824 | |
4856 | 4825 | // Find the target section |
4857 | 4826 | $ind = 1; |
— | — | @@ -4859,18 +4828,17 @@ |
4860 | 4829 | $targetLevel = 1000; |
4861 | 4830 | } else { |
4862 | 4831 | while ( $node ) { |
4863 | | - if ( $node->getName() === 'h' ) { |
| 4832 | + if ( $node->nodeName === 'h' ) { |
4864 | 4833 | if ( $ind == $sectionIndex ) { |
4865 | | - $bits = $node->splitHeading(); |
4866 | | - $targetLevel = $bits['level']; |
| 4834 | + $targetLevel = strlen($node->getAttribute("endTag")); |
4867 | 4835 | break; |
4868 | 4836 | } |
4869 | 4837 | $ind ++; |
4870 | 4838 | } |
4871 | 4839 | if ( $mode === 'replace' ) { |
4872 | | - $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
| 4840 | + $outText .= ParseEngine::unparse($node); |
4873 | 4841 | } |
4874 | | - $node = $node->getNextSibling(); |
| 4842 | + $node = $node->nextSibling; |
4875 | 4843 | } |
4876 | 4844 | } |
4877 | 4845 | |
— | — | @@ -4885,18 +4853,17 @@ |
4886 | 4854 | |
4887 | 4855 | // Find the end of the section, including nested sections |
4888 | 4856 | do { |
4889 | | - if ( $node->getName() === 'h' ) { |
4890 | | - $bits = $node->splitHeading(); |
4891 | | - $curLevel = $bits['level']; |
| 4857 | + if ( $node->nodeName === 'h' ) { |
| 4858 | + $curLevel = strlen($node->getAttribute("endTag")); |
4892 | 4859 | if ( $ind != $sectionIndex && $curLevel <= $targetLevel ) { |
4893 | 4860 | break; |
4894 | 4861 | } |
4895 | 4862 | $ind ++; |
4896 | 4863 | } |
4897 | 4864 | if ( $mode === 'get' ) { |
4898 | | - $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
| 4865 | + $outText .= ParseEngine::unparse($node); |
4899 | 4866 | } |
4900 | | - $node = $node->getNextSibling(); |
| 4867 | + $node = $node->nextSibling; |
4901 | 4868 | } while ( $node ); |
4902 | 4869 | |
4903 | 4870 | // Write out the remainder (in replace mode only) |
— | — | @@ -4911,7 +4878,7 @@ |
4912 | 4879 | |
4913 | 4880 | while ( $node ) { |
4914 | 4881 | $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
4915 | | - $node = $node->getNextSibling(); |
| 4882 | + $node = $node->nextSibling; |
4916 | 4883 | } |
4917 | 4884 | } |
4918 | 4885 | |
— | — | @@ -4919,6 +4886,9 @@ |
4920 | 4887 | // Re-insert stripped tags |
4921 | 4888 | $outText = rtrim( $this->mStripState->unstripBoth( $outText ) ); |
4922 | 4889 | } |
| 4890 | + if ($outText[0] == "\n") { |
| 4891 | + $outText = substr($outText, 1); |
| 4892 | + } |
4923 | 4893 | |
4924 | 4894 | return $outText; |
4925 | 4895 | } |
Index: branches/parser-work/phase3/includes/parser/ParseEngine.php |
— | — | @@ -1,216 +1,254 @@ |
2 | 2 | <?php |
3 | 3 | /** |
4 | 4 | * Acts as the primary interface between the world and the parser. |
| 5 | + * mStartRule - the first rule to use while parsing |
5 | 6 | * mRules - The list of rules to use while parsing |
6 | | - * mStartRule - the first rule to use while parsing |
7 | 7 | * mDom - Used to create DOM objects and is returned at the end of parsing |
| 8 | + * mIter - Keeps track of how many times the parser recurses to stop endless loops |
8 | 9 | */ |
9 | 10 | class ParseEngine { |
10 | | - const maxIter = 8192; |
11 | | - private $mRules, $mStartRule, $mDom, $mIter; |
| 11 | + const maxIter = 2048; |
| 12 | + private $mGrammar, $mTextPats; |
12 | 13 | |
13 | | - function __construct($rules, $startRule) { |
14 | | - $this->mRules = $rules; |
15 | | - $this->mStartRule = $startRule; |
| 14 | + function __construct($grammarFile) { |
| 15 | + global $IP; |
| 16 | + $this->mGrammar = new DOMDocument(); |
| 17 | + if (! $this->mGrammar->load("$IP/$grammarFile", LIBXML_NOBLANKS)) { |
| 18 | + throw new MWException("Failed to load $grammarFile."); |
| 19 | + } |
| 20 | + foreach ($this->mGrammar->documentElement->childNodes as $crrnt) { |
| 21 | + $this->pushTags($crrnt, NULL); |
| 22 | + } |
16 | 23 | } |
17 | 24 | |
18 | | - function parse(&$text) { |
| 25 | + function parse($text) { |
19 | 26 | global $wgDebugParserLog; |
20 | 27 | if ($wgDebugParserLog != '') { |
21 | 28 | wfErrorLog("==========Start Parsing==========\n", $wgDebugParserLog); |
22 | 29 | } |
23 | | - $this->mIter = 0; |
24 | | - $this->mDom = new DOMDocument(); |
25 | | - if (! $this->callParser($this->mStartRule, $text, $children, NULL)) { |
26 | | - throw new MWException("Parser rejected text."); |
| 30 | + $doc = new DOMDocument(); |
| 31 | + $rule = $this->mGrammar->documentElement; |
| 32 | + $rootTag = $doc->createElement($rule->getAttribute("rootTag")); |
| 33 | + $doc->appendChild($rootTag); |
| 34 | + $xpath = new DOMXPath($this->mGrammar); |
| 35 | + $startRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("startRule")}']")->item(0); |
| 36 | + $iter = 0; |
| 37 | + if (! $this->parseRec($startRule, "", $saveTags, $iter, $text, $rootTag)) { |
| 38 | + throw new MWException("Failed to parse the given text."); |
27 | 39 | } |
28 | | - $this->mDom->appendChild($children[0]); |
29 | | - $this->mDom->normalizeDocument(); |
| 40 | + $doc->normalizeDocument(); |
30 | 41 | if ($wgDebugParserLog != '') { |
31 | | - wfErrorLog("XML - {$this->mDom->saveXML()}\n", $wgDebugParserLog); |
| 42 | + wfErrorLog("XML - {$doc->saveXML()}\n", $wgDebugParserLog); |
32 | 43 | } |
33 | | - return $this->mDom; |
| 44 | + return $doc; |
34 | 45 | } |
35 | 46 | |
36 | | - function callParser($child, &$text, &$children, $replaceStr) { |
37 | | - $childName = get_class($child); |
38 | | - if (is_string($child)) { |
39 | | - $childName = $child; |
40 | | - $child = $this->mRules[$childName]; |
| 47 | + static function unparse($node) { |
| 48 | + $retStr = ""; |
| 49 | + if ($node instanceof DOMElement) { |
| 50 | + $retStr .= $node->getAttribute("startTag"); |
| 51 | + foreach ($node->childNodes as $child) { |
| 52 | + $retStr .= ParseEngine::unparse($child); |
| 53 | + } |
| 54 | + $retStr .= $node->getAttribute("endTag"); |
| 55 | + } else { |
| 56 | + $retStr .= $node->textContent; |
41 | 57 | } |
| 58 | + return $retStr; |
| 59 | + } |
| 60 | + |
| 61 | + private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) { |
42 | 62 | global $wgDebugParserLog; |
43 | 63 | if ($wgDebugParserLog != '') { |
44 | | - wfErrorLog("Entering $childName\n", $wgDebugParserLog); |
| 64 | + wfErrorLog("Entering {$rule->nodeName}, {$rule->getAttribute("name")}\n", $wgDebugParserLog); |
45 | 65 | } |
46 | | - $this->mIter ++; |
47 | | - if ($this->mIter > ParseEngine::maxIter) { |
| 66 | + $iter ++; |
| 67 | + if ($iter > ParseEngine::maxIter) { |
48 | 68 | throw new MWException("Parser iterated too many times. Probable loop in grammar."); |
49 | 69 | } |
50 | | - $retCode = $child->parse($text, $this, $this->mDom, $children, $replaceStr); |
| 70 | + if ($rule->nodeName == "Assignment" || $rule->nodeName == "Reference" || $rule->nodeName == "Text") { |
| 71 | + $saveTags = str_replace("~r", preg_quote($replaceStr, "/"), $saveTags); |
| 72 | + $newTags = $rule->getAttribute("saveTags"); |
| 73 | + if ($saveTags == "") { |
| 74 | + $saveTags = $newTags; |
| 75 | + } elseif ($newTags != "") { |
| 76 | + $saveTags .= "|" . $newTags; |
| 77 | + } |
| 78 | + } |
| 79 | + $dom = $outNode->ownerDocument; |
| 80 | + $retCode = FALSE; |
| 81 | + if ($rule->nodeName == "Assignment") { |
| 82 | + $startPat = $rule->getAttribute("tag"); |
| 83 | + $startTag = NULL; |
| 84 | + if ($rule->getAttribute("regex") != NULL) { |
| 85 | + if (preg_match("/^$startPat/s", $text, $matches)) { |
| 86 | + $startTag = $matches[0]; |
| 87 | + if (isset($matches[1])) { |
| 88 | + $replaceStr = $matches[1]; |
| 89 | + } |
| 90 | + } |
| 91 | + } elseif ($startPat != NULL && strncmp($startPat, $text, strlen($startPat)) == 0) { |
| 92 | + $startTag = $startPat; |
| 93 | + } |
| 94 | + if ($startTag != NULL || $startPat == NULL) { |
| 95 | + $newText = $text; |
| 96 | + $newElement = $dom->createElement($rule->getAttribute("name")); |
| 97 | + if ($startTag != NULL) { |
| 98 | + $newText = substr($newText, strlen($startTag)); |
| 99 | + $newElement->setAttribute("startTag", $startTag); |
| 100 | + } |
| 101 | + $retCode = $rule->firstChild == NULL || $this->parseRec($rule->firstChild, $replaceStr, $saveTags, $iter, $newText, $newElement); |
| 102 | + if ($retCode) { |
| 103 | + $outNode->appendChild($newElement); |
| 104 | + $text = $newText; |
| 105 | + } |
| 106 | + } |
| 107 | + } elseif ($rule->nodeName == "EndTag") { |
| 108 | + $tag = str_replace("~r", $replaceStr, $rule->getAttribute("tag")); |
| 109 | + $tagLength = strlen($tag); |
| 110 | + if (strncmp($tag, $text, $tagLength) == 0) { |
| 111 | + $text = substr($text, $tagLength); |
| 112 | + $outNode->setAttribute("endTag", $tag); |
| 113 | + $retCode = TRUE; |
| 114 | + } |
| 115 | + } elseif ($rule->nodeName == "Sequence") { |
| 116 | + $saveText = $text; |
| 117 | + $saveNode = $outNode->cloneNode(TRUE); |
| 118 | + $pushInd = $rule->getAttribute("pushInd"); |
| 119 | + foreach ($rule->childNodes as $i => $crrnt) { |
| 120 | + $pushTags = $i >= $pushInd ? $saveTags : ""; |
| 121 | + $retCode = $this->parseRec($crrnt, $replaceStr, $pushTags, $iter, $text, $outNode); |
| 122 | + if (! $retCode) { |
| 123 | + $text = $saveText; |
| 124 | + $outNode = $saveNode; |
| 125 | + break; |
| 126 | + } |
| 127 | + } |
| 128 | + } elseif ($rule->nodeName == "Choice") { |
| 129 | + foreach ($rule->childNodes as $crrnt) { |
| 130 | + $retCode = $this->parseRec($crrnt, $replaceStr, $saveTags, $iter, $text, $outNode); |
| 131 | + if ($retCode) { |
| 132 | + break; |
| 133 | + } |
| 134 | + } |
| 135 | + $retCode |= $rule->getAttribute("failSafe") != NULL; |
| 136 | + } elseif ($rule->nodeName == "Reference") { |
| 137 | + $varAttr = $rule->getAttribute("var"); |
| 138 | + $newVar = $varAttr == NULL ? $replaceStr : str_replace("~r", $replaceStr, $varAttr); |
| 139 | + $xpath = new DOMXPath($this->mGrammar); |
| 140 | + $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0); |
| 141 | + $retCode = $this->parseRec($refRule, $newVar, $saveTags, $iter, $text, $outNode); |
| 142 | + } elseif ($rule->nodeName == "Text") { |
| 143 | + $tagSearch = $rule->getAttribute("childTags"); |
| 144 | + if ($saveTags != "") { |
| 145 | + $tagSearch .= "|" . $saveTags; |
| 146 | + } |
| 147 | + while ($text != "" && ($saveTags == "" || ! preg_match("/^($saveTags)/s", $text))) { |
| 148 | + $offset = 1; |
| 149 | + foreach ($rule->childNodes as $crrnt) { |
| 150 | + if ($this->parseRec($crrnt, $replaceStr, "", $iter, $text, $outNode)) { |
| 151 | + $offset = 0; |
| 152 | + break; |
| 153 | + } |
| 154 | + } |
| 155 | + if (preg_match("/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset)) { |
| 156 | + if ($matches[0][1] > 0) { |
| 157 | + $outNode->appendChild($dom->createTextNode(substr($text, 0, $matches[0][1]))); |
| 158 | + $text = substr($text, $matches[0][1]); |
| 159 | + } |
| 160 | + } else { |
| 161 | + $outNode->appendChild($dom->createTextNode($text)); |
| 162 | + $text = ""; |
| 163 | + } |
| 164 | + } |
| 165 | + $retCode = true; |
| 166 | + } |
51 | 167 | if ($wgDebugParserLog != '') { |
52 | | - wfErrorLog("Exiting $childName, Return Code - $retCode\n", $wgDebugParserLog); |
| 168 | + wfErrorLog("Exiting {$rule->nodeName}, Return Code - $retCode\n", $wgDebugParserLog); |
53 | 169 | wfErrorLog("Text - $text\n", $wgDebugParserLog); |
54 | 170 | } |
55 | 171 | return $retCode; |
56 | 172 | } |
57 | | -} |
58 | 173 | |
59 | | - |
60 | | -// Interface for Parse objects each with a specialized task while parsing |
61 | | -interface ParseObject { |
62 | | - // Does the parse task specific to each parse object |
63 | | - function parse(&$text, &$engine, &$dom, &$children, $replaceStr); |
64 | | -} |
65 | | - |
66 | | -/** |
67 | | - * Deals with pattern matching and saving strings from the text. |
68 | | - * mMatchPat - the regular expression used to determine if this is the rule that should be used |
69 | | - */ |
70 | | -class ParsePattern implements ParseObject { |
71 | | - private $mMatchPat; |
72 | | - |
73 | | - function __construct($matchPat) { |
74 | | - $this->mMatchPat = $matchPat; |
75 | | - } |
76 | | - |
77 | | - function parse(&$text, &$engine, &$dom, &$children, $replaceStr) { |
78 | | - $regEx = $this->mMatchPat; |
79 | | - if ($replaceStr != NULL) { |
80 | | - $regEx = str_replace('~r', $replaceStr, $regEx); |
81 | | - } |
82 | | - if (! preg_match($regEx, $text, $matches)) { |
83 | | - return FALSE; |
84 | | - } |
85 | | - $text = substr($text, strlen($matches[0])); |
86 | | - $children = array(); |
87 | | - if (isset($matches[1])) { |
88 | | - $children[] = $dom->createTextNode($matches[1]); |
89 | | - } |
90 | | - return TRUE; |
91 | | - } |
92 | | -} |
93 | | - |
94 | | -/** |
95 | | - * Deals with cases where a rule can be matched multiple or 0 times. |
96 | | - * mChildRule - What Parse rule to quantify |
97 | | - * mMinChildren - Minimum amount of children for this rule |
98 | | - * mMaxChildren - Maximum amount of children for this rule, 0 means unlimited |
99 | | - */ |
100 | | -class ParseQuant implements ParseObject { |
101 | | - private $mChildRule, $mMinChildren, $mMaxChildren; |
102 | | - |
103 | | - function __construct($childRule, $minChildren = 0, $maxChildren = 0) { |
104 | | - $this->mChildRule = $childRule; |
105 | | - $this->mMinChildren = $minChildren; |
106 | | - $this->mMaxChildren = $maxChildren; |
107 | | - } |
108 | | - |
109 | | - function parse(&$text, &$engine, &$dom, &$children, $replaceStr) { |
110 | | - $children = array(); |
111 | | - for ($i = 0; $this->mMaxChildren <= 0 || $i < $this->mMaxChildren; $i ++) { |
112 | | - if (! $engine->callParser($this->mChildRule, $text, $retChildren, $replaceStr)) { |
113 | | - if ($i < $this->mMinChildren) { |
114 | | - return FALSE; |
| 174 | + private function pushTags($rule, $tagStr) { |
| 175 | + $iter = 0; |
| 176 | + if ($rule->nodeName == "Sequence") { |
| 177 | + $pushInd = $rule->childNodes->length - 1; |
| 178 | + $shouldPush = true; |
| 179 | + for ($child = $rule->lastChild; $child != NULL; $child = $child->previousSibling) { |
| 180 | + $this->pushTags($child, $tagStr); |
| 181 | + if ($child->previousSibling != NULL) { |
| 182 | + if ($this->pullTags($child, $iter, $childTag)) { |
| 183 | + if ($shouldPush) { |
| 184 | + $pushInd --; |
| 185 | + } |
| 186 | + if ($tagStr == "") { |
| 187 | + $tagStr = $childTag; |
| 188 | + } elseif ($childTag != "") { |
| 189 | + $tagStr .= "|" . $childTag; |
| 190 | + } |
| 191 | + } else { |
| 192 | + $shouldPush = false; |
| 193 | + $tagStr = $childTag; |
| 194 | + } |
115 | 195 | } |
116 | | - break; |
117 | 196 | } |
118 | | - $children = array_merge($children, $retChildren); |
119 | | - } |
120 | | - return TRUE; |
121 | | - } |
122 | | -} |
123 | | - |
124 | | -/** |
125 | | - * Cycles throug array of rules until it finds one that succeeds |
126 | | - * mList - The list of rules |
127 | | - * mMatchChar - This is a shortcut. If the starting char of the text is different then parse will return FALSE. |
128 | | - */ |
129 | | -class ParseChoice implements ParseObject { |
130 | | - private $mList; |
131 | | - |
132 | | - function __construct() { |
133 | | - $this->mList = $args = func_get_args(); |
134 | | - } |
135 | | - |
136 | | - function parse(&$text, &$engine, &$dom, &$children, $replaceStr) { |
137 | | - foreach ($this->mList as $crrnt) { |
138 | | - $newText = $text; |
139 | | - if ($engine->callParser($crrnt, $newText, $children, $replaceStr)) { |
140 | | - $text = $newText; |
141 | | - return TRUE; |
| 197 | + $rule->setAttribute("pushInd", $pushInd); |
| 198 | + } else { |
| 199 | + if ($rule->nodeName != "Choice" && $rule->nodeName != "EndTag") { |
| 200 | + $rule->setAttribute("saveTags", $tagStr); |
| 201 | + $tagStr = NULL; |
| 202 | + if ($rule->nodeName == "Text") { |
| 203 | + $childTags = ""; |
| 204 | + foreach ($rule->childNodes as $crrnt) { |
| 205 | + if ($childTags != "") { |
| 206 | + $childTags .= "|"; |
| 207 | + } |
| 208 | + $this->pullTags($crrnt, $iter, $childTag); |
| 209 | + $childTags .= $childTag; |
| 210 | + } |
| 211 | + $rule->setAttribute("childTags", $childTags); |
| 212 | + } |
142 | 213 | } |
| 214 | + foreach ($rule->childNodes as $crrnt) { |
| 215 | + $this->pushTags($crrnt, $tagStr); |
| 216 | + } |
143 | 217 | } |
144 | | - return FALSE; |
145 | 218 | } |
146 | | -} |
147 | 219 | |
148 | | -/** |
149 | | - * Contains a sequence of rules all of which must pass |
150 | | - * mList - The sequence of rules |
151 | | - * mReplaceStr - A string used to determine the close tag of bracketed markup |
152 | | - * mSaveStr - Boolean specifying wheter to pull mReplaceStr from text |
153 | | - */ |
154 | | -class ParseSeq implements ParseObject { |
155 | | - private $mList, $mReplaceStr, $mSaveStr; |
156 | | - |
157 | | - function __construct($list, $replaceStr = NULL, $saveStr = FALSE) { |
158 | | - $this->mList = $list; |
159 | | - $this->mReplaceStr = $replaceStr; |
160 | | - $this->mSaveStr = $saveStr; |
161 | | - } |
162 | | - |
163 | | - function parse(&$text, &$engine, &$dom, &$children, $replaceStr) { |
164 | | - if ($this->mReplaceStr != NULL) { |
165 | | - if ($replaceStr != NULL) { |
166 | | - $replaceStr = str_replace('~r', $replaceStr, $this->mReplaceStr); |
167 | | - } else { |
168 | | - $replaceStr = $this->mReplaceStr; |
169 | | - } |
| 220 | + private function pullTags($rule, &$iter, &$childTags) { |
| 221 | + $iter ++; |
| 222 | + if ($iter > ParseEngine::maxIter) { |
| 223 | + throw new MWException("Collecter iterated too many times. Probable loop in grammar."); |
170 | 224 | } |
171 | | - $children = array(); |
172 | | - foreach ($this->mList as $i => $crrnt) { |
173 | | - if (! $engine->callParser($crrnt, $text, $retChildren, $replaceStr)) { |
174 | | - return FALSE; |
| 225 | + $childTags = ""; |
| 226 | + $failSafe = TRUE; |
| 227 | + if ($rule->nodeName == "EndTag" || $rule->nodeName == "Assignment") { |
| 228 | + $childTags = $rule->getAttribute("tag"); |
| 229 | + if ($rule->nodeName != "Assignment" || $rule->getAttribute("regex") == NULL) { |
| 230 | + $childTags = preg_quote($childTags, "/"); |
175 | 231 | } |
176 | | - if ($i == 0 && $this->mSaveStr && isset($retChildren[0]) && $retChildren[0] instanceof DOMText) { |
177 | | - $replaceStr = $retChildren[0]->wholeText; |
178 | | - } else { |
179 | | - $children = array_merge($children, $retChildren); |
| 232 | + $failSafe = FALSE; |
| 233 | + } elseif ($rule->nodeName == "Choice" || $rule->nodeName == "Sequence") { |
| 234 | + $failSafe = $rule->nodeName == "Sequence"; |
| 235 | + foreach ($rule->childNodes as $child) { |
| 236 | + $failSafe = $this->pullTags($child, $iter, $newTags); |
| 237 | + if ($childTags == "") { |
| 238 | + $childTags = $newTags; |
| 239 | + } elseif ($newTags != "") { |
| 240 | + $childTags .= "|" . $newTags; |
| 241 | + } |
| 242 | + if (($failSafe && $rule->nodeName == "Choice") || (! $failSafe && $rule->nodeName == "Sequence")) { |
| 243 | + break; |
| 244 | + } |
180 | 245 | } |
| 246 | + $failSafe |= $rule->nodeName == "Choice" && $rule->getAttribute("failSafe") != NULL; |
| 247 | + } elseif ($rule->nodeName == "Reference") { |
| 248 | + $xpath = new DOMXPath($this->mGrammar); |
| 249 | + $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0); |
| 250 | + $failSafe = $this->pullTags($refRule, $iter, $childTags); |
181 | 251 | } |
182 | | - return TRUE; |
| 252 | + return $failSafe; |
183 | 253 | } |
184 | 254 | } |
185 | 255 | |
186 | | -/** |
187 | | - * Creates a Dom element |
188 | | - * mName - The name to give the resultant ParseTree object |
189 | | - * mAttrName - name of an attribute to add to the element |
190 | | - * mAttrValue - value of the attribute |
191 | | - */ |
192 | | -class ParseAssign implements ParseObject { |
193 | | - private $mName, $mChildRule, $mAttrName, $mAttrValue; |
194 | | - |
195 | | - function __construct($name, $childRule, $attrName = NULL, $attrValue = NULL) { |
196 | | - $this->mName = $name; |
197 | | - $this->mChildRule = $childRule; |
198 | | - $this->mAttrName = $attrName; |
199 | | - $this->mAttrValue = $attrValue; |
200 | | - } |
201 | | - |
202 | | - function parse(&$text, &$engine, &$dom, &$children, $replaceStr) { |
203 | | - if (! $engine->callParser($this->mChildRule, $text, $retChildren, $replaceStr)) { |
204 | | - return FALSE; |
205 | | - } |
206 | | - $retNode = $dom->createElement($this->mName); |
207 | | - foreach ($retChildren as $child) { |
208 | | - $retNode->appendChild($child); |
209 | | - } |
210 | | - if ($this->mAttrName != NULL && $this->mAttrValue != NULL) { |
211 | | - $retNode->setAttribute($this->mAttrName, $this->mAttrValue); |
212 | | - } |
213 | | - $children = array($retNode); |
214 | | - return TRUE; |
215 | | - } |
216 | | -} |
217 | | - |
Index: branches/parser-work/phase3/includes/parser/Preprocessor.php |
— | — | @@ -3,24 +3,119 @@ |
4 | 4 | /** |
5 | 5 | * @ingroup Parser |
6 | 6 | */ |
7 | | -interface Preprocessor { |
8 | | - /** Create a new preprocessor object based on an initialised Parser object */ |
9 | | - function __construct( $parser ); |
| 7 | +class Preprocessor { |
| 8 | + private $mParser, $memoryLimit; |
10 | 9 | |
11 | | - /** Create a new top-level frame for expansion of a page */ |
12 | | - function newFrame(); |
| 10 | + const CACHE_VERSION = 1; |
13 | 11 | |
14 | | - /** Create a new custom frame for programmatic use of parameter replacement as used in some extensions */ |
15 | | - function newCustomFrame( $args ); |
| | + /**
| | + * Bind the preprocessor to its parser and cache PHP's memory_limit in bytes.
| | + *
| | + * memoryLimit stays false when the limit is unset, unlimited (-1) or written in a
| | + * format that is not recognised; memCheck() then skips its checks.
| | + *
| | + * @param $parser Object stored in $this->mParser; preprocessToObj() calls parse() on it
| | + */
| 12 | + function __construct( $parser ) {
| 13 | + $this->mParser = $parser;
| 14 | + $mem = ini_get( 'memory_limit' );
| 15 | + $this->memoryLimit = false;
| 16 | + if ( strval( $mem ) !== '' && $mem != -1 ) {
| 17 | + if ( preg_match( '/^\d+$/', $mem ) ) {
| 18 | + $this->memoryLimit = $mem;
| | + } elseif ( preg_match( '/^(\d+)([KMG])$/i', $mem, $m ) ) {
| | + // PHP shorthand byte values: K = 2^10, M = 2^20, G = 2^30
| | + $shift = array( 'K' => 10, 'M' => 20, 'G' => 30 );
| | + $this->memoryLimit = $m[1] * pow( 2, $shift[strtoupper( $m[2] )] );
| 21 | + }
| 22 | + }
| 23 | + }
16 | 24 | |
17 | | - /** Preprocess text to a PPNode */ |
18 | | - function preprocessToObj( $text, $flags = 0 ); |
| | + /**
| | + * Compare current memory usage with the cached memory limit.
| | + *
| | + * Returns nothing when no limit is known, throws MWException once usage exceeds
| | + * 90% of the limit, and otherwise returns true while usage is at or below 80% of it.
| | + */
| 25 | + function memCheck() {
| | + if ( $this->memoryLimit !== false ) {
| 29 | + $usage = memory_get_usage();
| | + $ceiling = $this->memoryLimit * 0.9;
| | + if ( $usage > $ceiling ) {
| | + // Report the ceiling rounded to the nearest megabyte
| | + $limit = intval( $ceiling / 1048576 + 0.5 );
| 32 | + throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
| 33 | + }
| 34 | + return $usage <= $this->memoryLimit * 0.8;
| | + }
| 35 | + }
| 36 | + |
| 37 | + /** |
| 38 | + * Preprocess some wikitext and return the document tree. |
| 39 | + * This is the ghost of Parser::replace_variables(). |
| 40 | + * |
| 41 | + * @param string $text The text to parse |
| 42 | + * @param integer $flags Bitwise combination of:
| 43 | + * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being |
| 44 | + * included. Default is to assume a direct page view. |
| 45 | + * |
| 46 | + * The generated DOM tree must depend only on the input text and the flags. |
| 47 | + * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. |
| 48 | + * |
| 49 | + * Any flag added to the $flags parameter here, or any other parameter liable to cause a |
| 50 | + * change in the DOM tree for a given text, must be passed through the section identifier |
| 51 | + * in the section edit link and thus back to extractSections(). |
| 52 | + * |
| 53 | + * The output of this function is currently only cached in process memory, but a persistent |
| 54 | + * cache may be implemented at a later date which takes further advantage of these strict |
| 55 | + * dependency requirements. |
| 56 | + * |
| 57 | + * @private |
| 58 | + */ |
| 59 | + function preprocessToObj( $text, $flags = 0 ) {
| 60 | + wfProfileIn( __METHOD__ );
| 61 | + global $wgMemc, $wgPreprocessorCacheThreshold;
| 62 | + 
| 63 | + $xml = false;
| | + // Only texts longer than $wgPreprocessorCacheThreshold go through memcached
| 64 | + $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
| 65 | + if ( $cacheable ) {
| 66 | + wfProfileIn( __METHOD__.'-cacheable' );
| 67 | + 
| | + // The key depends on the text hash and on $flags
| 68 | + $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
| 69 | + $cacheValue = $wgMemc->get( $cacheKey );
| 70 | + if ( $cacheValue ) {
| | + // A cached value is an 8-character version prefix followed by the XML;
| | + // entries written under another CACHE_VERSION are ignored
| 71 | + $version = substr( $cacheValue, 0, 8 );
| 72 | + if ( intval( $version ) == self::CACHE_VERSION ) {
| 73 | + $xml = substr( $cacheValue, 8 );
| 74 | + // From the cache
| 75 | + wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
| 76 | + }
| 77 | + }
| 78 | + }
| 79 | + $dom = false;
| 80 | + if ( $xml === false ) {
| | + // Nothing usable in the cache: parse the text directly
| 81 | + if ( $cacheable ) {
| 82 | + wfProfileIn( __METHOD__.'-cache-miss' );
| 83 | + }
| | + // NOTE(review): $flags is part of the cache key but is not passed to parse() - confirm this is intended
| 84 | + $dom = $this->mParser->parse($text);
| 85 | + if ( $cacheable ) {
| | + // Store version prefix + serialised DOM with an expiry argument of 86400
| 86 | + $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $dom->saveXML();
| 87 | + $wgMemc->set( $cacheKey, $cacheValue, 86400 );
| 88 | + wfProfileOut( __METHOD__.'-cache-miss' );
| 89 | + wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
| 90 | + }
| 91 | + } else {
| | + // Cache hit: rebuild the DOM from the stored XML
| 92 | + wfProfileIn( __METHOD__.'-loadXML' );
| 93 | + $dom = new DOMDocument;
| 94 | + wfSuppressWarnings();
| 95 | + $result = $dom->loadXML( $xml );
| 96 | + wfRestoreWarnings();
| 97 | + if ( !$result ) {
| 98 | + // Try running the XML through UtfNormal to get rid of invalid characters
| 99 | + $xml = UtfNormal::cleanUp( $xml );
| 100 | + $result = $dom->loadXML( $xml );
| 101 | + if ( !$result ) {
| 102 | + throw new MWException( __METHOD__.' generated invalid XML' );
| 103 | + }
| 104 | + }
| 105 | + wfProfileOut( __METHOD__.'-loadXML' );
| 106 | + }
| 107 | + if ( $cacheable ) {
| 108 | + wfProfileOut( __METHOD__.'-cacheable' );
| 109 | + }
| 110 | + wfProfileOut( __METHOD__ );
| 111 | + return $dom;
| 112 | + }
19 | 113 | } |
20 | 114 | |
21 | 115 | /** |
| 116 | + * An expansion frame, used as a context to expand the result of preprocessToObj() |
22 | 117 | * @ingroup Parser |
23 | 118 | */ |
24 | | -interface PPFrame { |
| 119 | +class PPFrame { |
25 | 120 | const NO_ARGS = 1; |
26 | 121 | const NO_TEMPLATES = 2; |
27 | 122 | const STRIP_COMMENTS = 4; |
— | — | @@ -29,150 +124,470 @@ |
30 | 125 | |
31 | 126 | const RECOVER_ORIG = 27; // = 1|2|8|16 no constant expression support in PHP yet |
32 | 127 | |
33 | | - /** |
34 | | - * Create a child frame |
35 | | - */ |
36 | | - function newChild( $args = false, $title = false ); |
| 128 | + protected $parser, $title, $titleCache; |
37 | 129 | |
38 | 130 | /** |
39 | | - * Expand a document tree node |
| 131 | + * Hashtable listing templates which are disallowed for expansion in this frame, |
| 132 | + * having been encountered previously in parent frames. |
40 | 133 | */ |
41 | | - function expand( $root, $flags = 0 ); |
| 134 | + protected $loopCheckHash; |
42 | 135 | |
43 | 136 | /** |
44 | | - * Implode with flags for expand() |
| 137 | + * Recursion depth of this frame, top = 0 |
| 138 | + * Note that this is NOT the same as expansion depth in expand() |
45 | 139 | */ |
46 | | - function implodeWithFlags( $sep, $flags /*, ... */ ); |
| 140 | + protected $depth; |
47 | 141 | |
48 | | - /** |
49 | | - * Implode with no flags specified |
50 | | - */ |
51 | | - function implode( $sep /*, ... */ ); |
52 | 142 | |
53 | 143 | /** |
54 | | - * Makes an object that, when expand()ed, will be the same as one obtained |
55 | | - * with implode() |
| 144 | + * Construct a new preprocessor frame. |
| 145 | + * @param Preprocessor $parser The parent parser |
56 | 146 | */ |
57 | | - function virtualImplode( $sep /*, ... */ ); |
| | + function __construct( $parser = null ) {
| | + // $parser is optional because PPCustomFrame constructs its parent without one;
| | + // such a frame simply has no parser and no title.
| 148 | + $this->parser = $parser;
| | + $this->title = $parser ? $parser->mTitle : null;
| | + // Level 0 of the title cache is this frame's own title key, or false without a title
| 150 | + $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false );
| 151 | + $this->loopCheckHash = array();
| 152 | + $this->depth = 0;
| 153 | + }
58 | 154 | |
| | + /**
| | + * Read-only access to protected frame state from outside the class.
| | + * Only "depth" is exposed; any other property name yields NULL.
| | + *
| | + * @param $var Name of the property being read
| | + */
| 155 | + function __get($var) {
| | + // Must be a comparison (not an assignment) and must read the member, not a local
| | + if ($var === "depth") {
| | + return $this->depth;
| 159 | + }
| | + return NULL;
| 161 | + }
59 | 162 | /** |
60 | | - * Virtual implode with brackets |
| 163 | + * Create a new child frame |
| 164 | + * $args is optionally a multi-root PPNode or array containing the template arguments |
61 | 165 | */ |
62 | | - function virtualBracketedImplode( $start, $sep, $end /*, ... */ ); |
| 166 | + function newChild( $args = false, $title = false ) {
| | + $numberedArgs = array();
| | + $namedArgs = array();
| | + // The child inherits this frame's title unless one is given
| | + $childTitle = ( $title === false ) ? $this->title : $title;
| | + $xpath = false;
| | + $nextIndex = 1;
| | + // Each argument node is queried for a "first" child and an optional "value" child
| | + foreach ( ( $args !== false ? $args : array() ) as $arg ) {
| | + if ( !$xpath ) {
| | + // One XPath object, created lazily from the first argument's document
| | + $xpath = new DOMXPath( $arg->ownerDocument );
| | + }
| | + $first = $xpath->query( 'first', $arg )->item(0)->textContent;
| | + $values = $xpath->query( 'value', $arg );
| | + if ( $values->length > 0 ) {
| | + // Named parameter: "first" holds the name, trimmed before use as a key
| | + $namedArgs[trim($first)] = $values->item( 0 )->textContent;
| | + } else {
| | + // Numbered parameter: "first" holds the value itself
| | + $numberedArgs[$nextIndex] = $first;
| | + $nextIndex ++;
| | + }
| | + }
| | + return new PPTemplateFrame( $this, $numberedArgs, $namedArgs, $childTitle );
| 192 | + }
63 | 193 | |
64 | | - /** |
65 | | - * Returns true if there are no arguments in this frame |
66 | | - */ |
67 | | - function isEmpty(); |
| 194 | + function expand( $root, $flags = 0 ) { |
| 195 | + static $expansionDepth = 0; |
| 196 | + if ( is_string( $root ) ) { |
| 197 | + return $root; |
| 198 | + } |
68 | 199 | |
69 | | - /** |
70 | | - * Returns all arguments of this frame |
71 | | - */ |
72 | | - function getArguments(); |
| 200 | + if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount ) |
| 201 | + { |
| 202 | + return '<span class="error">Node-count limit exceeded</span>'; |
| 203 | + } |
73 | 204 | |
74 | | - /** |
75 | | - * Returns all numbered arguments of this frame |
76 | | - */ |
77 | | - function getNumberedArguments(); |
| 205 | + if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) { |
| 206 | + return '<span class="error">Expansion depth limit exceeded</span>'; |
| 207 | + } |
| 208 | + wfProfileIn( __METHOD__ ); |
| 209 | + ++$expansionDepth; |
78 | 210 | |
79 | | - /** |
80 | | - * Returns all named arguments of this frame |
81 | | - */ |
82 | | - function getNamedArguments(); |
| 211 | + if ( $root instanceof DOMDocument ) { |
| 212 | + $root = $root->documentElement; |
| 213 | + } |
| 214 | + if (! $root instanceof DOMElement ) { |
| 215 | + throw new MWException( __METHOD__.': Invalid parameter type' ); |
| 216 | + } |
| 217 | +//print("UpdIn - {$root->ownerDocument->saveXML()}\n"); |
| 218 | + PPFrame::updateIncTags($root, $flags); |
83 | 219 | |
| 220 | +print("ParseIn - {$root->ownerDocument->saveXML()}\n"); |
| 221 | + $headingIndex = 1; |
| 222 | + $this->expandRec($root->childNodes, $flags, $headingIndex); |
| 223 | + $output = $root->textContent; |
| 224 | +print("ParseOut - {$output}\n"); |
| 225 | + |
| 226 | + --$expansionDepth; |
| 227 | + wfProfileOut( __METHOD__ ); |
| 228 | + return $output; |
| 229 | + } |
| 230 | + |
| | + /**
| | + * Recursively expand a node, or every element of a node list, in place.
| | + * Expanded elements are replaced in their parent by text nodes (or removed), so
| | + * expand() can read the final output from the root's textContent.
| | + *
| | + * @param $contextNode DOMNodeList or a single DOM node
| | + * @param $flags Bitwise combination of the PPFrame flag constants
| | + * @param $headingIndex Running heading counter, incremented per heading marker inserted
| | + */
| 231 | + private function expandRec($contextNode, $flags, &$headingIndex) {
| 232 | + if ($contextNode instanceof DOMNodeList) {
| 233 | + for ($i = 0; $i < $contextNode->length; $i ++) {
| 234 | + $child = $contextNode->item($i);
| 235 | + if ($child instanceof DOMElement) {
| 236 | + $this->expandRec($child, $flags, $headingIndex);
| | + // Re-examine the same index: the branches below replace the element with text or remove it.
| | + // NOTE(review): assumes braceSubstitution()/argSubstitution() do the same - otherwise this loop cannot advance
| 237 | + $i --;
| 238 | + }
| 239 | + }
| 240 | + } else {
| 242 | + if (($contextNode->nodeName == 'template' || $contextNode->nodeName == 'tplarg') && ! ($flags & self::NO_ARGS)) {
| | + // Expand the contents of the title and of every part before substituting
| 243 | + foreach ($contextNode->childNodes as $child) {
| 244 | + if ($child->nodeName == "part") {
| 245 | + foreach ($child->childNodes as $partChild) {
| 246 | + $this->expandRec($partChild->childNodes, $flags, $headingIndex);
| 247 | + }
| 248 | + } else {
| 249 | + $this->expandRec($child->childNodes, $flags, $headingIndex);
| 250 | + }
| 251 | + }
| 252 | + if ( $contextNode->nodeName == 'template' ) {
| 253 | + $this->parser->braceSubstitution($contextNode, $this);
| 254 | + } else {
| 255 | + $this->parser->argSubstitution($contextNode, $this);
| 256 | + }
| 257 | + } elseif ( $contextNode->nodeName == 'comment' ) {
| 258 | + $comment = $contextNode->getAttribute("startTag");
| 259 | + # HTML-style comment
| 260 | + # Remove it in HTML, pre+remove and STRIP_COMMENTS modes
| 261 | + if ( $this->parser->ot['html']
| 262 | + || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() ) 
| 263 | + || ( $flags & self::STRIP_COMMENTS ) ) 
| 264 | + {
| | + # substr() rather than string offsets, so an empty attribute raises no offset error
| | + if (substr($comment, 0, 1) == "\n" || substr($comment, -1) == "\n") {
| 266 | + $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode("\n"), $contextNode);
| 267 | + } else {
| 268 | + $contextNode->parentNode->removeChild($contextNode);
| 269 | + }
| 270 | + }
| 271 | + # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
| 272 | + # Not in RECOVER_COMMENTS mode (extractSections) though
| 273 | + elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
| 274 | + $outText = $this->parser->insertStripItem($contextNode->getAttribute("startTag"));
| 275 | + $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
| 276 | + }
| 277 | + # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
| 278 | + else {
| 279 | + $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($comment), $contextNode);
| 280 | + }
| 281 | + } elseif ($contextNode->nodeName == 'ignore') {
| 282 | + # Output suppression used by <includeonly> etc.
| 283 | + # OT_WIKI will only respect <ignore> in substed templates.
| 284 | + # The other output types respect it unless NO_IGNORE is set.
| 285 | + # extractSections() sets NO_IGNORE and so never respects it.
| 286 | + if (($this instanceof PPTemplateFrame || ! $this->parser->ot['wiki']) && ! ($flags & self::NO_IGNORE)) {
| 287 | + $contextNode->parentNode->removeChild($contextNode);
| 288 | + } else {
| 289 | + $outText = ParseEngine::unparse($contextNode);
| 290 | + $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
| 291 | + }
| 292 | + } elseif ( $contextNode->nodeName == 'xmltag' ) {
| 293 | + foreach ($contextNode->childNodes as $child) {
| 294 | + $this->expandRec($child->childNodes, $flags, $headingIndex);
| 295 | + }
| | + # The tag name is the startTag attribute without its first character
| 296 | + $tagName = substr($contextNode->getAttribute("startTag"), 1);
| 297 | + $isStripTag = false;
| 298 | + foreach ($this->parser->getStripList() as $stripTag) {
| 299 | + $isStripTag = $tagName == $stripTag;
| 300 | + if ($isStripTag) {
| 301 | + break;
| 302 | + }
| 303 | + }
| 304 | + if ($isStripTag) {
| 305 | + $outText = $this->parser->extensionSubstitution($contextNode, $this);
| 306 | + } else {
| 307 | + $outText = ParseEngine::unparse($contextNode);
| 308 | + }
| 309 | + $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
| 310 | + } elseif ($contextNode->nodeName == 'h' && $contextNode->parentNode->nodeName == 'root' && $this->parser->ot['html']) {
| 311 | + # Insert a heading marker only for <h> children of <root>
| 312 | + # This is to stop extractSections from going over multiple tree levels
| 313 | + # Insert heading index marker
| 314 | + $this->expandRec($contextNode->childNodes, $flags, $headingIndex);
| 315 | + $titleText = $this->title->getPrefixedDBkey();
| 316 | + $this->parser->mHeadings[] = array( $titleText, $headingIndex );
| 317 | + $serial = count( $this->parser->mHeadings ) - 1;
| 318 | + $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
| 319 | + $this->parser->mStripState->general->setPair( $marker, '' );
| 320 | + $outText = $contextNode->getAttribute("startTag") . $marker . $contextNode->firstChild->wholeText . 
| 321 | + $contextNode->getAttribute("endTag");
| 322 | + $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
| 323 | + $headingIndex ++;
| 324 | + } else {
| | + # Any other element: expand its children, then replace it by its unparsed text
| 325 | + $this->expandRec($contextNode->childNodes, $flags, $headingIndex);
| 326 | + $outText = ParseEngine::unparse($contextNode);
| 327 | + $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
| 328 | + }
| 330 | + }
| 331 | + }
| 332 | + |
| | + /**
| | + * Rewrite the <includeonly>, <noinclude> and <onlyinclude> children of $root in place,
| | + * wrapping everything that should be suppressed in <ignore> elements.
| | + *
| | + * @param $root DOMDocument, DOM element or DOMNodeList whose children are rewritten
| | + * @param $flags Only the Parser::PTD_FOR_INCLUSION bit is examined
| | + */
| 333 | + static function updateIncTags($root, $flags = 0) {
| 334 | + if ( $root instanceof DOMDocument ) {
| 335 | + $root = $root->documentElement;
| 336 | + }
| 337 | + $parent = $root;
| 338 | + if ($parent instanceof DOMNodeList) {
| | + // For a node list, work on the parent of its first item
| 339 | + $parent = $parent->item(0)->parentNode;
| 340 | + }
| 341 | + $xpath = new DOMXPath( $parent->ownerDocument );
| 342 | + $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
| | + // When included and an <onlyinclude> child exists, everything outside it is ignored
| 343 | + $ignoreRest = $forInclusion && $xpath->query("xmltag[@startTag='<onlyinclude']", $parent)->length > 0;
| | + // Children are detached one by one, collected in $children ($ind is the last index),
| | + // and re-attached in order at the end
| 344 | + $children = array();
| 345 | + $ind = -1;
| 346 | + while ($parent->hasChildNodes()) {
| 347 | + $child = $parent->firstChild;
| 348 | + $parent->removeChild($child);
| | + // Tag name is the startTag attribute minus its first character; empty for non-elements
| 349 | + $tagName = $child instanceof DOMElement ? substr($child->getAttribute("startTag"), 1) : "";
| 350 | + if ($tagName != "onlyinclude" && $ignoreRest) {
| | + // Consecutive ignored children share a single <ignore> wrapper
| 351 | + if ($ind < 0 || $children[$ind]->nodeName != "ignore") {
| 352 | + $children[] = $parent->ownerDocument->createElement("ignore");
| 353 | + $ind ++;
| 354 | + }
| 355 | + $children[$ind]->appendChild($child);
| 356 | + } elseif ($tagName == "includeonly" || $tagName == "noinclude" || $tagName == "onlyinclude") {
| 357 | + $leftTag = $parent->ownerDocument->createTextNode("<$tagName>");
| 358 | + $rightTag = $parent->ownerDocument->createTextNode("</$tagName>");
| | + // NOTE(review): assumes the tag element has a last child holding its content - confirm against the grammar
| 359 | + $inner = $child->lastChild;
| 360 | + if (($tagName == "includeonly" && ! $forInclusion) || ($tagName == "noinclude" && $forInclusion)) {
| | + // Wrong mode for this tag: both tags and the whole content go into one <ignore>
| 361 | + $children[] = $parent->ownerDocument->createElement("ignore");
| 362 | + $ind ++;
| 363 | + $children[$ind]->appendChild($leftTag);
| 364 | + while ($inner->hasChildNodes()) {
| 365 | + $gChild = $inner->firstChild;
| 366 | + $inner->removeChild($gChild);
| 367 | + $children[$ind]->appendChild($gChild);
| 368 | + }
| 369 | + $children[$ind]->appendChild($rightTag);
| 370 | + } else {
| | + // Content applies: only the literal tags are ignored, the content is hoisted to this level
| 371 | + $children[] = $parent->ownerDocument->createElement("ignore");
| 372 | + $ind ++;
| 373 | + $children[$ind]->appendChild($leftTag);
| 374 | + while ($inner->hasChildNodes()) {
| 375 | + $children[] = $inner->firstChild;
| 376 | + $ind ++;
| 377 | + $inner->removeChild($inner->firstChild);
| 378 | + }
| 379 | + $children[] = $parent->ownerDocument->createElement("ignore");
| 380 | + $ind ++;
| 381 | + $children[$ind]->appendChild($rightTag);
| 382 | + }
| 383 | + } else {
| 384 | + $children[] = $child;
| 385 | + $ind ++;
| 386 | + }
| 387 | + }
| 388 | + foreach ($children as $child) {
| 389 | + $parent->appendChild($child);
| 390 | + }
| 391 | + }
| 392 | + |
| | + /**
| | + * Debug representation of a frame without arguments.
| | + */
| 393 | + function __toString() {
| | + $repr = 'frame{}';
| | + return $repr;
| 395 | + }
| 396 | + |
| | + /**
| | + * Get the prefixed DB key of this frame's title, or the key cached for a given level.
| | + *
| | + * @param $level Index into the title cache, or false for this frame's own title
| | + * @return The key, or false when nothing is cached for $level
| | + */
| 397 | + function getPDBK( $level = false ) {
| | + if ( $level !== false ) {
| | + return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false;
| | + }
| | + return $this->title->getPrefixedDBkey();
| 403 | + }
| 404 | + |
| | + /**
| | + * Returns all arguments of this frame; the base frame has none.
| | + */
| 405 | + function getArguments() {
| | + $none = array();
| | + return $none;
| 407 | + }
| 408 | + |
| | + /**
| | + * Returns all numbered arguments of this frame; the base frame has none.
| | + */
| 409 | + function getNumberedArguments() {
| | + $none = array();
| | + return $none;
| 411 | + }
| 412 | + |
| | + /**
| | + * Returns all named arguments of this frame; the base frame has none.
| | + */
| 413 | + function getNamedArguments() {
| | + $none = array();
| | + return $none;
| 415 | + }
| 416 | + |
84 | 417 | /** |
85 | | - * Get an argument to this frame by name |
| 418 | + * Returns true if there are no arguments in this frame |
86 | 419 | */ |
87 | | - function getArgument( $name ); |
| 420 | + function isEmpty() {
| | + // The base frame never carries arguments
| | + $empty = true;
| | + return $empty;
| 422 | + }
88 | 423 | |
| | + /**
| | + * Get an argument to this frame by name; the base frame has none, so this is always false.
| | + */
| 424 | + function getArgument( $name ) {
| | + $notFound = false;
| | + return $notFound;
| 426 | + }
| 427 | + |
89 | 428 | /** |
90 | 429 | * Returns true if the infinite loop check is OK, false if a loop is detected |
91 | 430 | */ |
92 | | - function loopCheck( $title ); |
| 431 | + function loopCheck( $title ) {
| | + // A title already recorded in the hash was seen in a parent frame
| | + $pdbk = $title->getPrefixedDBkey();
| | + $seenBefore = isset( $this->loopCheckHash[$pdbk] );
| | + return !$seenBefore;
| 433 | + }
93 | 434 | |
94 | 435 | /** |
95 | 436 | * Return true if the frame is a template frame |
96 | 437 | */ |
97 | | - function isTemplate(); |
| 438 | + function isTemplate() {
| | + // The base frame is not a template frame
| | + $isTemplate = false;
| | + return $isTemplate;
| 440 | + }
98 | 441 | } |
99 | 442 | |
100 | 443 | /** |
101 | | - * There are three types of nodes: |
102 | | - * * Tree nodes, which have a name and contain other nodes as children |
103 | | - * * Array nodes, which also contain other nodes but aren't considered part of a tree |
104 | | - * * Leaf nodes, which contain the actual data |
105 | | - * |
106 | | - * This interface provides access to the tree structure and to the contents of array nodes, |
107 | | - * but it does not provide access to the internal structure of leaf nodes. Access to leaf |
108 | | - * data is provided via two means: |
109 | | - * * PPFrame::expand(), which provides expanded text |
110 | | - * * The PPNode::split*() functions, which provide metadata about certain types of tree node |
| 444 | + * Expansion frame with template arguments |
111 | 445 | * @ingroup Parser |
112 | 446 | */ |
113 | | -interface PPNode { |
114 | | - /** |
115 | | - * Get an array-type node containing the children of this node. |
116 | | - * Returns false if this is not a tree node. |
117 | | - */ |
118 | | - function getChildren(); |
| 447 | +class PPTemplateFrame extends PPFrame { |
| 448 | + private $numberedArgs, $namedArgs, $parent, $numberedExpansionCache, $namedExpansionCache; |
119 | 449 | |
120 | | - /** |
121 | | - * Get the first child of a tree node. False if there isn't one. |
122 | | - */ |
123 | | - function getFirstChild(); |
| | + /**
| | + * Build a child frame below $parent holding the given template arguments.
| | + *
| | + * @param $parent Frame this one is derived from; its parser, title cache,
| | + * loop-check hash and depth are read here
| | + * @param $numberedArgs Array of positional argument values
| | + * @param $namedArgs Array of argument values keyed by name
| | + * @param $title Title of this frame, or false
| | + */
| 450 | + function __construct( $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
| | + parent::__construct( $parent->parser );
| | + $pdbk = $title ? $title->getPrefixedDBkey() : false;
| | + $this->parent = $parent;
| | + $this->title = $title;
| | + $this->numberedArgs = $numberedArgs;
| | + $this->namedArgs = $namedArgs;
| | + // Extend the parent's title cache with this frame's key
| | + $this->titleCache = $parent->titleCache;
| | + $this->titleCache[] = $pdbk;
| | + // Arrays are copied on assignment, so the parent's hash is left untouched
| | + $this->loopCheckHash = $parent->loopCheckHash;
| | + if ( $pdbk !== false ) {
| | + $this->loopCheckHash[$pdbk] = true;
| | + }
| | + $this->depth = $parent->depth + 1;
| | + $this->numberedExpansionCache = array();
| | + $this->namedExpansionCache = array();
| 465 | + }
124 | 466 | |
| | + /**
| | + * Debug representation listing every argument as "name":"value".
| | + * Argument values may be plain strings (newChild() stores text content) or DOM nodes.
| | + */
| 467 | + function __toString() {
| | + $parts = array();
| | + // "+" keeps the numbered entry when a named key collides with a numeric one
| | + foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
| | + // Only DOM nodes can be serialised through their owner document
| | + $text = is_string( $value ) ? $value : $value->ownerDocument->saveXML( $value );
| | + $parts[] = "\"$name\":\"" . str_replace( '"', '\\"', $text ) . '"';
| | + }
| | + return 'tplframe{' . implode( ', ', $parts ) . '}';
| 482 | + }
125 | 483 | /** |
126 | | - * Get the next sibling of any node. False if there isn't one |
| 484 | + * Returns true if there are no arguments in this frame |
127 | 485 | */ |
128 | | - function getNextSibling(); |
| 486 | + function isEmpty() {
| | + // Empty only when there are neither numbered nor named arguments
| | + $hasAny = count( $this->numberedArgs ) || count( $this->namedArgs );
| | + return !$hasAny;
| 488 | + }
129 | 489 | |
130 | | - /** |
131 | | - * Get all children of this tree node which have a given name. |
132 | | - * Returns an array-type node, or false if this is not a tree node. |
133 | | - */ |
134 | | - function getChildrenOfType( $type ); |
| | + /**
| | + * Returns all arguments of this frame, expanded, keyed by index or name.
| | + * Numbered keys are visited first, then named keys.
| | + */
| 490 | + function getArguments() {
| 491 | + $arguments = array();
| | + foreach ( $this->numberedArgs as $key => $unused ) {
| | + $arguments[$key] = $this->getArgument($key);
| | + }
| | + foreach ( $this->namedArgs as $key => $unused ) {
| | + $arguments[$key] = $this->getArgument($key);
| | + }
| 497 | + return $arguments;
| 498 | + }
| 499 | + |
| | + /**
| | + * Returns the expanded value for every numbered argument key,
| | + * each looked up through getArgument().
| | + */
| 500 | + function getNumberedArguments() {
| | + $expanded = array();
| | + foreach ( $this->numberedArgs as $key => $unused ) {
| | + $expanded[$key] = $this->getArgument($key);
| | + }
| | + return $expanded;
| 506 | + }
| 507 | + |
| | + /**
| | + * Returns the expanded value for every named argument key,
| | + * each looked up through getArgument().
| | + */
| 508 | + function getNamedArguments() {
| | + $expanded = array();
| | + foreach ( $this->namedArgs as $key => $unused ) {
| | + $expanded[$key] = $this->getArgument($key);
| | + }
| | + return $expanded;
| 514 | + }
135 | 515 | |
| | + /**
| | + * Get the expanded value of a numbered argument, or false if it does not exist.
| | + * The expansion is done by the parent frame and cached per index.
| | + */
| 516 | + function getNumberedArgument( $index ) {
| | + if ( isset( $this->numberedArgs[$index] ) ) {
| | + if ( !isset( $this->numberedExpansionCache[$index] ) ) {
| 521 | + # No trimming for unnamed arguments
| | + $raw = $this->numberedArgs[$index];
| | + $this->numberedExpansionCache[$index] = $this->parent->expand( $raw, self::STRIP_COMMENTS );
| | + }
| | + return $this->numberedExpansionCache[$index];
| | + }
| | + return false;
| 525 | + }
136 | 526 | |
137 | | - /** |
138 | | - * Returns the length of the array, or false if this is not an array-type node |
139 | | - */ |
140 | | - function getLength(); |
| | + /**
| | + * Get the expanded, trimmed value of a named argument, or false if it does not exist.
| | + * The expansion is done by the parent frame and cached per name.
| | + */
| 527 | + function getNamedArgument( $name ) {
| | + if ( isset( $this->namedArgs[$name] ) ) {
| | + if ( !isset( $this->namedExpansionCache[$name] ) ) {
| 532 | + # Trim named arguments post-expand, for backwards compatibility
| | + $expanded = $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS );
| | + $this->namedExpansionCache[$name] = trim( $expanded );
| | + }
| | + return $this->namedExpansionCache[$name];
| | + }
| | + return false;
| 537 | + }
141 | 538 | |
142 | | - /** |
143 | | - * Returns an item of an array-type node |
144 | | - */ |
145 | | - function item( $i ); |
| | + /**
| | + * Get an argument by index or name: numbered arguments are tried first,
| | + * named arguments second. Returns false when neither exists.
| | + */
| 539 | + function getArgument( $name ) {
| | + $numbered = $this->getNumberedArgument( $name );
| | + return ( $numbered === false ) ? $this->getNamedArgument( $name ) : $numbered;
| 545 | + }
146 | 546 | |
147 | 547 | /** |
148 | | - * Get the name of this node. The following names are defined here: |
149 | | - * |
150 | | - * h A heading node. |
151 | | - * template A double-brace node. |
152 | | - * tplarg A triple-brace node. |
153 | | - * title The first argument to a template or tplarg node. |
154 | | - * part Subsequent arguments to a template or tplarg node. |
155 | | - * #nodelist An array-type node |
156 | | - * |
157 | | - * The subclass may define various other names for tree and leaf nodes. |
| 548 | + * Return true if the frame is a template frame |
158 | 549 | */ |
159 | | - function getName(); |
| 550 | + function isTemplate() {
| | + // Frames of this class are template frames
| | + $isTemplate = true;
| | + return $isTemplate;
| 552 | + }
| 553 | +} |
160 | 554 | |
161 | | - /** |
162 | | - * Split a <part> node into an associative array containing: |
163 | | - * name PPNode name |
164 | | - * index String index |
165 | | - * value PPNode value |
166 | | - */ |
167 | | - function splitArg(); |
| 555 | +/** |
| 556 | + * Expansion frame with custom arguments |
| 557 | + * @ingroup Parser |
| 558 | + */ |
| 559 | +class PPCustomFrame extends PPFrame { |
| 560 | + private $args; |
168 | 561 | |
169 | | - /** |
170 | | - * Split an <ext> node into an associative array containing name, attr, inner and close |
171 | | - * All values in the resulting array are PPNodes. Inner and close are optional. |
172 | | - */ |
173 | | - function splitExt(); |
| 562 | + function __construct( $args ) { // $args: the custom arguments, later looked up by key in getArgument()
| 563 | + parent::__construct(); // run the base-frame constructor; parent:: follows the extends clause, so it stays correct if the base class changes
| 564 | + $this->args = $args;
| 565 | + }
174 | 566 | |
175 | | - /** |
176 | | - * Split an <h> node |
177 | | - */ |
178 | | - function splitHeading(); |
| 567 | + function __toString() {
| 568 | + // Debug rendering of the frame: cstmframe{"name":"value", ...}
| 569 | + $pairs = array();
| 570 | + foreach ( $this->args as $key => $node ) {
| 571 | + // Backslash-escape double quotes inside the value so they cannot
| 572 | + // be mistaken for the quotes that delimit the field
| 573 | + $escaped = str_replace( '"', '\\"', $node->__toString() );
| 574 | + $pairs[] = "\"$key\":\"" . $escaped . '"';
| 575 | + }
| 576 | + // Joining with ', ' places the separator between entries only, so
| 577 | + // no leading or trailing comma appears
| 578 | + $body = implode( ', ', $pairs );
| 579 | + $s = 'cstmframe{' . $body . '}';
| 580 | + return $s;
| 581 | + }
| 582 | + |
| 583 | + function isEmpty() { // true when the stored argument list has no entries
| 584 | + return count( $this->args ) === 0;
| 585 | + }
| 586 | + |
| 587 | + function getArgument( $index ) {
| 588 | + // Look the argument up by key. isset() also treats a stored null as
| 589 | + // missing, so such an entry yields false as well.
| 590 | + $found = isset( $this->args[$index] );
| 591 | + return $found ? $this->args[$index] : false;
| 592 | + }
179 | 593 | } |
| 594 | + |
Index: branches/parser-work/phase3/includes/parser/WikiTextGrammar.xml |
— | — | @@ -0,0 +1,67 @@ |
| 2 | +<?xml version="1.0"?> |
| 3 | +<Grammar rootTag="root" startRule="start"> |
| 4 | + <Sequence name="start" > |
| 5 | + <Choice failSafe="true"> |
| 6 | + <Assignment name="h" tag="(={1,6})" regex="true"> |
| 7 | + <Reference name="endText" /> |
| 8 | + </Assignment> |
| 9 | + </Choice> |
| 10 | + <Reference name="main" /> |
| 11 | + </Sequence> |
| 12 | + <Text name="main"> |
| 13 | + <Assignment name="link" tag="[["> |
| 14 | + <Reference name="endText" var="]]" /> |
| 15 | + </Assignment> |
| 16 | + <Assignment name="h" tag="\n(={1,6})" regex="true"> |
| 17 | + <Reference name="endText" /> |
| 18 | + </Assignment> |
| 19 | + <Assignment name="tplarg" tag="{{{(?!{)" regex="true"> |
| 20 | + <Reference name="tplSeq" var="}}}" /> |
| 21 | + </Assignment> |
| 22 | + <Assignment name="template" tag="{{"> |
| 23 | + <Reference name="tplSeq" var="}}" /> |
| 24 | + </Assignment> |
| 25 | + <Assignment name="comment" tag="\n?(?:&lt;!--.*?(?:-->\n?|$))+" regex="true" />
| 26 | + <Assignment name="xmltag" tag="&lt;(\w+)(?= |>)" regex="true">
| 27 | + <Sequence>
| 28 | + <Assignment name="attr">
| 29 | + <Reference name="main" />
| 30 | + </Assignment>
| 31 | + <Choice>
| 32 | + <EndTag tag="/>" />
| 33 | + <Assignment name="inner" tag=">">
| 34 | + <Reference name="endText" var="&lt;/~r>" />
| 35 | + </Assignment> |
| 36 | + </Choice> |
| 37 | + </Sequence> |
| 38 | + </Assignment> |
| 39 | + </Text> |
| 40 | + <Sequence name="endText"> |
| 41 | + <Reference name="main" /> |
| 42 | + <EndTag tag="~r" /> |
| 43 | + </Sequence> |
| 44 | + <Sequence name="tplSeq"> |
| 45 | + <Assignment name="title"> |
| 46 | + <Reference name="main" /> |
| 47 | + </Assignment> |
| 48 | + <Reference name="partList" /> |
| 49 | + <EndTag tag="~r" /> |
| 50 | + </Sequence> |
| 51 | + <Choice name="partList" failSafe="true"> |
| 52 | + <Sequence> |
| 53 | + <Assignment name="part" tag="|"> |
| 54 | + <Sequence> |
| 55 | + <Assignment name="first"> |
| 56 | + <Reference name="main" /> |
| 57 | + </Assignment> |
| 58 | + <Choice failSafe="true"> |
| 59 | + <Assignment name="value" tag="="> |
| 60 | + <Reference name="main" /> |
| 61 | + </Assignment> |
| 62 | + </Choice> |
| 63 | + </Sequence> |
| 64 | + </Assignment> |
| 65 | + <Reference name="partList" /> |
| 66 | + </Sequence> |
| 67 | + </Choice> |
| 68 | +</Grammar> |
Index: branches/parser-work/phase3/includes/AutoLoader.php |
— | — | @@ -443,30 +443,10 @@ |
444 | 444 | 'LinkHolderArray' => 'includes/parser/LinkHolderArray.php', |
445 | 445 | 'LinkMarkerReplacer' => 'includes/parser/Parser_LinkHooks.php', |
446 | 446 | 'OnlyIncludeReplacer' => 'includes/parser/Parser.php', |
447 | | - 'PPCustomFrame_Hash' => 'includes/parser/Preprocessor_Hash.php', |
448 | | - 'PPCustomFrame_DOM' => 'includes/parser/Preprocessor_DOM.php', |
449 | | - 'PPDAccum_Hash' => 'includes/parser/Preprocessor_Hash.php', |
450 | | - 'PPDPart_Hash' => 'includes/parser/Preprocessor_Hash.php', |
451 | | - 'PPDStackElement_Hash' => 'includes/parser/Preprocessor_Hash.php', |
452 | | - 'PPDStack_Hash' => 'includes/parser/Preprocessor_Hash.php', |
| 447 | + 'PPCustomFrame' => 'includes/parser/Preprocessor.php', |
453 | 448 | 'PPFrame' => 'includes/parser/Preprocessor.php', |
454 | | - 'PPFrame_DOM' => 'includes/parser/Preprocessor_DOM.php', |
455 | | - 'PPFrame_Hash' => 'includes/parser/Preprocessor_Hash.php', |
456 | | - 'PPNode' => 'includes/parser/Preprocessor.php', |
457 | | - 'PPNode_DOM' => 'includes/parser/Preprocessor_DOM.php', |
458 | | - 'PPNode_Hash_Array' => 'includes/parser/Preprocessor_Hash.php', |
459 | | - 'PPNode_Hash_Attr' => 'includes/parser/Preprocessor_Hash.php', |
460 | | - 'PPNode_Hash_Text' => 'includes/parser/Preprocessor_Hash.php', |
461 | | - 'PPNode_Hash_Tree' => 'includes/parser/Preprocessor_Hash.php', |
462 | | - 'PPTemplateFrame_DOM' => 'includes/parser/Preprocessor_DOM.php', |
463 | | - 'PPTemplateFrame_Hash' => 'includes/parser/Preprocessor_Hash.php', |
464 | | - 'ParseAssign' => 'includes/parser/ParseEngine.php', |
465 | | - 'ParseChoice' => 'includes/parser/ParseEngine.php', |
| 449 | + 'PPTemplateFrame' => 'includes/parser/Preprocessor.php', |
466 | 450 | 'ParseEngine' => 'includes/parser/ParseEngine.php', |
467 | | - 'ParseObject' => 'includes/parser/ParseEngine.php', |
468 | | - 'ParsePattern' => 'includes/parser/ParseEngine.php', |
469 | | - 'ParseQuant' => 'includes/parser/ParseEngine.php', |
470 | | - 'ParseSeq' => 'includes/parser/ParseEngine.php', |
471 | 451 | 'Parser' => 'includes/parser/Parser.php', |
472 | 452 | 'ParserCache' => 'includes/parser/ParserCache.php', |
473 | 453 | 'ParserOptions' => 'includes/parser/ParserOptions.php', |
— | — | @@ -474,8 +454,6 @@ |
475 | 455 | 'Parser_DiffTest' => 'includes/parser/Parser_DiffTest.php', |
476 | 456 | 'Parser_LinkHooks' => 'includes/parser/Parser_LinkHooks.php', |
477 | 457 | 'Preprocessor' => 'includes/parser/Preprocessor.php', |
478 | | - 'Preprocessor_DOM' => 'includes/parser/Preprocessor_DOM.php', |
479 | | - 'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php', |
480 | 458 | 'StripState' => 'includes/parser/Parser.php', |
481 | 459 | 'MWTidy' => 'includes/parser/Tidy.php', |
482 | 460 | |