r64972 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r64971‎ | r64972 | r64973 >
Date:19:49, 12 April 2010
Author:than4213
Status:deferred
Tags:
Comment:
Add a hook that runs before PreSaveTransform
Modified paths:
  • /branches/parser-work/phase3/docs/hooks.txt (modified) (history)
  • /branches/parser-work/phase3/includes/parser/ParseEngine.php (deleted) (history)
  • /branches/parser-work/phase3/includes/parser/Parser.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/WikiTextGrammar.xml (deleted) (history)

Diff [purge]

Index: branches/parser-work/phase3/docs/hooks.txt
@@ -541,6 +541,9 @@
542542 &$parser: Parser object
543543 &$ig: ImageGallery object
544544
 545+'BeforePreSaveTransform': before wikitext is prepared for saving
 546+&$text: Text that will be transformed
 547+
545548 'BlockIp': before an IP address or user is blocked
546549 $block: the Block object about to be saved
547550 $user: the user _doing_ the block (not the one being blocked)
Index: branches/parser-work/phase3/includes/parser/ParseEngine.php
@@ -1,244 +0,0 @@
2 -<?php
3 -/**
4 - * Acts as the primary interface between the world and the parser.
5 - * mStartRule - the first rule to use while parsing
6 - * mRules - The list of rules to use while parsing
7 - * mDom - Used to create Dom objects and gets returned at the end of parsing
8 - * mIter - Keeps track of how many times the parser recurses to stop endless loops
9 - */
10 -class ParseEngine {
11 - const maxIter = 2048;
12 - private $mGrammar;
13 -
14 - function __construct($grammarFile) {
15 - global $IP;
16 - $this->mGrammar = new DOMDocument();
17 - if (! $this->mGrammar->load("$IP/$grammarFile", LIBXML_NOBLANKS)) {
18 - throw new MWException("Failed to load $grammarFile.");
19 - }
20 - foreach ($this->mGrammar->documentElement->childNodes as $crrnt) {
21 - $this->pushTags($crrnt, NULL);
22 - }
23 - }
24 -
25 - function parse($text) {
26 - global $wgDebugParserLog;
27 - if ($wgDebugParserLog != '') {
28 - wfErrorLog("==========Start Parsing==========\n", $wgDebugParserLog);
29 - }
30 - $doc = new DOMDocument();
31 - $rule = $this->mGrammar->documentElement;
32 - $rootTag = $doc->createElement($rule->getAttribute("rootTag"));
33 - $xpath = new DOMXPath($this->mGrammar);
34 - $startRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("startRule")}']")->item(0);
35 - $iter = 0;
36 - if (! $this->parseRec($startRule, "", $saveTags, $iter, $text, $rootTag)) {
37 - throw new MWException("Failed to parse the given text.");
38 - }
39 - $doc->appendChild($rootTag);
40 - $doc->normalizeDocument();
41 - if ($wgDebugParserLog != '') {
42 - wfErrorLog("XML - {$doc->saveXML()}\n", $wgDebugParserLog);
43 - }
44 - return $doc;
45 - }
46 -
47 - static function unparse($inNodes) {
48 - $retStr = "";
49 - foreach ($inNodes as $child) {
50 - if ($child instanceof DOMText) {
51 - $retStr .= $child->data;
52 - } else {
53 - $retStr .= $child->getAttribute("tag") . self::unparse($child->childNodes);
54 - }
55 - }
56 - return $retStr;
57 - }
58 -
59 - private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) {
60 - global $wgDebugParserLog;
61 - if ($wgDebugParserLog != '') {
62 - wfErrorLog("Entering {$rule->nodeName}, {$rule->getAttribute("name")}\n", $wgDebugParserLog);
63 - }
64 - $iter ++;
65 - if ($iter > ParseEngine::maxIter) {
66 - throw new MWException("Parser iterated too many times. Probable loop in grammar.");
67 - }
68 - if ($rule->nodeName == "Assignment" || $rule->nodeName == "Reference" || $rule->nodeName == "Text") {
69 - $saveTags = str_replace("~r", preg_quote($replaceStr, "/"), $saveTags);
70 - $newTags = $rule->getAttribute("saveTags");
71 - if ($saveTags == "") {
72 - $saveTags = $newTags;
73 - } elseif ($newTags != "") {
74 - $saveTags .= "|" . $newTags;
75 - }
76 - }
77 - $dom = $outNode->ownerDocument;
78 - $retCode = FALSE;
79 - if ($rule->nodeName == "Assignment") {
80 - $tag = $rule->getAttribute("tag");
81 - $foundTag = $tag == NULL;
82 - if (! $foundTag) {
83 - if ($rule->getAttribute("regex") != NULL) {
84 - $tag = str_replace("~r", preg_quote($replaceStr, "/"), $tag);
85 - $foundTag = preg_match("/^$tag/s", $text, $matches);
86 - if ($foundTag) {
87 - $tag = $matches[0];
88 - if (isset($matches[1])) {
89 - $replaceStr = $matches[1];
90 - }
91 - }
92 - } else {
93 - $tag = str_replace("~r", $replaceStr, $tag);
94 - $foundTag = strncmp($tag, $text, strlen($tag)) == 0;
95 - }
96 - }
97 - if ($foundTag) {
98 - $newText = $text;
99 - $newElement = $dom->createElement($rule->getAttribute("tagName"));
100 - if ($tag != NULL) {
101 - $newText = substr($newText, strlen($tag));
102 - $newElement->setAttribute("tag", $tag);
103 - }
104 - $retCode = $rule->firstChild == NULL || $this->parseRec($rule->firstChild, $replaceStr, $saveTags, $iter, $newText, $newElement);
105 - if ($retCode) {
106 - $outNode->appendChild($newElement);
107 - $text = $newText;
108 - }
109 - }
110 - } elseif ($rule->nodeName == "Sequence") {
111 - $saveText = $text;
112 - $saveNode = $outNode->cloneNode(TRUE);
113 - $pushInd = $rule->getAttribute("pushInd");
114 - foreach ($rule->childNodes as $i => $crrnt) {
115 - $pushTags = $i >= $pushInd ? $saveTags : "";
116 - $retCode = $this->parseRec($crrnt, $replaceStr, $pushTags, $iter, $text, $outNode);
117 - if (! $retCode) {
118 - $text = $saveText;
119 - $outNode = $saveNode;
120 - break;
121 - }
122 - }
123 - } elseif ($rule->nodeName == "Choice") {
124 - foreach ($rule->childNodes as $crrnt) {
125 - $retCode = $this->parseRec($crrnt, $replaceStr, $saveTags, $iter, $text, $outNode);
126 - if ($retCode) {
127 - break;
128 - }
129 - }
130 - $retCode |= $rule->getAttribute("failSafe") != NULL;
131 - } elseif ($rule->nodeName == "Reference") {
132 - $newVar = $rule->hasAttribute("var") ? str_replace("~r", $replaceStr, $rule->getAttribute("var")) : $replaceStr;
133 - $xpath = new DOMXPath($this->mGrammar);
134 - $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0);
135 - $retCode = $this->parseRec($refRule, $newVar, $saveTags, $iter, $text, $outNode);
136 - } elseif ($rule->nodeName == "Text") {
137 - $tagSearch = $rule->getAttribute("childTags");
138 - if ($tagSearch == "") {
139 - $tagSearch = $saveTags;
140 - } elseif ($saveTags != "") {
141 - $tagSearch .= "|" . $saveTags;
142 - }
143 - while ($text != "" && ($saveTags == "" || ! preg_match("/^($saveTags)/s", $text))) {
144 - $offset = $rule->firstChild != NULL && $this->parseRec($rule->firstChild, $replaceStr, "", $iter, $text, $outNode) ? 0 : 1;
145 - if (preg_match("/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset)) {
146 - if ($matches[0][1] > 0) {
147 - $outNode->appendChild($dom->createTextNode(substr($text, 0, $matches[0][1])));
148 - $text = substr($text, $matches[0][1]);
149 - }
150 - } else {
151 - $outNode->appendChild($dom->createTextNode($text));
152 - $text = "";
153 - }
154 - }
155 - $retCode = true;
156 - }
157 - if ($wgDebugParserLog != '') {
158 - wfErrorLog("Exiting {$rule->nodeName}, Return Code - $retCode\n", $wgDebugParserLog);
159 - wfErrorLog("Text - $text\n", $wgDebugParserLog);
160 - }
161 - return $retCode;
162 - }
163 -
164 - private function pushTags($rule, $tagStr) {
165 - $iter = 0;
166 - if ($rule->nodeName == "Sequence") {
167 - $pushInd = $rule->childNodes->length - 1;
168 - $shouldPush = true;
169 - for ($child = $rule->lastChild; $child != NULL; $child = $child->previousSibling) {
170 - $this->pushTags($child, $tagStr);
171 - if ($child->previousSibling != NULL) {
172 - if ($this->pullTags($child, $iter, $childTag)) {
173 - if ($shouldPush) {
174 - $pushInd --;
175 - }
176 - if ($tagStr == "") {
177 - $tagStr = $childTag;
178 - } elseif ($childTag != "") {
179 - $tagStr .= "|" . $childTag;
180 - }
181 - } else {
182 - $shouldPush = false;
183 - $tagStr = $childTag;
184 - }
185 - }
186 - }
187 - $rule->setAttribute("pushInd", $pushInd);
188 - } else {
189 - if ($rule->nodeName != "Choice") {
190 - $rule->setAttribute("saveTags", $tagStr);
191 - $tagStr = NULL;
192 - if ($rule->nodeName == "Text") {
193 - $childTags = "";
194 - foreach ($rule->childNodes as $crrnt) {
195 - if ($childTags != "") {
196 - $childTags .= "|";
197 - }
198 - $this->pullTags($crrnt, $iter, $childTag);
199 - $childTags .= $childTag;
200 - }
201 - $rule->setAttribute("childTags", $childTags);
202 - }
203 - }
204 - foreach ($rule->childNodes as $crrnt) {
205 - $this->pushTags($crrnt, $tagStr);
206 - }
207 - }
208 - }
209 -
210 - private function pullTags($rule, &$iter, &$childTags) {
211 - $iter ++;
212 - if ($iter > ParseEngine::maxIter) {
213 - throw new MWException("Collecter iterated too many times. Probable loop in grammar.");
214 - }
215 - $childTags = "";
216 - $failSafe = TRUE;
217 - if ($rule->nodeName == "Assignment") {
218 - $childTags = $rule->getAttribute("tag");
219 - if ($rule->getAttribute("regex") == NULL) {
220 - $childTags = preg_quote($childTags, "/");
221 - }
222 - $failSafe = FALSE;
223 - } elseif ($rule->nodeName == "Choice" || $rule->nodeName == "Sequence") {
224 - $failSafe = $rule->nodeName == "Sequence";
225 - foreach ($rule->childNodes as $child) {
226 - $failSafe = $this->pullTags($child, $iter, $newTags);
227 - if ($childTags == "") {
228 - $childTags = $newTags;
229 - } elseif ($newTags != "") {
230 - $childTags .= "|" . $newTags;
231 - }
232 - if (($failSafe && $rule->nodeName == "Choice") || (! $failSafe && $rule->nodeName == "Sequence")) {
233 - break;
234 - }
235 - }
236 - $failSafe |= $rule->nodeName == "Choice" && $rule->getAttribute("failSafe") != NULL;
237 - } elseif ($rule->nodeName == "Reference") {
238 - $xpath = new DOMXPath($this->mGrammar);
239 - $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0);
240 - $failSafe = $this->pullTags($refRule, $iter, $childTags);
241 - }
242 - return $failSafe;
243 - }
244 -}
245 -
Index: branches/parser-work/phase3/includes/parser/WikiTextGrammar.xml
@@ -1,145 +0,0 @@
2 -<?xml version="1.0"?>
3 -<Grammar rootTag="root" startRule="start" version="1.0">
4 - <Sequence name="start" >
5 - <Reference name="postNewLine" />
6 - <Reference name="main" />
7 - </Sequence>
8 - <Text name="main">
9 - <Choice>
10 - <Sequence>
11 - <Reference name="newLine" />
12 - <Reference name="postNewLine" />
13 - </Sequence>
14 - <Assignment tagName="link" tag="[[">
15 - <Reference name="endText" var="]]" />
16 - </Assignment>
17 - <Assignment tagName="tplArg" tag="{{{(?!{)" regex="true">
18 - <Sequence>
19 - <Reference name="name" />
20 - <Choice failSafe="true">
21 - <Assignment tagName="default" tag="|">
22 - <Reference name="main" />
23 - </Assignment>
24 - </Choice>
25 - <Assignment tagName="endTag" tag="}}}" />
26 - </Sequence>
27 - </Assignment>
28 - <Assignment tagName="template" tag="{{">
29 - <Sequence>
30 - <Reference name="name" />
31 - <Choice failSafe="true">
32 - <Assignment tagName="name2" tag=":">
33 - <Reference name="main" />
34 - </Assignment>
35 - </Choice>
36 - <Reference name="partList" />
37 - <Assignment tagName="endTag" tag="}}" />
38 - </Sequence>
39 - </Assignment>
40 - <Reference name="comment" />
41 - <Assignment tagName="noWiki" tag="&lt;nowiki>">
42 - <Sequence>
43 - <Text />
44 - <Assignment tagName="endTag" tag="&lt;\/nowiki>" />
45 - </Sequence>
46 - </Assignment>
47 - <Assignment tagName="xmlTag" tag="&lt;(?=(\w+)[\s\/>])" regex="true">
48 - <Sequence>
49 - <Assignment tagName="name" tag="~r" />
50 - <Reference name="attrList" />
51 - <Choice>
52 - <Assignment tagName="endTag" tag="\s*\/>" regex="true" />
53 - <Sequence>
54 - <Assignment tagName="inner" tag="\s*>" regex="true">
55 - <Reference name="main" />
56 - </Assignment>
57 - <Assignment tagName="endTag" tag="&lt;/~r>" />
58 - </Sequence>
59 - </Choice>
60 - </Sequence>
61 - </Assignment>
62 - </Choice>
63 - </Text>
64 - <Sequence name="endText">
65 - <Reference name="main" />
66 - <Assignment tagName="endTag" tag="~r" />
67 - </Sequence>
68 - <Assignment name="newLine" tagName="newLine" tag="\r?\n" regex="true" />
69 - <Assignment name="eol" tagName="eol" tag="(?=\n|$)" regex="true" />
70 - <Choice name="ignoreList" failSafe="true">
71 - <Sequence>
72 - <Choice>
73 - <Assignment tag="[ \t]+" regex="true" />
74 - <Reference name="comment" />
75 - </Choice>
76 - <Reference name="ignoreList" />
77 - </Sequence>
78 - </Choice>
79 - <Choice name="postNewLine" failSafe="true">
80 - <Sequence>
81 - <Assignment tagName="h" tag="(={1,6})" regex="true">
82 - <Reference name="endText" />
83 - </Assignment>
84 - <Reference name="ignoreList" />
85 - <Reference name="eol" />
86 - </Sequence>
87 - <Reference name="listChoice" var="" />
88 - </Choice>
89 - <Choice name="listChoice">
90 - <Assignment tagName="orderedList" tag="(?=(~r#))" regex="true">
91 - <Reference name="itemList" />
92 - </Assignment>
93 - <Assignment tagName="unorderedList" tag="(?=(~r\*))" regex="true">
94 - <Reference name="itemList" />
95 - </Assignment>
96 - </Choice>
97 - <Sequence name="itemList">
98 - <Choice>
99 - <Reference name="listChoice" />
100 - <Assignment tagName="listItem" tag="~r">
101 - <Sequence>
102 - <Reference name="main" />
103 - <Reference name="eol" />
104 - </Sequence>
105 - </Assignment>
106 - </Choice>
107 - <Choice failSafe="true">
108 - <Sequence>
109 - <Reference name="newLine" />
110 - <Reference name="itemList" />
111 - </Sequence>
112 - </Choice>
113 - </Sequence>
114 - <Assignment name="comment" tagName="comment" tag="&lt;!--.*?(?:-->|$)" regex="true" />
115 - <Assignment name="name" tagName="name">
116 - <Reference name="main" />
117 - </Assignment>
118 - <Choice name="partList" failSafe="true">
119 - <Sequence>
120 - <Assignment tagName="part" tag="|">
121 - <Sequence>
122 - <Reference name="name" />
123 - <Choice failSafe="true">
124 - <Assignment tagName="value" tag="=">
125 - <Reference name="main" />
126 - </Assignment>
127 - </Choice>
128 - </Sequence>
129 - </Assignment>
130 - <Reference name="partList" />
131 - </Sequence>
132 - </Choice>
133 - <Choice name="attrList" failSafe="true">
134 - <Sequence>
135 - <Assignment tagName="attribute" tag="\s+(?!\/?>)" regex="true">
136 - <Sequence>
137 - <Reference name="name" />
138 - <Assignment tagName="value" tag="\s*=\s*(&quot;|')" regex="true">
139 - <Reference name="endText" />
140 - </Assignment>
141 - </Sequence>
142 - </Assignment>
143 - <Reference name="attrList" />
144 - </Sequence>
145 - </Choice>
146 -</Grammar>
Index: branches/parser-work/phase3/includes/parser/Parser.php
@@ -3978,6 +3978,8 @@
39793979 $this->clearState();
39803980 }
39813981
 3982+ wfRunHooks( 'BeforePreSaveTransform', array( &$text ) );
 3983+
39823984 $pairs = array(
39833985 "\r\n" => "\n",
39843986 );

Status & tagging log