Index: branches/parser-work/phase3/docs/hooks.txt |
— | — | @@ -541,6 +541,9 @@ |
542 | 542 | &$parser: Parser object |
543 | 543 | &$ig: ImageGallery object |
544 | 544 | |
| 545 | +'BeforePreSaveTransform': before wikitext is prepared for saving |
| 546 | +&$text: Wikitext about to be transformed; passed by reference so handlers can alter it before saving |
| 547 | + |
545 | 548 | 'BlockIp': before an IP address or user is blocked |
546 | 549 | $block: the Block object about to be saved |
547 | 550 | $user: the user _doing_ the block (not the one being blocked) |
Index: branches/parser-work/phase3/includes/parser/ParseEngine.php |
— | — | @@ -1,244 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * Acts as the primary interface between the world and the parser. |
5 | | - * mStartRule - the first rule to use while parsing |
6 | | - * mRules - The list of rules to use while parsing |
7 | | - * mDom - Used to create Dom objects and get's returned at the end of parsing |
8 | | - * mIter - Keeps track of how many times the parser recurses to stop endless loops |
9 | | - */ |
10 | | -class ParseEngine { |
11 | | - const maxIter = 2048; |
12 | | - private $mGrammar; |
13 | | - |
14 | | - function __construct($grammarFile) { |
15 | | - global $IP; |
16 | | - $this->mGrammar = new DOMDocument(); |
17 | | - if (! $this->mGrammar->load("$IP/$grammarFile", LIBXML_NOBLANKS)) { |
18 | | - throw new MWException("Failed to load $grammarFile."); |
19 | | - } |
20 | | - foreach ($this->mGrammar->documentElement->childNodes as $crrnt) { |
21 | | - $this->pushTags($crrnt, NULL); |
22 | | - } |
23 | | - } |
24 | | - |
25 | | - function parse($text) { |
26 | | - global $wgDebugParserLog; |
27 | | - if ($wgDebugParserLog != '') { |
28 | | - wfErrorLog("==========Start Parsing==========\n", $wgDebugParserLog); |
29 | | - } |
30 | | - $doc = new DOMDocument(); |
31 | | - $rule = $this->mGrammar->documentElement; |
32 | | - $rootTag = $doc->createElement($rule->getAttribute("rootTag")); |
33 | | - $xpath = new DOMXPath($this->mGrammar); |
34 | | - $startRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("startRule")}']")->item(0); |
35 | | - $iter = 0; |
36 | | - if (! $this->parseRec($startRule, "", $saveTags, $iter, $text, $rootTag)) { |
37 | | - throw new MWException("Failed to parse the given text."); |
38 | | - } |
39 | | - $doc->appendChild($rootTag); |
40 | | - $doc->normalizeDocument(); |
41 | | - if ($wgDebugParserLog != '') { |
42 | | - wfErrorLog("XML - {$doc->saveXML()}\n", $wgDebugParserLog); |
43 | | - } |
44 | | - return $doc; |
45 | | - } |
46 | | - |
47 | | - static function unparse($inNodes) { |
48 | | - $retStr = ""; |
49 | | - foreach ($inNodes as $child) { |
50 | | - if ($child instanceof DOMText) { |
51 | | - $retStr .= $child->data; |
52 | | - } else { |
53 | | - $retStr .= $child->getAttribute("tag") . self::unparse($child->childNodes); |
54 | | - } |
55 | | - } |
56 | | - return $retStr; |
57 | | - } |
58 | | - |
59 | | - private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) { |
60 | | - global $wgDebugParserLog; |
61 | | - if ($wgDebugParserLog != '') { |
62 | | - wfErrorLog("Entering {$rule->nodeName}, {$rule->getAttribute("name")}\n", $wgDebugParserLog); |
63 | | - } |
64 | | - $iter ++; |
65 | | - if ($iter > ParseEngine::maxIter) { |
66 | | - throw new MWException("Parser iterated too many times. Probable loop in grammar."); |
67 | | - } |
68 | | - if ($rule->nodeName == "Assignment" || $rule->nodeName == "Reference" || $rule->nodeName == "Text") { |
69 | | - $saveTags = str_replace("~r", preg_quote($replaceStr, "/"), $saveTags); |
70 | | - $newTags = $rule->getAttribute("saveTags"); |
71 | | - if ($saveTags == "") { |
72 | | - $saveTags = $newTags; |
73 | | - } elseif ($newTags != "") { |
74 | | - $saveTags .= "|" . $newTags; |
75 | | - } |
76 | | - } |
77 | | - $dom = $outNode->ownerDocument; |
78 | | - $retCode = FALSE; |
79 | | - if ($rule->nodeName == "Assignment") { |
80 | | - $tag = $rule->getAttribute("tag"); |
81 | | - $foundTag = $tag == NULL; |
82 | | - if (! $foundTag) { |
83 | | - if ($rule->getAttribute("regex") != NULL) { |
84 | | - $tag = str_replace("~r", preg_quote($replaceStr, "/"), $tag); |
85 | | - $foundTag = preg_match("/^$tag/s", $text, $matches); |
86 | | - if ($foundTag) { |
87 | | - $tag = $matches[0]; |
88 | | - if (isset($matches[1])) { |
89 | | - $replaceStr = $matches[1]; |
90 | | - } |
91 | | - } |
92 | | - } else { |
93 | | - $tag = str_replace("~r", $replaceStr, $tag); |
94 | | - $foundTag = strncmp($tag, $text, strlen($tag)) == 0; |
95 | | - } |
96 | | - } |
97 | | - if ($foundTag) { |
98 | | - $newText = $text; |
99 | | - $newElement = $dom->createElement($rule->getAttribute("tagName")); |
100 | | - if ($tag != NULL) { |
101 | | - $newText = substr($newText, strlen($tag)); |
102 | | - $newElement->setAttribute("tag", $tag); |
103 | | - } |
104 | | - $retCode = $rule->firstChild == NULL || $this->parseRec($rule->firstChild, $replaceStr, $saveTags, $iter, $newText, $newElement); |
105 | | - if ($retCode) { |
106 | | - $outNode->appendChild($newElement); |
107 | | - $text = $newText; |
108 | | - } |
109 | | - } |
110 | | - } elseif ($rule->nodeName == "Sequence") { |
111 | | - $saveText = $text; |
112 | | - $saveNode = $outNode->cloneNode(TRUE); |
113 | | - $pushInd = $rule->getAttribute("pushInd"); |
114 | | - foreach ($rule->childNodes as $i => $crrnt) { |
115 | | - $pushTags = $i >= $pushInd ? $saveTags : ""; |
116 | | - $retCode = $this->parseRec($crrnt, $replaceStr, $pushTags, $iter, $text, $outNode); |
117 | | - if (! $retCode) { |
118 | | - $text = $saveText; |
119 | | - $outNode = $saveNode; |
120 | | - break; |
121 | | - } |
122 | | - } |
123 | | - } elseif ($rule->nodeName == "Choice") { |
124 | | - foreach ($rule->childNodes as $crrnt) { |
125 | | - $retCode = $this->parseRec($crrnt, $replaceStr, $saveTags, $iter, $text, $outNode); |
126 | | - if ($retCode) { |
127 | | - break; |
128 | | - } |
129 | | - } |
130 | | - $retCode |= $rule->getAttribute("failSafe") != NULL; |
131 | | - } elseif ($rule->nodeName == "Reference") { |
132 | | - $newVar = $rule->hasAttribute("var") ? str_replace("~r", $replaceStr, $rule->getAttribute("var")) : $replaceStr; |
133 | | - $xpath = new DOMXPath($this->mGrammar); |
134 | | - $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0); |
135 | | - $retCode = $this->parseRec($refRule, $newVar, $saveTags, $iter, $text, $outNode); |
136 | | - } elseif ($rule->nodeName == "Text") { |
137 | | - $tagSearch = $rule->getAttribute("childTags"); |
138 | | - if ($tagSearch == "") { |
139 | | - $tagSearch = $saveTags; |
140 | | - } elseif ($saveTags != "") { |
141 | | - $tagSearch .= "|" . $saveTags; |
142 | | - } |
143 | | - while ($text != "" && ($saveTags == "" || ! preg_match("/^($saveTags)/s", $text))) { |
144 | | - $offset = $rule->firstChild != NULL && $this->parseRec($rule->firstChild, $replaceStr, "", $iter, $text, $outNode) ? 0 : 1; |
145 | | - if (preg_match("/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset)) { |
146 | | - if ($matches[0][1] > 0) { |
147 | | - $outNode->appendChild($dom->createTextNode(substr($text, 0, $matches[0][1]))); |
148 | | - $text = substr($text, $matches[0][1]); |
149 | | - } |
150 | | - } else { |
151 | | - $outNode->appendChild($dom->createTextNode($text)); |
152 | | - $text = ""; |
153 | | - } |
154 | | - } |
155 | | - $retCode = true; |
156 | | - } |
157 | | - if ($wgDebugParserLog != '') { |
158 | | - wfErrorLog("Exiting {$rule->nodeName}, Return Code - $retCode\n", $wgDebugParserLog); |
159 | | - wfErrorLog("Text - $text\n", $wgDebugParserLog); |
160 | | - } |
161 | | - return $retCode; |
162 | | - } |
163 | | - |
164 | | - private function pushTags($rule, $tagStr) { |
165 | | - $iter = 0; |
166 | | - if ($rule->nodeName == "Sequence") { |
167 | | - $pushInd = $rule->childNodes->length - 1; |
168 | | - $shouldPush = true; |
169 | | - for ($child = $rule->lastChild; $child != NULL; $child = $child->previousSibling) { |
170 | | - $this->pushTags($child, $tagStr); |
171 | | - if ($child->previousSibling != NULL) { |
172 | | - if ($this->pullTags($child, $iter, $childTag)) { |
173 | | - if ($shouldPush) { |
174 | | - $pushInd --; |
175 | | - } |
176 | | - if ($tagStr == "") { |
177 | | - $tagStr = $childTag; |
178 | | - } elseif ($childTag != "") { |
179 | | - $tagStr .= "|" . $childTag; |
180 | | - } |
181 | | - } else { |
182 | | - $shouldPush = false; |
183 | | - $tagStr = $childTag; |
184 | | - } |
185 | | - } |
186 | | - } |
187 | | - $rule->setAttribute("pushInd", $pushInd); |
188 | | - } else { |
189 | | - if ($rule->nodeName != "Choice") { |
190 | | - $rule->setAttribute("saveTags", $tagStr); |
191 | | - $tagStr = NULL; |
192 | | - if ($rule->nodeName == "Text") { |
193 | | - $childTags = ""; |
194 | | - foreach ($rule->childNodes as $crrnt) { |
195 | | - if ($childTags != "") { |
196 | | - $childTags .= "|"; |
197 | | - } |
198 | | - $this->pullTags($crrnt, $iter, $childTag); |
199 | | - $childTags .= $childTag; |
200 | | - } |
201 | | - $rule->setAttribute("childTags", $childTags); |
202 | | - } |
203 | | - } |
204 | | - foreach ($rule->childNodes as $crrnt) { |
205 | | - $this->pushTags($crrnt, $tagStr); |
206 | | - } |
207 | | - } |
208 | | - } |
209 | | - |
210 | | - private function pullTags($rule, &$iter, &$childTags) { |
211 | | - $iter ++; |
212 | | - if ($iter > ParseEngine::maxIter) { |
213 | | - throw new MWException("Collecter iterated too many times. Probable loop in grammar."); |
214 | | - } |
215 | | - $childTags = ""; |
216 | | - $failSafe = TRUE; |
217 | | - if ($rule->nodeName == "Assignment") { |
218 | | - $childTags = $rule->getAttribute("tag"); |
219 | | - if ($rule->getAttribute("regex") == NULL) { |
220 | | - $childTags = preg_quote($childTags, "/"); |
221 | | - } |
222 | | - $failSafe = FALSE; |
223 | | - } elseif ($rule->nodeName == "Choice" || $rule->nodeName == "Sequence") { |
224 | | - $failSafe = $rule->nodeName == "Sequence"; |
225 | | - foreach ($rule->childNodes as $child) { |
226 | | - $failSafe = $this->pullTags($child, $iter, $newTags); |
227 | | - if ($childTags == "") { |
228 | | - $childTags = $newTags; |
229 | | - } elseif ($newTags != "") { |
230 | | - $childTags .= "|" . $newTags; |
231 | | - } |
232 | | - if (($failSafe && $rule->nodeName == "Choice") || (! $failSafe && $rule->nodeName == "Sequence")) { |
233 | | - break; |
234 | | - } |
235 | | - } |
236 | | - $failSafe |= $rule->nodeName == "Choice" && $rule->getAttribute("failSafe") != NULL; |
237 | | - } elseif ($rule->nodeName == "Reference") { |
238 | | - $xpath = new DOMXPath($this->mGrammar); |
239 | | - $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0); |
240 | | - $failSafe = $this->pullTags($refRule, $iter, $childTags); |
241 | | - } |
242 | | - return $failSafe; |
243 | | - } |
244 | | -} |
245 | | - |
Index: branches/parser-work/phase3/includes/parser/WikiTextGrammar.xml |
— | — | @@ -1,145 +0,0 @@ |
2 | | -<?xml version="1.0"?> |
3 | | -<Grammar rootTag="root" startRule="start" version="1.0"> |
4 | | - <Sequence name="start" > |
5 | | - <Reference name="postNewLine" /> |
6 | | - <Reference name="main" /> |
7 | | - </Sequence> |
8 | | - <Text name="main"> |
9 | | - <Choice> |
10 | | - <Sequence> |
11 | | - <Reference name="newLine" /> |
12 | | - <Reference name="postNewLine" /> |
13 | | - </Sequence> |
14 | | - <Assignment tagName="link" tag="[["> |
15 | | - <Reference name="endText" var="]]" /> |
16 | | - </Assignment> |
17 | | - <Assignment tagName="tplArg" tag="{{{(?!{)" regex="true"> |
18 | | - <Sequence> |
19 | | - <Reference name="name" /> |
20 | | - <Choice failSafe="true"> |
21 | | - <Assignment tagName="default" tag="|"> |
22 | | - <Reference name="main" /> |
23 | | - </Assignment> |
24 | | - </Choice> |
25 | | - <Assignment tagName="endTag" tag="}}}" /> |
26 | | - </Sequence> |
27 | | - </Assignment> |
28 | | - <Assignment tagName="template" tag="{{"> |
29 | | - <Sequence> |
30 | | - <Reference name="name" /> |
31 | | - <Choice failSafe="true"> |
32 | | - <Assignment tagName="name2" tag=":"> |
33 | | - <Reference name="main" /> |
34 | | - </Assignment> |
35 | | - </Choice> |
36 | | - <Reference name="partList" /> |
37 | | - <Assignment tagName="endTag" tag="}}" /> |
38 | | - </Sequence> |
39 | | - </Assignment> |
40 | | - <Reference name="comment" /> |
41 | | - <Assignment tagName="noWiki" tag="<nowiki>"> |
42 | | - <Sequence> |
43 | | - <Text /> |
44 | | - <Assignment tagName="endTag" tag="<\/nowiki>" /> |
45 | | - </Sequence> |
46 | | - </Assignment> |
47 | | - <Assignment tagName="xmlTag" tag="<(?=(\w+)[\s\/>])" regex="true"> |
48 | | - <Sequence> |
49 | | - <Assignment tagName="name" tag="~r" /> |
50 | | - <Reference name="attrList" /> |
51 | | - <Choice> |
52 | | - <Assignment tagName="endTag" tag="\s*\/>" regex="true" /> |
53 | | - <Sequence> |
54 | | - <Assignment tagName="inner" tag="\s*>" regex="true"> |
55 | | - <Reference name="main" /> |
56 | | - </Assignment> |
57 | | - <Assignment tagName="endTag" tag="</~r>" /> |
58 | | - </Sequence> |
59 | | - </Choice> |
60 | | - </Sequence> |
61 | | - </Assignment> |
62 | | - </Choice> |
63 | | - </Text> |
64 | | - <Sequence name="endText"> |
65 | | - <Reference name="main" /> |
66 | | - <Assignment tagName="endTag" tag="~r" /> |
67 | | - </Sequence> |
68 | | - <Assignment name="newLine" tagName="newLine" tag="\r?\n" regex="true" /> |
69 | | - <Assignment name="eol" tagName="eol" tag="(?=\n|$)" regex="true" /> |
70 | | - <Choice name="ignoreList" failSafe="true"> |
71 | | - <Sequence> |
72 | | - <Choice> |
73 | | - <Assignment tag="[ \t]+" regex="true" /> |
74 | | - <Reference name="comment" /> |
75 | | - </Choice> |
76 | | - <Reference name="ignoreList" /> |
77 | | - </Sequence> |
78 | | - </Choice> |
79 | | - <Choice name="postNewLine" failSafe="true"> |
80 | | - <Sequence> |
81 | | - <Assignment tagName="h" tag="(={1,6})" regex="true"> |
82 | | - <Reference name="endText" /> |
83 | | - </Assignment> |
84 | | - <Reference name="ignoreList" /> |
85 | | - <Reference name="eol" /> |
86 | | - </Sequence> |
87 | | - <Reference name="listChoice" var="" /> |
88 | | - </Choice> |
89 | | - <Choice name="listChoice"> |
90 | | - <Assignment tagName="orderedList" tag="(?=(~r#))" regex="true"> |
91 | | - <Reference name="itemList" /> |
92 | | - </Assignment> |
93 | | - <Assignment tagName="unorderedList" tag="(?=(~r\*))" regex="true"> |
94 | | - <Reference name="itemList" /> |
95 | | - </Assignment> |
96 | | - </Choice> |
97 | | - <Sequence name="itemList"> |
98 | | - <Choice> |
99 | | - <Reference name="listChoice" /> |
100 | | - <Assignment tagName="listItem" tag="~r"> |
101 | | - <Sequence> |
102 | | - <Reference name="main" /> |
103 | | - <Reference name="eol" /> |
104 | | - </Sequence> |
105 | | - </Assignment> |
106 | | - </Choice> |
107 | | - <Choice failSafe="true"> |
108 | | - <Sequence> |
109 | | - <Reference name="newLine" /> |
110 | | - <Reference name="itemList" /> |
111 | | - </Sequence> |
112 | | - </Choice> |
113 | | - </Sequence> |
114 | | - <Assignment name="comment" tagName="comment" tag="<!--.*?(?:-->|$)" regex="true" /> |
115 | | - <Assignment name="name" tagName="name"> |
116 | | - <Reference name="main" /> |
117 | | - </Assignment> |
118 | | - <Choice name="partList" failSafe="true"> |
119 | | - <Sequence> |
120 | | - <Assignment tagName="part" tag="|"> |
121 | | - <Sequence> |
122 | | - <Reference name="name" /> |
123 | | - <Choice failSafe="true"> |
124 | | - <Assignment tagName="value" tag="="> |
125 | | - <Reference name="main" /> |
126 | | - </Assignment> |
127 | | - </Choice> |
128 | | - </Sequence> |
129 | | - </Assignment> |
130 | | - <Reference name="partList" /> |
131 | | - </Sequence> |
132 | | - </Choice> |
133 | | - <Choice name="attrList" failSafe="true"> |
134 | | - <Sequence> |
135 | | - <Assignment tagName="attribute" tag="\s+(?!\/?>)" regex="true"> |
136 | | - <Sequence> |
137 | | - <Reference name="name" /> |
138 | | - <Assignment tagName="value" tag="\s*=\s*("|')" regex="true"> |
139 | | - <Reference name="endText" /> |
140 | | - </Assignment> |
141 | | - </Sequence> |
142 | | - </Assignment> |
143 | | - <Reference name="attrList" /> |
144 | | - </Sequence> |
145 | | - </Choice> |
146 | | -</Grammar> |
Index: branches/parser-work/phase3/includes/parser/Parser.php |
— | — | @@ -3978,6 +3978,8 @@ |
3979 | 3979 | $this->clearState(); |
3980 | 3980 | } |
3981 | 3981 | |
| 3982 | + wfRunHooks( 'BeforePreSaveTransform', array( &$text ) ); |
| 3983 | + |
3982 | 3984 | $pairs = array( |
3983 | 3985 | "\r\n" => "\n", |
3984 | 3986 | ); |