Index: trunk/extensions/ParserWiki/ParseEngine.php |
— | — | @@ -10,136 +10,136 @@ |
11 | 11 | const maxIter = 4096; |
12 | 12 | private $mGrammar; |
13 | 13 | |
14 | | - function __construct($grammar) { |
15 | | - $xpath = new DOMXPath($grammar); |
16 | | - $rootRules = $xpath->query("/grammar/parseObject | /grammar/rule/parseObject"); |
17 | | - foreach ($rootRules as $child) { |
18 | | - $this->pushTags($child, NULL); |
| 14 | + function __construct( $grammar ) { |
| 15 | + $xpath = new DOMXPath( $grammar ); |
| 16 | + $rootRules = $xpath->query( "/grammar/parseObject | /grammar/rule/parseObject" ); |
| 17 | + foreach ( $rootRules as $child ) { |
| 18 | + $this->pushTags( $child, NULL ); |
19 | 19 | } |
20 | 20 | $this->mGrammar = $grammar; |
21 | 21 | } |
22 | 22 | |
23 | | - function parse($text) { |
24 | | - wfDebugLog("ParseEngine", "==========Start Parse Engine==========\n"); |
25 | | - $xpath = new DOMXPath($this->mGrammar); |
26 | | - $rootAssign = $xpath->query("/grammar/parseObject")->item(0); |
| 23 | + function parse( $text ) { |
| 24 | + wfDebugLog( "ParseEngine", "==========Start Parse Engine==========\n" ); |
| 25 | + $xpath = new DOMXPath( $this->mGrammar ); |
| 26 | + $rootAssign = $xpath->query( "/grammar/parseObject" )->item( 0 ); |
27 | 27 | $doc = new DOMDocument(); |
28 | | - if (! $this->parseRec($rootAssign, "", "", $iter, $text, $doc)) { |
| 28 | + if ( ! $this->parseRec( $rootAssign, "", "", $iter, $text, $doc ) ) { |
29 | 29 | $doc = NULL; |
30 | 30 | } |
31 | 31 | return $doc; |
32 | 32 | } |
33 | 33 | |
34 | | - static function unparse($inNodes) { |
| 34 | + static function unparse( $inNodes ) { |
35 | 35 | $retStr = ""; |
36 | | - foreach ($inNodes as $child) { |
37 | | - if ($child instanceof DOMText) { |
| 36 | + foreach ( $inNodes as $child ) { |
| 37 | + if ( $child instanceof DOMText ) { |
38 | 38 | $retStr .= $child->data; |
39 | 39 | } else { |
40 | | - $retStr .= $child->getAttribute("tag") . self::unparse($child->childNodes); |
| 40 | + $retStr .= $child->getAttribute( "tag" ) . self::unparse( $child->childNodes ); |
41 | 41 | } |
42 | 42 | } |
43 | 43 | return $retStr; |
44 | 44 | } |
45 | 45 | |
46 | | - private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) { |
| 46 | + private function parseRec( $rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode ) { |
47 | 47 | $iter ++; |
48 | | - if ($iter > ParseEngine::maxIter) { |
49 | | - throw new MWException("Parser iterated too many times. Probable loop in grammar."); |
| 48 | + if ( $iter > ParseEngine::maxIter ) { |
| 49 | + throw new MWException( "Parser iterated too many times. Probable loop in grammar." ); |
50 | 50 | } |
51 | 51 | $rule = $rule->firstChild; |
52 | | - if ($rule->nodeName == "assignment" || $rule->nodeName == "reference" || $rule->nodeName == "text") { |
53 | | - $saveTags = str_replace("~r", preg_quote($replaceStr, "/"), $saveTags); |
54 | | - $newTags = $rule->getAttribute("saveTags"); |
55 | | - if ($saveTags == "") { |
| 52 | + if ( $rule->nodeName == "assignment" || $rule->nodeName == "reference" || $rule->nodeName == "text" ) { |
| 53 | + $saveTags = str_replace( "~r", preg_quote( $replaceStr, "/" ), $saveTags ); |
| 54 | + $newTags = $rule->getAttribute( "saveTags" ); |
| 55 | + if ( $saveTags == "" ) { |
56 | 56 | $saveTags = $newTags; |
57 | | - } elseif ($newTags != "") { |
| 57 | + } elseif ( $newTags != "" ) { |
58 | 58 | $saveTags .= "|" . $newTags; |
59 | 59 | } |
60 | 60 | } |
61 | 61 | $dom = $outNode->ownerDocument == NULL ? $outNode : $outNode->ownerDocument; |
62 | | - $xpath = new DOMXPath($rule->ownerDocument); |
63 | | - $childRules = $xpath->query("parseObject", $rule); |
| 62 | + $xpath = new DOMXPath( $rule->ownerDocument ); |
| 63 | + $childRules = $xpath->query( "parseObject", $rule ); |
64 | 64 | $retCode = TRUE; |
65 | | - if ($rule->nodeName == "assignment") { |
66 | | - $patterns = $xpath->query("pattern", $rule); |
| 65 | + if ( $rule->nodeName == "assignment" ) { |
| 66 | + $patterns = $xpath->query( "pattern", $rule ); |
67 | 67 | $tag = ""; |
68 | | - if ($patterns->length > 0) { |
69 | | - $pattern = str_replace("~r", $replaceStr, $patterns->item(0)->getAttribute("tag")); |
70 | | - $retCode = preg_match("/^$pattern/s", $text, $matches); |
71 | | - if ($retCode) { |
| 68 | + if ( $patterns->length > 0 ) { |
| 69 | + $pattern = str_replace( "~r", $replaceStr, $patterns->item( 0 )->getAttribute( "tag" ) ); |
| 70 | + $retCode = preg_match( "/^$pattern/s", $text, $matches ); |
| 71 | + if ( $retCode ) { |
72 | 72 | $tag = $matches[0]; |
73 | | - if (isset($matches[1])) { |
| 73 | + if ( isset( $matches[1] ) ) { |
74 | 74 | $replaceStr = $matches[1]; |
75 | 75 | } |
76 | 76 | } |
77 | 77 | } |
78 | | - if ($retCode) { |
| 78 | + if ( $retCode ) { |
79 | 79 | $newText = $text; |
80 | | - $newElement = $dom->createElement($rule->getAttribute("tag")); |
81 | | - if ($tag != "") { |
82 | | - $newText = substr($newText, strlen($tag)); |
83 | | - $newElement->setAttribute("tag", $tag); |
| 80 | + $newElement = $dom->createElement( $rule->getAttribute( "tag" ) ); |
| 81 | + if ( $tag != "" ) { |
| 82 | + $newText = substr( $newText, strlen( $tag ) ); |
| 83 | + $newElement->setAttribute( "tag", $tag ); |
84 | 84 | } |
85 | | - $retCode = $childRules->length <= 0 || $this->parseRec($childRules->item(0), $replaceStr, $saveTags, $iter, $newText, $newElement); |
86 | | - if ($retCode) { |
87 | | - $outNode->appendChild($newElement); |
| 85 | + $retCode = $childRules->length <= 0 || $this->parseRec( $childRules->item( 0 ), $replaceStr, $saveTags, $iter, $newText, $newElement ); |
| 86 | + if ( $retCode ) { |
| 87 | + $outNode->appendChild( $newElement ); |
88 | 88 | $text = $newText; |
89 | 89 | } |
90 | 90 | } |
91 | | - } elseif ($rule->nodeName == "sequence") { |
92 | | - $pushInd = $rule->getAttribute("pushInd"); |
93 | | - if ($pushInd > 0) { |
| 91 | + } elseif ( $rule->nodeName == "sequence" ) { |
| 92 | + $pushInd = $rule->getAttribute( "pushInd" ); |
| 93 | + if ( $pushInd > 0 ) { |
94 | 94 | $saveText = $text; |
95 | | - $saveNode = $outNode->cloneNode(TRUE); |
| 95 | + $saveNode = $outNode->cloneNode( TRUE ); |
96 | 96 | } |
97 | | - foreach ($childRules as $i => $child) { |
| 97 | + foreach ( $childRules as $i => $child ) { |
98 | 98 | $pushTags = $i >= $pushInd ? $saveTags : ""; |
99 | | - $retCode = $this->parseRec($child, $replaceStr, $pushTags, $iter, $text, $outNode); |
100 | | - if (! $retCode) { |
101 | | - if ($i > 0) { |
| 99 | + $retCode = $this->parseRec( $child, $replaceStr, $pushTags, $iter, $text, $outNode ); |
| 100 | + if ( ! $retCode ) { |
| 101 | + if ( $i > 0 ) { |
102 | 102 | $text = $saveText; |
103 | 103 | $outNode = $saveNode; |
104 | 104 | } |
105 | 105 | break; |
106 | 106 | } |
107 | 107 | } |
108 | | - } elseif ($rule->nodeName == "choice") { |
109 | | - foreach ($childRules as $child) { |
110 | | - $retCode = $this->parseRec($child, $replaceStr, $saveTags, $iter, $text, $outNode); |
111 | | - if ($retCode) { |
| 108 | + } elseif ( $rule->nodeName == "choice" ) { |
| 109 | + foreach ( $childRules as $child ) { |
| 110 | + $retCode = $this->parseRec( $child, $replaceStr, $saveTags, $iter, $text, $outNode ); |
| 111 | + if ( $retCode ) { |
112 | 112 | break; |
113 | 113 | } |
114 | 114 | } |
115 | | - $retCode |= $rule->hasAttribute("tag"); |
116 | | - } elseif ($rule->nodeName == "reference") { |
117 | | - $childRule = $rule->getAttribute("tag"); |
118 | | - wfDebugLog("ParseEngine", "Entering $childRule\n"); |
119 | | - $varNode = $xpath->query("pattern", $rule); |
120 | | - if ($varNode->length > 0) { |
121 | | - $replaceStr = str_replace("~r", $replaceStr, $varNode->item(0)->getAttribute("tag")); |
| 115 | + $retCode |= $rule->hasAttribute( "tag" ); |
| 116 | + } elseif ( $rule->nodeName == "reference" ) { |
| 117 | + $childRule = $rule->getAttribute( "tag" ); |
| 118 | + wfDebugLog( "ParseEngine", "Entering $childRule\n" ); |
| 119 | + $varNode = $xpath->query( "pattern", $rule ); |
| 120 | + if ( $varNode->length > 0 ) { |
| 121 | + $replaceStr = str_replace( "~r", $replaceStr, $varNode->item( 0 )->getAttribute( "tag" ) ); |
122 | 122 | } |
123 | | - $refRule = $xpath->query("/grammar/rule[@tag='$childRule']/parseObject")->item(0); |
124 | | - $retCode = $this->parseRec($refRule, $replaceStr, $saveTags, $iter, $text, $outNode); |
125 | | - wfDebugLog("ParseEngine", "Exiting $childRule, Return Code - $retCode\n"); |
126 | | - wfDebugLog("ParseEngine", "text - $text\n"); |
127 | | - } elseif ($rule->nodeName == "text") { |
128 | | - $tagSearch = $rule->getAttribute("childTags"); |
129 | | - if ($tagSearch == "") { |
| 123 | + $refRule = $xpath->query( "/grammar/rule[@tag='$childRule']/parseObject" )->item( 0 ); |
| 124 | + $retCode = $this->parseRec( $refRule, $replaceStr, $saveTags, $iter, $text, $outNode ); |
| 125 | + wfDebugLog( "ParseEngine", "Exiting $childRule, Return Code - $retCode\n" ); |
| 126 | + wfDebugLog( "ParseEngine", "text - $text\n" ); |
| 127 | + } elseif ( $rule->nodeName == "text" ) { |
| 128 | + $tagSearch = $rule->getAttribute( "childTags" ); |
| 129 | + if ( $tagSearch == "" ) { |
130 | 130 | $tagSearch = $saveTags; |
131 | | - } elseif ($saveTags != "") { |
| 131 | + } elseif ( $saveTags != "" ) { |
132 | 132 | $tagSearch .= "|" . $saveTags; |
133 | 133 | } |
134 | | - $childNode = $childRules->length <= 0 ? NULL : $childRules->item(0); |
135 | | - while ($text != "" && ($saveTags == "" || ! preg_match("/^($saveTags)/s", $text))) { |
136 | | - $offset = $childNode != NULL && $this->parseRec($childNode, $replaceStr, "", $iter, $text, $outNode) ? 0 : 1; |
137 | | - if (preg_match("/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset)) { |
138 | | - if ($matches[0][1] > 0) { |
139 | | - $outNode->appendChild($dom->createTextNode(substr($text, 0, $matches[0][1]))); |
140 | | - $text = substr($text, $matches[0][1]); |
| 134 | + $childNode = $childRules->length <= 0 ? NULL : $childRules->item( 0 ); |
| 135 | + while ( $text != "" && ( $saveTags == "" || ! preg_match( "/^($saveTags)/s", $text ) ) ) { |
| 136 | + $offset = $childNode != NULL && $this->parseRec( $childNode, $replaceStr, "", $iter, $text, $outNode ) ? 0 : 1; |
| 137 | + if ( preg_match( "/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset ) ) { |
| 138 | + if ( $matches[0][1] > 0 ) { |
| 139 | + $outNode->appendChild( $dom->createTextNode( substr( $text, 0, $matches[0][1] ) ) ); |
| 140 | + $text = substr( $text, $matches[0][1] ); |
141 | 141 | } |
142 | 142 | } else { |
143 | | - $outNode->appendChild($dom->createTextNode($text)); |
| 143 | + $outNode->appendChild( $dom->createTextNode( $text ) ); |
144 | 144 | $text = ""; |
145 | 145 | } |
146 | 146 | } |
— | — | @@ -147,85 +147,85 @@ |
148 | 148 | return $retCode; |
149 | 149 | } |
150 | 150 | |
151 | | - private function pushTags($rule, $tagStr) { |
| 151 | + private function pushTags( $rule, $tagStr ) { |
152 | 152 | $rule = $rule->firstChild; |
153 | | - $xpath = new DOMXPath($rule->ownerDocument); |
154 | | - $childRules = $xpath->query("parseObject", $rule); |
155 | | - if ($rule->nodeName == "sequence") { |
| 153 | + $xpath = new DOMXPath( $rule->ownerDocument ); |
| 154 | + $childRules = $xpath->query( "parseObject", $rule ); |
| 155 | + if ( $rule->nodeName == "sequence" ) { |
156 | 156 | $pushInd = 0; |
157 | | - for ($i = $childRules->length - 1; $i >= 0; $i --) { |
158 | | - $this->pushTags($childRules->item($i), $tagStr); |
159 | | - if ($i > 0) { |
160 | | - if ($this->pullTags($childRules->item($i), $iter, $childTag)) { |
161 | | - if ($tagStr == "") { |
| 157 | + for ( $i = $childRules->length - 1; $i >= 0; $i -- ) { |
| 158 | + $this->pushTags( $childRules->item( $i ), $tagStr ); |
| 159 | + if ( $i > 0 ) { |
| 160 | + if ( $this->pullTags( $childRules->item( $i ), $iter, $childTag ) ) { |
| 161 | + if ( $tagStr == "" ) { |
162 | 162 | $tagStr = $childTag; |
163 | | - } elseif ($childTag != "") { |
| 163 | + } elseif ( $childTag != "" ) { |
164 | 164 | $tagStr .= "|" . $childTag; |
165 | 165 | } |
166 | 166 | } else { |
167 | | - if ($pushInd < $i) { |
| 167 | + if ( $pushInd < $i ) { |
168 | 168 | $pushInd = $i; |
169 | 169 | } |
170 | 170 | $tagStr = $childTag; |
171 | 171 | } |
172 | 172 | } |
173 | 173 | } |
174 | | - $rule->setAttribute("pushInd", $pushInd); |
| 174 | + $rule->setAttribute( "pushInd", $pushInd ); |
175 | 175 | } else { |
176 | | - if ($rule->nodeName != "choice") { |
177 | | - $rule->setAttribute("saveTags", $tagStr); |
| 176 | + if ( $rule->nodeName != "choice" ) { |
| 177 | + $rule->setAttribute( "saveTags", $tagStr ); |
178 | 178 | $tagStr = NULL; |
179 | | - if ($rule->nodeName == "text") { |
| 179 | + if ( $rule->nodeName == "text" ) { |
180 | 180 | $childTags = ""; |
181 | | - foreach ($childRules as $child) { |
182 | | - if ($childTags != "") { |
| 181 | + foreach ( $childRules as $child ) { |
| 182 | + if ( $childTags != "" ) { |
183 | 183 | $childTags .= "|"; |
184 | 184 | } |
185 | | - $this->pullTags($child, $iter, $childTag); |
| 185 | + $this->pullTags( $child, $iter, $childTag ); |
186 | 186 | $childTags .= $childTag; |
187 | 187 | } |
188 | | - $rule->setAttribute("childTags", $childTags); |
| 188 | + $rule->setAttribute( "childTags", $childTags ); |
189 | 189 | } |
190 | 190 | } |
191 | | - foreach ($childRules as $child) { |
192 | | - $this->pushTags($child, $tagStr); |
| 191 | + foreach ( $childRules as $child ) { |
| 192 | + $this->pushTags( $child, $tagStr ); |
193 | 193 | } |
194 | 194 | } |
195 | 195 | } |
196 | 196 | |
197 | | - private function pullTags($rule, &$iter, &$childTags) { |
| 197 | + private function pullTags( $rule, &$iter, &$childTags ) { |
198 | 198 | $iter ++; |
199 | | - if ($iter > ParseEngine::maxIter) { |
200 | | - throw new MWException("Collecter iterated too many times. Probable loop in grammar."); |
| 199 | + if ( $iter > ParseEngine::maxIter ) { |
| 200 | + throw new MWException( "Collecter iterated too many times. Probable loop in grammar." ); |
201 | 201 | } |
202 | 202 | $rule = $rule->firstChild; |
203 | | - $xpath = new DOMXPath($rule->ownerDocument); |
| 203 | + $xpath = new DOMXPath( $rule->ownerDocument ); |
204 | 204 | $childTags = ""; |
205 | 205 | $failSafe = TRUE; |
206 | | - if ($rule->nodeName == "assignment") { |
207 | | - $patterns = $xpath->query("pattern", $rule); |
208 | | - if ($patterns->length > 0) { |
209 | | - $childTags = $patterns->item(0)->getAttribute("tag"); |
| 206 | + if ( $rule->nodeName == "assignment" ) { |
| 207 | + $patterns = $xpath->query( "pattern", $rule ); |
| 208 | + if ( $patterns->length > 0 ) { |
| 209 | + $childTags = $patterns->item( 0 )->getAttribute( "tag" ); |
210 | 210 | } |
211 | 211 | $failSafe = FALSE; |
212 | | - } elseif ($rule->nodeName == "choice" || $rule->nodeName == "sequence") { |
213 | | - $childRules = $xpath->query("parseObject", $rule); |
| 212 | + } elseif ( $rule->nodeName == "choice" || $rule->nodeName == "sequence" ) { |
| 213 | + $childRules = $xpath->query( "parseObject", $rule ); |
214 | 214 | $failSafe = $rule->nodeName == "sequence"; |
215 | | - foreach ($childRules as $child) { |
216 | | - $failSafe = $this->pullTags($child, $iter, $newTags); |
217 | | - if ($childTags == "") { |
| 215 | + foreach ( $childRules as $child ) { |
| 216 | + $failSafe = $this->pullTags( $child, $iter, $newTags ); |
| 217 | + if ( $childTags == "" ) { |
218 | 218 | $childTags = $newTags; |
219 | | - } elseif ($newTags != "") { |
| 219 | + } elseif ( $newTags != "" ) { |
220 | 220 | $childTags .= "|" . $newTags; |
221 | 221 | } |
222 | | - if (($failSafe && $rule->nodeName == "choice") || (! $failSafe && $rule->nodeName == "sequence")) { |
| 222 | + if ( ( $failSafe && $rule->nodeName == "choice" ) || ( ! $failSafe && $rule->nodeName == "sequence" ) ) { |
223 | 223 | break; |
224 | 224 | } |
225 | 225 | } |
226 | | - $failSafe |= $rule->hasAttribute("tag"); |
227 | | - } elseif ($rule->nodeName == "reference") { |
228 | | - $refRule = $xpath->query("/grammar/rule[@tag='{$rule->getAttribute("tag")}']/parseObject")->item(0); |
229 | | - $failSafe = $this->pullTags($refRule, $iter, $childTags); |
| 226 | + $failSafe |= $rule->hasAttribute( "tag" ); |
| 227 | + } elseif ( $rule->nodeName == "reference" ) { |
| 228 | + $refRule = $xpath->query( "/grammar/rule[@tag='{$rule->getAttribute("tag")}']/parseObject" )->item( 0 ); |
| 229 | + $failSafe = $this->pullTags( $refRule, $iter, $childTags ); |
230 | 230 | } |
231 | 231 | return $failSafe; |
232 | 232 | } |
Index: trunk/extensions/ParserWiki/ParserWiki.php |
— | — | @@ -22,10 +22,10 @@ |
23 | 23 | $wgAutoloadClasses["ParseEngine"] = dirname( __FILE__ ) . "/ParseEngine.php"; |
24 | 24 | |
25 | 25 | $wgTheParserWiki = new ParserWiki(); |
26 | | -$wgHooks["ParserBeforeStrip"][] = array($wgTheParserWiki, "callFromParse"); |
| 26 | +$wgHooks["ParserBeforeStrip"][] = array( $wgTheParserWiki, "callFromParse" ); |
27 | 27 | |
28 | | -define ( "NS_GRAMMAR" , 91628); |
29 | | -define ( "NS_GRAMMAR_TALK" , 91629); |
| 28 | +define ( "NS_GRAMMAR" , 91628 ); |
| 29 | +define ( "NS_GRAMMAR_TALK" , 91629 ); |
30 | 30 | $wgExtraNamespaces[NS_GRAMMAR] = "Grammar"; |
31 | 31 | $wgExtraNamespaces[NS_GRAMMAR_TALK] = "Grammar_talk"; |
32 | 32 | |
— | — | @@ -36,20 +36,20 @@ |
37 | 37 | $this->mEngines = array(); |
38 | 38 | } |
39 | 39 | |
40 | | - function callFromParse($unUsed, &$text) { |
| 40 | + function callFromParse( $unUsed, &$text ) { |
41 | 41 | global $wgParserWikiGrammar; |
42 | 42 | $engine = $this->mEngines[$wgParserWikiGrammar]; |
43 | | - if ($engine == NULL) { |
44 | | - $revision = Revision::newFromTitle(Title::newFromText($wgParserWikiGrammar, NS_GRAMMAR)); |
| 43 | + if ( $engine == NULL ) { |
| 44 | + $revision = Revision::newFromTitle( Title::newFromText( $wgParserWikiGrammar, NS_GRAMMAR ) ); |
45 | 45 | $grammar = new DOMDocument(); |
46 | | - if ($revision == NULL || ! $grammar->loadXML($revision->getText(), LIBXML_NOBLANKS)) { |
| 46 | + if ( $revision == NULL || ! $grammar->loadXML( $revision->getText(), LIBXML_NOBLANKS ) ) { |
47 | 47 | return TRUE; |
48 | 48 | } |
49 | | - $engine = new ParseEngine($grammar); |
| 49 | + $engine = new ParseEngine( $grammar ); |
50 | 50 | $this->mEngines[$wgParserWikiGrammar] = $engine; |
51 | 51 | } |
52 | | - $parseTree = $engine->parse($text); |
53 | | - if ($parseTree == NULL) { |
| 52 | + $parseTree = $engine->parse( $text ); |
| 53 | + if ( $parseTree == NULL ) { |
54 | 54 | return TRUE; |
55 | 55 | } |
56 | 56 | $text = $parseTree->saveXML(); |