Index: trunk/extensions/WikiScripts/interpreter/buildLRTables.php |
— | — | @@ -19,7 +19,7 @@ |
20 | 20 | |
21 | 21 | class Grammar { |
22 | 22 | var $mTerminals, $mNonterminals, $mProductions, $mSymbols; |
23 | | - var $mFirst, $mFollow, $mAction, $mGoto; |
| 23 | + var $mFirst, $mFollow, $mAction, $mGoto, $mCanon; |
24 | 24 | |
25 | 25 | private function __construct() { |
26 | 26 | $this->mTerminals = |
— | — | @@ -28,16 +28,25 @@ |
29 | 29 | array(); |
30 | 30 | } |
31 | 31 | |
| 32 | + /** |
| 33 | + * Returns the ID of the nonterminal |
| 34 | + */ |
32 | 35 | private function getNonterminalID( $name ) { |
33 | 36 | if( !in_array( $name, $this->mNonterminals ) ) |
34 | 37 | $this->mNonterminals[] = $name; |
35 | 38 | return array_search( $name, $this->mNonterminals ); |
36 | 39 | } |
37 | 40 | |
| 41 | + /** |
| 42 | + * Adds productions |
| 43 | + */ |
38 | 44 | private function addProduction( $nonterm, $prod ) { |
39 | 45 | $this->mProductions[] = array( $nonterm, $prod ); |
40 | 46 | } |
41 | 47 | |
| 48 | + /** |
| 49 | + * Returns all possible productions for the nonterminal. |
| 50 | + */ |
42 | 51 | private function getProdsForNt( $nonterm ) { |
43 | 52 | $prods = array(); |
44 | 53 | for( $i = 0; $i < count( $this->mProductions ); $i++ ) { |
— | — | @@ -52,12 +61,16 @@ |
53 | 62 | return $this->mNonterminals[$id]; |
54 | 63 | } |
55 | 64 | |
| 65 | + /** |
| 66 | + * Returns the grammar object for a given BNF definition. |
| 67 | + */ |
56 | 68 | public static function parse( $def ) { |
57 | 69 | $g = new Grammar(); |
58 | 70 | $def = strtolower( $def ); |
59 | 71 | $lines = explode( "\n", $def ); |
60 | 72 | for( $i = 1; $i <= count( $lines ); $i++ ) { |
61 | | - $line = trim( $lines[$i - 1] ); |
| 73 | + $line = preg_replace( '/#(.+)/us', '', $lines[$i - 1] ); |
| 74 | + $line = trim( $line ); |
62 | 75 | if( !$line ) |
63 | 76 | continue; |
64 | 77 | |
— | — | @@ -79,13 +92,14 @@ |
80 | 93 | return $g; |
81 | 94 | } |
82 | 95 | |
| 96 | + /** |
| 97 | + * Parses a line in the BNF file. |
| 98 | + */ |
83 | 99 | private static function parseLine( $g, $line, $lnum ) { |
84 | 100 | $i = 0; |
85 | 101 | wfSuppressWarnings(); // @ doesn't help to supress "uninitialized string offset" warning |
86 | 102 | |
87 | 103 | self::skipWhitespace( $line, $i ); |
88 | | - if( $line[$i] == '#' ) |
89 | | - return null; |
90 | 104 | if( $line[$i] != '<' ) |
91 | 105 | die( "Invalid BNF at line $lnum" ); |
92 | 106 | $i++; |
— | — | @@ -129,11 +143,19 @@ |
130 | 144 | return array( $name, $prods ); |
131 | 145 | } |
132 | 146 | |
| 147 | + /** |
| 148 | + * Skips all the whitespace in the line and updates $pos |
| 149 | + */ |
133 | 150 | private static function skipWhitespace( $line, &$pos ) { |
134 | 151 | while( ctype_space( $line[$pos] ) && $pos + 1 < strlen( $line ) ) |
135 | 152 | $pos++; |
136 | 153 | } |
137 | 154 | |
| 155 | + /** |
| 156 | + * Builds the FIRST() table. |
| 157 | + * |
| 158 | + * FIRST( x ) is a set of terminals with which the productions of x may begin. |
| 159 | + */ |
138 | 160 | private function buildFirstTable() { |
139 | 161 | foreach( $this->mSymbols as $symbol ) |
140 | 162 | $this->mFirst[$symbol] = array(); |
— | — | @@ -157,6 +179,11 @@ |
158 | 180 | } |
159 | 181 | } |
160 | 182 | |
| 183 | + /** |
| 184 | + * Builds the FOLLOW() table. |
| 185 | + * |
| 186 | + * FOLLOW( x ) is a set of all terminals that may immediately after x. |
| 187 | + */ |
161 | 188 | private function buildFollowTable() { |
162 | 189 | foreach( $this->mSymbols as $symbol ) |
163 | 190 | $this->mFollow[$symbol] = array(); |
— | — | @@ -191,27 +218,31 @@ |
192 | 219 | } |
193 | 220 | |
194 | 221 | private function itemsClosure( $items ) { |
195 | | - for( ; ; ) { |
196 | | - $oldsize = count( $items ); |
197 | | - foreach( $items as $item ) { |
198 | | - list( $prodid, $idx ) = $item; |
199 | | - list( $unused, $prod ) = $this->mProductions[$prodid]; |
200 | | - if( is_int( @$prod[$idx] ) ) { |
201 | | - foreach( $this->getProdsForNt( $prod[$idx] ) as $id => $newProd ) { |
202 | | - $item = array( $id, 0 ); |
203 | | - if( !in_array( $item, $items ) ) |
204 | | - $items[] = $item; |
| 222 | + $limit = count( $items ); |
| 223 | + |
| 224 | + for( $i = 0; $i < $limit; $i++ ) { |
| 225 | + $item = $items[$i]; |
| 226 | + |
| 227 | + list( $prodid, $idx ) = $item; |
| 228 | + list( $unused, $prod ) = $this->mProductions[$prodid]; |
| 229 | + if( is_int( @$prod[$idx] ) ) { |
| 230 | + foreach( $this->getProdsForNt( $prod[$idx] ) as $id => $newProd ) { |
| 231 | + $item = array( $id, 0 ); |
| 232 | + if( !in_array( $item, $items ) ) { |
| 233 | + $items[] = $item; |
| 234 | + $limit++; |
205 | 235 | } |
206 | 236 | } |
207 | 237 | } |
208 | | - if( count( $items ) == $oldsize ) |
209 | | - return $items; |
210 | 238 | } |
| 239 | + |
| 240 | + return $items; |
211 | 241 | } |
212 | 242 | |
213 | 243 | public function itemsGoto( $items, $symbol ) { |
214 | 244 | if( is_null( $symbol ) ) |
215 | 245 | return array(); |
| 246 | + |
216 | 247 | $result = array(); |
217 | 248 | foreach( $items as $item ) { |
218 | 249 | list( $prodid, $idx ) = $item; |
— | — | @@ -225,18 +256,20 @@ |
226 | 257 | public function buildCanonicalSet() { |
227 | 258 | $r = array( $this->itemsClosure( array( array( 0, 0 ) ) ) ); |
228 | 259 | $symbols = array_merge( $this->mTerminals, array_keys( $this->mNonterminals ) ); |
229 | | - for( ; ; ) { |
230 | | - $oldsize = count( $r ); |
231 | | - foreach( $r as $set ) { |
232 | | - foreach( $symbols as $symbol ) { |
233 | | - $goto = $this->itemsGoto( $set, $symbol ); |
234 | | - if( $goto && !in_array( $goto, $r ) ) |
235 | | - $r[] = $goto; |
| 260 | + |
| 261 | + $limit = 1; |
| 262 | + |
| 263 | + for( $i = 0; $i < $limit; $i++ ) { |
| 264 | + $set = $r[$i]; |
| 265 | + foreach( $symbols as $symbol ) { |
| 266 | + $goto = $this->itemsGoto( $set, $symbol ); |
| 267 | + if( $goto && !in_array( $goto, $r ) ) { |
| 268 | + $r[] = $goto; |
| 269 | + $limit++; |
236 | 270 | } |
237 | 271 | } |
238 | | - if( $oldsize == count( $r ) ) |
239 | | - break; |
240 | 272 | } |
| 273 | + |
241 | 274 | return $r; |
242 | 275 | } |
243 | 276 | |
— | — | @@ -244,6 +277,7 @@ |
245 | 278 | $this->buildFirstTable(); |
246 | 279 | $this->buildFollowTable(); |
247 | 280 | $canonSet = $this->buildCanonicalSet(); |
| 281 | + |
248 | 282 | $actionTable = array(); |
249 | 283 | $gotoTable = array(); |
250 | 284 | for( $i = 0; $i < count( $canonSet ); $i++ ) { |
— | — | @@ -283,8 +317,10 @@ |
284 | 318 | $actionTable[$i] = $row; |
285 | 319 | $gotoTable[$i] = $rowGoto; |
286 | 320 | } |
| 321 | + |
287 | 322 | $this->mAction = $actionTable; |
288 | 323 | $this->mGoto = $gotoTable; |
| 324 | + $this->mCanon = $canonSet; |
289 | 325 | } |
290 | 326 | |
291 | 327 | /** Debug */ |
— | — | @@ -300,18 +336,23 @@ |
301 | 337 | return implode( ' ', $s ); |
302 | 338 | } |
303 | 339 | |
304 | | - public function formatItem( $item ) { |
| 340 | + public function formatItem( $item, $html = false ) { |
305 | 341 | list( $prodid, $idx ) = $item; |
306 | 342 | list( $subj, $val ) = $this->mProductions[$prodid]; |
307 | | - $s = array( $this->getNtName( $subj ), "->" ); |
| 343 | + $subjname = $this->getNtName( $subj ); |
| 344 | + $s = $html ? |
| 345 | + array( "<b>{$subjname}</b>", '→' ) : |
| 346 | + array( $subjname, '->' ); |
308 | 347 | for( $i = 0; $i <= count( $val ); $i++ ) { |
309 | 348 | if( $i == $idx ) |
310 | | - $s[] = '(!)'; |
| 349 | + $s[] = $html ? '•' : '(!)'; |
311 | 350 | if( $symbol = @$val[$i] ) { |
312 | 351 | if( is_string( $symbol ) ) |
313 | | - $s[] = strtoupper( $symbol ); |
| 352 | + $s[] = $html ? $symbol : strtoupper( $symbol ); |
314 | 353 | else |
315 | | - $s[] = $this->getNtName( $symbol ); |
| 354 | + $s[] = $html ? |
| 355 | + '<b>' . $this->getNtName( $symbol ) . '</b>' : |
| 356 | + $this->getNtName( $symbol ); |
316 | 357 | } |
317 | 358 | } |
318 | 359 | return implode( ' ', $s ); |
— | — | @@ -374,7 +415,7 @@ |
375 | 416 | <body> |
376 | 417 | <p>Here is the dump of LR table itself, as well as data used to build it.</p> |
377 | 418 | <p>Navigate: <a href="#first">FIRST()</a> | <a href="#follow">FOLLOW()</a> | <a href="#prods">Productions</a> |
378 | | - | <a href="#table">ACTION/GOTO</a></p> |
| 419 | + | <a href="#canon">Canonical set</a> | <a href="#table">ACTION/GOTO</a></p> |
379 | 420 | END; |
380 | 421 | |
381 | 422 | $s .= "<h1><a name='first' id='first'>FIRST()</h1><table><tr><th>Symbol</th><th>FIRST(Symbol)</th></tr>\n"; |
— | — | @@ -401,30 +442,54 @@ |
402 | 443 | } |
403 | 444 | $s .= "</table>\n"; |
404 | 445 | |
| 446 | + // Find max length of a canonical set |
| 447 | + $max = 1; |
| 448 | + foreach( $this->mCanon as $state ) { |
| 449 | + $max = max( $max, count( $state ) ); |
| 450 | + } |
| 451 | + |
| 452 | + $s .= "<h1><a name='canon' id='canon' />Canonical set</h1>"; |
| 453 | + $s .= "<table><tr><th>State ID</th><th colspan={$max}>Items</th></tr>\n"; |
| 454 | + for( $i = 0; $i < count( $this->mCanon ); $i++ ) { |
| 455 | + $s .= "<tr id='state{$i}'><td><a href='#lrtable{$i}'><b>{$i}</b></a></td>"; |
| 456 | + for( $j = 0; $j < $max; $j++ ) |
| 457 | + $s .= isset( $this->mCanon[$i][$j] ) ? |
| 458 | + "<td style='white-space: nowrap'>" . $this->formatItem( $this->mCanon[$i][$j], true ) . "</li>" : |
| 459 | + "<td></td>"; |
| 460 | + $s .= "</tr>\n"; |
| 461 | + } |
| 462 | + $s .= "</table>\n"; |
| 463 | + |
405 | 464 | $termLen = count( $this->mTerminals ); |
406 | 465 | $nontermLen = count( $this->mNonterminals ); |
407 | | - $s .= "<h1><a name='table' id='action'>LR-table (ACTION/GOTO)</h1><table><tr><th rowspan=2>State ID</th>" . |
| 466 | + $s .= "<h1><a name='table' id='action'>LR-table (ACTION/GOTO)</h1><table><tr><th rowspan=2>State ID</th><th rowspawn=2>State item</th>" . |
408 | 467 | "<th colspan={$termLen}>ACTION</th><th colspan={$nontermLen}>GOTO</th></tr><tr><th>" . |
409 | 468 | implode( '</th><th>', $this->mTerminals ) . '</th><th>' . implode( '</th><th>', array_values( $this->mNonterminals ) ) . "</th></tr>\n"; |
410 | 469 | for( $id = 0; $id < count( $this->mAction ); $id++ ) { |
411 | 470 | $row = $this->mAction[$id]; |
412 | 471 | $goto = $this->mGoto[$id]; |
413 | | - $s .= "\t<tr><td><b>{$id}</b></td>"; |
| 472 | + |
| 473 | + // Output ID |
| 474 | + $s .= "\t<tr id='lrtable{$id}'><td><b><a href='#state{$id}'>{$id}</a></b></td>"; |
| 475 | + |
| 476 | + // Output ACTION |
414 | 477 | foreach( $this->mTerminals as $t ) { |
415 | 478 | $act = @$row[$t]; |
416 | 479 | if( $act ) { |
417 | 480 | switch( $act[0] ) { |
418 | 481 | case 'shift': |
419 | | - $s .= "<td>s{$act[1]}</td>"; break; |
| 482 | + $s .= "<td style='background-color: #FFF0A0'>s{$act[1]}</td>"; break; |
420 | 483 | case 'reduce': |
421 | | - $s .= "<td>r{$act[1]}</td>"; break; |
| 484 | + $s .= "<td style='background-color: #A0B5FF'>r{$act[1]}</td>"; break; |
422 | 485 | case 'accept': |
423 | | - $s .= "<td>acc</td>"; break; |
| 486 | + $s .= "<td style='background-color: #AAFFA0'>acc</td>"; break; |
424 | 487 | } |
425 | 488 | } else { |
426 | 489 | $s .= "<td></td>"; |
427 | 490 | } |
428 | 491 | } |
| 492 | + |
| 493 | + // Output GOTO |
429 | 494 | foreach( $this->mNonterminals as $ntid => $ntname ) { |
430 | 495 | if( isset( $goto[$ntid] ) ) { |
431 | 496 | $s .= "<td>{$goto[$ntid]}</td>"; |