r94537 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r94536‎ | r94537 | r94538 >
Date:18:15, 15 August 2011
Author:vasilievvv
Status:deferred
Tags:
Comment:
Fix buildLRTables.php:
* Optimize it (from 30 seconds to 1!)
* Add canonical set to the debug output page
* Some formatting
Modified paths:
  • /trunk/extensions/WikiScripts/interpreter/buildLRTables.php (modified) (history)

Diff [purge]

Index: trunk/extensions/WikiScripts/interpreter/buildLRTables.php
@@ -19,7 +19,7 @@
2020
2121 class Grammar {
2222 var $mTerminals, $mNonterminals, $mProductions, $mSymbols;
23 - var $mFirst, $mFollow, $mAction, $mGoto;
 23+ var $mFirst, $mFollow, $mAction, $mGoto, $mCanon;
2424
2525 private function __construct() {
2626 $this->mTerminals =
@@ -28,16 +28,25 @@
2929 array();
3030 }
3131
 32+ /**
 33+ * Returns the ID of the nonterminal
 34+ */
3235 private function getNonterminalID( $name ) {
3336 if( !in_array( $name, $this->mNonterminals ) )
3437 $this->mNonterminals[] = $name;
3538 return array_search( $name, $this->mNonterminals );
3639 }
3740
 41+ /**
 42+ * Adds productions
 43+ */
3844 private function addProduction( $nonterm, $prod ) {
3945 $this->mProductions[] = array( $nonterm, $prod );
4046 }
4147
 48+ /**
 49+ * Returns all possible productions for the nonterminal.
 50+ */
4251 private function getProdsForNt( $nonterm ) {
4352 $prods = array();
4453 for( $i = 0; $i < count( $this->mProductions ); $i++ ) {
@@ -52,12 +61,16 @@
5362 return $this->mNonterminals[$id];
5463 }
5564
 65+ /**
 66+ * Returns the grammar object for a given BNF definition.
 67+ */
5668 public static function parse( $def ) {
5769 $g = new Grammar();
5870 $def = strtolower( $def );
5971 $lines = explode( "\n", $def );
6072 for( $i = 1; $i <= count( $lines ); $i++ ) {
61 - $line = trim( $lines[$i - 1] );
 73+ $line = preg_replace( '/#(.+)/us', '', $lines[$i - 1] );
 74+ $line = trim( $line );
6275 if( !$line )
6376 continue;
6477
@@ -79,13 +92,14 @@
8093 return $g;
8194 }
8295
 96+ /**
 97+ * Parses a line in the BNF file.
 98+ */
8399 private static function parseLine( $g, $line, $lnum ) {
84100 $i = 0;
85101 wfSuppressWarnings(); // @ doesn't help to supress "uninitialized string offset" warning
86102
87103 self::skipWhitespace( $line, $i );
88 - if( $line[$i] == '#' )
89 - return null;
90104 if( $line[$i] != '<' )
91105 die( "Invalid BNF at line $lnum" );
92106 $i++;
@@ -129,11 +143,19 @@
130144 return array( $name, $prods );
131145 }
132146
 147+ /**
 148+ * Skips all the whitespace in the line and updates $pos
 149+ */
133150 private static function skipWhitespace( $line, &$pos ) {
134151 while( ctype_space( $line[$pos] ) && $pos + 1 < strlen( $line ) )
135152 $pos++;
136153 }
137154
 155+ /**
 156+ * Builds the FIRST() table.
 157+ *
 158+ * FIRST( x ) is a set of terminals with which the productions of x may begin.
 159+ */
138160 private function buildFirstTable() {
139161 foreach( $this->mSymbols as $symbol )
140162 $this->mFirst[$symbol] = array();
@@ -157,6 +179,11 @@
158180 }
159181 }
160182
 183+ /**
 184+ * Builds the FOLLOW() table.
 185+ *
 186+ * FOLLOW( x ) is a set of all terminals that may immediately after x.
 187+ */
161188 private function buildFollowTable() {
162189 foreach( $this->mSymbols as $symbol )
163190 $this->mFollow[$symbol] = array();
@@ -191,27 +218,31 @@
192219 }
193220
194221 private function itemsClosure( $items ) {
195 - for( ; ; ) {
196 - $oldsize = count( $items );
197 - foreach( $items as $item ) {
198 - list( $prodid, $idx ) = $item;
199 - list( $unused, $prod ) = $this->mProductions[$prodid];
200 - if( is_int( @$prod[$idx] ) ) {
201 - foreach( $this->getProdsForNt( $prod[$idx] ) as $id => $newProd ) {
202 - $item = array( $id, 0 );
203 - if( !in_array( $item, $items ) )
204 - $items[] = $item;
 222+ $limit = count( $items );
 223+
 224+ for( $i = 0; $i < $limit; $i++ ) {
 225+ $item = $items[$i];
 226+
 227+ list( $prodid, $idx ) = $item;
 228+ list( $unused, $prod ) = $this->mProductions[$prodid];
 229+ if( is_int( @$prod[$idx] ) ) {
 230+ foreach( $this->getProdsForNt( $prod[$idx] ) as $id => $newProd ) {
 231+ $item = array( $id, 0 );
 232+ if( !in_array( $item, $items ) ) {
 233+ $items[] = $item;
 234+ $limit++;
205235 }
206236 }
207237 }
208 - if( count( $items ) == $oldsize )
209 - return $items;
210238 }
 239+
 240+ return $items;
211241 }
212242
213243 public function itemsGoto( $items, $symbol ) {
214244 if( is_null( $symbol ) )
215245 return array();
 246+
216247 $result = array();
217248 foreach( $items as $item ) {
218249 list( $prodid, $idx ) = $item;
@@ -225,18 +256,20 @@
226257 public function buildCanonicalSet() {
227258 $r = array( $this->itemsClosure( array( array( 0, 0 ) ) ) );
228259 $symbols = array_merge( $this->mTerminals, array_keys( $this->mNonterminals ) );
229 - for( ; ; ) {
230 - $oldsize = count( $r );
231 - foreach( $r as $set ) {
232 - foreach( $symbols as $symbol ) {
233 - $goto = $this->itemsGoto( $set, $symbol );
234 - if( $goto && !in_array( $goto, $r ) )
235 - $r[] = $goto;
 260+
 261+ $limit = 1;
 262+
 263+ for( $i = 0; $i < $limit; $i++ ) {
 264+ $set = $r[$i];
 265+ foreach( $symbols as $symbol ) {
 266+ $goto = $this->itemsGoto( $set, $symbol );
 267+ if( $goto && !in_array( $goto, $r ) ) {
 268+ $r[] = $goto;
 269+ $limit++;
236270 }
237271 }
238 - if( $oldsize == count( $r ) )
239 - break;
240272 }
 273+
241274 return $r;
242275 }
243276
@@ -244,6 +277,7 @@
245278 $this->buildFirstTable();
246279 $this->buildFollowTable();
247280 $canonSet = $this->buildCanonicalSet();
 281+
248282 $actionTable = array();
249283 $gotoTable = array();
250284 for( $i = 0; $i < count( $canonSet ); $i++ ) {
@@ -283,8 +317,10 @@
284318 $actionTable[$i] = $row;
285319 $gotoTable[$i] = $rowGoto;
286320 }
 321+
287322 $this->mAction = $actionTable;
288323 $this->mGoto = $gotoTable;
 324+ $this->mCanon = $canonSet;
289325 }
290326
291327 /** Debug */
@@ -300,18 +336,23 @@
301337 return implode( ' ', $s );
302338 }
303339
304 - public function formatItem( $item ) {
 340+ public function formatItem( $item, $html = false ) {
305341 list( $prodid, $idx ) = $item;
306342 list( $subj, $val ) = $this->mProductions[$prodid];
307 - $s = array( $this->getNtName( $subj ), "->" );
 343+ $subjname = $this->getNtName( $subj );
 344+ $s = $html ?
 345+ array( "<b>{$subjname}</b>", '&rarr;' ) :
 346+ array( $subjname, '->' );
308347 for( $i = 0; $i <= count( $val ); $i++ ) {
309348 if( $i == $idx )
310 - $s[] = '(!)';
 349+ $s[] = $html ? '&bullet;' : '(!)';
311350 if( $symbol = @$val[$i] ) {
312351 if( is_string( $symbol ) )
313 - $s[] = strtoupper( $symbol );
 352+ $s[] = $html ? $symbol : strtoupper( $symbol );
314353 else
315 - $s[] = $this->getNtName( $symbol );
 354+ $s[] = $html ?
 355+ '<b>' . $this->getNtName( $symbol ) . '</b>' :
 356+ $this->getNtName( $symbol );
316357 }
317358 }
318359 return implode( ' ', $s );
@@ -374,7 +415,7 @@
375416 <body>
376417 <p>Here is the dump of LR table itself, as well as data used to build it.</p>
377418 <p>Navigate: <a href="#first">FIRST()</a> | <a href="#follow">FOLLOW()</a> | <a href="#prods">Productions</a>
378 - | <a href="#table">ACTION/GOTO</a></p>
 419+ | <a href="#canon">Canonical set</a> | <a href="#table">ACTION/GOTO</a></p>
379420 END;
380421
381422 $s .= "<h1><a name='first' id='first'>FIRST()</h1><table><tr><th>Symbol</th><th>FIRST(Symbol)</th></tr>\n";
@@ -401,30 +442,54 @@
402443 }
403444 $s .= "</table>\n";
404445
 446+ // Find max length of a canonical set
 447+ $max = 1;
 448+ foreach( $this->mCanon as $state ) {
 449+ $max = max( $max, count( $state ) );
 450+ }
 451+
 452+ $s .= "<h1><a name='canon' id='canon' />Canonical set</h1>";
 453+ $s .= "<table><tr><th>State ID</th><th colspan={$max}>Items</th></tr>\n";
 454+ for( $i = 0; $i < count( $this->mCanon ); $i++ ) {
 455+ $s .= "<tr id='state{$i}'><td><a href='#lrtable{$i}'><b>{$i}</b></a></td>";
 456+ for( $j = 0; $j < $max; $j++ )
 457+ $s .= isset( $this->mCanon[$i][$j] ) ?
 458+ "<td style='white-space: nowrap'>" . $this->formatItem( $this->mCanon[$i][$j], true ) . "</li>" :
 459+ "<td></td>";
 460+ $s .= "</tr>\n";
 461+ }
 462+ $s .= "</table>\n";
 463+
405464 $termLen = count( $this->mTerminals );
406465 $nontermLen = count( $this->mNonterminals );
407 - $s .= "<h1><a name='table' id='action'>LR-table (ACTION/GOTO)</h1><table><tr><th rowspan=2>State ID</th>" .
 466+ $s .= "<h1><a name='table' id='action'>LR-table (ACTION/GOTO)</h1><table><tr><th rowspan=2>State ID</th><th rowspawn=2>State item</th>" .
408467 "<th colspan={$termLen}>ACTION</th><th colspan={$nontermLen}>GOTO</th></tr><tr><th>" .
409468 implode( '</th><th>', $this->mTerminals ) . '</th><th>' . implode( '</th><th>', array_values( $this->mNonterminals ) ) . "</th></tr>\n";
410469 for( $id = 0; $id < count( $this->mAction ); $id++ ) {
411470 $row = $this->mAction[$id];
412471 $goto = $this->mGoto[$id];
413 - $s .= "\t<tr><td><b>{$id}</b></td>";
 472+
 473+ // Output ID
 474+ $s .= "\t<tr id='lrtable{$id}'><td><b><a href='#state{$id}'>{$id}</a></b></td>";
 475+
 476+ // Output ACTION
414477 foreach( $this->mTerminals as $t ) {
415478 $act = @$row[$t];
416479 if( $act ) {
417480 switch( $act[0] ) {
418481 case 'shift':
419 - $s .= "<td>s{$act[1]}</td>"; break;
 482+ $s .= "<td style='background-color: #FFF0A0'>s{$act[1]}</td>"; break;
420483 case 'reduce':
421 - $s .= "<td>r{$act[1]}</td>"; break;
 484+ $s .= "<td style='background-color: #A0B5FF'>r{$act[1]}</td>"; break;
422485 case 'accept':
423 - $s .= "<td>acc</td>"; break;
 486+ $s .= "<td style='background-color: #AAFFA0'>acc</td>"; break;
424487 }
425488 } else {
426489 $s .= "<td></td>";
427490 }
428491 }
 492+
 493+ // Output GOTO
429494 foreach( $this->mNonterminals as $ntid => $ntname ) {
430495 if( isset( $goto[$ntid] ) ) {
431496 $s .= "<td>{$goto[$ntid]}</td>";

Status & tagging log