r52056 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r52055‎ | r52056 | r52057 >
Date:17:20, 17 June 2009
Author:catrope
Status:ok
Tags:
Comment:
Moving ConfEditor class from switch-master to core
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/ConfEditor.php (added) (history)

Diff [purge]

Index: trunk/phase3/includes/AutoLoader.php
@@ -32,6 +32,7 @@
3333 'ChangeTags' => 'includes/ChangeTags.php',
3434 'ChannelFeed' => 'includes/Feed.php',
3535 'ConcatenatedGzipHistoryBlob' => 'includes/HistoryBlob.php',
 36+ 'ConfEditor' => 'includes/ConfEditor.php',
3637 'ConstantDependency' => 'includes/CacheDependency.php',
3738 'CreativeCommonsRdf' => 'includes/Metadata.php',
3839 'Credits' => 'includes/Credits.php',
Index: trunk/phase3/includes/ConfEditor.php
@@ -0,0 +1,1053 @@
 2+<?php
 3+
 4+/**
 5+ * This is a state machine style parser with two internal stacks:
 6+ * * A next state stack, which determines the state the machine will progress to next
 7+ * * A path stack, which keeps track of the logical location in the file.
 8+ *
 9+ * Reference grammar:
 10+ *
 11+ * file = T_OPEN_TAG *statement
 12+ * statement = T_VARIABLE "=" expression ";"
 13+ * expression = array / scalar / T_VARIABLE
 14+ * array = T_ARRAY "(" [ element *( "," element ) [ "," ] ] ")"
 15+ * element = assoc-element / expression
 16+ * assoc-element = scalar T_DOUBLE_ARROW expression
 17+ * scalar = T_LNUMBER / T_DNUMBER / T_STRING / T_CONSTANT_ENCAPSED_STRING
 18+ */
 19+class ConfEditor {
 20+ /** The text to parse */
 21+ var $text;
 22+
 23+ /** The token array from token_get_all() */
 24+ var $tokens;
 25+
 26+ /** The current position in the token array */
 27+ var $pos;
 28+
 29+ /** The current 1-based line number */
 30+ var $lineNum;
 31+
 32+ /** The current 1-based column number */
 33+ var $colNum;
 34+
 35+ /** The current 0-based byte number */
 36+ var $byteNum;
 37+
 38+ /** The current ConfEditorToken object */
 39+ var $currentToken;
 40+
 41+ /** The previous ConfEditorToken object */
 42+ var $prevToken;
 43+
 44+ /**
 45+ * The state machine stack. This is an array of strings where the topmost
 46+ * element will be popped off and become the next parser state.
 47+ */
 48+ var $stateStack;
 49+
 50+
 51+ /**
 52+ * The path stack is a stack of associative arrays with the following elements:
 53+ * name The name of top level of the path
 54+ * level The level (number of elements) of the path
 55+ * startByte The byte offset of the start of the path
 56+ * startToken The token offset of the start
 57+ * endByte The byte offset of the end, plus one
 58+ * endToken The token offset of the end, plus one
 59+ * valueStartToken The start token offset of the value part
 60+ * valueStartByte The start byte offset of the value part
 61+ * valueEndToken The end token offset of the value part, plus one
 62+ * valueEndByte The end byte offset of the value part, plus one
 63+ * nextArrayIndex The next numeric array index at this level
 64+ * hasComma True if the array element ends with a comma
 65+ * arrowByte The byte offset of the "=>", or false if there isn't one
 66+ */
 67+ var $pathStack;
 68+
 69+ /**
 70+ * The elements of the top of the pathStack for every path encountered, indexed
 71+ * by slash-separated path.
 72+ */
 73+ var $pathInfo;
 74+
 75+ /**
 76+ * Next serial number for whitespace placeholder paths (@extra-N)
 77+ */
 78+ var $serial;
 79+
 80+ /**
 81+ * Editor state. This consists of the internal copy/insert operations which
 82+ * are applied to the source string to obtain the destination string.
 83+ */
 84+ var $edits;
 85+
 86+ /**
 87+ * Simple entry point for command-line testing
 88+ */
 89+ static function test( $text ) {
 90+ try {
 91+ $ce = new self( $text );
 92+ $ce->parse();
 93+ } catch ( ConfEditorParseError $e ) {
 94+ return $e->getMessage() . "\n" . $e->highlight( $text );
 95+ }
 96+ return "OK";
 97+ }
 98+
 99+ /**
 100+ * Construct a new parser
 101+ */
 102+ public function __construct( $text ) {
 103+ $this->text = $text;
 104+ }
 105+
 106+ /**
 107+ * Edit the text. Returns the edited text.
 108+ * @param array $ops Array of operations.
 109+ *
 110+ * Operations are given as an associative array, with members:
 111+ * type: One of delete, set, append or insert (required)
 112+ * path: The path to operate on (required)
 113+ * key: The array key to insert/append, with PHP quotes
 114+ * value: The value, with PHP quotes
 115+ *
 116+ * delete
 117+ * Deletes an array element or statement with the specified path.
 118+ * e.g.
 119+ * array('type' => 'delete', 'path' => '$foo/bar/baz' )
 120+ * is equivalent to the runtime PHP code:
 121+ * unset( $foo['bar']['baz'] );
 122+ *
 123+ * set
 124+ * Sets the value of an array element. If the element doesn't exist, it
 125+ * is appended to the array. If it does exist, the value is set, with
 126+ * comments and indenting preserved.
 127+ *
 128+ * append
 129+ * Appends a new element to the end of the array. Adds a trailing comma.
 130+ * e.g.
 131+ * array( 'type' => 'append', 'path' => '$foo/bar',
 132+ * 'key' => 'baz', 'value' => "'x'" )
 133+ * is like the PHP code:
 134+ * $foo['bar']['baz'] = 'x';
 135+ *
 136+ * insert
 137+ * Insert a new element at the start of the array.
 138+ *
 139+ */
 140+ public function edit( $ops ) {
 141+ $this->parse();
 142+
 143+ $this->edits = array(
 144+ array( 'copy', 0, strlen( $this->text ) )
 145+ );
 146+ foreach ( $ops as $op ) {
 147+ $type = $op['type'];
 148+ $path = $op['path'];
 149+ $value = isset( $op['value'] ) ? $op['value'] : null;
 150+ $key = isset( $op['key'] ) ? $op['key'] : null;
 151+
 152+ switch ( $type ) {
 153+ case 'delete':
 154+ list( $start, $end ) = $this->findDeletionRegion( $path );
 155+ $this->replaceSourceRegion( $start, $end, false );
 156+ break;
 157+ case 'set':
 158+ if ( isset( $this->pathInfo[$path] ) ) {
 159+ list( $start, $end ) = $this->findValueRegion( $path );
 160+ $encValue = $value; // var_export( $value, true );
 161+ $this->replaceSourceRegion( $start, $end, $encValue );
 162+ break;
 163+ }
 164+ // No existing path, fall through to append
 165+ $slashPos = strrpos( $path, '/' );
 166+ $key = var_export( substr( $path, $slashPos + 1 ), true );
 167+ $path = substr( $path, 0, $slashPos );
 168+ // Fall through
 169+ case 'append':
 170+ // Find the last array element
 171+ $lastEltPath = $this->findLastArrayElement( $path );
 172+ if ( $lastEltPath === false ) {
 173+ throw new MWException( "Can't find any element of array \"$path\"" );
 174+ }
 175+ $lastEltInfo = $this->pathInfo[$lastEltPath];
 176+
 177+ // Has it got a comma already?
 178+ if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) {
 179+ // No comma, insert one after the value region
 180+ list( $start, $end ) = $this->findValueRegion( $lastEltPath );
 181+ $this->replaceSourceRegion( $end - 1, $end - 1, ',' );
 182+ }
 183+
 184+ // Make the text to insert
 185+ list( $start, $end ) = $this->findDeletionRegion( $lastEltPath );
 186+
 187+ if ( $key === null ) {
 188+ list( $indent, $arrowIndent ) = $this->getIndent( $start );
 189+ $textToInsert = "$indent$value,";
 190+ } else {
 191+ list( $indent, $arrowIndent ) =
 192+ $this->getIndent( $start, $key, $lastEltInfo['arrowByte'] );
 193+ $textToInsert = "$indent$key$arrowIndent=> $value,";
 194+ }
 195+ $textToInsert .= ( $indent === false ? ' ' : "\n" );
 196+
 197+ // Insert the item
 198+ $this->replaceSourceRegion( $end, $end, $textToInsert );
 199+ break;
 200+ case 'insert':
 201+ // Find first array element
 202+ $firstEltPath = $this->findFirstArrayElement( $path );
 203+ if ( $firstEltPath === false ) {
 204+ throw new MWException( "Can't find array element of \"$path\"" );
 205+ }
 206+ list( $start, $end ) = $this->findDeletionRegion( $firstEltPath );
 207+ $info = $this->pathInfo[$firstEltPath];
 208+
 209+ // Make the text to insert
 210+ if ( $key === null ) {
 211+ list( $indent, $arrowIndent ) = $this->getIndent( $start );
 212+ $textToInsert = "$indent$value,";
 213+ } else {
 214+ list( $indent, $arrowIndent ) =
 215+ $this->getIndent( $start, $key, $info['arrowByte'] );
 216+ $textToInsert = "$indent$key$arrowIndent=> $value,";
 217+ }
 218+ $textToInsert .= ( $indent === false ? ' ' : "\n" );
 219+
 220+ // Insert the item
 221+ $this->replaceSourceRegion( $start, $start, $textToInsert );
 222+ break;
 223+ default:
 224+ throw new MWException( "Unrecognised operation: \"$type\"" );
 225+ }
 226+ }
 227+
 228+ // Do the edits
 229+ $out = '';
 230+ foreach ( $this->edits as $edit ) {
 231+ if ( $edit[0] == 'copy' ) {
 232+ $out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] );
 233+ } else { // if ( $edit[0] == 'insert' )
 234+ $out .= $edit[1];
 235+ }
 236+ }
 237+
 238+ // Do a second parse as a sanity check
 239+ $this->text = $out;
 240+ try {
 241+ $this->parse();
 242+ } catch ( ConfEditorParseError $e ) {
 243+ throw new MWException(
 244+ "Sorry, ConfEditor broke the file during editing and it won't parse anymore: " .
 245+ $e->getMessage() );
 246+ }
 247+ return $out;
 248+ }
 249+
 250+ /**
 251+ * Get the variables defined in the text
 252+ * @return array( varname => value )
 253+ */
 254+ function getVars() {
 255+ $vars = array();
 256+ $this->parse();
 257+ foreach( $this->pathInfo as $path => $data ) {
 258+ if ( $path[0] != '$' )
 259+ continue;
 260+ $trimmedPath = substr( $path, 1 );
 261+ $name = $data['name'];
 262+ if ( $name[0] == '@' )
 263+ continue;
 264+ if ( $name[0] == '$' )
 265+ $name = substr( $name, 1 );
 266+ $parentPath = substr( $trimmedPath, 0,
 267+ strlen( $trimmedPath ) - strlen( $name ) );
 268+ if( substr( $parentPath, -1 ) == '/' )
 269+ $parentPath = substr( $parentPath, 0, -1 );
 270+
 271+ $value = substr( $this->text, $data['valueStartByte'],
 272+ $data['valueEndByte'] - $data['valueStartByte']
 273+ );
 274+ $this->setVar( $vars, $parentPath, $name,
 275+ $this->parseScalar( $value ) );
 276+ }
 277+ return $vars;
 278+ }
 279+
 280+ /**
 281+ * Set a value in an array, unless it's set already. For instance,
 282+ * setVar( $arr, 'foo/bar', 'baz', 3 ); will set
 283+ * $arr['foo']['bar']['baz'] = 3;
 284+ * @param $array array
 285+ * @param $path string slash-delimited path
 286+ * @param $key mixed Key
 287+ * @param $value mixed Value
 288+ */
 289+ function setVar( &$array, $path, $key, $value ) {
 290+ $pathArr = explode( '/', $path );
 291+ $target =& $array;
 292+ if ( $path !== '' ) {
 293+ foreach ( $pathArr as $p ) {
 294+ if( !isset( $target[$p] ) )
 295+ $target[$p] = array();
 296+ $target =& $target[$p];
 297+ }
 298+ }
 299+ if ( !isset( $target[$key] ) )
 300+ $target[$key] = $value;
 301+ }
 302+
 303+ /**
 304+ * Parse a scalar value in PHP
 305+ * @return mixed Parsed value
 306+ */
 307+ function parseScalar( $str ) {
 308+ if ( $str !== '' && $str[0] == '\'' )
 309+ // Single-quoted string
 310+ return strtr( substr( $str, 1, -1 ),
 311+ array( '\\\'' => '\'', '\\\\' => '\\' ) );
 312+ if ( $str !== '' && @$str[0] == '"' )
 313+ // Double-quoted string
 314+ return strtr( stripcslashes( substr( $str, 1, -1 ) ),
 315+ array( '\'' => '\\\'' ) );
 316+ if ( substr( $str, 0, 4 ) == 'true' )
 317+ return true;
 318+ if ( substr( $str, 0, 5 ) == 'false' )
 319+ return false;
 320+ if ( substr( $str, 0, 4 ) == 'null' )
 321+ return null;
 322+ // Must be some kind of numeric value, so let PHP's weak typing
 323+ // be useful for a change
 324+ return $str;
 325+ }
 326+
 327+ /**
 328+ * Replace the source byte region [$start, $end) with $newText, or delete it if $newText is false.
 329+ * Works by splitting the 'copy' entry of $this->edits that contains the region.
 330+ */
 331+ function replaceSourceRegion( $start, $end, $newText = false ) {
 332+ // Rebuild the edit list, splitting whichever copy operation spans the region.
 333+ // NOTE(review): a zero-width region on a copy boundary matches no copy and looks to be dropped.
 334+ $newEdits = array();
 335+ foreach ( $this->edits as $i => $edit ) {
 336+ if ( $edit[0] !== 'copy' ) {
 337+ $newEdits[] = $edit;
 338+ continue;
 339+ }
 340+ $copyStart = $edit[1];
 341+ $copyEnd = $edit[2];
 342+ if ( $start >= $copyEnd || $end <= $copyStart ) {
 343+ // Region does not intersect this copy (touching at a boundary counts as outside)
 344+ $newEdits[] = $edit;
 345+ continue;
 346+ }
 347+ if ( ( $start < $copyStart && $end > $copyStart )
 348+ || ( $start < $copyEnd && $end > $copyEnd )
 349+ ) {
 350+ throw new MWException( "Overlapping regions found, can't do the edit" );
 351+ }
 352+ // Split the copy around the region, putting the new text (if any) in the gap
 353+ $newEdits[] = array( 'copy', $copyStart, $start );
 354+ if ( $newText !== false ) {
 355+ $newEdits[] = array( 'insert', $newText );
 356+ }
 357+ $newEdits[] = array( 'copy', $end, $copyEnd );
 358+ }
 359+ $this->edits = $newEdits;
 360+ }
 361+
 362+ /**
 363+ * Finds the source byte region which you would want to delete, if $pathName
 364+ * was to be deleted. Includes the leading spaces and tabs, the trailing line
 365+ * break, and any comments in between.
 366+ */
 367+ function findDeletionRegion( $pathName ) {
 368+ if ( !isset( $this->pathInfo[$pathName] ) ) {
 369+ throw new MWException( "Can't find path \"$pathName\"" );
 370+ }
 371+ $path = $this->pathInfo[$pathName];
 372+ // Find the start
 373+ $this->firstToken();
 374+ while ( $this->pos != $path['startToken'] ) {
 375+ $this->nextToken();
 376+ }
 377+ $regionStart = $path['startByte'];
 378+ for ( $offset = -1; $offset >= -$this->pos; $offset-- ) {
 379+ $token = $this->getTokenAhead( $offset );
 380+ if ( !$token->isSkip() ) {
 381+ // If there is other content on the same line, don't move the start point
 382+ // back, because that will cause the regions to overlap.
 383+ $regionStart = $path['startByte'];
 384+ break;
 385+ }
 386+ $lfPos = strrpos( $token->text, "\n" );
 387+ if ( $lfPos === false ) {
 388+ $regionStart -= strlen( $token->text );
 389+ } else {
 390+ // The line start does not include the LF
 391+ $regionStart -= strlen( $token->text ) - $lfPos - 1;
 392+ break;
 393+ }
 394+ }
 395+ // Find the end
 396+ while ( $this->pos != $path['endToken'] ) {
 397+ $this->nextToken();
 398+ }
 399+ $regionEnd = $path['endByte']; // past the end
 400+ for ( $offset = 0; $offset < count( $this->tokens ) - $this->pos; $offset++ ) {
 401+ $token = $this->getTokenAhead( $offset );
 402+ if ( !$token->isSkip() ) {
 403+ break;
 404+ }
 405+ $lfPos = strpos( $token->text, "\n" );
 406+ if ( $lfPos === false ) {
 407+ $regionEnd += strlen( $token->text );
 408+ } else {
 409+ // This should point past the LF
 410+ $regionEnd += $lfPos + 1;
 411+ break;
 412+ }
 413+ }
 414+ return array( $regionStart, $regionEnd );
 415+ }
 416+
 417+ /**
 418+ * Find the byte region in the source corresponding to the value part.
 419+ * This includes the quotes, but does not include the trailing comma
 420+ * or semicolon.
 421+ *
 422+ * The end position is the past-the-end (end + 1) value as per convention.
 423+ */
 424+ function findValueRegion( $pathName ) {
 425+ if ( !isset( $this->pathInfo[$pathName] ) ) {
 426+ throw new MWEXception( "Can't find path \"$pathName\"" );
 427+ }
 428+ $path = $this->pathInfo[$pathName];
 429+ if ( $path['valueStartByte'] === false || $path['valueEndByte'] === false ) {
 430+ throw new MWException( "Can't find value region for path \"$pathName\"" );
 431+ }
 432+ return array( $path['valueStartByte'], $path['valueEndByte'] );
 433+ }
 434+
 435+ /**
 436+ * Find the path name of the last element in the array.
 437+ * If the array is empty, this will return the @extra interstitial element.
 438+ * If the specified path is not found or is not an array, it will return false.
 439+ */
 440+ function findLastArrayElement( $path ) {
 441+ // Try for a real element
 442+ $lastEltPath = false;
 443+ foreach ( $this->pathInfo as $candidatePath => $info ) {
 444+ $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 );
 445+ $part2 = substr( $candidatePath, strlen( $path ) + 1, 1 );
 446+ if ( $part2 == '@' ) {
 447+ // Do nothing
 448+ } elseif ( $part1 == "$path/" ) {
 449+ $lastEltPath = $candidatePath;
 450+ } elseif ( $lastEltPath !== false ) {
 451+ break;
 452+ }
 453+ }
 454+ if ( $lastEltPath !== false ) {
 455+ return $lastEltPath;
 456+ }
 457+
 458+ // Try for an interstitial element
 459+ $extraPath = false;
 460+ foreach ( $this->pathInfo as $candidatePath => $info ) {
 461+ $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 );
 462+ if ( $part1 == "$path/" ) {
 463+ $extraPath = $candidatePath;
 464+ } elseif ( $extraPath !== false ) {
 465+ break;
 466+ }
 467+ }
 468+ return $extraPath;
 469+ }
 470+
 471+ /**
 472+ * Find the path name of first element in the array.
 473+ * If the array is empty, this will return the @extra interstitial element.
 474+ * If the specified path is not found or is not an array, it will return false.
 475+ */
 476+ function findFirstArrayElement( $path ) {
 477+ // Try for an ordinary element
 478+ foreach ( $this->pathInfo as $candidatePath => $info ) {
 479+ $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 );
 480+ $part2 = substr( $candidatePath, strlen( $path ) + 1, 1 );
 481+ if ( $part1 == "$path/" && $part2 != '@' ) {
 482+ return $candidatePath;
 483+ }
 484+ }
 485+
 486+ // Try for an interstitial element
 487+ foreach ( $this->pathInfo as $candidatePath => $info ) {
 488+ $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 );
 489+ if ( $part1 == "$path/" ) {
 490+ return $candidatePath;
 491+ }
 492+ }
 493+ return false;
 494+ }
 495+
 496+ /**
 497+ * Get the indent string which sits after a given start position.
 498+ * Returns false if the position is not at the start of the line.
 499+ */
 500+ function getIndent( $pos, $key = false, $arrowPos = false ) {
 501+ $arrowIndent = ' ';
 502+ if ( $pos == 0 || $this->text[$pos-1] == "\n" ) {
 503+ $indentLength = strspn( $this->text, " \t", $pos );
 504+ $indent = substr( $this->text, $pos, $indentLength );
 505+ } else {
 506+ $indent = false;
 507+ }
 508+ if ( $indent !== false && $arrowPos !== false ) {
 509+ $textToInsert = "$indent$key ";
 510+ $arrowIndentLength = $arrowPos - $pos - $indentLength - strlen( $key );
 511+ if ( $arrowIndentLength > 0 ) {
 512+ $arrowIndent = str_repeat( ' ', $arrowIndentLength );
 513+ }
 514+ }
 515+ return array( $indent, $arrowIndent );
 516+ }
 517+
 518+ /**
 519+ * Run the parser on the text. Throws an exception if the string does not
 520+ * match our defined subset of PHP syntax.
 521+ */
 522+ public function parse() {
 523+ $this->initParse();
 524+ $this->pushState( 'file' );
 525+ $this->pushPath( '@extra-' . ($this->serial++) );
 526+ $token = $this->firstToken();
 527+
 528+ while ( !$token->isEnd() ) {
 529+ $state = $this->popState();
 530+ if ( !$state ) {
 531+ $this->error( 'internal error: empty state stack' );
 532+ }
 533+
 534+ switch ( $state ) {
 535+ case 'file':
 536+ $token = $this->expect( T_OPEN_TAG );
 537+ $token = $this->skipSpace();
 538+ if ( $token->isEnd() ) {
 539+ break 2;
 540+ }
 541+ $this->pushState( 'statement', 'file 2' );
 542+ break;
 543+ case 'file 2':
 544+ $token = $this->skipSpace();
 545+ if ( $token->isEnd() ) {
 546+ break 2;
 547+ }
 548+ $this->pushState( 'statement', 'file 2' );
 549+ break;
 550+ case 'statement':
 551+ $token = $this->skipSpace();
 552+ if ( !$this->validatePath( $token->text ) ) {
 553+ $this->error( "Invalid variable name \"{$token->text}\"" );
 554+ }
 555+ $this->nextPath( $token->text );
 556+ $this->expect( T_VARIABLE );
 557+ $this->skipSpace();
 558+ $arrayAssign = false;
 559+ if ( $this->currentToken()->type == '[' ) {
 560+ $this->nextToken();
 561+ $token = $this->skipSpace();
 562+ if ( !$token->isScalar() ) {
 563+ $this->error( "expected a string or number for the array key" );
 564+ }
 565+ if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
 566+ $text = $this->parseScalar( $token->text );
 567+ } else {
 568+ $text = $token->text;
 569+ }
 570+ if ( !$this->validatePath( $text ) ) {
 571+ $this->error( "Invalid associative array name \"$text\"" );
 572+ }
 573+ $this->pushPath( $text );
 574+ $this->nextToken();
 575+ $this->skipSpace();
 576+ $this->expect( ']' );
 577+ $this->skipSpace();
 578+ $arrayAssign = true;
 579+ }
 580+ $this->expect( '=' );
 581+ $this->skipSpace();
 582+ $this->startPathValue();
 583+ if ( $arrayAssign )
 584+ $this->pushState( 'expression', 'array assign end' );
 585+ else
 586+ $this->pushState( 'expression', 'statement end' );
 587+ break;
 588+ case 'array assign end':
 589+ case 'statement end':
 590+ $this->endPathValue();
 591+ if ( $state == 'array assign end' )
 592+ $this->popPath();
 593+ $this->skipSpace();
 594+ $this->expect( ';' );
 595+ $this->nextPath( '@extra-' . ($this->serial++) );
 596+ break;
 597+ case 'expression':
 598+ $token = $this->skipSpace();
 599+ if ( $token->type == T_ARRAY ) {
 600+ $this->pushState( 'array' );
 601+ } elseif ( $token->isScalar() ) {
 602+ $this->nextToken();
 603+ } elseif ( $token->type == T_VARIABLE ) {
 604+ $this->nextToken();
 605+ } else {
 606+ $this->error( "expected simple expression" );
 607+ }
 608+ break;
 609+ case 'array':
 610+ $this->skipSpace();
 611+ $this->expect( T_ARRAY );
 612+ $this->skipSpace();
 613+ $this->expect( '(' );
 614+ $this->skipSpace();
 615+ $this->pushPath( '@extra-' . ($this->serial++) );
 616+ if ( $this->isAhead( ')' ) ) {
 617+ // Empty array
 618+ $this->pushState( 'array end' );
 619+ } else {
 620+ $this->pushState( 'element', 'array end' );
 621+ }
 622+ break;
 623+ case 'array end':
 624+ $this->skipSpace();
 625+ $this->popPath();
 626+ $this->expect( ')' );
 627+ break;
 628+ case 'element':
 629+ $token = $this->skipSpace();
 630+ // Look ahead to find the double arrow
 631+ if ( $token->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 ) ) {
 632+ // Found associative element
 633+ $this->pushState( 'assoc-element', 'element end' );
 634+ } else {
 635+ // Not associative
 636+ $this->nextPath( '@next' );
 637+ $this->startPathValue();
 638+ $this->pushState( 'expression', 'element end' );
 639+ }
 640+ break;
 641+ case 'element end':
 642+ $token = $this->skipSpace();
 643+ if ( $token->type == ',' ) {
 644+ $this->endPathValue();
 645+ $this->markComma();
 646+ $this->nextToken();
 647+ $this->nextPath( '@extra-' . ($this->serial++) );
 648+ // Look ahead to find ending bracket
 649+ if ( $this->isAhead( ")" ) ) {
 650+ // Found ending bracket, no continuation
 651+ $this->skipSpace();
 652+ } else {
 653+ // No ending bracket, continue to next element
 654+ $this->pushState( 'element' );
 655+ }
 656+ } elseif ( $token->type == ')' ) {
 657+ // End array
 658+ $this->endPathValue();
 659+ } else {
 660+ $this->error( "expected the next array element or the end of the array" );
 661+ }
 662+ break;
 663+ case 'assoc-element':
 664+ $token = $this->skipSpace();
 665+ if ( !$token->isScalar() ) {
 666+ $this->error( "expected a string or number for the array key" );
 667+ }
 668+ if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
 669+ $text = $this->parseScalar( $token->text );
 670+ } else {
 671+ $text = $token->text;
 672+ }
 673+ if ( !$this->validatePath( $text ) ) {
 674+ $this->error( "Invalid associative array name \"$text\"" );
 675+ }
 676+ $this->nextPath( $text );
 677+ $this->nextToken();
 678+ $this->skipSpace();
 679+ $this->markArrow();
 680+ $this->expect( T_DOUBLE_ARROW );
 681+ $this->skipSpace();
 682+ $this->startPathValue();
 683+ $this->pushState( 'expression' );
 684+ break;
 685+ }
 686+ }
 687+ if ( count( $this->stateStack ) ) {
 688+ $this->error( 'unexpected end of file' );
 689+ }
 690+ $this->popPath();
 691+ }
 692+
 693+ /**
 694+ * Initialise a parse.
 695+ */
 696+ protected function initParse() {
 697+ $this->tokens = token_get_all( $this->text );
 698+ $this->stateStack = array();
 699+ $this->pathStack = array();
 700+ $this->firstToken();
 701+ $this->pathInfo = array();
 702+ $this->serial = 1;
 703+ }
 704+
 705+ /**
 706+ * Set the parse position. Do not call this except from firstToken() and
 707+ * nextToken(), there is more to update than just the position.
 708+ */
 709+ protected function setPos( $pos ) {
 710+ $this->pos = $pos;
 711+ if ( $this->pos >= count( $this->tokens ) ) {
 712+ $this->currentToken = ConfEditorToken::newEnd();
 713+ } else {
 714+ $this->currentToken = $this->newTokenObj( $this->tokens[$this->pos] );
 715+ }
 716+ return $this->currentToken;
 717+ }
 718+
 719+ /**
 720+ * Create a ConfEditorToken from an element of token_get_all()
 721+ */
 722+ function newTokenObj( $internalToken ) {
 723+ if ( is_array( $internalToken ) ) {
 724+ return new ConfEditorToken( $internalToken[0], $internalToken[1] );
 725+ } else {
 726+ return new ConfEditorToken( $internalToken, $internalToken );
 727+ }
 728+ }
 729+
 730+ /**
 731+ * Reset the parse position
 732+ */
 733+ function firstToken() {
 734+ $this->setPos( 0 );
 735+ $this->prevToken = ConfEditorToken::newEnd();
 736+ $this->lineNum = 1;
 737+ $this->colNum = 1;
 738+ $this->byteNum = 0;
 739+ return $this->currentToken;
 740+ }
 741+
 742+ /**
 743+ * Get the current token
 744+ */
 745+ function currentToken() {
 746+ return $this->currentToken;
 747+ }
 748+
 749+ /**
 750+ * Advance the current position and return the resulting next token
 751+ */
 752+ function nextToken() {
 753+ if ( $this->currentToken ) {
 754+ $text = $this->currentToken->text;
 755+ $lfCount = substr_count( $text, "\n" );
 756+ if ( $lfCount ) {
 757+ $this->lineNum += $lfCount;
 758+ $this->colNum = strlen( $text ) - strrpos( $text, "\n" );
 759+ } else {
 760+ $this->colNum += strlen( $text );
 761+ }
 762+ $this->byteNum += strlen( $text );
 763+ }
 764+ $this->prevToken = $this->currentToken;
 765+ $this->setPos( $this->pos + 1 );
 766+ return $this->currentToken;
 767+ }
 768+
 769+ /**
 770+ * Get the token $offset steps ahead of the current position.
 771+ * $offset may be negative, to get tokens behind the current position.
 772+ */
 773+ function getTokenAhead( $offset ) {
 774+ $pos = $this->pos + $offset;
 775+ if ( $pos >= count( $this->tokens ) || $pos < 0 ) {
 776+ return ConfEditorToken::newEnd();
 777+ } else {
 778+ return $this->newTokenObj( $this->tokens[$pos] );
 779+ }
 780+ }
 781+
 782+ /**
 783+ * Advances the current position past any whitespace or comments
 784+ */
 785+ function skipSpace() {
 786+ while ( $this->currentToken && $this->currentToken->isSkip() ) {
 787+ $this->nextToken();
 788+ }
 789+ return $this->currentToken;
 790+ }
 791+
 792+ /**
 793+ * Throws an error if the current token is not of the given type, and
 794+ * then advances to the next position.
 795+ */
 796+ function expect( $type ) {
 797+ if ( $this->currentToken && $this->currentToken->type == $type ) {
 798+ return $this->nextToken();
 799+ } else {
 800+ $this->error( "expected " . $this->getTypeName( $type ) .
 801+ ", got " . $this->getTypeName( $this->currentToken->type ) );
 802+ }
 803+ }
 804+
 805+ /**
 806+ * Push a state or two on to the state stack.
 807+ */
 808+ function pushState( $nextState, $stateAfterThat = null ) {
 809+ if ( $stateAfterThat !== null ) {
 810+ $this->stateStack[] = $stateAfterThat;
 811+ }
 812+ $this->stateStack[] = $nextState;
 813+ }
 814+
 815+ /**
 816+ * Pop a state from the state stack.
 817+ */
 818+ function popState() {
 819+ return array_pop( $this->stateStack );
 820+ }
 821+
 822+ /**
 823+ * Returns true if the user input path is valid.
 824+ * This exists to allow "/" and "@" to be reserved for string path keys
 825+ */
 826+ function validatePath( $path ) {
 827+ return strpos( $path, '/' ) === false && substr( $path, 0, 1 ) != '@';
 828+ }
 829+
 830+ /**
 831+ * Internal function to update some things at the end of a path region. Do
 832+ * not call except from popPath() or nextPath().
 833+ */
 834+ function endPath() {
 835+ $i = count( $this->pathStack ) - 1;
 836+ $key = '';
 837+ foreach ( $this->pathStack as $pathInfo ) {
 838+ if ( $key !== '' ) {
 839+ $key .= '/';
 840+ }
 841+ $key .= $pathInfo['name'];
 842+ }
 843+ $pathInfo['endByte'] = $this->byteNum;
 844+ $pathInfo['endToken'] = $this->pos;
 845+ $this->pathInfo[$key] = $pathInfo;
 846+ }
 847+
 848+ /**
 849+ * Go up to a new path level, for example at the start of an array.
 850+ */
 851+ function pushPath( $path ) {
 852+ $this->pathStack[] = array(
 853+ 'name' => $path,
 854+ 'level' => count( $this->pathStack ) + 1,
 855+ 'startByte' => $this->byteNum,
 856+ 'startToken' => $this->pos,
 857+ 'valueStartToken' => false,
 858+ 'valueStartByte' => false,
 859+ 'valueEndToken' => false,
 860+ 'valueEndByte' => false,
 861+ 'nextArrayIndex' => 0,
 862+ 'hasComma' => false,
 863+ 'arrowByte' => false
 864+ );
 865+ }
 866+
 867+ /**
 868+ * Go down a path level, for example at the end of an array.
 869+ */
 870+ function popPath() {
 871+ $this->endPath();
 872+ array_pop( $this->pathStack );
 873+ }
 874+
 875+ /**
 876+ * Go to the next path on the same level. This ends the current path and
 877+ * starts a new one. If $path is @next, the new path is set to the next
 878+ * numeric array element.
 879+ */
 880+ function nextPath( $path ) {
 881+ $this->endPath();
 882+ $i = count( $this->pathStack ) - 1;
 883+ if ( $path == '@next' ) {
 884+ $nextArrayIndex =& $this->pathStack[$i]['nextArrayIndex'];
 885+ $this->pathStack[$i]['name'] = $nextArrayIndex;
 886+ $nextArrayIndex++;
 887+ } else {
 888+ $this->pathStack[$i]['name'] = $path;
 889+ }
 890+ $this->pathStack[$i] =
 891+ array(
 892+ 'startByte' => $this->byteNum,
 893+ 'startToken' => $this->pos,
 894+ 'valueStartToken' => false,
 895+ 'valueStartByte' => false,
 896+ 'valueEndToken' => false,
 897+ 'valueEndByte' => false,
 898+ 'hasComma' => false,
 899+ 'arrowByte' => false,
 900+ ) + $this->pathStack[$i];
 901+ }
 902+
 903+ /**
 904+ * Mark the start of the value part of a path.
 905+ */
 906+ function startPathValue() {
 907+ $path =& $this->pathStack[count( $this->pathStack ) - 1];
 908+ $path['valueStartToken'] = $this->pos;
 909+ $path['valueStartByte'] = $this->byteNum;
 910+ }
 911+
 912+ /**
 913+ * Mark the end of the value part of a path.
 914+ */
 915+ function endPathValue() {
 916+ $path =& $this->pathStack[count( $this->pathStack ) - 1];
 917+ $path['valueEndToken'] = $this->pos;
 918+ $path['valueEndByte'] = $this->byteNum;
 919+ }
 920+
 921+ /**
 922+ * Mark the comma separator in an array element
 923+ */
 924+ function markComma() {
 925+ $path =& $this->pathStack[count( $this->pathStack ) - 1];
 926+ $path['hasComma'] = true;
 927+ }
 928+
 929+ /**
 930+ * Mark the arrow separator in an associative array element
 931+ */
 932+ function markArrow() {
 933+ $path =& $this->pathStack[count( $this->pathStack ) - 1];
 934+ $path['arrowByte'] = $this->byteNum;
 935+ }
 936+
 937+ /**
 938+ * Generate a parse error
 939+ */
 940+ function error( $msg ) {
 941+ throw new ConfEditorParseError( $this, $msg );
 942+ }
 943+
 944+ /**
 945+ * Get a readable name for the given token type.
 946+ */
 947+ function getTypeName( $type ) {
 948+ if ( is_int( $type ) ) {
 949+ return token_name( $type );
 950+ } else {
 951+ return "\"$type\"";
 952+ }
 953+ }
 954+
 955+ /**
 956+ * Looks ahead to see if the given type is the next token type, starting
 957+ * from the current position plus the given offset. Skips any intervening
 958+ * whitespace.
 959+ */
 960+ function isAhead( $type, $offset = 0 ) {
 961+ $ahead = $offset;
 962+ $token = $this->getTokenAhead( $offset );
 963+ while ( !$token->isEnd() ) {
 964+ if ( $token->isSkip() ) {
 965+ $ahead++;
 966+ $token = $this->getTokenAhead( $ahead );
 967+ continue;
 968+ } elseif ( $token->type == $type ) {
 969+ // Found the type
 970+ return true;
 971+ } else {
 972+ // Not found
 973+ return false;
 974+ }
 975+ }
 976+ return false;
 977+ }
 978+
 979+ /**
 980+ * Get the previous token object
 981+ */
 982+ function prevToken() {
 983+ return $this->prevToken;
 984+ }
 985+
 986+ /**
 987+ * Echo a reasonably readable representation of the tokenizer array.
 988+ */
 989+ function dumpTokens() {
 990+ $out = '';
 991+ foreach ( $this->tokens as $token ) {
 992+ $obj = $this->newTokenObj( $token );
 993+ $out .= sprintf( "%-28s %s\n",
 994+ $this->getTypeName( $obj->type ),
 995+ addcslashes( $obj->text, "\0..\37" ) );
 996+ }
 997+ echo "<pre>" . htmlspecialchars( $out ) . "</pre>";
 998+ }
 999+}
 1000+
 1001+/**
 1002+ * Exception class for parse errors
 1003+ */
 1004+class ConfEditorParseError extends MWException {
 1005+ var $lineNum, $colNum;
 1006+ function __construct( $editor, $msg ) {
 1007+ $this->lineNum = $editor->lineNum;
 1008+ $this->colNum = $editor->colNum;
 1009+ parent::__construct( "Parse error on line {$editor->lineNum} " .
 1010+ "col {$editor->colNum}: $msg" );
 1011+ }
 1012+
 1013+ function highlight( $text ) {
 1014+ $lines = StringUtils::explode( "\n", $text );
 1015+ foreach ( $lines as $lineNum => $line ) {
 1016+ if ( $lineNum == $this->lineNum - 1 ) {
 1017+ return "$line\n" .str_repeat( ' ', $this->colNum - 1 ) . "^\n";
 1018+ }
 1019+ }
 1020+ }
 1021+
 1022+}
 1023+
 1024+/**
 1025+ * Class to wrap a token from the tokenizer.
 1026+ */
 1027+class ConfEditorToken {
 1028+ var $type, $text;
 1029+
 1030+ static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING );
 1031+ static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );
 1032+
 1033+ static function newEnd() {
 1034+ return new self( 'END', '' );
 1035+ }
 1036+
 1037+ function __construct( $type, $text ) {
 1038+ $this->type = $type;
 1039+ $this->text = $text;
 1040+ }
 1041+
 1042+ function isSkip() {
 1043+ return in_array( $this->type, self::$skipTypes );
 1044+ }
 1045+
 1046+ function isScalar() {
 1047+ return in_array( $this->type, self::$scalarTypes );
 1048+ }
 1049+
 1050+ function isEnd() {
 1051+ return $this->type == 'END';
 1052+ }
 1053+}
 1054+
Property changes on: trunk/phase3/includes/ConfEditor.php
___________________________________________________________________
Name: svn:mergeinfo
11055 +
Name: svn:eol-style
21056 + native

Follow-up revisions

RevisionCommit summaryAuthorDate
r52057switch-master: Changes for r52056catrope17:20, 17 June 2009

Status & tagging log