r55169 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r55168‎ | r55169 | r55170 >
Date:09:32, 17 August 2009
Author:brion
Status:deferred
Tags:
Comment:
Quickie parser class that can happily read the subset of PHP we need for our localization arrays safely.
About an order of magnitude faster than ConfEditor(), but still an order of magnitude slower than eval().
Confirmed to pass on all present core and extension localization files in SVN.

Sample test runs on my MacBook:

MessagesEn.php d838d9382f0b5b08c771642bb683e06d 2694 messages - 7.6ms - eval
MessagesEn.php d838d9382f0b5b08c771642bb683e06d 2694 messages - 91.8ms - QuickArrayReader
MessagesEn.php d838d9382f0b5b08c771642bb683e06d 2694 messages - 1024.5ms - ConfEditor

AbuseFilter.i18n.php 1a8838a7fa65f828ecc0d46333a2e4d0 112 langs - 62.6ms - eval
AbuseFilter.i18n.php 1a8838a7fa65f828ecc0d46333a2e4d0 112 langs - 672.8ms - QuickArrayReader
AbuseFilter.i18n.php 1a8838a7fa65f828ecc0d46333a2e4d0 112 langs - 7859.5ms - ConfEditor

(In other testing, I was able to optimize speed of ConfEditor's parsing by at most a factor of 3; this version is much more limited but adequate for present needs.)
Modified paths:
  • /trunk/extensions/LocalisationUpdate/LocalisationUpdate.class.php (modified) (history)
  • /trunk/extensions/LocalisationUpdate/LocalisationUpdate.php (modified) (history)
  • /trunk/extensions/LocalisationUpdate/QuickArrayReader.php (added) (history)
  • /trunk/extensions/LocalisationUpdate/tests/tokenTest.php (modified) (history)

Diff [purge]

Index: trunk/extensions/LocalisationUpdate/tests/tokenTest.php
@@ -23,6 +23,12 @@
2424 return $retval;
2525 }
2626
/**
 * Parse a PHP source fragment with QuickArrayReader and fetch one variable.
 *
 * An opening "<?php " tag is prepended to the fragment before it is handed
 * to the reader.
 *
 * @param string $php PHP source fragment to parse
 * @param string $varname Variable name to look up, passed on to getVar()
 * @return mixed Whatever QuickArrayReader::getVar() returns for $varname
 */
function quickTokenExtractArray( $php, $varname ) {
	$source = '<?php ' . $php;
	$parser = new QuickArrayReader( $source );
	$value = $parser->getVar( $varname );
	return $value;
}
 31+
 32+
2733 if( count( $args ) ) {
2834 $sources = $args;
2935 } else {
@@ -33,7 +39,7 @@
3440 }
3541
3642 foreach( $sources as $sourceFile ) {
37 - $rel = wfRelativePath( $sourceFile, $IP );
 43+ $rel = basename( $sourceFile );
3844 $out = str_replace( '/', '-', $rel );
3945
4046 $sourceData = file_get_contents( $sourceFile );
@@ -53,24 +59,33 @@
5460 $deltaEval = microtime(true) - $start;
5561
5662 $start = microtime(true);
 63+ $quick = quickTokenExtractArray( $sourceData, 'messages' );
 64+ $deltaQuick = microtime(true) - $start;
 65+
 66+ $start = microtime(true);
5767 $token = confExtractArray( $sourceData, 'messages' );
5868 $deltaToken = microtime(true) - $start;
5969
6070 $hashEval = md5(serialize($eval));
6171 $hashToken = md5(serialize($token));
 72+ $hashQuick = md5(serialize($quick));
6273 $countEval = count( (array)$eval);
6374 $countToken = count( (array)$token );
 75+ $countQuick = count( (array)$quick );
6476
6577 printf( "%s %s %d $items - %0.1fms - eval\n", $rel, $hashEval, $countEval, $deltaEval * 1000 );
66 - printf( "%s %s %d $items - %0.1fms - token\n", $rel, $hashToken, $countToken, $deltaToken * 1000 );
 78+ printf( "%s %s %d $items - %0.1fms - QuickArrayReader\n", $rel, $hashQuick, $countQuick, $deltaQuick * 1000 );
 79+ printf( "%s %s %d $items - %0.1fms - ConfEditor\n", $rel, $hashToken, $countToken, $deltaToken * 1000 );
6780
68 - if( $hashEval !== $hashToken ) {
 81+ if( $hashEval !== $hashToken || $hashEval !== $hashQuick ) {
6982 echo "FAILED on $rel\n";
7083 file_put_contents( "$out-eval.txt", var_export( $eval, true ) );
7184 file_put_contents( "$out-token.txt", var_export( $token, true ) );
 85+ file_put_contents( "$out-quick.txt", var_export( $quick, true ) );
7286 #die("check eval.txt and token.txt\n");
7387 }
7488 echo "\n";
7589 }
7690
7791 echo "ok\n";
 92+
Index: trunk/extensions/LocalisationUpdate/QuickArrayReader.php
@@ -0,0 +1,172 @@
 2+<?php
 3+
/**
 * Quickie parser class that can happily read the subset of PHP we need
 * for our localization arrays safely.
 *
 * The accepted input is an opening tag followed by any number of
 * statements of the following forms, separated by whitespace/comments:
 *
 *   $name = <scalar>;
 *   $name = array( <scalar> => <scalar>, ... );
 *   $name[<scalar>] = <scalar, or array as above>;
 *
 * Anything else makes the constructor throw an Exception.
 *
 * About an order of magnitude faster than ConfEditor(), but still an
 * order of magnitude slower than eval().
 */
class QuickArrayReader {
	/**
	 * Token id of the sentinel appended after the real tokens. Real token
	 * ids from token_get_all() are positive, so -1 can never collide.
	 */
	const END_OF_INPUT = -1;

	/** Parsed variables, keyed by variable name without the leading '$'. */
	public $vars = array();

	/**
	 * Tokenize $string and collect every assignment into $this->vars.
	 *
	 * @param $string String: PHP source, including the opening tag
	 * @throws Exception when the source leaves the supported subset
	 */
	function __construct( $string ) {
		$scalarTypes = array(
			T_LNUMBER => true,
			T_DNUMBER => true,
			T_STRING => true,
			T_CONSTANT_ENCAPSED_STRING => true,
		);
		$skipTypes = array(
			T_WHITESPACE => true,
			T_COMMENT => true,
			T_DOC_COMMENT => true,
		);
		$tokens = token_get_all( $string );
		$count = count( $tokens );

		// Sentinel: it is neither skippable nor scalar nor any punctuation,
		// so every "skip ahead" loop below stops on it and the check that
		// follows throws, instead of reading past the end of the array.
		$tokens[] = array( self::END_OF_INPUT, '' );

		for ( $i = 0; $i < $count; ) {
			while ( isset( $skipTypes[$tokens[$i][0]] ) ) {
				$i++;
			}
			if ( $i >= $count ) {
				// Only whitespace/comments were left: clean end of input.
				break;
			}

			$type = $tokens[$i][0];
			if ( $type === T_OPEN_TAG ) {
				$i++;
				continue;
			}
			if ( $type !== T_VARIABLE ) {
				throw $this->except( $tokens[$i], 'open tag, whitespace, or variable.' );
			}

			// '$messages' -> 'messages'
			$varname = trim( substr( $tokens[$i][1], 1 ) );
			// Tracked separately from $varindex, because a parsed index may
			// itself legitimately be null.
			$hasIndex = false;
			$varindex = null;

			do {
				$i++;
			} while ( isset( $skipTypes[$tokens[$i][0]] ) );

			if ( $tokens[$i] === '[' ) {
				do {
					$i++;
				} while ( isset( $skipTypes[$tokens[$i][0]] ) );

				if ( !isset( $scalarTypes[$tokens[$i][0]] ) ) {
					throw $this->except( $tokens[$i], 'scalar index' );
				}
				$varindex = $this->parseScalar( $tokens[$i] );
				$hasIndex = true;

				do {
					$i++;
				} while ( isset( $skipTypes[$tokens[$i][0]] ) );

				if ( $tokens[$i] !== ']' ) {
					throw $this->except( $tokens[$i], ']' );
				}
				do {
					$i++;
				} while ( isset( $skipTypes[$tokens[$i][0]] ) );
			}

			if ( $tokens[$i] !== '=' ) {
				throw $this->except( $tokens[$i], '=' );
			}
			do {
				$i++;
			} while ( isset( $skipTypes[$tokens[$i][0]] ) );

			if ( isset( $scalarTypes[$tokens[$i][0]] ) ) {
				$buildval = $this->parseScalar( $tokens[$i] );
			} elseif ( $tokens[$i][0] === T_ARRAY ) {
				do {
					$i++;
				} while ( isset( $skipTypes[$tokens[$i][0]] ) );

				if ( $tokens[$i] !== '(' ) {
					throw $this->except( $tokens[$i], '(' );
				}
				$buildval = array();
				while ( true ) {
					do {
						$i++;
					} while ( isset( $skipTypes[$tokens[$i][0]] ) );

					// Empty array, or closing paren after a trailing comma
					if ( $tokens[$i] === ')' ) {
						break;
					}

					// Reject non-scalar keys outright rather than silently
					// reusing the key of the previous element.
					if ( !isset( $scalarTypes[$tokens[$i][0]] ) ) {
						throw $this->except( $tokens[$i], 'scalar key or )' );
					}
					$key = $this->parseScalar( $tokens[$i] );

					do {
						$i++;
					} while ( isset( $skipTypes[$tokens[$i][0]] ) );

					if ( $tokens[$i][0] !== T_DOUBLE_ARROW ) {
						throw $this->except( $tokens[$i], '=>' );
					}
					do {
						$i++;
					} while ( isset( $skipTypes[$tokens[$i][0]] ) );

					// Same for values: nested arrays, variables etc. are
					// outside the supported subset and must not be papered
					// over with the previous element's value.
					if ( !isset( $scalarTypes[$tokens[$i][0]] ) ) {
						throw $this->except( $tokens[$i], 'scalar value' );
					}
					$buildval[$key] = $this->parseScalar( $tokens[$i] );

					do {
						$i++;
					} while ( isset( $skipTypes[$tokens[$i][0]] ) );

					if ( $tokens[$i] === ',' ) {
						continue;
					} elseif ( $tokens[$i] === ')' ) {
						break;
					} else {
						throw $this->except( $tokens[$i], ', or )' );
					}
				}
			} else {
				throw $this->except( $tokens[$i], 'scalar or array' );
			}

			if ( !$hasIndex ) {
				$this->vars[$varname] = $buildval;
			} else {
				if ( !isset( $this->vars[$varname] ) ) {
					$this->vars[$varname] = array();
				} elseif ( !is_array( $this->vars[$varname] ) ) {
					throw new Exception( "Cannot use index on non-array variable \$$varname" );
				}
				$this->vars[$varname][$varindex] = $buildval;
			}

			do {
				$i++;
			} while ( isset( $skipTypes[$tokens[$i][0]] ) );

			if ( $tokens[$i] !== ';' ) {
				throw $this->except( $tokens[$i], ';' );
			}
			$i++;
		}
	}

	/**
	 * Build (but do not throw) the exception for an unexpected token.
	 *
	 * @param $got Mixed: offending token, as returned by token_get_all()
	 * @param $expected String: human-readable description of what was expected
	 * @return Exception
	 */
	private function except( $got, $expected ) {
		if ( is_array( $got ) ) {
			if ( $got[0] === self::END_OF_INPUT ) {
				$got = 'end of input';
			} else {
				$got = token_name( $got[0] ) . " ('" . $got[1] . "')";
			}
		} else {
			$got = "'" . $got . "'";
		}
		return new Exception( "Expected $expected, got $got" );
	}

	/**
	 * Parse a scalar value in PHP
	 *
	 * Quoted strings are unquoted and unescaped; the bare words true, false
	 * and null (in any letter case) become the matching PHP values. Every
	 * other token text (numbers, other bare words) is returned unchanged as
	 * a string.
	 *
	 * @param $token Mixed: token array from token_get_all(), or its text
	 * @return mixed Parsed value
	 */
	function parseScalar( $token ) {
		$str = is_array( $token ) ? $token[1] : $token;
		$first = ( is_string( $str ) && $str !== '' ) ? $str[0] : '';

		if ( $first === '\'' ) {
			// Single-quoted string
			// @fixme trim() call is due to mystery bug where whitespace gets
			// appended to the token; without it we ended up reading in the
			// extra quote on the end!
			return strtr( substr( trim( $str ), 1, -1 ),
				array( '\\\'' => '\'', '\\\\' => '\\' ) );
		}
		if ( $first === '"' ) {
			// Double-quoted string
			// @fixme trim() call is due to mystery bug where whitespace gets
			// appended to the token; without it we ended up reading in the
			// extra quote on the end!
			return stripcslashes( substr( trim( $str ), 1, -1 ) );
		}

		// PHP keywords are case-insensitive. Compare the whole (trimmed)
		// word so that e.g. a constant named "trueFoo" is not taken for true.
		$word = strtolower( trim( $str ) );
		if ( $word === 'true' ) {
			return true;
		}
		if ( $word === 'false' ) {
			return false;
		}
		if ( $word === 'null' ) {
			return null;
		}

		// Must be some kind of numeric value, so let PHP's weak typing
		// be useful for a change
		return $str;
	}

	/**
	 * Fetch a parsed variable.
	 *
	 * @param $varname String: variable name without the leading '$'
	 * @return mixed The parsed value, or null if it was not set (or set to null)
	 */
	function getVar( $varname ) {
		return isset( $this->vars[$varname] ) ? $this->vars[$varname] : null;
	}
}
 173+
Property changes on: trunk/extensions/LocalisationUpdate/QuickArrayReader.php
___________________________________________________________________
Name: svn:eol-style
1174 + native
Index: trunk/extensions/LocalisationUpdate/LocalisationUpdate.php
@@ -39,6 +39,7 @@
4040 $wgExtensionMessagesFiles['LocalisationUpdate'] = $dir . 'LocalisationUpdate.i18n.php';
4141 $wgAutoloadClasses['LocalisationUpdate'] = $dir . 'LocalisationUpdate.class.php';
4242 $wgAutoloadClasses['LUDependency'] = $dir . 'LocalisationUpdate.class.php';
 43+$wgAutoloadClasses['QuickArrayReader'] = $dir . 'QuickArrayReader.php';
4344
4445 $wgHooks['LoadExtensionSchemaUpdates'][] = 'LocalisationUpdate::schemaUpdates';
4546
Index: trunk/extensions/LocalisationUpdate/LocalisationUpdate.class.php
@@ -475,9 +475,8 @@
476476 }
477477
478478 public static function parsePHP( $php, $varname ) {
479 - $ce = new ConfEditor("<?php $php");
480 - $vars = $ce->getVars();
481 - return @$vars[$varname];
 479+ $reader = new QuickArrayReader("<?php $php");
 480+ return $reader->getVar( $varname );
482481 }
483482 }
484483

Status & tagging log