r48806 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r48805‎ | r48806 | r48807 >
Date:11:36, 25 March 2009
Author:werdna
Status:ok
Tags:
Comment:
Abuse Filter Parser:
* Efficiency -- use /A instead of PREG_OFFSET_CAPTURE and comparing offsets.
* Expand error messages to enhance debugging.
* General code quality
Modified paths:
  • /trunk/extensions/AbuseFilter/AbuseFilter.i18n.php (modified) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.parser.php (modified) (history)
  • /trunk/extensions/AbuseFilter/tests/string.t (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
@@ -425,7 +425,9 @@
426426 $this->doLevelBoolOps( $result );
427427
428428 if( !($this->mCur->type == AFPToken::TKeyword && $this->mCur->value == 'then') )
429 - throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, array('then') );
 429+ throw new AFPUserVisibleException( 'expectednotfound',
 430+ $this->mCur->pos,
 431+ array('then', $this->mCur->type, $this->mCur->value ) );
430432 $this->move();
431433
432434
@@ -444,7 +446,9 @@
445447 }
446448
447449 if( !($this->mCur->type == AFPToken::TKeyword && $this->mCur->value == 'else') )
448 - throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, array('else') );
 450+ throw new AFPUserVisibleException( 'expectednotfound',
 451+ $this->mCur->pos,
 452+ array('else', $this->mCur->type, $this->mCur->value ) );
449453 $this->move();
450454
451455 if (!$isTrue) {
@@ -457,7 +461,9 @@
458462 }
459463
460464 if( !($this->mCur->type == AFPToken::TKeyword && $this->mCur->value == 'end') )
461 - throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, array('end') );
 465+ throw new AFPUserVisibleException( 'expectednotfound',
 466+ $this->mCur->pos,
 467+ array('end', $this->mCur->type, $this->mCur->value ) );
462468 $this->move();
463469
464470 if( $result->toBool() ) {
@@ -485,7 +491,9 @@
486492 }
487493
488494 if( !($this->mCur->type == AFPToken::TOp && $this->mCur->value == ':') )
489 - throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, array(':') );
 495+ throw new AFPUserVisibleException( 'expectednotfound',
 496+ $this->mCur->pos,
 497+ array(':', $this->mCur->type, $this->mCur->value ) );
490498 $this->move();
491499
492500 if (!$isTrue) {
@@ -657,7 +665,9 @@
658666 $this->move();
659667 $this->doLevelSet( $result );
660668 if( !($this->mCur->type == AFPToken::TBrace && $this->mCur->value == ')') )
661 - throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, array(')') );
 669+ throw new AFPUserVisibleException( 'expectednotfound',
 670+ $this->mCur->pos,
 671+ array(')', $this->mCur->type, $this->mCur->value ) );
662672 $this->move();
663673 } else {
664674 $this->doLevelFunction( $result );
@@ -670,7 +680,9 @@
671681 $func = self::$mFunctions[$this->mCur->value];
672682 $this->move();
673683 if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != '(' )
674 - throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, array('(') );
 684+ throw new AFPUserVisibleException( 'expectednotfound',
 685+ $this->mCur->pos,
 686+ array('(', $this->mCur->type, $this->mCur->value ) );
675687 wfProfileIn( __METHOD__."-loadargs" );
676688 $args = array();
677689 do {
@@ -681,7 +693,9 @@
682694 } while( $this->mCur->type == AFPToken::TComma );
683695
684696 if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != ')' ) {
685 - throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, array(')') );
 697+ throw new AFPUserVisibleException( 'expectednotfound',
 698+ $this->mCur->pos,
 699+ array(')', $this->mCur->type, $this->mCur->value ) );
686700 }
687701 $this->move();
688702
@@ -794,9 +808,8 @@
795809
796810 // Spaces
797811 $matches = array();
798 - if ( preg_match( '/\s+/u', $code, $matches, PREG_OFFSET_CAPTURE, $offset ) &&
799 - $matches[0][1] == $offset ) {
800 - $offset += strlen($matches[0][0]);
 812+ if ( preg_match( '/\s+/uA', $code, $matches, 0, $offset ) ) {
 813+ $offset += strlen($matches[0]);
801814 }
802815
803816 if( $offset >= strlen($code) ) return array( '', AFPToken::TNone, $code, $offset );
@@ -825,7 +838,8 @@
826839 if( $code[$offset] == '"' || $code[$offset] == "'" ) {
827840 $type = $code[$offset];
828841 $offset++;
829 - while( $offset < strlen($code) ) {
 842+ $strLen = strlen($code);
 843+ while( $offset < $strLen ) {
830844
831845 if( $code[$offset] == $type ) {
832846 $offset++;
@@ -839,28 +853,35 @@
840854 $tok .= substr( $code, $offset, $addLength );
841855 $offset += $addLength;
842856 } elseif( $code[$offset] == '\\' ) {
843 - if( $code[$offset + 1] == '\\' )
844 - $tok .= '\\';
845 - elseif( $code[$offset + 1] == $type )
846 - $tok .= $type;
847 - elseif( $code[$offset + 1] == 'n' )
848 - $tok .= "\n";
849 - elseif( $code[$offset + 1] == 'r' )
850 - $tok .= "\r";
851 - elseif( $code[$offset + 1] == 't' )
852 - $tok .= "\t";
853 - elseif( $code[$offset + 1] == 'x' ) {
854 - $chr = substr( $code, $offset + 2, 2 );
855 -
856 - if ( preg_match( '/^[0-9A-Fa-f]{2}$/', $chr ) ) {
857 - $chr = base_convert( $chr, 16, 10 );
858 - $tok .= chr($chr);
859 - $offset += 2; # \xXX -- 2 done later
860 - } else {
861 - $tok .= 'x';
862 - }
863 - } else {
864 - $tok .= "\\" . $code[$offset + 1];
 857+ switch( $code[$offset + 1] ) {
 858+ case '\\':
 859+ $tok .= '\\';
 860+ break;
 861+ case $type:
 862+ $tok .= $type;
 863+ break;
 864+ case 'n';
 865+ $tok .= "\n";
 866+ break;
 867+ case 'r':
 868+ $tok .= "\r";
 869+ break;
 870+ case 't':
 871+ $tok .= "\t";
 872+ break;
 873+ case 'x':
 874+ $chr = substr( $code, $offset + 2, 2 );
 875+
 876+ if ( preg_match( '/^[0-9A-Fa-f]{2}$/', $chr ) ) {
 877+ $chr = base_convert( $chr, 16, 10 );
 878+ $tok .= chr($chr);
 879+ $offset += 2; # \xXX -- 2 done later
 880+ } else {
 881+ $tok .= 'x';
 882+ }
 883+ break;
 884+ default:
 885+ $tok .= "\\" . $code[$offset + 1];
865886 }
866887
867888 $offset+=2;
@@ -882,15 +903,15 @@
883904
884905 foreach( self::$mOps as $op )
885906 $quoted_operators[] = preg_quote( $op, '/' );
886 - $operator_regex = '/('.implode('|', $quoted_operators).')/';
 907+ $operator_regex = '/('.implode('|', $quoted_operators).')/A';
887908 }
888909
889910 $matches = array();
890911
891 - preg_match( $operator_regex, $code, $matches, PREG_OFFSET_CAPTURE, $offset );
 912+ preg_match( $operator_regex, $code, $matches, 0, $offset );
892913
893 - if( count( $matches ) && $matches[0][1] == $offset ) {
894 - $tok = $matches[0][0];
 914+ if( count( $matches ) ) {
 915+ $tok = $matches[0];
895916 $offset += strlen( $tok );
896917 return array( $tok, AFPToken::TOp, $code, $offset );
897918 }
@@ -907,16 +928,13 @@
908929 10 => '[0-9.]',
909930 );
910931 $baseClass = '['.implode('', array_keys($bases)).']';
911 - $radixRegex = "/([0-9A-Fa-f]*(?:\.\d*)?)($baseClass)?/u";
 932+ $radixRegex = "/([0-9A-Fa-f]*(?:\.\d*)?)($baseClass)?/Au";
912933 $matches = array();
913934
914 - preg_match( $radixRegex, $code, $matches, PREG_OFFSET_CAPTURE, $offset );
915 -
916 - if ( count( $matches ) && $matches[0][1] == $offset ) {
917 - $input = $matches[1][0];
918 - $baseChar = @$matches[2][0];
 935+ if ( preg_match( $radixRegex, $code, $matches, 0, $offset ) ) {
 936+ $input = $matches[1];
 937+ $baseChar = @$matches[2];
919938 $num = null;
920 -
921939 // Sometimes the base char gets mixed in with the rest of it because
922940 // the regex targets hex, too.
923941 // This mostly happens with binary
@@ -941,7 +959,7 @@
942960 $num = $input;
943961 }
944962
945 - $offset += strlen( $matches[0][0] );
 963+ $offset += strlen( $matches[0] );
946964
947965 $float = in_string( '.', $input );
948966
@@ -961,12 +979,11 @@
962980 // The rest are considered IDs
963981
964982 // Regex match > PHP
965 - $idSymbolRegex = '/[0-9A-Za-z_]+/';
 983+ $idSymbolRegex = '/[0-9A-Za-z_]+/A';
966984 $matches = array();
967 - preg_match( $idSymbolRegex, $code, $matches, PREG_OFFSET_CAPTURE, $offset );
968985
969 - if ( $matches[0][1] == $offset ) {
970 - $tok = $matches[0][0];
 986+ if ( preg_match( $idSymbolRegex, $code, $matches, 0, $offset ) ) {
 987+ $tok = $matches[0];
971988
972989 $type = in_array( $tok, self::$mKeywords )
973990 ? AFPToken::TKeyword
@@ -1201,4 +1218,4 @@
12021219 return preg_match("#^".strtr(preg_quote($pattern, '#'), array('\*' => '.*', '\?' => '.'))."$#i", $string);
12031220 } // end
12041221
1205 -} // end if
\ No newline at end of file
 1222+} // end if
Index: trunk/extensions/AbuseFilter/tests/string.t
@@ -1 +1 @@
2 -"a\tb" = "a b" & "a\qb" = "aqb"
 2+"a\tb" = "a b" & "a\qb" = "a\qb"
Index: trunk/extensions/AbuseFilter/AbuseFilter.i18n.php
@@ -313,7 +313,7 @@
314314
315315 // Exceptions
316316 'abusefilter-exception-unexpectedatend' => 'Unexpected "$2" at character $1.',
317 - 'abusefilter-exception-expectednotfound' => 'Expected a $2 at character $1, not found.',
 317+ 'abusefilter-exception-expectednotfound' => 'Expected a $2 at character $1, not found (found $3 $4 instead).',
318318 'abusefilter-exception-unrecognisedkeyword' => 'Unrecognised keyword $2 at character $1.',
319319 'abusefilter-exception-unexpectedtoken' => 'Unexpected token "$3" (of type $2) at character $1.',
320320 'abusefilter-exception-unclosedstring' => 'Unclosed string starting at character $1.',

Status & tagging log