r49218 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r49217‎ | r49218 | r49219 >
Date:17:11, 5 April 2009
Author:vasilievvv
Status:ok
Tags:
Comment:
Introduce list (non-associated array) support into abuse filter parser.
Modified paths:
  • /trunk/extensions/AbuseFilter/AbuseFilter.i18n.php (modified) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.parser.php (modified) (history)
  • /trunk/extensions/AbuseFilter/tests/arrays.r (added) (history)
  • /trunk/extensions/AbuseFilter/tests/arrays.t (added) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest1.t (modified) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest2.t (modified) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest3.t (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
@@ -14,8 +14,13 @@
1515 * T_STRING - string, in "" or ''
1616 * T_KEYWORD - keyword
1717 * T_ID - identifier
 18+* T_STATEMENT_SEPARATOR - ;
 19+* T_SQUARE_BRACKET - [ or ]
1820
1921 Levels of parsing:
 22+* Entry - catches unexpected characters
 23+* Semicolon - ;
 24+* Set - :=
2025 * Conditionals (IF) - if-then-else-end, cond ? a : b
2126 * BoolOps (BO) - &, |, ^
2227 * CompOps (CO) - ==, !=, ===, !==, >, <, >=, <=
@@ -25,6 +30,7 @@
2631 * BoolNeg (BN) - ! operation
2732 * SpecialOperators (SO) - in and like
2833 * Unarys (U) - plus and minus in cases like -5 or -(2 * +2)
 34+* ListElement (LE) - list[number]
2935 * Braces (B) - ( and )
3036 * Functions (F)
3137 * Atom (A) - return value
@@ -40,6 +46,7 @@
4147 const TFloat = 'T_FLOAT';
4248 const TOp = 'T_OP';
4349 const TBrace = 'T_BRACE';
 50+ const TSquareBracket = 'T_SQUARE_BRACKET';
4451 const TComma = 'T_COMMA';
4552 const TStatementSeparator = 'T_STATEMENT_SEPARATOR';
4653
@@ -61,6 +68,7 @@
6269 const DNull = 'null';
6370 const DBool = 'bool';
6471 const DFloat = 'float';
 72+ const DList = 'list';
6573
6674 var $type;
6775 var $data;
@@ -79,6 +87,12 @@
8088 return new AFPData( self::DFloat, $var );
8189 elseif( is_bool( $var ) )
8290 return new AFPData( self::DBool, $var );
 91+ elseif( is_array( $var ) ) {
 92+ $result = array();
 93+ foreach( $var as $item )
 94+ $result[] = self::newFromPHPVar( $item );
 95+ return new AFPData( self::DList, $result );
 96+ }
8397 elseif( is_null( $var ) )
8498 return new AFPData();
8599 else
@@ -96,6 +110,24 @@
97111 if( $target == self::DNull ) {
98112 return new AFPData();
99113 }
 114+
 115+ if( $orig->type == self::DList ) {
 116+ if( $target == self::DBool )
 117+ return new AFPData( self::DBool, (bool)count( $orig->data ) );
 118+ if( $target == self::DFloat ) {
 119+ return new AFPData( self::DFloat, doubleval( count( $orig->data ) ) );
 120+ }
 121+ if( $target == self::DInt ) {
 122+ return new AFPData( self::DInt, intval( count( $orig->data ) ) );
 123+ }
 124+ if( $target == self::DString ) {
 125+ $lines = array();
 126+ foreach( $orig->data as $item )
 127+ $lines[] = $item->toString();
 128+ return new AFPData( self::DString, implode( "\n", $lines ) );
 129+ }
 130+ }
 131+
100132 if( $target == self::DBool ) {
101133 return new AFPData( self::DBool, (bool)$orig->data );
102134 }
@@ -108,6 +140,9 @@
109141 if( $target == self::DString ) {
110142 return new AFPData( self::DString, strval( $orig->data ) );
111143 }
 144+ if( $target == self::DList ) {
 145+ return new AFPData( self::DList, array( $orig ) );
 146+ }
112147 }
113148
114149 public static function boolInvert( $value ) {
@@ -119,6 +154,8 @@
120155 }
121156
122157 public static function keywordIn( $a, $b ) {
 158+ if( $b->type == self::DList )
 159+ return new AFPData( self::DBool, self::listContains( $a, $b ) );
123160 $a = $a->toString();
124161 $b = $b->toString();
125162
@@ -130,6 +167,8 @@
131168 }
132169
133170 public static function keywordContains( $a, $b ) {
 171+ if( $a->type == self::DList )
 172+ return new AFPData( self::DBool, self::listContains( $b, $a ) );
134173 $a = $a->toString();
135174 $b = $b->toString();
136175
@@ -140,6 +179,20 @@
141180 return new AFPData( self::DBool, in_string( $b, $a ) );
142181 }
143182
 183+ public static function listContains( $value, $list ) {
 184+ // Should use built-in PHP function somehow
 185+ foreach( $list->data as $item ) {
 186+ if( self::equals( $value, $item ) )
 187+ return true;
 188+ }
 189+ return false;
 190+ }
 191+
 192+ public static function equals( $d1, $d2 ) {
 193+ return $d1->type != self::DList && $d2->type != self::DList &&
 194+ $d1->toString() === $d2->toString();
 195+ }
 196+
144197 public static function keywordLike( $str, $pattern ) {
145198 $str = $str->toString();
146199 $pattern = $pattern->toString();
@@ -184,13 +237,13 @@
185238
186239 public static function compareOp( $a, $b, $op ) {
187240 if( $op == '==' || $op == '=' )
188 - return new AFPData( self::DBool, $a->toString() === $b->toString() );
 241+ return new AFPData( self::DBool, self::equals( $a, $b ) );
189242 if( $op == '!=' )
190 - return new AFPData( self::DBool, $a->toString() !== $b->toString() );
 243+ return new AFPData( self::DBool, !self::equals( $a, $b ) );
191244 if( $op == '===' )
192 - return new AFPData( self::DBool, $a->data == $b->data && $a->type == $b->type );
 245+ return new AFPData( self::DBool, $a->type == $b->type && self::equals( $a, $b ) );
193246 if( $op == '!==' )
194 - return new AFPData( self::DBool, $a->data !== $b->data || $a->type != $b->type );
 247+ return new AFPData( self::DBool, $a->type != $b->type || !self::equals( $a, $b ) );
195248 $a = $a->toString();
196249 $b = $b->toString();
197250 if( $op == '>' )
@@ -241,7 +294,9 @@
242295
243296 public static function sum( $a, $b ) {
244297 if( $a->type == self::DString || $b->type == self::DString )
245 - return new AFPData( self::DFloat, $a->toString() . $b->toString() );
 298+ return new AFPData( self::DString, $a->toString() . $b->toString() );
 299+ elseif( $a->type == self::DList && $b->type == self::DList )
 300+ return new AFPData( self::DList, array_merge( $a->toList(), $b->toList() ) );
246301 else
247302 return new AFPData( self::DFloat, $a->toFloat() + $b->toFloat() );
248303 }
@@ -266,6 +321,10 @@
267322 public function toInt() {
268323 return self::castTypes( $this, self::DInt )->data;
269324 }
 325+
 326+ public function toList() {
 327+ return self::castTypes( $this, self::DList )->data;
 328+ }
270329 }
271330
272331 class AFPParserState {
@@ -497,6 +556,45 @@
498557 $this->doLevelSet( $result );
499558 $this->setUserVariable( $varname, $result );
500559 return;
 560+ } elseif( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == '[' ) {
 561+ if( !$this->mVars->varIsSet( $varname ) ) {
 562+ throw new AFPUserVisibleException( 'unrecognisedvar',
 563+ $this->mCur->pos,
 564+ array( $var ) );
 565+ }
 566+ $list = $this->mVars->getVar( $varname );
 567+ if( $list->type != AFPData::DList )
 568+ throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, array() );
 569+ $list = $list->toList();
 570+ $this->move();
 571+ if( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']' ) {
 572+ $idx = 'new';
 573+ } else {
 574+ $this->setState( $prev ); $this->move();
 575+ $idx = new AFPData();
 576+ $this->doLevelSemicolon( $idx );
 577+ $idx = $idx->toInt();
 578+ if( !($this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']') )
 579+ throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos,
 580+ array(']', $this->mCur->type, $this->mCur->value ) );
 581+ if( count( $list ) <= $idx ) {
 582+ throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
 583+ array( $idx, count( $result->data ) ) );
 584+ }
 585+ }
 586+ $this->move();
 587+ if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == ':=' ) {
 588+ $this->move();
 589+ $this->doLevelSet( $result );
 590+ if( $idx == 'new' )
 591+ $list[] = $result;
 592+ else
 593+ $list[$idx] = $result;
 594+ $this->setUserVariable( $varname, new AFPData( AFPData::DList, $list ) );
 595+ return;
 596+ } else {
 597+ $this->setState( $prev );
 598+ }
501599 } else {
502600 $this->setState( $prev );
503601 }
@@ -734,17 +832,40 @@
735833 $op = $this->mCur->value;
736834 if( $this->mCur->type == AFPToken::TOp && ( $op == "+" || $op == "-" ) ) {
737835 $this->move();
738 - $this->doLevelBraces( $result );
 836+ $this->doLevelListElements( $result );
739837 wfProfileIn( __METHOD__ );
740838 if( $op == '-' ) {
741839 $result = AFPData::unaryMinus( $result );
742840 }
743841 wfProfileOut( __METHOD__ );
744842 } else {
745 - $this->doLevelBraces( $result );
 843+ $this->doLevelListElements( $result );
746844 }
747845 }
748 -
 846+
 847+ protected function doLevelListElements( &$result ) {
 848+ $this->doLevelBraces( $result );
 849+ while( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == '[' ) {
 850+ $idx = new AFPData();
 851+ $this->doLevelSemicolon( $idx );
 852+ if( !($this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']') ) {
 853+ throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos,
 854+ array(']', $this->mCur->type, $this->mCur->value ) );
 855+ }
 856+ $idx = $idx->toInt();
 857+ if( $result->type == AFPData::DList ) {
 858+ if( count( $result->data ) <= $idx ) {
 859+ throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
 860+ array( $idx, count( $result->data ) ) );
 861+ }
 862+ $result = $result->data[$idx];
 863+ } else {
 864+ throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, array() );
 865+ }
 866+ $this->move();
 867+ }
 868+ }
 869+
749870 protected function doLevelBraces( &$result ) {
750871 if( $this->mCur->type == AFPToken::TBrace && $this->mCur->value == '(' ) {
751872 if( $this->mShortCircuit ) {
@@ -850,11 +971,31 @@
851972 $this->mCur->pos,
852973 array($tok) );
853974 break;
854 - case AFPToken::TBrace:
855 - if( $this->mCur->value == ')' )
856 - return; // Handled at the entry level
857975 case AFPToken::TNone:
858976 return; // Handled at entry level
 977+ case AFPToken::TBrace:
 978+ if( $this->mCur->value == ')' )
 979+ return; // Handled at the entry level
 980+ case AFPToken::TSquareBracket:
 981+ if( $this->mCur->value == '[' ) {
 982+ $list = array();
 983+ for(;;) {
 984+ $this->move();
 985+ if( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']' )
 986+ break;
 987+ $item = new AFPData();
 988+ $this->doLevelSet( $item );
 989+ $list[] = $item;
 990+ if( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']' )
 991+ break;
 992+ if( $this->mCur->type != AFPToken::TComma )
 993+ throw new AFPUserVisibleException( 'expectednotfound',
 994+ $this->mCur->pos,
 995+ array(', or ]', $this->mCur->type, $this->mCur->value ) );
 996+ }
 997+ $result = new AFPData( AFPData::DList, $list );
 998+ break;
 999+ }
8591000 default:
8601001 throw new AFPUserVisibleException( 'unexpectedtoken',
8611002 $this->mCur->pos,
@@ -930,6 +1071,11 @@
9311072 return array( $code[$offset], AFPToken::TBrace, $code, $offset + 1 );
9321073 }
9331074
 1075+ // Square brackets
 1076+ if( $code[$offset] == '[' or $code[$offset] == ']' ) {
 1077+ return array( $code[$offset], AFPToken::TSquareBracket, $code, $offset + 1 );
 1078+ }
 1079+
9341080 // Semicolons
9351081 if ($code[$offset] == ';') {
9361082 return array( ';', AFPToken::TStatementSeparator, $code, $offset + 1 );
@@ -1112,6 +1258,9 @@
11131259 if( count( $args ) < 1 )
11141260 throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
11151261 array( 'len', 2, count($args) ) );
 1262+ if( $args[0]->type == AFPData::DList ) {
 1263+ return new AFPData( AFPData::DInt, count( $args[0]->data ) );
 1264+ }
11161265 $s = $args[0]->toString();
11171266 return new AFPData( AFPData::DInt, mb_strlen( $s, 'utf-8' ) );
11181267 }
@@ -1148,9 +1297,21 @@
11491298 if( count( $args ) < 1 )
11501299 throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
11511300 array( 'count', 1, count($args) ) );
1152 -
 1301+
 1302+ if( $args[0]->type == AFPData::DList && count( $args ) == 1 ) {
 1303+ return new AFPData( AFPData::DInt, count( $args[0]->data ) );
 1304+ } elseif( count( $args ) > 1 && $args[1]->type == AFPData::DList ) {
 1305+ $needle = $args[0];
 1306+ $haystack = $args[1]->toList();
 1307+ $count = 0;
 1308+ foreach( $haystack as $item )
 1309+ if( AFPData::equals( $needle, $item ))
 1310+ $count++;
 1311+ return new AFPData( AFPData::DInt, $count );
 1312+ }
 1313+
11531314 $offset = -1;
1154 -
 1315+
11551316 if (count($args) == 1) {
11561317 $count = count( explode( ",", $args[0]->toString() ) );
11571318 } else {
@@ -1224,7 +1385,15 @@
12251386 throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
12261387 array( 'contains_any', 2, count($args) ) );
12271388 }
1228 -
 1389+
 1390+ if( $args[0]->type == AFPData::DList ) {
 1391+ $list = array_shift( $args );
 1392+ foreach( $args as $arg )
 1393+ if( AFPData::listContains( $arg, $list ) )
 1394+ return new AFPData( AFPData::DBool, true );
 1395+ return new AFPData( AFPData::DBool, false );
 1396+ }
 1397+
12291398 $s = array_shift( $args );
12301399 $s = $s->toString();
12311400
@@ -1402,7 +1571,7 @@
14031572 throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
14041573 $val = $args[0];
14051574
1406 - return new AFPData( AFPData::DString, $val->data );
 1575+ return AFPData::castTypes( $val, AFPData::DString );
14071576 }
14081577
14091578 protected function castInt( $args ) {
@@ -1410,7 +1579,7 @@
14111580 throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
14121581 $val = $args[0];
14131582
1414 - return new AFPData( AFPData::DInt, intval($val->data) );
 1583+ return AFPData::castTypes( $val, AFPData::DInt );
14151584 }
14161585
14171586 protected function castFloat( $args ) {
@@ -1418,7 +1587,7 @@
14191588 throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
14201589 $val = $args[0];
14211590
1422 - return new AFPData( AFPData::DFloat, doubleval($val->data) );
 1591+ return AFPData::castTypes( $val, AFPData::DFloat );
14231592 }
14241593
14251594 protected function castBool( $args ) {
@@ -1426,7 +1595,7 @@
14271596 throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
14281597 $val = $args[0];
14291598
1430 - return new AFPData( AFPData::DBool, (bool)($val->data) );
 1599+ return AFPData::castTypes( $val, AFPData::DBool );
14311600 }
14321601
14331602 public static function regexErrorHandler( $errno, $errstr, $errfile, $errline, $context ) {
Index: trunk/extensions/AbuseFilter/tests/wptest1.t
@@ -1,5 +1,5 @@
22 /* Filter 30 from English Wikipedia (large deletion from article by new editors) */
3 -user_groups_test := "*";
 3+user_groups_test := ["*"];
44 new_size_test := 100;
55 article_namespace_test := 0;
66 edit_delta_test := -5000;
Index: trunk/extensions/AbuseFilter/tests/wptest2.t
@@ -1,5 +1,5 @@
22 /* Filter 61 from English Wikipedia (new user removing references) */
3 -user_groups_test := "*";
 3+user_groups_test := ["*"];
44 new_size_test := 100;
55 article_namespace_test := 0;
66 edit_delta_test := -22;
Index: trunk/extensions/AbuseFilter/tests/wptest3.t
@@ -1,5 +1,5 @@
22 /* Filter 18 from English Wikipedia (test type edits from clicking on edit bar) */
3 -user_groups_test := "*";
 3+user_groups_test := ["*"];
44 article_namespace_test := 0;
55 added_lines_test := "Hello world! '''Bold text''' [http://www.example.com link title]";
66
Index: trunk/extensions/AbuseFilter/tests/arrays.r
@@ -0,0 +1 @@
 2+MATCH
Index: trunk/extensions/AbuseFilter/tests/arrays.t
@@ -0,0 +1,12 @@
 2+array1 := [ 'a', 'b', 'c', ];
 3+array2 := [];
 4+array2[] := 'd';
 5+array2[] := 'g';
 6+array2[] := 'f';
 7+array2[1] := 'e';
 8+
 9+array3 := array1 + array2;
 10+array4 := [ [ 1, 2, 3 ], [ 4, 5, 6 ] ];
 11+
 12+(string(array3) == "a\nb\nc\nd\ne\nf" & !('b' in array2) & array1 contains 'c' & [ false, !(1;0), null ][1] & length(array3) == 6 &
 13+ array4[1][1] == 5 )
\ No newline at end of file
Index: trunk/extensions/AbuseFilter/AbuseFilter.i18n.php
@@ -338,6 +338,8 @@
339339 Expected $3 {{PLURAL:$3|argument|arguments}}, got $4',
340340 'abusefilter-exception-regexfailure' => 'Error in regular expression "$3" at character $1: "$2"',
341341 'abusefilter-exception-overridebuiltin' => 'Illegal overriding of built-in variable "$2" at character $1.',
 342+ 'abusefilter-exception-outofbounds' => 'Requesting non-existent list item $2 (list size = $3) at character $1.',
 343+ 'abusefilter-exception-notlist' => 'Requesting array item of non-array at character $1.',
342344
343345 // Actions
344346 'abusefilter-action-throttle' => 'Throttle',

Status & tagging log