r49206 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r49205‎ | r49206 | r49207 >
Date:11:47, 5 April 2009
Author:vasilievvv
Status:ok (Comments)
Tags:
Comment:
Abuse filter:
* Introduce := operator for setting variables
* Throw an exception when user tries to override built-in variable
* Fix UTF-8 handling in fnmatch() fallback
* Copy three main abuse filters from enwiki to test suite
* Fix update.php integration
Modified paths:
  • /trunk/extensions/AbuseFilter/AbuseFilter.hooks.php (modified) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.i18n.php (modified) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.parser.php (modified) (history)
  • /trunk/extensions/AbuseFilter/tests/vars.r (added) (history)
  • /trunk/extensions/AbuseFilter/tests/vars.t (added) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest1.r (added) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest1.t (added) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest2.r (added) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest2.t (added) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest3.r (added) (history)
  • /trunk/extensions/AbuseFilter/tests/wptest3.t (added) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
@@ -268,6 +268,16 @@
269269 }
270270 }
271271
 272+class AFPParserState {
 273+ var $pos, $token, $lastInput;
 274+
 275+ public function __construct( $token, $pos ) {
 276+ $this->token = $token;
 277+ $this->pos = $pos;
 278+ $this->lastInput = AbuseFilterParser::$lastHandledToken;
 279+ }
 280+}
 281+
272282 class AFPException extends MWException {}
273283
274284 // Exceptions that we might conceivably want to report to ordinary users
@@ -325,6 +335,7 @@
326336 '**', '*', // Multiplication/exponentiation
327337 '/', '+', '-', '%', // Other arithmetic
328338 '&', '|', '^', // Logic
 339+ ':=', // Setting
329340 '?', ':', // Ternery
330341 '<=','<', // Less than
331342 '>=', '>', // Greater than
@@ -339,6 +350,8 @@
340351
341352 static $funcCache = array();
342353
 354+ static $lastHandledToken = array();
 355+
343356 public function __construct() {
344357 $this->resetState();
345358 }
@@ -391,7 +404,18 @@
392405 wfProfileOut( __METHOD__ );
393406 return $this->mCur = $token;
394407 }
395 -
 408+
 409+ // getState() and setState() function allows parser state to be rollbacked to several tokens back
 410+ protected function getState() {
 411+ return new AFPParserState( $this->mCur, $this->mPos );
 412+ }
 413+
 414+ protected function setState( AFPParserState $state ) {
 415+ $this->mCur = $state->token;
 416+ $this->mPos = $state->pos;
 417+ self::$lastHandledToken = $state->lastInput;
 418+ }
 419+
396420 protected function skipOverBraces() {
397421 if( !($this->mCur->type == AFPToken::TBrace && $this->mCur->value == '(') || !$this->mShortCircuit ) {
398422 return;
@@ -446,7 +470,7 @@
447471
448472 /** Handles unexpected characters after the expression */
449473 protected function doLevelEntry( &$result ) {
450 - $this->doLevelSet( $result );
 474+ $this->doLevelSemicolon( $result );
451475
452476 if( $this->mCur->type != AFPToken::TNone ) {
453477 throw new AFPUserVisibleException( 'unexpectedatend', $this->mCur->pos, array($this->mCur->type) );
@@ -454,14 +478,32 @@
455479 }
456480
457481 /** Handles multiple expressions */
458 - protected function doLevelSet( &$result ) {
 482+ protected function doLevelSemicolon( &$result ) {
459483 do {
460484 $this->move();
461 - $lastPos = $this->mCur->pos;
462 - $this->doLevelConditions( $result );
 485+ if( $this->mCur->type != AFPToken::TStatementSeparator )
 486+ $this->doLevelSet( $result );
463487 } while ($this->mCur->type == AFPToken::TStatementSeparator);
464488 }
465489
 490+ protected function doLevelSet( &$result ) {
 491+ if( $this->mCur->type == AFPToken::TID ) {
 492+ $varname = $this->mCur->value;
 493+ $prev = $this->getState();
 494+ $this->move();
 495+
 496+ if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == ':=' ) {
 497+ $this->move();
 498+ $this->doLevelSet( $result );
 499+ $this->setUserVariable( $varname, $result );
 500+ return;
 501+ } else {
 502+ $this->setState( $prev );
 503+ }
 504+ }
 505+ $this->doLevelConditions( $result );
 506+ }
 507+
466508 protected function doLevelConditions( &$result ) {
467509 if( $this->mCur->type == AFPToken::TKeyword && $this->mCur->value == 'if' ) {
468510 $this->move();
@@ -708,7 +750,7 @@
709751 if( $this->mShortCircuit ) {
710752 $this->skipOverBraces();
711753 } else {
712 - $this->doLevelSet( $result );
 754+ $this->doLevelSemicolon( $result );
713755 }
714756 if( !($this->mCur->type == AFPToken::TBrace && $this->mCur->value == ')') )
715757 throw new AFPUserVisibleException( 'expectednotfound',
@@ -741,7 +783,7 @@
742784 $args = array();
743785 do {
744786 $r = new AFPData();
745 - $this->doLevelSet( $r );
 787+ $this->doLevelSemicolon( $r );
746788 $args[] = $r;
747789 } while( $this->mCur->type == AFPToken::TComma );
748790
@@ -825,6 +867,8 @@
826868 wfProfileOut( __METHOD__ );
827869 }
828870
 871+ /* End of levels */
 872+
829873 protected function getVarValue( $var ) {
830874 wfProfileIn( __METHOD__ );
831875 $var = strtolower($var);
@@ -842,21 +886,24 @@
843887 return $val;
844888 }
845889 }
846 -
847 - /* End of levels */
848 -
 890+
 891+ protected function setUserVariable( $name, $value ) {
 892+ $builderValues = AbuseFilter::getBuilderValues();
 893+ if( array_key_exists( $name, $builderValues['vars'] ) )
 894+ throw new AFPUserVisibleException( 'overridebuiltin', $this->mCur->pos, array( $name ) );
 895+ $this->mVars->setVar( $name, $value );
 896+ }
 897+
849898 static function nextToken( $code, $offset ) {
850899 $tok = '';
851900
852 - static $lastInput = array();
853 -
854901 // Check for infinite loops
855 - if ( $lastInput == array( $code, $offset ) ) {
 902+ if ( self::$lastHandledToken == array( $code, $offset ) ) {
856903 // Should never happen
857904 throw new AFPException( "Entered infinite loop. Offset $offset of $code" );
858905 }
859906
860 - $lastInput = array( $code, $offset );
 907+ self::$lastHandledToken = array( $code, $offset );
861908
862909 // Spaces
863910 $matches = array();
@@ -1345,7 +1392,7 @@
13461393 $varName = $args[0]->toString();
13471394 $value = $args[1];
13481395
1349 - $this->mVars->setVar( $varName, $value );
 1396+ $this->setUserVariable( $varName, $value );
13501397
13511398 return $value;
13521399 }
@@ -1395,7 +1442,7 @@
13961443 if(!function_exists('fnmatch')) {
13971444
13981445 function fnmatch($pattern, $string) {
1399 - return preg_match("#^".strtr(preg_quote($pattern, '#'), array('\*' => '.*', '\?' => '.'))."$#i", $string);
 1446+ return preg_match("#^".strtr(preg_quote($pattern, '#'), array('\*' => '.*', '\?' => '.'))."$#iu", $string);
14001447 } // end
14011448
14021449 } // end if
Index: trunk/extensions/AbuseFilter/tests/wptest1.r
@@ -0,0 +1 @@
 2+MATCH
Index: trunk/extensions/AbuseFilter/tests/wptest2.r
@@ -0,0 +1 @@
 2+MATCH
Index: trunk/extensions/AbuseFilter/tests/wptest3.r
@@ -0,0 +1 @@
 2+MATCH
Index: trunk/extensions/AbuseFilter/tests/wptest1.t
@@ -0,0 +1,9 @@
 2+/* Filter 30 from English Wikipedia (large deletion from article by new editors) */
 3+user_groups_test := "*";
 4+new_size_test := 100;
 5+article_namespace_test := 0;
 6+edit_delta_test := -5000;
 7+added_lines_test := '';
 8+
 9+!("autoconfirmed" in user_groups_test) & (new_size_test > 50) & (article_namespace_test == 0) &
 10+ (edit_delta_test < -2000) & !("#redirect" in lcase(added_lines_test))
Index: trunk/extensions/AbuseFilter/tests/wptest2.t
@@ -0,0 +1,21 @@
 2+/* Filter 61 from English Wikipedia (new user removing references) */
 3+user_groups_test := "*";
 4+new_size_test := 100;
 5+article_namespace_test := 0;
 6+edit_delta_test := -22;
 7+added_lines_test := '<ref name="bah">test</ref> test2!';
 8+removed_lines_test := '<ref name="bah">test</ref><ref name="wah">test2</ref>';
 9+
 10+!("autoconfirmed" in user_groups_test)
 11+/* this edit_delta ignores large blankings that are treated by another filter */
 12+& edit_delta_test >= -1000
 13+& article_namespace_test == 0
 14+/* No added lines usually mean a blanking which is dealt with by other filter */
 15+& length(added_lines_test) != 0
 16+& !("#redirect" in lcase(added_lines_test))
 17+/*Counts of more reference tags are removed than added */
 18+& (rcount("(<ref>|<ref\sname|</ref>)",removed_lines_test) > rcount("(<ref>|<ref\sname|</ref>)",added_lines_test))
 19+/*Excludes changing to the named reference format and removing closing tags attached to formerly named refs. Unequality is to account for closing the first named tag */
 20+& !(rcount("<ref>",removed_lines_test) = rcount("<ref\sname",added_lines_test) | rcount("</ref>",removed_lines_test) <= rcount("<ref\sname",added_lines_test))
 21+/*Excludes removal of references to Wikipedia itself */
 22+& !(count("http://en.wikipedia.org",removed_lines_test) > count("http://en.wikipedia.org",added_lines_test))
Index: trunk/extensions/AbuseFilter/tests/wptest3.t
@@ -0,0 +1,28 @@
 2+/* Filter 18 from English Wikipedia (test type edits from clicking on edit bar) */
 3+user_groups_test := "*";
 4+article_namespace_test := 0;
 5+added_lines_test := "Hello world! '''Bold text''' [http://www.example.com link title]";
 6+
 7+(article_namespace_test == 0) &
 8+!("autoconfirmed" in user_groups_test) &
 9+(contains_any(added_lines_test,
 10+ "'''Bold text'''",
 11+ "''Italic text''",
 12+ "[[Link title]]",
 13+ "[http://www.example.com link title]",
 14+ "== Headline text ==",
 15+ "[[File:Example.jpg]]",
 16+ "[[Media:Example.ogg]]",
 17+ "<math>Insert formula here</math>",
 18+ "<nowiki>Insert non-formatted text here</nowiki>",
 19+ "#REDIRECT [[Target page name]]",
 20+ "<s>Strike-through text</s>",
 21+ "<sup>Superscript text</sup>",
 22+ "<sub>Subscript text</sub>",
 23+ "<small>Small Text</small>",
 24+ "<!-- Comment -->",
 25+ "Image:Example.jpg|Caption",
 26+ "<ref>Insert footnote text here</ref>",
 27+ "Ǣ ǣ ǖ ǘ ǚ ǜ Ă"
 28+))
 29+
Index: trunk/extensions/AbuseFilter/tests/vars.r
@@ -0,0 +1 @@
 2+MATCH
Index: trunk/extensions/AbuseFilter/tests/vars.t
@@ -0,0 +1,5 @@
 2+/* Variables test */
 3+test_var1 := test_var2 := "aa";
 4+set( 'ResulT', set_var( 'TV3', "bb" ) );
 5+
 6+str_replace( test_var1, test_var2, tv3 ) == result;
\ No newline at end of file
Index: trunk/extensions/AbuseFilter/AbuseFilter.i18n.php
@@ -337,6 +337,7 @@
338338 'abusefilter-exception-notenoughargs' => 'Not enough arguments to function $2 called at character $1.
339339 Expected $3 {{PLURAL:$3|argument|arguments}}, got $4',
340340 'abusefilter-exception-regexfailure' => 'Error in regular expression "$3" at character $1: "$2"',
 341+ 'abusefilter-exception-overridebuiltin' => 'Illegal overriding of built-in variable "$2" at character $1.',
341342
342343 // Actions
343344 'abusefilter-action-throttle' => 'Throttle',
Index: trunk/extensions/AbuseFilter/AbuseFilter.hooks.php
@@ -157,14 +157,12 @@
158158
159159 // DB updates
160160 if( $wgDBtype == 'mysql' ) {
161 - $wgExtNewTables = array_merge( $wgExtNewTables,
162 - array(
163 - array( 'abuse_filter', "$dir/abusefilter.tables.sql" ),
164 - array( 'abuse_filter_history', "$dir/db_patches/patch-abuse_filter_history.sql" ),
165 - array( 'abuse_filter_history', 'afh_changed_fields', "$dir/db_patches/patch-afh_changed_fields.sql" ),
166 - array( 'abuse_filter', 'af_deleted', "$dir/db_patches/patch-af_deleted.sql" ),
167 - array( 'abuse_filter', 'af_actions', "$dir/db_patches/patch-af_actions.sql" ),
168 - ) );
 161+ $wgExtNewTables[] = array( 'abuse_filter', "$dir/abusefilter.tables.sql" );
 162+ $wgExtNewTables[] = array( 'abuse_filter_history', "$dir/db_patches/patch-abuse_filter_history.sql" );
 163+ $wgExtNewFields[] = array( 'abuse_filter_history', 'afh_changed_fields', "$dir/db_patches/patch-afh_changed_fields.sql" );
 164+ $wgExtNewFields[] = array( 'abuse_filter', 'af_deleted', "$dir/db_patches/patch-af_deleted.sql" );
 165+ $wgExtNewFields[] = array( 'abuse_filter', 'af_actions', "$dir/db_patches/patch-af_actions.sql" );
 166+ $wgExtNewFields[] = array( 'abuse_filter', 'af_global', "$dir/db_patches/patch-global_filters.sql" );
169167 } else if ( $wgDBtype == 'postgres' ) {
170168 $wgExtNewTables = array_merge( $wgExtNewTables,
171169 array(

Comments

#Comment by Werdna (talk | contribs)   11:53, 5 April 2009

This looks okay to me. Thanks very much!

Status & tagging log