r48849 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r48848 | r48849 | r48850 >
Date:02:03, 26 March 2009
Author:werdna
Status:ok (Comments)
Tags:
Comment:
Add contains_any function, for searching a single haystack for multiple needles. Implemented with FSS with a fallback to a for loop, so it should be really fast.
Modified paths:
  • /trunk/extensions/AbuseFilter/AbuseFilter.class.php (modified) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.i18n.php (modified) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.parser.php (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
@@ -302,6 +302,7 @@
303 303 'count' => 'funcCount',
304 304 'rcount' => 'funcRCount',
305 305 'ip_in_range' => 'funcIPInRange',
 306+ 'contains_any' => 'funcContainsAny',
306 307 );
307 308
308 309 // Order is important. The punctuation-matching regex requires that
@@ -1140,6 +1141,38 @@
1141 1142 return new AFPData( AFPData::DString, $s );
1142 1143 }
1143 1144
 1145+ protected function funcContainsAny( $args ) {
 1146+ if (count( $args ) < 2 ) {
 1147+ throw new AFPException( "Not enough params to ".__METHOD__ );
 1148+ }
 1149+
 1150+ $s = array_shift( $args );
 1151+ $s = $s->toString();
 1152+
 1153+ $searchStrings = array();
 1154+
 1155+ foreach( $args as $arg ) {
 1156+ $searchStrings[] = $arg->toString();
 1157+ }
 1158+
 1159+ if ( function_exists( 'fss_prep_search' ) ) {
 1160+ $fss = fss_prep_search( $searchStrings );
 1161+ $result = fss_exec_search( $fss, $s );
 1162+
 1163+ $ok = is_array($result);
 1164+ } else {
 1165+ $ok = false;
 1166+ foreach( $searchStrings as $needle ) {
 1167+ if (in_string( $needle, $s ) ) {
 1168+ $ok = true;
 1169+ break;
 1170+ }
 1171+ }
 1172+ }
 1173+
 1174+ return new AFPData( AFPData::DBool, $ok );
 1175+ }
 1176+
1144 1177 protected function ccnorm( $s ) {
1145 1178 if (!class_exists( 'AntiSpoof' ) ) {
1146 1179 return $s;
Index: trunk/extensions/AbuseFilter/AbuseFilter.class.php
@@ -65,6 +65,7 @@
66 66 'rmwhitespace(text)' => 'rmwhitespace',
67 67 'rmspecials(text)' => 'rmspecials',
68 68 'ip_in_range(ip, range)' => 'ip_in_range',
 69+ 'contains_any(haystack,needle1,needle2,needle3)' => 'contains-any',
69 70 ),
70 71 'vars' => array(
71 72 'accountname' => 'accountname',
Index: trunk/extensions/AbuseFilter/AbuseFilter.i18n.php
@@ -246,6 +246,7 @@
247 247 'abusefilter-edit-builder-funcs-rmwhitespace' => 'Remove whitespace',
248 248 'abusefilter-edit-builder-funcs-rmspecials' => 'Remove special characters',
249 249 'abusefilter-edit-builder-funcs-ip_in_range' => 'Is IP in range?',
 250+ 'abusefilter-edit-builder-funcs-contains-any' => 'Search string for multiple substrings',
250 251 'abusefilter-edit-builder-group-vars' => 'Variables',
251 252 'abusefilter-edit-builder-vars-accountname' => 'Account name (on account creation)',
252 253 'abusefilter-edit-builder-vars-action' => 'Action',

Comments

#Comment by Tim Starling (talk | contribs)   05:46, 20 May 2009

The fastest way to do this in pure PHP is by creating a regex with the study modifier, "/needle1|needle2|needle3/S". I benchmarked all the ways of doing it while developing SpamBlacklist. Apparently it creates a tree structure similar to the one used by FSS. I would recommend changing this to use that method. BTW please don't mark your own code as "ok".

Status & tagging log