r46040 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r46039‎ | r46040 | r46041 >
Date:23:11, 22 January 2009
Author:werdna
Status:deferred
Tags:
Comment:
MAJOR refactoring of AbuseFilter class, including fixing a few bugs which I don't remember right now.

Mostly, a general cleanup of some code that was written in a really ugly way, and splitting of huge methods into five or six functions to make a much more logical flow.
Modified paths:
  • /branches/change-tagging/extensions/AbuseFilter/AbuseFilter.class.php (modified) (history)

Diff [purge]

Index: branches/change-tagging/extensions/AbuseFilter/AbuseFilter.class.php
@@ -149,134 +149,160 @@
150150
151151 return $result;
152152 }
153 -
154 - public static function filterAction( $vars, $title ) {
155 - global $wgUser,$wgMemc;
156 -
 153+
 154+ /** Returns an associative array of filters which were tripped */
 155+ public static function checkAllFilters( $vars ) {
157156 // Fetch from the database.
158157 $dbr = wfGetDB( DB_SLAVE );
159158 $res = $dbr->select( 'abuse_filter', '*', array( 'af_enabled' => 1, 'af_deleted' => 0 ) );
160 -
161 - $blocking_filters = array();
162 - $log_entries = array();
163 - $log_template = array( 'afl_user' => $wgUser->getId(), 'afl_user_text' => $wgUser->getName(),
164 - 'afl_var_dump' => serialize( $vars ), 'afl_timestamp' => $dbr->timestamp(wfTimestampNow()),
165 - 'afl_namespace' => $title->getNamespace(), 'afl_title' => $title->getDBKey(), 'afl_ip' => wfGetIp() );
166 - $doneActionsByFilter = array();
167 - $filter_matched = array();
168 -
 159+
169160 while ( $row = $dbr->fetchObject( $res ) ) {
 161+ // Store the row somewhere convenient
170162 self::$filters[$row->af_id] = $row;
 163+
 164+ // Check conditions...
171165 $pattern = trim($row->af_pattern);
172166 if ( self::checkConditions( $pattern, $vars ) ) {
173 - $blocking_filters[$row->af_id] = $row;
174 -
175 - $newLog = $log_template;
176 - $newLog['afl_filter'] = $row->af_id;
177 - $newLog['afl_action'] = $vars['ACTION'];
178 -
179 - $log_entries[] = $newLog;
180 -
181 - $doneActionsByFilter[$row->af_id] = array();
 167+ // Record match.
182168 $filter_matched[$row->af_id] = true;
183169 } else {
 170+ // Record non-match.
184171 $filter_matched[$row->af_id] = false;
185172 }
186173 }
187 -
188 - //// Clean up from checking all the filters
189 -
 174+
 175+ // Update statistics, and disable filters which are over-blocking.
190176 self::recordStats( $filter_matched );
191 -
192 - if (count($blocking_filters) == 0 ) {
193 - // No problems.
194 - return true;
195 - }
196 -
 177+
 178+ return $filter_matched;
 179+ }
 180+
 181+ /** Returns an array [ list of actions taken by filter, error message to display, if any ] */
 182+ public static function executeFilterActions( $filters, $title, $vars ) {
 183+ $dbr = wfGetDB( DB_SLAVE );
197184 // Retrieve the consequences.
198 - $res = $dbr->select( 'abuse_filter_action', '*', array( 'afa_filter' => array_keys( $blocking_filters ) ), __METHOD__, array( "ORDER BY" => " (afa_consequence in ('throttle','warn'))-(afa_consequence in ('disallow')) desc" ) );
199 - // We want throttles, warnings first, as they have a bit of a special treatment. We want disallow last, so that it can be "eaten" by other actions.
200 -
201 - $actions_done = array();
202 - $throttled_filters = array();
203 -
204 - $display = '';
205 -
 185+ $res = $dbr->select( 'abuse_filter_action', '*', array( 'afa_filter' => $filters ), __METHOD__ );
 186+
 187+ $actionsByFilter = array_fill_keys( $filters, array() );
 188+ $actionsTaken = array_fill_keys( $filters, array() );
 189+
 190+ // Categorise consequences by filter.
206191 while ( $row = $dbr->fetchObject( $res ) ) {
207 - // Don't do the same action-parameters twice
208 - $action_key = md5( $row->afa_consequence . $row->afa_parameters );
209 -
210 - // Skip if we've already done this action-parameter, or a passive action has sufficed.
211 - $skipAction = ( in_array( $action_key, $actions_done ) || in_array( $row->afa_filter, $throttled_filters ) );
212 -
213 - // Don't disallow if we've already done something active. It produces two messages, where one would suffice.
214 - if ($row->afa_consequence == 'disallow' && !$skipAction) {
215 - $doneActiveActions = array_diff( $doneActionsByFilter[$row->afa_filter], array( 'throttle', 'warn' /* passive actions */ ) );
216 - $skipAction = (bool)count($doneActiveActions);
 192+ $actionsByFilter[$row->afa_filter][$row->afa_consequence] = array( 'action' => $row->afa_consequence, 'parameters' => explode( "\n", $row->afa_parameters ) );
 193+ }
 194+
 195+ wfLoadExtensionMessages( 'AbuseFilter' );
 196+
 197+ $messages = array();
 198+
 199+ foreach( $actionsByFilter as $filter => $actions ) {
 200+ // Special-case handling for warnings.
 201+ if ( !empty( $actions['warn'] ) ) {
 202+ $parameters = $actions['warn']['parameters'];
 203+ $warnKey = 'abusefilter-warned-'.$title->getPrefixedText();
 204+ if (!isset($_SESSION[$warnKey]) || !$_SESSION[$warnKey]) {
 205+ $_SESSION[$warnKey] = true;
 206+
 207+ // Threaten them a little bit
 208+ $msg = ( !empty($parameters[0]) && strlen($parameters[0]) ) ? $parameters[0] : 'abusefilter-warning';
 209+ $messages[] = wfMsgNoTrans( $msg, self::$filters[$filter]->af_public_comments ) . "<br />\n";
 210+
 211+ $actionsTaken[] = 'warn';
 212+
 213+ continue; // Don't do anything else.
 214+ } else {
 215+ // We already warned them
 216+ $_SESSION[$warnKey] = false;
 217+ }
 218+
 219+ unset( $actions['warn'] );
217220 }
218 -
219 - if ( !$skipAction ) {
220 - // Unpack parameters
221 - $parameters = explode( "\n", $row->afa_parameters );
222 -
223 - // Take the action.
224 - $result = self::takeConsequenceAction( $row->afa_consequence, $parameters, $title, $vars, $display, $continue, $blocking_filters[$row->afa_filter]->af_public_comments );
225 -
226 - // Don't do it twice.
227 - $doneActionsByFilter[$row->afa_filter][] = $row->afa_consequence;
228 - $actions_done[] = $action_key;
229 -
230 - // Only execute other actions for a filter if that filter's rate limiter has been tripped.
231 - if (!$result) {
232 - $throttled_filters[] = $row->afa_filter;
 221+
 222+ if ( !empty( $actions['throttle'] ) ) {
 223+ $parameters = $actions['throttle']['parameters'];
 224+ $throttleId = array_shift( $parameters );
 225+ list( $rateCount, $ratePeriod ) = explode( ',', array_shift( $parameters ) );
 226+
 227+ $hitThrottle = false;
 228+
 229+ // The rest are throttle-types.
 230+ foreach( $parameters as $throttleType ) {
 231+ $hitThrottle = $hitThrottle || self::isThrottled( $throttleId, $throttleType, $title, $rateCount, $ratePeriod );
233232 }
234 - } else {
235 - // Ignore it, until we hit the rate limit.
 233+
 234+ unset( $actions['throttle'] );
 235+ if (!$hitThrottle) {
 236+ $actionsTaken[] = 'throttle';
 237+ continue;
 238+ }
236239 }
 240+
 241+ // Do the rest of the actions
 242+ foreach( $actions as $action => $info ) {
 243+ $newMsg = self::takeConsequenceAction( $action, $info['parameters'], $title, $vars, self::$filters[$filter]->af_public_comments );
 244+
 245+ if ($newMsg)
 246+ $messages[] = $newMsg;
 247+ $actionsTaken[$filter][] = $action;
 248+ }
237249 }
 250+
 251+ return array( $actionsTaken, implode( "\n", $messages ) );
 252+ }
 253+
 254+ public static function filterAction( $vars, $title ) {
 255+ global $wgUser,$wgMemc;
 256+
 257+ $dbr = wfGetDB( DB_SLAVE );
 258+
 259+ $filter_matched = self::checkAllFilters( $vars );
 260+
 261+ // Short-cut any remaining code if no filters were hit.
 262+ if ( count( array_filter( $filter_matched) ) == 0 ) {
 263+ die( var_dump( $filter_matched ) );
 264+ return true;
 265+ }
 266+
 267+ list( $actions_taken, $error_msg ) = self::executeFilterActions( array_keys( array_filter( $filter_matched ) ), $title, $vars );
 268+
 269+ // Create a template
 270+ $log_template = array( 'afl_user' => $wgUser->getId(), 'afl_user_text' => $wgUser->getName(),
 271+ 'afl_var_dump' => serialize( $vars ), 'afl_timestamp' => $dbr->timestamp(wfTimestampNow()),
 272+ 'afl_namespace' => $title->getNamespace(), 'afl_title' => $title->getDBKey(), 'afl_ip' => wfGetIp() );
 273+
 274+ self::addLogEntries( $actions_taken, $log_template, $vars['ACTION'] );
238275
 276+ return $error_msg;
 277+ }
 278+
 279+ public static function addLogEntries( $actions_taken, $log_template, $action ) {
239280 $dbw = wfGetDB( DB_MASTER );
240 -
241 - // Log it
242 - foreach( $log_entries as $index => $entry ) {
243 - $actions = $log_entries[$index]['afl_actions'] = implode( ',', $doneActionsByFilter[$entry['afl_filter']] );
244 -
245 - if ($actions == 'throttle') {
246 - // Just a throttle, don't record it.
247 - unset($log_entries[$index]);
 281+
 282+ $log_rows = array();
 283+
 284+ foreach( $actions_taken as $filter => $actions ) {
 285+ $thisLog = $log_template;
 286+ $thisLog['afl_filter'] = $filter;
 287+ $thisLog['afl_action'] = $action;
 288+ $thisLog['afl_actions'] = implode( ',', $actions_taken[$filter] );
 289+
 290+ // Don't log if we were only throttling.
 291+ if ($thisLog['afl_actions'] != 'throttle') {
 292+ $log_rows[] = $thisLog;
248293 }
249 -
250 - // Increment the hit counter
251 - $dbw->update( 'abuse_filter', array( 'af_hit_count=af_hit_count+1' ), array( 'af_id' => $entry['afl_filter'] ), __METHOD__ );
252294 }
253 -
254 - if (!count($log_entries)) {
 295+
 296+ if (!count($log_rows)) {
255297 return;
256298 }
257 -
258 - $dbw->insert( 'abuse_filter_log', $log_entries, __METHOD__ );
259 -
260 - return $display;
 299+
 300+ $dbw->insert( 'abuse_filter_log', $log_rows, __METHOD__ );
261301 }
262302
263 - public static function takeConsequenceAction( $action, $parameters, $title, $vars, &$display, &$continue, $rule_desc ) {
 303+ public static function takeConsequenceAction( $action, $parameters, $title, $vars, $rule_desc ) {
264304 wfLoadExtensionMessages( 'AbuseFilter' );
 305+ $display = '';
265306 switch ($action) {
266 - case 'warn':
267 - if (!isset($_SESSION['abusefilter-warned']) || !$_SESSION['abusefilter-warned']) {
268 - $_SESSION['abusefilter-warned'] = true;
269 -
270 - // Threaten them a little bit
271 - $msg = ( !empty($parameters[0]) && strlen($parameters[0]) ) ? $parameters[0] : 'abusefilter-warning';
272 - $display .= wfMsgNoTrans( $msg, $rule_desc ) . "<br />\n";
273 -
274 - return false; // Don't apply the other stuff yet.
275 - } else {
276 - // We already warned them
277 - $_SESSION['abusefilter-warned'] = false;
278 - }
279 - break;
280 -
281307 case 'disallow':
282308 if (strlen($parameters[0])) {
283309 $display .= wfMsgNoTrans( $parameters[0], $rule_desc ) . "\n";
@@ -352,20 +378,6 @@
353379
354380 $display .= wfMsgNoTrans( 'abusefilter-blocked-display', $rule_desc ) ."<br />\n";
355381 break;
356 -
357 - case 'throttle':
358 - $throttleId = array_shift( $parameters );
359 - list( $rateCount, $ratePeriod ) = explode( ',', array_shift( $parameters ) );
360 -
361 - $hitThrottle = false;
362 -
363 - // The rest are throttle-types.
364 - foreach( $parameters as $throttleType ) {
365 - $hitThrottle = $hitThrottle || self::isThrottled( $throttleId, $throttleType, $title, $rateCount, $ratePeriod );
366 - }
367 -
368 - return $hitThrottle;
369 - break;
370382 case 'degroup':
371383 global $wgUser;
372384 if (!$wgUser->isAnon()) {
@@ -418,7 +430,7 @@
419431 break;
420432 }
421433
422 - return true;
 434+ return $display;
423435 }
424436
425437 public static function isThrottled( $throttleId, $types, $title, $rateCount, $ratePeriod ) {
@@ -496,82 +508,81 @@
497509 global $wgAbuseFilterConditionLimit,$wgMemc;
498510
499511 $blocking_filters = array_keys( array_filter( $filters ) );
500 -
 512+
 513+ // Figure out if we've triggered overflows and blocks.
501514 $overflow_triggered = (self::$condCount > $wgAbuseFilterConditionLimit);
502 - $filter_triggered = count($blocking_filters);
503 -
 515+ $filter_triggered = count( $blocking_filters ) > 0;
 516+
 517+ // Store some keys...
504518 $overflow_key = self::filterLimitReachedKey();
 519+ $total_key = self::filterUsedKey();
505520
506 - $total_key = self::filterUsedKey();
507521 $total = $wgMemc->get( $total_key );
508522
509 - $storage_period = self::$statsStoragePeriod; // One day.
 523+ $storage_period = self::$statsStoragePeriod;
510524
511525 if (!$total || $total > 1000) {
512 - $wgMemc->set( $total_key, 1, $storage_period );
513 -
514 - if ($overflow_triggered) {
515 - $wgMemc->set( $overflow_key, 1, $storage_period );
516 - } else {
517 - $wgMemc->set( $overflow_key, 0, $storage_period );
518 - }
519 -
520 - $anyMatch = false;
521 -
 526+ // This is for if the total doesn't exist, or has gone past 1000.
 527+ // Recreate all the keys at the same time, so they expire together.
 528+ $wgMemc->set( $total_key, 0, $storage_period );
 529+ $wgMemc->set( $overflow_key, 0, $storage_period );
 530+
522531 foreach( $filters as $filter => $matched ) {
523 - $filter_key = self::filterMatchesKey( $filter );
524 - if ($matched) {
525 - $anyMatch = true;
526 - $wgMemc->set( $filter_key, 1, $storage_period );
527 - } else {
528 - $wgMemc->set( $filter_key, 0, $storage_period );
529 - }
 532+ $wgMemc->set( self::filterMatchesKey( $filter ), 0, $storage_period );
530533 }
 534+ $wgMemc->set( self::filterMatchesKey(), 0, $storage_period );
531535
532 - if ($anyMatch) {
533 - $wgMemc->set( self::filterMatchesKey(), 1, $storage_period );
534 - } else {
535 - $wgMemc->set( self::filterMatchesKey(), 0, $storage_period );
536 - }
537 -
538536 return;
539537 }
540 -
 538+
 539+ // Increment total
541540 $wgMemc->incr( $total_key );
542 -
 541+
 542+ // Increment overflow counter, if our condition limit overflowed
543543 if ($overflow_triggered) {
544544 $wgMemc->incr( $overflow_key );
545545 }
546 -
547 - $anyMatch = false;
548 -
549 - global $wgAbuseFilterEmergencyDisableThreshold, $wgAbuseFilterEmergencyDisableCount, $wgAbuseFilterEmergencyDisableAge;
550 -
 546+
 547+ self::checkEmergencyDisable( $filters );
 548+
 549+ // Increment trigger counter
 550+ if ($filter_triggered) {
 551+ $wgMemc->incr( self::filterMatchesKey() );
 552+ }
 553+
 554+ $dbw = wfGetDB( DB_MASTER );
 555+
 556+ // Update hit-counter.
 557+ $dbw->update( 'abuse_filter', array( 'af_hit_count=af_hit_count+1' ), array( 'af_id' => array_keys( array_filter( $filters ) ) ), __METHOD__ );
 558+ }
 559+
 560+ public static function checkEmergencyDisable( $filters ) {
 561+ global $wgAbuseFilterEmergencyDisableThreshold, $wgAbuseFilterEmergencyDisableCount, $wgAbuseFilterEmergencyDisableAge, $wgMemc;
 562+
551563 foreach( $filters as $filter => $matched ) {
552564 if ($matched) {
553 - $anyMatch = true;
554 - $match_count = $wgMemc->get( self::filterMatchesKey( $filter ) );
555 -
556 - if ($match_count > 0) {
 565+ // Increment counter
 566+ $matchCount = $wgMemc->get( self::filterMatchesKey( $filter ) );
 567+
 568+ // Handle missing keys...
 569+ if (!$matchCount) {
 570+ $wgMemc->set( self::filterMatchesKey( $filter ), 1, self::$statsStoragePeriod );
 571+ } else {
557572 $wgMemc->incr( self::filterMatchesKey( $filter ) );
558 - } else {
559 - $wgMemc->set( self::filterMatchesKey( $filter ), 1, self::$statsStoragePeriod );
560573 }
561 -
 574+ $matchCount++;
 575+
 576+ // Figure out if the filter is subject to being deleted.
562577 $filter_age = wfTimestamp( TS_UNIX, self::$filters[$filter]->af_timestamp );
563578 $throttle_exempt_time = $filter_age + $wgAbuseFilterEmergencyDisableAge;
564 -
565 - if ($throttle_exempt_time > time() && $match_count > $wgAbuseFilterEmergencyDisableCount && ($match_count / $total) > $wgAbuseFilterEmergencyDisableThreshold) {
566 - // More than X matches, constituting more than Y% of last Z edits. Disable it.
 579+
 580+ if ($throttle_exempt_time > time() && $matchCount > $wgAbuseFilterEmergencyDisableCount && ($matchCount / $total) > $wgAbuseFilterEmergencyDisableThreshold) {
 581+ // More than $wgAbuseFilterEmergencyDisableCount matches, constituting more than $wgAbuseFilterEmergencyDisableThreshold (a fraction) of last few edits. Disable it.
567582 $dbw = wfGetDB( DB_MASTER );
568583 $dbw->update( 'abuse_filter', array( 'af_enabled' => 0, 'af_throttled' => 1 ), array( 'af_id' => $filter ), __METHOD__ );
569584 }
570585 }
571586 }
572 -
573 - if ($anyMatch) {
574 - $wgMemc->incr( self::filterMatchesKey() );
575 - }
576587 }
577588
578589 public static function filterLimitReachedKey() {

Status & tagging log