Index: branches/change-tagging/extensions/AbuseFilter/AbuseFilter.class.php |
— | — | @@ -149,134 +149,160 @@ |
150 | 150 | |
151 | 151 | return $result; |
152 | 152 | } |
153 | | - |
154 | | - public static function filterAction( $vars, $title ) { |
155 | | - global $wgUser,$wgMemc; |
156 | | - |
| 153 | + |
| 154 | + /** Returns an associative array of filters which were tripped */ |
| 155 | + public static function checkAllFilters( $vars ) { |
157 | 156 | // Fetch from the database. |
158 | 157 | $dbr = wfGetDB( DB_SLAVE ); |
159 | 158 | $res = $dbr->select( 'abuse_filter', '*', array( 'af_enabled' => 1, 'af_deleted' => 0 ) ); |
160 | | - |
161 | | - $blocking_filters = array(); |
162 | | - $log_entries = array(); |
163 | | - $log_template = array( 'afl_user' => $wgUser->getId(), 'afl_user_text' => $wgUser->getName(), |
164 | | - 'afl_var_dump' => serialize( $vars ), 'afl_timestamp' => $dbr->timestamp(wfTimestampNow()), |
165 | | - 'afl_namespace' => $title->getNamespace(), 'afl_title' => $title->getDBKey(), 'afl_ip' => wfGetIp() ); |
166 | | - $doneActionsByFilter = array(); |
167 | | - $filter_matched = array(); |
168 | | - |
| 159 | + |
169 | 160 | while ( $row = $dbr->fetchObject( $res ) ) { |
| 161 | + // Store the row somewhere convenient |
170 | 162 | self::$filters[$row->af_id] = $row; |
| 163 | + |
| 164 | + // Check conditions... |
171 | 165 | $pattern = trim($row->af_pattern); |
172 | 166 | if ( self::checkConditions( $pattern, $vars ) ) { |
173 | | - $blocking_filters[$row->af_id] = $row; |
174 | | - |
175 | | - $newLog = $log_template; |
176 | | - $newLog['afl_filter'] = $row->af_id; |
177 | | - $newLog['afl_action'] = $vars['ACTION']; |
178 | | - |
179 | | - $log_entries[] = $newLog; |
180 | | - |
181 | | - $doneActionsByFilter[$row->af_id] = array(); |
| 167 | + // Record match. |
182 | 168 | $filter_matched[$row->af_id] = true; |
183 | 169 | } else { |
| 170 | + // Record non-match. |
184 | 171 | $filter_matched[$row->af_id] = false; |
185 | 172 | } |
186 | 173 | } |
187 | | - |
188 | | - //// Clean up from checking all the filters |
189 | | - |
| 174 | + |
| 175 | + // Update statistics, and disable filters which are over-blocking. |
190 | 176 | self::recordStats( $filter_matched ); |
191 | | - |
192 | | - if (count($blocking_filters) == 0 ) { |
193 | | - // No problems. |
194 | | - return true; |
195 | | - } |
196 | | - |
| 177 | + |
| 178 | + return $filter_matched; |
| 179 | + } |
| 180 | + |
| 181 | + /** Returns an array [ list of actions taken by filter, error message to display, if any ] */ |
| 182 | + public static function executeFilterActions( $filters, $title, $vars ) { |
| 183 | + $dbr = wfGetDB( DB_SLAVE ); |
197 | 184 | // Retrieve the consequences. |
198 | | - $res = $dbr->select( 'abuse_filter_action', '*', array( 'afa_filter' => array_keys( $blocking_filters ) ), __METHOD__, array( "ORDER BY" => " (afa_consequence in ('throttle','warn'))-(afa_consequence in ('disallow')) desc" ) ); |
199 | | - // We want throttles, warnings first, as they have a bit of a special treatment. We want disallow last, so that it can be "eaten" by other actions. |
200 | | - |
201 | | - $actions_done = array(); |
202 | | - $throttled_filters = array(); |
203 | | - |
204 | | - $display = ''; |
205 | | - |
| 185 | + $res = $dbr->select( 'abuse_filter_action', '*', array( 'afa_filter' => $filters ), __METHOD__ ); |
| 186 | + |
| 187 | + $actionsByFilter = array_fill_keys( $filters, array() ); |
| 188 | + $actionsTaken = array_fill_keys( $filters, array() ); |
| 189 | + |
| 190 | + // Categorise consequences by filter. |
206 | 191 | while ( $row = $dbr->fetchObject( $res ) ) { |
207 | | - // Don't do the same action-parameters twice |
208 | | - $action_key = md5( $row->afa_consequence . $row->afa_parameters ); |
209 | | - |
210 | | - // Skip if we've already done this action-parameter, or a passive action has sufficed. |
211 | | - $skipAction = ( in_array( $action_key, $actions_done ) || in_array( $row->afa_filter, $throttled_filters ) ); |
212 | | - |
213 | | - // Don't disallow if we've already done something active. It produces two messages, where one would suffice. |
214 | | - if ($row->afa_consequence == 'disallow' && !$skipAction) { |
215 | | - $doneActiveActions = array_diff( $doneActionsByFilter[$row->afa_filter], array( 'throttle', 'warn' /* passive actions */ ) ); |
216 | | - $skipAction = (bool)count($doneActiveActions); |
| 192 | + $actionsByFilter[$row->afa_filter][$row->afa_consequence] = array( 'action' => $row->afa_consequence, 'parameters' => explode( "\n", $row->afa_parameters ) ); |
| 193 | + } |
| 194 | + |
| 195 | + wfLoadExtensionMessages( 'AbuseFilter' ); |
| 196 | + |
| 197 | + $messages = array(); |
| 198 | + |
| 199 | + foreach( $actionsByFilter as $filter => $actions ) { |
| 200 | + // Special-case handling for warnings. |
| 201 | + if ( !empty( $actions['warn'] ) ) { |
| 202 | + $parameters = $actions['warn']['parameters']; |
| 203 | + $warnKey = 'abusefilter-warned-'.$title->getPrefixedText(); |
| 204 | + if (!isset($_SESSION[$warnKey]) || !$_SESSION[$warnKey]) { |
| 205 | + $_SESSION[$warnKey] = true; |
| 206 | + |
| 207 | + // Threaten them a little bit |
| 208 | + $msg = ( !empty($parameters[0]) && strlen($parameters[0]) ) ? $parameters[0] : 'abusefilter-warning'; |
| 209 | + $messages[] = wfMsgNoTrans( $msg, self::$filters[$filter]->af_public_comments ) . "<br />\n"; |
| 210 | + |
| 211 | + $actionsTaken[] = 'warn'; |
| 212 | + |
| 213 | + continue; // Don't do anything else. |
| 214 | + } else { |
| 215 | + // We already warned them |
| 216 | + $_SESSION[$warnKey] = false; |
| 217 | + } |
| 218 | + |
| 219 | + unset( $actions['warn'] ); |
217 | 220 | } |
218 | | - |
219 | | - if ( !$skipAction ) { |
220 | | - // Unpack parameters |
221 | | - $parameters = explode( "\n", $row->afa_parameters ); |
222 | | - |
223 | | - // Take the action. |
224 | | - $result = self::takeConsequenceAction( $row->afa_consequence, $parameters, $title, $vars, $display, $continue, $blocking_filters[$row->afa_filter]->af_public_comments ); |
225 | | - |
226 | | - // Don't do it twice. |
227 | | - $doneActionsByFilter[$row->afa_filter][] = $row->afa_consequence; |
228 | | - $actions_done[] = $action_key; |
229 | | - |
230 | | - // Only execute other actions for a filter if that filter's rate limiter has been tripped. |
231 | | - if (!$result) { |
232 | | - $throttled_filters[] = $row->afa_filter; |
| 221 | + |
| 222 | + if ( !empty( $actions['throttle'] ) ) { |
| 223 | + $parameters = $actions['throttle']['parameters']; |
| 224 | + $throttleId = array_shift( $parameters ); |
| 225 | + list( $rateCount, $ratePeriod ) = explode( ',', array_shift( $parameters ) ); |
| 226 | + |
| 227 | + $hitThrottle = false; |
| 228 | + |
| 229 | + // The rest are throttle-types. |
| 230 | + foreach( $parameters as $throttleType ) { |
| 231 | + $hitThrottle = $hitThrottle || self::isThrottled( $throttleId, $throttleType, $title, $rateCount, $ratePeriod ); |
233 | 232 | } |
234 | | - } else { |
235 | | - // Ignore it, until we hit the rate limit. |
| 233 | + |
| 234 | + unset( $actions['throttle'] ); |
| 235 | + if (!$hitThrottle) { |
| 236 | + $actionsTaken[] = 'throttle'; |
| 237 | + continue; |
| 238 | + } |
236 | 239 | } |
| 240 | + |
| 241 | + // Do the rest of the actions |
| 242 | + foreach( $actions as $action => $info ) { |
| 243 | + $newMsg = self::takeConsequenceAction( $action, $info['parameters'], $title, $vars, self::$filters[$filter]->af_public_comments ); |
| 244 | + |
| 245 | + if ($newMsg) |
| 246 | + $messages[] = $newMsg; |
| 247 | + $actionsTaken[$filter][] = $action; |
| 248 | + } |
237 | 249 | } |
| 250 | + |
| 251 | + return array( $actionsTaken, implode( "\n", $messages ) ); |
| 252 | + } |
| 253 | + |
| 254 | + public static function filterAction( $vars, $title ) { |
| 255 | + global $wgUser,$wgMemc; |
| 256 | + |
| 257 | + $dbr = wfGetDB( DB_SLAVE ); |
| 258 | + |
| 259 | + $filter_matched = self::checkAllFilters( $vars ); |
| 260 | + |
| 261 | + // Short-cut any remaining code if no filters were hit. |
| 262 | + if ( count( array_filter( $filter_matched) ) == 0 ) { |
| 263 | + die( var_dump( $filter_matched ) ); |
| 264 | + return true; |
| 265 | + } |
| 266 | + |
| 267 | + list( $actions_taken, $error_msg ) = self::executeFilterActions( array_keys( array_filter( $filter_matched ) ), $title, $vars ); |
| 268 | + |
| 269 | + // Create a template |
| 270 | + $log_template = array( 'afl_user' => $wgUser->getId(), 'afl_user_text' => $wgUser->getName(), |
| 271 | + 'afl_var_dump' => serialize( $vars ), 'afl_timestamp' => $dbr->timestamp(wfTimestampNow()), |
| 272 | + 'afl_namespace' => $title->getNamespace(), 'afl_title' => $title->getDBKey(), 'afl_ip' => wfGetIp() ); |
| 273 | + |
| 274 | + self::addLogEntries( $actions_taken, $log_template, $vars['ACTION'] ); |
238 | 275 | |
| 276 | + return $error_msg; |
| 277 | + } |
| 278 | + |
| 279 | + public static function addLogEntries( $actions_taken, $log_template, $action ) { |
239 | 280 | $dbw = wfGetDB( DB_MASTER ); |
240 | | - |
241 | | - // Log it |
242 | | - foreach( $log_entries as $index => $entry ) { |
243 | | - $actions = $log_entries[$index]['afl_actions'] = implode( ',', $doneActionsByFilter[$entry['afl_filter']] ); |
244 | | - |
245 | | - if ($actions == 'throttle') { |
246 | | - // Just a throttle, don't record it. |
247 | | - unset($log_entries[$index]); |
| 281 | + |
| 282 | + $log_rows = array(); |
| 283 | + |
| 284 | + foreach( $actions_taken as $filter => $actions ) { |
| 285 | + $thisLog = $log_template; |
| 286 | + $thisLog['afl_filter'] = $filter; |
| 287 | + $thisLog['afl_action'] = $action; |
| 288 | + $thisLog['afl_actions'] = implode( ',', $actions_taken[$filter] ); |
| 289 | + |
| 290 | + // Don't log if we were only throttling. |
| 291 | + if ($thisLog['afl_actions'] != 'throttle') { |
| 292 | + $log_rows[] = $thisLog; |
248 | 293 | } |
249 | | - |
250 | | - // Increment the hit counter |
251 | | - $dbw->update( 'abuse_filter', array( 'af_hit_count=af_hit_count+1' ), array( 'af_id' => $entry['afl_filter'] ), __METHOD__ ); |
252 | 294 | } |
253 | | - |
254 | | - if (!count($log_entries)) { |
| 295 | + |
| 296 | + if (!count($log_rows)) { |
255 | 297 | return; |
256 | 298 | } |
257 | | - |
258 | | - $dbw->insert( 'abuse_filter_log', $log_entries, __METHOD__ ); |
259 | | - |
260 | | - return $display; |
| 299 | + |
| 300 | + $dbw->insert( 'abuse_filter_log', $log_rows, __METHOD__ ); |
261 | 301 | } |
262 | 302 | |
263 | | - public static function takeConsequenceAction( $action, $parameters, $title, $vars, &$display, &$continue, $rule_desc ) { |
| 303 | + public static function takeConsequenceAction( $action, $parameters, $title, $vars, $rule_desc ) { |
264 | 304 | wfLoadExtensionMessages( 'AbuseFilter' ); |
| 305 | + $display = ''; |
265 | 306 | switch ($action) { |
266 | | - case 'warn': |
267 | | - if (!isset($_SESSION['abusefilter-warned']) || !$_SESSION['abusefilter-warned']) { |
268 | | - $_SESSION['abusefilter-warned'] = true; |
269 | | - |
270 | | - // Threaten them a little bit |
271 | | - $msg = ( !empty($parameters[0]) && strlen($parameters[0]) ) ? $parameters[0] : 'abusefilter-warning'; |
272 | | - $display .= wfMsgNoTrans( $msg, $rule_desc ) . "<br />\n"; |
273 | | - |
274 | | - return false; // Don't apply the other stuff yet. |
275 | | - } else { |
276 | | - // We already warned them |
277 | | - $_SESSION['abusefilter-warned'] = false; |
278 | | - } |
279 | | - break; |
280 | | - |
281 | 307 | case 'disallow': |
282 | 308 | if (strlen($parameters[0])) { |
283 | 309 | $display .= wfMsgNoTrans( $parameters[0], $rule_desc ) . "\n"; |
— | — | @@ -352,20 +378,6 @@ |
353 | 379 | |
354 | 380 | $display .= wfMsgNoTrans( 'abusefilter-blocked-display', $rule_desc ) ."<br />\n"; |
355 | 381 | break; |
356 | | - |
357 | | - case 'throttle': |
358 | | - $throttleId = array_shift( $parameters ); |
359 | | - list( $rateCount, $ratePeriod ) = explode( ',', array_shift( $parameters ) ); |
360 | | - |
361 | | - $hitThrottle = false; |
362 | | - |
363 | | - // The rest are throttle-types. |
364 | | - foreach( $parameters as $throttleType ) { |
365 | | - $hitThrottle = $hitThrottle || self::isThrottled( $throttleId, $throttleType, $title, $rateCount, $ratePeriod ); |
366 | | - } |
367 | | - |
368 | | - return $hitThrottle; |
369 | | - break; |
370 | 382 | case 'degroup': |
371 | 383 | global $wgUser; |
372 | 384 | if (!$wgUser->isAnon()) { |
— | — | @@ -418,7 +430,7 @@ |
419 | 431 | break; |
420 | 432 | } |
421 | 433 | |
422 | | - return true; |
| 434 | + return $display; |
423 | 435 | } |
424 | 436 | |
425 | 437 | public static function isThrottled( $throttleId, $types, $title, $rateCount, $ratePeriod ) { |
— | — | @@ -496,82 +508,81 @@ |
497 | 509 | global $wgAbuseFilterConditionLimit,$wgMemc; |
498 | 510 | |
499 | 511 | $blocking_filters = array_keys( array_filter( $filters ) ); |
500 | | - |
| 512 | + |
| 513 | + // Figure out if we've triggered overflows and blocks. |
501 | 514 | $overflow_triggered = (self::$condCount > $wgAbuseFilterConditionLimit); |
502 | | - $filter_triggered = count($blocking_filters); |
503 | | - |
| 515 | + $filter_triggered = count( $blocking_filters ) > 0; |
| 516 | + |
| 517 | + // Store some keys... |
504 | 518 | $overflow_key = self::filterLimitReachedKey(); |
| 519 | + $total_key = self::filterUsedKey(); |
505 | 520 | |
506 | | - $total_key = self::filterUsedKey(); |
507 | 521 | $total = $wgMemc->get( $total_key ); |
508 | 522 | |
509 | | - $storage_period = self::$statsStoragePeriod; // One day. |
| 523 | + $storage_period = self::$statsStoragePeriod; |
510 | 524 | |
511 | 525 | if (!$total || $total > 1000) { |
512 | | - $wgMemc->set( $total_key, 1, $storage_period ); |
513 | | - |
514 | | - if ($overflow_triggered) { |
515 | | - $wgMemc->set( $overflow_key, 1, $storage_period ); |
516 | | - } else { |
517 | | - $wgMemc->set( $overflow_key, 0, $storage_period ); |
518 | | - } |
519 | | - |
520 | | - $anyMatch = false; |
521 | | - |
| 526 | + // This is for if the total doesn't exist, or has gone past 1000. |
| 527 | + // Recreate all the keys at the same time, so they expire together. |
| 528 | + $wgMemc->set( $total_key, 0, $storage_period ); |
| 529 | + $wgMemc->set( $overflow_key, 0, $storage_period ); |
| 530 | + |
522 | 531 | foreach( $filters as $filter => $matched ) { |
523 | | - $filter_key = self::filterMatchesKey( $filter ); |
524 | | - if ($matched) { |
525 | | - $anyMatch = true; |
526 | | - $wgMemc->set( $filter_key, 1, $storage_period ); |
527 | | - } else { |
528 | | - $wgMemc->set( $filter_key, 0, $storage_period ); |
529 | | - } |
| 532 | + $wgMemc->set( self::filterMatchesKey( $filter ), 0, $storage_period ); |
530 | 533 | } |
| 534 | + $wgMemc->set( self::filterMatchesKey(), 0, $storage_period ); |
531 | 535 | |
532 | | - if ($anyMatch) { |
533 | | - $wgMemc->set( self::filterMatchesKey(), 1, $storage_period ); |
534 | | - } else { |
535 | | - $wgMemc->set( self::filterMatchesKey(), 0, $storage_period ); |
536 | | - } |
537 | | - |
538 | 536 | return; |
539 | 537 | } |
540 | | - |
| 538 | + |
| 539 | + // Increment total |
541 | 540 | $wgMemc->incr( $total_key ); |
542 | | - |
| 541 | + |
| 542 | + // Increment overflow counter, if our condition limit overflowed |
543 | 543 | if ($overflow_triggered) { |
544 | 544 | $wgMemc->incr( $overflow_key ); |
545 | 545 | } |
546 | | - |
547 | | - $anyMatch = false; |
548 | | - |
549 | | - global $wgAbuseFilterEmergencyDisableThreshold, $wgAbuseFilterEmergencyDisableCount, $wgAbuseFilterEmergencyDisableAge; |
550 | | - |
| 546 | + |
| 547 | + self::checkEmergencyDisable( $filters ); |
| 548 | + |
| 549 | + // Increment trigger counter |
| 550 | + if ($filter_triggered) { |
| 551 | + $wgMemc->incr( self::filterMatchesKey() ); |
| 552 | + } |
| 553 | + |
| 554 | + $dbw = wfGetDB( DB_MASTER ); |
| 555 | + |
| 556 | + // Update hit-counter. |
| 557 | + $dbw->update( 'abuse_filter', array( 'af_hit_count=af_hit_count+1' ), array( 'af_id' => array_keys( array_filter( $filters ) ) ), __METHOD__ ); |
| 558 | + } |
| 559 | + |
| 560 | + public static function checkEmergencyDisable( $filters ) { |
| 561 | + global $wgAbuseFilterEmergencyDisableThreshold, $wgAbuseFilterEmergencyDisableCount, $wgAbuseFilterEmergencyDisableAge, $wgMemc; |
| 562 | + |
551 | 563 | foreach( $filters as $filter => $matched ) { |
552 | 564 | if ($matched) { |
553 | | - $anyMatch = true; |
554 | | - $match_count = $wgMemc->get( self::filterMatchesKey( $filter ) ); |
555 | | - |
556 | | - if ($match_count > 0) { |
| 565 | + // Increment counter |
| 566 | + $matchCount = $wgMemc->get( self::filterMatchesKey( $filter ) ); |
| 567 | + |
| 568 | + // Handle missing keys... |
| 569 | + if (!$matchCount) { |
| 570 | + $wgMemc->set( self::filterMatchesKey( $filter ), 1, self::$statsStoragePeriod ); |
| 571 | + } else { |
557 | 572 | $wgMemc->incr( self::filterMatchesKey( $filter ) ); |
558 | | - } else { |
559 | | - $wgMemc->set( self::filterMatchesKey( $filter ), 1, self::$statsStoragePeriod ); |
560 | 573 | } |
561 | | - |
| 574 | + $matchCount++; |
| 575 | + |
| 576 | + // Figure out if the filter is subject to being deleted. |
562 | 577 | $filter_age = wfTimestamp( TS_UNIX, self::$filters[$filter]->af_timestamp ); |
563 | 578 | $throttle_exempt_time = $filter_age + $wgAbuseFilterEmergencyDisableAge; |
564 | | - |
565 | | - if ($throttle_exempt_time > time() && $match_count > $wgAbuseFilterEmergencyDisableCount && ($match_count / $total) > $wgAbuseFilterEmergencyDisableThreshold) { |
566 | | - // More than X matches, constituting more than Y% of last Z edits. Disable it. |
| 579 | + |
| 580 | + if ($throttle_exempt_time > time() && $matchCount > $wgAbuseFilterEmergencyDisableCount && ($matchCount / $total) > $wgAbuseFilterEmergencyDisableThreshold) { |
| 581 | + // More than $wgAbuseFilterEmergencyDisableCount matches, constituting more than $wgAbuseFilterEmergencyDisableThreshold (a fraction) of last few edits. Disable it. |
567 | 582 | $dbw = wfGetDB( DB_MASTER ); |
568 | 583 | $dbw->update( 'abuse_filter', array( 'af_enabled' => 0, 'af_throttled' => 1 ), array( 'af_id' => $filter ), __METHOD__ ); |
569 | 584 | } |
570 | 585 | } |
571 | 586 | } |
572 | | - |
573 | | - if ($anyMatch) { |
574 | | - $wgMemc->incr( self::filterMatchesKey() ); |
575 | | - } |
576 | 587 | } |
577 | 588 | |
578 | 589 | public static function filterLimitReachedKey() { |