Index: trunk/extensions/SpamBlacklist/SpamBlacklist.php |
— | — | @@ -10,8 +10,8 @@ |
11 | 11 | $wgExtensionCredits['other'][] = array( |
12 | 12 | 'name' => 'SpamBlacklist', |
13 | 13 | 'author' => 'Tim Starling', |
14 | | - 'svn-date' => '$LastChangedDate$', |
15 | | - 'svn-revision' => '$LastChangedRevision$', |
| 14 | + 'svn-date' => '$LastChangedDate$', |
| 15 | + 'svn-revision' => '$LastChangedRevision$', |
16 | 16 | 'url' => 'http://www.mediawiki.org/wiki/Extension:SpamBlacklist', |
17 | 17 | 'description' => 'Regex-based anti-spam tool', |
18 | 18 | 'descriptionmsg' => 'spam-blacklist-desc', |
— | — | @@ -24,7 +24,6 @@ |
25 | 25 | global $wgSpamBlacklistFiles; |
26 | 26 | global $wgSpamBlacklistSettings; |
27 | 27 | |
28 | | - |
29 | 28 | $wgSpamBlacklistFiles = false; |
30 | 29 | $wgSpamBlacklistSettings = array(); |
31 | 30 | |
— | — | @@ -39,9 +38,9 @@ |
40 | 39 | $wgFilterCallback = 'wfSpamBlacklistFilter'; |
41 | 40 | } |
42 | 41 | |
43 | | - |
44 | 42 | $wgHooks['EditFilter'][] = 'wfSpamBlacklistValidate'; |
45 | 43 | $wgHooks['ArticleSaveComplete'][] = 'wfSpamBlacklistArticleSave'; |
| 44 | +$wgHooks['APIEditBeforeSave'][] = 'wfSpamBlacklistFilterAPIEditBeforeSave'; |
46 | 45 | |
47 | 46 | /** |
48 | 47 | * Internationalization messages |
— | — | @@ -74,21 +73,45 @@ |
75 | 74 | */ |
76 | 75 | function wfSpamBlacklistFilter( &$title, $text, $section, &$hookErr, $editSummary ) { |
77 | 76 | $spamObj = wfSpamBlacklistObject(); |
78 | | - return $spamObj->filter( $title, $text, $section, $editSummary ); |
| 77 | + $ret = $spamObj->filter( $title, $text, $section, $editSummary ); |
| 78 | + if ( $ret !== false ) EditPage::spamPage( $ret ); |
| 79 | + return ( $ret !== false ); |
79 | 80 | } |
80 | 81 | |
81 | 82 | /** |
82 | 83 | * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter |
83 | 84 | */ |
84 | 85 | function wfSpamBlacklistFilterMerged( &$editPage, $text, &$hookErr, $editSummary ) { |
| 86 | + global $wgTitle; |
| 87 | + if( is_null( $wgTitle ) ) { |
| 88 | + # API mode |
| 89 | + # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist |
| 90 | + return true; |
| 91 | + } |
| 92 | + |
85 | 93 | $spamObj = wfSpamBlacklistObject(); |
86 | 94 | $title = $editPage->mArticle->getTitle(); |
87 | 95 | $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage ); |
| 96 | + if ( $ret !== false ) $editPage->spamPage( $ret ); |
88 | 97 | // Return convention for hooks is the inverse of $wgFilterCallback |
89 | | - return !$ret; |
| 98 | + return ( $ret === false ); |
90 | 99 | } |
91 | 100 | |
92 | 101 | /** |
| 102 | + * Hook function for APIEditBeforeSave |
| 103 | + */ |
| 104 | +function wfSpamBlacklistFilterAPIEditBeforeSave( &$editPage, $text, &$resultArr ) { |
| 105 | + $spamObj = wfSpamBlacklistObject(); |
| 106 | + $title = $editPage->mArticle->getTitle(); |
| 107 | + $ret = $spamObj->filter( $title, $text, '', '', $editPage ); |
| 108 | + if ( $ret!==false ) { |
| 109 | + $resultArr['spamblacklist'] = $ret; |
| 110 | + } |
| 111 | + // Return convention for hooks is the inverse of $wgFilterCallback |
| 112 | + return ( $ret === false ); |
| 113 | +} |
| 114 | + |
| 115 | +/** |
93 | 116 | * Hook function for EditFilter |
94 | 117 | * Confirm that a local blacklist page being saved is valid, |
95 | 118 | * and toss back a warning to the user if it isn't. |
Index: trunk/extensions/SpamBlacklist/SpamBlacklist_body.php |
— | — | @@ -25,7 +25,7 @@ |
26 | 26 | */ |
27 | 27 | function isLocalSource( $title ) { |
28 | 28 | global $wgDBname; |
29 | | - |
| 29 | + |
30 | 30 | if( $title->getNamespace() == NS_MEDIAWIKI ) { |
31 | 31 | $sources = array( |
32 | 32 | "Spam-blacklist", |
— | — | @@ -34,10 +34,10 @@ |
35 | 35 | return true; |
36 | 36 | } |
37 | 37 | } |
38 | | - |
| 38 | + |
39 | 39 | $thisHttp = $title->getFullUrl( 'action=raw' ); |
40 | 40 | $thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/'; |
41 | | - |
| 41 | + |
42 | 42 | foreach( $this->files as $fileName ) { |
43 | 43 | if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) { |
44 | 44 | if ( $wgDBname == $matches[1] ) { |
— | — | @@ -52,17 +52,17 @@ |
53 | 53 | return true; |
54 | 54 | } |
55 | 55 | } |
56 | | - |
| 56 | + |
57 | 57 | return false; |
58 | 58 | } |
59 | | - |
| 59 | + |
60 | 60 | /** |
61 | 61 | * @deprecated back-compat |
62 | 62 | */ |
63 | 63 | function getRegexes() { |
64 | 64 | return $this->getBlacklists(); |
65 | 65 | } |
66 | | - |
| 66 | + |
67 | 67 | /** |
68 | 68 | * Fetch local and (possibly cached) remote blacklists. |
69 | 69 | * Will be cached locally across multiple invocations. |
— | — | @@ -76,7 +76,7 @@ |
77 | 77 | } |
78 | 78 | return $this->regexes; |
79 | 79 | } |
80 | | - |
| 80 | + |
81 | 81 | /** |
82 | 82 | * Fetch (possibly cached) remote blacklists. |
83 | 83 | * @return array |
— | — | @@ -103,19 +103,19 @@ |
104 | 104 | wfProfileOut( $fname ); |
105 | 105 | return $cachedRegexes; |
106 | 106 | } |
107 | | - |
| 107 | + |
108 | 108 | $regexes = $this->buildSharedBlacklists(); |
109 | 109 | $wgMemc->set( "$wgDBname:spam_blacklist_regexes", $regexes, $this->expiryTime ); |
110 | | - |
| 110 | + |
111 | 111 | return $regexes; |
112 | 112 | } |
113 | | - |
| 113 | + |
114 | 114 | function clearCache() { |
115 | 115 | global $wgMemc, $wgDBname; |
116 | 116 | $wgMemc->delete( "$wgDBname:spam_blacklist_regexes" ); |
117 | 117 | wfDebugLog( 'SpamBlacklist', "Spam blacklist local cache cleared.\n" ); |
118 | 118 | } |
119 | | - |
| 119 | + |
120 | 120 | function buildSharedBlacklists() { |
121 | 121 | $regexes = array(); |
122 | 122 | # Load lists |
— | — | @@ -129,7 +129,7 @@ |
130 | 130 | $text = file_get_contents( $fileName ); |
131 | 131 | wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" ); |
132 | 132 | } |
133 | | - |
| 133 | + |
134 | 134 | // Build a separate batch of regexes from each source. |
135 | 135 | // While in theory we could squeeze a little efficiency |
136 | 136 | // out of combining multiple sources in one regex, if |
— | — | @@ -138,20 +138,20 @@ |
139 | 139 | $regexes = array_merge( $regexes, |
140 | 140 | SpamRegexBatch::regexesFromText( $text, $fileName ) ); |
141 | 141 | } |
142 | | - |
| 142 | + |
143 | 143 | return $regexes; |
144 | 144 | } |
145 | | - |
| 145 | + |
146 | 146 | function getHttpText( $fileName ) { |
147 | 147 | global $wgDBname, $messageMemc; |
148 | | - |
| 148 | + |
149 | 149 | # HTTP request |
150 | 150 | # To keep requests to a minimum, we save results into $messageMemc, which is |
151 | 151 | # similar to $wgMemc except almost certain to exist. By default, it is stored |
152 | 152 | # in the database |
153 | 153 | # |
154 | 154 | # There are two keys, when the warning key expires, a random thread will refresh |
155 | | - # the real key. This reduces the chance of multiple requests under high traffic |
| 155 | + # the real key. This reduces the chance of multiple requests under high traffic |
156 | 156 | # conditions. |
157 | 157 | $key = "spam_blacklist_file:$fileName"; |
158 | 158 | $warningKey = "$wgDBname:spamfilewarning:$fileName"; |
— | — | @@ -171,11 +171,11 @@ |
172 | 172 | } |
173 | 173 | return $httpText; |
174 | 174 | } |
175 | | - |
| 175 | + |
176 | 176 | static function getLocalBlacklists() { |
177 | 177 | return SpamRegexBatch::regexesFromMessage( 'spam-blacklist' ); |
178 | 178 | } |
179 | | - |
| 179 | + |
180 | 180 | static function getWhitelists() { |
181 | 181 | return SpamRegexBatch::regexesFromMessage( 'spam-whitelist' ); |
182 | 182 | } |
— | — | @@ -186,8 +186,7 @@ |
187 | 187 | * @param string $section Section number or name |
188 | 188 | * @param EditSummary $editSummary Edit summary if one exists, some people use urls there too |
189 | 189 | * @param EditPage $editPage EditPage if EditFilterMerged was called, null otherwise |
190 | | - * @return True if the edit should not be allowed, false otherwise |
191 | | - * If the return value is true, an error will have been sent to $wgOut |
| 190 | + * @return Matched text if the edit should not be allowed, false otherwise |
192 | 191 | */ |
193 | 192 | function filter( &$title, $text, $section, $editsummary = '', EditPage &$editPage = null ) { |
194 | 193 | global $wgArticle, $wgVersion, $wgOut, $wgParser, $wgUser; |
— | — | @@ -226,14 +225,14 @@ |
227 | 226 | $newLinks = array_keys( $out->getExternalLinks() ); |
228 | 227 | $oldLinks = $this->getCurrentLinks( $title ); |
229 | 228 | $addedLinks = array_diff( $newLinks, $oldLinks ); |
230 | | - |
| 229 | + |
231 | 230 | // We add the edit summary if one exists |
232 | 231 | if ( !empty( $editsummary ) ) $addedLinks[] = $editsummary; |
233 | | - |
| 232 | + |
234 | 233 | wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) ); |
235 | 234 | wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) ); |
236 | 235 | wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) ); |
237 | | - |
| 236 | + |
238 | 237 | $links = implode( "\n", $addedLinks ); |
239 | 238 | |
240 | 239 | # Strip whitelisted URLs from the match |
— | — | @@ -263,12 +262,7 @@ |
264 | 263 | wfDebugLog( 'SpamBlacklist', "Match!\n" ); |
265 | 264 | $ip = wfGetIP(); |
266 | 265 | wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: {$matches[0]}\n" ); |
267 | | - if ( $editPage ) { |
268 | | - $editPage->spamPage( $matches[0] ); |
269 | | - } else { |
270 | | - EditPage::spamPage( $matches[0] ); |
271 | | - } |
272 | | - $retVal = true; |
| 266 | + $retVal = $matches[0]; |
273 | 267 | break; |
274 | 268 | } |
275 | 269 | } |
— | — | @@ -279,7 +273,7 @@ |
280 | 274 | wfProfileOut( $fname ); |
281 | 275 | return $retVal; |
282 | 276 | } |
283 | | - |
| 277 | + |
284 | 278 | /** |
285 | 279 | * Look up the links currently in the article, so we can |
286 | 280 | * ignore them on a second run. |
— | — | @@ -289,7 +283,7 @@ |
290 | 284 | function getCurrentLinks( $title ) { |
291 | 285 | $dbr =& wfGetDB( DB_SLAVE ); |
292 | 286 | $id = $title->getArticleId(); // should be zero queries |
293 | | - $res = $dbr->select( 'externallinks', array( 'el_to' ), |
| 287 | + $res = $dbr->select( 'externallinks', array( 'el_to' ), |
294 | 288 | array( 'el_from' => $id ), __METHOD__ ); |
295 | 289 | $links = array(); |
296 | 290 | while ( $row = $dbr->fetchObject( $res ) ) { |
— | — | @@ -440,7 +434,7 @@ |
441 | 435 | } |
442 | 436 | return $regexes; |
443 | 437 | } |
444 | | - |
| 438 | + |
445 | 439 | /** |
446 | 440 | * Confirm that a set of regexes is either empty or valid. |
447 | 441 | * @param array $lines set of regexes |
— | — | @@ -453,14 +447,14 @@ |
454 | 448 | wfSuppressWarnings(); |
455 | 449 | $ok = preg_match( $regex, '' ); |
456 | 450 | wfRestoreWarnings(); |
457 | | - |
| 451 | + |
458 | 452 | if( $ok === false ) { |
459 | 453 | return false; |
460 | 454 | } |
461 | 455 | } |
462 | 456 | return true; |
463 | 457 | } |
464 | | - |
| 458 | + |
465 | 459 | /** |
466 | 460 | * Strip comments and whitespace, then remove blanks |
467 | 461 | * @private |
— | — | @@ -472,7 +466,7 @@ |
473 | 467 | preg_replace( '/#.*$/', '', |
474 | 468 | $lines ) ) ); |
475 | 469 | } |
476 | | - |
| 470 | + |
477 | 471 | /** |
478 | 472 | * Do a sanity check on the batch regex. |
479 | 473 | * @param lines unsanitized input lines |
— | — | @@ -496,7 +490,7 @@ |
497 | 491 | return SpamRegexBatch::buildRegexes( $lines, 0 ); |
498 | 492 | } |
499 | 493 | } |
500 | | - |
| 494 | + |
501 | 495 | /** |
502 | 496 | * @param array $lines |
503 | 497 | * @return array of input lines which produce invalid input, or empty array if no problems |
— | — | @@ -504,7 +498,7 @@ |
505 | 499 | */ |
506 | 500 | static function getBadLines( $lines ) { |
507 | 501 | $lines = SpamRegexBatch::stripLines( $lines ); |
508 | | - |
| 502 | + |
509 | 503 | $badLines = array(); |
510 | 504 | foreach( $lines as $line ) { |
511 | 505 | if( substr( $line, -1, 1 ) == "\\" ) { |
— | — | @@ -512,13 +506,13 @@ |
513 | 507 | $badLines[] = $line; |
514 | 508 | } |
515 | 509 | } |
516 | | - |
| 510 | + |
517 | 511 | $regexes = SpamRegexBatch::buildRegexes( $lines ); |
518 | 512 | if( SpamRegexBatch::validateRegexes( $regexes ) ) { |
519 | 513 | // No other problems! |
520 | 514 | return $badLines; |
521 | 515 | } |
522 | | - |
| 516 | + |
523 | 517 | // Something failed in the batch, so check them one by one. |
524 | 518 | foreach( $lines as $line ) { |
525 | 519 | $regexes = SpamRegexBatch::buildRegexes( array( $line ) ); |
— | — | @@ -528,7 +522,7 @@ |
529 | 523 | } |
530 | 524 | return $badLines; |
531 | 525 | } |
532 | | - |
| 526 | + |
533 | 527 | /** |
534 | 528 | * Build a set of regular expressions from the given multiline input text, |
535 | 529 | * with empty lines and comments stripped. |
— | — | @@ -542,7 +536,7 @@ |
543 | 537 | $lines = explode( "\n", $source ); |
544 | 538 | return SpamRegexBatch::buildSafeRegexes( $lines, $fileName ); |
545 | 539 | } |
546 | | - |
| 540 | + |
547 | 541 | /** |
548 | 542 | * Build a set of regular expressions from a MediaWiki message. |
549 | 543 | * Will be correctly empty if the message isn't present. |