r43098 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r43097‎ | r43098 | r43099 >
Date:22:40, 2 November 2008
Author:siebrand
Status:old
Tags:
Comment:
(bug 16120) Prevent death on Spam Blacklist trigger using API. Patch by Brad Jorsch.

An API edit attempt with Spam Blacklist firing will now output something instead of crashing:

<?xml version="1.0"?><api><edit spamblacklist="http://blacklistme.example.com&quot;
result="Failure" /></api>
Modified paths:
  • /trunk/extensions/SpamBlacklist/SpamBlacklist.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklist_body.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SpamBlacklist/SpamBlacklist.php
@@ -10,8 +10,8 @@
1111 $wgExtensionCredits['other'][] = array(
1212 'name' => 'SpamBlacklist',
1313 'author' => 'Tim Starling',
14 - 'svn-date' => '$LastChangedDate$',
15 - 'svn-revision' => '$LastChangedRevision$',
 14+ 'svn-date' => '$LastChangedDate$',
 15+ 'svn-revision' => '$LastChangedRevision$',
1616 'url' => 'http://www.mediawiki.org/wiki/Extension:SpamBlacklist',
1717 'description' => 'Regex-based anti-spam tool',
1818 'descriptionmsg' => 'spam-blacklist-desc',
@@ -24,7 +24,6 @@
2525 global $wgSpamBlacklistFiles;
2626 global $wgSpamBlacklistSettings;
2727
28 -
2928 $wgSpamBlacklistFiles = false;
3029 $wgSpamBlacklistSettings = array();
3130
@@ -39,9 +38,9 @@
4039 $wgFilterCallback = 'wfSpamBlacklistFilter';
4140 }
4241
43 -
4442 $wgHooks['EditFilter'][] = 'wfSpamBlacklistValidate';
4543 $wgHooks['ArticleSaveComplete'][] = 'wfSpamBlacklistArticleSave';
 44+$wgHooks['APIEditBeforeSave'][] = 'wfSpamBlacklistFilterAPIEditBeforeSave';
4645
4746 /**
4847 * Internationalization messages
@@ -74,21 +73,45 @@
7574 */
7675 function wfSpamBlacklistFilter( &$title, $text, $section, &$hookErr, $editSummary ) {
7776 $spamObj = wfSpamBlacklistObject();
78 - return $spamObj->filter( $title, $text, $section, $editSummary );
 77+ $ret = $spamObj->filter( $title, $text, $section, $editSummary );
 78+ if ( $ret !== false ) EditPage::spamPage( $ret );
 79+ return ( $ret !== false );
7980 }
8081
8182 /**
8283 * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter
8384 */
8485 function wfSpamBlacklistFilterMerged( &$editPage, $text, &$hookErr, $editSummary ) {
 86+ global $wgTitle;
 87+ if( is_null( $wgTitle ) ) {
 88+ # API mode
 89+ # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
 90+ return true;
 91+ }
 92+
8593 $spamObj = wfSpamBlacklistObject();
8694 $title = $editPage->mArticle->getTitle();
8795 $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
 96+ if ( $ret !== false ) $editPage->spamPage( $ret );
8897 // Return convention for hooks is the inverse of $wgFilterCallback
89 - return !$ret;
 98+ return ( $ret === false );
9099 }
91100
92101 /**
 102+ * Hook function for APIEditBeforeSave
 103+ */
 104+function wfSpamBlacklistFilterAPIEditBeforeSave( &$editPage, $text, &$resultArr ) {
 105+ $spamObj = wfSpamBlacklistObject();
 106+ $title = $editPage->mArticle->getTitle();
 107+ $ret = $spamObj->filter( $title, $text, '', '', $editPage );
 108+ if ( $ret!==false ) {
 109+ $resultArr['spamblacklist'] = $ret;
 110+ }
 111+ // Return convention for hooks is the inverse of $wgFilterCallback
 112+ return ( $ret === false );
 113+}
 114+
 115+/**
93116 * Hook function for EditFilter
94117 * Confirm that a local blacklist page being saved is valid,
95118 * and toss back a warning to the user if it isn't.
Index: trunk/extensions/SpamBlacklist/SpamBlacklist_body.php
@@ -25,7 +25,7 @@
2626 */
2727 function isLocalSource( $title ) {
2828 global $wgDBname;
29 -
 29+
3030 if( $title->getNamespace() == NS_MEDIAWIKI ) {
3131 $sources = array(
3232 "Spam-blacklist",
@@ -34,10 +34,10 @@
3535 return true;
3636 }
3737 }
38 -
 38+
3939 $thisHttp = $title->getFullUrl( 'action=raw' );
4040 $thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';
41 -
 41+
4242 foreach( $this->files as $fileName ) {
4343 if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
4444 if ( $wgDBname == $matches[1] ) {
@@ -52,17 +52,17 @@
5353 return true;
5454 }
5555 }
56 -
 56+
5757 return false;
5858 }
59 -
 59+
6060 /**
6161 * @deprecated back-compat
6262 */
6363 function getRegexes() {
6464 return $this->getBlacklists();
6565 }
66 -
 66+
6767 /**
6868 * Fetch local and (possibly cached) remote blacklists.
6969 * Will be cached locally across multiple invocations.
@@ -76,7 +76,7 @@
7777 }
7878 return $this->regexes;
7979 }
80 -
 80+
8181 /**
8282 * Fetch (possibly cached) remote blacklists.
8383 * @return array
@@ -103,19 +103,19 @@
104104 wfProfileOut( $fname );
105105 return $cachedRegexes;
106106 }
107 -
 107+
108108 $regexes = $this->buildSharedBlacklists();
109109 $wgMemc->set( "$wgDBname:spam_blacklist_regexes", $regexes, $this->expiryTime );
110 -
 110+
111111 return $regexes;
112112 }
113 -
 113+
114114 function clearCache() {
115115 global $wgMemc, $wgDBname;
116116 $wgMemc->delete( "$wgDBname:spam_blacklist_regexes" );
117117 wfDebugLog( 'SpamBlacklist', "Spam blacklist local cache cleared.\n" );
118118 }
119 -
 119+
120120 function buildSharedBlacklists() {
121121 $regexes = array();
122122 # Load lists
@@ -129,7 +129,7 @@
130130 $text = file_get_contents( $fileName );
131131 wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
132132 }
133 -
 133+
134134 // Build a separate batch of regexes from each source.
135135 // While in theory we could squeeze a little efficiency
136136 // out of combining multiple sources in one regex, if
@@ -138,20 +138,20 @@
139139 $regexes = array_merge( $regexes,
140140 SpamRegexBatch::regexesFromText( $text, $fileName ) );
141141 }
142 -
 142+
143143 return $regexes;
144144 }
145 -
 145+
146146 function getHttpText( $fileName ) {
147147 global $wgDBname, $messageMemc;
148 -
 148+
149149 # HTTP request
150150 # To keep requests to a minimum, we save results into $messageMemc, which is
151151 # similar to $wgMemc except almost certain to exist. By default, it is stored
152152 # in the database
153153 #
154154 # There are two keys, when the warning key expires, a random thread will refresh
155 - # the real key. This reduces the chance of multiple requests under high traffic
 155+ # the real key. This reduces the chance of multiple requests under high traffic
156156 # conditions.
157157 $key = "spam_blacklist_file:$fileName";
158158 $warningKey = "$wgDBname:spamfilewarning:$fileName";
@@ -171,11 +171,11 @@
172172 }
173173 return $httpText;
174174 }
175 -
 175+
176176 static function getLocalBlacklists() {
177177 return SpamRegexBatch::regexesFromMessage( 'spam-blacklist' );
178178 }
179 -
 179+
180180 static function getWhitelists() {
181181 return SpamRegexBatch::regexesFromMessage( 'spam-whitelist' );
182182 }
@@ -186,8 +186,7 @@
187187 * @param string $section Section number or name
188188 * @param EditSummary $editSummary Edit summary if one exists, some people use urls there too
189189 * @param EditPage $editPage EditPage if EditFilterMerged was called, null otherwise
190 - * @return True if the edit should not be allowed, false otherwise
191 - * If the return value is true, an error will have been sent to $wgOut
 190+ * @return Matched text if the edit should not be allowed, false otherwise
192191 */
193192 function filter( &$title, $text, $section, $editsummary = '', EditPage &$editPage = null ) {
194193 global $wgArticle, $wgVersion, $wgOut, $wgParser, $wgUser;
@@ -226,14 +225,14 @@
227226 $newLinks = array_keys( $out->getExternalLinks() );
228227 $oldLinks = $this->getCurrentLinks( $title );
229228 $addedLinks = array_diff( $newLinks, $oldLinks );
230 -
 229+
231230 // We add the edit summary if one exists
232231 if ( !empty( $editsummary ) ) $addedLinks[] = $editsummary;
233 -
 232+
234233 wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
235234 wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
236235 wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );
237 -
 236+
238237 $links = implode( "\n", $addedLinks );
239238
240239 # Strip whitelisted URLs from the match
@@ -263,12 +262,7 @@
264263 wfDebugLog( 'SpamBlacklist', "Match!\n" );
265264 $ip = wfGetIP();
266265 wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: {$matches[0]}\n" );
267 - if ( $editPage ) {
268 - $editPage->spamPage( $matches[0] );
269 - } else {
270 - EditPage::spamPage( $matches[0] );
271 - }
272 - $retVal = true;
 266+ $retVal = $matches[0];
273267 break;
274268 }
275269 }
@@ -279,7 +273,7 @@
280274 wfProfileOut( $fname );
281275 return $retVal;
282276 }
283 -
 277+
284278 /**
285279 * Look up the links currently in the article, so we can
286280 * ignore them on a second run.
@@ -289,7 +283,7 @@
290284 function getCurrentLinks( $title ) {
291285 $dbr =& wfGetDB( DB_SLAVE );
292286 $id = $title->getArticleId(); // should be zero queries
293 - $res = $dbr->select( 'externallinks', array( 'el_to' ),
 287+ $res = $dbr->select( 'externallinks', array( 'el_to' ),
294288 array( 'el_from' => $id ), __METHOD__ );
295289 $links = array();
296290 while ( $row = $dbr->fetchObject( $res ) ) {
@@ -440,7 +434,7 @@
441435 }
442436 return $regexes;
443437 }
444 -
 438+
445439 /**
446440 * Confirm that a set of regexes is either empty or valid.
447441 * @param array $lines set of regexes
@@ -453,14 +447,14 @@
454448 wfSuppressWarnings();
455449 $ok = preg_match( $regex, '' );
456450 wfRestoreWarnings();
457 -
 451+
458452 if( $ok === false ) {
459453 return false;
460454 }
461455 }
462456 return true;
463457 }
464 -
 458+
465459 /**
466460 * Strip comments and whitespace, then remove blanks
467461 * @private
@@ -472,7 +466,7 @@
473467 preg_replace( '/#.*$/', '',
474468 $lines ) ) );
475469 }
476 -
 470+
477471 /**
478472 * Do a sanity check on the batch regex.
479473 * @param lines unsanitized input lines
@@ -496,7 +490,7 @@
497491 return SpamRegexBatch::buildRegexes( $lines, 0 );
498492 }
499493 }
500 -
 494+
501495 /**
502496 * @param array $lines
503497 * @return array of input lines which produce invalid input, or empty array if no problems
@@ -504,7 +498,7 @@
505499 */
506500 static function getBadLines( $lines ) {
507501 $lines = SpamRegexBatch::stripLines( $lines );
508 -
 502+
509503 $badLines = array();
510504 foreach( $lines as $line ) {
511505 if( substr( $line, -1, 1 ) == "\\" ) {
@@ -512,13 +506,13 @@
513507 $badLines[] = $line;
514508 }
515509 }
516 -
 510+
517511 $regexes = SpamRegexBatch::buildRegexes( $lines );
518512 if( SpamRegexBatch::validateRegexes( $regexes ) ) {
519513 // No other problems!
520514 return $badLines;
521515 }
522 -
 516+
523517 // Something failed in the batch, so check them one by one.
524518 foreach( $lines as $line ) {
525519 $regexes = SpamRegexBatch::buildRegexes( array( $line ) );
@@ -528,7 +522,7 @@
529523 }
530524 return $badLines;
531525 }
532 -
 526+
533527 /**
534528 * Build a set of regular expressions from the given multiline input text,
535529 * with empty lines and comments stripped.
@@ -542,7 +536,7 @@
543537 $lines = explode( "\n", $source );
544538 return SpamRegexBatch::buildSafeRegexes( $lines, $fileName );
545539 }
546 -
 540+
547541 /**
548542 * Build a set of regular expressions from a MediaWiki message.
549543 * Will be correctly empty if the message isn't present.

Status & tagging log