r109111 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r109110‎ | r109111 | r109112 >
Date:06:13, 17 January 2012
Author:johnduhart
Status:resolved (Comments)
Tags:miscextensions, todo 
Comment:
Refactored SpamBlacklist to be extendable for other blacklist types

This is the groundwork for Bug 33761
Modified paths:
  • /trunk/extensions/SpamBlacklist/BaseBlacklist.php (added) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklist.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklistHooks.php (added) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklist_body.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/SpamRegexBatch.php (added) (history)

Diff [purge]

Index: trunk/extensions/SpamBlacklist/SpamBlacklist.php
@@ -18,90 +18,34 @@
1919 $dir = dirname(__FILE__) . '/';
2020 $wgExtensionMessagesFiles['SpamBlackList'] = $dir . 'SpamBlacklist.i18n.php';
2121
22 -global $wgSpamBlacklistFiles;
23 -global $wgSpamBlacklistSettings;
24 -
25 -$wgSpamBlacklistFiles = false;
26 -$wgSpamBlacklistSettings = array();
27 -
28 -$wgHooks['EditFilterMerged'][] = 'wfSpamBlacklistFilterMerged';
29 -$wgHooks['EditFilter'][] = 'wfSpamBlacklistValidate';
30 -$wgHooks['ArticleSaveComplete'][] = 'wfSpamBlacklistArticleSave';
31 -$wgHooks['APIEditBeforeSave'][] = 'wfSpamBlacklistFilterAPIEditBeforeSave';
32 -
3322 /**
34 - * Get an instance of SpamBlacklist and do some first-call initialisation.
35 - * All actual functionality is implemented in that object
 23+ * Array of settings for blacklist classes
3624 */
37 -function wfSpamBlacklistObject() {
38 - global $wgSpamBlacklistFiles, $wgSpamBlacklistSettings;
39 - static $spamObj;
40 - if ( !$spamObj ) {
41 - require_once( "SpamBlacklist_body.php" );
42 - $spamObj = new SpamBlacklist( $wgSpamBlacklistSettings );
43 - if( $wgSpamBlacklistFiles ) {
44 - $spamObj->files = $wgSpamBlacklistFiles;
45 - }
46 - }
47 - return $spamObj;
48 -}
 25+$wgBlacklistSettings = array(
 26+ 'spam' => array(
 27+ 'files' => array(),
 28+ ),
 29+);
4930
5031 /**
51 - * Hook function for EditFilterMerged
 32+ * @deprecated
5233 */
53 -function wfSpamBlacklistFilterMerged( $editPage, $text, &$hookErr, $editSummary ) {
54 - global $wgTitle;
55 - if( is_null( $wgTitle ) ) {
56 - # API mode
57 - # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
58 - return true;
59 - }
 34+$wgSpamBlacklistFiles =& $wgBlacklistSettings['spam']['files'];
6035
61 - $spamObj = wfSpamBlacklistObject();
62 - $title = $editPage->mArticle->getTitle();
63 - $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
64 - if ( $ret !== false ) {
65 - // spamPageWithContent() method was added in MW 1.17
66 - if ( method_exists( $editPage, 'spamPageWithContent' ) ) {
67 - $editPage->spamPageWithContent( $ret );
68 - } else {
69 - $editPage->spamPage( $ret );
70 - }
71 - }
72 - // Return convention for hooks is the inverse of $wgFilterCallback
73 - return ( $ret === false );
74 -}
75 -
7636 /**
77 - * Hook function for APIEditBeforeSave
 37+ * @deprecated
7838 */
79 -function wfSpamBlacklistFilterAPIEditBeforeSave( $editPage, $text, &$resultArr ) {
80 - $spamObj = wfSpamBlacklistObject();
81 - $title = $editPage->mArticle->getTitle();
82 - $ret = $spamObj->filter( $title, $text, '', '', $editPage );
83 - if ( $ret!==false ) {
84 - $resultArr['spamblacklist'] = $ret;
85 - }
86 - // Return convention for hooks is the inverse of $wgFilterCallback
87 - return ( $ret === false );
88 -}
 39+$wgSpamBlacklistSettings =& $wgBlacklistSettings['spam'];
8940
90 -/**
91 - * Hook function for EditFilter
92 - * Confirm that a local blacklist page being saved is valid,
93 - * and toss back a warning to the user if it isn't.
94 - */
95 -function wfSpamBlacklistValidate( $editPage, $text, $section, &$hookError ) {
96 - $spamObj = wfSpamBlacklistObject();
97 - return $spamObj->validate( $editPage, $text, $section, $hookError );
98 -}
 41+$wgHooks['EditFilterMerged'][] = 'SpamBlacklistHooks::filterMerged';
 42+$wgHooks['APIEditBeforeSave'][] = 'SpamBlacklistHooks::filterAPIEditBeforeSave';
 43+$wgHooks['EditFilter'][] = 'SpamBlacklistHooks::validate';
 44+$wgHooks['ArticleSaveComplete'][] = 'SpamBlacklistHooks::articleSave';
9945
100 -/**
101 - * Hook function for ArticleSaveComplete
102 - * Clear local spam blacklist caches on page save.
103 - */
104 -function wfSpamBlacklistArticleSave( &$article, &$user, $text, $summary, $isminor, $iswatch, $section ) {
105 - $spamObj = wfSpamBlacklistObject();
106 - return $spamObj->onArticleSave( $article, $user, $text, $summary, $isminor, $iswatch, $section );
107 -}
 46+$wgAutoloadClasses['BaseBlacklist'] = $dir . 'BaseBlacklist.php';
 47+$wgAutoloadClasses['SpamBlacklistHooks'] = $dir . 'SpamBlacklistHooks.php';
 48+$wgAutoloadClasses['SpamBlacklist'] = $dir . 'SpamBlacklist_body.php';
 49+$wgAutoloadClasses['SpamRegexBatch'] = $dir . 'SpamRegexBatch.php';
10850
 51+
 52+
Index: trunk/extensions/SpamBlacklist/SpamBlacklistHooks.php
@@ -0,0 +1,126 @@
 2+<?php
 3+
 4+/**
 5+ * Hooks for the spam blacklist extension
 6+ */
 7+class SpamBlacklistHooks {
 8+
 9+ /**
 10+ * Hook function for EditFilterMerged
 11+ *
 12+ * @param $editPage EditPage
 13+ * @param $text string
 14+ * @param $hookErr string
 15+ * @param $editSummary string
 16+ * @return bool
 17+ */
 18+ static function filterMerged( $editPage, $text, &$hookErr, $editSummary ) {
 19+ global $wgTitle;
 20+ if( is_null( $wgTitle ) ) {
 21+ # API mode
 22+ # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
 23+ return true;
 24+ }
 25+
 26+ $spamObj = BaseBlacklist::getInstance( 'spam' );
 27+ $title = $editPage->mArticle->getTitle();
 28+ $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
 29+ if ( $ret !== false ) {
 30+ // spamPageWithContent() method was added in MW 1.17
 31+ if ( method_exists( $editPage, 'spamPageWithContent' ) ) {
 32+ $editPage->spamPageWithContent( $ret );
 33+ } else {
 34+ $editPage->spamPage( $ret );
 35+ }
 36+ }
 37+ // Return convention for hooks is the inverse of $wgFilterCallback
 38+ return ( $ret === false );
 39+ }
 40+
 41+ /**
 42+ * Hook function for APIEditBeforeSave
 43+ *
 44+ * @param $editPage EditPage
 45+ * @param $text string
 46+ * @param $resultArr array
 47+ * @return bool
 48+ */
 49+ static function filterAPIEditBeforeSave( $editPage, $text, &$resultArr ) {
 50+ $spamObj = BaseBlacklist::getInstance( 'spam' );
 51+ $title = $editPage->mArticle->getTitle();
 52+ $ret = $spamObj->filter( $title, $text, '', '', $editPage );
 53+ if ( $ret!==false ) {
 54+ $resultArr['spamblacklist'] = $ret;
 55+ }
 56+ // Return convention for hooks is the inverse of $wgFilterCallback
 57+ return ( $ret === false );
 58+ }
 59+
 60+ /**
 61+ * Hook function for EditFilter
 62+ * Confirm that a local blacklist page being saved is valid,
 63+ * and toss back a warning to the user if it isn't.
 64+ *
 65+ * @param $editPage EditPage
 66+ * @param $text string
 67+ * @param $section string
 68+ * @param $hookError string
 69+ * @return bool
 70+ */
 71+ static function validate( $editPage, $text, $section, &$hookError ) {
 72+ $thisPageName = $editPage->mTitle->getPrefixedDBkey();
 73+
 74+ if( !BaseBlacklist::isLocalSource( $editPage->mTitle ) ) {
 75+ wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] not a local blacklist\n" );
 76+ return true;
 77+ }
 78+
 79+ $lines = explode( "\n", $text );
 80+
 81+ $badLines = SpamRegexBatch::getBadLines( $lines );
 82+ if( $badLines ) {
 83+ wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] given invalid input lines: " .
 84+ implode( ', ', $badLines ) . "\n" );
 85+
 86+ $badList = "*<tt>" .
 87+ implode( "</tt>\n*<tt>",
 88+ array_map( 'wfEscapeWikiText', $badLines ) ) .
 89+ "</tt>\n";
 90+ $hookError =
 91+ "<div class='errorbox'>" .
 92+ wfMsgExt( 'spam-invalid-lines', array( 'parsemag' ), count( $badLines ) ) . "<br />" .
 93+ $badList .
 94+ "</div>\n" .
 95+ "<br clear='all' />\n";
 96+ return true;
 97+ } else {
 98+ wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] ok or empty blacklist\n" );
 99+ return true;
 100+ }
 101+ }
 102+
 103+ /**
 104+ * Hook function for ArticleSaveComplete
 105+ * Clear local spam blacklist caches on page save.
 106+ *
 107+ * @param $article Article
 108+ * @param $user User
 109+ * @param $text string
 110+ * @param $summary string
 111+ * @param $isminor
 112+ * @param $iswatch
 113+ * @param $section
 114+ * @return bool
 115+ */
 116+ static function articleSave( &$article, &$user, $text, $summary, $isminor, $iswatch, $section ) {
 117+ if( !BaseBlacklist::isLocalSource( $article->getTitle() ) ) {
 118+ return false;
 119+ }
 120+ global $wgMemc, $wgDBname;
 121+
 122+ // This sucks because every Blacklist needs to be cleared
 123+ foreach ( BaseBlacklist::getBlacklistTypes() as $type => $class ) {
 124+ $wgMemc->delete( "$wgDBname:{$type}_blacklist_regexes" );
 125+ }
 126+ }
 127+}
Property changes on: trunk/extensions/SpamBlacklist/SpamBlacklistHooks.php
___________________________________________________________________
Added: svn:eol-style
1128 + native
Index: trunk/extensions/SpamBlacklist/SpamRegexBatch.php
@@ -0,0 +1,171 @@
 2+<?php
 3+
 4+/**
 5+ * Utility class for working with blacklists
 6+ */
 7+class SpamRegexBatch {
 8+ /**
 9+ * Build a set of regular expressions matching URLs with the list of regex fragments.
 10+ * Returns an empty list if the input list is empty.
 11+ *
 12+ * @param array $lines list of fragments which will match in URLs
 13+ * @param int $batchSize largest allowed batch regex;
 14+ * if 0, will produce one regex per line
 15+ * @return array
 16+ */
 17+ static function buildRegexes( $lines, $batchSize=4096 ) {
 18+ # Make regex
 19+ # It's faster using the S modifier even though it will usually only be run once
 20+ //$regex = 'https?://+[a-z0-9_\-.]*(' . implode( '|', $lines ) . ')';
 21+ //return '/' . str_replace( '/', '\/', preg_replace('|\\\*/|', '/', $regex) ) . '/Sim';
 22+ $regexes = array();
 23+ $regexStart = '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
 24+ $regexEnd = ($batchSize > 0 ) ? ')/Sim' : ')/im';
 25+ $build = false;
 26+ foreach( $lines as $line ) {
 27+ if( substr( $line, -1, 1 ) == "\\" ) {
 28+ // Final \ will break silently on the batched regexes.
 29+ // Skip it here to avoid breaking the next line;
 30+ // warnings from getBadLines() will still trigger on
 31+ // edit to keep new ones from floating in.
 32+ continue;
 33+ }
 34+ // FIXME: not very robust size check, but should work. :)
 35+ if( $build === false ) {
 36+ $build = $line;
 37+ } elseif( strlen( $build ) + strlen( $line ) > $batchSize ) {
 38+ $regexes[] = $regexStart .
 39+ str_replace( '/', '\/', preg_replace('|\\\*/|u', '/', $build) ) .
 40+ $regexEnd;
 41+ $build = $line;
 42+ } else {
 43+ $build .= '|';
 44+ $build .= $line;
 45+ }
 46+ }
 47+ if( $build !== false ) {
 48+ $regexes[] = $regexStart .
 49+ str_replace( '/', '\/', preg_replace('|\\\*/|u', '/', $build) ) .
 50+ $regexEnd;
 51+ }
 52+ return $regexes;
 53+ }
 54+
 55+ /**
 56+ * Confirm that a set of regexes is either empty or valid.
 57+ *
 58+ * @param $regexes array set of regexes
 59+ * @return bool true if ok, false if contains invalid lines
 60+ */
 61+ static function validateRegexes( $regexes ) {
 62+ foreach( $regexes as $regex ) {
 63+ wfSuppressWarnings();
 64+ $ok = preg_match( $regex, '' );
 65+ wfRestoreWarnings();
 66+
 67+ if( $ok === false ) {
 68+ return false;
 69+ }
 70+ }
 71+ return true;
 72+ }
 73+
 74+ /**
 75+ * Strip comments and whitespace, then remove blanks
 76+ *
 77+ * @param $lines array
 78+ * @return array
 79+ */
 80+ static function stripLines( $lines ) {
 81+ return array_filter(
 82+ array_map( 'trim',
 83+ preg_replace( '/#.*$/', '',
 84+ $lines ) ) );
 85+ }
 86+
 87+ /**
 88+ * Do a sanity check on the batch regex.
 89+ *
 90+ * @param $lines string unsanitized input lines
 91+ * @param $fileName string optional for debug reporting
 92+ * @return array of regexes
 93+ */
 94+ static function buildSafeRegexes( $lines, $fileName=false ) {
 95+ $lines = SpamRegexBatch::stripLines( $lines );
 96+ $regexes = SpamRegexBatch::buildRegexes( $lines );
 97+ if( SpamRegexBatch::validateRegexes( $regexes ) ) {
 98+ return $regexes;
 99+ } else {
 100+ // _Something_ broke... rebuild line-by-line; it'll be
 101+ // slower if there's a lot of blacklist lines, but one
 102+ // broken line won't take out hundreds of its brothers.
 103+ if( $fileName ) {
 104+ wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" );
 105+ }
 106+ return SpamRegexBatch::buildRegexes( $lines, 0 );
 107+ }
 108+ }
 109+
 110+ /**
 111+ * Returns an array of invalid lines
 112+ *
 113+ * @param array $lines
 114+ * @return array of input lines which produce invalid input, or empty array if no problems
 115+ */
 116+ static function getBadLines( $lines ) {
 117+ $lines = SpamRegexBatch::stripLines( $lines );
 118+
 119+ $badLines = array();
 120+ foreach( $lines as $line ) {
 121+ if( substr( $line, -1, 1 ) == "\\" ) {
 122+ // Final \ will break silently on the batched regexes.
 123+ $badLines[] = $line;
 124+ }
 125+ }
 126+
 127+ $regexes = SpamRegexBatch::buildRegexes( $lines );
 128+ if( SpamRegexBatch::validateRegexes( $regexes ) ) {
 129+ // No other problems!
 130+ return $badLines;
 131+ }
 132+
 133+ // Something failed in the batch, so check them one by one.
 134+ foreach( $lines as $line ) {
 135+ $regexes = SpamRegexBatch::buildRegexes( array( $line ) );
 136+ if( !SpamRegexBatch::validateRegexes( $regexes ) ) {
 137+ $badLines[] = $line;
 138+ }
 139+ }
 140+ return $badLines;
 141+ }
 142+
 143+ /**
 144+ * Build a set of regular expressions from the given multiline input text,
 145+ * with empty lines and comments stripped.
 146+ *
 147+ * @param $source string
 148+ * @param $fileName bool|string optional, for reporting of bad files
 149+ * @return array of regular expressions, potentially empty
 150+ */
 151+ static function regexesFromText( $source, $fileName=false ) {
 152+ $lines = explode( "\n", $source );
 153+ return SpamRegexBatch::buildSafeRegexes( $lines, $fileName );
 154+ }
 155+
 156+ /**
 157+ * Build a set of regular expressions from a MediaWiki message.
 158+ * Will be correctly empty if the message isn't present.
 159+ *
 160+ * @param $message string
 161+ * @return array of regular expressions, potentially empty
 162+ */
 163+ static function regexesFromMessage( $message ) {
 164+ $source = wfMsgForContent( $message );
 165+ if( $source && !wfEmptyMsg( $message, $source ) ) {
 166+ return SpamRegexBatch::regexesFromText( $source );
 167+ } else {
 168+ return array();
 169+ }
 170+ }
 171+}
 172+
Property changes on: trunk/extensions/SpamBlacklist/SpamRegexBatch.php
___________________________________________________________________
Added: svn:eol-style
1173 + native
Index: trunk/extensions/SpamBlacklist/BaseBlacklist.php
@@ -0,0 +1,336 @@
 2+<?php
 3+
 4+/**
 5+ * Base class for different kinds of blacklists
 6+ */
 7+abstract class BaseBlacklist {
 8+
 9+ /**
 10+ * Array of blacklist sources
 11+ *
 12+ * @var array
 13+ */
 14+ public $files = array();
 15+
 16+ /**
 17+ * Array containing regexes to test against
 18+ *
 19+ * @var bool|array
 20+ */
 21+ protected $regexes = false;
 22+
 23+ /**
 24+ * Chance of receiving a warning when the filter is hit
 25+ *
 26+ * @var int
 27+ */
 28+ public $warningChance = 100;
 29+
 30+ /**
 31+ * @var int
 32+ */
 33+ public $warningTime = 600;
 34+
 35+ /**
 36+ * @var int
 37+ */
 38+ public $expiryTime = 900;
 39+
 40+ /**
 41+ * Array containing blacklists that extend BaseBlacklist
 42+ *
 43+ * @var array
 44+ */
 45+ private static $blacklistTypes = array(
 46+ 'spam' => 'SpamBlacklist',
 47+ );
 48+
 49+ /**
 50+ * Array of blacklist instances
 51+ *
 52+ * @var array
 53+ */
 54+ private static $instances = array();
 55+
 56+ /**
 57+ * Constructor
 58+ *
 59+ * @param array $settings
 60+ */
 61+ function __construct( $settings = array() ) {
 62+ foreach ( $settings as $name => $value ) {
 63+ $this->$name = $value;
 64+ }
 65+ }
 66+
 67+ /**
 68+ * Adds a blacklist class to the registry
 69+ *
 70+ * @param $type string
 71+ * @param $class string
 72+ */
 73+ public static function addBlacklistType( $type, $class ) {
 74+ self::$blacklistTypes[$type] = $class;
 75+ }
 76+
 77+ /**
 78+ * Return the array of blacklist types currently defined
 79+ *
 80+ * @return array
 81+ */
 82+ public static function getBlacklistTypes() {
 83+ return self::$blacklistTypes;
 84+ }
 85+
 86+ /**
 87+ * Returns an instance of the given blacklist
 88+ *
 89+ * @param $type string Code for the blacklist
 90+ * @return BaseBlacklist
 91+ * @throws MWException
 92+ */
 93+ public static function getInstance( $type ) {
 94+ if ( !isset( self::$blacklistTypes[$type] ) ) {
 95+ throw new MWException( "Invalid blacklist type '$type' passed to " . __METHOD__ );
 96+ }
 97+
 98+ if ( !isset( self::$instances[$type] ) ) {
 99+ global $wgBlacklistSettings;
 100+
 101+ // Prevent notices
 102+ if ( !isset( $wgBlacklistSettings[$type] ) ) {
 103+ $wgBlacklistSettings[$type] = array();
 104+ }
 105+
 106+ self::$instances[$type] = new self::$blacklistTypes[$type]( $wgBlacklistSettings[$type] );
 107+ }
 108+
 109+ return self::$instances[$type];
 110+ }
 111+
 112+ /**
 113+ * Returns the code for the blacklist implementation
 114+ *
 115+ * @return string
 116+ */
 117+ abstract protected function getBlacklistType();
 118+
 119+ /**
 120+ * Check if the given local page title is a spam regex source.
 121+ *
 122+ * @param Title $title
 123+ * @return bool
 124+ */
 125+ public static function isLocalSource( $title ) {
 126+ global $wgDBname, $wgBlacklistSettings;
 127+
 128+ if( $title->getNamespace() == NS_MEDIAWIKI ) {
 129+ $sources = array();
 130+ foreach ( self::$blacklistTypes as $type => $class ) {
 131+ $type = ucfirst( $type );
 132+ $sources += array(
 133+ "$type-blacklist",
 134+ "$type-whitelist"
 135+ );
 136+ }
 137+
 138+ if( in_array( $title->getDBkey(), $sources ) ) {
 139+ return true;
 140+ }
 141+ }
 142+
 143+ $thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
 144+ $thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';
 145+
 146+ $files = array();
 147+ foreach ( self::$blacklistTypes as $type => $class ) {
 148+ if ( isset( $wgBlacklistSettings[$type]['files'] ) ) {
 149+ $files += $wgBlacklistSettings[$type]['files'];
 150+ }
 151+ }
 152+
 153+ foreach( $files as $fileName ) {
 154+ $matches = array();
 155+ if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
 156+ if ( $wgDBname == $matches[1] ) {
 157+ if( $matches[2] == $title->getPrefixedDbKey() ) {
 158+ // Local DB fetch of this page...
 159+ return true;
 160+ }
 161+ }
 162+ } elseif( preg_match( $thisHttpRegex, $fileName ) ) {
 163+ // Raw view of this page
 164+ return true;
 165+ }
 166+ }
 167+
 168+ return false;
 169+ }
 170+
 171+ /**
 172+ * Fetch local and (possibly cached) remote blacklists.
 173+ * Will be cached locally across multiple invocations.
 174+ * @return array set of regular expressions, potentially empty.
 175+ */
 176+ function getBlacklists() {
 177+ if( $this->regexes === false ) {
 178+ $this->regexes = array_merge(
 179+ $this->getLocalBlacklists(),
 180+ $this->getSharedBlacklists() );
 181+ }
 182+ return $this->regexes;
 183+ }
 184+
 185+ /**
 186+ * Returns the local blacklist
 187+ *
 188+ * @return array Regular expressions
 189+ */
 190+ public function getLocalBlacklists() {
 191+ return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist" );
 192+ }
 193+
 194+ /**
 195+ * Returns the (local) whitelist
 196+ *
 197+ * @return array Regular expressions
 198+ */
 199+ public function getWhitelists() {
 200+ return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist" );
 201+ }
 202+
 203+ /**
 204+ * Fetch (possibly cached) remote blacklists.
 205+ * @return array
 206+ */
 207+ function getSharedBlacklists() {
 208+ global $wgMemc, $wgDBname;
 209+ $listType = $this->getBlacklistType();
 210+ $fname = 'SpamBlacklist::getRegex';
 211+ wfProfileIn( $fname );
 212+
 213+ wfDebugLog( 'SpamBlacklist', "Loading $listType regex..." );
 214+
 215+ if ( count( $this->files ) == 0 ){
 216+ # No lists
 217+ wfDebugLog( 'SpamBlacklist', "no files specified\n" );
 218+ wfProfileOut( $fname );
 219+ return array();
 220+ }
 221+
 222+ // This used to be cached per-site, but that could be bad on a shared
 223+ // server where not all wikis have the same configuration.
 224+ $cachedRegexes = $wgMemc->get( "$wgDBname:{$listType}_blacklist_regexes" );
 225+ if( is_array( $cachedRegexes ) ) {
 226+ wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
 227+ wfProfileOut( $fname );
 228+ return $cachedRegexes;
 229+ }
 230+
 231+ $regexes = $this->buildSharedBlacklists();
 232+ $wgMemc->set( "$wgDBname:{$listType}_blacklist_regexes", $regexes, $this->expiryTime );
 233+
 234+ return $regexes;
 235+ }
 236+
 237+ function clearCache() {
 238+ global $wgMemc, $wgDBname;
 239+ $listType = $this->getBlacklistType();
 240+
 241+ $wgMemc->delete( "$wgDBname:{$listType}_blacklist_regexes" );
 242+ wfDebugLog( 'SpamBlacklist', "$listType blacklist local cache cleared.\n" );
 243+ }
 244+
 245+ function buildSharedBlacklists() {
 246+ $regexes = array();
 247+ $listType = $this->getBlacklistType();
 248+ # Load lists
 249+ wfDebugLog( 'SpamBlacklist', "Constructing $listType blacklist\n" );
 250+ foreach ( $this->files as $fileName ) {
 251+ $matches = array();
 252+ if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
 253+ $text = $this->getArticleText( $matches[1], $matches[2] );
 254+ } elseif ( preg_match( '/^http:\/\//', $fileName ) ) {
 255+ $text = $this->getHttpText( $fileName );
 256+ } else {
 257+ $text = file_get_contents( $fileName );
 258+ wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
 259+ }
 260+
 261+ // Build a separate batch of regexes from each source.
 262+ // While in theory we could squeeze a little efficiency
 263+ // out of combining multiple sources in one regex, if
 264+ // there's a bad line in one of them we'll gain more
 265+ // from only having to break that set into smaller pieces.
 266+ $regexes = array_merge( $regexes,
 267+ SpamRegexBatch::regexesFromText( $text, $fileName ) );
 268+ }
 269+
 270+ return $regexes;
 271+ }
 272+
 273+ function getHttpText( $fileName ) {
 274+ global $wgDBname, $messageMemc;
 275+ $listType = $this->getBlacklistType();
 276+
 277+ # HTTP request
 278+ # To keep requests to a minimum, we save results into $messageMemc, which is
 279+ # similar to $wgMemc except almost certain to exist. By default, it is stored
 280+ # in the database
 281+ #
 282+ # There are two keys, when the warning key expires, a random thread will refresh
 283+ # the real key. This reduces the chance of multiple requests under high traffic
 284+ # conditions.
 285+ $key = "{$listType}_blacklist_file:$fileName";
 286+ $warningKey = "$wgDBname:{$listType}filewarning:$fileName";
 287+ $httpText = $messageMemc->get( $key );
 288+ $warning = $messageMemc->get( $warningKey );
 289+
 290+ if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
 291+ wfDebugLog( 'SpamBlacklist', "Loading $listType blacklist from $fileName\n" );
 292+ $httpText = Http::get( $fileName );
 293+ if( $httpText === false ) {
 294+ wfDebugLog( 'SpamBlacklist', "Error loading $listType blacklist from $fileName\n" );
 295+ }
 296+ $messageMemc->set( $warningKey, 1, $this->warningTime );
 297+ $messageMemc->set( $key, $httpText, $this->expiryTime );
 298+ } else {
 299+ wfDebugLog( 'SpamBlacklist', "Got $listType blacklist from HTTP cache for $fileName\n" );
 300+ }
 301+ return $httpText;
 302+ }
 303+
 304+ /**
 305+ * Fetch an article from this or another local MediaWiki database.
 306+ * This is probably *very* fragile, and shouldn't be used perhaps.
 307+ *
 308+ * @param string $db
 309+ * @param string $article
 310+ * @return string
 311+ */
 312+ function getArticleText( $db, $article ) {
 313+ wfDebugLog( 'SpamBlacklist', "Fetching {$this->getBlacklistType()} spam blacklist from '$article' on '$db'...\n" );
 314+ global $wgDBname;
 315+ $dbr = wfGetDB( DB_READ );
 316+ $dbr->selectDB( $db );
 317+ $text = false;
 318+ if ( $dbr->tableExists( 'page' ) ) {
 319+ // 1.5 schema
 320+ $dbw = wfGetDB( DB_READ );
 321+ $dbw->selectDB( $db );
 322+ $revision = Revision::newFromTitle( Title::newFromText( $article ) );
 323+ if ( $revision ) {
 324+ $text = $revision->getText();
 325+ }
 326+ $dbw->selectDB( $wgDBname );
 327+ } else {
 328+ // 1.4 schema
 329+ $title = Title::newFromText( $article );
 330+ $text = $dbr->selectField( 'cur', 'cur_text', array( 'cur_namespace' => $title->getNamespace(),
 331+ 'cur_title' => $title->getDBkey() ), __METHOD__ );
 332+ }
 333+ $dbr->selectDB( $wgDBname );
 334+ return strval( $text );
 335+ }
 336+
 337+}
Property changes on: trunk/extensions/SpamBlacklist/BaseBlacklist.php
___________________________________________________________________
Added: svn:eol-style
1338 + native
Index: trunk/extensions/SpamBlacklist/SpamBlacklist_body.php
@@ -4,177 +4,20 @@
55 exit;
66 }
77
8 -class SpamBlacklist {
9 - var $regexes = false;
 8+class SpamBlacklist extends BaseBlacklist {
109 var $files = array( "http://meta.wikimedia.org/w/index.php?title=Spam_blacklist&action=raw&sb_ver=1" );
11 - var $warningTime = 600;
12 - var $expiryTime = 900;
13 - var $warningChance = 100;
1410 var $ignoreEditSummary = false;
1511
16 - function __construct( $settings = array() ) {
17 - foreach ( $settings as $name => $value ) {
18 - $this->$name = $value;
19 - }
20 - }
21 -
2212 /**
23 - * Check if the given local page title is a spam regex source.
24 - * @param Title $title
25 - * @return bool
 13+ * Returns the code for the blacklist implementation
 14+ *
 15+ * @return string
2616 */
27 - function isLocalSource( $title ) {
28 - global $wgDBname;
29 -
30 - if( $title->getNamespace() == NS_MEDIAWIKI ) {
31 - $sources = array(
32 - "Spam-blacklist",
33 - "Spam-whitelist" );
34 - if( in_array( $title->getDBkey(), $sources ) ) {
35 - return true;
36 - }
37 - }
38 -
39 - $thisHttp = wfExpandUrl( $title->getFullUrl( 'action=raw' ), PROTO_HTTP );
40 - $thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';
41 -
42 - foreach( $this->files as $fileName ) {
43 - $matches = array();
44 - if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
45 - if ( $wgDBname == $matches[1] ) {
46 - if( $matches[2] == $title->getPrefixedDbKey() ) {
47 - // Local DB fetch of this page...
48 - return true;
49 - }
50 - }
51 - } elseif( preg_match( $thisHttpRegex, $fileName ) ) {
52 - // Raw view of this page
53 - return true;
54 - }
55 - }
56 -
57 - return false;
 17+ protected function getBlacklistType() {
 18+ return 'spam';
5819 }
5920
6021 /**
61 - * Fetch local and (possibly cached) remote blacklists.
62 - * Will be cached locally across multiple invocations.
63 - * @return array set of regular expressions, potentially empty.
64 - */
65 - function getBlacklists() {
66 - if( $this->regexes === false ) {
67 - $this->regexes = array_merge(
68 - $this->getLocalBlacklists(),
69 - $this->getSharedBlacklists() );
70 - }
71 - return $this->regexes;
72 - }
73 -
74 - /**
75 - * Fetch (possibly cached) remote blacklists.
76 - * @return array
77 - */
78 - function getSharedBlacklists() {
79 - global $wgMemc, $wgDBname;
80 - $fname = 'SpamBlacklist::getRegex';
81 - wfProfileIn( $fname );
82 -
83 - wfDebugLog( 'SpamBlacklist', "Loading spam regex..." );
84 -
85 - if ( count( $this->files ) == 0 ){
86 - # No lists
87 - wfDebugLog( 'SpamBlacklist', "no files specified\n" );
88 - wfProfileOut( $fname );
89 - return array();
90 - }
91 -
92 - // This used to be cached per-site, but that could be bad on a shared
93 - // server where not all wikis have the same configuration.
94 - $cachedRegexes = $wgMemc->get( "$wgDBname:spam_blacklist_regexes" );
95 - if( is_array( $cachedRegexes ) ) {
96 - wfDebugLog( 'SpamBlacklist', "Got shared spam regexes from cache\n" );
97 - wfProfileOut( $fname );
98 - return $cachedRegexes;
99 - }
100 -
101 - $regexes = $this->buildSharedBlacklists();
102 - $wgMemc->set( "$wgDBname:spam_blacklist_regexes", $regexes, $this->expiryTime );
103 -
104 - return $regexes;
105 - }
106 -
107 - function clearCache() {
108 - global $wgMemc, $wgDBname;
109 - $wgMemc->delete( "$wgDBname:spam_blacklist_regexes" );
110 - wfDebugLog( 'SpamBlacklist', "Spam blacklist local cache cleared.\n" );
111 - }
112 -
113 - function buildSharedBlacklists() {
114 - $regexes = array();
115 - # Load lists
116 - wfDebugLog( 'SpamBlacklist', "Constructing spam blacklist\n" );
117 - foreach ( $this->files as $fileName ) {
118 - $matches = array();
119 - if ( preg_match( '/^DB: ([\w-]*) (.*)$/', $fileName, $matches ) ) {
120 - $text = $this->getArticleText( $matches[1], $matches[2] );
121 - } elseif ( preg_match( '/^http:\/\//', $fileName ) ) {
122 - $text = $this->getHttpText( $fileName );
123 - } else {
124 - $text = file_get_contents( $fileName );
125 - wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
126 - }
127 -
128 - // Build a separate batch of regexes from each source.
129 - // While in theory we could squeeze a little efficiency
130 - // out of combining multiple sources in one regex, if
131 - // there's a bad line in one of them we'll gain more
132 - // from only having to break that set into smaller pieces.
133 - $regexes = array_merge( $regexes,
134 - SpamRegexBatch::regexesFromText( $text, $fileName ) );
135 - }
136 -
137 - return $regexes;
138 - }
139 -
140 - function getHttpText( $fileName ) {
141 - global $wgDBname, $messageMemc;
142 -
143 - # HTTP request
144 - # To keep requests to a minimum, we save results into $messageMemc, which is
145 - # similar to $wgMemc except almost certain to exist. By default, it is stored
146 - # in the database
147 - #
148 - # There are two keys, when the warning key expires, a random thread will refresh
149 - # the real key. This reduces the chance of multiple requests under high traffic
150 - # conditions.
151 - $key = "spam_blacklist_file:$fileName";
152 - $warningKey = "$wgDBname:spamfilewarning:$fileName";
153 - $httpText = $messageMemc->get( $key );
154 - $warning = $messageMemc->get( $warningKey );
155 -
156 - if ( !is_string( $httpText ) || ( !$warning && !mt_rand( 0, $this->warningChance ) ) ) {
157 - wfDebugLog( 'SpamBlacklist', "Loading spam blacklist from $fileName\n" );
158 - $httpText = Http::get( $fileName );
159 - if( $httpText === false ) {
160 - wfDebugLog( 'SpamBlacklist', "Error loading blacklist from $fileName\n" );
161 - }
162 - $messageMemc->set( $warningKey, 1, $this->warningTime );
163 - $messageMemc->set( $key, $httpText, $this->expiryTime );
164 - } else {
165 - wfDebugLog( 'SpamBlacklist', "Got spam blacklist from HTTP cache for $fileName\n" );
166 - }
167 - return $httpText;
168 - }
169 -
170 - static function getLocalBlacklists() {
171 - return SpamRegexBatch::regexesFromMessage( 'spam-blacklist' );
172 - }
173 -
174 - static function getWhitelists() {
175 - return SpamRegexBatch::regexesFromMessage( 'spam-whitelist' );
176 - }
177 -
178 - /**
17922 * @param Title $title
18023 * @param string $text Text of section, or entire text if $editPage!=false
18124 * @param string $section Section number or name
@@ -183,14 +26,18 @@
18427 * @return Matched text if the edit should not be allowed, false otherwise
18528 */
18629 function filter( &$title, $text, $section, $editsummary = '', EditPage &$editPage = null ) {
 30+ /**
 31+ * @var $wgParser Parser
 32+ */
18733 global $wgParser, $wgUser;
18834
18935 $fname = 'wfSpamBlacklistFilter';
19036 wfProfileIn( $fname );
19137
192 - $this->title = $title;
193 - $this->text = $text;
194 - $this->section = $section;
 38+ # These don't do anything, commenting out...
 39+ #$this->title = $title;
 40+ #$this->text = $text;
 41+ #$this->section = $section;
19542 $text = str_replace( '.', '.', $text ); //@bug 12896
19643
19744 $blacklists = $this->getBlacklists();
@@ -267,6 +114,8 @@
268115 * ignore them on a second run.
269116 *
270117 * WARNING: I can add more *of the same link* with no problem here.
 118+ * @param $title Title
 119+ * @return array
271120 */
272121 function getCurrentLinks( $title ) {
273122 $dbr = wfGetDB( DB_SLAVE );
@@ -279,250 +128,4 @@
280129 }
281130 return $links;
282131 }
283 -
284 - /**
285 - * Fetch an article from this or another local MediaWiki database.
286 - * This is probably *very* fragile, and shouldn't be used perhaps.
287 - * @param string $db
288 - * @param string $article
289 - */
290 - function getArticleText( $db, $article ) {
291 - wfDebugLog( 'SpamBlacklist', "Fetching local spam blacklist from '$article' on '$db'...\n" );
292 - global $wgDBname;
293 - $dbr = wfGetDB( DB_READ );
294 - $dbr->selectDB( $db );
295 - $text = false;
296 - if ( $dbr->tableExists( 'page' ) ) {
297 - // 1.5 schema
298 - $dbw = wfGetDB( DB_READ );
299 - $dbw->selectDB( $db );
300 - $revision = Revision::newFromTitle( Title::newFromText( $article ) );
301 - if ( $revision ) {
302 - $text = $revision->getText();
303 - }
304 - $dbw->selectDB( $wgDBname );
305 - } else {
306 - // 1.4 schema
307 - $title = Title::newFromText( $article );
308 - $text = $dbr->selectField( 'cur', 'cur_text', array( 'cur_namespace' => $title->getNamespace(),
309 - 'cur_title' => $title->getDBkey() ), 'SpamBlacklist::getArticleText' );
310 - }
311 - $dbr->selectDB( $wgDBname );
312 - return strval( $text );
313 - }
314 -
315 - /**
316 - * Confirm that a local blacklist page being saved is valid,
317 - * and toss back a warning to the user if it isn't.
318 - * This is an EditFilter hook.
319 - */
320 - function validate( $editPage, $text, $section, &$hookError ) {
321 - $thisPageName = $editPage->mTitle->getPrefixedDBkey();
322 -
323 - if( !$this->isLocalSource( $editPage->mTitle ) ) {
324 - wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] not a local blacklist\n" );
325 - return true;
326 - }
327 -
328 - $lines = explode( "\n", $text );
329 -
330 - $badLines = SpamRegexBatch::getBadLines( $lines );
331 - if( $badLines ) {
332 - wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] given invalid input lines: " .
333 - implode( ', ', $badLines ) . "\n" );
334 -
335 - $badList = "*<tt>" .
336 - implode( "</tt>\n*<tt>",
337 - array_map( 'wfEscapeWikiText', $badLines ) ) .
338 - "</tt>\n";
339 - $hookError =
340 - "<div class='errorbox'>" .
341 - wfMsgExt( 'spam-invalid-lines', array( 'parsemag' ), count( $badLines ) ) . "<br />" .
342 - $badList .
343 - "</div>\n" .
344 - "<br clear='all' />\n";
345 - return true;
346 - } else {
347 - wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] ok or empty blacklist\n" );
348 - return true;
349 - }
350 - }
351 -
352 - function onArticleSave( &$article, &$user, $text, $summary, $isminor, $iswatch, $section ) {
353 - if( $this->isLocalSource( $article->getTitle() ) ) {
354 - $this->clearCache();
355 - }
356 - return true;
357 - }
358 -}
359 -
360 -
361 -class SpamRegexBatch {
362 - /**
363 - * Build a set of regular expressions matching URLs with the list of regex fragments.
364 - * Returns an empty list if the input list is empty.
365 - *
366 - * @param array $lines list of fragments which will match in URLs
367 - * @param int $batchSize largest allowed batch regex;
368 - * if 0, will produce one regex per line
369 - * @return array
370 - * @private
371 - * @static
372 - */
373 - static function buildRegexes( $lines, $batchSize=4096 ) {
374 - # Make regex
375 - # It's faster using the S modifier even though it will usually only be run once
376 - //$regex = 'https?://+[a-z0-9_\-.]*(' . implode( '|', $lines ) . ')';
377 - //return '/' . str_replace( '/', '\/', preg_replace('|\\\*/|', '/', $regex) ) . '/Sim';
378 - $regexes = array();
379 - $regexStart = '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
380 - $regexEnd = ($batchSize > 0 ) ? ')/Sim' : ')/im';
381 - $build = false;
382 - foreach( $lines as $line ) {
383 - if( substr( $line, -1, 1 ) == "\\" ) {
384 - // Final \ will break silently on the batched regexes.
385 - // Skip it here to avoid breaking the next line;
386 - // warnings from getBadLines() will still trigger on
387 - // edit to keep new ones from floating in.
388 - continue;
389 - }
390 - // FIXME: not very robust size check, but should work. :)
391 - if( $build === false ) {
392 - $build = $line;
393 - } elseif( strlen( $build ) + strlen( $line ) > $batchSize ) {
394 - $regexes[] = $regexStart .
395 - str_replace( '/', '\/', preg_replace('|\\\*/|u', '/', $build) ) .
396 - $regexEnd;
397 - $build = $line;
398 - } else {
399 - $build .= '|';
400 - $build .= $line;
401 - }
402 - }
403 - if( $build !== false ) {
404 - $regexes[] = $regexStart .
405 - str_replace( '/', '\/', preg_replace('|\\\*/|u', '/', $build) ) .
406 - $regexEnd;
407 - }
408 - return $regexes;
409 - }
410 -
411 - /**
412 - * Confirm that a set of regexes is either empty or valid.
413 - * @param array $lines set of regexes
414 - * @return bool true if ok, false if contains invalid lines
415 - * @private
416 - * @static
417 - */
418 - static function validateRegexes( $regexes ) {
419 - foreach( $regexes as $regex ) {
420 - wfSuppressWarnings();
421 - $ok = preg_match( $regex, '' );
422 - wfRestoreWarnings();
423 -
424 - if( $ok === false ) {
425 - return false;
426 - }
427 - }
428 - return true;
429 - }
430 -
431 - /**
432 - * Strip comments and whitespace, then remove blanks
433 - * @private
434 - * @static
435 - */
436 - static function stripLines( $lines ) {
437 - return array_filter(
438 - array_map( 'trim',
439 - preg_replace( '/#.*$/', '',
440 - $lines ) ) );
441 - }
442 -
443 - /**
444 - * Do a sanity check on the batch regex.
445 - * @param lines unsanitized input lines
446 - * @param string $fileName optional for debug reporting
447 - * @return array of regexes
448 - * @private
449 - * @static
450 - */
451 - static function buildSafeRegexes( $lines, $fileName=false ) {
452 - $lines = SpamRegexBatch::stripLines( $lines );
453 - $regexes = SpamRegexBatch::buildRegexes( $lines );
454 - if( SpamRegexBatch::validateRegexes( $regexes ) ) {
455 - return $regexes;
456 - } else {
457 - // _Something_ broke... rebuild line-by-line; it'll be
458 - // slower if there's a lot of blacklist lines, but one
459 - // broken line won't take out hundreds of its brothers.
460 - if( $fileName ) {
461 - wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" );
462 - }
463 - return SpamRegexBatch::buildRegexes( $lines, 0 );
464 - }
465 - }
466 -
467 - /**
468 - * @param array $lines
469 - * @return array of input lines which produce invalid input, or empty array if no problems
470 - * @static
471 - */
472 - static function getBadLines( $lines ) {
473 - $lines = SpamRegexBatch::stripLines( $lines );
474 -
475 - $badLines = array();
476 - foreach( $lines as $line ) {
477 - if( substr( $line, -1, 1 ) == "\\" ) {
478 - // Final \ will break silently on the batched regexes.
479 - $badLines[] = $line;
480 - }
481 - }
482 -
483 - $regexes = SpamRegexBatch::buildRegexes( $lines );
484 - if( SpamRegexBatch::validateRegexes( $regexes ) ) {
485 - // No other problems!
486 - return $badLines;
487 - }
488 -
489 - // Something failed in the batch, so check them one by one.
490 - foreach( $lines as $line ) {
491 - $regexes = SpamRegexBatch::buildRegexes( array( $line ) );
492 - if( !SpamRegexBatch::validateRegexes( $regexes ) ) {
493 - $badLines[] = $line;
494 - }
495 - }
496 - return $badLines;
497 - }
498 -
499 - /**
500 - * Build a set of regular expressions from the given multiline input text,
501 - * with empty lines and comments stripped.
502 - *
503 - * @param string $source
504 - * @param string $fileName optional, for reporting of bad files
505 - * @return array of regular expressions, potentially empty
506 - * @static
507 - */
508 - static function regexesFromText( $source, $fileName=false ) {
509 - $lines = explode( "\n", $source );
510 - return SpamRegexBatch::buildSafeRegexes( $lines, $fileName );
511 - }
512 -
513 - /**
514 - * Build a set of regular expressions from a MediaWiki message.
515 - * Will be correctly empty if the message isn't present.
516 - * @param string $source
517 - * @return array of regular expressions, potentially empty
518 - * @static
519 - */
520 - static function regexesFromMessage( $message ) {
521 - $source = wfMsgForContent( $message );
522 - if( $source && !wfEmptyMsg( $message, $source ) ) {
523 - return SpamRegexBatch::regexesFromText( $source );
524 - } else {
525 - return array();
526 - }
527 - }
528 -}
529 -
 132+}
\ No newline at end of file

Follow-up revisions

Revision | Commit summary | Author | Date
r109455 | Adding Email blacklisting to the SpamBlacklist extension... | johnduhart | 23:29, 18 January 2012
r110624 | Fix fixme on r109111 per Tbleher | reedy | 22:12, 2 February 2012
r111542 | Fix r109111: no point in aborting hook execution | maxsem | 14:58, 15 February 2012

Comments

#Comment by MaxSem (talk | contribs)   12:38, 20 January 2012
  • Although I admit I'm prone to abusing regexes too, $thisHttpRegex is not really needed; strpos() will suffice.
  • $cachedRegexes = $wgMemc->get( "$wgDBname:{$listType}_blacklist_regexes" ); - Use wfMemcKey(); it automatically prepends $wgDBname. Also, please put the variable part of the key name at the end to make key names easier to read.
  • getArticleText() gives me the creeps, but it was so before you.
  • BaseBlacklist probably should have been called BlacklistBase, per precedent.
#Comment by Tbleher (talk | contribs)   19:36, 1 February 2012

The articleSave() function is missing a return statement at the end, leading to the following error when trying to edit MediaWiki:spam-blacklist:

Detected bug in an extension! Hook SpamBlacklistHooks::articleSave failed to return a value; should return true to continue hook processing or false to abort.

Backtrace:

#0 /srv/www/mediawiki/code/includes/GlobalFunctions.php(3799): Hooks::run('ArticleSaveComp...', Array)
#1 /srv/www/mediawiki/code/includes/WikiPage.php(1448): wfRunHooks('ArticleSaveComp...', Array)
#2 [internal function]: WikiPage->doEdit(' # Externe URLs...', '', 98)
#3 /srv/www/mediawiki/code/includes/Article.php(1777): call_user_func_array(Array, Array)
#4 /srv/www/mediawiki/code/includes/EditPage.php(1434): Article->__call('doEdit', Array)
#5 /srv/www/mediawiki/code/includes/EditPage.php(1434): Article->doEdit(' # Externe URLs...', '', 98)
#6 /srv/www/mediawiki/code/includes/EditPage.php(959): EditPage->internalAttemptSave(Array, false)
#7 /srv/www/mediawiki/code/includes/EditPage.php(357): EditPage->attemptSave()
#8 /srv/www/mediawiki/code/includes/actions/EditAction.php(51): EditPage->edit()
#9 /srv/www/mediawiki/code/includes/actions/EditAction.php(71): EditAction->show()
#10 /srv/www/mediawiki/code/includes/Wiki.php(484): SubmitAction->show()
#11 /srv/www/mediawiki/code/includes/Wiki.php(278): MediaWiki->performAction(Object(Article))
#12 /srv/www/mediawiki/code/includes/Wiki.php(593): MediaWiki->performRequest()
#13 /srv/www/mediawiki/code/includes/Wiki.php(503): MediaWiki->main()
#14 /srv/www/mediawiki/code/index.php(58): MediaWiki->run()
#15 {main}

The following patch fixes the issue for me:

--- a/SpamBlacklistHooks.php
+++ b/SpamBlacklistHooks.php
@@ -164,5 +164,6 @@ class SpamBlacklistHooks {
                foreach ( BaseBlacklist::getBlacklistTypes() as $type => $class ) {
                        $wgMemc->delete( "$wgDBname:{$type}_blacklist_regexes" );
                }
+               return true;
        }
 }

Status & tagging log