r109455 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r109454‎ | r109455 | r109456 >
Date:23:29, 18 January 2012
Author:johnduhart
Status:ok (Comments)
Tags:miscextensions 
Comment:
Adding Email blacklisting to the SpamBlacklist extension

This relies on r109111
Modified paths:
  • /trunk/extensions/SpamBlacklist/BaseBlacklist.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/EmailBlacklist.php (added) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklist.i18n.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklist.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklistHooks.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/SpamBlacklist_body.php (modified) (history)
  • /trunk/extensions/SpamBlacklist/SpamRegexBatch.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SpamBlacklist/SpamBlacklistHooks.php
@@ -57,6 +57,43 @@
5858 }
5959
6060 /**
 61+ * Verify that the user can send emails
 62+ *
 63+ * @param $user User
 64+ * @param $hookErr array
 65+ * @return bool
 66+ */
 67+ public static function userCanSendEmail( &$user, &$hookErr ) {
 68+ /** @var $blacklist EmailBlacklist */
 69+ $blacklist = BaseBlacklist::getInstance( 'email' );
 70+ if ( $blacklist->checkUser( $user ) ) {
 71+ return true;
 72+ }
 73+
 74+ $hookErr = array( 'spam-blacklisted-email', 'spam-blacklisted-email-text', null );
 75+
 76+ return false;
 77+ }
 78+
 79+ /**
 80+ * Processes new accounts for valid emails
 81+ *
 82+ * @param $user User
 83+ * @param $abortError
 84+ * @return bool
 85+ */
 86+ public static function abortNewAccount( $user, &$abortError ) {
 87+ /** @var $blacklist EmailBlacklist */
 88+ $blacklist = BaseBlacklist::getInstance( 'email' );
 89+ if ( $blacklist->checkUser( $user ) ) {
 90+ return true;
 91+ }
 92+
 93+ $abortError = wfMessage( 'spam-blacklisted-email-signup' )->escaped();
 94+ return false;
 95+ }
 96+
 97+ /**
6198 * Hook function for EditFilter
6299 * Confirm that a local blacklist page being saved is valid,
63100 * and toss back a warning to the user if it isn't.
@@ -75,9 +112,14 @@
76113 return true;
77114 }
78115
 116+ $type = BaseBlacklist::getTypeFromTitle( $editPage->mTitle );
 117+ if ( $type === false ) {
 118+ return true;
 119+ }
 120+
79121 $lines = explode( "\n", $text );
80122
81 - $badLines = SpamRegexBatch::getBadLines( $lines );
 123+ $badLines = SpamRegexBatch::getBadLines( $lines, BaseBlacklist::getInstance( $type ) );
82124 if( $badLines ) {
83125 wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] given invalid input lines: " .
84126 implode( ', ', $badLines ) . "\n" );
@@ -92,11 +134,11 @@
93135 $badList .
94136 "</div>\n" .
95137 "<br clear='all' />\n";
96 - return true;
97138 } else {
98139 wfDebugLog( 'SpamBlacklist', "Spam blacklist validator: [[$thisPageName]] ok or empty blacklist\n" );
99 - return true;
100140 }
 141+
 142+ return true;
101143 }
102144
103145 /**
Index: trunk/extensions/SpamBlacklist/SpamRegexBatch.php
@@ -13,14 +13,14 @@
1414 * if 0, will produce one regex per line
1515 * @return array
1616 */
17 - static function buildRegexes( $lines, $batchSize=4096 ) {
 17+ static function buildRegexes( $lines, BaseBlacklist $blacklist, $batchSize=4096 ) {
1818 # Make regex
1919 # It's faster using the S modifier even though it will usually only be run once
2020 //$regex = 'https?://+[a-z0-9_\-.]*(' . implode( '|', $lines ) . ')';
2121 //return '/' . str_replace( '/', '\/', preg_replace('|\\\*/|', '/', $regex) ) . '/Sim';
2222 $regexes = array();
23 - $regexStart = '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
24 - $regexEnd = ($batchSize > 0 ) ? ')/Sim' : ')/im';
 23+ $regexStart = $blacklist->getRegexStart();
 24+ $regexEnd = $blacklist->getRegexEnd( $batchSize );
2525 $build = false;
2626 foreach( $lines as $line ) {
2727 if( substr( $line, -1, 1 ) == "\\" ) {
@@ -90,9 +90,9 @@
9191 * @param $fileName string optional for debug reporting
9292 * @return array of regexes
9393 */
94 - static function buildSafeRegexes( $lines, $fileName=false ) {
 94+ static function buildSafeRegexes( $lines, BaseBlacklist $blacklist, $fileName=false ) {
9595 $lines = SpamRegexBatch::stripLines( $lines );
96 - $regexes = SpamRegexBatch::buildRegexes( $lines );
 96+ $regexes = SpamRegexBatch::buildRegexes( $lines, $blacklist );
9797 if( SpamRegexBatch::validateRegexes( $regexes ) ) {
9898 return $regexes;
9999 } else {
@@ -102,7 +102,7 @@
103103 if( $fileName ) {
104104 wfDebugLog( 'SpamBlacklist', "Spam blacklist warning: bogus line in $fileName\n" );
105105 }
106 - return SpamRegexBatch::buildRegexes( $lines, 0 );
 106+ return SpamRegexBatch::buildRegexes( $lines, $blacklist, 0 );
107107 }
108108 }
109109
@@ -112,7 +112,7 @@
113113 * @param array $lines
114114 * @return array of input lines which produce invalid input, or empty array if no problems
115115 */
116 - static function getBadLines( $lines ) {
 116+ static function getBadLines( $lines, BaseBlacklist $blacklist ) {
117117 $lines = SpamRegexBatch::stripLines( $lines );
118118
119119 $badLines = array();
@@ -123,7 +123,7 @@
124124 }
125125 }
126126
127 - $regexes = SpamRegexBatch::buildRegexes( $lines );
 127+ $regexes = SpamRegexBatch::buildRegexes( $lines, $blacklist );
128128 if( SpamRegexBatch::validateRegexes( $regexes ) ) {
129129 // No other problems!
130130 return $badLines;
@@ -131,7 +131,7 @@
132132
133133 // Something failed in the batch, so check them one by one.
134134 foreach( $lines as $line ) {
135 - $regexes = SpamRegexBatch::buildRegexes( array( $line ) );
 135+ $regexes = SpamRegexBatch::buildRegexes( array( $line ), $blacklist );
136136 if( !SpamRegexBatch::validateRegexes( $regexes ) ) {
137137 $badLines[] = $line;
138138 }
@@ -147,9 +147,9 @@
148148 * @param $fileName bool|string optional, for reporting of bad files
149149 * @return array of regular expressions, potentially empty
150150 */
151 - static function regexesFromText( $source, $fileName=false ) {
 151+ static function regexesFromText( $source, BaseBlacklist $blacklist, $fileName=false ) {
152152 $lines = explode( "\n", $source );
153 - return SpamRegexBatch::buildSafeRegexes( $lines, $fileName );
 153+ return SpamRegexBatch::buildSafeRegexes( $lines, $blacklist, $fileName );
154154 }
155155
156156 /**
@@ -159,10 +159,10 @@
160160 * @param $message string
161161 * @return array of regular expressions, potentially empty
162162 */
163 - static function regexesFromMessage( $message ) {
 163+ static function regexesFromMessage( $message, BaseBlacklist $blacklist ) {
164164 $source = wfMsgForContent( $message );
165165 if( $source && !wfEmptyMsg( $message, $source ) ) {
166 - return SpamRegexBatch::regexesFromText( $source );
 166+ return SpamRegexBatch::regexesFromText( $source, $blacklist );
167167 } else {
168168 return array();
169169 }
Index: trunk/extensions/SpamBlacklist/EmailBlacklist.php
@@ -0,0 +1,59 @@
 2+<?php
 3+
 4+/**
 5+ * Email Blacklisting
 6+ */
 7+class EmailBlacklist extends BaseBlacklist {
 8+
 9+ /**
 10+ * Returns the code for the blacklist implementation
 11+ *
 12+ * @return string
 13+ */
 14+ protected function getBlacklistType() {
 15+ return 'email';
 16+ }
 17+
 18+ /**
 19+ * Checks a User object for a blacklisted email
 20+ *
 21+ * @param User $user
 22+ * @return bool True on valid email
 23+ */
 24+ public function checkUser( User $user ) {
 25+ $blacklists = $this->getBlacklists();
 26+ $whitelists = $this->getWhitelists();
 27+
 28+ // The email to check
 29+ $email = $user->getEmail();
 30+
 31+ if ( !count( $blacklists ) ) {
 32+ // Nothing to check
 33+ return true;
 34+ }
 35+
 36+ // Check for whitelisted emails
 37+ if ( is_array( $whitelists ) ) {
 38+ wfDebugLog( 'SpamBlacklist', "Excluding whitelisted emails from " . count( $whitelists ) .
 39+ " regexes: " . implode( ', ', $whitelists ) . "\n" );
 40+ foreach ( $whitelists as $regex ) {
 41+ if ( preg_match( $regex, $email ) ) {
 42+ // Whitelisted email
 43+ return true;
 44+ }
 45+ }
 46+ }
 47+
 48+
 49+ # Do the match
 50+ wfDebugLog( 'SpamBlacklist', "Checking email against " . count( $blacklists ) .
 51+ " regexes: " . implode( ', ', $blacklists ) . "\n" );
 52+ foreach ( $blacklists as $regex ) {
 53+ if ( preg_match( $regex, $email ) ) {
 54+ return false;
 55+ }
 56+ }
 57+
 58+ return true;
 59+ }
 60+}
Property changes on: trunk/extensions/SpamBlacklist/EmailBlacklist.php
___________________________________________________________________
Added: svn:eol-style
161 + native
Index: trunk/extensions/SpamBlacklist/BaseBlacklist.php
@@ -43,6 +43,7 @@
4444 */
4545 private static $blacklistTypes = array(
4646 'spam' => 'SpamBlacklist',
 47+ 'email' => 'EmailBlacklist',
4748 );
4849
4950 /**
@@ -121,7 +122,7 @@
122123 * @param Title $title
123124 * @return bool
124125 */
125 - public static function isLocalSource( $title ) {
 126+ public static function isLocalSource( Title $title ) {
126127 global $wgDBname, $wgBlacklistSettings;
127128
128129 if( $title->getNamespace() == NS_MEDIAWIKI ) {
@@ -168,6 +169,23 @@
169170 }
170171
171172 /**
 173+ * Returns the type of blacklist from the given title
 174+ *
 175+ * @param Title $title
 176+ * @return bool|string
 177+ */
 178+ public static function getTypeFromTitle( Title $title ) {
 179+ $types = array_map( 'preg_quote', array_keys( self::$blacklistTypes ), array( '/' ) );
 180+ $regex = '/(' . implode( '|', $types ). ')-(?:Blacklist|Whitelist)/';
 181+
 182+ if ( preg_match( $regex, $title->getDBkey(), $m ) ) {
 183+ return strtolower( $m[1] );
 184+ }
 185+
 186+ return false;
 187+ }
 188+
 189+ /**
172190 * Fetch local and (possibly cached) remote blacklists.
173191 * Will be cached locally across multiple invocations.
174192 * @return array set of regular expressions, potentially empty.
@@ -187,7 +205,7 @@
188206 * @return array Regular expressions
189207 */
190208 public function getLocalBlacklists() {
191 - return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist" );
 209+ return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-blacklist", $this );
192210 }
193211
194212 /**
@@ -196,7 +214,7 @@
197215 * @return array Regular expressions
198216 */
199217 public function getWhitelists() {
200 - return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist" );
 218+ return SpamRegexBatch::regexesFromMessage( "{$this->getBlacklistType()}-whitelist", $this );
201219 }
202220
203221 /**
@@ -263,7 +281,7 @@
264282 // there's a bad line in one of them we'll gain more
265283 // from only having to break that set into smaller pieces.
266284 $regexes = array_merge( $regexes,
267 - SpamRegexBatch::regexesFromText( $text, $fileName ) );
 285+ SpamRegexBatch::regexesFromText( $text, $this, $fileName ) );
268286 }
269287
270288 return $regexes;
@@ -333,4 +351,23 @@
334352 return strval( $text );
335353 }
336354
 355+ /**
 356+ * Returns the start of the regex for matches
 357+ *
 358+ * @return string
 359+ */
 360+ public function getRegexStart() {
 361+ return '/[a-z0-9_\-.]*';
 362+ }
 363+
 364+ /**
 365+ * Returns the end of the regex for matches
 366+ *
 367+ * @param $batchSize
 368+ * @return string
 369+ */
 370+ public function getRegexEnd( $batchSize ) {
 371+ return ($batchSize > 0 ) ? '/Sim' : '/im';
 372+ }
 373+
337374 }
Index: trunk/extensions/SpamBlacklist/SpamBlacklist_body.php
@@ -98,7 +98,7 @@
9999 $ip = wfGetIP();
100100 wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: {$matches[0]}\n" );
101101 $retVal = $matches[0];
102 - break;
 102+ break;
103103 }
104104 }
105105 } else {
@@ -128,4 +128,23 @@
129129 }
130130 return $links;
131131 }
 132+
 133+ /**
 134+ * Returns the start of the regex for matches
 135+ *
 136+ * @return string
 137+ */
 138+ public function getRegexStart() {
 139+ return '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
 140+ }
 141+
 142+ /**
 143+ * Returns the end of the regex for matches
 144+ *
 145+ * @param $batchSize
 146+ * @return string
 147+ */
 148+ public function getRegexEnd( $batchSize ) {
 149+ return ')' . parent::getRegexEnd( $batchSize );
 150+ }
132151 }
\ No newline at end of file
Index: trunk/extensions/SpamBlacklist/SpamBlacklist.i18n.php
@@ -23,11 +23,30 @@
2424 # External URLs matching this list will *not* be blocked even if they would
2525 # have been blocked by blacklist entries.
2626 #
 27+ #</pre> <!-- leave this line exactly as it is -->',
 28+ 'email-blacklist' => ' # Emails matching this list will be blocked from registering or sending email
 29+ # This list affects only this wiki; refer also to the global blacklist.
 30+ # For documentation see http://www.mediawiki.org/wiki/Extension:SpamBlacklist
 31+ #<!-- leave this line exactly as it is --> <pre>
 32+#
2733 # Syntax is as follows:
2834 # * Everything from a "#" character to the end of the line is a comment
2935 # * Every non-blank line is a regex fragment which will only match hosts inside URLs
3036
3137 #</pre> <!-- leave this line exactly as it is -->',
 38+ 'email-whitelist' => ' #<!-- leave this line exactly as it is --> <pre>
 39+# Emails matching this list will *not* be blocked even if they would
 40+# have been blocked by blacklist entries.
 41+#
 42+ #</pre> <!-- leave this line exactly as it is -->',
 43+# Syntax is as follows:
 44+# * Everything from a "#" character to the end of the line is a comment
 45+# * Every non-blank line is a regex fragment which will only match hosts inside URLs
 46+
 47+ 'spam-blacklisted-email' => 'Blacklisted E-mail',
 48+ 'spam-blacklisted-email-text' => 'Your e-mail address is currently blacklisted from sending e-mails to other users.',
 49+ 'spam-blacklisted-email-signup' => 'The e-mail address given is currently blacklisted from use.',
 50+
3251 'spam-invalid-lines' => "The following spam blacklist {{PLURAL:$1|line is an|lines are}} invalid regular {{PLURAL:$1|expression|expressions}} and {{PLURAL:$1|needs|need}} to be corrected before saving the page:",
3352 'spam-blacklist-desc' => 'Regex-based anti-spam tool: [[MediaWiki:Spam-blacklist]] and [[MediaWiki:Spam-whitelist]]',
3453 );
@@ -40,6 +59,11 @@
4160 $messages['qqq'] = array(
4261 'spam-blacklist' => "See also: [[MediaWiki:spam-whitelist]] and [[MediaWiki:captcha-addurl-whitelist]]. You can translate the text, including 'Leave this line exactly as it is'. Some lines of this messages have one (1) leading space.",
4362 'spam-whitelist' => "See also: [[MediaWiki:spam-blacklist]] and [[MediaWiki:captcha-addurl-whitelist]]. You can translate the text, including 'Leave this line exactly as it is'. Some lines of this messages have one (1) leading space.",
 63+
 64+ 'spam-blacklisted-email' => 'Title of errorpage when trying to send an email with a blacklisted email',
 65+ 'spam-blacklisted-email-text' => 'Text of errorpage when trying to send an email with a blacklisted email',
 66+ 'spam-blacklisted-email-signup' => 'Error when trying to create an account with an invalid email',
 67+
4468 'spam-blacklist-desc' => '{{desc}}',
4569 );
4670
Index: trunk/extensions/SpamBlacklist/SpamBlacklist.php
@@ -10,7 +10,7 @@
1111 $wgExtensionCredits[version_compare($wgVersion, '1.17alpha', '>=') ? 'antispam' : 'other'][] = array(
1212 'path' => __FILE__,
1313 'name' => 'SpamBlacklist',
14 - 'author' => 'Tim Starling',
 14+ 'author' => array( 'Tim Starling', 'John Du Hart' ),
1515 'url' => 'https://www.mediawiki.org/wiki/Extension:SpamBlacklist',
1616 'descriptionmsg' => 'spam-blacklist-desc',
1717 );
@@ -41,8 +41,11 @@
4242 $wgHooks['APIEditBeforeSave'][] = 'SpamBlacklistHooks::filterAPIEditBeforeSave';
4343 $wgHooks['EditFilter'][] = 'SpamBlacklistHooks::validate';
4444 $wgHooks['ArticleSaveComplete'][] = 'SpamBlacklistHooks::articleSave';
 45+$wgHooks['UserCanSendEmail'][] = 'SpamBlacklistHooks::userCanSendEmail';
 46+$wgHooks['AbortNewAccount'][] = 'SpamBlacklistHooks::abortNewAccount';
4547
4648 $wgAutoloadClasses['BaseBlacklist'] = $dir . 'BaseBlacklist.php';
 49+$wgAutoloadClasses['EmailBlacklist'] = $dir . 'EmailBlacklist.php';
4750 $wgAutoloadClasses['SpamBlacklistHooks'] = $dir . 'SpamBlacklistHooks.php';
4851 $wgAutoloadClasses['SpamBlacklist'] = $dir . 'SpamBlacklist_body.php';
4952 $wgAutoloadClasses['SpamRegexBatch'] = $dir . 'SpamRegexBatch.php';

Sign-offs

User | Flag | Date
Nikerabbit | inspected | 09:27, 19 January 2012

Follow-up revisions

Revision | Commit summary | Author | Date
r109611 | r109455: Fix some errors... | raymond | 11:10, 20 January 2012
r109681 | Follow-up r109455: make it clear that it's about e-mail *addresses*, also fix... | robin | 15:05, 21 January 2012

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r109111 | Refactored SpamBlacklist to be extendable for other blacklist types... | johnduhart | 06:13, 17 January 2012

Comments

#Comment by Johnduhart (talk | contribs)   23:31, 18 January 2012

Oh, this is for Bug 33761

#Comment by Aaron Schulz (talk | contribs)   00:25, 28 January 2012
public function checkUser( User $user )

The docs are confusing. It seems like it's checking if they ARE blacklisted, when its return value is the opposite. The function should be renamed and the docs made clearer.

#Comment by He7d3r (talk | contribs)   02:42, 20 October 2012

This caused bugzilla:41235.

Status & tagging log