r39660 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r39659 | r39660 | r39661 >
Date:20:32, 19 August 2008
Author:brion
Status:old
Tags:
Comment:
* $wgSpamRegex now matches the edit summary and page move descriptions in
addition to body text.

The error display on page moves could use some cleanup: it currently recycles the general edit notice for spam hits, which is pretty big on English Wikipedia and doesn't say which match you hit.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/DefaultSettings.php (modified) (history)
  • /trunk/phase3/includes/EditPage.php (modified) (history)
  • /trunk/phase3/includes/Title.php (modified) (history)

Diff [purge]

Index: trunk/phase3/RELEASE-NOTES
@@ -37,6 +37,7 @@
3838 * $wgRestrictDisplayTitle controls if the use of the {{DISPLAYTITLE}} magic
3939 word is restricted to titles equivalent to the actual page title. This
4040 is true per default, but can be set to false to allow any title.
 41+* $wgSpamRegex may now be an array of multiple regular expressions.
4142
4243 === New features in 1.14 ===
4344
@@ -134,7 +135,10 @@
135136 * Avoid recursive crazy expansions in section edit comments for pages which
136137 contain '/*' in the title
137138 * Fix excessive memory usage when parsing pages with lots of links
 139+* $wgSpamRegex now matches the edit summary and page move descriptions in
 140+ addition to body text.
138141
 142+
139143 === API changes in 1.14 ===
140144
141145 * Registration time of users registered before the DB field was created is now
Index: trunk/phase3/includes/EditPage.php
@@ -733,7 +733,7 @@
734734 * @return one of the constants describing the result
735735 */
736736 function internalAttemptSave( &$result, $bot = false ) {
737 - global $wgSpamRegex, $wgFilterCallback, $wgUser, $wgOut, $wgParser;
 737+ global $wgFilterCallback, $wgUser, $wgOut, $wgParser;
738738 global $wgMaxArticleSize;
739739
740740 $fname = 'EditPage::attemptSave';
@@ -762,12 +762,15 @@
763763 $this->mMetaData = '' ;
764764
765765 # Check for spam
766 - $matches = array();
767 - if ( $wgSpamRegex && preg_match( $wgSpamRegex, $this->textbox1, $matches ) ) {
768 - $result['spam'] = $matches[0];
 766+ $match = self::matchSpamRegex( $this->summary );
 767+ if( $match === false ) {
 768+ $match = self::matchSpamRegex( $this->textbox1 );
 769+ }
 770+ if( $match !== false ) {
 771+ $result['spam'] = $match;
769772 $ip = wfGetIP();
770773 $pdbk = $this->mTitle->getPrefixedDBkey();
771 - $match = str_replace( "\n", '', $matches[0] );
 774+ $match = str_replace( "\n", '', $match );
772775 wfDebugLog( 'SpamRegex', "$ip spam regex hit [[$pdbk]]: \"$match\"" );
773776 wfProfileOut( "$fname-checks" );
774777 wfProfileOut( $fname );
@@ -1022,6 +1025,25 @@
10231026 wfProfileOut( $fname );
10241027 return self::AS_END;
10251028 }
 1029+
 1030+ /**
 1031+ * Check given input text against $wgSpamRegex, and return the text of the first match.
 1032+ * @return mixed -- matching string or false
 1033+ */
 1034+ public static function matchSpamRegex( $text ) {
 1035+ global $wgSpamRegex;
 1036+ if( $wgSpamRegex ) {
 1037+ // For back compatibility, $wgSpamRegex may be a single string or an array of regexes.
 1038+ $regexes = (array)$wgSpamRegex;
 1039+ foreach( $regexes as $regex ) {
 1040+ $matches = array();
 1041+ if ( preg_match( $regex, $text, $matches ) ) {
 1042+ return $matches[0];
 1043+ }
 1044+ }
 1045+ }
 1046+ return false;
 1047+ }
10261048
10271049 /**
10281050 * Initialise form fields in the object
Index: trunk/phase3/includes/DefaultSettings.php
@@ -2095,9 +2095,17 @@
20962096 $wgExportAllowListContributors = false ;
20972097
20982098
2099 -/** Text matching this regular expression will be recognised as spam
2100 - * See http://en.wikipedia.org/wiki/Regular_expression */
2101 -$wgSpamRegex = false;
 2099+/**
 2100+ * Edits matching these regular expressions in body text or edit summary
 2101+ * will be recognised as spam and rejected automatically.
 2102+ *
 2103+ * There's no administrator override on-wiki, so be careful what you set. :)
 2104+ * May be an array of regexes or a single string for backwards compatibility.
 2105+ *
 2106+ * See http://en.wikipedia.org/wiki/Regular_expression
 2107+ */
 2108+$wgSpamRegex = array();
 2109+
21022110 /** Similarly you can get a function to do the job. The function will be given
21032111 * the following args:
21042112 * - a Title object for the article the edit is made on
Index: trunk/phase3/includes/Title.php
@@ -2500,6 +2500,12 @@
25012501 $nt->getUserPermissionsErrors('edit', $wgUser));
25022502 }
25032503
 2504+ $match = EditPage::matchSpamRegex( $reason );
 2505+ if( $match !== false ) {
 2506+ // This is kind of lame, won't display nice
 2507+ $errors[] = array('spamprotectiontext');
 2508+ }
 2509+
25042510 global $wgUser;
25052511 $err = null;
25062512 if( !wfRunHooks( 'AbortMove', array( $this, $nt, $wgUser, &$err, $reason ) ) ) {

Status & tagging log