Index: trunk/extensions/SpamDiffTool/README |
— | — | @@ -0,0 +1,96 @@ |
| 2 | +SpamDiffTool Extension |
| 3 | + |
| 4 | +Travis Derouin |
| 5 | +v 0.1 |
| 6 | +2006-11-08 |
| 7 | + |
| 8 | +mediawiki=1.6+ |
| 9 | +Allows users to add entries to the Spam blacklist right off of a diff page |
| 10 | + |
| 11 | +This extension allows users to add URLs to the Spam Blacklist (see the SpamBlacklist extension, http://meta.wikimedia.org/wiki/SpamBlacklist_extension) easily, without having to learn how to format entries for the blacklist or copy and paste them by hand. The extension inserts a link on each diff page; if an editor sees that a given page has been the victim of spam, they can click on "add to spam", which extracts all of the URLs the spammer has placed on the page, going back over their most recent edits on the article (similar to Rollback). Then, for each URL detected, the tool asks the user how broadly they want to blacklist the URL. The options are to block: |
| 12 | + |
| 13 | +# all links from the domain |
| 14 | +# all links from the specific subdomain |
| 15 | +# all links from the subdirectory and subdomain |
| 16 | +# nothing - don't add this to the blacklist |
| 17 | + |
| 18 | +The idea is to try to streamline the process of cleaning up spam, rolling back spam edits and blocking users. When the spam addition process is finished, the user is taken back to the diff page they were on to begin with. From there, they can rollback the edits as required. |
| 19 | + |
| 20 | +As of November 9, 2006, this tool has been running for a few weeks on wikiHow and has made the lives of some admins much easier. Future versions might include the ability to block the user and roll back the edits while adding the links to the spam blacklist, all in one step. |
| 21 | + |
| 22 | +== License == |
| 23 | + |
| 24 | +Copyright 2006 Travis Derouin |
| 25 | + |
| 26 | +This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by |
| 27 | +the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
| 28 | + |
| 29 | +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 30 | + |
| 31 | +You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 32 | + |
| 33 | +== Author == |
| 34 | + |
| 35 | +Travis Derouin ( travis @ wikihow.com) |
| 36 | + |
| 37 | +== Pre-requisites == |
| 38 | + |
| 39 | +This software was tested with MediaWiki 1.6.7 and 1.8.2. |
| 40 | + |
| 41 | + |
| 42 | +== Installation == |
| 43 | + |
| 44 | +To install, copy the file SpamDiffTool.php in the archive you downloaded to your extensions directory. |
| 45 | + |
| 46 | +In your MediaWiki LocalSettings.php, add the following line some place towards the bottom of the file: |
| 47 | + |
| 48 | + require_once("$IP/extensions/SpamDiffTool.php"); |
| 49 | + |
| 50 | +Then, unfortunately, you have to hack 2 files to get the "add to spam" link placed on the appropriate diff pages. |
| 51 | + |
| 52 | +In DifferenceEngine.php, in showDiffPage, find this section: |
| 53 | + |
| 54 | +<pre> |
| 55 | + |
| 56 | + if ( $this->mNewRev->isCurrent() && $wgUser->isAllowed('rollback') ) { |
| 57 | + $username = $this->mNewRev->getUserText(); |
| 58 | + $rollback = ' <strong>[' . $sk->makeKnownLinkObj( $this->mTitle, wfMsg( 'rollbacklink' ), |
| 59 | + 'action=rollback&from=' . urlencode( $username ) . |
| 60 | + '&token=' . urlencode( $wgUser->editToken( array( $this->mTitle->getPrefixedText(), $username ) ) ) ) . |
| 61 | + ']</strong>'; |
| 62 | + } else { |
| 63 | + $rollback = ''; |
| 64 | + } |
| 65 | +</pre> |
| 66 | + |
| 67 | +and add the following line directly after it: |
| 68 | +<pre> |
| 69 | + $rollback .= '<br/> <strong>' . wfSpamDiffLink($this->mTitle) . '</strong>'; |
| 70 | +</pre> |
| 71 | + |
| 72 | +A second change is needed so that new pages also get this feature. Edit Article.php (around line 930) and add the line marked with a leading + below, leaving out the + itself: |
| 73 | + |
| 74 | +<pre> |
| 75 | + $wgOut->addHTML( |
| 76 | + "<div class='patrollink'>" . |
| 77 | + wfMsg ( 'markaspatrolledlink', |
| 78 | + $sk->makeKnownLinkObj( $this->mTitle, wfMsg('markaspatrolledtext'), "action=markpatrolled&rcid=$rcid" ) |
| 79 | + ) . |
| 80 | ++ wfSpamDiffLink ($this->mTitle) . |
| 81 | + '</div>' |
| 82 | +</pre> |
| 83 | + |
| 84 | + |
| 85 | +wfSpamDiffLink will not be displayed for the users who aren't able to edit the Spam Blacklist. |
| 86 | + |
| 87 | + |
| 88 | +== Configuration == |
| 89 | + |
| 90 | +To specify which article you use as a Spam Blacklist, change the $wgSpamBlacklistArticle variable in SpamDiffTool.php. |
| 91 | + |
| 92 | +== Bugs and enhancements == |
| 93 | + |
| 94 | +Bugs or feature requests can be sent to the author at travis @ wikihow.com. There is a good chance the regular expressions are not 100% perfect. |
| 95 | + |
| 96 | +[[Category:Extensions]] |
| 97 | + |
Index: trunk/extensions/SpamDiffTool/SpamDiffTool.php |
— | — | @@ -0,0 +1,261 @@ |
| 2 | +<?php |
// Refuse to run when the file is requested directly instead of being loaded by MediaWiki.
if (!defined('MEDIAWIKI')) {
	die();
}

/**
 * SpamDiffTool: an extension that lets users add new entries to the
 * Spam Blacklist from diff pages.
 *
 * @package MediaWiki
 * @subpackage Extensions
 *
 * @link http://www.mediawiki.org/wiki/SpamDiffTool_Extension Documentation
 *
 * @author Travis Derouin <travis@wikihow.com>
 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 */

// Setup callback; wfSpamDiffTool() registers the messages and the special page.
$wgExtensionFunctions[] = 'wfSpamDiffTool';

// Title of the wiki page that is used as the Spam Blacklist.
$wgSpamBlacklistArticle = 'Project:Spam-Blacklist';

require_once 'SpecialPage.php';

$wgExtensionCredits['other'][] = array(
	'name'        => 'SpamDiffTool',
	'author'      => 'Travis Derouin',
	'description' => 'Provides a basic way of adding new entries to the Spam Blacklist from diff pages',
	'url'         => 'http://www.mediawiki.org/wiki/SpamDiffTool_Extension',
);
| 33 | + |
/**
 * Extension setup callback (listed in $wgExtensionFunctions).
 *
 * Adds the extension's interface messages to the message cache and registers
 * the unlisted special page "SpamDiffTool".
 */
function wfSpamDiffTool() {
	global $wgMessageCache;

	$wgMessageCache->addMessages(
		array(
			'spamdifftool' => 'Manage Spam Blacklist',
			'spamdifftool_cantedit' => 'Sorry - you don\'t have permission to edit the Spam Blacklist.',
			'spamdifftool_notext' => 'There is no text to add to the Spam Blacklist. Click <a href=\'$1\'>here</a> to continue. ',
			// The query separators are written as &amp; so that "&section" cannot be
			// read as the "&sect" character entity and corrupt the link target.
			'spamdifftool_confirm' => 'Confirm that you want to add these entries to the Spam Blacklist. (Click <a href=\'http://www.mediawiki.org/w/index.php?title=Talk:SpamDiffTool_Extension&amp;action=edit&amp;section=new\' target=\'new\'>here</a> to report a problem.)',
			'spamdifftool_summary' => 'Adding to Spam Blacklist',
			'spamdifftool_urls_detected' => 'The following URLs were detected in the edit(s), which ones would you like to add to the Spam Blacklist? These options order from more restrictive to less restrictive, blocking the entire domain will block all links to anything coming from that domain. <br/><br/>Be sure not to block entire domains that host user accounts, like blogpost.com, geocities.com, etc. ',
			'spamdifftool_no_urls_detected' => 'No urls were detected. Click <a href=\'$1\'>here</a> to return.',
			'spamdifftool_spam_link_text' => 'add to spam',
			'spamdifftool_option_domain' => 'all from this domain',
			'spamdifftool_option_subdomain' => 'all from this subdomain',
			'spamdifftool_option_directory' => 'this subdomain and directory',
			'spamdifftool_option_none' => 'nothing',
			'spamdifftool_block' => 'Block:',
			// NOTE: the key is spelled "buttom"; it is kept because other code looks it up by this name.
			'spamdifftool_submit_buttom' => 'Submit',
		)
	);

	SpecialPage::addPage(new UnlistedSpecialPage('SpamDiffTool'));
}
| 56 | + |
| 57 | + |
| 58 | + |
/**
 * Builds the bracketed "add to spam" link that points at Special:SpamDiffTool.
 *
 * The link carries the page title plus the oldid, rcid and diff values of the
 * current request, and the current query string as "returnto".
 *
 * @param Title $title Page the link refers to (sent as the "target" parameter)
 * @return string HTML for the link, or '' when the Spam Blacklist title is
 *                invalid or the current user may not edit it
 */
function wfSpamDiffLink($title) {
	global $wgUser, $wgRequest, $wgSpamBlacklistArticle;

	// Title::newFromDBKey() yields null for an invalid title; treat that like "no permission".
	$sb = Title::newFromDBKey($wgSpamBlacklistArticle);
	if (is_null($sb) || !$sb->userCanEdit()) {
		return '';
	}

	// Not every SAPI sets QUERY_STRING.
	$queryString = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';

	// Request values are user-controlled: URL-encode each one so it cannot
	// smuggle additional parameters into the generated link.
	$query = 'target=' . $title->getPrefixedURL()
		. '&oldid2=' . urlencode($wgRequest->getVal('oldid', ''))
		. '&rcid=' . urlencode($wgRequest->getVal('rcid', ''))
		. '&diff2=' . urlencode($wgRequest->getVal('diff', ''))
		. '&returnto=' . urlencode($queryString);

	$sk = $wgUser->getSkin();
	$link = $sk->makeKnownLinkObj(
		Title::newFromText("SpamDiffTool", NS_SPECIAL),
		wfMsg('spamdifftool_spam_link_text'),
		$query
	);

	return '[' . $link . ']';
}
| 77 | + |
/**
 * Turns a detected URL and a blocking mode into one Spam Blacklist line.
 *
 * Modes:
 *  - 'domain':    the last two labels of the host name (e.g. "spam\.com")
 *  - 'subdomain': the full host name
 *  - 'dir':       host name plus path, with regex metacharacters escaped
 *
 * NOTE(review): 'domain' keeps only the last two host labels, so a host under
 * a two-label suffix such as "co.uk" is reduced to that suffix - confirm this
 * is acceptable or add a suffix list.
 *
 * @param string $url  URL starting with http:// or https://
 * @param mixed  $mode One of 'domain', 'subdomain', 'dir'
 * @return string Blacklist line, or '' when the mode is unknown or nothing is left
 */
function wfSpamDiffToolBlacklistEntry($url, $mode) {
	if (!is_string($mode)) {
		return '';
	}

	$rest = preg_replace('/^https?:\/\//', '', $url);

	switch ($mode) {
		case 'domain':
			// Host = everything before the first path, query, fragment or port delimiter.
			$host = substr($rest, 0, strcspn($rest, '/?#:'));
			// Leave dotted-quad addresses alone; "the last two labels" is meaningless for them.
			if (!preg_match('/^\d+(\.\d+){3}$/', $host)) {
				$labels = explode('.', $host);
				$n = count($labels);
				if ($n > 2) {
					$host = $labels[$n - 2] . '.' . $labels[$n - 1];
				}
			}
			return str_replace('.', '\.', $host);

		case 'subdomain':
			$host = substr($rest, 0, strcspn($rest, '/?#'));
			return str_replace('.', '\.', $host);

		case 'dir':
			// preg_quote() escapes dots, slashes and every other regex metacharacter.
			return preg_quote($rest, '/');
	}

	return '';
}

/**
 * Special:SpamDiffTool - a three-step workflow for adding URLs to the Spam Blacklist.
 *
 *  1. GET: collect the URLs found in the text changed by a diff (or in the whole
 *     page when no "diff2" parameter is given) and show a form offering a
 *     blocking mode per URL.
 *  2. POST without "confirm": convert the chosen modes into blacklist lines and
 *     show them for confirmation.
 *  3. POST with "confirm": write the lines to the page named by
 *     $wgSpamBlacklistArticle and redirect to "returnto" when it is set.
 *
 * Request parameters read: target, diff2, rcid, returnto, confirm, newurls, token.
 */
function wfSpecialSpamDiffTool() {
	global $wgRequest, $wgContLang, $wgOut, $wgSpamBlacklistArticle, $wgUser, $wgScript;
	$fname = 'wfSpecialSpamDiffTool';

	// can the user even edit this?
	$sb = Title::newFromDBKey($wgSpamBlacklistArticle);
	if (is_null($sb) || !$sb->userCanEdit()) {
		$wgOut->addHTML(wfMsg('spamdifftool_cantedit'));
		return;
	}

	// PHP has already URL-decoded the parameter once; decoding it again would
	// corrupt any escaped "&", "+" or "%" in the original query string.
	$returnto = str_replace(array("\r", "\n"), '', $wgRequest->getVal('returnto', ''));
	$returnUrl = $wgScript . '?' . $returnto;
	// The messages place $1 inside href='...', so single quotes must be escaped as well.
	$returnHref = htmlspecialchars($returnUrl, ENT_QUOTES);

	// do the processing
	if ($wgRequest->wasPosted()) {

		if ($wgRequest->getVal('confirm', '') != '') {
			// Step 3. This request edits a page, so require a valid edit token (CSRF protection).
			if (!$wgUser->matchEditToken($wgRequest->getVal('token', ''))) {
				$wgOut->addWikiText(wfMsg('sessionfailure'));
				return;
			}

			// Browsers may submit CRLF line endings; the blacklist uses plain newlines.
			$newUrls = trim(str_replace("\r", '', $wgRequest->getVal('newurls', '')));
			if ($newUrls == '') {
				$wgOut->addHTML(wfMsg('spamdifftool_notext', $returnHref));
				return;
			}

			$a = new Article($sb);
			$text = '';
			$insert = true;
			// make sure this page exists
			if ($sb->getArticleID() > 0) {
				$text = $a->getContent();
				$insert = false;
			}

			// insert the entries before the </pre> at the bottom if there is one
			$i = strrpos($text, '</pre>');
			if ($i !== false) {
				$head = substr($text, 0, $i);
				// Never glue the first new entry onto the end of an existing line.
				if ($head != '' && substr($head, -1) != "\n") {
					$head .= "\n";
				}
				$text = $head . $newUrls . "\n" . substr($text, $i);
			} else {
				$text .= "\n" . $newUrls . "\n";
			}

			$watch = false;
			if ($wgUser->getID() > 0) {
				$watch = $wgUser->isWatched($sb);
			}
			if ($insert) {
				$a->insertNewArticle($text, wfMsg('spamdifftool_summary'), false, $watch);
			} else {
				$a->updateArticle($text, wfMsg('spamdifftool_summary'), false, $watch);
			}

			if ($returnto != '') {
				$wgOut->redirect($returnUrl); // clear the redirect set by updateArticle
			}
			return;
		}

		// Step 2. Every field whose name is a URL carries the blocking mode chosen for it.
		$text = '';
		foreach ($wgRequest->getValues() as $key => $value) {
			if (!preg_match('/^https?:\/\//', $key)) {
				continue;
			}
			// Dots were sent as %2E in the field names (see step 1).
			$url = str_replace('%2E', '.', $key);
			$entry = wfSpamDiffToolBlacklistEntry($url, $value);
			if ($entry != '') {
				$text .= "$entry\n";
			}
		}
		if (trim($text) == '') {
			$wgOut->addHTML(wfMsg('spamdifftool_notext', $returnHref));
			return;
		}

		$wgOut->addHTML("<form method=POST>
					<input type='hidden' name='confirm' value='true'>
					<input type='hidden' name='token' value=\"" . htmlspecialchars($wgUser->editToken()) . "\">
					<input type='hidden' name='newurls' value=\"" . htmlspecialchars($text) . "\">
					<input type='hidden' name='returnto' value=\"" . htmlspecialchars($returnto) . "\">
				");
		// $text is derived from request field names, so escape it before display.
		$wgOut->addHTML(wfMsg('spamdifftool_confirm') . "<pre>" . htmlspecialchars($text) . "</pre>");
		$wgOut->addHTML("<input type=submit value=\"" . htmlspecialchars(wfMsg('spamdifftool_submit_buttom')) . "\"></form>");
		return;
	}

	// Step 1. Work out which text to scan for URLs.
	$title = Title::newFromDBKey($wgRequest->getVal('target', ''));
	if (is_null($title)) {
		$wgOut->addHTML(wfMsg('spamdifftool_no_urls_detected', $returnHref));
		return;
	}
	$diff = $wgRequest->getVal('diff2');
	$rcid = $wgRequest->getVal('rcid');

	$text = '';
	if (!is_null($diff)) {
		require_once('DifferenceEngine.php');

		$current = Revision::newFromTitle($title);
		if (is_null($current)) {
			$wgOut->addHTML(wfMsg('spamdifftool_no_urls_detected', $returnHref));
			return;
		}

		# Get the last edit not by this guy
		# NOTE(review): min(rev_id) selects the OLDEST matching revision and makes
		# the ORDER BY irrelevant, which contradicts the comment above - confirm
		# which revision is intended before changing the query.
		$dbw =& wfGetDB(DB_MASTER);
		$user = intval($current->getUser());
		$user_text = $dbw->addQuotes($current->getUserText());
		$conds = array(
			'rev_page' => $current->getPage(),
			"rev_user <> {$user} OR rev_user_text <> {$user_text}",
		);
		// "diff2" comes straight from the request: only a positive integer may reach the SQL.
		$diffId = intval($diff);
		if ($diffId > 0) {
			$conds[] = "rev_id < $diffId";
		}
		$s = $dbw->selectRow('revision',
			array('min(rev_id) as rev_id'),
			$conds,
			$fname,
			array(
				'USE INDEX' => 'page_timestamp',
				'ORDER BY' => 'rev_timestamp DESC',
			)
		);
		$oldid = 0;
		if ($s) {
			$oldid = $s->rev_id;
		}

		// new diff object to extract the revision texts
		if ($rcid != "") {
			$de = new DifferenceEngine($title, $oldid, $diff, $rcid);
		} else {
			$de = new DifferenceEngine($title, $oldid, $diff);
		}

		$de->loadText();
		$ota = explode("\n", $wgContLang->segmentForDiff($de->mOldtext));
		$nta = explode("\n", $wgContLang->segmentForDiff($de->mNewtext));
		$diffs = new Diff($ota, $nta);

		// iterate over the edits and get all of the changed text
		foreach ($diffs->edits as $edit) {
			// Only join when there are closing lines; implode() needs an array.
			if ($edit->type != 'copy' && is_array($edit->closing)) {
				$text .= implode("\n", $edit->closing) . "\n";
			}
		}
	} else {
		$a = new Article($title);
		$text = $a->getContent(true);
	}

	$matches = array();
	preg_match_all("/https?:\/\/[^] \n'\"]*/", $text, $matches);
	if (sizeof($matches[0]) == 0) {
		$wgOut->addHTML(wfMsg('spamdifftool_no_urls_detected', $returnHref));
		return;
	}

	$wgOut->addHTML("
		<form method='POST'>
				<input type='hidden' name='returnto' value=\"" . htmlspecialchars($returnto) . "\">
			<style type='text/css'>
					td.spam-url-row {
						border: 1px solid #ccc;
					}
			</style> " . wfMsg('spamdifftool_urls_detected') . "
		<br/><br/><table cellpadding='5px' width='100%'>");

	$urls = array();
	foreach ($matches[0] as $url) {
		if (isset($urls[$url])) {
			continue; // avoid dupes
		}
		$urls[$url] = true;
		// Dots are sent as %2E in the field name; step 2 converts them back.
		$name = htmlspecialchars(str_replace(".", "%2E", $url));
		// The URL comes from page text and may contain markup characters.
		$label = htmlspecialchars($url);
		$wgOut->addHTML("<tr>
			<td class='spam-url-row'><b>$label</b><br/>
			" . wfMsg('spamdifftool_block') . " &nbsp;&nbsp;
			<INPUT type='radio' name=\"" . $name . "\" value='domain' checked> " . wfMsg('spamdifftool_option_domain') . "
			<INPUT type='radio' name=\"" . $name . "\" value='subdomain'> " . wfMsg('spamdifftool_option_subdomain') . "
			<INPUT type='radio' name=\"" . $name . "\" value='dir'>" . wfMsg('spamdifftool_option_directory') . "
			<INPUT type='radio' name=\"" . $name . "\" value='none'>" . wfMsg('spamdifftool_option_none') . "
		</td>
		</tr>
		");
	}
	$wgOut->addHTML("</table><input type=submit value=\"" . htmlspecialchars(wfMsg('spamdifftool_submit_buttom')) . "\"></form>");
}
| 262 | + |
Property changes on: trunk/extensions/SpamDiffTool/SpamDiffTool.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 263 | + native |