r49055 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r49054 | r49055 | r49056 >
Date:03:39, 31 March 2009
Author:werdna
Status:deferred (Comments)
Tags:
Comment:
ProjectHoneypot Integration updates: Actually implement list parsing, storage and retrieval in memcached.
Modified paths:
  • /trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php (modified) (history)
  • /trunk/extensions/HoneypotIntegration/HoneypotIntegration.php (modified) (history)

Diff [purge]

Index: trunk/extensions/HoneypotIntegration/HoneypotIntegration.php
@@ -24,11 +24,21 @@
2525 $wgExtensionMessagesFiles['HoneypotIntegration'] = "$dir/HoneypotIntegration.i18n.php";
2626 $wgAutoloadClasses[ 'HoneypotIntegration' ] = "$dir/HoneypotIntegration.class.php";
2727
28 -$wgHooks['AbuseFilter-filterAction'][] = 'HoneypotIntegration::onAbuseFilterFilterAction';
29 -$wgHooks['AbuseFilter-builder'][] = 'HoneypotIntegration::onAbuseFilterBuilder';
 28+#$wgHooks['AbuseFilter-filterAction'][] = 'HoneypotIntegration::onAbuseFilterFilterAction';
 29+#$wgHooks['AbuseFilter-builder'][] = 'HoneypotIntegration::onAbuseFilterBuilder';
3030 $wgHooks['EditPage::showEditForm:fields'][] = 'HoneypotIntegration::onShowEditForm';
 31+$wgHooks['GetUserPermissionsErrorsExpensive'][] =
 32+ 'HoneypotIntegration::onGetUserPermissionsErrorsExpensive';
3133
3234 $wgHoneypotURLs = array( 'http://www.google.com' );
3335 $wgHoneypotTemplates = array(
3436 '<a href="honeypoturl"><!-- randomtext --></a>',
35 -);
\ No newline at end of file
 37+);
 38+
 39+$wgHoneypotAutoLoad = false;
 40+
 41+$wgHoneypotDataFile = false;
 42+
 43+if ( !extension_loaded( 'fss' ) ) {
 44+ die( "FastStringSearch is required for Project Honeypot Integration" );
 45+}
Index: trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php
@@ -1,6 +1,8 @@
22 <?php
33
44 class HoneypotIntegration {
 5+ static $IPs = array();
 6+ static $Data = array();
57 public static function onAbuseFilterFilterAction( &$vars, $title ) {
68 $vars->setVar( 'honeypot_list_count', self::listingCount() ? 1 : 0 );
79 return true;
@@ -12,14 +14,6 @@
1315 return true;
1416 }
1517
16 - public static function listingCount( $ip = null ) {
17 - if ( $ip === null )
18 - $ip = wfGetIP();
19 -
20 - // TODO IMPLEMENT
21 - return 0;
22 - }
23 -
2418 public static function onShowEditForm( &$editPage, &$out ) {
2519
2620 // Spammers are more likely to fall for real text than for a random token.
@@ -61,4 +55,121 @@
6256
6357 return "$output\n";
6458 }
 59+
 60+ public static function isIPListed( $ip ) {
 61+ $subnet = substr( IP::toHex( $ip ), 0, -6 );
 62+ $subnet_ips = self::getHoneypotIPs( $subnet );
 63+
 64+ $fss = fss_prep_search( "[$ip]" );
 65+ return false !== fss_exec_search( $fss, $subnet_ips );
 66+ }
 67+
 68+ // Gets data from memcached
 69+ // for a given class A subnet
 70+ public static function getHoneypotData( $subnet ) {
 71+ if ( isset(self::$Data[$subnet]) ) {
 72+ return self::$Data[$subnet];
 73+ }
 74+ // Check cache
 75+ global $wgMemc;
 76+
 77+ $data = $wgMemc->get( wfMemcKey( 'honeypot-data', $subnet ) );
 78+ if ($data) {
 79+ wfDebug( "Honeypot Integration: Got data for subnet $subnet from memcached\n" );
 80+ self::$mData[$subnet] = $data;
 81+ return $data;
 82+ }
 83+
 84+ global $wgHoneypotAutoLoad;
 85+
 86+ if ($wgHoneypotAutoLoad) {
 87+ list($data,$ips) = self::loadHoneypotData( $subnet );
 88+ return $data;
 89+ }
 90+
 91+ wfDebug( "Honeypot Integration: Couldn't find honeypot data for subnet $subnet ".
 92+ "in cache, and AutoLoad disabled\n" );
 93+ }
 94+
 95+ // Gets IPs from memcached for a given Class A subnet
 96+ public static function getHoneypotIPs( $subnet ) {
 97+ if ( isset(self::$IPs[$subnet]) ) {
 98+ return self::$IPs[$subnet];
 99+ }
 100+
 101+ // Check cache
 102+ global $wgMemc;
 103+
 104+ $ips = $wgMemc->get( wfMemcKey( 'honeypot-ips', $subnet ) );
 105+ if ($ips) {
 106+ wfDebug( "Honeypot Integration: Got IPs for subnet $subnet from memcached\n" );
 107+ self::$IPs[$subnet] = $ips;
 108+ return $ips;
 109+ }
 110+
 111+ global $wgHoneypotAutoLoad;
 112+
 113+ if ($wgHoneypotAutoLoad) {
 114+ list($data,$ips) = self::loadHoneypotData( $subnet );
 115+ return $ips;
 116+ }
 117+
 118+ wfDebug( "Honeypot Integration: Couldn't find honeypot data for subnet $subnet" .
 119+ " in cache, and AutoLoad disabled\n" );
 120+ }
 121+
 122+ // Loads data and saves it to memcached
 123+ public static function loadHoneypotData() {
 124+ list($data,$ips) = self::loadHoneypotDataFromFile();
 125+
 126+ global $wgMemc;
 127+ foreach ( $ips as $subnet => $ipData ) {
 128+ $wgMemc->set( wfMemcKey( 'honeypot-data', $subnet ), $data[$subnet], 86400 );
 129+ $wgMemc->set( wfMemcKey( 'honeypot-ips', $subnet ), $ips[$subnet], 86400 );
 130+ }
 131+
 132+ return array($data,$ips);
 133+ }
 134+
 135+ // Loads data
 136+ public static function loadHoneypotDataFromFile() {
 137+ global $wgHoneypotDataFile;
 138+ $fh = fopen( $wgHoneypotDataFile, 'r' );
 139+
 140+ $save_data = array();
 141+ $ips = array();
 142+
 143+ while ( !feof($fh) ) {
 144+ $line = trim( fgets( $fh ) );
 145+ $data = preg_split( '/\s/', $line, 3 );
 146+
 147+ if ( IP::isIPAddress( $data[0] ) ) {
 148+ $subnet = substr( IP::toHex( $data[0] ), 0, -6 );
 149+
 150+ if ( !isset($ips[$subnet]) )
 151+ $ips[$subnet] = '';
 152+ if ( !isset( $save_data[$subnet] ) )
 153+ $save_data[$subnet] = array();
 154+
 155+ $save_data[$subnet][$data[0]] = $data;
 156+ $ips[$subnet] .= '['.$data[0]."]\n";
 157+ }
 158+ }
 159+
 160+ fclose( $fh );
 161+
 162+ self::$IPs = $ips;
 163+ self::$Data = $save_data;
 164+
 165+ return array( $save_data, $ips );
 166+ }
 167+
 168+ public static function onGetUserPermissionsErrorsExpensive() {
 169+ $ip = wfGetIP();
 170+
 171+ if ( self::isIPListed( $ip ) ) {
 172+ wfDebugLog( 'HoneypotIntegrationMatches', "Attempted edit from $ip matched honeypot" );
 173+ }
 174+ return true;
 175+ }
65176 }

Comments

#Comment by Tim Starling (talk | contribs)   07:15, 22 May 2009

I don't think you understand what's going on inside FSS and in what situations it's faster than the built-in functions in PHP, which are, like FSS, written in C. This is not one of the cases where it's useful; it might even be slower than strpos(). You should read these Wikipedia articles: [article links not preserved in this archive]

Your algorithm as a whole is O(N) in the number of IP addresses in the subnet; that would be reduced to almost O(1) if you used a pre-generated hashtable, as the TrustedXFF extension does.

Status & tagging log