r51740 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r51739 | r51740 | r51741 >
Date:12:15, 11 June 2009
Author:werdna
Status:deferred
Tags:
Comment:
Fix HoneypotIntegration extension to use hashes instead of FSS. Now takes ~8m to load a day's worth of data, and 2ms or so to look up a particular IP.
Modified paths:
  • /trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php (modified) (history)
  • /trunk/extensions/HoneypotIntegration/HoneypotIntegration.php (modified) (history)

Diff [purge]

Index: trunk/extensions/HoneypotIntegration/HoneypotIntegration.php
@@ -31,7 +31,8 @@
3232 $wgHooks['GetUserPermissionsErrorsExpensive'][] =
3333 'HoneypotIntegration::onGetUserPermissionsErrorsExpensive';
3434
35 -$wgHoneypotURLs = array( 'http://www.google.com' );
 35+$wgHoneypotURLSource = '';
 36+
3637 $wgHoneypotTemplates = array(
3738 '<a href="honeypoturl"><!-- randomtext --></a>',
3839 );
@@ -40,6 +41,3 @@
4142
4243 $wgHoneypotDataFile = false;
4344
44 -if ( !extension_loaded( 'fss' ) ) {
45 - die( "FastStringSearch is required for Project Honeypot Integration" );
46 -}
Index: trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php
@@ -33,12 +33,41 @@
3434 $out->addHTML( self::generateHoneypotLink( $randomText ) );
3535 return 1;
3636 }
 37+
 38+ public static function getHoneypotURLs() {
 39+ $key = wfMemcKey( 'honeypot-integration-urls' );
 40+
 41+ global $wgMemc;
 42+ $urls = $wgMemc->get( $key );
 43+
 44+ if ( is_array($urls) ) {
 45+ return $urls;
 46+ }
 47+
 48+ global $wgHoneypotAutoLoad;
 49+ if (!$wgHoneypotAutoLoad)
 50+ return array( 'http://www.google.com' ); // Dummy URL
 51+
 52+ global $wgHoneypotURLSource;
 53+ // Curl opt is a hack because the honeypot folks don't seem to have a valid
 54+ // certificate.
 55+ $data = Http::get( $wgHoneypotURLSource, 'default',
 56+ array( CURLOPT_SSL_VERIFYHOST => 1 ) );
 57+
 58+ $urls = explode( "\n", $data );
 59+
 60+ $wgMemc->set( $key, $urls, 86400 );
 61+
 62+ return $urls;
 63+ }
3764
3865 public static function generateHoneypotLink( $randomText = null ) {
39 - global $wgHoneypotURLs, $wgHoneypotTemplates;
 66+ global $wgHoneypotTemplates;
 67+
 68+ $urls = self::getHoneypotURLs();
4069
41 - $index = rand( 0, count( $wgHoneypotURLs ) - 1 );
42 - $url = $wgHoneypotURLs[$index];
 70+ $index = rand( 0, count( $urls ) - 1 );
 71+ $url = $urls[$index];
4372 $index = rand( 0, count( $wgHoneypotTemplates ) - 1 );
4473 $template = $wgHoneypotTemplates[$index];
4574
@@ -59,9 +88,8 @@
6089 public static function isIPListed( $ip ) {
6190 $subnet = substr( IP::toHex( $ip ), 0, -6 );
6291 $subnet_ips = self::getHoneypotIPs( $subnet );
63 -
64 - $fss = fss_prep_search( "[$ip]" );
65 - return false !== fss_exec_search( $fss, $subnet_ips );
 92+
 93+ return !empty($subnet_ips[$ip]);
6694 }
6795
6896 // Gets data from memcached
@@ -124,6 +152,7 @@
125153
126154 global $wgMemc;
127155 foreach ( $ips as $subnet => $ipData ) {
 156+ wfDebugLog( 'HoneypotDebug', "Inserting data for subnet $subnet" );
128157 $wgMemc->set( wfMemcKey( 'honeypot-data', $subnet ), $data[$subnet], 86400 );
129158 $wgMemc->set( wfMemcKey( 'honeypot-ips', $subnet ), $ips[$subnet], 86400 );
130159 }
@@ -139,6 +168,8 @@
140169 $save_data = array();
141170 $ips = array();
142171
 172+ $count = 0;
 173+
143174 while ( !feof($fh) ) {
144175 $line = trim( fgets( $fh ) );
145176 $data = preg_split( '/\s/', $line, 3 );
@@ -147,12 +178,18 @@
148179 $subnet = substr( IP::toHex( $data[0] ), 0, -6 );
149180
150181 if ( !isset($ips[$subnet]) )
151 - $ips[$subnet] = '';
 182+ $ips[$subnet] = array();
152183 if ( !isset( $save_data[$subnet] ) )
153184 $save_data[$subnet] = array();
154185
155186 $save_data[$subnet][$data[0]] = $data;
156 - $ips[$subnet] .= '['.$data[0]."]\n";
 187+ $ips[$subnet][$data[0]] = true;
 188+
 189+ $count++;
 190+
 191+ if ( $count % 100 == 0) {
 192+ wfDebugLog( 'HoneypotDebug', "Done $count IPs -- $data[0]" );
 193+ }
157194 }
158195 }
159196

Status & tagging log