r52203 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r52202‎ | r52203 | r52204 >
Date:15:59, 20 June 2009
Author:tstarling
Status:resolved (Comments)
Tags:
Comment:
Added PHP port of CDB, with abstraction layer. Tested for correctness with a differential fuzz tester, not yet benchmarked. The idea is to open up new applications for CDB, and benefit both shell and shared hosting users.

Ported existing uses of CDB to the new abstraction layer.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/Cdb.php (added) (history)
  • /trunk/phase3/includes/Cdb_PHP.php (added) (history)
  • /trunk/phase3/includes/Interwiki.php (modified) (history)
  • /trunk/phase3/maintenance/dumpInterwiki.inc (modified) (history)
  • /trunk/phase3/maintenance/dumpInterwiki.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/dumpInterwiki.inc
@@ -201,7 +201,7 @@
202202 array_key_exists($entry['iw_prefix'],$prefixRewrites[$source]))
203203 $entry['iw_prefix'] = $prefixRewrites[$source][$entry['iw_prefix']];
204204 if ($dbFile)
205 - dba_insert("{$source}:{$entry['iw_prefix']}", trim("{$entry['iw_local']} {$entry['iw_url']}"),$dbFile);
 205+ $dbFile->set( "{$source}:{$entry['iw_prefix']}", trim("{$entry['iw_local']} {$entry['iw_url']}") );
206206 else
207207 print "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n";
208208
Index: trunk/phase3/maintenance/dumpInterwiki.php
@@ -19,7 +19,7 @@
2020 # Output
2121 if ( isset( $options['o'] ) ) {
2222 # To database specified with -o
23 - $dbFile = dba_open( $options['o'], "n", "cdb_make" );
 23+ $dbFile = CdbWriter::open( $options['o'] );
2424 }
2525
2626 getRebuildInterwikiDump();
Index: trunk/phase3/includes/Interwiki.php
@@ -104,24 +104,24 @@
105105
106106 wfDebug( __METHOD__ . "( $prefix )\n" );
107107 if( !$db ) {
108 - $db = dba_open( $wgInterwikiCache, 'r', 'cdb' );
 108+ $db = CdbReader::open( $wgInterwikiCache );
109109 }
110110 /* Resolve site name */
111111 if( $wgInterwikiScopes>=3 && !$site ) {
112 - $site = dba_fetch( '__sites:' . wfWikiID(), $db );
 112+ $site = $db->get( '__sites:' . wfWikiID() );
113113 if ( $site == '' ) {
114114 $site = $wgInterwikiFallbackSite;
115115 }
116116 }
117117
118 - $value = dba_fetch( wfMemcKey( $prefix ), $db );
 118+ $value = $db->get( wfMemcKey( $prefix ) );
119119 // Site level
120120 if ( $value == '' && $wgInterwikiScopes >= 3 ) {
121 - $value = dba_fetch( "_{$site}:{$prefix}", $db );
 121+ $value = $db->get( "_{$site}:{$prefix}" );
122122 }
123123 // Global Level
124124 if ( $value == '' && $wgInterwikiScopes >= 2 ) {
125 - $value = dba_fetch( "__global:{$prefix}", $db );
 125+ $value = $db->get( "__global:{$prefix}" );
126126 }
127127 if ( $value == 'undef' )
128128 $value = '';
Index: trunk/phase3/includes/Cdb_PHP.php
@@ -0,0 +1,388 @@
 2+<?php
 3+
 4+/**
 5+ * This is a port of D.J. Bernstein's CDB to PHP. It's based on the copy that
 6+ * appears in PHP 5.3. Changes are:
 7+ * * Error returns replaced with exceptions
 8+ * * Exception thrown if sizes or offsets are between 2GB and 4GB
 9+ * * Some variables renamed
 10+ */
 11+
/**
 * Common functions for readers and writers.
 *
 * Hash values are 32-bit patterns carried in native PHP integers. On a 32-bit
 * build a pattern with bit 31 set is a negative integer; on a 64-bit build it
 * is a non-negative integer no larger than 0xffffffff.
 */
class CdbFunctions {
	/**
	 * Add two 32-bit integers, wrapping around on overflow instead of
	 * promoting the result to a float or to a wider integer.
	 *
	 * On 32-bit PHP the plus operator converts an overflowing sum to a float,
	 * but the bitwise operators (&, |, ^, ~, <<, >>) behave like the underlying
	 * C operators. The 32-bit path therefore adds the low 30 bits and the top
	 * two bits separately and recombines them with bitwise operators.
	 *
	 * On 64-bit PHP the native sum of two 32-bit operands cannot overflow, so
	 * it is never a float; the bits above bit 31 are simply masked off.
	 *
	 * @param $a Integer: first operand, a 32-bit value
	 * @param $b Integer: second operand, a 32-bit value
	 * @return Integer: the low 32 bits of $a + $b
	 */
	public static function sumWithOverflow( $a, $b ) {
		$sum = $a + $b;
		if ( PHP_INT_SIZE > 4 ) {
			// 64-bit build: without this mask the result would keep growing
			// past 32 bits and hash() would disagree with the on-disk hashes.
			return $sum & 0xffffffff;
		}
		if ( is_float( $sum ) ) {
			// Use the plus operator to do a sum of the lowest 30 bits to produce a 31-bit result
			$lowA = $a & 0x3fffffff;
			$lowB = $b & 0x3fffffff;
			$sum = $lowA + $lowB;

			// Strip off the carry bit
			$carry = ( $sum & 0x40000000 ) >> 30;
			$sum = $sum & 0x3fffffff;

			// Get the last two bits
			$highA = self::unsignedShiftRight( $a, 30 );
			$highB = self::unsignedShiftRight( $b, 30 );

			// Add with carry
			$highSum = $carry + $highA + $highB;

			// Recombine
			$sum = $sum | ( $highSum << 30 );
		}
		return $sum;
	}

	/**
	 * Take a modulo of a signed integer as if it were an unsigned integer.
	 * $b must be less than 0x40000000 and greater than 0
	 *
	 * @param $a Integer: 32-bit value, possibly negative
	 * @param $b Integer: modulus
	 * @return Integer: $a, read as unsigned 32-bit, modulo $b
	 */
	public static function unsignedMod( $a, $b ) {
		if ( $a < 0 ) {
			// Bit 31 is worth 2 * 0x40000000; add its residue to that of the low 31 bits
			$m = ( $a & 0x7fffffff ) % $b + 2 * ( 0x40000000 % $b );
			return $m % $b;
		} else {
			return $a % $b;
		}
	}

	/**
	 * Shift a signed integer right as if it were unsigned
	 *
	 * @param $a Integer: 32-bit value, possibly negative
	 * @param $b Integer: number of bits to shift by
	 * @return Integer: $a, read as unsigned 32-bit, shifted right by $b bits
	 */
	public static function unsignedShiftRight( $a, $b ) {
		if ( $b == 0 ) {
			return $a;
		}
		if ( $a < 0 ) {
			// Shift the low 31 bits, then put the former sign bit back at position 31 - $b
			return ( ( $a & 0x7fffffff ) >> $b ) | ( 0x40000000 >> ( $b - 1 ) );
		} else {
			return $a >> $b;
		}
	}

	/**
	 * The CDB hash function: starting from 5381, each byte updates the hash
	 * to ( h * 33 ) XOR byte, truncated to 32 bits.
	 *
	 * @param $s String: the key to hash
	 * @return Integer: 32-bit hash of $s
	 */
	public static function hash( $s ) {
		$h = 5381;
		$length = strlen( $s );
		for ( $i = 0; $i < $length; $i++ ) {
			// h + ( h << 5 ) is h * 33
			$h = self::sumWithOverflow( $h, $h << 5 ) ^ ord( $s[$i] );
		}
		return $h;
	}
}
 84+
/**
 * CDB reader class
 *
 * Pure PHP reader. The first 2048 bytes of the file are read as 256 pairs of
 * 32-bit little-endian integers (hashtable position, number of slots); each
 * hashtable slot is a pair (hash, record position); each record is a pair
 * (key length, data length) followed by the key and the data.
 */
class CdbReader_PHP extends CdbReader {
	/** The file handle */
	var $handle;

	/* number of hash slots searched under this key */
	var $loop;

	/* initialized if loop is nonzero */
	var $khash;

	/* initialized if loop is nonzero */
	var $kpos;

	/* initialized if loop is nonzero */
	var $hpos;

	/* initialized if loop is nonzero */
	var $hslots;

	/* initialized if findNext() returns true */
	var $dpos;

	/* initialized if findNext() returns true */
	var $dlen;

	/**
	 * Open the file for reading.
	 *
	 * @param $fileName String: path of the CDB file
	 * @throws MWException if the file cannot be opened
	 */
	function __construct( $fileName ) {
		$this->handle = fopen( $fileName, 'rb' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open DB file "' . $fileName . '"' );
		}
		$this->findStart();
	}

	/**
	 * Close the file handle. Calling this more than once is harmless.
	 */
	function close() {
		if ( isset( $this->handle ) ) {
			fclose( $this->handle );
		}
		unset( $this->handle );
	}

	/**
	 * Look up a key.
	 *
	 * @param $key String: the key; it is converted to a string first
	 * @return String|false: the stored data, or false if the key is not found
	 */
	public function get( $key ) {
		// strval is required
		if ( $this->find( strval( $key ) ) ) {
			return $this->read( $this->dlen, $this->dpos );
		} else {
			return false;
		}
	}

	/**
	 * Check whether the bytes stored at $pos are exactly $key.
	 */
	protected function match( $key, $pos ) {
		$buf = $this->read( strlen( $key ), $pos );
		return $buf === $key;
	}

	/**
	 * Reset the search state so that the next findNext() starts a new lookup.
	 */
	protected function findStart() {
		$this->loop = 0;
	}

	/**
	 * Read exactly $length bytes starting at file offset $pos.
	 *
	 * @throws MWException if the seek fails or fewer bytes are available
	 */
	protected function read( $length, $pos ) {
		if ( fseek( $this->handle, $pos ) == -1 ) {
			// This can easily happen if the internal pointers are incorrect
			throw new MWException( __METHOD__.': seek failed, file may be corrupted.' );
		}

		if ( $length == 0 ) {
			return '';
		}

		$buf = fread( $this->handle, $length );
		if ( $buf === false || strlen( $buf ) !== $length ) {
			throw new MWException( __METHOD__.': read from cdb file failed, file may be corrupted' );
		}
		return $buf;
	}

	/**
	 * Unpack an unsigned integer and throw an exception if it needs more than 31 bits
	 */
	protected function unpack31( $s ) {
		$data = unpack( 'V', $s );
		// On 32-bit PHP a value with bit 31 set is returned as a negative
		// number, so both ends of the range have to be checked.
		if ( $data[1] < 0 || $data[1] > 0x7fffffff ) {
			throw new MWException( __METHOD__.': error in CDB file, integer too big' );
		}
		return $data[1];
	}

	/**
	 * Unpack a 32-bit signed integer
	 */
	protected function unpackSigned( $s ) {
		// Two 16-bit halves are combined with bitwise operators so that the
		// result is never promoted to a float.
		$data = unpack( 'va/vb', $s );
		return $data['a'] | ( $data['b'] << 16 );
	}

	/**
	 * Find the next record stored under $key, continuing from the current
	 * search state. On success $this->dpos and $this->dlen describe the data.
	 *
	 * @return Boolean: true if a matching record was found
	 */
	protected function findNext( $key ) {
		if ( !$this->loop ) {
			// New lookup: the low 8 bits of the hash select one of the 256
			// eight-byte entries in the pointer array at the start of the file.
			$u = CdbFunctions::hash( $key );
			$buf = $this->read( 8, ( $u << 3 ) & 2047 );
			$this->hslots = $this->unpack31( substr( $buf, 4 ) );
			if ( !$this->hslots ) {
				return false;
			}
			$this->hpos = $this->unpack31( substr( $buf, 0, 4 ) );
			$this->khash = $u;
			// The remaining hash bits select the first slot to probe
			$u = CdbFunctions::unsignedShiftRight( $u, 8 );
			$u = CdbFunctions::unsignedMod( $u, $this->hslots );
			$u <<= 3;
			$this->kpos = $this->hpos + $u;
		}

		while ( $this->loop < $this->hslots ) {
			$buf = $this->read( 8, $this->kpos );
			$pos = $this->unpack31( substr( $buf, 4 ) );
			if ( !$pos ) {
				// Empty slot: the key is not present
				return false;
			}
			$this->loop += 1;
			$this->kpos += 8;
			if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) {
				// Wrap around to the first slot of the table
				$this->kpos = $this->hpos;
			}
			$u = $this->unpackSigned( substr( $buf, 0, 4 ) );
			if ( $u === $this->khash ) {
				$buf = $this->read( 8, $pos );
				$keyLen = $this->unpack31( substr( $buf, 0, 4 ) );
				if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) {
					// Found
					$this->dlen = $this->unpack31( substr( $buf, 4 ) );
					$this->dpos = $pos + 8 + $keyLen;
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Find the first record stored under $key.
	 *
	 * @return Boolean: true if a matching record was found
	 */
	protected function find( $key ) {
		$this->findStart();
		return $this->findNext( $key );
	}
}
 227+
/**
 * CDB writer class
 *
 * Pure PHP writer. Records are written to a temporary file next to the
 * target; close() appends the hashtables, fills in the pointer array at the
 * start of the file and renames the temporary file over the target.
 */
class CdbWriter_PHP extends CdbWriter {
	var $handle, $realFileName, $tmpFileName;

	/* list of array( 'h' => hash, 'p' => record position ), one per record */
	var $hplist;

	/* number of records written so far, and the current write offset */
	var $numEntries, $pos;

	/**
	 * Create the temporary file and position it after the pointer array.
	 *
	 * @param $fileName String: path of the CDB file to create
	 * @throws MWException if the temporary file cannot be opened or seeked
	 */
	function __construct( $fileName ) {
		$this->realFileName = $fileName;
		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
		$this->handle = fopen( $this->tmpFileName, 'wb' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open DB file for write "' . $fileName . '"' );
		}
		$this->hplist = array();
		$this->numEntries = 0;
		$this->pos = 2048; // leaving space for the pointer array, 256 * 8
		if ( fseek( $this->handle, $this->pos ) == -1 ) {
			throw new MWException( __METHOD__.': fseek failed' );
		}
	}

	/**
	 * Finish the file if the caller did not call close() explicitly.
	 */
	function __destruct() {
		if ( isset( $this->handle ) ) {
			$this->close();
		}
	}

	/**
	 * Append a record. Both the key and the value are converted to strings.
	 * A record with an empty key is silently skipped.
	 *
	 * @param $key String
	 * @param $value String
	 * @throws MWException on write failure or if the file grows too large
	 */
	public function set( $key, $value ) {
		// Convert up front so that lengths, hash and written bytes all agree
		$key = strval( $key );
		$value = strval( $value );
		if ( $key === '' ) {
			// DBA cross-check hack
			return;
		}
		$this->addbegin( strlen( $key ), strlen( $value ) );
		$this->write( $key );
		$this->write( $value );
		$this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) );
	}

	/**
	 * Write the hashtables and move the finished file into place.
	 *
	 * @throws MWException if writing fails or the file cannot be renamed
	 */
	public function close() {
		$this->finish();
		fclose( $this->handle );
		// The existing target is removed first on Windows; skip that when
		// there is nothing to remove, to avoid a warning from unlink().
		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
			unlink( $this->realFileName );
		}
		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
			throw new MWException( 'Unable to move the new CDB file into place.' );
		}
		unset( $this->handle );
	}

	/**
	 * Write a buffer at the current file position.
	 *
	 * @throws MWException if not all bytes were written
	 */
	protected function write( $buf ) {
		$len = fwrite( $this->handle, $buf );
		if ( $len !== strlen( $buf ) ) {
			throw new MWException( 'Error writing to CDB file.' );
		}
	}

	/**
	 * Advance the tracked write offset by $len bytes.
	 *
	 * @throws MWException if the offset would no longer fit in 31 bits
	 */
	protected function posplus( $len ) {
		$newpos = $this->pos + $len;
		if ( $newpos > 0x7fffffff ) {
			throw new MWException( 'A value in the CDB file is too large' );
		}
		$this->pos = $newpos;
	}

	/**
	 * Register a record that has just been written: remember its hash and
	 * position, then advance the offset past its header, key and data.
	 */
	protected function addend( $keylen, $datalen, $h ) {
		$this->hplist[] = array(
			'h' => $h,
			'p' => $this->pos
		);

		$this->numEntries++;
		$this->posplus( 8 );
		$this->posplus( $keylen );
		$this->posplus( $datalen );
	}

	/**
	 * Write the 8-byte record header (key length, data length).
	 *
	 * @throws MWException if either length does not fit in 31 bits
	 */
	protected function addbegin( $keylen, $datalen ) {
		if ( $keylen > 0x7fffffff ) {
			throw new MWException( __METHOD__.': key length too long' );
		}
		if ( $datalen > 0x7fffffff ) {
			throw new MWException( __METHOD__.': data length too long' );
		}
		$buf = pack( 'VV', $keylen, $datalen );
		$this->write( $buf );
	}

	/**
	 * Write the 256 hashtables after the records, then the pointer array at
	 * the start of the file.
	 *
	 * @throws MWException on write or rewind failure
	 */
	protected function finish() {
		// Hack for DBA cross-check
		$this->hplist = array_reverse( $this->hplist );

		// Calculate the number of items that will be in each hashtable
		$counts = array_fill( 0, 256, 0 );
		foreach ( $this->hplist as $item ) {
			++ $counts[ 255 & $item['h'] ];
		}

		// Fill in $starts with the *end* indexes
		$starts = array();
		$pos = 0;
		for ( $i = 0; $i < 256; ++$i ) {
			$pos += $counts[$i];
			$starts[$i] = $pos;
		}

		// Excessively clever and indulgent code to simultaneously fill $packedTables
		// with the packed hashtables, and adjust the elements of $starts
		// to actually point to the starts instead of the ends.
		// array_fill() rejects a zero count on older PHP versions, so an
		// empty database gets a plain empty array instead.
		$packedTables = $this->numEntries > 0
			? array_fill( 0, $this->numEntries, false )
			: array();
		foreach ( $this->hplist as $item ) {
			$packedTables[--$starts[255 & $item['h']]] = $item;
		}

		$final = '';
		for ( $i = 0; $i < 256; ++$i ) {
			$count = $counts[$i];

			// The size of the hashtable will be double the item count.
			// The rest of the slots will be empty.
			$len = $count + $count;
			$final .= pack( 'VV', $this->pos, $len );

			$hashtable = array();
			for ( $u = 0; $u < $len; ++$u ) {
				$hashtable[$u] = array( 'h' => 0, 'p' => 0 );
			}

			// Fill the hashtable, using the next empty slot if the hashed slot
			// is taken.
			for ( $u = 0; $u < $count; ++$u ) {
				$hp = $packedTables[$starts[$i] + $u];
				$where = CdbFunctions::unsignedMod(
					CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len );
				while ( $hashtable[$where]['p'] ) {
					if ( ++$where == $len ) {
						$where = 0;
					}
				}
				$hashtable[$where] = $hp;
			}

			// Write the hashtable
			for ( $u = 0; $u < $len; ++$u ) {
				// The hash is packed as two 16-bit halves, low half first
				$buf = pack( 'vvV',
					$hashtable[$u]['h'] & 0xffff,
					CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ),
					$hashtable[$u]['p'] );
				$this->write( $buf );
				$this->posplus( 8 );
			}
		}

		// Write the pointer array at the start of the file
		rewind( $this->handle );
		if ( ftell( $this->handle ) != 0 ) {
			throw new MWException( __METHOD__.': Error rewinding to start of file' );
		}
		$this->write( $final );
	}
}
Property changes on: trunk/phase3/includes/Cdb_PHP.php
___________________________________________________________________
Name: svn:eol-style
1390 + native
Index: trunk/phase3/includes/Cdb.php
@@ -0,0 +1,147 @@
 2+<?php
 3+
/**
 * Read from a CDB file.
 * Native and pure PHP implementations are provided.
 * http://cr.yp.to/cdb.html
 */
abstract class CdbReader {
	/**
	 * Open a file and return a subclass instance
	 *
	 * @param $fileName String: path of the CDB file
	 * @return CdbReader_DBA if the DBA extension supports CDB, else CdbReader_PHP
	 */
	public static function open( $fileName ) {
		if ( self::haveExtension() ) {
			return new CdbReader_DBA( $fileName );
		} else {
			// Debug log entries are newline-terminated
			wfDebug( "Warning: no dba extension found, using emulation.\n" );
			return new CdbReader_PHP( $fileName );
		}
	}

	/**
	 * Returns true if the native extension is available
	 *
	 * Both the "cdb" (read) and "cdb_make" (write) DBA handlers must be present.
	 *
	 * @return Boolean
	 */
	public static function haveExtension() {
		if ( !function_exists( 'dba_handlers' ) ) {
			return false;
		}
		$handlers = dba_handlers();
		if ( !in_array( 'cdb', $handlers, true ) || !in_array( 'cdb_make', $handlers, true ) ) {
			return false;
		}
		return true;
	}

	/**
	 * Construct the object and open the file
	 */
	abstract function __construct( $fileName );

	/**
	 * Close the file. Optional, you can just let the variable go out of scope.
	 */
	abstract function close();

	/**
	 * Get a value with a given key. Only string values are supported.
	 */
	abstract public function get( $key );
}
 51+
/**
 * Write to a CDB file.
 * Native and pure PHP implementations are provided.
 */
abstract class CdbWriter {
	/**
	 * Open a writer and return a subclass instance.
	 * The user must have write access to the directory, for temporary file creation.
	 *
	 * @param $fileName String: path of the CDB file to create
	 * @return CdbWriter_DBA if the DBA extension supports CDB, else CdbWriter_PHP
	 */
	public static function open( $fileName ) {
		if ( CdbReader::haveExtension() ) {
			return new CdbWriter_DBA( $fileName );
		} else {
			// Debug log entries are newline-terminated
			wfDebug( "Warning: no dba extension found, using emulation.\n" );
			return new CdbWriter_PHP( $fileName );
		}
	}

	/**
	 * Create the object and open the file
	 */
	abstract function __construct( $fileName );

	/**
	 * Set a key to a given value. The value will be converted to string.
	 */
	abstract public function set( $key, $value );

	/**
	 * Close the writer object. You should call this function before the object
	 * goes out of scope, to write out the final hashtables.
	 */
	abstract public function close();
}
 86+
 87+
/**
 * Reader class which uses the DBA extension
 *
 * Extends CdbReader so that the object returned by CdbReader::open() really
 * is a CdbReader and is held to the abstract interface.
 */
class CdbReader_DBA extends CdbReader {
	var $handle;

	/**
	 * Open the file with the "cdb" DBA handler.
	 *
	 * @param $fileName String: path of the CDB file
	 * @throws MWException if the file cannot be opened
	 */
	function __construct( $fileName ) {
		$this->handle = dba_open( $fileName, 'r-', 'cdb' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open DB file "' . $fileName . '"' );
		}
	}

	/**
	 * Close the DBA handle. Calling this more than once is harmless.
	 */
	function close() {
		if ( isset( $this->handle ) ) {
			dba_close( $this->handle );
		}
		unset( $this->handle );
	}

	/**
	 * Fetch the value stored under $key.
	 *
	 * @param $key String
	 * @return the result of dba_fetch() for this key
	 */
	function get( $key ) {
		return dba_fetch( $key, $this->handle );
	}
}
 110+
 111+
/**
 * Writer class which uses the DBA extension
 *
 * Extends CdbWriter so that the object returned by CdbWriter::open() really
 * is a CdbWriter and is held to the abstract interface. Data is written to a
 * temporary file which close() renames over the target.
 */
class CdbWriter_DBA extends CdbWriter {
	var $handle, $realFileName, $tmpFileName;

	/**
	 * Create the temporary file with the "cdb_make" DBA handler.
	 *
	 * @param $fileName String: path of the CDB file to create
	 * @throws MWException if the temporary file cannot be opened
	 */
	function __construct( $fileName ) {
		$this->realFileName = $fileName;
		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
		$this->handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' );
		if ( !$this->handle ) {
			throw new MWException( 'Unable to open DB file for write "' . $fileName . '"' );
		}
	}

	/**
	 * Insert a record.
	 *
	 * @param $key String
	 * @param $value String
	 * @return the result of dba_insert()
	 */
	function set( $key, $value ) {
		return dba_insert( $key, $value, $this->handle );
	}

	/**
	 * Close the DBA handle and move the finished file into place.
	 *
	 * @throws MWException if the file cannot be renamed
	 */
	function close() {
		dba_close( $this->handle );
		// The existing target is removed first on Windows; skip that when
		// there is nothing to remove, to avoid a warning from unlink().
		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
			unlink( $this->realFileName );
		}
		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
			throw new MWException( 'Unable to move the new CDB file into place.' );
		}
		unset( $this->handle );
	}

	/**
	 * Finish the file if the caller did not call close() explicitly.
	 */
	function __destruct() {
		if ( isset( $this->handle ) ) {
			$this->close();
		}
	}
}
 148+
Property changes on: trunk/phase3/includes/Cdb.php
___________________________________________________________________
Name: svn:eol-style
1149 + native
Index: trunk/phase3/includes/AutoLoader.php
@@ -27,6 +27,13 @@
2828 'Categoryfinder' => 'includes/Categoryfinder.php',
2929 'CategoryPage' => 'includes/CategoryPage.php',
3030 'CategoryViewer' => 'includes/CategoryPage.php',
 31+ 'CdbFunctions' => 'includes/Cdb_PHP.php',
 32+ 'CdbReader' => 'includes/Cdb.php',
 33+ 'CdbReader_DBA' => 'includes/Cdb.php',
 34+ 'CdbReader_PHP' => 'includes/Cdb_PHP.php',
 35+ 'CdbWriter' => 'includes/Cdb.php',
 36+ 'CdbWriter_DBA' => 'includes/Cdb.php',
 37+ 'CdbWriter_PHP' => 'includes/Cdb_PHP.php',
3138 'ChangesList' => 'includes/ChangesList.php',
3239 'ChangesFeed' => 'includes/ChangesFeed.php',
3340 'ChangeTags' => 'includes/ChangeTags.php',
Index: trunk/phase3/RELEASE-NOTES
@@ -89,7 +89,10 @@
9090 * Add $wgRevokePermissions as a means of restricting a group's rights. The syntax is
9191 identical to $wgGroupPermissions, but users in these groups will have these rights
9292 stripped from them.
 93+* Added a PHP port of CDB (constant database), for improved local caching when
 94+ the DBA extension is not available.
9395
 96+
9497 === Bug fixes in 1.16 ===
9598
9699 * (bug 18031) Make namespace selector on Special:Export remember the previous

Comments

#Comment by Brion VIBBER (talk | contribs)   15:02, 25 August 2009

64-bit issues have been resolved :D

Status & tagging log