Index: trunk/phase3/maintenance/storage/testCompression.php |
— | — | @@ -1,70 +0,0 @@ |
2 | | -<?php |
3 | | - |
4 | | -$optionsWithArgs = array( 'start', 'limit', 'type' ); |
5 | | -require( dirname(__FILE__).'/../commandLine.inc' ); |
6 | | - |
7 | | -if ( !isset( $args[0] ) ) { |
8 | | - echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] [--limit=<num-revs>] <page-title>\n"; |
9 | | - exit( 1 ); |
10 | | -} |
11 | | - |
12 | | -$title = Title::newFromText( $args[0] ); |
13 | | -if ( isset( $options['start'] ) ) { |
14 | | - $start = wfTimestamp( TS_MW, strtotime( $options['start'] ) ); |
15 | | - echo "Starting from " . $wgLang->timeanddate( $start ) . "\n"; |
16 | | -} else { |
17 | | - $start = '19700101000000'; |
18 | | -} |
19 | | -$limit = isset( $options['limit'] ) ? $options['limit'] : 10; |
20 | | -$type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob'; |
21 | | - |
22 | | - |
23 | | -$dbr = wfGetDB( DB_SLAVE ); |
24 | | -$res = $dbr->select( |
25 | | - array( 'page', 'revision', 'text' ), |
26 | | - '*', |
27 | | - array( |
28 | | - 'page_namespace' => $title->getNamespace(), |
29 | | - 'page_title' => $title->getDBkey(), |
30 | | - 'page_id=rev_page', |
31 | | - 'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ), |
32 | | - 'rev_text_id=old_id' |
33 | | - ), __FILE__, array( 'LIMIT' => $limit ) |
34 | | -); |
35 | | - |
36 | | -$blob = new $type; |
37 | | -$hashes = array(); |
38 | | -$keys = array(); |
39 | | -$uncompressedSize = 0; |
40 | | -$t = -microtime( true ); |
41 | | -foreach ( $res as $row ) { |
42 | | - $revision = new Revision( $row ); |
43 | | - $text = $revision->getText(); |
44 | | - $uncompressedSize += strlen( $text ); |
45 | | - $hashes[$row->rev_id] = md5( $text ); |
46 | | - $keys[$row->rev_id] = $blob->addItem( $text ); |
47 | | -} |
48 | | - |
49 | | -$serialized = serialize( $blob ); |
50 | | -$t += microtime( true ); |
51 | | - |
52 | | -printf( "Compression ratio for %d revisions: %5.2f, %s -> %s\n", |
53 | | - $res->numRows(), |
54 | | - $uncompressedSize / strlen( $serialized ), |
55 | | - $wgLang->formatSize( $uncompressedSize ), |
56 | | - $wgLang->formatSize( strlen( $serialized ) ) |
57 | | -); |
58 | | -printf( "Compression time: %5.2f ms\n", $t * 1000 ); |
59 | | - |
60 | | -$t = -microtime( true ); |
61 | | -$blob = unserialize( $serialized ); |
62 | | -foreach ( $keys as $id => $key ) { |
63 | | - $text = $blob->getItem( $key ); |
64 | | - if ( md5( $text ) != $hashes[$id] ) { |
65 | | - echo "Content hash mismatch for rev_id $id\n"; |
66 | | - #var_dump( $text ); |
67 | | - } |
68 | | -} |
69 | | -$t += microtime( true ); |
70 | | -printf( "Decompression time: %5.2f ms\n", $t * 1000 ); |
71 | | - |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -34,7 +34,6 @@ |
35 | 35 | 'Credits' => 'includes/Credits.php', |
36 | 36 | 'DBABagOStuff' => 'includes/BagOStuff.php', |
37 | 37 | 'DependencyWrapper' => 'includes/CacheDependency.php', |
38 | | - 'DiffHistoryBlob' => 'includes/HistoryBlob.php', |
39 | 38 | 'DjVuImage' => 'includes/DjVuImage.php', |
40 | 39 | 'DoubleReplacer' => 'includes/StringUtils.php', |
41 | 40 | 'DoubleRedirectJob' => 'includes/DoubleRedirectJob.php', |
Index: trunk/phase3/includes/HistoryBlob.php |
— | — | @@ -1,33 +1,41 @@ |
2 | 2 | <?php |
3 | 3 | |
4 | 4 | /** |
5 | | - * Base class for general text storage via the "object" flag in old_flags, or |
6 | | - * two-part external storage URLs. Used for represent efficient concatenated |
7 | | - * storage, and migration-related pointer objects. |
| 5 | + * Pure virtual parent |
| 6 | + * @todo document (needs a one-sentence top-level class description, that answers the question: "what is a HistoryBlob?") |
8 | 7 | */ |
9 | 8 | interface HistoryBlob |
10 | 9 | { |
11 | 10 | /** |
| 11 | + * setMeta and getMeta currently aren't used for anything, I just thought |
| 12 | + * they might be useful in the future. |
| 13 | + * @param $meta String: a single string. |
| 14 | + */ |
| 15 | + public function setMeta( $meta ); |
| 16 | + |
| 17 | + /** |
| 18 | + * setMeta and getMeta currently aren't used for anything, I just thought |
| 19 | + * they might be useful in the future. |
| 20 | + * Gets the meta-value |
| 21 | + */ |
| 22 | + public function getMeta(); |
| 23 | + |
| 24 | + /** |
12 | 25 | * Adds an item of text, returns a stub object which points to the item. |
13 | 26 | * You must call setLocation() on the stub object before storing it to the |
14 | 27 | * database |
15 | | - * Returns the key for getItem() |
16 | 28 | */ |
17 | 29 | public function addItem( $text ); |
18 | 30 | |
19 | 31 | /** |
20 | | - * Get item by key, or false if the key is not present |
| 32 | + * Get item by hash |
21 | 33 | */ |
22 | | - public function getItem( $key ); |
| 34 | + public function getItem( $hash ); |
23 | 35 | |
24 | | - /** |
25 | | - * Set the "default text" |
26 | | - * This concept is an odd property of the current DB schema, whereby each text item has a revision |
27 | | - * associated with it. The default text is the text of the associated revision. There may, however, |
28 | | - * be other revisions in the same object. |
29 | | - * |
30 | | - * Default text is not required for two-part external storage URLs. |
31 | | - */ |
| 36 | + # Set the "default text" |
| 37 | + # This concept is an odd property of the current DB schema, whereby each text item has a revision |
| 38 | + # associated with it. The default text is the text of the associated revision. There may, however, |
| 39 | + # be other revisions in the same object |
32 | 40 | public function setText( $text ); |
33 | 41 | |
34 | 42 | /** |
— | — | @@ -37,8 +45,8 @@ |
38 | 46 | } |
39 | 47 | |
40 | 48 | /** |
41 | | - * Concatenated gzip (CGZ) storage |
42 | | - * Improves compression ratio by concatenating like objects before gzipping |
| 49 | + * The real object |
| 50 | + * @todo document (needs one-sentence top-level class description + function descriptions). |
43 | 51 | */ |
44 | 52 | class ConcatenatedGzipHistoryBlob implements HistoryBlob |
45 | 53 | { |
— | — | @@ -52,15 +60,34 @@ |
53 | 61 | } |
54 | 62 | } |
55 | 63 | |
| 64 | + # |
| 65 | + # HistoryBlob implementation: |
| 66 | + # |
| 67 | + |
| 68 | + /** @todo document */ |
| 69 | + public function setMeta( $metaData ) { |
| 70 | + $this->uncompress(); |
| 71 | + $this->mItems['meta'] = $metaData; |
| 72 | + } |
| 73 | + |
| 74 | + /** @todo document */ |
| 75 | + public function getMeta() { |
| 76 | + $this->uncompress(); |
| 77 | + return $this->mItems['meta']; |
| 78 | + } |
| 79 | + |
| 80 | + /** @todo document */ |
56 | 81 | public function addItem( $text ) { |
57 | 82 | $this->uncompress(); |
58 | 83 | $hash = md5( $text ); |
59 | 84 | $this->mItems[$hash] = $text; |
60 | 85 | $this->mSize += strlen( $text ); |
61 | 86 | |
62 | | - return $hash; |
| 87 | + $stub = new HistoryBlobStub( $hash ); |
| 88 | + return $stub; |
63 | 89 | } |
64 | 90 | |
| 91 | + /** @todo document */ |
65 | 92 | public function getItem( $hash ) { |
66 | 93 | $this->uncompress(); |
67 | 94 | if ( array_key_exists( $hash, $this->mItems ) ) { |
— | — | @@ -70,28 +97,29 @@ |
71 | 98 | } |
72 | 99 | } |
73 | 100 | |
| 101 | + /** @todo document */ |
74 | 102 | public function setText( $text ) { |
75 | 103 | $this->uncompress(); |
76 | 104 | $stub = $this->addItem( $text ); |
77 | 105 | $this->mDefaultHash = $stub->mHash; |
78 | 106 | } |
79 | 107 | |
| 108 | + /** @todo document */ |
80 | 109 | public function getText() { |
81 | 110 | $this->uncompress(); |
82 | 111 | return $this->getItem( $this->mDefaultHash ); |
83 | 112 | } |
84 | 113 | |
85 | | - /** |
86 | | - * Remove an item |
87 | | - */ |
| 114 | + # HistoryBlob implemented. |
| 115 | + |
| 116 | + |
| 117 | + /** @todo document */ |
88 | 118 | public function removeItem( $hash ) { |
89 | 119 | $this->mSize -= strlen( $this->mItems[$hash] ); |
90 | 120 | unset( $this->mItems[$hash] ); |
91 | 121 | } |
92 | 122 | |
93 | | - /** |
94 | | - * Compress the bulk data in the object |
95 | | - */ |
| 123 | + /** @todo document */ |
96 | 124 | public function compress() { |
97 | 125 | if ( !$this->mCompressed ) { |
98 | 126 | $this->mItems = gzdeflate( serialize( $this->mItems ) ); |
— | — | @@ -99,9 +127,7 @@ |
100 | 128 | } |
101 | 129 | } |
102 | 130 | |
103 | | - /** |
104 | | - * Uncompress bulk data |
105 | | - */ |
| 131 | + /** @todo document */ |
106 | 132 | public function uncompress() { |
107 | 133 | if ( $this->mCompressed ) { |
108 | 134 | $this->mItems = unserialize( gzinflate( $this->mItems ) ); |
— | — | @@ -110,18 +136,19 @@ |
111 | 137 | } |
112 | 138 | |
113 | 139 | |
| 140 | + /** @todo document */ |
114 | 141 | function __sleep() { |
115 | 142 | $this->compress(); |
116 | 143 | return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' ); |
117 | 144 | } |
118 | 145 | |
| 146 | + /** @todo document */ |
119 | 147 | function __wakeup() { |
120 | 148 | $this->uncompress(); |
121 | 149 | } |
122 | 150 | |
123 | 151 | /** |
124 | | - * Helper function for compression jobs |
125 | | - * Returns true until the object is "full" and ready to be committed |
| 152 | + * Determines if this object is happy |
126 | 153 | */ |
127 | 154 | public function isHappy( $maxFactor, $factorThreshold ) { |
128 | 155 | if ( count( $this->mItems ) == 0 ) { |
— | — | @@ -157,15 +184,12 @@ |
158 | 185 | |
159 | 186 | |
160 | 187 | /** |
161 | | - * Pointer object for an item within a CGZ blob stored in the text table. |
| 188 | + * @todo document (needs one-sentence top-level class description + some function descriptions). |
162 | 189 | */ |
163 | 190 | class HistoryBlobStub { |
164 | 191 | var $mOldId, $mHash, $mRef; |
165 | 192 | |
166 | | - /** |
167 | | - * @param string $hash The content hash of the text |
168 | | - * @param integer $oldid The old_id for the CGZ object |
169 | | - */ |
| 193 | + /** @todo document */ |
170 | 194 | function HistoryBlobStub( $hash = '', $oldid = 0 ) { |
171 | 195 | $this->mHash = $hash; |
172 | 196 | } |
— | — | @@ -192,6 +216,7 @@ |
193 | 217 | return $this->mRef; |
194 | 218 | } |
195 | 219 | |
| 220 | + /** @todo document */ |
196 | 221 | function getText() { |
197 | 222 | $fname = 'HistoryBlobStub::getText'; |
198 | 223 | global $wgBlobCache; |
— | — | @@ -239,9 +264,7 @@ |
240 | 265 | return $obj->getItem( $this->mHash ); |
241 | 266 | } |
242 | 267 | |
243 | | - /** |
244 | | - * Get the content hash |
245 | | - */ |
| 268 | + /** @todo document */ |
246 | 269 | function getHash() { |
247 | 270 | return $this->mHash; |
248 | 271 | } |
— | — | @@ -259,9 +282,7 @@ |
260 | 283 | class HistoryBlobCurStub { |
261 | 284 | var $mCurId; |
262 | 285 | |
263 | | - /** |
264 | | - * @param integer $curid The cur_id pointed to |
265 | | - */ |
| 286 | + /** @todo document */ |
266 | 287 | function HistoryBlobCurStub( $curid = 0 ) { |
267 | 288 | $this->mCurId = $curid; |
268 | 289 | } |
— | — | @@ -274,6 +295,7 @@ |
275 | 296 | $this->mCurId = $id; |
276 | 297 | } |
277 | 298 | |
| 299 | + /** @todo document */ |
278 | 300 | function getText() { |
279 | 301 | $dbr = wfGetDB( DB_SLAVE ); |
280 | 302 | $row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) ); |
— | — | @@ -283,123 +305,3 @@ |
284 | 306 | return $row->cur_text; |
285 | 307 | } |
286 | 308 | } |
287 | | - |
288 | | -/** |
289 | | - * Diff-based history compression |
290 | | - * Requires xdiff 1.5+ and zlib |
291 | | - */ |
292 | | -class DiffHistoryBlob implements HistoryBlob { |
293 | | - /** Uncompressed item cache */ |
294 | | - var $mItems = array(); |
295 | | - |
296 | | - /** |
297 | | - * Array of diffs, where $this->mDiffs[0] is the diff between |
298 | | - * $this->mDiffs[0] and $this->mDiffs[1] |
299 | | - */ |
300 | | - var $mDiffs = array(); |
301 | | - |
302 | | - /** |
303 | | - * The key for getText() |
304 | | - */ |
305 | | - var $mDefaultKey; |
306 | | - |
307 | | - /** |
308 | | - * Compressed storage |
309 | | - */ |
310 | | - var $mCompressed; |
311 | | - |
312 | | - /** |
313 | | - * True if the object is locked against further writes |
314 | | - */ |
315 | | - var $mFrozen = false; |
316 | | - |
317 | | - |
318 | | - function __construct() { |
319 | | - if ( !function_exists( 'xdiff_string_bdiff' ) ){ |
320 | | - throw new MWException( "Need xdiff 1.5+ support to read or write DiffHistoryBlob\n" ); |
321 | | - } |
322 | | - if ( !function_exists( 'gzdeflate' ) ) { |
323 | | - throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" ); |
324 | | - } |
325 | | - } |
326 | | - |
327 | | - function addItem( $text ) { |
328 | | - if ( $this->mFrozen ) { |
329 | | - throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" ); |
330 | | - } |
331 | | - |
332 | | - $this->mItems[] = $text; |
333 | | - $i = count( $this->mItems ) - 1; |
334 | | - if ( $i > 0 ) { |
335 | | - # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff |
336 | | - # "String is not zero-terminated" |
337 | | - wfSuppressWarnings(); |
338 | | - $this->mDiffs[] = xdiff_string_bdiff( $this->mItems[$i-1], $text ) . ''; |
339 | | - wfRestoreWarnings(); |
340 | | - } |
341 | | - return $i; |
342 | | - } |
343 | | - |
344 | | - function getItem( $key ) { |
345 | | - if ( $key > count( $this->mDiffs ) + 1 ) { |
346 | | - return false; |
347 | | - } |
348 | | - $key = intval( $key ); |
349 | | - if ( $key == 0 ) { |
350 | | - return $this->mItems[0]; |
351 | | - } |
352 | | - |
353 | | - $last = count( $this->mItems ) - 1; |
354 | | - for ( $i = $last + 1; $i <= $key; $i++ ) { |
355 | | - # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff |
356 | | - # "String is not zero-terminated" |
357 | | - wfSuppressWarnings(); |
358 | | - $this->mItems[$i] = xdiff_string_bpatch( $this->mItems[$i - 1], $this->mDiffs[$i - 1] ) . ''; |
359 | | - wfRestoreWarnings(); |
360 | | - } |
361 | | - return $this->mItems[$key]; |
362 | | - } |
363 | | - |
364 | | - function setText( $text ) { |
365 | | - $this->mDefaultKey = $this->addItem( $text ); |
366 | | - } |
367 | | - |
368 | | - function getText() { |
369 | | - return $this->getItem( $this->mDefaultKey ); |
370 | | - } |
371 | | - |
372 | | - function __sleep() { |
373 | | - if ( !isset( $this->mItems[0] ) ) { |
374 | | - // Empty object |
375 | | - $info = false; |
376 | | - } else { |
377 | | - $info = array( |
378 | | - 'base' => $this->mItems[0], |
379 | | - 'diffs' => $this->mDiffs |
380 | | - ); |
381 | | - } |
382 | | - if ( isset( $this->mDefaultKey ) ) { |
383 | | - $info['default'] = $this->mDefaultKey; |
384 | | - } |
385 | | - $this->mCompressed = gzdeflate( serialize( $info ) ); |
386 | | - return array( 'mCompressed' ); |
387 | | - } |
388 | | - |
389 | | - function __wakeup() { |
390 | | - // addItem() doesn't work if mItems is partially filled from mDiffs |
391 | | - $this->mFrozen = true; |
392 | | - $info = unserialize( gzinflate( $this->mCompressed ) ); |
393 | | - unset( $this->mCompressed ); |
394 | | - |
395 | | - if ( !$info ) { |
396 | | - // Empty object |
397 | | - return; |
398 | | - } |
399 | | - |
400 | | - if ( isset( $info['default'] ) ) { |
401 | | - $this->mDefaultKey = $info['default']; |
402 | | - } |
403 | | - $this->mItems[0] = $info['base']; |
404 | | - $this->mDiffs = $info['diffs']; |
405 | | - } |
406 | | -} |