Index: trunk/phase3/maintenance/storage/compressOld.inc |
— | — | @@ -1,317 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * Helper functions for compressOld.php script. |
5 | | - * |
6 | | - * This program is free software; you can redistribute it and/or modify |
7 | | - * it under the terms of the GNU General Public License as published by |
8 | | - * the Free Software Foundation; either version 2 of the License, or |
9 | | - * (at your option) any later version. |
10 | | - * |
11 | | - * This program is distributed in the hope that it will be useful, |
12 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | - * GNU General Public License for more details. |
15 | | - * |
16 | | - * You should have received a copy of the GNU General Public License along |
17 | | - * with this program; if not, write to the Free Software Foundation, Inc., |
18 | | - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | | - * http://www.gnu.org/copyleft/gpl.html |
20 | | - * |
21 | | - * @file |
22 | | - * @ingroup Maintenance ExternalStorage |
23 | | - */ |
24 | | - |
25 | | -/** @todo document */ |
26 | | -function compressOldPages( $start = 0, $extdb = '' ) { |
27 | | - $fname = 'compressOldPages'; |
28 | | - |
29 | | - $chunksize = 50; |
30 | | - print "Starting from old_id $start...\n"; |
31 | | - $dbw = wfGetDB( DB_MASTER ); |
32 | | - do { |
33 | | - $res = $dbw->select( 'text', array( 'old_id','old_flags','old_text' ), |
34 | | - "old_id>=$start", $fname, array( 'ORDER BY' => 'old_id', 'LIMIT' => $chunksize, 'FOR UPDATE' ) ); |
35 | | - if( $dbw->numRows( $res ) == 0 ) { |
36 | | - break; |
37 | | - } |
38 | | - $last = $start; |
39 | | - foreach ( $res as $row ) { |
40 | | - # print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n"; |
41 | | - compressPage( $row, $extdb ); |
42 | | - $last = $row->old_id; |
43 | | - } |
44 | | - $start = $last + 1; # Deletion may leave long empty stretches |
45 | | - print "$start...\n"; |
46 | | - } while( true ); |
47 | | -} |
48 | | - |
49 | | -/** @todo document */ |
50 | | -function compressPage( $row, $extdb ) { |
51 | | - $fname = 'compressPage'; |
52 | | - if ( false !== strpos( $row->old_flags, 'gzip' ) || false !== strpos( $row->old_flags, 'object' ) ) { |
53 | | - #print "Already compressed row {$row->old_id}\n"; |
54 | | - return false; |
55 | | - } |
56 | | - $dbw = wfGetDB( DB_MASTER ); |
57 | | - $flags = $row->old_flags ? "{$row->old_flags},gzip" : "gzip"; |
58 | | - $compress = gzdeflate( $row->old_text ); |
59 | | - |
60 | | - # Store in external storage if required |
61 | | - if ( $extdb !== '' ) { |
62 | | - $storeObj = new ExternalStoreDB; |
63 | | - $compress = $storeObj->store( $extdb, $compress ); |
64 | | - if ( $compress === false ) { |
65 | | - print "Unable to store object\n"; |
66 | | - return false; |
67 | | - } |
68 | | - } |
69 | | - |
70 | | - # Update text row |
71 | | - $dbw->update( 'text', |
72 | | - array( /* SET */ |
73 | | - 'old_flags' => $flags, |
74 | | - 'old_text' => $compress |
75 | | - ), array( /* WHERE */ |
76 | | - 'old_id' => $row->old_id |
77 | | - ), $fname, |
78 | | - array( 'LIMIT' => 1 ) |
79 | | - ); |
80 | | - return true; |
81 | | -} |
82 | | - |
83 | | -define( 'LS_INDIVIDUAL', 0 ); |
84 | | -define( 'LS_CHUNKED', 1 ); |
85 | | - |
86 | | -/** @todo document */ |
87 | | -function compressWithConcat( $startId, $maxChunkSize, $beginDate, |
88 | | - $endDate, $extdb="", $maxPageId = false ) |
89 | | -{ |
90 | | - $fname = 'compressWithConcat'; |
91 | | - $loadStyle = LS_CHUNKED; |
92 | | - |
93 | | - $dbr = wfGetDB( DB_SLAVE ); |
94 | | - $dbw = wfGetDB( DB_MASTER ); |
95 | | - |
96 | | - # Set up external storage |
97 | | - if ( $extdb != '' ) { |
98 | | - $storeObj = new ExternalStoreDB; |
99 | | - } |
100 | | - |
101 | | - # Get all articles by page_id |
102 | | - if ( !$maxPageId ) { |
103 | | - $maxPageId = $dbr->selectField( 'page', 'max(page_id)', '', $fname ); |
104 | | - } |
105 | | - print "Starting from $startId of $maxPageId\n"; |
106 | | - $pageConds = array(); |
107 | | - |
108 | | - /* |
109 | | - if ( $exclude_ns0 ) { |
110 | | - print "Excluding main namespace\n"; |
111 | | - $pageConds[] = 'page_namespace<>0'; |
112 | | - } |
113 | | - if ( $queryExtra ) { |
114 | | - $pageConds[] = $queryExtra; |
115 | | - } |
116 | | - */ |
117 | | - |
118 | | - # For each article, get a list of revisions which fit the criteria |
119 | | - |
120 | | - # No recompression, use a condition on old_flags |
121 | | - # Don't compress object type entities, because that might produce data loss when |
122 | | - # overwriting bulk storage concat rows. Don't compress external references, because |
123 | | - # the script doesn't yet delete rows from external storage. |
124 | | - $conds = array( |
125 | | - 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'object', $dbr->anyString() ) . ' AND old_flags NOT ' |
126 | | - . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ) ); |
127 | | - |
128 | | - if ( $beginDate ) { |
129 | | - if ( !preg_match( '/^\d{14}$/', $beginDate ) ) { |
130 | | - print "Invalid begin date \"$beginDate\"\n"; |
131 | | - return false; |
132 | | - } |
133 | | - $conds[] = "rev_timestamp>'" . $beginDate . "'"; |
134 | | - } |
135 | | - if ( $endDate ) { |
136 | | - if ( !preg_match( '/^\d{14}$/', $endDate ) ) { |
137 | | - print "Invalid end date \"$endDate\"\n"; |
138 | | - return false; |
139 | | - } |
140 | | - $conds[] = "rev_timestamp<'" . $endDate . "'"; |
141 | | - } |
142 | | - if ( $loadStyle == LS_CHUNKED ) { |
143 | | - $tables = array( 'revision', 'text' ); |
144 | | - $fields = array( 'rev_id', 'rev_text_id', 'old_flags', 'old_text' ); |
145 | | - $conds[] = 'rev_text_id=old_id'; |
146 | | - $revLoadOptions = 'FOR UPDATE'; |
147 | | - } else { |
148 | | - $tables = array( 'revision' ); |
149 | | - $fields = array( 'rev_id', 'rev_text_id' ); |
150 | | - $revLoadOptions = array(); |
151 | | - } |
152 | | - |
153 | | - # Don't work with current revisions |
154 | | - # Don't lock the page table for update either -- TS 2006-04-04 |
155 | | - #$tables[] = 'page'; |
156 | | - #$conds[] = 'page_id=rev_page AND rev_id != page_latest'; |
157 | | - |
158 | | - for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) { |
159 | | - wfWaitForSlaves(); |
160 | | - |
161 | | - # Wake up |
162 | | - $dbr->ping(); |
163 | | - |
164 | | - # Get the page row |
165 | | - $pageRes = $dbr->select( 'page', |
166 | | - array('page_id', 'page_namespace', 'page_title','page_latest'), |
167 | | - $pageConds + array('page_id' => $pageId), $fname ); |
168 | | - if ( $dbr->numRows( $pageRes ) == 0 ) { |
169 | | - continue; |
170 | | - } |
171 | | - $pageRow = $dbr->fetchObject( $pageRes ); |
172 | | - |
173 | | - # Display progress |
174 | | - $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title ); |
175 | | - print "$pageId\t" . $titleObj->getPrefixedDBkey() . " "; |
176 | | - |
177 | | - # Load revisions |
178 | | - $revRes = $dbw->select( $tables, $fields, |
179 | | - array_merge( array( |
180 | | - 'rev_page' => $pageRow->page_id, |
181 | | - # Don't operate on the current revision |
182 | | - # Use < instead of <> in case the current revision has changed |
183 | | - # since the page select, which wasn't locking |
184 | | - 'rev_id < ' . $pageRow->page_latest |
185 | | - ), $conds ), |
186 | | - $fname, |
187 | | - $revLoadOptions |
188 | | - ); |
189 | | - $revs = array(); |
190 | | - foreach ( $revRes as $revRow ) { |
191 | | - $revs[] = $revRow; |
192 | | - } |
193 | | - |
194 | | - if ( count( $revs ) < 2) { |
195 | | - # No revisions matching, no further processing |
196 | | - print "\n"; |
197 | | - continue; |
198 | | - } |
199 | | - |
200 | | - # For each chunk |
201 | | - $i = 0; |
202 | | - while ( $i < count( $revs ) ) { |
203 | | - if ( $i < count( $revs ) - $maxChunkSize ) { |
204 | | - $thisChunkSize = $maxChunkSize; |
205 | | - } else { |
206 | | - $thisChunkSize = count( $revs ) - $i; |
207 | | - } |
208 | | - |
209 | | - $chunk = new ConcatenatedGzipHistoryBlob(); |
210 | | - $stubs = array(); |
211 | | - $dbw->begin(); |
212 | | - $usedChunk = false; |
213 | | - $primaryOldid = $revs[$i]->rev_text_id; |
214 | | - |
215 | | - # Get the text of each revision and add it to the object |
216 | | - for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy(); $j++ ) { |
217 | | - $oldid = $revs[$i + $j]->rev_text_id; |
218 | | - |
219 | | - # Get text |
220 | | - if ( $loadStyle == LS_INDIVIDUAL ) { |
221 | | - $textRow = $dbw->selectRow( 'text', |
222 | | - array( 'old_flags', 'old_text' ), |
223 | | - array( 'old_id' => $oldid ), |
224 | | - $fname, |
225 | | - 'FOR UPDATE' |
226 | | - ); |
227 | | - $text = Revision::getRevisionText( $textRow ); |
228 | | - } else { |
229 | | - $text = Revision::getRevisionText( $revs[$i + $j] ); |
230 | | - } |
231 | | - |
232 | | - if ( $text === false ) { |
233 | | - print "\nError, unable to get text in old_id $oldid\n"; |
234 | | - #$dbw->delete( 'old', array( 'old_id' => $oldid ) ); |
235 | | - } |
236 | | - |
237 | | - if ( $extdb == "" && $j == 0 ) { |
238 | | - $chunk->setText( $text ); |
239 | | - print '.'; |
240 | | - } else { |
241 | | - # Don't make a stub if it's going to be longer than the article |
242 | | - # Stubs are typically about 100 bytes |
243 | | - if ( strlen( $text ) < 120 ) { |
244 | | - $stub = false; |
245 | | - print 'x'; |
246 | | - } else { |
247 | | - $stub = new HistoryBlobStub( $chunk->addItem( $text ) ); |
248 | | - $stub->setLocation( $primaryOldid ); |
249 | | - $stub->setReferrer( $oldid ); |
250 | | - print '.'; |
251 | | - $usedChunk = true; |
252 | | - } |
253 | | - $stubs[$j] = $stub; |
254 | | - } |
255 | | - } |
256 | | - $thisChunkSize = $j; |
257 | | - |
258 | | - # If we couldn't actually use any stubs because the pages were too small, do nothing |
259 | | - if ( $usedChunk ) { |
260 | | - if ( $extdb != "" ) { |
261 | | - # Move blob objects to External Storage |
262 | | - $stored = $storeObj->store( $extdb, serialize( $chunk )); |
263 | | - if ($stored === false) { |
264 | | - print "Unable to store object\n"; |
265 | | - return false; |
266 | | - } |
267 | | - # Store External Storage URLs instead of Stub placeholders |
268 | | - foreach ($stubs as $stub) { |
269 | | - if ($stub===false) |
270 | | - continue; |
271 | | - # $stored should provide base path to a BLOB |
272 | | - $url = $stored."/".$stub->getHash(); |
273 | | - $dbw->update( 'text', |
274 | | - array( /* SET */ |
275 | | - 'old_text' => $url, |
276 | | - 'old_flags' => 'external,utf-8', |
277 | | - ), array ( /* WHERE */ |
278 | | - 'old_id' => $stub->getReferrer(), |
279 | | - ) |
280 | | - ); |
281 | | - } |
282 | | - } else { |
283 | | - # Store the main object locally |
284 | | - $dbw->update( 'text', |
285 | | - array( /* SET */ |
286 | | - 'old_text' => serialize( $chunk ), |
287 | | - 'old_flags' => 'object,utf-8', |
288 | | - ), array( /* WHERE */ |
289 | | - 'old_id' => $primaryOldid |
290 | | - ) |
291 | | - ); |
292 | | - |
293 | | - # Store the stub objects |
294 | | - for ( $j = 1; $j < $thisChunkSize; $j++ ) { |
295 | | - # Skip if not compressing and don't overwrite the first revision |
296 | | - if ( $stubs[$j] !== false && $revs[$i + $j]->rev_text_id != $primaryOldid ) { |
297 | | - $dbw->update( 'text', |
298 | | - array( /* SET */ |
299 | | - 'old_text' => serialize($stubs[$j]), |
300 | | - 'old_flags' => 'object,utf-8', |
301 | | - ), array( /* WHERE */ |
302 | | - 'old_id' => $revs[$i + $j]->rev_text_id |
303 | | - ) |
304 | | - ); |
305 | | - } |
306 | | - } |
307 | | - } |
308 | | - } |
309 | | - # Done, next |
310 | | - print "/"; |
311 | | - $dbw->commit(); |
312 | | - $i += $thisChunkSize; |
313 | | - wfWaitForSlaves(); |
314 | | - } |
315 | | - print "\n"; |
316 | | - } |
317 | | - return true; |
318 | | -} |
Index: trunk/phase3/maintenance/storage/compressOld.php |
— | — | @@ -39,50 +39,356 @@ |
40 | 40 | * @ingroup Maintenance ExternalStorage |
41 | 41 | */ |
42 | 42 | |
43 | | -$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid', 'e' ); |
44 | | -require_once( dirname( __FILE__ ) . '/../commandLine.inc' ); |
45 | | -require_once( "compressOld.inc" ); |
| 43 | +require_once( dirname( __FILE__ ) . '/../Maintenance.php' ); |
46 | 44 | |
47 | | -if ( !function_exists( "gzdeflate" ) ) { |
48 | | - print "You must enable zlib support in PHP to compress old revisions!\n"; |
49 | | - print "Please see http://www.php.net/manual/en/ref.zlib.php\n\n"; |
50 | | - wfDie(); |
51 | | -} |
| 45 | +class CompressOld extends Maintenance { |
| 46 | + /** |
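 | 47 | + * Revision loading styles for compressWithConcat(): LS_INDIVIDUAL reads each text row with its own query, LS_CHUNKED loads the text in the same joined query as the revision rows. |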
| 48 | + */ |
| 49 | + const LS_INDIVIDUAL = 0; |
| 50 | + const LS_CHUNKED = 1; |
52 | 51 | |
53 | | -$defaults = array( |
54 | | - 't' => 'concat', |
55 | | - 'c' => 20, |
56 | | - 's' => 0, |
57 | | - 'b' => '', |
58 | | - 'e' => '', |
59 | | - 'extdb' => '', |
60 | | - 'endid' => false, |
61 | | -); |
| 52 | + public function __construct() { |
| 53 | + parent::__construct(); |
| 54 | + $this->mDescription = 'Compress the text of a wiki'; |
| 55 | + $this->addOption( 'type', 'Set compression type to either: gzip|concat', false, true, 't' ); |
| 56 | + $this->addOption( 'chunksize', 'Maximum number of revisions in a concat chunk', false, true, 'c' ); |
| 57 | + $this->addOption( 'begin-date', 'Earliest date to check for uncompressed revisions', false, true, 'b' ); |
| 58 | + $this->addOption( 'end-date', 'Latest revision date to compress', false, true, 'e' ); |
| 59 | + $this->addOption( 'start-id', 'The old_id to start from', false, true, 's'); |
| 60 | + $this->addOption( 'extdb', 'Store specified revisions in an external cluster (untested)', false, true ); |
| 61 | + $this->addOption( 'endid', 'Stop at this old_id', false, true, 'n' ); |
| 62 | + } |
62 | 63 | |
63 | | -$options = $options + $defaults; |
| 64 | + public function execute() { |
| 65 | + global $wgDBname; |
| 66 | + if ( !function_exists( "gzdeflate" ) ) { |
| 67 | + $this->error( "You must enable zlib support in PHP to compress old revisions!\n" . |
| 68 | + "Please see http://www.php.net/manual/en/ref.zlib.php\n", true ); |
| 69 | + } |
64 | 70 | |
65 | | -if ( $options['t'] != 'concat' && $options['t'] != 'gzip' ) { |
66 | | - print "Type \"{$options['t']}\" not supported\n"; |
67 | | -} |
| 71 | + $type = $this->getOption( 'type', 'concat' ); |
| 72 | + $chunkSize = $this->getOption( 'chunksize', 20 ); |
| 73 | + $startId = $this->getOption( 'start-id', 0 ); |
| 74 | + $beginDate = $this->getOption( 'begin-date', '' ); |
| 75 | + $endDate = $this->getOption( 'end-date', '' ); |
| 76 | + $extDB = $this->getOption( 'extdb', '' ); |
| 77 | + $endId = $this->getOption( 'endid', false ); |
68 | 78 | |
69 | | -if ( $options['extdb'] != '' ) { |
70 | | - print "Compressing database $wgDBname to external cluster {$options['extdb']}\n" . str_repeat( '-', 76 ) . "\n\n"; |
71 | | -} else { |
72 | | - print "Compressing database $wgDBname\n" . str_repeat( '-', 76 ) . "\n\n"; |
73 | | -} |
| 79 | + if ( $type != 'concat' && $type != 'gzip' ) { |
| 80 | + $this->error( "Type \"{$type}\" not supported" ); |
| 81 | + } |
74 | 82 | |
75 | | -$success = true; |
76 | | -if ( $options['t'] == 'concat' ) { |
77 | | - $success = compressWithConcat( $options['s'], $options['c'], $options['b'], |
78 | | - $options['e'], $options['extdb'], $options['endid'] ); |
79 | | -} else { |
80 | | - compressOldPages( $options['s'], $options['extdb'] ); |
81 | | -} |
| 83 | + if ( $extDB != '' ) { |
| 84 | + $this->output( "Compressing database {$wgDBname} to external cluster {$extDB}\n" |
| 85 | + . str_repeat( '-', 76 ) . "\n\n" ); |
| 86 | + } else { |
| 87 | + $this->output( "Compressing database {$wgDBname}\n" |
| 88 | + . str_repeat( '-', 76 ) . "\n\n" ); |
| 89 | + } |
82 | 90 | |
83 | | -if ( $success ) { |
84 | | - print "Done.\n"; |
85 | | -} |
| 91 | + $success = true; |
| 92 | + if ( $type == 'concat' ) { |
| 93 | + $success = $this->compressWithConcat( $startId, $chunkSize, $beginDate, |
| 94 | + $endDate, $extDB, $endId ); |
| 95 | + } else { |
| 96 | + $this->compressOldPages( $startId, $extDB ); |
| 97 | + } |
86 | 98 | |
87 | | -exit( 0 ); |
| 99 | + if ( $success ) { |
| 100 | + $this->output( "Done.\n" ); |
| 101 | + } |
| 102 | + } |
88 | 103 | |
| 104 | + /** @todo document */ |
| 105 | + private function compressOldPages( $start = 0, $extdb = '' ) { |
| 106 | + $chunksize = 50; |
| 107 | + $this->output( "Starting from old_id $start...\n" ); |
| 108 | + $dbw = wfGetDB( DB_MASTER ); |
| 109 | + do { |
| 110 | + $res = $dbw->select( 'text', array( 'old_id','old_flags','old_text' ), |
| 111 | + "old_id>=$start", __METHOD__, array( 'ORDER BY' => 'old_id', 'LIMIT' => $chunksize, 'FOR UPDATE' ) ); |
| 112 | + if( $dbw->numRows( $res ) == 0 ) { |
| 113 | + break; |
| 114 | + } |
| 115 | + $last = $start; |
| 116 | + foreach ( $res as $row ) { |
| 117 | + # print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n"; |
| 118 | + $this->compressPage( $row, $extdb ); |
| 119 | + $last = $row->old_id; |
| 120 | + } |
| 121 | + $start = $last + 1; # Deletion may leave long empty stretches |
| 122 | + $this->output( "$start...\n" ); |
| 123 | + } while( true ); |
| 124 | + } |
89 | 125 | |
| 126 | + /** @todo document */ |
| 127 | + private function compressPage( $row, $extdb ) { |
| 128 | + if ( false !== strpos( $row->old_flags, 'gzip' ) || false !== strpos( $row->old_flags, 'object' ) ) { |
| 129 | + #print "Already compressed row {$row->old_id}\n"; |
| 130 | + return false; |
| 131 | + } |
| 132 | + $dbw = wfGetDB( DB_MASTER ); |
| 133 | + $flags = $row->old_flags ? "{$row->old_flags},gzip" : "gzip"; |
| 134 | + $compress = gzdeflate( $row->old_text ); |
| 135 | + |
| 136 | + # Store in external storage if required |
| 137 | + if ( $extdb !== '' ) { |
| 138 | + $storeObj = new ExternalStoreDB; |
| 139 | + $compress = $storeObj->store( $extdb, $compress ); |
| 140 | + if ( $compress === false ) { |
| 141 | + $this->error( "Unable to store object" ); |
| 142 | + return false; |
| 143 | + } |
| 144 | + } |
| 145 | + |
| 146 | + # Update text row |
| 147 | + $dbw->update( 'text', |
| 148 | + array( /* SET */ |
| 149 | + 'old_flags' => $flags, |
| 150 | + 'old_text' => $compress |
| 151 | + ), array( /* WHERE */ |
| 152 | + 'old_id' => $row->old_id |
| 153 | + ), __METHOD__, |
| 154 | + array( 'LIMIT' => 1 ) |
| 155 | + ); |
| 156 | + return true; |
| 157 | + } |
| 158 | + |
| 159 | + /** @todo document */ |
| 160 | + private function compressWithConcat( $startId, $maxChunkSize, $beginDate, |
| 161 | + $endDate, $extdb = "", $maxPageId = false ) |
| 162 | + { |
| 163 | + $loadStyle = self::LS_CHUNKED; |
| 164 | + |
| 165 | + $dbr = wfGetDB( DB_SLAVE ); |
| 166 | + $dbw = wfGetDB( DB_MASTER ); |
| 167 | + |
| 168 | + # Set up external storage |
| 169 | + if ( $extdb != '' ) { |
| 170 | + $storeObj = new ExternalStoreDB; |
| 171 | + } |
| 172 | + |
| 173 | + # Get all articles by page_id |
| 174 | + if ( !$maxPageId ) { |
| 175 | + $maxPageId = $dbr->selectField( 'page', 'max(page_id)', '', __METHOD__ ); |
| 176 | + } |
| 177 | + $this->output( "Starting from $startId of $maxPageId\n" ); |
| 178 | + $pageConds = array(); |
| 179 | + |
| 180 | + /* |
| 181 | + if ( $exclude_ns0 ) { |
| 182 | + print "Excluding main namespace\n"; |
| 183 | + $pageConds[] = 'page_namespace<>0'; |
| 184 | + } |
| 185 | + if ( $queryExtra ) { |
| 186 | + $pageConds[] = $queryExtra; |
| 187 | + } |
| 188 | + */ |
| 189 | + |
| 190 | + # For each article, get a list of revisions which fit the criteria |
| 191 | + |
| 192 | + # No recompression, use a condition on old_flags |
| 193 | + # Don't compress object type entities, because that might produce data loss when |
| 194 | + # overwriting bulk storage concat rows. Don't compress external references, because |
| 195 | + # the script doesn't yet delete rows from external storage. |
| 196 | + $conds = array( |
| 197 | + 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'object', $dbr->anyString() ) . ' AND old_flags NOT ' |
| 198 | + . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ) ); |
| 199 | + |
| 200 | + if ( $beginDate ) { |
| 201 | + if ( !preg_match( '/^\d{14}$/', $beginDate ) ) { |
| 202 | + $this->error( "Invalid begin date \"$beginDate\"\n" ); |
| 203 | + return false; |
| 204 | + } |
| 205 | + $conds[] = "rev_timestamp>'" . $beginDate . "'"; |
| 206 | + } |
| 207 | + if ( $endDate ) { |
| 208 | + if ( !preg_match( '/^\d{14}$/', $endDate ) ) { |
| 209 | + $this->error( "Invalid end date \"$endDate\"\n" ); |
| 210 | + return false; |
| 211 | + } |
| 212 | + $conds[] = "rev_timestamp<'" . $endDate . "'"; |
| 213 | + } |
| 214 | + if ( $loadStyle == self::LS_CHUNKED ) { |
| 215 | + $tables = array( 'revision', 'text' ); |
| 216 | + $fields = array( 'rev_id', 'rev_text_id', 'old_flags', 'old_text' ); |
| 217 | + $conds[] = 'rev_text_id=old_id'; |
| 218 | + $revLoadOptions = 'FOR UPDATE'; |
| 219 | + } else { |
| 220 | + $tables = array( 'revision' ); |
| 221 | + $fields = array( 'rev_id', 'rev_text_id' ); |
| 222 | + $revLoadOptions = array(); |
| 223 | + } |
| 224 | + |
| 225 | + # Don't work with current revisions |
| 226 | + # Don't lock the page table for update either -- TS 2006-04-04 |
| 227 | + #$tables[] = 'page'; |
| 228 | + #$conds[] = 'page_id=rev_page AND rev_id != page_latest'; |
| 229 | + |
| 230 | + for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) { |
| 231 | + wfWaitForSlaves(); |
| 232 | + |
| 233 | + # Wake up |
| 234 | + $dbr->ping(); |
| 235 | + |
| 236 | + # Get the page row |
| 237 | + $pageRes = $dbr->select( 'page', |
| 238 | + array('page_id', 'page_namespace', 'page_title','page_latest'), |
| 239 | + $pageConds + array('page_id' => $pageId), __METHOD__ ); |
| 240 | + if ( $dbr->numRows( $pageRes ) == 0 ) { |
| 241 | + continue; |
| 242 | + } |
| 243 | + $pageRow = $dbr->fetchObject( $pageRes ); |
| 244 | + |
| 245 | + # Display progress |
| 246 | + $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title ); |
| 247 | + $this->output( "$pageId\t" . $titleObj->getPrefixedDBkey() . " " ); |
| 248 | + |
| 249 | + # Load revisions |
| 250 | + $revRes = $dbw->select( $tables, $fields, |
| 251 | + array_merge( array( |
| 252 | + 'rev_page' => $pageRow->page_id, |
| 253 | + # Don't operate on the current revision |
| 254 | + # Use < instead of <> in case the current revision has changed |
| 255 | + # since the page select, which wasn't locking |
| 256 | + 'rev_id < ' . $pageRow->page_latest |
| 257 | + ), $conds ), |
| 258 | + __METHOD__, |
| 259 | + $revLoadOptions |
| 260 | + ); |
| 261 | + $revs = array(); |
| 262 | + foreach ( $revRes as $revRow ) { |
| 263 | + $revs[] = $revRow; |
| 264 | + } |
| 265 | + |
| 266 | + if ( count( $revs ) < 2) { |
| 267 | + # No revisions matching, no further processing |
| 268 | + $this->output( "\n" ); |
| 269 | + continue; |
| 270 | + } |
| 271 | + |
| 272 | + # For each chunk |
| 273 | + $i = 0; |
| 274 | + while ( $i < count( $revs ) ) { |
| 275 | + if ( $i < count( $revs ) - $maxChunkSize ) { |
| 276 | + $thisChunkSize = $maxChunkSize; |
| 277 | + } else { |
| 278 | + $thisChunkSize = count( $revs ) - $i; |
| 279 | + } |
| 280 | + |
| 281 | + $chunk = new ConcatenatedGzipHistoryBlob(); |
| 282 | + $stubs = array(); |
| 283 | + $dbw->begin(); |
| 284 | + $usedChunk = false; |
| 285 | + $primaryOldid = $revs[$i]->rev_text_id; |
| 286 | + |
| 287 | + # Get the text of each revision and add it to the object |
| 288 | + for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy(); $j++ ) { |
| 289 | + $oldid = $revs[$i + $j]->rev_text_id; |
| 290 | + |
| 291 | + # Get text |
| 292 | + if ( $loadStyle == self::LS_INDIVIDUAL ) { |
| 293 | + $textRow = $dbw->selectRow( 'text', |
| 294 | + array( 'old_flags', 'old_text' ), |
| 295 | + array( 'old_id' => $oldid ), |
| 296 | + __METHOD__, |
| 297 | + 'FOR UPDATE' |
| 298 | + ); |
| 299 | + $text = Revision::getRevisionText( $textRow ); |
| 300 | + } else { |
| 301 | + $text = Revision::getRevisionText( $revs[$i + $j] ); |
| 302 | + } |
| 303 | + |
| 304 | + if ( $text === false ) { |
| 305 | + $this->error( "\nError, unable to get text in old_id $oldid" ); |
| 306 | + #$dbw->delete( 'old', array( 'old_id' => $oldid ) ); |
| 307 | + } |
| 308 | + |
| 309 | + if ( $extdb == "" && $j == 0 ) { |
| 310 | + $chunk->setText( $text ); |
| 311 | + $this->output( '.' ); |
| 312 | + } else { |
| 313 | + # Don't make a stub if it's going to be longer than the article |
| 314 | + # Stubs are typically about 100 bytes |
| 315 | + if ( strlen( $text ) < 120 ) { |
| 316 | + $stub = false; |
| 317 | + $this->output( 'x' ); |
| 318 | + } else { |
| 319 | + $stub = new HistoryBlobStub( $chunk->addItem( $text ) ); |
| 320 | + $stub->setLocation( $primaryOldid ); |
| 321 | + $stub->setReferrer( $oldid ); |
| 322 | + $this->output( '.' ); |
| 323 | + $usedChunk = true; |
| 324 | + } |
| 325 | + $stubs[$j] = $stub; |
| 326 | + } |
| 327 | + } |
| 328 | + $thisChunkSize = $j; |
| 329 | + |
| 330 | + # If we couldn't actually use any stubs because the pages were too small, do nothing |
| 331 | + if ( $usedChunk ) { |
| 332 | + if ( $extdb != "" ) { |
| 333 | + # Move blob objects to External Storage |
| 334 | + $stored = $storeObj->store( $extdb, serialize( $chunk )); |
| 335 | + if ($stored === false) { |
| 336 | + $this->error( "Unable to store object" ); |
| 337 | + return false; |
| 338 | + } |
| 339 | + # Store External Storage URLs instead of Stub placeholders |
| 340 | + foreach ($stubs as $stub) { |
| 341 | + if ($stub===false) |
| 342 | + continue; |
| 343 | + # $stored should provide base path to a BLOB |
| 344 | + $url = $stored."/".$stub->getHash(); |
| 345 | + $dbw->update( 'text', |
| 346 | + array( /* SET */ |
| 347 | + 'old_text' => $url, |
| 348 | + 'old_flags' => 'external,utf-8', |
| 349 | + ), array ( /* WHERE */ |
| 350 | + 'old_id' => $stub->getReferrer(), |
| 351 | + ) |
| 352 | + ); |
| 353 | + } |
| 354 | + } else { |
| 355 | + # Store the main object locally |
| 356 | + $dbw->update( 'text', |
| 357 | + array( /* SET */ |
| 358 | + 'old_text' => serialize( $chunk ), |
| 359 | + 'old_flags' => 'object,utf-8', |
| 360 | + ), array( /* WHERE */ |
| 361 | + 'old_id' => $primaryOldid |
| 362 | + ) |
| 363 | + ); |
| 364 | + |
| 365 | + # Store the stub objects |
| 366 | + for ( $j = 1; $j < $thisChunkSize; $j++ ) { |
| 367 | + # Skip if not compressing and don't overwrite the first revision |
| 368 | + if ( $stubs[$j] !== false && $revs[$i + $j]->rev_text_id != $primaryOldid ) { |
| 369 | + $dbw->update( 'text', |
| 370 | + array( /* SET */ |
| 371 | + 'old_text' => serialize($stubs[$j]), |
| 372 | + 'old_flags' => 'object,utf-8', |
| 373 | + ), array( /* WHERE */ |
| 374 | + 'old_id' => $revs[$i + $j]->rev_text_id |
| 375 | + ) |
| 376 | + ); |
| 377 | + } |
| 378 | + } |
| 379 | + } |
| 380 | + } |
| 381 | + # Done, next |
| 382 | + $this->output( "/" ); |
| 383 | + $dbw->commit(); |
| 384 | + $i += $thisChunkSize; |
| 385 | + wfWaitForSlaves(); |
| 386 | + } |
| 387 | + $this->output( "\n" ); |
| 388 | + } |
| 389 | + return true; |
| 390 | + } |
| 391 | + |
| 392 | +} |
| 393 | + |
| 394 | +$maintClass = 'CompressOld'; |
| 395 | +require_once( RUN_MAINTENANCE_IF_MAIN ); |