Index: trunk/phase3/maintenance/convertLinks.php |
— | — | @@ -24,11 +24,18 @@ |
25 | 25 | require_once( dirname( __FILE__ ) . '/Maintenance.php' ); |
26 | 26 | |
27 | 27 | class ConvertLinks extends Maintenance { |
| 28 | + private $logPerformance; |
28 | 29 | |
29 | 30 | public function __construct() { |
30 | 31 | parent::__construct(); |
31 | 32 | $this->mDescription = "Convert from the old links schema (string->ID) to the new schema (ID->ID) |
32 | 33 | The wiki should be put into read-only mode while this script executes"; |
| 34 | + # These are named --switches, so register them with addOption(): they are looked up with hasOption(), whereas addArg() declares positional arguments. |
| 35 | + $this->addOption( 'logperformance', "Log performance to perfLogFilename.", false ); |
| 36 | + $this->addOption( 'perfLogFilename', "Filename where performance is logged if --logperformance was set (defaults to 'convLinksPerf.txt').", false, true ); # last argument: this option takes a value |
| 37 | + $this->addOption( 'keep-links-table', "Don't overwrite the old links table with the new one, leave the new table at links_temp.", false ); |
| 38 | + $this->addOption( 'noKeys', "Don't create keys, and so allow duplicates in the new links table.\n |
| 39 | +This gives a huge speed improvement for very large links tables which are MyISAM." /* (What about InnoDB?) */, false ); # name must match the hasOption( 'noKeys' ) checks |
33 | 40 | } |
34 | 41 | |
35 | 42 | public function getDbType() { |
— | — | @@ -44,7 +51,7 @@ |
45 | 52 | return; |
46 | 53 | } |
47 | 54 | |
48 | | - global $wgLang, $noKeys, $logPerformance, $fh; |
| 55 | + global $wgContLang; |
49 | 56 | |
50 | 57 | $tuplesAdded = $numBadLinks = $curRowsRead = 0; # counters etc |
51 | 58 | $totalTuplesInserted = 0; # total tuples INSERTed into links_temp |
— | — | @@ -58,17 +65,11 @@ |
59 | 66 | $initialRowOffset = 0; |
60 | 67 | # $finalRowOffset = 0; # not used yet; highest row number from links table to process |
61 | 68 | |
62 | | - # Overwrite the old links table with the new one. If this is set to false, |
63 | | - # the new table will be left at links_temp. |
64 | | - $overwriteLinksTable = true; |
65 | | - |
66 | | - # Don't create keys, and so allow duplicates in the new links table. |
67 | | - # This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?) |
68 | | - $noKeys = false; |
69 | | - |
70 | | - |
71 | | - $logPerformance = false; # output performance data to a file |
72 | | - $perfLogFilename = "convLinksPerf.txt"; |
| 69 | + $overwriteLinksTable = !$this->hasOption( 'keep-links-table' ); |
| 70 | + $noKeys = $this->hasOption( 'noKeys' ); |
| 71 | + $this->logPerformance = $this->hasOption( 'logperformance' ); |
| 72 | + $perfLogFilename = $this->getArg( 'perfLogFilename', "convLinksPerf.txt" ); |
| 73 | + |
73 | 74 | # -------------------------------------------------------------------- |
74 | 75 | |
75 | 76 | list ( $cur, $links, $links_temp, $links_backup ) = $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' ); |
— | — | @@ -93,12 +94,19 @@ |
94 | 95 | $this->output( "Updating schema (no rows to convert)...\n" ); |
95 | 96 | $this->createTempTable(); |
96 | 97 | } else { |
97 | | - if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); } |
| 98 | + $fh = false; |
| 99 | + if ( $this->logPerformance ) { |
| 100 | + $fh = fopen ( $perfLogFilename, "w" ); |
| 101 | + if ( !$fh ) { |
| 102 | + $this->error( "Couldn't open $perfLogFilename" ); |
| 103 | + $this->logPerformance = false; |
| 104 | + } |
| 105 | + } |
98 | 106 | $baseTime = $startTime = $this->getMicroTime(); |
99 | 107 | # Create a title -> cur_id map |
100 | 108 | $this->output( "Loading IDs from $cur table...\n" ); |
101 | | - $this->performanceLog ( "Reading $numRows rows from cur table...\n" ); |
102 | | - $this->performanceLog ( "rows read vs seconds elapsed:\n" ); |
| 109 | + $this->performanceLog ( $fh, "Reading $numRows rows from cur table...\n" ); |
| 110 | + $this->performanceLog ( $fh, "rows read vs seconds elapsed:\n" ); |
103 | 111 | |
104 | 112 | $dbw->bufferResults( false ); |
105 | 113 | $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" ); |
— | — | @@ -107,13 +115,13 @@ |
108 | 116 | while ( $row = $dbw->fetchObject( $res ) ) { |
109 | 117 | $title = $row->cur_title; |
110 | 118 | if ( $row->cur_namespace ) { |
111 | | - $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title"; |
| 119 | + $title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title"; |
112 | 120 | } |
113 | 121 | $ids[$title] = $row->cur_id; |
114 | 122 | $curRowsRead++; |
115 | 123 | if ( $reportCurReadProgress ) { |
116 | 124 | if ( ( $curRowsRead % $curReadReportInterval ) == 0 ) { |
117 | | - $this->performanceLog( $curRowsRead . " " . ( $this->getMicroTime() - $baseTime ) . "\n" ); |
| 125 | + $this->performanceLog( $fh, $curRowsRead . " " . ( $this->getMicroTime() - $baseTime ) . "\n" ); |
118 | 126 | $this->output( "\t$curRowsRead rows of $cur table read.\n" ); |
119 | 127 | } |
120 | 128 | } |
— | — | @@ -121,18 +129,18 @@ |
122 | 130 | $dbw->freeResult( $res ); |
123 | 131 | $dbw->bufferResults( true ); |
124 | 132 | $this->output( "Finished loading IDs.\n\n" ); |
125 | | - $this->performanceLog( "Took " . ( $this->getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n" ); |
| 133 | + $this->performanceLog( $fh, "Took " . ( $this->getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n" ); |
126 | 134 | |
127 | 135 | # -------------------------------------------------------------------- |
128 | 136 | |
129 | 137 | # Now, step through the links table (in chunks of $linksConvInsertInterval rows), |
130 | 138 | # convert, and write to the new table. |
131 | 139 | $this->createTempTable(); |
132 | | - $this->performanceLog( "Resetting timer.\n\n" ); |
| 140 | + $this->performanceLog( $fh, "Resetting timer.\n\n" ); |
133 | 141 | $baseTime = $this->getMicroTime(); |
134 | 142 | $this->output( "Processing $numRows rows from $links table...\n" ); |
135 | | - $this->performanceLog( "Processing $numRows rows from $links table...\n" ); |
136 | | - $this->performanceLog( "rows inserted vs seconds elapsed:\n" ); |
| 143 | + $this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" ); |
| 144 | + $this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" ); |
137 | 145 | |
138 | 146 | for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval ) { |
139 | 147 | $sqlRead = "SELECT * FROM $links "; |
— | — | @@ -169,13 +177,15 @@ |
170 | 178 | $totalTuplesInserted += $tuplesAdded; |
171 | 179 | if ( $reportLinksConvProgress ) |
172 | 180 | $this->output( " done. Total $totalTuplesInserted tuples inserted.\n" ); |
173 | | - $this->performanceLog( $totalTuplesInserted . " " . ( $this->getMicroTime() - $baseTime ) . "\n" ); |
| 181 | + $this->performanceLog( $fh, $totalTuplesInserted . " " . ( $this->getMicroTime() - $baseTime ) . "\n" ); |
174 | 182 | } |
175 | 183 | } |
176 | 184 | $this->output( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n" ); |
177 | | - $this->performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" ); |
178 | | - $this->performanceLog( "Total execution time: " . ( $this->getMicroTime() - $startTime ) . " seconds.\n" ); |
179 | | - if ( $logPerformance ) { fclose ( $fh ); } |
| 185 | + $this->performanceLog( $fh, "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" ); |
| 186 | + $this->performanceLog( $fh, "Total execution time: " . ( $this->getMicroTime() - $startTime ) . " seconds.\n" ); |
| 187 | + if ( $this->logPerformance ) { |
| 188 | + fclose ( $fh ); |
| 189 | + } |
180 | 190 | } |
181 | 191 | # -------------------------------------------------------------------- |
182 | 192 | |
— | — | @@ -200,7 +210,6 @@ |
201 | 211 | } |
202 | 212 | |
203 | 213 | private function createTempTable() { |
204 | | - global $noKeys; |
205 | 214 | $dbConn = wfGetDB( DB_MASTER ); |
206 | 215 | |
207 | 216 | if ( !( $dbConn->isOpen() ) ) { |
— | — | @@ -214,7 +223,7 @@ |
215 | 224 | $this->output( " done.\n" ); |
216 | 225 | |
217 | 226 | $this->output( "Creating temporary links table..." ); |
218 | | - if ( $noKeys ) { |
| 227 | + if ( $this->hasOption( 'noKeys' ) ) { |
219 | 228 | $dbConn->query( "CREATE TABLE $links_temp ( " . |
220 | 229 | "l_from int(8) unsigned NOT NULL default '0', " . |
221 | 230 | "l_to int(8) unsigned NOT NULL default '0')" ); |
— | — | @@ -228,9 +237,8 @@ |
229 | 238 | $this->output( " done.\n\n" ); |
230 | 239 | } |
231 | 240 | |
232 | | - private function performanceLog( $text ) { |
233 | | - global $logPerformance, $fh; |
234 | | - if ( $logPerformance ) { |
| 241 | + private function performanceLog( $fh, $text ) { # Append $text to the already-open log handle $fh; does nothing unless performance logging is enabled. |
| 242 | + if ( $this->logPerformance ) { # the caller clears this flag when the log file could not be opened, so $fh is only written when valid |
235 | 243 | fwrite( $fh, $text ); |
236 | 244 | } |
237 | 245 | } |
237 | 245 | } |