Index: trunk/phase3/maintenance/convertLinks.inc |
— | — | @@ -0,0 +1,216 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * @file |
| 5 | + * @todo document |
| 6 | + * @ingroup Maintenance |
| 7 | + */ |
| 8 | + |
| 9 | +/** */ |
| 10 | +function convertLinks() { |
| 11 | + global $wgDBtype; |
| 12 | + if( $wgDBtype == 'postgres' ) { |
| 13 | + wfOut( "Links table already ok on Postgres.\n" ); |
| 14 | + return; |
| 15 | + } |
| 16 | + |
| 17 | + wfOut( "Converting links table to ID-ID...\n" ); |
| 18 | + |
| 19 | + global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname; |
| 20 | + global $noKeys, $logPerformance, $fh; |
| 21 | + |
| 22 | + $tuplesAdded = $numBadLinks = $curRowsRead = 0; #counters etc |
| 23 | + $totalTuplesInserted = 0; # total tuples INSERTed into links_temp |
| 24 | + |
| 25 | + $reportCurReadProgress = true; #whether or not to give progress reports while reading IDs from cur table |
| 26 | + $curReadReportInterval = 1000; #number of rows between progress reports |
| 27 | + |
| 28 | + $reportLinksConvProgress = true; #whether or not to give progress reports during conversion |
| 29 | + $linksConvInsertInterval = 1000; #number of rows per INSERT |
| 30 | + |
| 31 | + $initialRowOffset = 0; |
| 32 | + #$finalRowOffset = 0; # not used yet; highest row number from links table to process |
| 33 | + |
| 34 | + # Overwrite the old links table with the new one. If this is set to false, |
| 35 | + # the new table will be left at links_temp. |
| 36 | + $overwriteLinksTable = true; |
| 37 | + |
| 38 | + # Don't create keys, and so allow duplicates in the new links table. |
| 39 | + # This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?) |
| 40 | + $noKeys = false; |
| 41 | + |
| 42 | + |
| 43 | + $logPerformance = false; # output performance data to a file |
| 44 | + $perfLogFilename = "convLinksPerf.txt"; |
| 45 | + #-------------------------------------------------------------------- |
| 46 | + |
| 47 | + $dbw = wfGetDB( DB_MASTER ); |
| 48 | + list ($cur, $links, $links_temp, $links_backup) = $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' ); |
| 49 | + |
| 50 | + $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" ); |
| 51 | + if ( $dbw->fieldType( $res, 0 ) == "int" ) { |
| 52 | + wfOut( "Schema already converted\n" ); |
| 53 | + return; |
| 54 | + } |
| 55 | + |
| 56 | + $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" ); |
| 57 | + $row = $dbw->fetchObject($res); |
| 58 | + $numRows = $row->count; |
| 59 | + $dbw->freeResult( $res ); |
| 60 | + |
| 61 | + if ( $numRows == 0 ) { |
| 62 | + wfOut( "Updating schema (no rows to convert)...\n" ); |
| 63 | + createTempTable(); |
| 64 | + } else { |
| 65 | + if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); } |
| 66 | + $baseTime = $startTime = getMicroTime(); |
| 67 | + # Create a title -> cur_id map |
| 68 | + wfOut( "Loading IDs from $cur table...\n" ); |
| 69 | + performanceLog ( "Reading $numRows rows from cur table...\n" ); |
| 70 | + performanceLog ( "rows read vs seconds elapsed:\n" ); |
| 71 | + |
| 72 | + $dbw->bufferResults( false ); |
| 73 | + $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" ); |
| 74 | + $ids = array(); |
| 75 | + |
| 76 | + while ( $row = $dbw->fetchObject( $res ) ) { |
| 77 | + $title = $row->cur_title; |
| 78 | + if ( $row->cur_namespace ) { |
| 79 | + $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title"; |
| 80 | + } |
| 81 | + $ids[$title] = $row->cur_id; |
| 82 | + $curRowsRead++; |
| 83 | + if ($reportCurReadProgress) { |
| 84 | + if (($curRowsRead % $curReadReportInterval) == 0) { |
| 85 | + performanceLog( $curRowsRead . " " . (getMicroTime() - $baseTime) . "\n" ); |
| 86 | + wfOut( "\t$curRowsRead rows of $cur table read.\n" ); |
| 87 | + } |
| 88 | + } |
| 89 | + } |
| 90 | + $dbw->freeResult( $res ); |
| 91 | + $dbw->bufferResults( true ); |
| 92 | + wfOut( "Finished loading IDs.\n\n" ); |
| 93 | + performanceLog( "Took " . (getMicroTime() - $baseTime) . " seconds to load IDs.\n\n" ); |
| 94 | + #-------------------------------------------------------------------- |
| 95 | + |
| 96 | + # Now, step through the links table (in chunks of $linksConvInsertInterval rows), |
| 97 | + # convert, and write to the new table. |
| 98 | + createTempTable(); |
| 99 | + performanceLog( "Resetting timer.\n\n" ); |
| 100 | + $baseTime = getMicroTime(); |
| 101 | + wfOut( "Processing $numRows rows from $links table...\n" ); |
| 102 | + performanceLog( "Processing $numRows rows from $links table...\n" ); |
| 103 | + performanceLog( "rows inserted vs seconds elapsed:\n" ); |
| 104 | + |
| 105 | + for ($rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval) { |
| 106 | + $sqlRead = "SELECT * FROM $links "; |
| 107 | + $sqlRead = $dbw->limitResult($sqlRead, $linksConvInsertInterval,$rowOffset); |
| 108 | + $res = $dbw->query($sqlRead); |
| 109 | + if ( $noKeys ) { |
| 110 | + $sqlWrite = array("INSERT INTO $links_temp (l_from,l_to) VALUES "); |
| 111 | + } else { |
| 112 | + $sqlWrite = array("INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES "); |
| 113 | + } |
| 114 | + |
| 115 | + $tuplesAdded = 0; # no tuples added to INSERT yet |
| 116 | + while ( $row = $dbw->fetchObject($res) ) { |
| 117 | + $fromTitle = $row->l_from; |
| 118 | + if ( array_key_exists( $fromTitle, $ids ) ) { # valid title |
| 119 | + $from = $ids[$fromTitle]; |
| 120 | + $to = $row->l_to; |
| 121 | + if ( $tuplesAdded != 0 ) { |
| 122 | + $sqlWrite[] = ","; |
| 123 | + } |
| 124 | + $sqlWrite[] = "($from,$to)"; |
| 125 | + $tuplesAdded++; |
| 126 | + } else { # invalid title |
| 127 | + $numBadLinks++; |
| 128 | + } |
| 129 | + } |
| 130 | + $dbw->freeResult($res); |
| 131 | + #wfOut( "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n" ); |
| 132 | + if ( $tuplesAdded != 0 ) { |
| 133 | + if ($reportLinksConvProgress) { |
| 134 | + wfOut( "Inserting $tuplesAdded tuples into $links_temp..." ); |
| 135 | + } |
| 136 | + $dbw->query( implode("",$sqlWrite) ); |
| 137 | + $totalTuplesInserted += $tuplesAdded; |
| 138 | + if ($reportLinksConvProgress) |
| 139 | + wfOut( " done. Total $totalTuplesInserted tuples inserted.\n" ); |
| 140 | + performanceLog( $totalTuplesInserted . " " . (getMicroTime() - $baseTime) . "\n" ); |
| 141 | + } |
| 142 | + } |
| 143 | + wfOut( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n" ); |
| 144 | + performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" ); |
| 145 | + performanceLog( "Total execution time: " . (getMicroTime() - $startTime) . " seconds.\n" ); |
| 146 | + if ( $logPerformance ) { fclose ( $fh ); } |
| 147 | + } |
| 148 | + #-------------------------------------------------------------------- |
| 149 | + |
| 150 | + if ( $overwriteLinksTable ) { |
| 151 | + $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); |
| 152 | + if (!($dbConn->isOpen())) { |
| 153 | + wfOut( "Opening connection to database failed.\n" ); |
| 154 | + return; |
| 155 | + } |
| 156 | + # Check for existing links_backup, and delete it if it exists. |
| 157 | + wfOut( "Dropping backup links table if it exists..." ); |
| 158 | + $dbConn->query( "DROP TABLE IF EXISTS $links_backup", DB_MASTER); |
| 159 | + wfOut( " done.\n" ); |
| 160 | + |
| 161 | + # Swap in the new table, and move old links table to links_backup |
| 162 | + wfOut( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." ); |
| 163 | + $dbConn->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", DB_MASTER ); |
| 164 | + wfOut( " done.\n\n" ); |
| 165 | + |
| 166 | + $dbConn->close(); |
| 167 | + wfOut( "Conversion complete. The old table remains at $links_backup;\n" ); |
| 168 | + wfOut( "delete at your leisure.\n" ); |
| 169 | + } else { |
| 170 | + wfOut( "Conversion complete. The converted table is at $links_temp;\n" ); |
| 171 | + wfOut( "the original links table is unchanged.\n" ); |
| 172 | + } |
| 173 | +} |
| 174 | + |
| 175 | +#-------------------------------------------------------------------- |
| 176 | + |
| 177 | +function createTempTable() { |
| 178 | + global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname; |
| 179 | + global $noKeys; |
| 180 | + $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); |
| 181 | + |
| 182 | + if (!($dbConn->isOpen())) { |
| 183 | + wfOut( "Opening connection to database failed.\n" ); |
| 184 | + return; |
| 185 | + } |
| 186 | + $links_temp = $dbConn->tableName( 'links_temp' ); |
| 187 | + |
| 188 | + wfOut( "Dropping temporary links table if it exists..." ); |
| 189 | + $dbConn->query( "DROP TABLE IF EXISTS $links_temp"); |
| 190 | + wfOut( " done.\n" ); |
| 191 | + |
| 192 | + wfOut( "Creating temporary links table..." ); |
| 193 | + if ( $noKeys ) { |
| 194 | + $dbConn->query( "CREATE TABLE $links_temp ( " . |
| 195 | + "l_from int(8) unsigned NOT NULL default '0', " . |
| 196 | + "l_to int(8) unsigned NOT NULL default '0')"); |
| 197 | + } else { |
| 198 | + $dbConn->query( "CREATE TABLE $links_temp ( " . |
| 199 | + "l_from int(8) unsigned NOT NULL default '0', " . |
| 200 | + "l_to int(8) unsigned NOT NULL default '0', " . |
| 201 | + "UNIQUE KEY l_from(l_from,l_to), " . |
| 202 | + "KEY (l_to))"); |
| 203 | + } |
| 204 | + wfOut( " done.\n\n" ); |
| 205 | +} |
| 206 | + |
| 207 | +function performanceLog( $text ) { |
| 208 | + global $logPerformance, $fh; |
| 209 | + if ( $logPerformance ) { |
| 210 | + fwrite( $fh, $text ); |
| 211 | + } |
| 212 | +} |
| 213 | + |
| 214 | +function getMicroTime() { # return time in seconds, with microsecond accuracy |
| 215 | + list($usec, $sec) = explode(" ", microtime()); |
| 216 | + return ((float)$usec + (float)$sec); |
| 217 | +} |
Property changes on: trunk/phase3/maintenance/convertLinks.inc |
___________________________________________________________________ |
Name: svn:mergeinfo |
1 | 218 | + /branches/REL1_15/phase3/maintenance/convertLinks.inc:51646 |
Name: svn:eol-style |
2 | 219 | + native |
Name: svn:keywords |
3 | 220 | + Author Date Id Revision |