Index: trunk/phase3/maintenance/convertLinks.inc |
— | — | @@ -1,218 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * @file |
5 | | - * @todo document |
6 | | - * @ingroup Maintenance |
7 | | - */ |
8 | | - |
9 | | -/** */ |
10 | | -function convertLinks() { |
11 | | - global $wgDBtype; |
12 | | - if ( $wgDBtype == 'postgres' ) { |
13 | | - wfOut( "Links table already ok on Postgres.\n" ); |
14 | | - return; |
15 | | - } |
16 | | - |
17 | | - wfOut( "Converting links table to ID-ID...\n" ); |
18 | | - |
19 | | - global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname; |
20 | | - global $noKeys, $logPerformance, $fh; |
21 | | - |
22 | | - $tuplesAdded = $numBadLinks = $curRowsRead = 0; # counters etc |
23 | | - $totalTuplesInserted = 0; # total tuples INSERTed into links_temp |
24 | | - |
25 | | - $reportCurReadProgress = true; # whether or not to give progress reports while reading IDs from cur table |
26 | | - $curReadReportInterval = 1000; # number of rows between progress reports |
27 | | - |
28 | | - $reportLinksConvProgress = true; # whether or not to give progress reports during conversion |
29 | | - $linksConvInsertInterval = 1000; # number of rows per INSERT |
30 | | - |
31 | | - $initialRowOffset = 0; |
32 | | - # $finalRowOffset = 0; # not used yet; highest row number from links table to process |
33 | | - |
34 | | - # Overwrite the old links table with the new one. If this is set to false, |
35 | | - # the new table will be left at links_temp. |
36 | | - $overwriteLinksTable = true; |
37 | | - |
38 | | - # Don't create keys, and so allow duplicates in the new links table. |
39 | | - # This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?) |
40 | | - $noKeys = false; |
41 | | - |
42 | | - |
43 | | - $logPerformance = false; # output performance data to a file |
44 | | - $perfLogFilename = "convLinksPerf.txt"; |
45 | | - # -------------------------------------------------------------------- |
46 | | - |
47 | | - $dbw = wfGetDB( DB_MASTER ); |
48 | | - list ( $cur, $links, $links_temp, $links_backup ) = $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' ); |
49 | | - |
50 | | - // Get database-agnostic limit clause |
51 | | - $sql_limit = $dbw->limitResult( "SELECT l_from FROM $links", 1 ); |
52 | | - $res = $dbw->query( $sql_limit ); |
53 | | - if ( $dbw->fieldType( $res, 0 ) == "int" ) { |
54 | | - wfOut( "Schema already converted\n" ); |
55 | | - return; |
56 | | - } |
57 | | - |
58 | | - $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" ); |
59 | | - $row = $dbw->fetchObject( $res ); |
60 | | - $numRows = $row->count; |
61 | | - $dbw->freeResult( $res ); |
62 | | - |
63 | | - if ( $numRows == 0 ) { |
64 | | - wfOut( "Updating schema (no rows to convert)...\n" ); |
65 | | - createTempTable(); |
66 | | - } else { |
67 | | - if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); } |
68 | | - $baseTime = $startTime = getMicroTime(); |
69 | | - # Create a title -> cur_id map |
70 | | - wfOut( "Loading IDs from $cur table...\n" ); |
71 | | - performanceLog ( "Reading $numRows rows from cur table...\n" ); |
72 | | - performanceLog ( "rows read vs seconds elapsed:\n" ); |
73 | | - |
74 | | - $dbw->bufferResults( false ); |
75 | | - $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" ); |
76 | | - $ids = array(); |
77 | | - |
78 | | - while ( $row = $dbw->fetchObject( $res ) ) { |
79 | | - $title = $row->cur_title; |
80 | | - if ( $row->cur_namespace ) { |
81 | | - $title = $wgLang->getNsText( $row->cur_namespace ) . ":$title"; |
82 | | - } |
83 | | - $ids[$title] = $row->cur_id; |
84 | | - $curRowsRead++; |
85 | | - if ( $reportCurReadProgress ) { |
86 | | - if ( ( $curRowsRead % $curReadReportInterval ) == 0 ) { |
87 | | - performanceLog( $curRowsRead . " " . ( getMicroTime() - $baseTime ) . "\n" ); |
88 | | - wfOut( "\t$curRowsRead rows of $cur table read.\n" ); |
89 | | - } |
90 | | - } |
91 | | - } |
92 | | - $dbw->freeResult( $res ); |
93 | | - $dbw->bufferResults( true ); |
94 | | - wfOut( "Finished loading IDs.\n\n" ); |
95 | | - performanceLog( "Took " . ( getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n" ); |
96 | | - # -------------------------------------------------------------------- |
97 | | - |
98 | | - # Now, step through the links table (in chunks of $linksConvInsertInterval rows), |
99 | | - # convert, and write to the new table. |
100 | | - createTempTable(); |
101 | | - performanceLog( "Resetting timer.\n\n" ); |
102 | | - $baseTime = getMicroTime(); |
103 | | - wfOut( "Processing $numRows rows from $links table...\n" ); |
104 | | - performanceLog( "Processing $numRows rows from $links table...\n" ); |
105 | | - performanceLog( "rows inserted vs seconds elapsed:\n" ); |
106 | | - |
107 | | - for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval ) { |
108 | | - $sqlRead = "SELECT * FROM $links "; |
109 | | - $sqlRead = $dbw->limitResult( $sqlRead, $linksConvInsertInterval, $rowOffset ); |
110 | | - $res = $dbw->query( $sqlRead ); |
111 | | - if ( $noKeys ) { |
112 | | - $sqlWrite = array( "INSERT INTO $links_temp (l_from,l_to) VALUES " ); |
113 | | - } else { |
114 | | - $sqlWrite = array( "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES " ); |
115 | | - } |
116 | | - |
117 | | - $tuplesAdded = 0; # no tuples added to INSERT yet |
118 | | - while ( $row = $dbw->fetchObject( $res ) ) { |
119 | | - $fromTitle = $row->l_from; |
120 | | - if ( array_key_exists( $fromTitle, $ids ) ) { # valid title |
121 | | - $from = $ids[$fromTitle]; |
122 | | - $to = $row->l_to; |
123 | | - if ( $tuplesAdded != 0 ) { |
124 | | - $sqlWrite[] = ","; |
125 | | - } |
126 | | - $sqlWrite[] = "($from,$to)"; |
127 | | - $tuplesAdded++; |
128 | | - } else { # invalid title |
129 | | - $numBadLinks++; |
130 | | - } |
131 | | - } |
132 | | - $dbw->freeResult( $res ); |
133 | | - # wfOut( "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n" ); |
134 | | - if ( $tuplesAdded != 0 ) { |
135 | | - if ( $reportLinksConvProgress ) { |
136 | | - wfOut( "Inserting $tuplesAdded tuples into $links_temp..." ); |
137 | | - } |
138 | | - $dbw->query( implode( "", $sqlWrite ) ); |
139 | | - $totalTuplesInserted += $tuplesAdded; |
140 | | - if ( $reportLinksConvProgress ) |
141 | | - wfOut( " done. Total $totalTuplesInserted tuples inserted.\n" ); |
142 | | - performanceLog( $totalTuplesInserted . " " . ( getMicroTime() - $baseTime ) . "\n" ); |
143 | | - } |
144 | | - } |
145 | | - wfOut( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n" ); |
146 | | - performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" ); |
147 | | - performanceLog( "Total execution time: " . ( getMicroTime() - $startTime ) . " seconds.\n" ); |
148 | | - if ( $logPerformance ) { fclose ( $fh ); } |
149 | | - } |
150 | | - # -------------------------------------------------------------------- |
151 | | - |
152 | | - if ( $overwriteLinksTable ) { |
153 | | - $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); |
154 | | - if ( !( $dbConn->isOpen() ) ) { |
155 | | - wfOut( "Opening connection to database failed.\n" ); |
156 | | - return; |
157 | | - } |
158 | | - # Check for existing links_backup, and delete it if it exists. |
159 | | - wfOut( "Dropping backup links table if it exists..." ); |
160 | | - $dbConn->query( "DROP TABLE IF EXISTS $links_backup", DB_MASTER ); |
161 | | - wfOut( " done.\n" ); |
162 | | - |
163 | | - # Swap in the new table, and move old links table to links_backup |
164 | | - wfOut( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." ); |
165 | | - $dbConn->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", DB_MASTER ); |
166 | | - wfOut( " done.\n\n" ); |
167 | | - |
168 | | - $dbConn->close(); |
169 | | - wfOut( "Conversion complete. The old table remains at $links_backup;\n" ); |
170 | | - wfOut( "delete at your leisure.\n" ); |
171 | | - } else { |
172 | | - wfOut( "Conversion complete. The converted table is at $links_temp;\n" ); |
173 | | - wfOut( "the original links table is unchanged.\n" ); |
174 | | - } |
175 | | -} |
176 | | - |
177 | | -# -------------------------------------------------------------------- |
178 | | - |
179 | | -function createTempTable() { |
180 | | - global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname; |
181 | | - global $noKeys; |
182 | | - $dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); |
183 | | - |
184 | | - if ( !( $dbConn->isOpen() ) ) { |
185 | | - wfOut( "Opening connection to database failed.\n" ); |
186 | | - return; |
187 | | - } |
188 | | - $links_temp = $dbConn->tableName( 'links_temp' ); |
189 | | - |
190 | | - wfOut( "Dropping temporary links table if it exists..." ); |
191 | | - $dbConn->query( "DROP TABLE IF EXISTS $links_temp" ); |
192 | | - wfOut( " done.\n" ); |
193 | | - |
194 | | - wfOut( "Creating temporary links table..." ); |
195 | | - if ( $noKeys ) { |
196 | | - $dbConn->query( "CREATE TABLE $links_temp ( " . |
197 | | - "l_from int(8) unsigned NOT NULL default '0', " . |
198 | | - "l_to int(8) unsigned NOT NULL default '0')" ); |
199 | | - } else { |
200 | | - $dbConn->query( "CREATE TABLE $links_temp ( " . |
201 | | - "l_from int(8) unsigned NOT NULL default '0', " . |
202 | | - "l_to int(8) unsigned NOT NULL default '0', " . |
203 | | - "UNIQUE KEY l_from(l_from,l_to), " . |
204 | | - "KEY (l_to))" ); |
205 | | - } |
206 | | - wfOut( " done.\n\n" ); |
207 | | -} |
208 | | - |
209 | | -function performanceLog( $text ) { |
210 | | - global $logPerformance, $fh; |
211 | | - if ( $logPerformance ) { |
212 | | - fwrite( $fh, $text ); |
213 | | - } |
214 | | -} |
215 | | - |
216 | | -function getMicroTime() { # return time in seconds, with microsecond accuracy |
217 | | - list( $usec, $sec ) = explode( " ", microtime() ); |
218 | | - return ( (float)$usec + (float)$sec ); |
219 | | -} |