Index: trunk/phase3/maintenance/refreshLinks.inc |
— | — | @@ -136,23 +136,13 @@ |
137 | 137 | $dbw->immediateCommit(); |
138 | 138 | } |
139 | 139 | |
140 | | -/* |
141 | | - * Removes non-existing links from pages from pagelinks, imagelinks, |
142 | | - * categorylinks, templatelinks and externallinks tables. |
143 | | - * |
144 | | - * @param $maxLag |
145 | | - * @param $batchSize The size of deletion batches |
146 | | - * |
147 | | - * @author Merlijn van Deen <valhallasw@arctus.nl> |
148 | | - */ |
149 | | -function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) { |
| 140 | +function deleteLinksFromNonexistent( $maxLag = 0 ) { |
150 | 141 | $fname = 'deleteLinksFromNonexistent'; |
| 142 | + |
151 | 143 | wfWaitForSlaves( $maxLag ); |
152 | | - |
| 144 | + |
153 | 145 | $dbw = wfGetDB( DB_MASTER ); |
154 | | - $dbr = wfGetDB( DB_SLAVE ); |
155 | | - $dbr->bufferResults(false); |
156 | | - |
| 146 | + |
157 | 147 | $linksTables = array( |
158 | 148 | 'pagelinks' => 'pl_from', |
159 | 149 | 'imagelinks' => 'il_from', |
— | — | @@ -160,65 +150,27 @@ |
161 | 151 | 'templatelinks' => 'tl_from', |
162 | 152 | 'externallinks' => 'el_from', |
163 | 153 | ); |
164 | | - |
165 | | - |
166 | | - $readPage = $dbr->tableName( 'page' ); |
| 154 | + |
| 155 | + $page = $dbw->tableName( 'page' ); |
| 156 | + |
| 157 | + |
167 | 158 | foreach ( $linksTables as $table => $field ) { |
168 | | - $readLinks = $dbr->tableName( $table ); |
169 | | - |
170 | | - $sql = "SELECT DISTINCT( $field ) FROM $readLinks LEFT JOIN $readPage ON $field=page_id WHERE page_id IS NULL;"; |
171 | | - print "Retrieving illegal entries from $table: \tRUNNING"; |
172 | | - |
173 | | - $results = $dbr->query( $sql, $fname . ':' . $readLinks ); |
174 | | - print "\x08\x08\x08\x08\x08\x08\x08" . $results->numRows() . " illegal " . $field. "s. "; |
175 | | - |
176 | | - if ( $results->numRows() == 0 ) { |
| 159 | + if ( !$dbw->ping() ) { |
| 160 | + print "DB disconnected, reconnecting..."; |
| 161 | + while ( !$dbw->ping() ) { |
| 162 | + print "."; |
| 163 | + sleep(10); |
| 164 | + } |
177 | 165 | print "\n"; |
178 | | - continue; |
179 | 166 | } |
180 | | - |
181 | | - $counter = 0; |
182 | | - $list = array(); |
183 | | - print "Removing illegal links: 1.."; |
184 | | - foreach( $results as $row ) { |
185 | | - $counter++; |
186 | | - $list[] = $row->$field; |
187 | | - if ( ( $counter % $batchSize ) == 0 ) { |
188 | | - print $counter . ".."; |
189 | | - deleteBatch($dbw, $table, $field, $list); |
190 | | - $list = ''; |
191 | | - } |
192 | | - } |
193 | | - print $counter . "\n"; |
194 | | - deleteBatch($dbw, $table, $field, $list); |
195 | | - } |
196 | | -} |
197 | 167 | |
198 | | -/* Deletes a batch of items from a table. |
199 | | - * Runs the query: DELETE FROM <$table> WHERE <$field> IN (<$list>) |
200 | | - * |
201 | | - * @param $dbw Database Database object to run the DELETE query on |
202 | | - * @param $table table to work on; will be converted via $dbw->tableName. |
203 | | - * @param $field column to search in |
204 | | - * @param $list values to remove. Array with SQL-safe (!) values. |
205 | | - * |
206 | | - * @author Merlijn van Deen <valhallasw@arctus.nl> |
207 | | - */ |
208 | | -function deleteBatch($dbw, $table, $field, $list) { |
209 | | - if (count($list) == 0) return; |
210 | | - |
211 | | - $masterLinks = $dbw->tableName( $table ); |
212 | | - $fname = "deleteBatch:masterLinks"; |
213 | | - |
214 | | - if ( !$dbw->ping() ) { |
215 | | - print "\nDB disconnected, reconnecting..."; |
216 | | - while ( !$dbw->ping() ) { |
217 | | - print "."; |
218 | | - sleep(10); |
219 | | - } |
220 | | - print "\n"; |
221 | | - } |
| 168 | + $pTable = $dbw->tableName( $table ); |
| 169 | + $sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL"; |
222 | 170 | |
223 | | - $sql = "DELETE FROM $masterLinks WHERE $field IN (" . join("," , $list) . ");"; |
224 | | - $dbw->query($sql, $fname); |
| 171 | + print "Deleting $table from non-existent articles..."; |
| 172 | + $dbw->query( $sql, $fname ); |
| 173 | + print " fixed " .$dbw->affectedRows() . " row(s)\n"; |
| 174 | + } |
225 | 175 | } |
| 176 | + |
| 177 | +?> |
Index: trunk/phase3/maintenance/refreshLinks.php |
— | — | @@ -18,16 +18,14 @@ |
19 | 19 | [--new-only] [--redirects-only] |
20 | 20 | php refreshLinks.php [<start>] [-e <end>] [-m <maxlag>] --old-redirects-only |
21 | 21 | |
22 | | - --help : This help message |
23 | | - --dfn-only : Delete links from nonexistent articles only |
24 | | - --batch-size <number> : The delete batch size when removing links from |
25 | | - nonexistent articles (default 100) |
26 | | - --new-only : Only affect articles with just a single edit |
27 | | - --redirects-only : Only fix redirects, not all links |
28 | | - --old-redirects-only : Only fix redirects with no redirect table entry |
29 | | - -m <number> : Maximum replication lag |
30 | | - <start> : First page id to refresh |
31 | | - -e <number> : Last page id to refresh |
| 22 | + --help : This help message |
| 23 | + --dfn-only : Delete links from nonexistent articles only |
| 24 | + --new-only : Only affect articles with just a single edit |
| 25 | + --redirects-only : Only fix redirects, not all links |
| 26 | + --old-redirects-only : Only fix redirects with no redirect table entry |
| 27 | + -m <number> : Maximum replication lag |
| 28 | + <start> : First page id to refresh |
| 29 | + -e <number> : Last page id to refresh |
32 | 30 | |
33 | 31 | TEXT; |
34 | 32 | exit(0); |
— | — | @@ -46,8 +44,10 @@ |
47 | 45 | } |
48 | 46 | // this bit's bad for replication: disabling temporarily |
49 | 47 | // --brion 2005-07-16 |
50 | | -deleteLinksFromNonexistent($options['m'], $options['batch-size']); |
| 48 | +//deleteLinksFromNonexistent(); |
51 | 49 | |
52 | 50 | if ( $options['globals'] ) { |
53 | 51 | print_r( $GLOBALS ); |
54 | 52 | } |
| 53 | + |
| 54 | + |