r45482 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r45481 | r45482 | r45483 >
Date:03:33, 7 January 2009
Author:brion
Status:ok
Tags:
Comment:
Pull back r45431 for the moment "Updated deleteLinksFromNonexistent function:" etc
There's some funny output with \x08 stuff, and I don't want to fiddle with it just now...
Modified paths:
  • /trunk/phase3/maintenance/refreshLinks.inc (modified) (history)
  • /trunk/phase3/maintenance/refreshLinks.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/refreshLinks.inc
@@ -136,23 +136,13 @@
137137 $dbw->immediateCommit();
138138 }
139139
140 -/*
141 - * Removes non-existing links from pages from pagelinks, imagelinks,
142 - * categorylinks, templatelinks and externallinks tables.
143 - *
144 - * @param $maxLag
145 - * @param $batchSize The size of deletion batches
146 - *
147 - * @author Merlijn van Deen <valhallasw@arctus.nl>
148 - */
149 -function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
 140+function deleteLinksFromNonexistent( $maxLag = 0 ) {
150141 $fname = 'deleteLinksFromNonexistent';
 142+
151143 wfWaitForSlaves( $maxLag );
152 -
 144+
153145 $dbw = wfGetDB( DB_MASTER );
154 - $dbr = wfGetDB( DB_SLAVE );
155 - $dbr->bufferResults(false);
156 -
 146+
157147 $linksTables = array(
158148 'pagelinks' => 'pl_from',
159149 'imagelinks' => 'il_from',
@@ -160,65 +150,27 @@
161151 'templatelinks' => 'tl_from',
162152 'externallinks' => 'el_from',
163153 );
164 -
165 -
166 - $readPage = $dbr->tableName( 'page' );
 154+
 155+ $page = $dbw->tableName( 'page' );
 156+
 157+
167158 foreach ( $linksTables as $table => $field ) {
168 - $readLinks = $dbr->tableName( $table );
169 -
170 - $sql = "SELECT DISTINCT( $field ) FROM $readLinks LEFT JOIN $readPage ON $field=page_id WHERE page_id IS NULL;";
171 - print "Retrieving illegal entries from $table: \tRUNNING";
172 -
173 - $results = $dbr->query( $sql, $fname . ':' . $readLinks );
174 - print "\x08\x08\x08\x08\x08\x08\x08" . $results->numRows() . " illegal " . $field. "s. ";
175 -
176 - if ( $results->numRows() == 0 ) {
 159+ if ( !$dbw->ping() ) {
 160+ print "DB disconnected, reconnecting...";
 161+ while ( !$dbw->ping() ) {
 162+ print ".";
 163+ sleep(10);
 164+ }
177165 print "\n";
178 - continue;
179166 }
180 -
181 - $counter = 0;
182 - $list = array();
183 - print "Removing illegal links: 1..";
184 - foreach( $results as $row ) {
185 - $counter++;
186 - $list[] = $row->$field;
187 - if ( ( $counter % $batchSize ) == 0 ) {
188 - print $counter . "..";
189 - deleteBatch($dbw, $table, $field, $list);
190 - $list = '';
191 - }
192 - }
193 - print $counter . "\n";
194 - deleteBatch($dbw, $table, $field, $list);
195 - }
196 -}
197167
198 -/* Deletes a batch of items from a table.
199 - * Runs the query: DELETE FROM <$table> WHERE <$field> IN (<$list>)
200 - *
201 - * @param $dbw Database Database object to run the DELETE query on
202 - * @param $table table to work on; will be converted via $dbw->tableName.
203 - * @param $field column to search in
204 - * @param $list values to remove. Array with SQL-safe (!) values.
205 - *
206 - * @author Merlijn van Deen <valhallasw@arctus.nl>
207 - */
208 -function deleteBatch($dbw, $table, $field, $list) {
209 - if (count($list) == 0) return;
210 -
211 - $masterLinks = $dbw->tableName( $table );
212 - $fname = "deleteBatch:masterLinks";
213 -
214 - if ( !$dbw->ping() ) {
215 - print "\nDB disconnected, reconnecting...";
216 - while ( !$dbw->ping() ) {
217 - print ".";
218 - sleep(10);
219 - }
220 - print "\n";
221 - }
 168+ $pTable = $dbw->tableName( $table );
 169+ $sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL";
222170
223 - $sql = "DELETE FROM $masterLinks WHERE $field IN (" . join("," , $list) . ");";
224 - $dbw->query($sql, $fname);
 171+ print "Deleting $table from non-existent articles...";
 172+ $dbw->query( $sql, $fname );
 173+ print " fixed " .$dbw->affectedRows() . " row(s)\n";
 174+ }
225175 }
 176+
 177+?>
Index: trunk/phase3/maintenance/refreshLinks.php
@@ -18,16 +18,14 @@
1919 [--new-only] [--redirects-only]
2020 php refreshLinks.php [<start>] [-e <end>] [-m <maxlag>] --old-redirects-only
2121
22 - --help : This help message
23 - --dfn-only : Delete links from nonexistent articles only
24 - --batch-size <number> : The delete batch size when removing links from
25 - nonexistent articles (default 100)
26 - --new-only : Only affect articles with just a single edit
27 - --redirects-only : Only fix redirects, not all links
28 - --old-redirects-only : Only fix redirects with no redirect table entry
29 - -m <number> : Maximum replication lag
30 - <start> : First page id to refresh
31 - -e <number> : Last page id to refresh
 22+ --help : This help message
 23+ --dfn-only : Delete links from nonexistent articles only
 24+ --new-only : Only affect articles with just a single edit
 25+ --redirects-only : Only fix redirects, not all links
 26+ --old-redirects-only : Only fix redirects with no redirect table entry
 27+ -m <number> : Maximum replication lag
 28+ <start> : First page id to refresh
 29+ -e <number> : Last page id to refresh
3230
3331 TEXT;
3432 exit(0);
@@ -46,8 +44,10 @@
4745 }
4846 // this bit's bad for replication: disabling temporarily
4947 // --brion 2005-07-16
50 -deleteLinksFromNonexistent($options['m'], $options['batch-size']);
 48+//deleteLinksFromNonexistent();
5149
5250 if ( $options['globals'] ) {
5351 print_r( $GLOBALS );
5452 }
 53+
 54+

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r45431 | Updated deleteLinksFromNonexistent function:... | valhallasw | 02:10, 6 January 2009

Status & tagging log