r45721 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r45720 | r45721 | r45722 >
Date: 23:58, 13 January 2009
Author: valhallasw
Status: ok (Comments)
Tags:
Comment:
Fixes for r45514 and r45516:

* Removed deprecated constructs
* Replaced SQL queries with their functional equivalents
** Removed deleteBatch function: equivalent to $dbw->delete()
* Allow slave servers to catch up before deleting more rows
* Fixed to use a new, unbuffered, slave database connection
* This one should actually work
Modified paths:
  • /trunk/phase3/maintenance/refreshLinks.inc (modified) (history)
  • /trunk/phase3/maintenance/refreshLinks.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/refreshLinks.inc
@@ -146,12 +146,13 @@
147147 * @author Merlijn van Deen <valhallasw@arctus.nl>
148148 */
149149 function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
150 - $fname = 'deleteLinksFromNonexistent';
151150 wfWaitForSlaves( $maxLag );
152151
153152 $dbw = wfGetDB( DB_MASTER );
154 - $dbr = wfGetDB( DB_SLAVE );
155 - $dbr->bufferResults(false);
 153+
 154+ $lb = wfGetLBFactory()->newMainLB();
 155+ $dbr = $lb->getConnection( DB_SLAVE );
 156+ $dbr->bufferResults( false );
156157
157158 $linksTables = array( // table name => page_id field
158159 'pagelinks' => 'pl_from',
@@ -161,65 +162,41 @@
162163 'externallinks' => 'el_from',
163164 );
164165
165 - $readPage = $dbr->tableName( 'page' );
166 -
167166 foreach ( $linksTables as $table => $field ) {
168 - $readLinks = $dbr->tableName( $table );
169 -
170167 print "Retrieving illegal entries from $table... ";
171168
172 - $sql = "SELECT DISTINCT( $field ) FROM $readLinks LEFT JOIN $readPage ON $field=page_id WHERE page_id IS NULL;";
173 - $results = $dbr->query( $sql, $fname . ':' . $readLinks );
 169+ // SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
 170+ $results = $dbr->select( array( $table, 'page' ),
 171+ $field,
 172+ array('page_id' => null ),
 173+ __METHOD__,
 174+ 'DISTINCT',
 175+ array( 'page' => array( 'LEFT JOIN', "$field=page_id"))
 176+ );
174177
175 - print $results->numRows() . " illegal " . $field. "s. ";
 178+ $counter = 0;
 179+ $list = array();
 180+ print "0..";
176181
177 - if ( $results->numRows() > 0 ) {
178 - $counter = 0;
179 - $list = array();
180 - print "Removing illegal links: 1..";
181 -
182 - foreach( $results as $row ) {
183 - $counter++;
184 - $list[] = $row->$field;
185 - if ( ( $counter % $batchSize ) == 0 ) {
186 - print $counter . "..";
187 - deleteBatch($dbw, $table, $field, $list);
188 - $list = array();
189 - }
 182+ foreach( $results as $row ) {
 183+ $counter++;
 184+ $list[] = $row->$field;
 185+ if ( ( $counter % $batchSize ) == 0 ) {
 186+ wfWaitForSlaves(5);
 187+ $dbw->delete( $table, array( $field => $list ), __METHOD__ );
 188+
 189+ print $counter . "..";
 190+ $list = array();
190191 }
191 - print $counter;
192 - deleteBatch($dbw, $table, $field, $list);
193192 }
194193
195 - print "\n";
196 - }
197 -}
198 -
199 -/* Deletes a batch of items from a table.
200 - * Runs the query: DELETE FROM <$table> WHERE <$field> IN (<$list>)
201 - *
202 - * @param $dbw Database Database object to run the DELETE query on
203 - * @param $table table to work on; will be converted via $dbw->tableName.
204 - * @param $field column to search in
205 - * @param $list values to remove. Array with SQL-safe (!) values.
206 - *
207 - * @author Merlijn van Deen <valhallasw@arctus.nl>
208 - */
209 -function deleteBatch($dbw, $table, $field, $list) {
210 - if (count($list) == 0) return;
211 -
212 - $masterLinks = $dbw->tableName( $table );
213 - $fname = "deleteBatch:masterLinks";
214 -
215 - if ( !$dbw->ping() ) {
216 - print "\nDB disconnected, reconnecting...";
217 - while ( !$dbw->ping() ) {
218 - print ".";
219 - sleep(10);
 194+ print $counter;
 195+ if (count($list) > 0) {
 196+ $dbw->delete( $table, array( $field => $list ), __METHOD__ );
220197 }
 198+
221199 print "\n";
222200 }
223 -
224 - $sql = "DELETE FROM $masterLinks WHERE $field IN (" . join("," , $list) . ");";
225 - $dbw->query($sql, $fname);
 201+
 202+ $lb->closeAll();
226203 }
Index: trunk/phase3/maintenance/refreshLinks.php
@@ -45,6 +45,10 @@
4646 refreshLinks( $start, $options['new-only'], $options['m'], $options['e'], $options['redirects-only'], $options['old-redirects-only'] );
4747 }
4848
 49+if ( !isset( $options['batch-size'] ) ) {
 50+ $options['batch-size'] = 100;
 51+}
 52+
4953 deleteLinksFromNonexistent($options['m'], $options['batch-size']);
5054
5155 if ( $options['globals'] ) {

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r45514 | Recommit of r45431 with these changes:... | valhallasw | 19:51, 7 January 2009
r45516 | * Added batch-size parameter to optionsWithArgs... | valhallasw | 20:34, 7 January 2009

Comments

#Comment by Brion VIBBER (talk | contribs)   23:59, 14 January 2009

It does actually work! Yay! :D

Status & tagging log