Index: branches/REL1_4/phase3/maintenance/cleanupDupes.php |
— | — | @@ -35,7 +35,8 @@ |
36 | 36 | function fixDupes( $fixthem = false) { |
37 | 37 | $dbw =& wfGetDB( DB_MASTER ); |
38 | 38 | $cur = $dbw->tableName( 'cur' ); |
39 | | - $dbw->query( "LOCK TABLES $cur WRITE" ); |
| 39 | + $old = $dbw->tableName( 'old' ); |
| 40 | + $dbw->query( "LOCK TABLES $cur WRITE, $old WRITE" ); |
40 | 41 | echo "Checking for duplicate cur table entries... (this may take a while on a large wiki)\n"; |
41 | 42 | $res = $dbw->query( <<<END |
42 | 43 | SELECT cur_namespace,cur_title,count(*) as c,min(cur_id) as id |
— | — | @@ -55,15 +56,56 @@ |
56 | 57 | while( $row = $dbw->fetchObject( $res ) ) { |
57 | 58 | $ns = IntVal( $row->cur_namespace ); |
58 | 59 | $title = $dbw->addQuotes( $row->cur_title ); |
59 | | - $id = IntVal( $row->id ); |
| 60 | + |
| 61 | + # Get the first responding ID; that'll be the one we keep. |
| 62 | + $id = $dbw->selectField( 'cur', 'cur_id', array( |
| 63 | + 'cur_namespace' => $row->cur_namespace, |
| 64 | + 'cur_title' => $row->cur_title ) ); |
| 65 | + |
60 | 66 | echo "$ns:$row->cur_title (canonical ID $id)\n"; |
| 67 | + if( $id != $row->id ) { |
| 68 | + echo " ** minimum ID $row->id; "; |
| 69 | + $timeMin = $dbw->selectField( 'cur', 'cur_timestamp', array( |
| 70 | + 'cur_id' => $row->id ) ); |
| 71 | + $timeFirst = $dbw->selectField( 'cur', 'cur_timestamp', array( |
| 72 | + 'cur_id' => $id ) ); |
| 73 | + if( $timeMin == $timeFirst ) { |
| 74 | + echo "timestamps match at $timeFirst; ok\n"; |
| 75 | + } else { |
| 76 | + echo "timestamps don't match! min: $timeMin, first: $timeFirst; "; |
| 77 | + if( $timeMin > $timeFirst ) { |
| 78 | + $id = $row->id; |
| 79 | + echo "keeping minimum: $id\n"; |
| 80 | + } else { |
| 81 | + echo "keeping first: $id\n"; |
| 82 | + } |
| 83 | + } |
| 84 | + } |
| 85 | + |
61 | 86 | if( $fixthem ) { |
62 | 87 | $dbw->query( <<<END |
| 88 | +INSERT |
| 89 | + INTO $old |
| 90 | + (old_namespace, old_title, old_text, |
| 91 | + old_comment, old_user, old_user_text, |
| 92 | + old_timestamp, old_minor_edit, old_flags, |
| 93 | + inverse_timestamp) |
| 94 | +SELECT cur_namespace, cur_title, cur_text, |
| 95 | + cur_comment, cur_user, cur_user_text, |
| 96 | + cur_timestamp, cur_minor_edit, '', |
| 97 | + inverse_timestamp |
| 98 | + FROM $cur |
| 99 | + WHERE cur_namespace=$ns |
| 100 | + AND cur_title=$title |
| 101 | + AND cur_id != $id |
| 102 | +END |
| 103 | + ); |
| 104 | + $dbw->query( <<<END |
63 | 105 | DELETE |
64 | 106 | FROM $cur |
65 | 107 | WHERE cur_namespace=$ns |
66 | 108 | AND cur_title=$title |
67 | | - AND cur_id>$id |
| 109 | + AND cur_id != $id |
68 | 110 | END |
69 | 111 | ); |
70 | 112 | } |
Index: branches/REL1_4/phase3/RELEASE-NOTES |
— | — | @@ -650,6 +650,8 @@ |
651 | 651 | * (bug 2355) Use content language in image blacklist check |
652 | 652 | * (bug 2368) Avoid fatally breaking PHP 4.1.2 in a debug line |
653 | 653 | * (bug 2384) Fix typo in regex for IP address checking |
| 654 | +* Enhance cleanupDupes.php to save the pruned revisions to the old table for safety, and |
| 655 | + to try to keep the same duplicate entry that would be read by the wiki |
654 | 656 | |
655 | 657 | |
656 | 658 | === Caveats === |