Index: trunk/phase3/maintenance/archives/patch-categorylinks-better-collation.sql |
— | — | @@ -8,9 +8,9 @@ |
9 | 9 | -- to work for MySQL for now, without table prefixes, possibly other random |
10 | 10 | -- limitations. |
11 | 11 | ALTER TABLE categorylinks |
12 | | - ADD COLUMN cl_raw_sortkey varchar(255) binary NULL default NULL, |
| 12 | + ADD COLUMN cl_sortkey_prefix varchar(255) binary NOT NULL default '', |
13 | 13 | ADD COLUMN cl_collation tinyint NOT NULL default 0, |
14 | | - ADD COLUMN cl_type ENUM('page', 'subcat', 'file') NOT NULL, |
| 14 | + ADD COLUMN cl_type ENUM('page', 'subcat', 'file') NOT NULL default 'page', |
15 | 15 | ADD INDEX (cl_collation), |
16 | 16 | DROP INDEX cl_sortkey, |
17 | 17 | ADD INDEX cl_sortkey (cl_to, cl_type, cl_sortkey, cl_from); |
Index: trunk/phase3/maintenance/updateCollation.php |
— | — | @@ -1,6 +1,6 @@ |
2 | 2 | <?php |
3 | 3 | /** |
4 | | - * @file |
| 4 | + * @file |
5 | 5 | * @ingroup Maintenance |
6 | 6 | * @author Aryeh Gregor (Simetrical) |
7 | 7 | */ |
— | — | @@ -17,10 +17,10 @@ |
18 | 18 | |
19 | 19 | global $wgCollationVersion; |
20 | 20 | $this->mDescription = <<<TEXT |
21 | | -This script will find all rows in the categorylinks table whose collation is |
22 | | -out-of-date (cl_collation < $wgCollationVersion) and repopulate cl_sortkey |
23 | | -using cl_raw_sortkey. If everything's collation is up-to-date, it will do |
24 | | -nothing. |
| 21 | +This script will find all rows in the categorylinks table whose collation is |
| 22 | +out-of-date (cl_collation != $wgCollationVersion) and repopulate cl_sortkey |
| 23 | +using the page title and cl_sortkey_prefix. If everything's collation is |
| 24 | +up-to-date, it will do nothing. |
25 | 25 | TEXT; |
26 | 26 | |
27 | 27 | #$this->addOption( 'force', 'Run on all rows, even if the collation is supposed to be up-to-date.' ); |
— | — | @@ -32,8 +32,8 @@ |
33 | 33 | $dbw = wfGetDB( DB_MASTER ); |
34 | 34 | $count = $dbw->estimateRowCount( |
35 | 35 | 'categorylinks', |
36 | | - array( 'cl_from', 'cl_to', 'cl_raw_sortkey' ), |
37 | | - 'cl_collation < ' . $dbw->addQuotes( $wgCollationVersion ), |
| 36 | + array( 'cl_from', 'cl_to', 'cl_sortkey_prefix' ), |
| 37 | + 'cl_collation != ' . $dbw->addQuotes( $wgCollationVersion ), |
38 | 38 | __METHOD__ |
39 | 39 | ); |
40 | 40 | |
— | — | @@ -42,21 +42,50 @@ |
43 | 43 | $count = 0; |
44 | 44 | do { |
45 | 45 | $res = $dbw->select( |
46 | | - 'categorylinks', |
47 | | - array( 'cl_from', 'cl_to', 'cl_raw_sortkey' ), |
48 | | - 'cl_collation < ' . $dbw->addQuotes( $wgCollationVersion ), |
| 46 | + array( 'categorylinks', 'page' ), |
| 47 | + array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', |
| 48 | + 'cl_sortkey', 'page_namespace', 'page_title' |
| 49 | + ), |
| 50 | + array( |
| 51 | + 'cl_collation != ' . $dbw->addQuotes( $wgCollationVersion ), |
| 52 | + 'cl_from = page_id' |
| 53 | + ), |
49 | 54 | __METHOD__, |
50 | 55 | array( 'LIMIT' => self::BATCH_SIZE ) |
51 | 56 | ); |
52 | 57 | |
53 | 58 | $dbw->begin(); |
54 | 59 | foreach ( $res as $row ) { |
55 | | - # TODO: Handle the case where cl_raw_sortkey is null. |
| 60 | + $title = Title::newFromRow( $row ); |
| 61 | + $rawSortkey = $title->getCategorySortkey(); |
| 62 | + if ( $row->cl_collation == 0 ) { |
| 63 | + # cl_collation 0 marks an old-style row, so the prefix has to be |
| 64 | + # derived from the existing cl_sortkey. |
| 65 | + if ( $row->cl_sortkey == $rawSortkey ) { |
| 66 | + $prefix = ''; |
| 67 | + } else { |
| 68 | + # Custom sortkey, use it as a prefix |
| 69 | + $prefix = $row->cl_sortkey; |
| 70 | + } |
| 71 | + } else { |
| 72 | + $prefix = $row->cl_sortkey_prefix; |
| 73 | + } |
| 74 | + # cl_type will be wrong for lots of pages if cl_collation is 0, |
| 75 | + # so let's update it while we're here. |
| 76 | + if ( $title->getNamespace() == NS_CATEGORY ) { |
| 77 | + $type = 'subcat'; |
| 78 | + } elseif ( $title->getNamespace() == NS_FILE ) { |
| 79 | + $type = 'file'; |
| 80 | + } else { |
| 81 | + $type = 'page'; |
| 82 | + } |
56 | 83 | $dbw->update( |
57 | 84 | 'categorylinks', |
58 | 85 | array( |
59 | | - 'cl_sortkey' => $wgContLang->convertToSortkey( $row->cl_raw_sortkey ), |
60 | | - 'cl_collation' => $wgCollationVersion |
| 86 | + 'cl_sortkey' => $wgContLang->convertToSortkey( $prefix . $rawSortkey ), |
| 87 | + 'cl_sortkey_prefix' => $prefix, |
| 88 | + 'cl_collation' => $wgCollationVersion, |
| 89 | + 'cl_type' => $type, |
61 | 90 | ), |
62 | 91 | array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ), |
63 | 92 | __METHOD__ |
— | — | @@ -64,9 +93,9 @@ |
65 | 94 | } |
66 | 95 | $dbw->commit(); |
67 | 96 | |
68 | | - $count += self::BATCH_SIZE; |
| 97 | + $count += $res->numRows(); |
69 | 98 | $this->output( "$count done.\n" ); |
70 | | - } while ( $res->numRows() >= self::BATCH_SIZE ); |
| 99 | + } while ( $res->numRows() == self::BATCH_SIZE ); |
71 | 100 | } |
72 | 101 | } |
73 | 102 | |
Index: trunk/phase3/includes/CategoryPage.php |
— | — | @@ -270,7 +270,7 @@ |
271 | 271 | foreach ( array( 'page', 'subcat', 'file' ) as $type ) { |
272 | 272 | $res = $dbr->select( |
273 | 273 | $tables, |
274 | | - array_merge( $fields, array( 'cl_raw_sortkey' ) ), |
| 274 | + array_merge( $fields, array( 'cl_sortkey_prefix' ) ), |
275 | 275 | $conds + array( 'cl_type' => $type ) + ( $type == 'page' ? array( $pageCondition ) : array() ), |
276 | 276 | __METHOD__, |
277 | 277 | $opts + ( $type == 'page' ? array( 'LIMIT' => $this->limit + 1 ) : array() ), |
— | — | @@ -286,14 +286,15 @@ |
287 | 287 | } |
288 | 288 | |
289 | 289 | $title = Title::newFromRow( $row ); |
| 290 | + $rawSortkey = $row->cl_sortkey_prefix . $title->getCategorySortkey(); |
290 | 291 | |
291 | 292 | if ( $title->getNamespace() == NS_CATEGORY ) { |
292 | 293 | $cat = Category::newFromRow( $row, $title ); |
293 | | - $this->addSubcategoryObject( $cat, $row->cl_raw_sortkey, $row->page_len ); |
| 294 | + $this->addSubcategoryObject( $cat, $rawSortkey, $row->page_len ); |
294 | 295 | } elseif ( $this->showGallery && $title->getNamespace() == NS_FILE ) { |
295 | | - $this->addImage( $title, $row->cl_raw_sortkey, $row->page_len, $row->page_is_redirect ); |
| 296 | + $this->addImage( $title, $rawSortkey, $row->page_len, $row->page_is_redirect ); |
296 | 297 | } else { |
297 | | - $this->addPage( $title, $row->cl_raw_sortkey, $row->page_len, $row->page_is_redirect ); |
| 298 | + $this->addPage( $title, $rawSortkey, $row->page_len, $row->page_is_redirect ); |
298 | 299 | } |
299 | 300 | } |
300 | 301 | } |
Index: trunk/phase3/includes/parser/Parser.php |
— | — | @@ -5056,12 +5056,8 @@ |
5057 | 5057 | global $wgCategoryPrefixedDefaultSortkey; |
5058 | 5058 | if ( $this->mDefaultSort !== false ) { |
5059 | 5059 | return $this->mDefaultSort; |
5060 | | - } elseif ( $this->mTitle->getNamespace() == NS_CATEGORY || |
5061 | | - !$wgCategoryPrefixedDefaultSortkey ) |
5062 | | - { |
5063 | | - return $this->mTitle->getText(); |
5064 | 5060 | } else { |
5065 | | - return $this->mTitle->getPrefixedText(); |
| 5061 | + return $this->mTitle->getCategorySortkey(); |
5066 | 5062 | } |
5067 | 5063 | } |
5068 | 5064 | |
Index: trunk/phase3/includes/LinksUpdate.php |
— | — | @@ -441,14 +441,31 @@ |
442 | 442 | } else { |
443 | 443 | $type = 'page'; |
444 | 444 | } |
445 | | - $convertedSortkey = $wgContLang->convertToSortkey( $sortkey ); |
446 | | - # TODO: Set $sortkey to null if it's redundant |
| 445 | + |
| 446 | + # TODO: This is not quite right: if someone deliberately sets a sort |
| 447 | + # key prefix identical to the title's default sortkey, it is stored |
| 448 | + # as an empty prefix. This should be fixed by refactoring code to replace |
| 449 | + # $sortkey in this array by a prefix, but it's basically harmless |
| 450 | + # (Title::moveTo() has had the same issue for a long time). |
| 451 | + if ( $this->mTitle->getCategorySortkey() == $sortkey ) { |
| 452 | + $prefix = ''; |
| 453 | + $sortkey = $wgContLang->convertToSortkey( $sortkey ); |
| 454 | + } else { |
| 455 | + # Treat a custom sortkey as a prefix to the default sortkey, so |
| 456 | + # that if several pages are forced to sort under the same key |
| 457 | + # (e.g. '*'), they still sort by title among themselves in the |
| 458 | + # category rather than in an arbitrary order such as page_id. |
| 459 | + $prefix = $sortkey; |
| 460 | + $sortkey = $wgContLang->convertToSortkey( |
| 461 | + $prefix . $this->mTitle->getCategorySortkey() ); |
| 462 | + } |
| 463 | + |
447 | 464 | $arr[] = array( |
448 | 465 | 'cl_from' => $this->mId, |
449 | 466 | 'cl_to' => $name, |
450 | | - 'cl_sortkey' => $convertedSortkey, |
| 467 | + 'cl_sortkey' => $sortkey, |
451 | 468 | 'cl_timestamp' => $this->mDb->timestamp(), |
452 | | - 'cl_raw_sortkey' => $sortkey, |
| 469 | + 'cl_sortkey_prefix' => $prefix, |
453 | 470 | 'cl_collation' => $wgCollationVersion, |
454 | 471 | 'cl_type' => $type, |
455 | 472 | ); |
Index: trunk/phase3/includes/Title.php |
— | — | @@ -4137,4 +4137,22 @@ |
4138 | 4138 | |
4139 | 4139 | return $types; |
4140 | 4140 | } |
| 4141 | + |
| 4142 | + /** |
| 4143 | + * Returns what the default sort key for categories would be, if |
| 4144 | + * {{defaultsort:}} isn't used. This is the same as getText() for |
| 4145 | + * categories, and for everything if $wgCategoryPrefixedDefaultSortkey is |
| 4146 | + * false; otherwise it's the same as getPrefixedText(). |
| 4147 | + * |
| 4148 | + * @return string |
| 4149 | + */ |
| 4150 | + public function getCategorySortkey() { |
| 4151 | + global $wgCategoryPrefixedDefaultSortkey; |
| 4152 | + if ( $this->getNamespace() == NS_CATEGORY |
| 4153 | + || !$wgCategoryPrefixedDefaultSortkey ) { |
| 4154 | + return $this->getText(); |
| 4155 | + } else { |
| 4156 | + return $this->getPrefixedText(); |
| 4157 | + } |
| 4158 | + } |
4141 | 4159 | } |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -4476,8 +4476,8 @@ |
4477 | 4477 | /** |
4478 | 4478 | * A version indicator for collations that will be stored in cl_collation for |
4479 | 4479 | * all new rows. Used when the collation algorithm changes: a script checks |
4480 | | - * for all rows where cl_collation < $wgCollationVersion and regenerates |
4481 | | - * cl_sortkey based on cl_raw_sortkey. |
| 4480 | + * for all rows where cl_collation != $wgCollationVersion and regenerates |
| 4481 | + * cl_sortkey based on the page name and cl_sortkey_prefix. |
4482 | 4482 | */ |
4483 | 4483 | $wgCollationVersion = 1; |
4484 | 4484 | |
Index: trunk/phase3/languages/Language.php |
— | — | @@ -2939,7 +2939,9 @@ |
2940 | 2940 | * Given a string, convert it to a (hopefully short) key that can be used |
2941 | 2941 | * for efficient sorting. A binary sort according to the sortkeys |
2942 | 2942 | * corresponds to a logical sort of the corresponding strings. Applying |
2943 | | - * this to cl_raw_sortkey produces cl_sortkey. |
| 2943 | + * this to cl_sortkey_prefix concatenated with the page title (possibly |
| 2944 | + * with namespace prefix, depending on $wgCategoryPrefixedDefaultSortkey) |
| 2945 | + * gives you cl_sortkey. |
2944 | 2946 | * |
2945 | 2947 | * @param string $string UTF-8 string |
2946 | 2948 | * @return string Binary sortkey |