Index: trunk/phase3/includes/CategoryPage.php |
— | — | @@ -226,6 +226,8 @@ |
227 | 227 | } |
228 | 228 | |
229 | 229 | function doCategoryQuery() { |
| 230 | + global $wgExperimentalCategorySort; |
| 231 | + |
230 | 232 | $dbr = wfGetDB( DB_SLAVE, 'category' ); |
231 | 233 | if ( $this->from != '' ) { |
232 | 234 | $pageCondition = 'cl_sortkey >= ' . $dbr->addQuotes( $this->from ); |
— | — | @@ -238,17 +240,23 @@ |
239 | 241 | $this->flip = false; |
240 | 242 | } |
241 | 243 | |
| 244 | + $tables = array( 'page', 'categorylinks', 'category' ); |
| 245 | + $fields = array( 'page_title', 'page_namespace', 'page_len', |
| 246 | + 'page_is_redirect', 'cl_sortkey', 'cat_id', 'cat_title', |
| 247 | + 'cat_subcats', 'cat_pages', 'cat_files' ); |
| 248 | + $conds = array( $pageCondition, 'cl_to' => $this->title->getDBkey() ); |
| 249 | + $opts = array( 'ORDER BY' => $this->flip ? 'cl_sortkey DESC' : |
| 250 | + 'cl_sortkey', 'USE INDEX' => array( 'categorylinks' => 'cl_sortkey' ) ); |
| 251 | + $joins = array( 'categorylinks' => array( 'INNER JOIN', 'cl_from = page_id' ), |
| 252 | + 'category' => array( 'LEFT JOIN', 'cat_title = page_title AND page_namespace = ' . NS_CATEGORY ) ); |
| 253 | + |
242 | 254 | $res = $dbr->select( |
243 | | - array( 'page', 'categorylinks', 'category' ), |
244 | | - array( 'page_title', 'page_namespace', 'page_len', 'page_is_redirect', 'cl_sortkey', |
245 | | - 'cat_id', 'cat_title', 'cat_subcats', 'cat_pages', 'cat_files' ), |
246 | | - array( $pageCondition, 'cl_to' => $this->title->getDBkey() ), |
| 255 | + $tables, |
| 256 | + $fields, |
| 257 | + $conds + ( $wgExperimentalCategorySort ? array( 'cl_type' => 'page' ) : array() ), |
247 | 258 | __METHOD__, |
248 | | - array( 'ORDER BY' => $this->flip ? 'cl_sortkey DESC' : 'cl_sortkey', |
249 | | - 'USE INDEX' => array( 'categorylinks' => 'cl_sortkey' ), |
250 | | - 'LIMIT' => $this->limit + 1 ), |
251 | | - array( 'categorylinks' => array( 'INNER JOIN', 'cl_from = page_id' ), |
252 | | - 'category' => array( 'LEFT JOIN', 'cat_title = page_title AND page_namespace = ' . NS_CATEGORY ) ) |
| 259 | + $opts + array( 'LIMIT' => $this->limit + 1 ), |
| 260 | + $joins |
253 | 261 | ); |
254 | 262 | |
255 | 263 | $count = 0; |
— | — | @@ -273,6 +281,45 @@ |
274 | 282 | $this->addPage( $title, $x->cl_sortkey, $x->page_len, $x->page_is_redirect ); |
275 | 283 | } |
276 | 284 | } |
| 285 | + |
| 286 | + if ( $wgExperimentalCategorySort ) { |
| 287 | + # Now add all subcategories and files. TODO: rewrite to be sane |
| 288 | + # (this is basically a proof-of-concept, e.g., the two queries below carry no LIMIT, so there is no pagination here). |
| 289 | + $subcatsRes = $dbr->select( |
| 290 | + $tables, $fields, |
| 291 | + $conds + array( 'cl_type' => 'subcat' ), |
| 292 | + __METHOD__, $opts, $joins |
| 293 | + ); |
| 294 | + |
| 295 | + foreach ( $subcatsRes as $row ) { |
| 296 | + $title = Title::newFromRow( $row ); |
| 297 | + |
| 298 | + if ( $title->getNamespace() == NS_CATEGORY ) { |
| 299 | + $cat = Category::newFromRow( $row, $title ); |
| 300 | + $this->addSubcategoryObject( $cat, $row->cl_sortkey, $row->page_len ); |
| 301 | + } else { |
| 302 | + # Will handle this sanely in final code |
| 303 | + throw new MWException( 'Debug: cl_type = subcat but not category' ); |
| 304 | + } |
| 305 | + } |
| 306 | + |
| 307 | + $filesRes = $dbr->select( |
| 308 | + $tables, $fields, |
| 309 | + $conds + array( 'cl_type' => 'file' ), |
| 310 | + __METHOD__, $opts, $joins |
| 311 | + ); |
| 312 | + |
| 313 | + foreach ( $filesRes as $row ) { |
| 314 | + $title = Title::newFromRow( $row ); |
| 315 | + |
| 316 | + if ( $this->showGallery && $title->getNamespace() == NS_FILE ) { |
| 317 | + $this->addImage( $title, $row->cl_sortkey, $row->page_len, $row->page_is_redirect ); |
| 318 | + } else { |
| 319 | + # More temporary debugging. NOTE(review): this is also thrown for genuine NS_FILE rows whenever $this->showGallery is false |
| 320 | + throw new MWException( 'Debug: cl_type = file but not file' ); |
| 321 | + } |
| 322 | + } |
| 323 | + } |
277 | 324 | } |
278 | 325 | |
279 | 326 | function getCategoryTop() { |
Index: trunk/phase3/includes/LinksUpdate.php |
— | — | @@ -426,18 +426,40 @@ |
427 | 427 | * @private |
428 | 428 | */ |
429 | 429 | function getCategoryInsertions( $existing = array() ) { |
430 | | - global $wgContLang; |
| 430 | + global $wgContLang, $wgExperimentalCategorySort, $wgCollationVersion; |
431 | 431 | $diffs = array_diff_assoc( $this->mCategories, $existing ); |
432 | 432 | $arr = array(); |
433 | 433 | foreach ( $diffs as $name => $sortkey ) { |
434 | 434 | $nt = Title::makeTitleSafe( NS_CATEGORY, $name ); |
435 | 435 | $wgContLang->findVariantLink( $name, $nt, true ); |
436 | | - $arr[] = array( |
437 | | - 'cl_from' => $this->mId, |
438 | | - 'cl_to' => $name, |
439 | | - 'cl_sortkey' => $sortkey, |
440 | | - 'cl_timestamp' => $this->mDb->timestamp() |
441 | | - ); |
| 436 | + |
| 437 | + if ( $wgExperimentalCategorySort ) { |
| 438 | + if ( $this->mTitle->getNamespace() == NS_CATEGORY ) { |
| 439 | + $type = 'subcat'; |
| 440 | + } elseif ( $this->mTitle->getNamespace() == NS_FILE ) { |
| 441 | + $type = 'file'; |
| 442 | + } else { |
| 443 | + $type = 'page'; |
| 444 | + } |
| 445 | + $convertedSortkey = $wgContLang->convertToSortkey( $sortkey ); |
| 446 | + # TODO: Set $sortkey to null if it's redundant |
| 447 | + $arr[] = array( |
| 448 | + 'cl_from' => $this->mId, |
| 449 | + 'cl_to' => $name, |
| 450 | + 'cl_sortkey' => $convertedSortkey, |
| 451 | + 'cl_timestamp' => $this->mDb->timestamp(), |
| 452 | + 'cl_raw_sortkey' => $sortkey, |
| 453 | + 'cl_collation' => $wgCollationVersion, |
| 454 | + 'cl_type' => $type, |
| 455 | + ); |
| 456 | + } else { |
| 457 | + $arr[] = array( |
| 458 | + 'cl_from' => $this->mId, |
| 459 | + 'cl_to' => $name, |
| 460 | + 'cl_sortkey' => $sortkey, |
| 461 | + 'cl_timestamp' => $this->mDb->timestamp() |
| 462 | + ); |
| 463 | + } |
442 | 464 | } |
443 | 465 | return $arr; |
444 | 466 | } |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -4458,6 +4458,24 @@ |
4459 | 4459 | */ |
4460 | 4460 | $wgCategoryPrefixedDefaultSortkey = true; |
4461 | 4461 | |
| 4462 | +/** |
| 4463 | + * Enable experimental support for non-braindead collation on category pages. |
| 4464 | + * For this to work, you need to alter your categorylinks table by applying |
| 4465 | + * maintenance/archives/patch-categorylinks-better-collation.sql, then keep |
| 4466 | + * up-to-date with changes that are made to that file (they won't be |
| 4467 | + * automatically applied). You should also set $wgUseDumbLinkUpdate = true and |
| 4468 | + * run maintenance/refreshLinks.php. |
| 4469 | + */ |
| 4470 | +$wgExperimentalCategorySort = false; |
| 4471 | + |
| 4472 | +/** |
| 4473 | + * A version indicator for collations that will be stored in cl_collation for |
| 4474 | + * all new rows. Used when the collation algorithm changes: a script checks |
| 4475 | + * for all rows where cl_collation < $wgCollationVersion and regenerates |
| 4476 | + * cl_sortkey based on cl_raw_sortkey. |
| 4477 | + */ |
| 4478 | +$wgCollationVersion = 0; |
| 4479 | + |
4462 | 4480 | /** @} */ # End categories } |
4463 | 4481 | |
4464 | 4482 | /*************************************************************************//** |
Index: trunk/phase3/languages/Language.php |
— | — | @@ -2934,4 +2934,58 @@ |
2935 | 2935 | function getConvRuleTitle() { |
2936 | 2936 | return $this->mConverter->getConvRuleTitle(); |
2937 | 2937 | } |
| 2938 | + |
| 2939 | + /** |
| 2940 | + * Given a string, convert it to a (hopefully short) key that can be used |
| 2941 | + * for efficient sorting. A binary sort according to the sortkeys |
| 2942 | + * corresponds to a logical sort of the corresponding strings. Applying |
| 2943 | + * this to cl_raw_sortkey produces cl_sortkey. |
| 2944 | + * |
| 2945 | + * @param string $string UTF-8 string |
| 2946 | + * @return string Binary sortkey |
| 2947 | + */ |
| 2948 | + public function convertToSortkey( $string ) { |
| 2949 | + # Stub function for now: returns the input unchanged |
| 2950 | + return $string; |
| 2951 | + } |
| 2952 | + |
| 2953 | + /** |
| 2954 | + * Does it make sense for lists to be split up into sections based on their |
| 2955 | + * first letter? Logogram-based scripts probably want to return false. |
| 2956 | + * |
| 2957 | + * TODO: Use this in CategoryPage.php. |
| 2958 | + * |
| 2959 | + * @return boolean |
| 2960 | + */ |
| 2961 | + public function usesFirstLettersInLists() { |
| 2962 | + return true; |
| 2963 | + } |
| 2964 | + |
| 2965 | + /** |
| 2966 | + * Given a string, return the logical "first letter" to be used for |
| 2967 | + * grouping on category pages and so on. This has to be coordinated |
| 2968 | + * carefully with convertToSortkey(), or else the sorted list might jump |
| 2969 | + * back and forth between the same "initial letters" or show other pathological |
| 2970 | + * behavior. For instance, if you just return the first character, but "a" |
| 2971 | + * sorts the same as "A" based on convertToSortkey(), then you might get a |
| 2972 | + * list like |
| 2973 | + * |
| 2974 | + * == A == |
| 2975 | + * * [[Aardvark]] |
| 2976 | + * |
| 2977 | + * == a == |
| 2978 | + * * [[antelope]] |
| 2979 | + * |
| 2980 | + * == A == |
| 2981 | + * * [[Ape]] |
| 2982 | + * |
| 2983 | + * etc., assuming for the sake of argument that $wgCapitalLinks is false. |
| 2984 | + * Obviously, this is ignored if usesFirstLettersInLists() is false. |
| 2985 | + * |
| 2986 | + * @param string $string UTF-8 string |
| 2987 | + * @return string UTF-8 string corresponding to the first letter of input |
| 2988 | + */ |
| 2989 | + public function firstLetterForLists( $string ) { |
| 2990 | + return mb_substr( $string, 0, 1 ); |
| 2991 | + } |
2938 | 2992 | } |