r69810 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r69809 | r69810 | r69811 >
Date:19:52, 23 July 2010
Author:simetrical
Status:resolved
Tags:
Comment:
Initial commit for category collation framework

Hidden behind $wgExperimentalCategorySort until it's reasonably
complete. If that's false, no behavior should change (but I didn't test
carefully, so poke me if there's a bug). See DefaultSettings.php for
documentation on setting it to true. Currently you should not do this
except if you're working on the feature, since functionality is not
close to reasonable yet and will change rapidly.

Bug 1211 is already fixed with this commit for me. However, many other
things still need to be done, so this is all very much a
proof-of-concept.
Modified paths:
  • /trunk/phase3/includes/CategoryPage.php (modified) (history)
  • /trunk/phase3/includes/DefaultSettings.php (modified) (history)
  • /trunk/phase3/includes/LinksUpdate.php (modified) (history)
  • /trunk/phase3/languages/Language.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/CategoryPage.php
@@ -226,6 +226,8 @@
227227 }
228228
229229 function doCategoryQuery() {
 230+ global $wgExperimentalCategorySort;
 231+
230232 $dbr = wfGetDB( DB_SLAVE, 'category' );
231233 if ( $this->from != '' ) {
232234 $pageCondition = 'cl_sortkey >= ' . $dbr->addQuotes( $this->from );
@@ -238,17 +240,23 @@
239241 $this->flip = false;
240242 }
241243
 244+ $tables = array( 'page', 'categorylinks', 'category' );
 245+ $fields = array( 'page_title', 'page_namespace', 'page_len',
 246+ 'page_is_redirect', 'cl_sortkey', 'cat_id', 'cat_title',
 247+ 'cat_subcats', 'cat_pages', 'cat_files' );
 248+ $conds = array( $pageCondition, 'cl_to' => $this->title->getDBkey() );
 249+ $opts = array( 'ORDER BY' => $this->flip ? 'cl_sortkey DESC' :
 250+ 'cl_sortkey', 'USE INDEX' => array( 'categorylinks' => 'cl_sortkey' ) );
 251+ $joins = array( 'categorylinks' => array( 'INNER JOIN', 'cl_from = page_id' ),
 252+ 'category' => array( 'LEFT JOIN', 'cat_title = page_title AND page_namespace = ' . NS_CATEGORY ) );
 253+
242254 $res = $dbr->select(
243 - array( 'page', 'categorylinks', 'category' ),
244 - array( 'page_title', 'page_namespace', 'page_len', 'page_is_redirect', 'cl_sortkey',
245 - 'cat_id', 'cat_title', 'cat_subcats', 'cat_pages', 'cat_files' ),
246 - array( $pageCondition, 'cl_to' => $this->title->getDBkey() ),
 255+ $tables,
 256+ $fields,
 257+ $conds + ( $wgExperimentalCategorySort ? array( 'cl_type' => 'page' ) : array() ),
247258 __METHOD__,
248 - array( 'ORDER BY' => $this->flip ? 'cl_sortkey DESC' : 'cl_sortkey',
249 - 'USE INDEX' => array( 'categorylinks' => 'cl_sortkey' ),
250 - 'LIMIT' => $this->limit + 1 ),
251 - array( 'categorylinks' => array( 'INNER JOIN', 'cl_from = page_id' ),
252 - 'category' => array( 'LEFT JOIN', 'cat_title = page_title AND page_namespace = ' . NS_CATEGORY ) )
 259+ $opts + array( 'LIMIT' => $this->limit + 1 ),
 260+ $joins
253261 );
254262
255263 $count = 0;
@@ -273,6 +281,45 @@
274282 $this->addPage( $title, $x->cl_sortkey, $x->page_len, $x->page_is_redirect );
275283 }
276284 }
 285+
 286+ if ( $wgExperimentalCategorySort ) {
 287+ # Now add all subcategories and files. TODO: rewrite to be sane
 288+ # (this is basically a proof-of-concept, e.g., no pagination here).
 289+ $subcatsRes = $dbr->select(
 290+ $tables, $fields,
 291+ $conds + array( 'cl_type' => 'subcat' ),
 292+ __METHOD__, $opts, $joins
 293+ );
 294+
 295+ foreach ( $subcatsRes as $row ) {
 296+ $title = Title::newFromRow( $row );
 297+
 298+ if ( $title->getNamespace() == NS_CATEGORY ) {
 299+ $cat = Category::newFromRow( $row, $title );
 300+ $this->addSubcategoryObject( $cat, $row->cl_sortkey, $row->page_len );
 301+ } else {
 302+ # Will handle this sanely in final code
 303+ throw new MWException( 'Debug: cl_type = subcat but not category' );
 304+ }
 305+ }
 306+
 307+ $filesRes = $dbr->select(
 308+ $tables, $fields,
 309+ $conds + array( 'cl_type' => 'file' ),
 310+ __METHOD__, $opts, $joins
 311+ );
 312+
 313+ foreach ( $filesRes as $row ) {
 314+ $title = Title::newFromRow( $row );
 315+
 316+ if ( $this->showGallery && $title->getNamespace() == NS_FILE ) {
 317+ $this->addImage( $title, $row->cl_sortkey, $row->page_len, $row->page_is_redirect );
 318+ } else {
 319+ # More temporary debugging
 320+ throw new MWException( 'Debug: cl_type = file but not file' );
 321+ }
 322+ }
 323+ }
277324 }
278325
279326 function getCategoryTop() {
Index: trunk/phase3/includes/LinksUpdate.php
@@ -426,18 +426,40 @@
427427 * @private
428428 */
429429 function getCategoryInsertions( $existing = array() ) {
430 - global $wgContLang;
 430+ global $wgContLang, $wgExperimentalCategorySort, $wgCollationVersion;
431431 $diffs = array_diff_assoc( $this->mCategories, $existing );
432432 $arr = array();
433433 foreach ( $diffs as $name => $sortkey ) {
434434 $nt = Title::makeTitleSafe( NS_CATEGORY, $name );
435435 $wgContLang->findVariantLink( $name, $nt, true );
436 - $arr[] = array(
437 - 'cl_from' => $this->mId,
438 - 'cl_to' => $name,
439 - 'cl_sortkey' => $sortkey,
440 - 'cl_timestamp' => $this->mDb->timestamp()
441 - );
 436+
 437+ if ( $wgExperimentalCategorySort ) {
 438+ if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
 439+ $type = 'subcat';
 440+ } elseif ( $this->mTitle->getNamespace() == NS_FILE ) {
 441+ $type = 'file';
 442+ } else {
 443+ $type = 'page';
 444+ }
 445+ $convertedSortkey = $wgContLang->convertToSortkey( $sortkey );
 446+ # TODO: Set $sortkey to null if it's redundant
 447+ $arr[] = array(
 448+ 'cl_from' => $this->mId,
 449+ 'cl_to' => $name,
 450+ 'cl_sortkey' => $convertedSortkey,
 451+ 'cl_timestamp' => $this->mDb->timestamp(),
 452+ 'cl_raw_sortkey' => $sortkey,
 453+ 'cl_collation' => $wgCollationVersion,
 454+ 'cl_type' => $type,
 455+ );
 456+ } else {
 457+ $arr[] = array(
 458+ 'cl_from' => $this->mId,
 459+ 'cl_to' => $name,
 460+ 'cl_sortkey' => $sortkey,
 461+ 'cl_timestamp' => $this->mDb->timestamp()
 462+ );
 463+ }
442464 }
443465 return $arr;
444466 }
Index: trunk/phase3/includes/DefaultSettings.php
@@ -4458,6 +4458,24 @@
44594459 */
44604460 $wgCategoryPrefixedDefaultSortkey = true;
44614461
 4462+/**
 4463+ * Enable experimental support for non-braindead collation on category pages.
 4464+ * For this to work, you need to alter your categorylinks table by applying
 4465+ * maintenance/archives/patch-categorylinks-better-collation.sql, then keep
 4466+ * up-to-date with changes that are made to that file (they won't be
 4467+ * automatically applied). You should also set $wgUseDumbLinkUpdate = true and
 4468+ * run maintenance/refreshLinks.php.
 4469+ */
 4470+$wgExperimentalCategorySort = false;
 4471+
 4472+/**
 4473+ * A version indicator for collations that will be stored in cl_collation for
 4474+ * all new rows. Used when the collation algorithm changes: a script checks
 4475+ * for all rows where cl_collation < $wgCollationVersion and regenerates
 4476+ * cl_sortkey based on cl_raw_sortkey.
 4477+ */
 4478+$wgCollationVersion = 0;
 4479+
44624480 /** @} */ # End categories }
44634481
44644482 /*************************************************************************//**
Index: trunk/phase3/languages/Language.php
@@ -2934,4 +2934,58 @@
29352935 function getConvRuleTitle() {
29362936 return $this->mConverter->getConvRuleTitle();
29372937 }
 2938+
 2939+ /**
 2940+ * Given a string, convert it to a (hopefully short) key that can be used
 2941+ * for efficient sorting. A binary sort according to the sortkeys
 2942+ * corresponds to a logical sort of the corresponding strings. Applying
 2943+ * this to cl_raw_sortkey produces cl_sortkey.
 2944+ *
 2945+ * @param string $string UTF-8 string
 2946+ * @return string Binary sortkey
 2947+ */
 2948+ public function convertToSortkey( $string ) {
 2949+ # Stub function for now
 2950+ return $string;
 2951+ }
 2952+
 2953+ /**
 2954+ * Does it make sense for lists to be split up into sections based on their
 2955+ * first letter? Logogram-based scripts probably want to return false.
 2956+ *
 2957+ * TODO: Use this in CategoryPage.php.
 2958+ *
 2959+ * @return boolean
 2960+ */
 2961+ public function usesFirstLettersInLists() {
 2962+ return true;
 2963+ }
 2964+
 2965+ /**
 2966+ * Given a string, return the logical "first letter" to be used for
 2967+ * grouping on category pages and so on. This has to be coordinated
 2968+ * carefully with convertToSortkey(), or else the sorted list might jump
 2969+ * back and forth between the same "initial letters" or other pathological
 2970+ * behavior. For instance, if you just return the first character, but "a"
 2971+ * sorts the same as "A" based on convertToSortkey(), then you might get a
 2972+ * list like
 2973+ *
 2974+ * == A ==
 2975+ * * [[Aardvark]]
 2976+ *
 2977+ * == a ==
 2978+ * * [[antelope]]
 2979+ *
 2980+ * == A ==
 2981+ * * [[Ape]]
 2982+ *
 2983+ * etc., assuming for the sake of argument that $wgCapitalLinks is false.
 2984+ * Obviously, this is ignored if usesFirstLettersInLists() is false.
 2985+ *
 2986+ * @param string $string UTF-8 string
 2987+ * @return string UTF-8 string corresponding to the first letter of input
 2988+ */
 2989+ public function firstLetterForLists( $string ) {
 2990+ return mb_substr( $string, 0, 1 );
 2991+ }
29382992 }

Follow-up revisions

Revision | Commit summary | Author | Date
r69817 | Commit forgotten file from r69810 | simetrical | 20:58, 23 July 2010
r69852 | Followup to r69817, r69810 add PG patch file, defaults to cl_type of PAGE. | overlordq | 21:44, 24 July 2010
r72456 | Get rid of Language::usesFirstLettersInLists()... | simetrical | 20:43, 5 September 2010

Status & tagging log