r42715 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r42714‎ | r42715 | r42716 >
Date:08:07, 28 October 2008
Author:tstarling
Status:old (Comments)
Tags:
Comment:
In sortable tables:
* (bug 8063) Use the content language digit transform table.
* Don't recognise C-style hexadecimal notation as a number, that feature never actually worked.
* Be more forgiving about things that look like numbers but turn out not to be. Sort them stringwise.
* Optimised ts_resortTable by having it calculate the sort keys at the start, and then using a single trivial comparison function. There are potentially many more comparisons than rows. Observed factor of 2 speedup.
* Use RegExp.test() instead of String.match() when a true/false value is desired, as recommended by the Mozilla reference
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/DefaultSettings.php (modified) (history)
  • /trunk/phase3/includes/Skin.php (modified) (history)
  • /trunk/phase3/skins/common/wikibits.js (modified) (history)

Diff [purge]

Index: trunk/phase3/skins/common/wikibits.js
@@ -503,6 +503,8 @@
504504 var ts_image_none = "sort_none.gif";
505505 var ts_europeandate = wgContentLanguage != "en"; // The non-American-inclined can change to "true"
506506 var ts_alternate_row_colors = false;
 507+var ts_number_transform_table = null;
 508+var ts_number_regex = null;
507509
508510 function sortables_init() {
509511 var idnum = 0;
@@ -575,9 +577,14 @@
576578 table = table.parentNode;
577579 if (!table) return;
578580
579 - // Work out a type for the column
580581 if (table.rows.length <= 1) return;
581582
 583+ // Generate the number transform table if it's not done already
 584+ if (ts_number_transform_table == null) {
 585+ ts_initTransformTable();
 586+ }
 587+
 588+ // Work out a type for the column
582589 // Skip the first row if that's where the headings are
583590 var rowStart = (table.tHead && table.tHead.rows.length > 0 ? 0 : 1);
584591
@@ -590,22 +597,21 @@
591598 }
592599 }
593600
594 - var sortfn = ts_sort_caseinsensitive;
595 - if (itm.match(/^\d\d[\/. -][a-zA-Z]{3}[\/. -]\d\d\d\d$/))
596 - sortfn = ts_sort_date;
597 - else if (itm.match(/^\d\d[\/.-]\d\d[\/.-]\d\d\d\d$/))
598 - sortfn = ts_sort_date;
599 - else if (itm.match(/^\d\d[\/.-]\d\d[\/.-]\d\d$/))
600 - sortfn = ts_sort_date;
 601+ // TODO: bug 8226, localised date formats
 602+ var sortfn = ts_sort_generic;
 603+ var preprocessor = ts_toLowerCase;
 604+ if (/^\d\d[\/. -][a-zA-Z]{3}[\/. -]\d\d\d\d$/.test(itm)) {
 605+ preprocessor = ts_dateToSortKey;
 606+ } else if (/^\d\d[\/.-]\d\d[\/.-]\d\d\d\d$/.test(itm)) {
 607+ preprocessor = ts_dateToSortKey;
 608+ } else if (/^\d\d[\/.-]\d\d[\/.-]\d\d$/.test(itm)) {
 609+ preprocessor = ts_dateToSortKey;
601610 // pound dollar euro yen currency cents
602 - else if (itm.match(/(^[\u00a3$\u20ac\u00a4\u00a5]|\u00a2$)/))
603 - sortfn = ts_sort_currency;
604 - // We allow a trailing percent sign, which we just strip. This works fine
605 - // if percents and regular numbers aren't being mixed.
606 - else if (itm.match(/^[+-]?\d[\d,]*(\.[\d,]*)?([eE][+-]?\d[\d,]*)?\%?$/) ||
607 - itm.match(/^[+-]?\.\d[\d,]*([eE][+-]?\d[\d,]*)?\%?$/) ||
608 - itm.match(/^0x[\da-f]+$/i))
609 - sortfn = ts_sort_numeric;
 611+ } else if (/(^[\u00a3$\u20ac\u00a4\u00a5]|\u00a2$)/.test(itm)) {
 612+ preprocessor = ts_currencyToSortKey;
 613+ } else if (ts_number_regex.test(itm)) {
 614+ preprocessor = ts_parseFloat;
 615+ }
610616
611617 var reverse = (span.getAttribute("sortdir") == 'down');
612618
@@ -614,8 +620,9 @@
615621 var row = table.rows[j];
616622 var keyText = ts_getInnerText(row.cells[column]);
617623 var oldIndex = (reverse ? -j : j);
 624+ var preprocessed = preprocessor( keyText );
618625
619 - newRows[newRows.length] = new Array(row, keyText, oldIndex);
 626+ newRows[newRows.length] = new Array(row, preprocessed, oldIndex);
620627 }
621628
622629 newRows.sort(sortfn);
@@ -654,6 +661,63 @@
655662 }
656663 }
657664
 665+function ts_initTransformTable() {
 666+ if ( typeof wgSeparatorTransformTable == "undefined"
 667+ || ( wgSeparatorTransformTable[0] == '' && wgDigitTransformTable[2] == '' ) )
 668+ {
 669+ digitClass = "[0-9,.]";
 670+ ts_number_transform_table = false;
 671+ } else {
 672+ ts_number_transform_table = {};
 673+ // Unpack the transform table
 674+ // Separators
 675+ ascii = wgSeparatorTransformTable[0].split("\t");
 676+ localised = wgSeparatorTransformTable[1].split("\t");
 677+ for ( var i = 0; i < ascii.length; i++ ) {
 678+ ts_number_transform_table[localised[i]] = ascii[i];
 679+ }
 680+ // Digits
 681+ ascii = wgDigitTransformTable[0].split("\t");
 682+ localised = wgDigitTransformTable[1].split("\t");
 683+ for ( var i = 0; i < ascii.length; i++ ) {
 684+ ts_number_transform_table[localised[i]] = ascii[i];
 685+ }
 686+
 687+ // Construct regex for number identification
 688+ digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '\\.'];
 689+ maxDigitLength = 1;
 690+ for ( var digit in ts_number_transform_table ) {
 691+ // Escape regex metacharacters
 692+ digits.push(
 693+ digit.replace( /[\\\\$\*\+\?\.\(\)\|\{\}\[\]\-]/,
 694+ function( s ) { return '\\' + s; } )
 695+ );
 696+ if (digit.length > maxDigitLength) {
 697+ maxDigitLength = digit.length;
 698+ }
 699+ }
 700+ if ( maxDigitLength > 1 ) {
 701+ digitClass = '[' + digits.join( '', digits ) + ']';
 702+ } else {
 703+ digitClass = '(' + digits.join( '|', digits ) + ')';
 704+ }
 705+ }
 706+
 707+ // We allow a trailing percent sign, which we just strip. This works fine
 708+ // if percents and regular numbers aren't being mixed.
 709+ ts_number_regex = new RegExp(
 710+ "^(" +
 711+ "[+-]?[0-9][0-9,]*(\\.[0-9,]*)?(E[+-]?[0-9][0-9,]*)?" + // Fortran-style scientific
 712+ "|" +
 713+ "[+-]?" + digitClass + "+%?" + // Generic localised
 714+ ")$", "i"
 715+ );
 716+}
 717+
 718+function ts_toLowerCase( s ) {
 719+ return s.toLowerCase();
 720+}
 721+
658722 function ts_dateToSortKey(date) {
659723 // y2k notes: two digit years less than 50 are treated as 20XX, greater than 50 are treated as 19XX
660724 if (date.length == 11) {
@@ -695,40 +759,36 @@
696760 return "00000000";
697761 }
698762
699 -function ts_parseFloat(num) {
700 - if (!num) return 0;
701 - num = parseFloat(num.replace(/,/g, ""));
702 - return (isNaN(num) ? 0 : num);
703 -}
 763+function ts_parseFloat( s ) {
 764+ if ( !s ) {
 765+ return 0;
 766+ }
 767+ if (ts_number_transform_table != false) {
 768+ var newNum = '', c;
 769+
 770+ for ( var p = 0; p < s.length; p++ ) {
 771+ c = s.charAt( p );
 772+ if (c in ts_number_transform_table) {
 773+ newNum += ts_number_transform_table[c];
 774+ } else {
 775+ newNum += c;
 776+ }
 777+ }
 778+ s = newNum;
 779+ }
704780
705 -function ts_sort_date(a,b) {
706 - var aa = ts_dateToSortKey(a[1]);
707 - var bb = ts_dateToSortKey(b[1]);
708 - return (aa < bb ? -1 : aa > bb ? 1 : a[2] - b[2]);
 781+ num = parseFloat(s.replace(/,/g, ""));
 782+ return (isNaN(num) ? s : num);
709783 }
710784
711 -function ts_sort_currency(a,b) {
712 - var aa = ts_parseFloat(a[1].replace(/[^0-9.,]/g,''));
713 - var bb = ts_parseFloat(b[1].replace(/[^0-9.,]/g,''));
714 - return (aa != bb ? aa - bb : a[2] - b[2]);
 785+function ts_currencyToSortKey( s ) {
 786+ return ts_parseFloat(s.replace(/[^0-9.,]/g,''));
715787 }
716788
717 -function ts_sort_numeric(a,b) {
718 - var aa = ts_parseFloat(a[1]);
719 - var bb = ts_parseFloat(b[1]);
720 - return (aa != bb ? aa - bb : a[2] - b[2]);
 789+function ts_sort_generic(a, b) {
 790+ return a[1] < b[1] ? -1 : a[1] > b[1] ? 1 : a[2] - b[2];
721791 }
722792
723 -function ts_sort_caseinsensitive(a,b) {
724 - var aa = a[1].toLowerCase();
725 - var bb = b[1].toLowerCase();
726 - return (aa < bb ? -1 : aa > bb ? 1 : a[2] - b[2]);
727 -}
728 -
729 -function ts_sort_default(a,b) {
730 - return (a[1] < b[1] ? -1 : a[1] > b[1] ? 1 : a[2] - b[2]);
731 -}
732 -
733793 function ts_alternate(table) {
734794 // Take object table and get all it's tbodies.
735795 var tableBodies = table.getElementsByTagName("tbody");
Index: trunk/phase3/includes/DefaultSettings.php
@@ -1399,7 +1399,7 @@
14001400 * to ensure that client-side caches don't keep obsolete copies of global
14011401 * styles.
14021402 */
1403 -$wgStyleVersion = '182';
 1403+$wgStyleVersion = '183';
14041404
14051405
14061406 # Server-side caching:
Index: trunk/phase3/includes/Skin.php
@@ -341,6 +341,18 @@
342342
343343 $ns = $wgTitle->getNamespace();
344344 $nsname = isset( $wgCanonicalNamespaceNames[ $ns ] ) ? $wgCanonicalNamespaceNames[ $ns ] : $wgTitle->getNsText();
 345+ $separatorTransTable = $wgContLang->separatorTransformTable();
 346+ $separatorTransTable = $separatorTransTable ? $separatorTransTable : array();
 347+ $compactSeparatorTransTable = array(
 348+ implode( "\t", array_keys( $separatorTransTable ) ),
 349+ implode( "\t", $separatorTransTable ),
 350+ );
 351+ $digitTransTable = $wgContLang->digitTransformTable();
 352+ $digitTransTable = $digitTransTable ? $digitTransTable : array();
 353+ $compactDigitTransTable = array(
 354+ implode( "\t", array_keys( $digitTransTable ) ),
 355+ implode( "\t", $digitTransTable ),
 356+ );
345357
346358 $vars = array(
347359 'skin' => $data['skinname'],
@@ -368,6 +380,8 @@
369381 'wgVersion' => $wgVersion,
370382 'wgEnableAPI' => $wgEnableAPI,
371383 'wgEnableWriteAPI' => $wgEnableWriteAPI,
 384+ 'wgSeparatorTransformTable' => $compactSeparatorTransTable,
 385+ 'wgDigitTransformTable' => $compactDigitTransTable,
372386 );
373387
374388 if( $wgUseAjax && $wgEnableMWSuggest && !$wgUser->getOption( 'disablesuggest', false )){
Index: trunk/phase3/RELEASE-NOTES
@@ -289,6 +289,7 @@
290290 JavaScript is disabled.
291291 * (bug 4253) Recentchanges IRC messages no longer include title in diff URLs
292292 * Allow '0' to be an accesskey.
 293+* (bug 8063) Use language-dependent sorting in client-side sortable tables
293294
294295 === API changes in 1.14 ===
295296

Comments

#Comment by Mormegil (talk | contribs)   22:12, 26 September 2010

I just noticed a tiny bug in this code: In ts_initTransformTable(), the ts_number_regex is constructed as either “[xyz]”, or “(x|y|z)”, but the logic to choose between those is backwards! If maxDigitLength > 1, the code chooses the “[xyz]” variant! Since all languages use single-character transformations (AFAIK), this works correctly if unoptimally (using “(x|y|z)” all the time).

Status & tagging log