r72796 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r72795‎ | r72796 | r72797 >
Date:10:10, 11 September 2010
Author:nikerabbit
Status:ok
Tags:
Comment:
Not so beautiful script for comparing the plural rules in different implementations
Modified paths:
  • /trunk/extensions/Translate/scripts/plural-comparison.php (added) (history)

Diff [purge]

Index: trunk/extensions/Translate/scripts/plural-comparison.php
@@ -0,0 +1,204 @@
 2+<?php
 3+/**
 4+ * Script for comparing different plural implementations.
 5+ *
 6+ * @author Niklas Laxström
 7+ *
 8+ * @copyright Copyright © 2010, Niklas Laxström
 9+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 10+ * @file
 11+ */
 12+
 13+// Standard boilerplate to define $IP
 14+if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
 15+ $IP = getenv( 'MW_INSTALL_PATH' );
 16+} else {
 17+ $dir = dirname( __FILE__ ); $IP = "$dir/../../..";
 18+}
 19+require_once( "$IP/maintenance/Maintenance.php" );
 20+
 21+/// Script for comparing different plural implementations.
 22+class PluralCompare extends Maintenance {
 23+ public function __construct() {
 24+ parent::__construct();
 25+ $this->mDescription = 'Script for comparing different plural implementations.';
 26+ }
 27+
 28+ public function execute() {
 29+ $mwLanguages = $this->loadMediaWiki();
 30+ $gtLanguages = $this->loadGettext();
 31+ $clLanguages = $this->loadCLDR();
 32+
 33+ $allkeys = array_keys( $mwLanguages + $gtLanguages + $clLanguages );
 34+ sort( $allkeys );
 35+
 36+ $this->output( sprintf( "%12s %3s %3s %4s\n", 'Code', 'MW', 'Get', 'CLDR' ) );
 37+ foreach( $allkeys as $index => $code ) {
 38+ $mw = isset( $mwLanguages[$code] ) ? ( $mwLanguages[$code] === false ? '.' : '+' ) : '';
 39+ $gt = isset( $gtLanguages[$code] ) ? ( $gtLanguages[$code] === '(n != 1)' ? '.' : '+' ) : '';
 40+ $cl = isset( $clLanguages[$code] ) ? ( $clLanguages[$code][0] === 'Default' ? '.' : '+' ) : '';
 41+ $this->output( sprintf( "%12s %-3s %-3s %-4s\n", $code, $mw, $gt, $cl ) );
 42+
 43+ if ( substr_count( sprintf( '%s%s%s', $mw, $gt, $cl ), '+' ) < 2 ) {
 44+ unset( $allkeys[$index] );
 45+ }
 46+ }
 47+
 48+ $this->output( "\n" );
 49+ $c = count( $allkeys );
 50+ $this->output( "Proceeding to test differences in $c languages\n" );
 51+
 52+ foreach ( $allkeys as $code ) {
 53+ $output = sprintf( "%3s %3s %3s %4s for [$code]\n", 'I', 'MW', 'Get', 'CLDR' );
 54+
 55+ if ( isset( $mwLanguages[$code] ) && $mwLanguages[$code] !== false ) {
 56+ $obj = Language::factory( $code );
 57+ } else {
 58+ $obj = false;
 59+ }
 60+
 61+ if ( isset( $gtLanguages[$code] ) ) {
 62+ $gtExp = 'return (int) ' . str_replace( 'n', '$i', $gtLanguages[$code] ) . ';';
 63+ } else {
 64+ $gtExp = false;
 65+ }
 66+
 67+ if ( isset( $clLanguages[$code] ) ) {
 68+ $cldrExp = $clLanguages[$code][1];
 69+ } else {
 70+ $cldrExp = false;
 71+ }
 72+
 73+ $cldrmap = array();
 74+ $error = false;
 75+
 76+ for( $i = 0; $i <= 200; $i++ ) {
 77+ $mw = $obj ? $obj->convertPlural( $i, array( 0, 1, 2, 3, 4, 5 ) ) : '?';
 78+ $gt = $gtExp ? eval( $gtExp ) : '?';
 79+ $cldr = $cldrExp !== false ? $this->evalCLDRRule( $i, $cldrExp ) : '?';
 80+
 81+ if ( self::comp( $mw, $gt ) ) {
 82+ $value = $gt !== '?' ? $gt : $mw;
 83+ if ( !isset( $cldrmap[$cldr] ) ) {
 84+ $cldrmap[$cldr] = $value;
 85+ if ( $cldr !== '?' ) {
 86+ $output .= sprintf( "%3s %-3s %-3s %-6s # Established that %-6s == $mw\n", $i, $mw, $gt, $cldr, $cldr );
 87+ }
 88+ continue;
 89+ } elseif ( self::comp( $cldrmap[$cldr], $value ) ) {
 90+ continue;
 91+ } elseif ( $i > 4 && $value === 1 && self::comp( $cldr, 'other' ) ) {
 92+ if ( $i === 5 ) {
 93+ $output .= "Supressing further output for this language.\n";
 94+ }
 95+ continue;
 96+ }
 97+ }
 98+ $error = true;
 99+ $output .= sprintf( "%3s %-3s %-3s %-6s\n", $i, $mw, $gt, $cldr );
 100+ }
 101+
 102+ if ( $error ) {
 103+ $this->output( "$output\n" );
 104+ }
 105+ }
 106+
 107+ }
 108+
 109+ public static function comp( $a, $b ) {
 110+ return $a === '?' || $b === '?' || $a === $b;
 111+ }
 112+
 113+ public function loadCLDR() {
 114+ $filename = dirname( __FILE__ ) . '/../data/plural-cldr.yaml';
 115+ $data = TranslateYaml::load( $filename );
 116+ $languages = array();
 117+ $ruleExps = array();
 118+ foreach ( $data['rulesets'] as $name => $rules ) {
 119+ $ruleExps[$name] = array();
 120+ foreach( $rules as $rulename => $rule ) {
 121+ $ruleExps[$name][$rulename] = $this->parseCLDRRule( $rule );
 122+ }
 123+ }
 124+
 125+ foreach ( $data['locales'] as $code => $rulename ) {
 126+ $languages[$code] = array( $rulename, $ruleExps[$rulename] );
 127+ }
 128+
 129+ return $languages;
 130+ }
 131+
 132+ public function loadMediaWiki() {
 133+ $mwLanguages = Language::getLanguageNames( true );
 134+ foreach ( $mwLanguages as $code => $name ) {
 135+ $obj = Language::factory( $code );
 136+ $method = new ReflectionMethod( $obj, 'convertPlural' );
 137+ if ( $method->getDeclaringClass()->name === 'Language' ) {
 138+ $mwLanguages[$code] = false;
 139+ }
 140+ }
 141+ return $mwLanguages;
 142+ }
 143+
 144+ public function loadGettext() {
 145+ $gtData = file_get_contents( dirname( __FILE__ ) . '/../data/plural-gettext.txt' );
 146+ $gtLanguages = array();
 147+ foreach ( preg_split( '/\n|\r/', $gtData, -1, PREG_SPLIT_NO_EMPTY ) as $line ) {
 148+ list( $code, $rule ) = explode( "\t", $line );
 149+ $rule = preg_replace( '/^.*?plural=/', '', $rule );
 150+ $gtLanguages[$code] = $rule;
 151+ }
 152+ return $gtLanguages;
 153+ }
 154+
 155+ public function evalCLDRRule( $i, $rules ) {
 156+ foreach ( $rules as $name => $rule ) {
 157+ if ( eval( "return $rule;" ) ) {
 158+ return $name;
 159+ }
 160+ }
 161+
 162+ return "other";
 163+ }
 164+
 165+ public function parseCLDRRule( $rule ) {
 166+ $rule = preg_replace( '/\bn\b/', '$i', $rule );
 167+ $rule = preg_replace( '/([^ ]+) mod (\d+)/', 'self::mod(\1,\2)', $rule );
 168+ $rule = preg_replace( '/([^ ]+) is not (\d+)/' , '\1!==\2', $rule );
 169+ $rule = preg_replace( '/([^ ]+) is (\d+)/', '\1===\2', $rule );
 170+ $rule = preg_replace( '/([^ ]+) not in (\d+)\.\.(\d+)/', '!self::in(\1,\2,\3)', $rule );
 171+ $rule = preg_replace( '/([^ ]+) not within (\d+)\.\.(\d+)/', '!self::within(\1,\2,\3)', $rule );
 172+ $rule = preg_replace( '/([^ ]+) in (\d+)\.\.(\d+)/', 'self::in(\1,\2,\3)', $rule );
 173+ $rule = preg_replace( '/([^ ]+) within (\d+)\.\.(\d+)/', 'self::within(\1,\2,\3)', $rule );
 174+ // AND takes precedence over OR
 175+ $andrule = '/([^ ]+) and ([^ ]+)/i';
 176+ while( preg_match( $andrule, $rule ) ) {
 177+ $rule = preg_replace( $andrule, '(\1&&\2)', $rule );
 178+ }
 179+ $orrule = '/([^ ]+) or ([^ ]+)/i';
 180+ while( preg_match( $orrule, $rule ) ) {
 181+ $rule = preg_replace( $orrule, '(\1||\2)', $rule );
 182+ }
 183+
 184+ return $rule;
 185+ }
 186+
 187+ public static function in( $num, $low, $high ) {
 188+ return is_int( $num ) && $num >= $low && $num <= $high;
 189+ }
 190+
 191+ public static function within( $num, $low, $high ) {
 192+ return $num >= $low && $num <= $high;
 193+ }
 194+
 195+ public static function mod( $num, $mod ) {
 196+ if ( is_int( $num ) ) {
 197+ return (int) fmod( $num, $mod );
 198+ }
 199+ return fmod( $num, $mod );
 200+ }
 201+
 202+}
 203+
 // Name the maintenance class defined in this file, then include the file
 // whose path is held in the DO_MAINTENANCE constant (presumably the runner
 // that instantiates and executes $maintClass -- confirm in Maintenance.php).
 204+$maintClass = 'PluralCompare';
 205+require_once( DO_MAINTENANCE );
Property changes on: trunk/extensions/Translate/scripts/plural-comparison.php
___________________________________________________________________
Added: svn:eol-style
1206 + native

Status & tagging log