r86885 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r86884 | r86885 | r86886 >
Date:19:00, 25 April 2011
Author:robin
Status:ok
Tags:
Comment:
Fix for r86670: properly convert uppercase latin to syllabics
Modified paths:
  • /trunk/phase3/languages/classes/LanguageIu.php (modified) (history)

Diff [purge]

Index: trunk/phase3/languages/classes/LanguageIu.php
@@ -4,8 +4,10 @@
55 */
66
77 /*
8 -* Conversion script between Latin and Syllabics
9 -* for Inuktitut
 8+* Conversion script between Latin and Syllabics for Inuktitut.
 9+* - Syllabics -> lowercase Latin
 10+* - lowercase/uppercase Latin -> Syllabics
 11+*
1012 *
1113 * Based on:
1214 * - http://commons.wikimedia.org/wiki/Image:Inuktitut.png
@@ -17,7 +19,6 @@
1820
1921 class IuConverter extends LanguageConverter {
2022 var $mToLatin = array(
21 - # Lowercase
2223 'ᐦ' => 'h', 'ᐃ' => 'i', 'ᐄ' => 'ii', 'ᐅ' => 'u', 'ᐆ' => 'uu', 'ᐊ' => 'a', 'ᐋ' => 'aa',
2324 'ᑉ' => 'p', 'ᐱ' => 'pi', 'ᐲ' => 'pii', 'ᐳ' => 'pu', 'ᐴ' => 'puu', 'ᐸ' => 'pa', 'ᐹ' => 'paa',
2425 'ᑦ' => 't', 'ᑎ' => 'ti', 'ᑏ' => 'tii', 'ᑐ' => 'tu', 'ᑑ' => 'tuu', 'ᑕ' => 'ta', 'ᑖ' => 'taa',
@@ -35,12 +36,18 @@
3637 'ᖕ' => 'ng', 'ᖏ' => 'ngi', 'ᖐ' => 'ngii', 'ᖑ' => 'ngu', 'ᖒ' => 'nguu', 'ᖓ' => 'nga', 'ᖔ' => 'ngaa',
3738 'ᖖ' => 'nng', 'ᙱ' => 'nngi', 'ᙲ' => 'nngii', 'ᙳ' => 'nngu', 'ᙴ' => 'nnguu', 'ᙵ' => 'nnga', 'ᙶ' => 'nngaa',
3839 'ᖦ' => 'ɫ', 'ᖠ' => 'ɫi', 'ᖡ' => 'ɫii', 'ᖢ' => 'ɫu', 'ᖣ' => 'ɫuu', 'ᖤ' => 'ɫa', 'ᖥ' => 'ɫaa',
 40+ );
3941
40 - # There is no uppercase in Syllabics
 42+ var $mUpperToLowerCaseLatin = array(
 43+ 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
 44+ 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j',
 45+ 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o',
 46+ 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't',
 47+ 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y',
 48+ 'Z' => 'z',
4149 );
4250
4351 var $mToSyllabics = array(
44 - # Lowercase
4552 'h' => 'ᐦ', 'i' => 'ᐃ', 'ii' => 'ᐄ', 'u' => 'ᐅ', 'uu' => 'ᐆ', 'a' => 'ᐊ', 'aa' => 'ᐋ',
4653 'p' => 'ᑉ', 'pi' => 'ᐱ', 'pii' => 'ᐲ', 'pu' => 'ᐳ', 'puu' => 'ᐴ', 'pa' => 'ᐸ', 'paa' => 'ᐹ',
4754 't' => 'ᑦ', 'ti' => 'ᑎ', 'tii' => 'ᑏ', 'tu' => 'ᑐ', 'tuu' => 'ᑑ', 'ta' => 'ᑕ', 'taa' => 'ᑖ',
@@ -58,28 +65,11 @@
5966 'ng' => 'ᖕ', 'ngi' => 'ᖏ', 'ngii' => 'ᖐ', 'ngu' => 'ᖑ', 'nguu' => 'ᖒ', 'nga' => 'ᖓ', 'ngaa' => 'ᖔ',
6067 'nng' => 'ᖖ', 'nngi' => 'ᙱ', 'nngii' => 'ᙲ', 'nngu' => 'ᙳ', 'nnguu' => 'ᙴ', 'nnga' => 'ᙵ', 'nngaa' => 'ᙶ',
6168 'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ', 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ',
62 -
63 - # Uppercase
64 - 'H' => 'ᐦ', 'I' => 'ᐃ', 'Ii' => 'ᐄ', 'U' => 'ᐅ', 'Uu' => 'ᐆ', 'A' => 'ᐊ', 'Aa' => 'ᐋ',
65 - 'P' => 'ᑉ', 'Pi' => 'ᐱ', 'Pii' => 'ᐲ', 'Pu' => 'ᐳ', 'Puu' => 'ᐴ', 'Pa' => 'ᐸ', 'Paa' => 'ᐹ',
66 - 'T' => 'ᑦ', 'Ti' => 'ᑎ', 'Tii' => 'ᑏ', 'Tu' => 'ᑐ', 'Tuu' => 'ᑑ', 'Ta' => 'ᑕ', 'Taa' => 'ᑖ',
67 - 'K' => 'ᒃ', 'Ki' => 'ᑭ', 'Kii' => 'ᑮ', 'Ku' => 'ᑯ', 'Kuu' => 'ᑰ', 'Ka' => 'ᑲ', 'Kaa' => 'ᑳ',
68 - 'G' => 'ᒡ', 'Gi' => 'ᒋ', 'Gii' => 'ᒌ', 'Gu' => 'ᒍ', 'Guu' => 'ᒎ', 'Ga' => 'ᒐ', 'Gaa' => 'ᒑ',
69 - 'M' => 'ᒻ', 'Mi' => 'ᒥ', 'Mii' => 'ᒦ', 'Mu' => 'ᒧ', 'Muu' => 'ᒨ', 'Ma' => 'ᒪ', 'Maa' => 'ᒫ',
70 - 'N' => 'ᓐ', 'Ni' => 'ᓂ', 'Nii' => 'ᓃ', 'Nu' => 'ᓄ', 'Nuu' => 'ᓅ', 'Na' => 'ᓇ', 'Naa' => 'ᓈ',
71 - 'S' => 'ᔅ', 'Si' => 'ᓯ', 'Sii' => 'ᓰ', 'Su' => 'ᓱ', 'Suu' => 'ᓲ', 'Sa' => 'ᓴ', 'Saa' => 'ᓵ',
72 - 'L' => 'ᓪ', 'Li' => 'ᓕ', 'Lii' => 'ᓖ', 'Lu' => 'ᓗ', 'Luu' => 'ᓘ', 'La' => 'ᓚ', 'Laa' => 'ᓛ',
73 - 'J' => 'ᔾ', 'Ji' => 'ᔨ', 'Jii' => 'ᔩ', 'Ju' => 'ᔪ', 'Juu' => 'ᔫ', 'Ja' => 'ᔭ', 'Jaa' => 'ᔮ',
74 - 'V' => 'ᕝ', 'Vi' => 'ᕕ', 'Vii' => 'ᕖ', 'Vu' => 'ᕗ', 'Vuu' => 'ᕘ', 'Va' => 'ᕙ', 'Vaa' => 'ᕚ',
75 - 'R' => 'ᕐ', 'Ri' => 'ᕆ', 'Rii' => 'ᕇ', 'Ru' => 'ᕈ', 'Ruu' => 'ᕉ', 'Ra' => 'ᕋ', 'Raa' => 'ᕌ',
76 - 'Q' => 'ᖅ', 'Qi' => 'ᕿ', 'Qii' => 'ᖀ', 'Qu' => 'ᖁ', 'Quu' => 'ᖂ', 'Qa' => 'ᖃ', 'Qaa' => 'ᖄ',
77 - 'Ng' => 'ᖕ', 'Ngi' => 'ᖏ', 'Ngii' => 'ᖐ', 'Ngu' => 'ᖑ', 'Nguu' => 'ᖒ', 'Nga' => 'ᖓ', 'Ngaa' => 'ᖔ',
78 - 'Nng' => 'ᖖ', 'Nngi' => 'ᙱ', 'Nngii' => 'ᙲ', 'Nngu' => 'ᙳ', 'Nnguu' => 'ᙴ', 'Nnga' => 'ᙵ', 'Nngaa' => 'ᙶ',
79 -# 'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ', 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ',
8069 );
8170
8271 function loadDefaultTables() {
8372 $this->mTables = array(
 73+ 'lowercase' => new ReplacementArray( $this->mUpperToLowerCaseLatin ),
8474 'ike-cans' => new ReplacementArray( $this->mToSyllabics ),
8575 'ike-latn' => new ReplacementArray( $this->mToLatin ),
8676 'iu' => new ReplacementArray()
@@ -159,32 +149,20 @@
160150 }
161151
162152 /**
163 - * It translates text into variant, specials:
164 - * - ommiting roman numbers
 153+ * It translates text into variant
165154 */
166155 function translate( $text, $toVariant ) {
167 - $breaks = '[^\w\x80-\xff]';
168 -
169 - // regexp for roman numbers
170 - $roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})';
171 -
172 - $reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks . $roman . '$|' . $breaks . $roman . $breaks . '/';
173 -
174 - $matches = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
175 -
176 - $m = array_shift( $matches );
177 - if ( !isset( $this->mTables[$toVariant] ) ) {
178 - throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
 156+ // If $text is empty or only includes spaces, do nothing
 157+ // Otherwise translate it
 158+ if ( trim( $text ) ) {
 159+ $this->loadTables();
 160+ // To syllabics, first translate uppercase to lowercase Latin
 161+ if($toVariant == 'ike-cans') {
 162+ $text = $this->mTables['lowercase']->replace( $text );
 163+ }
 164+ $text = $this->mTables[$toVariant]->replace( $text );
179165 }
180 - $ret = $this->mTables[$toVariant]->replace( $m[0] );
181 - $mstart = $m[1] + strlen( $m[0] );
182 - foreach ( $matches as $m ) {
183 - $ret .= substr( $text, $mstart, $m[1] -$mstart );
184 - $ret .= parent::translate( $m[0], $toVariant );
185 - $mstart = $m[1] + strlen( $m[0] );
186 - }
187 -
188 - return $ret;
 166+ return $text;
189167 }
190168 }
191169

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r86670 | Conversion script between Syllabics and Latin for the Inuktitut language | robin | 21:21, 21 April 2011

Status & tagging log