Index: trunk/phase3/languages/classes/LanguageIu.php |
— | — | @@ -4,8 +4,10 @@ |
5 | 5 | */ |
6 | 6 | |
7 | 7 | /* |
8 | | -* Conversion script between Latin and Syllabics |
9 | | -* for Inuktitut |
| 8 | +* Conversion script between Latin and Syllabics for Inuktitut. |
| 9 | +* - Syllabics -> lowercase Latin |
| 10 | +* - lowercase/uppercase Latin -> Syllabics |
| 11 | +* |
10 | 12 | * |
11 | 13 | * Based on: |
12 | 14 | * - http://commons.wikimedia.org/wiki/Image:Inuktitut.png |
— | — | @@ -17,7 +19,6 @@ |
18 | 20 | |
19 | 21 | class IuConverter extends LanguageConverter { |
20 | 22 | var $mToLatin = array( |
21 | | - # Lowercase |
22 | 23 | 'ᐦ' => 'h', 'ᐃ' => 'i', 'ᐄ' => 'ii', 'ᐅ' => 'u', 'ᐆ' => 'uu', 'ᐊ' => 'a', 'ᐋ' => 'aa', |
23 | 24 | 'ᑉ' => 'p', 'ᐱ' => 'pi', 'ᐲ' => 'pii', 'ᐳ' => 'pu', 'ᐴ' => 'puu', 'ᐸ' => 'pa', 'ᐹ' => 'paa', |
24 | 25 | 'ᑦ' => 't', 'ᑎ' => 'ti', 'ᑏ' => 'tii', 'ᑐ' => 'tu', 'ᑑ' => 'tuu', 'ᑕ' => 'ta', 'ᑖ' => 'taa', |
— | — | @@ -35,12 +36,18 @@ |
36 | 37 | 'ᖕ' => 'ng', 'ᖏ' => 'ngi', 'ᖐ' => 'ngii', 'ᖑ' => 'ngu', 'ᖒ' => 'nguu', 'ᖓ' => 'nga', 'ᖔ' => 'ngaa', |
37 | 38 | 'ᖖ' => 'nng', 'ᙱ' => 'nngi', 'ᙲ' => 'nngii', 'ᙳ' => 'nngu', 'ᙴ' => 'nnguu', 'ᙵ' => 'nnga', 'ᙶ' => 'nngaa', |
38 | 39 | 'ᖦ' => 'ɫ', 'ᖠ' => 'ɫi', 'ᖡ' => 'ɫii', 'ᖢ' => 'ɫu', 'ᖣ' => 'ɫuu', 'ᖤ' => 'ɫa', 'ᖥ' => 'ɫaa', |
| 40 | + ); |
39 | 41 | |
40 | | - # There is no uppercase in Syllabics |
| 42 | + var $mUpperToLowerCaseLatin = array( |
| 43 | + 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', |
| 44 | + 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', |
| 45 | + 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o', |
| 46 | + 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't', |
| 47 | + 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y', |
| 48 | + 'Z' => 'z', |
41 | 49 | ); |
42 | 50 | |
43 | 51 | var $mToSyllabics = array( |
44 | | - # Lowercase |
45 | 52 | 'h' => 'ᐦ', 'i' => 'ᐃ', 'ii' => 'ᐄ', 'u' => 'ᐅ', 'uu' => 'ᐆ', 'a' => 'ᐊ', 'aa' => 'ᐋ', |
46 | 53 | 'p' => 'ᑉ', 'pi' => 'ᐱ', 'pii' => 'ᐲ', 'pu' => 'ᐳ', 'puu' => 'ᐴ', 'pa' => 'ᐸ', 'paa' => 'ᐹ', |
47 | 54 | 't' => 'ᑦ', 'ti' => 'ᑎ', 'tii' => 'ᑏ', 'tu' => 'ᑐ', 'tuu' => 'ᑑ', 'ta' => 'ᑕ', 'taa' => 'ᑖ', |
— | — | @@ -58,28 +65,11 @@ |
59 | 66 | 'ng' => 'ᖕ', 'ngi' => 'ᖏ', 'ngii' => 'ᖐ', 'ngu' => 'ᖑ', 'nguu' => 'ᖒ', 'nga' => 'ᖓ', 'ngaa' => 'ᖔ', |
60 | 67 | 'nng' => 'ᖖ', 'nngi' => 'ᙱ', 'nngii' => 'ᙲ', 'nngu' => 'ᙳ', 'nnguu' => 'ᙴ', 'nnga' => 'ᙵ', 'nngaa' => 'ᙶ', |
61 | 68 | 'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ', 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ', |
62 | | - |
63 | | - # Uppercase |
64 | | - 'H' => 'ᐦ', 'I' => 'ᐃ', 'Ii' => 'ᐄ', 'U' => 'ᐅ', 'Uu' => 'ᐆ', 'A' => 'ᐊ', 'Aa' => 'ᐋ', |
65 | | - 'P' => 'ᑉ', 'Pi' => 'ᐱ', 'Pii' => 'ᐲ', 'Pu' => 'ᐳ', 'Puu' => 'ᐴ', 'Pa' => 'ᐸ', 'Paa' => 'ᐹ', |
66 | | - 'T' => 'ᑦ', 'Ti' => 'ᑎ', 'Tii' => 'ᑏ', 'Tu' => 'ᑐ', 'Tuu' => 'ᑑ', 'Ta' => 'ᑕ', 'Taa' => 'ᑖ', |
67 | | - 'K' => 'ᒃ', 'Ki' => 'ᑭ', 'Kii' => 'ᑮ', 'Ku' => 'ᑯ', 'Kuu' => 'ᑰ', 'Ka' => 'ᑲ', 'Kaa' => 'ᑳ', |
68 | | - 'G' => 'ᒡ', 'Gi' => 'ᒋ', 'Gii' => 'ᒌ', 'Gu' => 'ᒍ', 'Guu' => 'ᒎ', 'Ga' => 'ᒐ', 'Gaa' => 'ᒑ', |
69 | | - 'M' => 'ᒻ', 'Mi' => 'ᒥ', 'Mii' => 'ᒦ', 'Mu' => 'ᒧ', 'Muu' => 'ᒨ', 'Ma' => 'ᒪ', 'Maa' => 'ᒫ', |
70 | | - 'N' => 'ᓐ', 'Ni' => 'ᓂ', 'Nii' => 'ᓃ', 'Nu' => 'ᓄ', 'Nuu' => 'ᓅ', 'Na' => 'ᓇ', 'Naa' => 'ᓈ', |
71 | | - 'S' => 'ᔅ', 'Si' => 'ᓯ', 'Sii' => 'ᓰ', 'Su' => 'ᓱ', 'Suu' => 'ᓲ', 'Sa' => 'ᓴ', 'Saa' => 'ᓵ', |
72 | | - 'L' => 'ᓪ', 'Li' => 'ᓕ', 'Lii' => 'ᓖ', 'Lu' => 'ᓗ', 'Luu' => 'ᓘ', 'La' => 'ᓚ', 'Laa' => 'ᓛ', |
73 | | - 'J' => 'ᔾ', 'Ji' => 'ᔨ', 'Jii' => 'ᔩ', 'Ju' => 'ᔪ', 'Juu' => 'ᔫ', 'Ja' => 'ᔭ', 'Jaa' => 'ᔮ', |
74 | | - 'V' => 'ᕝ', 'Vi' => 'ᕕ', 'Vii' => 'ᕖ', 'Vu' => 'ᕗ', 'Vuu' => 'ᕘ', 'Va' => 'ᕙ', 'Vaa' => 'ᕚ', |
75 | | - 'R' => 'ᕐ', 'Ri' => 'ᕆ', 'Rii' => 'ᕇ', 'Ru' => 'ᕈ', 'Ruu' => 'ᕉ', 'Ra' => 'ᕋ', 'Raa' => 'ᕌ', |
76 | | - 'Q' => 'ᖅ', 'Qi' => 'ᕿ', 'Qii' => 'ᖀ', 'Qu' => 'ᖁ', 'Quu' => 'ᖂ', 'Qa' => 'ᖃ', 'Qaa' => 'ᖄ', |
77 | | - 'Ng' => 'ᖕ', 'Ngi' => 'ᖏ', 'Ngii' => 'ᖐ', 'Ngu' => 'ᖑ', 'Nguu' => 'ᖒ', 'Nga' => 'ᖓ', 'Ngaa' => 'ᖔ', |
78 | | - 'Nng' => 'ᖖ', 'Nngi' => 'ᙱ', 'Nngii' => 'ᙲ', 'Nngu' => 'ᙳ', 'Nnguu' => 'ᙴ', 'Nnga' => 'ᙵ', 'Nngaa' => 'ᙶ', |
79 | | -# 'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ', 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ', |
80 | 69 | ); |
81 | 70 | |
82 | 71 | function loadDefaultTables() { |
83 | 72 | $this->mTables = array( |
| 73 | + 'lowercase' => new ReplacementArray( $this->mUpperToLowerCaseLatin ), |
84 | 74 | 'ike-cans' => new ReplacementArray( $this->mToSyllabics ), |
85 | 75 | 'ike-latn' => new ReplacementArray( $this->mToLatin ), |
86 | 76 | 'iu' => new ReplacementArray() |
— | — | @@ -159,32 +149,20 @@ |
160 | 150 | } |
161 | 151 | |
162 | 152 | /** |
163 | | - * It translates text into variant, specials: |
164 | | - * - ommiting roman numbers |
| 153 | + * Translates $text into the variant named by $toVariant |
165 | 154 | */ |
166 | 155 | function translate( $text, $toVariant ) { |
167 | | - $breaks = '[^\w\x80-\xff]'; |
168 | | - |
169 | | - // regexp for roman numbers |
170 | | - $roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'; |
171 | | - |
172 | | - $reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks . $roman . '$|' . $breaks . $roman . $breaks . '/'; |
173 | | - |
174 | | - $matches = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE ); |
175 | | - |
176 | | - $m = array_shift( $matches ); |
177 | | - if ( !isset( $this->mTables[$toVariant] ) ) { |
178 | | - throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) ); |
| 156 | + // If $text is empty, whitespace-only, or the string "0" (falsy in PHP), do nothing |
| 157 | + // Otherwise translate it |
| 158 | + if ( trim( $text ) ) { |
| 159 | + $this->loadTables(); |
| 160 | + // To syllabics, first translate uppercase to lowercase Latin |
| 161 | + if($toVariant == 'ike-cans') { |
| 162 | + $text = $this->mTables['lowercase']->replace( $text ); |
| 163 | + } |
| 164 | + $text = $this->mTables[$toVariant]->replace( $text ); |
179 | 165 | } |
180 | | - $ret = $this->mTables[$toVariant]->replace( $m[0] ); |
181 | | - $mstart = $m[1] + strlen( $m[0] ); |
182 | | - foreach ( $matches as $m ) { |
183 | | - $ret .= substr( $text, $mstart, $m[1] -$mstart ); |
184 | | - $ret .= parent::translate( $m[0], $toVariant ); |
185 | | - $mstart = $m[1] + strlen( $m[0] ); |
186 | | - } |
187 | | - |
188 | | - return $ret; |
| 166 | + return $text; |
189 | 167 | } |
190 | 168 | } |
191 | 169 | |