r36664 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r36663‎ | r36664 | r36665 >
Date:03:00, 26 June 2008
Author:shinjiman
Status:old
Tags:
Comment:
* (bug 14604) Introduced the following features for the LanguageConverter: Multi-tag support, single conversion flag, remove conversion flag on a single page, description flag, variant name, multi-variant fallbacks.
patch by fdcn
* Added zh-mo and zh-my variants for the zh language
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/StringUtils.php (modified) (history)
  • /trunk/phase3/languages/LanguageConverter.php (modified) (history)
  • /trunk/phase3/languages/Names.php (modified) (history)
  • /trunk/phase3/languages/classes/LanguageZh.php (modified) (history)
  • /trunk/phase3/languages/messages/MessagesZh.php (modified) (history)
  • /trunk/phase3/languages/messages/MessagesZh_mo.php (added) (history)
  • /trunk/phase3/languages/messages/MessagesZh_my.php (added) (history)
  • /trunk/phase3/maintenance/language/messageTypes.inc (modified) (history)
  • /trunk/phase3/maintenance/language/messages.inc (modified) (history)

Diff [purge]

Index: trunk/phase3/RELEASE-NOTES
@@ -164,6 +164,8 @@
165165 * Add support for Hijri (Islamic) calendar
166166 * Add a new hook LinkerMakeExternalImage to allow extensions to modify the output of
167167 external (hotlinked) images.
 168+* (bug 14604) Introduced the following features for the LanguageConverter: Multi-tag support, single conversion flag, remove conversion flag on a single page, description flag, variant name, multi-variant fallbacks.
 169+* Add zh-mo and zh-my variants for the zh language
168170
169171 === Bug fixes in 1.13 ===
170172
@@ -484,8 +486,9 @@
485487 * Silesian (szl) (new)
486488 * Tajiki (Cyrillic script) (tg-cyrl) (new)
487489 * Tajiki (Latin script) (tg-latn) (new)
 490+* Chinese (Macau) (zh-mo) (new)
 491+* Chinese (Malaysia) (zh-my) (new)
488492
489 -
490493 == Compatibility ==
491494
492495 MediaWiki 1.13 requires PHP 5 (5.1 recommended). PHP 4 is no longer supported.
Index: trunk/phase3/includes/StringUtils.php
@@ -283,6 +283,17 @@
284284 $this->fss = false;
285285 }
286286
 287+ function removePair( $from ) {
 288+ unset($this->data[$from]);
 289+ $this->fss = false;
 290+ }
 291+
 292+ function removeArray( $data ) {
 293+ foreach( $data as $from => $to )
 294+ $this->removePair( $from );
 295+ $this->fss = false;
 296+ }
 297+
287298 function replace( $subject ) {
288299 if ( function_exists( 'fss_prep_replace' ) ) {
289300 wfProfileIn( __METHOD__.'-fss' );
Index: trunk/phase3/languages/messages/MessagesZh_my.php
@@ -0,0 +1,10 @@
 2+<?php
 3+/**
 4+ * Chinese (Malaysia) (中文 (马来西亚))
 5+ *
 6+ * @ingroup Language
 7+ * @file
 8+ */
 9+
 10+# Inherit everything for now
 11+$fallback = 'zh-sg';
Index: trunk/phase3/languages/messages/MessagesZh.php
@@ -22,7 +22,9 @@
2323 'variantname-zh-cn' => '大陆简体',
2424 'variantname-zh-tw' => '台灣正體',
2525 'variantname-zh-hk' => '香港繁體',
 26+'variantname-zh-mo' => '澳門繁體',
2627 'variantname-zh-sg' => '新加坡简体',
 28+'variantname-zh-my' => '大马简体',
2729 'variantname-zh' => '不转换/不轉換',
2830
2931 );
Index: trunk/phase3/languages/messages/MessagesZh_mo.php
@@ -0,0 +1,10 @@
 2+<?php
 3+/**
 4+ * Chinese (Macau) (中文 (澳門))
 5+ *
 6+ * @ingroup Language
 7+ * @file
 8+ */
 9+
 10+# Inherit everything for now
 11+$fallback = 'zh-hk';
Index: trunk/phase3/languages/Names.php
@@ -340,10 +340,12 @@
341341 'zh-cn' => "\xE2\x80\xAA中文(中国大陆)\xE2\x80\xAC", # Chinese (PRC)
342342 'zh-hans' => "\xE2\x80\xAA中文(简体)\xE2\x80\xAC", # Chinese written using the Simplified Chinese script
343343 'zh-hant' => "\xE2\x80\xAA中文(繁體)\xE2\x80\xAC", # Chinese written using the Traditional Chinese script
344 - 'zh-hk' => "\xE2\x80\xAA中文(香港)\xE2\x80\xAC", # Chinese (Hong Kong)
 344+ 'zh-hk' => "\xE2\x80\xAA中文(香港)\xE2\x80\xAC", # Chinese (Hong Kong)
345345 'zh-min-nan' => 'Bân-lâm-gú', # Min-nan -- (see bug 8217)
 346+ 'zh-mo' => "\xE2\x80\xAA中文(澳門)\xE2\x80\xAC", # Chinese (Macau)
 347+ 'zh-my' => "\xE2\x80\xAA中文(马来西亚)\xE2\x80\xAC", # Chinese (Malaysia)
346348 'zh-sg' => "\xE2\x80\xAA中文(新加坡)\xE2\x80\xAC", # Chinese (Singapore)
347 - 'zh-tw' => "\xE2\x80\xAA中文(台灣)\xE2\x80\xAC", # Chinese (Taiwan)
 349+ 'zh-tw' => "\xE2\x80\xAA中文(台灣)\xE2\x80\xAC", # Chinese (Taiwan)
348350 'zh-yue' => '粵語', # Cantonese -- (see bug 8217)
349351 'zu' => 'isiZulu' # Zulu
350352 );
Index: trunk/phase3/languages/classes/LanguageZh.php
@@ -7,33 +7,72 @@
88 * @ingroup Language
99 */
1010 class ZhConverter extends LanguageConverter {
 11+
 12+ function __construct($langobj, $maincode,
 13+ $variants=array(),
 14+ $variantfallbacks=array(),
 15+ $markup=array(),
 16+ $flags = array(),
 17+ $manualLevel = array() ) {
 18+ parent::__construct($langobj, $maincode,
 19+ $variants,
 20+ $variantfallbacks,
 21+ $markup,
 22+ $flags,
 23+ $manualLevel);
 24+ $names = array(
 25+ 'zh' => '原文',
 26+ 'zh-hans' => '简体',
 27+ 'zh-hant' => '繁體',
 28+ 'zh-cn' => '大陆',
 29+ 'zh-tw' => '台灣',
 30+ 'zh-hk' => '香港',
 31+ 'zh-mo' => '澳門',
 32+ 'zh-sg' => '新加坡',
 33+ 'zh-my' => '马来西亚',
 34+ );
 35+ $this->mVariantNames = array_merge($this->mVariantNames,$names);
 36+ }
 37+
1138 function loadDefaultTables() {
1239 require( dirname(__FILE__)."/../../includes/ZhConversion.php" );
1340 $this->mTables = array(
1441 'zh-hans' => new ReplacementArray( $zh2Hans ),
1542 'zh-hant' => new ReplacementArray( $zh2Hant ),
1643 'zh-cn' => new ReplacementArray( array_merge($zh2Hans, $zh2CN) ),
 44+ 'zh-hk' => new ReplacementArray( array_merge($zh2Hant, $zh2HK) ),
 45+ 'zh-mo' => new ReplacementArray( array_merge($zh2Hant, $zh2HK) ),
 46+ 'zh-my' => new ReplacementArray( array_merge($zh2Hans, $zh2SG) ),
 47+ 'zh-sg' => new ReplacementArray( array_merge($zh2Hans, $zh2SG) ),
1748 'zh-tw' => new ReplacementArray( array_merge($zh2Hant, $zh2TW) ),
18 - 'zh-sg' => new ReplacementArray( array_merge($zh2Hans, $zh2SG) ),
19 - 'zh-hk' => new ReplacementArray( array_merge($zh2Hant, $zh2HK) ),
2049 'zh' => new ReplacementArray
2150 );
2251 }
2352
2453 function postLoadTables() {
2554 $this->mTables['zh-cn']->merge( $this->mTables['zh-hans'] );
 55+ $this->mTables['zh-hk']->merge( $this->mTables['zh-hant'] );
 56+ $this->mTables['zh-mo']->merge( $this->mTables['zh-hant'] );
 57+ $this->mTables['zh-my']->merge( $this->mTables['zh-hans'] );
 58+ $this->mTables['zh-sg']->merge( $this->mTables['zh-hans'] );
2659 $this->mTables['zh-tw']->merge( $this->mTables['zh-hant'] );
27 - $this->mTables['zh-sg']->merge( $this->mTables['zh-hans'] );
28 - $this->mTables['zh-hk']->merge( $this->mTables['zh-hant'] );
2960 }
3061
3162 /* there shouldn't be any latin text in Chinese conversion, so no need
3263 to mark anything.
3264 $noParse is there for compatibility with LanguageConverter::markNoConversion
34 - */
 65+ */
3566 function markNoConversion($text, $noParse = false) {
3667 return $text;
3768 }
 69+
 70+ /* description of convert code in chinese language*/
 71+ function getRulesDesc($bidtable,$unidtable){
 72+ $text=parent::getRulesDesc($bidtable,$unidtable);
 73+ $text=str_replace(':',':',$text);
 74+ $text=str_replace(';',';',$text);
 75+ return $text;
 76+ }
3877
3978 function convertCategoryKey( $key ) {
4079 return $this->autoConvert( $key, 'zh' );
@@ -52,18 +91,28 @@
5392 global $wgHooks;
5493 parent::__construct();
5594
56 - $variants = array('zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk');
 95+ $variants = array('zh','zh-hans','zh-hant','zh-cn','zh-hk','zh-mo','zh-my','zh-sg','zh-tw');
5796 $variantfallbacks = array(
58 - 'zh' => 'zh-hans',
59 - 'zh-hans' => 'zh',
60 - 'zh-hant' => 'zh',
61 - 'zh-cn' => 'zh-hans',
62 - 'zh-sg' => 'zh-hans',
63 - 'zh-tw' => 'zh-hant',
64 - 'zh-hk' => 'zh-hant',
 97+ 'zh' => array('zh-hans','zh-hant','zh-cn','zh-tw','zh-hk','zh-sg','zh-mo','zh-my'),
 98+ 'zh-hans' => array('zh-cn','zh-sg','zh-my'),
 99+ 'zh-hant' => array('zh-tw','zh-hk','zh-mo'),
 100+ 'zh-cn' => array('zh-hans','zh-sg','zh-my'),
 101+ 'zh-sg' => array('zh-hans','zh-cn','zh-my'),
 102+ 'zh-my' => array('zh-hant','zh-sg','zh-cn'),
 103+ 'zh-tw' => array('zh-hant','zh-hk','zh-mo'),
 104+ 'zh-hk' => array('zh-hant','zh-mo','zh-tw'),
 105+ 'zh-mo' => array('zh-hant','zh-hk','zh-tw'),
65106 );
 107+ $ml=array(
 108+ 'zh' => 'disable',
 109+ 'zh-hans' => 'unidirectional',
 110+ 'zh-hant' => 'unidirectional',
 111+ );
66112
67 - $this->mConverter = new ZhConverter( $this, 'zh', $variants, $variantfallbacks );
 113+ $this->mConverter = new ZhConverter( $this, 'zh',
 114+ $variants, $variantfallbacks,
 115+ array(),array(),
 116+ $ml);
68117
69118 $wgHooks['ArticleSaveComplete'][] = $this->mConverter;
70119 }
@@ -111,3 +160,4 @@
112161 return $ret;
113162 }
114163 }
 164+
Index: trunk/phase3/languages/LanguageConverter.php
@@ -1,19 +1,24 @@
22 <?php
 3+//require_once( dirname(__FILE__).'/Names.php' );
 4+
35 /**
46 * @ingroup Language
57 *
68 * @author Zhengzhu Feng <zhengzhu@gmail.com>
79 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
 10+ * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com>
811 */
912
1013 class LanguageConverter {
1114 var $mPreferredVariant='';
1215 var $mMainLanguageCode;
13 - var $mVariants, $mVariantFallbacks;
 16+ var $mVariants, $mVariantFallbacks, $mVariantNames;
1417 var $mTablesLoaded = false;
1518 var $mTables;
1619 var $mTitleDisplay='';
1720 var $mDoTitleConvert=true, $mDoContentConvert=true;
 21+ var $mManualLevel; // 'bidirectional' 'unidirectional' 'disable' for each variants
 22+ var $mManualCodeError='<span style="color: red;">code error!</span>';
1823 var $mTitleFromFlag = false;
1924 var $mCacheKey;
2025 var $mLangObj;
@@ -21,38 +26,63 @@
2227 var $mFlags;
2328 var $mUcfirst = false;
2429
25 - const CACHE_VERSION_KEY = 'VERSION 5';
 30+ const CACHE_VERSION_KEY = 'VERSION 6';
2631
2732 /**
28 - * Constructor
 33+ * Constructor
2934 *
30 - * @param string $maincode the main language code of this language
31 - * @param array $variants the supported variants of this language
32 - * @param array $variantfallback the fallback language of each variant
33 - * @param array $markup array defining the markup used for manual conversion
 35+ * @param string $maincode the main language code of this language
 36+ * @param array $variants the supported variants of this language
 37+ * @param array $variantfallback the fallback language of each variant
 38+ * @param array $markup array defining the markup used for manual conversion
3439 * @param array $flags array defining the custom strings that maps to the flags
35 - * @access public
36 - */
 40+ * @access public
 41+ */
3742 function __construct($langobj, $maincode,
3843 $variants=array(),
3944 $variantfallbacks=array(),
4045 $markup=array(),
41 - $flags = array()) {
 46+ $flags = array(),
 47+ $manualLevel = array() ) {
4248 $this->mLangObj = $langobj;
4349 $this->mMainLanguageCode = $maincode;
4450 $this->mVariants = $variants;
4551 $this->mVariantFallbacks = $variantfallbacks;
 52+ global $wgLanguageNames;
 53+ $this->mVariantNames = $wgLanguageNames;
4654 $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
47 - $m = array('begin'=>'-{', 'flagsep'=>'|', 'codesep'=>':',
48 - 'varsep'=>';', 'end'=>'}-');
 55+ $m = array(
 56+ 'begin'=>'-{',
 57+ 'flagsep'=>'|',
 58+ 'unidsep'=>'=>', //for unidirectional conversion
 59+ 'codesep'=>':',
 60+ 'varsep'=>';',
 61+ 'end'=>'}-'
 62+ );
4963 $this->mMarkup = array_merge($m, $markup);
50 - $f = array('A'=>'A', 'T'=>'T', 'R' => 'R');
 64+ $f = array(
 65+ // 'S' show converted text
 66+ // '+' add rules for alltext
 67+ // 'E' the gave flags is error
 68+ // these flags above are reserved for program
 69+ 'A'=>'A', // add rule for convert code (all text convert)
 70+ 'T'=>'T', // title convert
 71+ 'R'=>'R', // raw content
 72+ 'D'=>'D', // convert description (subclass implement)
 73+ '-'=>'-', // remove convert (not implement)
 74+ 'H'=>'H', // add rule for convert code (but no display in placed code )
 75+ 'N'=>'N' // current variant name
 76+ );
5177 $this->mFlags = array_merge($f, $flags);
 78+ foreach( $this->mVariants as $v)
 79+ $this->mManualLevel[$v]=array_key_exists($v,$manualLevel)
 80+ ?$manualLevel[$v]
 81+ :'bidirectional';
5282 }
5383
5484 /**
55 - * @access public
56 - */
 85+ * @access public
 86+ */
5787 function getVariants() {
5888 return $this->mVariants;
5989 }
@@ -60,25 +90,47 @@
6191 /**
6292 * in case some variant is not defined in the markup, we need
6393 * to have some fallback. for example, in zh, normally people
64 - * will define zh-cn and zh-tw, but less so for zh-sg or zh-hk.
65 - * when zh-sg is preferred but not defined, we will pick zh-cn
 94+ * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk.
 95+ * when zh-sg is preferred but not defined, we will pick zh-hans
6696 * in this case. right now this is only used by zh.
6797 *
6898 * @param string $v the language code of the variant
69 - * @return string the code of the fallback language or false if there is no fallback
70 - * @private
71 - */
72 - function getVariantFallback($v) {
73 - return $this->mVariantFallbacks[$v];
 99+ * @return mixed the fallback variant code(s) (string or array of strings), or the main language code if no fallback is defined
 100+ * @private
 101+ */
 102+ function getVariantFallbacks($v) {
 103+ if( isset( $this->mVariantFallbacks[$v] ) ) {
 104+ return $this->mVariantFallbacks[$v];
 105+ }
 106+ return $this->mMainLanguageCode;
74107 }
75108
 109+ /**
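 110+ * return the text of the first listed variant that has an entry in the convert array
 111+ *
 112+ * @param mixed $variants variant language code, or array of codes tried in order
 113+ * @param array $carray convert array, keyed by variant code
 114+ * @return mixed the text stored for the first matching variant,
 115+ * or false if none matches or $variants is neither string nor array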
 116+ * @private
 117+ */
 118+ function getTextInCArray($variants,$carray){
 119+ if(is_string($variants)){ $variants=array($variants); }
 120+ if(!is_array($variants)) return false;
 121+ foreach ($variants as $variant){
 122+ if(array_key_exists($variant, $carray)){
 123+ return $carray[$variant];
 124+ }
 125+ }
 126+ return false;
 127+ }
76128
77129 /**
78130 * get preferred language variants.
79131 * @param boolean $fromUser Get it from $wgUser's preferences
80 - * @return string the preferred language code
81 - * @access public
82 - */
 132+ * @return string the preferred language code
 133+ * @access public
 134+ */
83135 function getPreferredVariant( $fromUser = true ) {
84136 global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant;
85137
@@ -140,15 +192,15 @@
141193 return $this->mMainLanguageCode;
142194
143195 }
144 -
 196+
145197 /**
146 - * dictionary-based conversion
147 - *
148 - * @param string $text the text to be converted
149 - * @param string $toVariant the target language code
150 - * @return string the converted text
151 - * @private
152 - */
 198+ * dictionary-based conversion
 199+ *
 200+ * @param string $text the text to be converted
 201+ * @param string $toVariant the target language code
 202+ * @return string the converted text
 203+ * @private
 204+ */
153205 function autoConvert($text, $toVariant=false) {
154206 $fname="LanguageConverter::autoConvert";
155207
@@ -182,7 +234,7 @@
183235 $scriptfix = '<script.*?>.*?<\/script>|';
184236
185237 $reg = '/'.$codefix . $scriptfix . '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
186 -
 238+
187239 $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
188240
189241 $m = array_shift($matches);
@@ -216,12 +268,12 @@
217269 }
218270
219271 /**
220 - * convert text to all supported variants
221 - *
222 - * @param string $text the text to be converted
223 - * @return array of string
224 - * @public
225 - */
 272+ * convert text to all supported variants
 273+ *
 274+ * @param string $text the text to be converted
 275+ * @return array of string
 276+ * @public
 277+ */
226278 function autoConvertToAllVariants($text) {
227279 $fname="LanguageConverter::autoConvertToAllVariants";
228280 wfProfileIn( $fname );
@@ -238,12 +290,12 @@
239291 }
240292
241293 /**
242 - * convert link text to all supported variants
243 - *
244 - * @param string $text the text to be converted
245 - * @return array of string
246 - * @public
247 - */
 294+ * convert link text to all supported variants
 295+ *
 296+ * @param string $text the text to be converted
 297+ * @return array of string
 298+ * @public
 299+ */
248300 function convertLinkToAllVariants($text) {
249301 if( !$this->mTablesLoaded )
250302 $this->loadTables();
@@ -295,37 +347,271 @@
296348 *
297349 */
298350 function parseFlags($marked){
299 - $flags = array();
 351+ $flags = array();
300352
301 - // process flag only if the flag is valid
302 - if(strlen($marked) < 2 || !(in_array($marked[0],$this->mFlags) && $marked[1]=='|' ) )
303 - return array($marked,array());
 353+ // for multi-FLAGs
 354+ if(strlen($marked) < 2 )
 355+ return array($marked,array('R'));
304356
305 - $tt = explode($this->mMarkup['flagsep'], $marked, 2);
 357+ $tt = explode($this->mMarkup['flagsep'], $marked, 2);
306358
307 - if(sizeof($tt) == 2) {
308 - $f = explode($this->mMarkup['varsep'], $tt[0]);
309 - foreach($f as $ff) {
310 - $ff = trim($ff);
311 - if(array_key_exists($ff, $this->mFlags) &&
312 - !array_key_exists($this->mFlags[$ff], $flags))
313 - $flags[] = $this->mFlags[$ff];
 359+ if(count($tt) == 2) {
 360+ $f = explode($this->mMarkup['varsep'], $tt[0]);
 361+ foreach($f as $ff) {
 362+ $ff = trim($ff);
 363+ if(array_key_exists($ff, $this->mFlags) &&
 364+ !in_array($this->mFlags[$ff], $flags))
 365+ $flags[] = $this->mFlags[$ff];
 366+ }
 367+ $rules = $tt[1];
 368+ } else {
 369+ $rules = $marked;
 370+ }
 371+
 372+ if( !in_array('R',$flags) ){
 373+ //FIXME: may cause trouble here...
 374+ //strip &nbsp; since it interferes with the parsing, plus,
 375+ //all spaces should be stripped in this tag anyway.
 376+ $rules = str_replace('&nbsp;', '', $rules);
 377+ $rules = str_replace('=&gt;','=>',$rules);
 378+ }
 379+
 380+ //check flags
 381+ if( in_array('R',$flags) ){
 382+ $flags = array('R');// remove other flags
 383+ } elseif ( in_array('N',$flags) ){
 384+ $flags = array('N');// remove other flags
 385+ } elseif ( in_array('-',$flags) ){
 386+ $flags = array('-');// remove other flags
 387+ } elseif (count($flags)==1 && $flags[0]=='T'){
 388+ $flags[]='H';
 389+ } elseif ( in_array('H',$flags) ){
 390+ // replace A flag, and remove other flags except T
 391+ $temp=array('+','H');
 392+ if(in_array('T',$flags)) $temp[] = 'T';
 393+ if(in_array('D',$flags)) $temp[] = 'D';
 394+ $flags = $temp;
 395+ } else {
 396+ if ( in_array('A',$flags)) {
 397+ $flags[]='+';
 398+ $flags[]='S';
 399+ }
 400+ if ( in_array('D',$flags) )
 401+ $flags=array_diff($flags,array('S'));
 402+ }
 403+ if ( count($flags)==0 )
 404+ $flags = array('S');
 405+
 406+ return array($rules,$flags);
 407+ }
 408+
 409+ function getRulesDesc($bidtable,$unidtable){
 410+ $text='';
 411+ foreach($bidtable as $k => $v)
 412+ $text .= $this->mVariantNames[$k].':'.$v.';';
 413+ foreach($unidtable as $k => $a)
 414+ foreach($a as $from=>$to)
 415+ $text.=$from.'��'.$this->mVariantNames[$k].':'.$to.';';
 416+ return $text;
 417+ }
 418+
 419+ /**
 420+ * parse the manually marked conversion rule
 421+ * @param string $rule the text of the rule
 422+ * @return array of the translation in each variant
 423+ * @private
 424+ */
 425+ function getConvTableFromRules($rules,$flags=array()) {
 426+ $bidtable = array();
 427+ $unidtable = array();
 428+ $choice = explode($this->mMarkup['varsep'], $rules );
 429+ foreach($choice as $c) {
 430+ $v = explode($this->mMarkup['codesep'], $c);
 431+ if(count($v) != 2)
 432+ continue;// syntax error, skip
 433+ $to=trim($v[1]);
 434+ $v=trim($v[0]);
 435+ $u = explode($this->mMarkup['unidsep'], $v);
 436+ if(count($u) == 1) {
 437+ $bidtable[$v] = $to;
 438+ } else if(count($u) == 2){
 439+ $from=trim($u[0]);$v=trim($u[1]);
 440+ if( array_key_exists($v,$unidtable) && !is_array($unidtable[$v]) )
 441+ $unidtable[$v]=array($from=>$to);
 442+ else
 443+ $unidtable[$v][$from]=$to;
 444+ }
 445+ // syntax error, pass
 446+ }
 447+ return array($bidtable,$unidtable);
 448+ }
 449+
 450+ /**
 451+ * get display text on markup -{...}-
 452+ * @param string $rules the original code
 453+ * @param array $flags FLAGs
 454+ * @param array $bidtable bidirectional convert table
 455+ * @param string $unidtable unidirectional convert table
 456+ * @param string $variant the current variant
 457+ * @param bool $$doConvert if do convert
 458+ * @private
 459+ */
 460+ function getRulesDisplay($rules,$flags,
 461+ $bidtable,$unidtable,
 462+ $variant=false,$doConvert=true){
 463+ if(!$variant) $variant = $this->getPreferredVariant();
 464+ $is_mc_disable = $this->mManualLevel[$variant]=='disable';
 465+
 466+ if( in_array('R',$flags) ) {
 467+ // if we don't do content convert, still strip the -{}- tags
 468+ $disp = $rules;
 469+ } elseif ( in_array('N',$flags) ){
 470+ // proces N flag: output current variant name
 471+ $disp = $this->mVariantNames[trim($rules)];
 472+ } elseif ( in_array('D',$flags) ){
 473+ // proces D flag: output rules description
 474+ $disp = $this->getRulesDesc($bidtable,$unidtable);
 475+ } elseif ( in_array('H',$flags) || in_array('-',$flags) ) {
 476+ // proces H,- flag or T only: output nothing
 477+ $disp = '';
 478+ } elseif ( in_array('S',$flags) ){
 479+ // the text converted
 480+ if($doConvert){
 481+ // display current variant in bidirectional array
 482+ $disp = $this->getTextInCArray($variant,$bidtable);
 483+ // or display current variant in fallbacks
 484+ if(!$disp)
 485+ $disp = $this->getTextInCArray($this->getVariantFallbacks($variant),$bidtable);
 486+ // or display current variant in unidirectional array
 487+ if(!$disp && array_key_exists($variant,$unidtable)){
 488+ $disp = array_values($unidtable[$variant]);
 489+ $disp = $disp[0];
314490 }
315 - $rules = $tt[1];
 491+ // or display frist text under disable manual convert
 492+ if(!$disp && $is_mc_disable) {
 493+ if(count($bidtable)>0){
 494+ $disp = array_values($bidtable);
 495+ $disp = $disp[0];
 496+ } else {
 497+ $disp = array_values($unidtable);
 498+ $disp = array_values($disp[0]);
 499+ $disp = $disp[0];
 500+ }
 501+ }
 502+ } else {// no convert
 503+ $disp = $rules;
316504 }
317 - else
318 - $rules = $marked;
 505+ } elseif ( in_array('T',$flags) ) {
 506+ // proces T flag : output nothing
 507+ $disp = '';
 508+ }
 509+ else
 510+ $disp= $this->mManualCodeError;
319511
320 - if( !in_array('R',$flags) ){
321 - //FIXME: may cause trouble here...
322 - //strip &nbsp; since it interferes with the parsing, plus,
323 - //all spaces should be stripped in this tag anyway.
324 - $rules = str_replace('&nbsp;', '', $rules);
 512+ return $disp;
 513+ }
 514+
 515+ function applyManualFlag($flags,$bidtable,$unidtable,$variant=false){
 516+ if(!$variant) $variant = $this->getPreferredVariant();
 517+
 518+ $is_title_flag = in_array('T', $flags);
 519+ // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
 520+ if($is_title_flag){
 521+ $this->mTitleFromFlag = true;
 522+ $this->mTitleDisplay = $this->getRulesDisplay($rules,array('S'),
 523+ $bidtable,$unidtable,
 524+ $variant,
 525+ $this->mDoTitleConvert);
 526+ }
 527+
 528+ if($this->mManualLevel[$variant]=='disable') return;
 529+
 530+ $is_remove_flag = !$is_title_flag && in_array('-', $flags);
 531+ $is_add_flag = !$is_remove_flag && in_array('+', $flags);
 532+ $is_bidMC = $this->mManualLevel[$variant]=='bidirectional';
 533+ $is_unidMC = $this->mManualLevel[$variant]=='unidirectional';
 534+ $vmarked=array();
 535+
 536+ foreach($this->mVariants as $v) {
 537+ /* for bidirectional array
 538+ fill in the missing variants, if any,
 539+ with fallbacks */
 540+ if($is_bidMC && !array_key_exists($v, $bidtable)) {
 541+ $vf = $this->getTextInCArray($this->getVariantFallbacks($v),$bidtable);
 542+ if($vf) $bidtable[$v] = $vf;
325543 }
 544+ if($is_bidMC && array_key_exists($v,$bidtable)){
 545+ foreach($vmarked as $vo){
 546+ // use syntax:
 547+ // -{A|zh:WordZh;zh-tw:WordTw}- or -{+|zh:WordZh;zh-tw:WordTw}-
 548+ // to introduce a custom mapping between
 549+ // words WordZh and WordTw in the whole text
 550+ if($is_add_flag){
 551+ $this->mTables[$v]->setPair($bidtable[$vo], $bidtable[$v]);
 552+ $this->mTables[$vo]->setPair($bidtable[$v], $bidtable[$vo]);
 553+ }
 554+ // use syntax -{-|zh:WordZh;zh-tw:WordTw}- to remove a conversion
 555+ // words WordZh and WordTw in the whole text
 556+ if($is_remove_flag){
 557+ $this->mTables[$v]->removePair($bidtable[$vo]);
 558+ $this->mTables[$vo]->removePair($bidtable[$v]);
 559+ }
 560+ }
 561+ $vmarked[]=$v;
 562+ }
 563+ /*for unidirectional array
 564+ fill to convert tables */
 565+ if($is_unidMC && array_key_exists($v,$unidtable)){
 566+ if($is_add_flag)$this->mTables[$v]->mergeArray($unidtable[$v]);
 567+ if($is_remove_flag)$this->mTables[$v]->removeArray($unidtable[$v]);
 568+ }
 569+ }
 570+ }
326571
327 - return array($rules,$flags);
 572+ /**
 573+ * Parse rules and flags
 574+ * @private
 575+ */
 576+ function parseRules($rules,$flags,$variant=false){
 577+ if(!$variant) $variant = $this->getPreferredVariant();
 578+
 579+ list($bidtable,$unidtable) = $this->getConvTableFromRules($rules, $flags);
 580+ if(count($bidtable)==0 && count($unidtable)==0
 581+ && !in_array('N',$flags) && !in_array('T',$flags) )
 582+ $flags = array('R');
 583+ $disp = $this->getRulesDisplay($rules,$flags,
 584+ $bidtable,$unidtable,
 585+ $variant,
 586+ $this->mDoContentConvert);
 587+ $this->applyManualFlag($flags,$bidtable,$unidtable);
 588+
 589+ return $disp;
328590 }
 591+
 592+ function convertTitle($text){
 593+ // check for __NOTC__ tag
 594+ if( !$this->mDoTitleConvert ) {
 595+ $this->mTitleDisplay = $text;
 596+ return $text;
 597+ }
329598
 599+ // use the title from the T flag if any
 600+ if($this->mTitleFromFlag){
 601+ $this->mTitleFromFlag = false;
 602+ return $this->mTitleDisplay;
 603+ }
 604+
 605+ global $wgRequest;
 606+ $isredir = $wgRequest->getText( 'redirect', 'yes' );
 607+ $action = $wgRequest->getText( 'action' );
 608+ if ( $isredir == 'no' || $action == 'edit' ) {
 609+ return $text;
 610+ } else {
 611+ $this->mTitleDisplay = $this->convert($text);
 612+ return $this->mTitleDisplay;
 613+ }
 614+ }
 615+
330616 /**
331617 * convert text to different variants of a language. the automatic
332618 * conversion is done in autoConvert(). here we parse the text
@@ -334,134 +620,51 @@
335621 *
336622 * syntax of the markup:
337623 * -{code1:text1;code2:text2;...}- or
 624+ * -{flags|code1:text1;code2:text2;...}- or
338625 * -{text}- in which case no conversion should take place for text
339 - *
340 - * @param string $text text to be converted
341 - * @param bool $isTitle whether this conversion is for the article title
342 - * @return string converted text
343 - * @access public
344 - */
 626+ *
 627+ * @param string $text text to be converted
 628+ * @param bool $isTitle whether this conversion is for the article title
 629+ * @return string converted text
 630+ * @access public
 631+ */
345632 function convert( $text , $isTitle=false) {
346 - $mw =& MagicWord::get( 'notitleconvert' );
 633+
 634+ $mw =& MagicWord::get( 'notitleconvert' );
347635 if( $mw->matchAndRemove( $text ) )
348636 $this->mDoTitleConvert = false;
349 -
350 - $mw =& MagicWord::get( 'nocontentconvert' );
 637+ $mw =& MagicWord::get( 'nocontentconvert' );
351638 if( $mw->matchAndRemove( $text ) ) {
352639 $this->mDoContentConvert = false;
353640 }
354641
355642 // no conversion if redirecting
356 - $mw =& MagicWord::get( 'redirect' );
 643+ $mw =& MagicWord::get( 'redirect' );
357644 if( $mw->matchStart( $text ))
358645 return $text;
359646
360 - if( $isTitle ) {
 647+ // for title convertion
 648+ if ($isTitle) return $this->convertTitle($text);
361649
362 - // use the title from the T flag if any
363 - if($this->mTitleFromFlag){
364 - $this->mTitleFromFlag = false;
365 - return $this->mTitleDisplay;
366 - }
367 -
368 - // check for __NOTC__ tag
369 - if( !$this->mDoTitleConvert ) {
370 - $this->mTitleDisplay = $text;
371 - return $text;
372 - }
373 -
374 - global $wgRequest;
375 - $isredir = $wgRequest->getText( 'redirect', 'yes' );
376 - $action = $wgRequest->getText( 'action' );
377 - if ( $isredir == 'no' || $action == 'edit' ) {
378 - return $text;
379 - }
380 - else {
381 - $this->mTitleDisplay = $this->convert($text);
382 - return $this->mTitleDisplay;
383 - }
384 - }
385 -
386650 $plang = $this->getPreferredVariant();
387 - if( isset( $this->mVariantFallbacks[$plang] ) ) {
388 - $fallback = $this->mVariantFallbacks[$plang];
389 - } else {
390 - $fallback = $this->mMainLanguageCode;
391 - }
392651
393652 $tarray = explode($this->mMarkup['begin'], $text);
394653 $tfirst = array_shift($tarray);
395654 if($this->mDoContentConvert)
396 - $text = $this->autoConvert($tfirst);
 655+ $text = $this->autoConvert($tfirst,$plang);
397656 else
398657 $text = $tfirst;
399 - foreach($tarray as $txt) {
 658+ foreach($tarray as $txt) {
400659 $marked = explode($this->mMarkup['end'], $txt, 2);
401660
402661 // strip the flags from syntax like -{T| ... }-
403662 list($rules,$flags) = $this->parseFlags($marked[0]);
404663
405 - // proces R flag: output raw content of -{ ... }-
406 - if( in_array('R',$flags) ){
407 - $disp = $rules;
408 - } else if( $this->mDoContentConvert){
409 - // parse the contents -{ ... }-
410 - $carray = $this->parseManualRule($rules, $flags);
 664+ $text .= $this->parseRules($rules,$flags,$plang);
411665
412 - $disp = '';
413 - if(array_key_exists($plang, $carray)) {
414 - $disp = $carray[$plang];
415 - } else if(array_key_exists($fallback, $carray)) {
416 - $disp = $carray[$fallback];
417 - }
418 - } else{
419 - // if we don't do content convert, still strip the -{}- tags
420 - $disp = $rules;
421 - $flags = array();
422 - }
423 -
424 - if($disp) {
425 - // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
426 - if(in_array('T', $flags)){
427 - $this->mTitleFromFlag = true;
428 - $this->mTitleDisplay = $disp;
429 - }
430 - else
431 - $text .= $disp;
432 -
433 - // use syntax -{A|zh:WordZh;zh-tw:WordTw}- to introduce a custom mapping between
434 - // words WordZh and WordTw in the whole text
435 - if(in_array('A', $flags)) {
436 -
437 - /* fill in the missing variants, if any,
438 - with fallbacks */
439 - foreach($this->mVariants as $v) {
440 - if(!array_key_exists($v, $carray)) {
441 - $vf = $this->getVariantFallback($v);
442 - if(array_key_exists($vf, $carray))
443 - $carray[$v] = $carray[$vf];
444 - }
445 - }
446 -
447 - foreach($this->mVariants as $vfrom) {
448 - if(!array_key_exists($vfrom, $carray))
449 - continue;
450 - foreach($this->mVariants as $vto) {
451 - if($vfrom == $vto)
452 - continue;
453 - if(!array_key_exists($vto, $carray))
454 - continue;
455 - $this->mTables[$vto]->setPair($carray[$vfrom], $carray[$vto]);
456 - }
457 - }
458 - }
459 - }
460 - else {
461 - $text .= $marked[0];
462 - }
463666 if(array_key_exists(1, $marked)){
464667 if( $this->mDoContentConvert )
465 - $text .= $this->autoConvert($marked[1]);
 668+ $text .= $this->autoConvert($marked[1],$plang);
466669 else
467670 $text .= $marked[1];
468671 }
@@ -471,32 +674,6 @@
472675 }
473676
474677 /**
475 - * parse the manually marked conversion rule
476 - * @param string $rule the text of the rule
477 - * @return array of the translation in each variant
478 - * @private
479 - */
480 - function parseManualRule($rules, $flags=array()) {
481 -
482 - $choice = explode($this->mMarkup['varsep'], $rules);
483 - $carray = array();
484 - if(sizeof($choice) == 1) {
485 - /* a single choice */
486 - foreach($this->mVariants as $v)
487 - $carray[$v] = $choice[0];
488 - }
489 - else {
490 - foreach($choice as $c) {
491 - $v = explode($this->mMarkup['codesep'], $c);
492 - if(sizeof($v) != 2) // syntax error, skip
493 - continue;
494 - $carray[trim($v[0])] = trim($v[1]);
495 - }
496 - }
497 - return $carray;
498 - }
499 -
500 - /**
501678 * if a language supports multiple variants, it is
502679 * possible that non-existing link in one variant
503680 * actually exists in another variant. this function
@@ -505,7 +682,7 @@
506683 * @param string $link the name of the link
507684 * @param mixed $nt the title object of the link
508685 * @return null the input parameters may be modified upon return
509 - * @access public
 686+ * @access public
510687 */
511688 function findVariantLink( &$link, &$nt ) {
512689 global $wgDisableLangConversion;
@@ -546,29 +723,29 @@
547724 }
548725
549726 /**
550 - * returns language specific hash options
551 - *
552 - * @access public
553 - */
 727+ * returns language specific hash options
 728+ *
 729+ * @access public
 730+ */
554731 function getExtraHashOptions() {
555732 $variant = $this->getPreferredVariant();
556733 return '!' . $variant ;
557734 }
558735
559736 /**
560 - * get title text as defined in the body of the article text
561 - *
562 - * @access public
563 - */
 737+ * get title text as defined in the body of the article text
 738+ *
 739+ * @access public
 740+ */
564741 function getParsedTitle() {
565742 return $this->mTitleDisplay;
566743 }
567744
568745 /**
569 - * a write lock to the cache
570 - *
571 - * @private
572 - */
 746+ * a write lock to the cache
 747+ *
 748+ * @private
 749+ */
573750 function lockCache() {
574751 global $wgMemc;
575752 $success = false;
@@ -581,10 +758,10 @@
582759 }
583760
584761 /**
585 - * unlock cache
586 - *
587 - * @private
588 - */
 762+ * unlock cache
 763+ *
 764+ * @private
 765+ */
589766 function unlockCache() {
590767 global $wgMemc;
591768 $wgMemc->delete($this->mCacheKey . "lock");
@@ -592,20 +769,20 @@
593770
594771
595772 /**
596 - * Load default conversion tables
597 - * This method must be implemented in derived class
598 - *
599 - * @private
600 - */
 773+ * Load default conversion tables
 774+ * This method must be implemented in derived class
 775+ *
 776+ * @private
 777+ */
601778 function loadDefaultTables() {
602779 $name = get_class($this);
603780 wfDie("Must implement loadDefaultTables() method in class $name");
604781 }
605782
606783 /**
607 - * load conversion tables either from the cache or the disk
608 - * @private
609 - */
 784+ * load conversion tables either from the cache or the disk
 785+ * @private
 786+ */
610787 function loadTables($fromcache=true) {
611788 global $wgMemc;
612789 if( $this->mTablesLoaded )
@@ -642,16 +819,16 @@
643820 }
644821
645822 /**
646 - * Hook for post processig after conversion tables are loaded
647 - *
648 - */
 823+ * Hook for post processig after conversion tables are loaded
 824+ *
 825+ */
649826 function postLoadTables() {}
650827
651828 /**
652 - * Reload the conversion tables
653 - *
654 - * @private
655 - */
 829+ * Reload the conversion tables
 830+ *
 831+ * @private
 832+ */
656833 function reloadTables() {
657834 if($this->mTables)
658835 unset($this->mTables);
@@ -661,20 +838,18 @@
662839
663840
664841 /**
665 - * parse the conversion table stored in the cache
666 - *
667 - * the tables should be in blocks of the following form:
668 -
669 - * -{
670 - * word => word ;
671 - * word => word ;
672 - * ...
673 - * }-
674 - *
675 - * to make the tables more manageable, subpages are allowed
676 - * and will be parsed recursively if $recursive=true
677 - *
678 - * @private
 842+ * parse the conversion table stored in the cache
 843+ *
 844+ * the tables should be in blocks of the following form:
 845+ * -{
 846+ * word => word ;
 847+ * word => word ;
 848+ * ...
 849+ * }-
 850+ *
 851+ * to make the tables more manageable, subpages are allowed
 852+ * and will be parsed recursively if $recursive=true
 853+ *
679854 */
680855 function parseCachedTable($code, $subpage='', $recursive=true) {
681856 global $wgMessageCache;
@@ -757,37 +932,32 @@
758933 *
759934 * @param string $text text to be tagged for no conversion
760935 * @return string the tagged text
761 - */
 936+ */
762937 function markNoConversion($text, $noParse=false) {
763938 # don't mark if already marked
764939 if(strpos($text, $this->mMarkup['begin']) ||
765940 strpos($text, $this->mMarkup['end']))
766941 return $text;
767942
768 - $ret = $this->mMarkup['begin'] . $text . $this->mMarkup['end'];
 943+ $ret = $this->mMarkup['begin'] .'R|'. $text . $this->mMarkup['end'];
769944 return $ret;
770945 }
771946
772947 /**
773948 * convert the sorting key for category links. this should make different
774949 * keys that are variants of each other map to the same key
775 - */
 950+ */
776951 function convertCategoryKey( $key ) {
777952 return $key;
778953 }
779954 /**
780 - * hook to refresh the cache of conversion tables when
781 - * MediaWiki:conversiontable* is updated
782 - * @private
783 - */
 955+ * hook to refresh the cache of conversion tables when
 956+ * MediaWiki:conversiontable* is updated
 957+ * @private
 958+ */
784959 function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section, $flags, $revision) {
785960 $titleobj = $article->getTitle();
786961 if($titleobj->getNamespace() == NS_MEDIAWIKI) {
787 - /*
788 - global $wgContLang; // should be an LanguageZh.
789 - if(get_class($wgContLang) != 'languagezh')
790 - return true;
791 - */
792962 $title = $titleobj->getDBkey();
793963 $t = explode('/', $title, 3);
794964 $c = count($t);
@@ -808,6 +978,4 @@
809979 $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
810980 return $ret;
811981 }
812 -
813 -
814982 }
Index: trunk/phase3/maintenance/language/messages.inc
@@ -2011,7 +2011,9 @@
20122012 'variantname-zh-cn',
20132013 'variantname-zh-tw',
20142014 'variantname-zh-hk',
 2015+ 'variantname-zh-mo',
20152016 'variantname-zh-sg',
 2017+ 'variantname-zh-my',
20162018 'variantname-zh',
20172019 ),
20182020 'variantname-sr' => array(
Index: trunk/phase3/maintenance/language/messageTypes.inc
@@ -171,6 +171,8 @@
172172 'variantname-zh-cn',
173173 'variantname-zh-tw',
174174 'variantname-zh-hk',
 175+ 'variantname-zh-mo',
 176+ 'variantname-zh-my',
175177 'variantname-zh-sg',
176178 'variantname-zh',
177179 'variantname-sr-ec',

Follow-up revisions

Revision | Commit summary | Author | Date
r36752 | fixing encoding problems on r36664... | shinjiman | 15:08, 27 June 2008
r37058 | * (bug 14604) Update LanguageConverter for T (Title) conversion... | shinjiman | 15:01, 4 July 2008
r37186 | (bug 14604#c6): Fix regression in variant conversion when semicolon is within... | rainman | 17:23, 6 July 2008
r37373 | * (bug 14604) Update LanguageConverter for compatibility on -{*|xxx}- usage... | shinjiman | 08:16, 9 July 2008

Status & tagging log