Index: trunk/phase3/maintenance/parserTests.txt |
— | — | @@ -6982,6 +6982,28 @@ |
6983 | 6983 | </p> |
6984 | 6984 | !! end |
6985 | 6985 | |
| 6986 | +!! test |
| 6987 | +Unclosed language converter markup "-{" |
| 6988 | +!! options |
| 6989 | +language=sr |
| 6990 | +!! input |
| 6991 | +-{T|hello |
| 6992 | +!! result |
| 6993 | +<p>-{T|hello |
| 6994 | +</p> |
| 6995 | +!! end |
| 6996 | + |
| 6997 | +!! test |
| 6998 | +Don't convert raw rule "-{R|=>}-" to "=>" |
| 6999 | +!! options |
| 7000 | +language=sr |
| 7001 | +!! input |
| 7002 | +-{R|=>}- |
| 7003 | +!! result |
| 7004 | +<p>=> |
| 7005 | +</p> |
| 7006 | +!!end |
| 7007 | + |
6986 | 7008 | !!article |
6987 | 7009 | Template:Bullet |
6988 | 7010 | !!text |
Index: trunk/phase3/maintenance/language/messages.inc |
— | — | @@ -614,6 +614,7 @@ |
615 | 615 | 'post-expand-template-argument-category', |
616 | 616 | 'parser-template-loop-warning', |
617 | 617 | 'parser-template-recursion-depth-warning', |
| 618 | + 'language-converter-depth-warning', |
618 | 619 | ), |
619 | 620 | 'undo' => array( |
620 | 621 | 'undo-success', |
Index: trunk/phase3/languages/LanguageConverter.php |
— | — | @@ -25,7 +25,6 @@ |
26 | 26 | var $mManualLevel; |
27 | 27 | var $mCacheKey; |
28 | 28 | var $mLangObj; |
29 | | - var $mMarkup; |
30 | 29 | var $mFlags; |
31 | 30 | var $mDescCodeSep = ':', $mDescVarSep = ';'; |
32 | 31 | var $mUcfirst = false; |
— | — | @@ -33,6 +32,8 @@ |
34 | 33 | var $mURLVariant; |
35 | 34 | var $mUserVariant; |
36 | 35 | var $mHeaderVariant; |
| 36 | + var $mMaxDepth = 10; |
| 37 | + var $mVarSeparatorPattern; |
37 | 38 | |
38 | 39 | const CACHE_VERSION_KEY = 'VERSION 6'; |
39 | 40 | |
— | — | @@ -52,7 +53,6 @@ |
53 | 54 | function __construct( $langobj, $maincode, |
54 | 55 | $variants = array(), |
55 | 56 | $variantfallbacks = array(), |
56 | | - $markup = array(), |
57 | 57 | $flags = array(), |
58 | 58 | $manualLevel = array() ) { |
59 | 59 | $this->mLangObj = $langobj; |
— | — | @@ -69,15 +69,6 @@ |
70 | 70 | global $wgLanguageNames; |
71 | 71 | $this->mVariantNames = $wgLanguageNames; |
72 | 72 | $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode ); |
73 | | - $m = array( |
74 | | - 'begin' => '-{', |
75 | | - 'flagsep' => '|', |
76 | | - 'unidsep' => '=>', // for unidirectional conversion |
77 | | - 'codesep' => ':', |
78 | | - 'varsep' => ';', |
79 | | - 'end' => '}-' |
80 | | - ); |
81 | | - $this->mMarkup = array_merge( $m, $markup ); |
82 | 73 | $f = array( |
83 | 74 | // 'S' show converted text |
84 | 75 | // '+' add rules for alltext |
— | — | @@ -124,7 +115,7 @@ |
125 | 116 | * @public |
126 | 117 | */ |
127 | 118 | function getVariantFallbacks( $v ) { |
128 | | - if ( array_key_exists( $v, $this->mVariantFallbacks ) ) { |
| 119 | + if ( isset( $this->mVariantFallbacks[$v] ) ) { |
129 | 120 | return $this->mVariantFallbacks[$v]; |
130 | 121 | } |
131 | 122 | return $this->mMainLanguageCode; |
— | — | @@ -259,7 +250,7 @@ |
260 | 251 | } |
261 | 252 | |
262 | 253 | // explode by comma |
263 | | - $result = explode( ',', strtolower( $acceptLanguage ) ); |
| 254 | + $result = StringUtils::explode( ',', strtolower( $acceptLanguage ) ); |
264 | 255 | $languages = array(); |
265 | 256 | |
266 | 257 | foreach ( $result as $elem ) { |
— | — | @@ -473,19 +464,22 @@ |
474 | 465 | } |
475 | 466 | |
476 | 467 | $ret = array(); |
477 | | - $tarray = explode( $this->mMarkup['begin'], $text ); |
478 | | - $tfirst = array_shift( $tarray ); |
| 468 | + $tarray = StringUtils::explode( '-{', $text ); |
| 469 | + $first = true; |
479 | 470 | |
480 | | - foreach ( $this->mVariants as $variant ) { |
481 | | - $ret[$variant] = $this->translate( $tfirst, $variant ); |
482 | | - } |
483 | | - |
484 | 471 | foreach ( $tarray as $txt ) { |
485 | | - $marked = explode( $this->mMarkup['end'], $txt, 2 ); |
| 472 | + if ( $first ) { |
| 473 | + $first = false; |
| 474 | + foreach ( $this->mVariants as $variant ) { |
| 475 | + $ret[$variant] = $this->translate( $txt, $variant ); |
| 476 | + } |
| 477 | + continue; |
| 478 | + } |
486 | 479 | |
| 480 | + $marked = explode( '}-', $txt, 2 ); |
| 481 | + |
487 | 482 | foreach ( $this->mVariants as $variant ) { |
488 | | - $ret[$variant] .= $this->mMarkup['begin'] . $marked[0] . |
489 | | - $this->mMarkup['end']; |
| 483 | + $ret[$variant] .= '-{' . $marked[0] . '}-'; |
490 | 484 | if ( array_key_exists( 1, $marked ) ) { |
491 | 485 | $ret[$variant] .= $this->translate( $marked[1], $variant ); |
492 | 486 | } |
— | — | @@ -535,7 +529,7 @@ |
536 | 530 | * @private |
537 | 531 | */ |
538 | 532 | function convertNamespace( $title, $variant ) { |
539 | | - $splittitle = explode( ':', $title ); |
| 533 | + $splittitle = explode( ':', $title, 2 ); |
540 | 534 | if ( count( $splittitle ) < 2 ) { |
541 | 535 | return $title; |
542 | 536 | } |
— | — | @@ -547,73 +541,6 @@ |
548 | 542 | } |
549 | 543 | |
550 | 544 | /** |
551 | | - * Convert a text array. |
552 | | - * |
553 | | - * @param string $tarray text array to be converted |
554 | | - * @param string $plang preferred variant |
555 | | - * @return string converted text |
556 | | - * @private |
557 | | - */ |
558 | | - function convertArray( $tarray, $plang ) { |
559 | | - $beginlen = strlen( $this->mMarkup['begin'] ); |
560 | | - $converted = ''; |
561 | | - $middle = ''; |
562 | | - |
563 | | - foreach ( $tarray as $text ) { |
564 | | - // for nested use |
565 | | - if( $middle ) { |
566 | | - $text = $middle . $text; |
567 | | - $middle = ''; |
568 | | - } |
569 | | - |
570 | | - // find first and last begin markup(s) |
571 | | - $firstbegin = strpos( $text, $this->mMarkup['begin'] ); |
572 | | - $lastbegin = strrpos( $text, $this->mMarkup['begin'] ); |
573 | | - |
574 | | - // if $text contains no begin markup, |
575 | | - // append $text to $converted and restore end markup |
576 | | - if( $firstbegin === false ) { |
577 | | - $converted .= $this->autoConvert( $text, $plang ); |
578 | | - $converted .= $this->mMarkup['end']; |
579 | | - continue; |
580 | | - } |
581 | | - |
582 | | - // split $text into $left and $right, |
583 | | - // omit the begin markup in $right |
584 | | - $left = substr( $text, 0, $firstbegin ); |
585 | | - $right = substr( $text, $lastbegin + $beginlen ); |
586 | | - |
587 | | - // always convert $left and append it to $converted |
588 | | - // for nested case, $left is blank but can also be converted |
589 | | - $converted .= $this->autoConvert( $left, $plang ); |
590 | | - |
591 | | - // parse and apply manual rule from $right |
592 | | - $crule = new ConverterRule( $right, $this ); |
593 | | - $crule->parse( $plang ); |
594 | | - $right = $crule->getDisplay(); |
595 | | - $this->applyManualConv( $crule ); |
596 | | - |
597 | | - // if $text contains only one begin markup, |
598 | | - // append $left and $right to $converted. |
599 | | - // |
600 | | - // otherwise it's a nested use like "-{-{}-}-", |
601 | | - // this should be handled properly. |
602 | | - if( $firstbegin === $lastbegin ) { |
603 | | - $converted .= $right; |
604 | | - } |
605 | | - else { |
606 | | - // not omit the first begin markup |
607 | | - $middle = substr( $text, $firstbegin, $lastbegin - $firstbegin ); |
608 | | - $middle .= $right; |
609 | | - //print $middle; |
610 | | - } |
611 | | - } |
612 | | - // Remove the last delimiter (wasn't real) |
613 | | - $converted = substr( $converted, 0, - strlen( $this->mMarkup['end'] ) ); |
614 | | - return $converted; |
615 | | - } |
616 | | - |
617 | | - /** |
618 | 545 | * Convert text to different variants of a language. The automatic |
619 | 546 | * conversion is done in autoConvert(). Here we parse the text |
620 | 547 | * marked with -{}-, which specifies special conversions of the |
— | — | @@ -632,14 +559,107 @@ |
633 | 560 | global $wgDisableLangConversion; |
634 | 561 | if ( $wgDisableLangConversion ) return $text; |
635 | 562 | |
636 | | - $plang = $this->getPreferredVariant(); |
| 563 | + $variant = $this->getPreferredVariant(); |
637 | 564 | |
638 | | - $tarray = StringUtils::explode( $this->mMarkup['end'], $text ); |
639 | | - $converted = $this->convertArray( $tarray, $plang ); |
| 565 | + return $this->recursiveConvertTopLevel( $text, $variant ); |
| 566 | + } |
640 | 567 | |
641 | | - return $converted; |
| 568 | + protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) { |
| 569 | + $startPos = 0; |
| 570 | + $out = ''; |
| 571 | + $length = strlen( $text ); |
| 572 | + while ( $startPos < $length ) { |
| 573 | + $m = false; |
| 574 | + $pos = strpos( $text, '-{', $startPos ); |
| 575 | + |
| 576 | + if ( $pos === false ) { |
| 577 | + // No more markup, append final segment |
| 578 | + $out .= $this->autoConvert( substr( $text, $startPos ), $variant ); |
| 579 | + $startPos = $length; |
| 580 | + return $out; |
| 581 | + } |
| 582 | + |
| 583 | + // Markup found |
| 584 | + // Append initial segment |
| 585 | + $out .= $this->autoConvert( substr( $text, $startPos, $pos - $startPos ), $variant ); |
| 586 | + |
| 587 | + // Advance position |
| 588 | + $startPos = $pos; |
| 589 | + |
| 590 | + // Do recursive conversion |
| 591 | + $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 ); |
| 592 | + } |
| 593 | + |
| 594 | + return $out; |
642 | 595 | } |
643 | 596 | |
| 597 | + protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) { |
| 598 | + // Quick sanity check (no function calls) |
| 599 | + if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) { |
| 600 | + throw new MWException( __METHOD__.': invalid input string' ); |
| 601 | + } |
| 602 | + |
| 603 | + $startPos += 2; |
| 604 | + $inner = ''; |
| 605 | + $warningDone = false; |
| 606 | + $length = strlen( $text ); |
| 607 | + |
| 608 | + while ( $startPos < $length ) { |
| 609 | + $m = false; |
| 610 | + preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos ); |
| 611 | + if ( !$m ) { |
| 612 | + // Unclosed rule |
| 613 | + break; |
| 614 | + } |
| 615 | + |
| 616 | + $token = $m[0][0]; |
| 617 | + $pos = $m[0][1]; |
| 618 | + |
| 619 | + // Markup found |
| 620 | + // Append initial segment |
| 621 | + $inner .= substr( $text, $startPos, $pos - $startPos ); |
| 622 | + |
| 623 | + // Advance position |
| 624 | + $startPos = $pos; |
| 625 | + |
| 626 | + switch ( $token ) { |
| 627 | + case '-{': |
| 628 | + // Check max depth |
| 629 | + if ( $depth >= $this->mMaxDepth ) { |
| 630 | + $inner .= '-{'; |
| 631 | + if ( !$warningDone ) { |
| 632 | + $inner .= '<span class="error">' . |
| 633 | + wfMsgForContent( 'language-converter-depth-warning', |
| 634 | + $this->mMaxDepth ) . |
| 635 | + '</span>'; |
| 636 | + $warningDone = true; |
| 637 | + } |
| 638 | + $startPos += 2; |
| 639 | + continue; |
| 640 | + } |
| 641 | + // Recursively parse another rule |
| 642 | + $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 ); |
| 643 | + break; |
| 644 | + case '}-': |
| 645 | + // Apply the rule |
| 646 | + $startPos += 2; |
| 647 | + $rule = new ConverterRule( $inner, $this ); |
| 648 | + $rule->parse( $variant ); |
| 649 | + $this->applyManualConv( $rule ); |
| 650 | + return $rule->getDisplay(); |
| 651 | + default: |
| 652 | + throw new MWException( __METHOD__.': invalid regex match' ); |
| 653 | + } |
| 654 | + } |
| 655 | + |
| 656 | + // Unclosed rule |
| 657 | + if ( $startPos < $length ) { |
| 658 | + $inner .= substr( $text, $startPos ); |
| 659 | + } |
| 660 | + $startPos = $length; |
| 661 | + return '-{' . $this->autoConvert( $inner, $variant ); |
| 662 | + } |
| 663 | + |
644 | 664 | /** |
645 | 665 | * If a language supports multiple variants, it is |
646 | 666 | * possible that non-existing link in one variant |
— | — | @@ -840,14 +860,14 @@ |
841 | 861 | // [[MediaWiki:conversiontable/zh-xx/...|...]] |
842 | 862 | $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) . |
843 | 863 | ':Conversiontable'; |
844 | | - $subs = explode( '[[', $txt ); |
| 864 | + $subs = StringUtils::explode( '[[', $txt ); |
845 | 865 | $sublinks = array(); |
846 | 866 | foreach ( $subs as $sub ) { |
847 | 867 | $link = explode( ']]', $sub, 2 ); |
848 | 868 | if ( count( $link ) != 2 ) { |
849 | 869 | continue; |
850 | 870 | } |
851 | | - $b = explode( '|', $link[0] ); |
| 871 | + $b = explode( '|', $link[0], 2 ); |
852 | 872 | $b = explode( '/', trim( $b[0] ), 3 ); |
853 | 873 | if ( count( $b ) == 3 ) { |
854 | 874 | $sublink = $b[2]; |
— | — | @@ -862,16 +882,21 @@ |
863 | 883 | |
864 | 884 | |
865 | 885 | // parse the mappings in this page |
866 | | - $blocks = explode( $this->mMarkup['begin'], $txt ); |
867 | | - array_shift( $blocks ); |
| 886 | + $blocks = StringUtils::explode( '-{', $txt ); |
868 | 887 | $ret = array(); |
| 888 | + $first = true; |
869 | 889 | foreach ( $blocks as $block ) { |
870 | | - $mappings = explode( $this->mMarkup['end'], $block, 2 ); |
| 890 | + if ( $first ) { |
| 891 | + // Skip the part before the first -{ |
| 892 | + $first = false; |
| 893 | + continue; |
| 894 | + } |
| 895 | + $mappings = explode( '}-', $block, 2 ); |
871 | 896 | $stripped = str_replace( array( "'", '"', '*', '#' ), '', |
872 | 897 | $mappings[0] ); |
873 | | - $table = explode( ';', $stripped ); |
| 898 | + $table = StringUtils::explode( ';', $stripped ); |
874 | 899 | foreach ( $table as $t ) { |
875 | | - $m = explode( '=>', $t ); |
| 900 | + $m = explode( '=>', $t, 3 ); |
876 | 901 | if ( count( $m ) != 2 ) |
877 | 902 | continue; |
878 | 903 | // trim any trailling comments starting with '//' |
— | — | @@ -908,12 +933,11 @@ |
909 | 934 | */ |
910 | 935 | function markNoConversion( $text, $noParse = false ) { |
911 | 936 | # don't mark if already marked |
912 | | - if ( strpos( $text, $this->mMarkup['begin'] ) |
913 | | - || strpos( $text, $this->mMarkup['end'] ) ) { |
| 937 | + if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) { |
914 | 938 | return $text; |
915 | 939 | } |
916 | 940 | |
917 | | - $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; |
| 941 | + $ret = "-{R|$text}-"; |
918 | 942 | return $ret; |
919 | 943 | } |
920 | 944 | |
— | — | @@ -955,9 +979,38 @@ |
956 | 980 | // we need to convert '-{' and '}-' to '-{' and '}-' |
957 | 981 | // to avoid a unwanted '}-' appeared after the math-image. |
958 | 982 | $text = strtr( $text, array( '-{' => '-{', '}-' => '}-' ) ); |
959 | | - $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; |
| 983 | + $ret = "-{R|$text}-"; |
960 | 984 | return $ret; |
961 | 985 | } |
| 986 | + |
| 987 | + /** |
| 988 | + * Get the cached separator pattern for ConverterRule::parseRules() |
| 989 | + */ |
| 990 | + function getVarSeparatorPattern() { |
| 991 | + if ( is_null( $this->mVarSeparatorPattern ) ) { |
| 992 | + // varsep_pattern for preg_split: |
| 993 | + // text should be splited by ";" only if a valid variant |
| 994 | + // name exist after the markup, for example: |
| 995 | + // -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\ |
| 996 | + // <span style="font-size:120%;">yyy</span>;}- |
| 997 | + // we should split it as: |
| 998 | + // array( |
| 999 | + // [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>' |
| 1000 | + // [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>' |
| 1001 | + // [2] => '' |
| 1002 | + // ) |
| 1003 | + $pat = '/;\s*(?='; |
| 1004 | + foreach ( $this->mVariants as $variant ) { |
| 1005 | + // zh-hans:xxx;zh-hant:yyy |
| 1006 | + $pat .= $variant . '\s*:|'; |
| 1007 | + // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz |
| 1008 | + $pat .= '[^;]*?=>\s*' . $variant . '\s*:|'; |
| 1009 | + } |
| 1010 | + $pat .= '\s*$)/'; |
| 1011 | + $this->mVarSeparatorPattern = $pat; |
| 1012 | + } |
| 1013 | + return $this->mVarSeparatorPattern; |
| 1014 | + } |
962 | 1015 | } |
963 | 1016 | |
964 | 1017 | /** |
— | — | @@ -974,6 +1027,7 @@ |
975 | 1028 | var $mRules = '';// string : the text of the rules |
976 | 1029 | var $mRulesAction = 'none'; |
977 | 1030 | var $mFlags = array(); |
| 1031 | + var $mVariantFlags = array(); |
978 | 1032 | var $mConvTable = array(); |
979 | 1033 | var $mBidtable = array();// array of the translation in each variant |
980 | 1034 | var $mUnidtable = array();// array of the translation in each variant |
— | — | @@ -988,9 +1042,6 @@ |
989 | 1043 | function __construct( $text, $converter ) { |
990 | 1044 | $this->mText = $text; |
991 | 1045 | $this->mConverter = $converter; |
992 | | - foreach ( $converter->mVariants as $v ) { |
993 | | - $this->mConvTable[$v] = array(); |
994 | | - } |
995 | 1046 | } |
996 | 1047 | |
997 | 1048 | /** |
— | — | @@ -1001,14 +1052,12 @@ |
1002 | 1053 | * @public |
1003 | 1054 | */ |
1004 | 1055 | function getTextInBidtable( $variants ) { |
1005 | | - if ( is_string( $variants ) ) { |
1006 | | - $variants = array( $variants ); |
1007 | | - } |
1008 | | - if ( !is_array( $variants ) ) { |
| 1056 | + $variants = (array)$variants; |
| 1057 | + if ( !$variants ) { |
1009 | 1058 | return false; |
1010 | 1059 | } |
1011 | 1060 | foreach ( $variants as $variant ) { |
1012 | | - if ( array_key_exists( $variant, $this->mBidtable ) ) { |
| 1061 | + if ( isset( $this->mBidtable[$variant] ) ) { |
1013 | 1062 | return $this->mBidtable[$variant]; |
1014 | 1063 | } |
1015 | 1064 | } |
— | — | @@ -1021,74 +1070,60 @@ |
1022 | 1071 | */ |
1023 | 1072 | function parseFlags() { |
1024 | 1073 | $text = $this->mText; |
1025 | | - if ( strlen( $text ) < 2 ) { |
1026 | | - $this->mFlags = array( 'R' ); |
1027 | | - $this->mRules = $text; |
1028 | | - return; |
1029 | | - } |
1030 | | - |
1031 | 1074 | $flags = array(); |
1032 | | - $markup = $this->mConverter->mMarkup; |
1033 | | - $validFlags = $this->mConverter->mFlags; |
1034 | | - $variants = $this->mConverter->mVariants; |
| 1075 | + $variantFlags = array(); |
1035 | 1076 | |
1036 | | - $tt = explode( $markup['flagsep'], $text, 2 ); |
1037 | | - if ( count( $tt ) == 2 ) { |
1038 | | - $f = explode( $markup['varsep'], $tt[0] ); |
| 1077 | + $sepPos = strpos( $text, '|' ); |
| 1078 | + if ( $sepPos !== false ) { |
| 1079 | + $validFlags = $this->mConverter->mFlags; |
| 1080 | + $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) ); |
1039 | 1081 | foreach ( $f as $ff ) { |
1040 | 1082 | $ff = trim( $ff ); |
1041 | | - if ( array_key_exists( $ff, $validFlags ) |
1042 | | - && !in_array( $validFlags[$ff], $flags ) ) { |
1043 | | - $flags[] = $validFlags[$ff]; |
| 1083 | + if ( isset( $validFlags[$ff] ) ) { |
| 1084 | + $flags[$validFlags[$ff]] = true; |
1044 | 1085 | } |
1045 | 1086 | } |
1046 | | - $rules = $tt[1]; |
1047 | | - } else { |
1048 | | - $rules = $text; |
| 1087 | + $text = strval( substr( $text, $sepPos + 1 ) ); |
1049 | 1088 | } |
1050 | 1089 | |
1051 | | - // check flags |
1052 | | - if ( in_array( 'R', $flags ) ) { |
1053 | | - $flags = array( 'R' );// remove other flags |
1054 | | - } elseif ( in_array( 'N', $flags ) ) { |
1055 | | - $flags = array( 'N' );// remove other flags |
1056 | | - } elseif ( in_array( '-', $flags ) ) { |
1057 | | - $flags = array( '-' );// remove other flags |
1058 | | - } elseif ( count( $flags ) == 1 && $flags[0] == 'T' ) { |
1059 | | - $flags[] = 'H'; |
1060 | | - } elseif ( in_array( 'H', $flags ) ) { |
| 1090 | + if ( !$flags ) { |
| 1091 | + $flags['S'] = true; |
| 1092 | + } elseif ( isset( $flags['R'] ) ) { |
| 1093 | + $flags = array( 'R' => true );// remove other flags |
| 1094 | + } elseif ( isset( $flags['N'] ) ) { |
| 1095 | + $flags = array( 'N' => true );// remove other flags |
| 1096 | + } elseif ( isset( $flags['-'] ) ) { |
| 1097 | + $flags = array( '-' => true );// remove other flags |
| 1098 | + } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) { |
| 1099 | + $flags['H'] = true; |
| 1100 | + } elseif ( isset( $flags['H'] ) ) { |
1061 | 1101 | // replace A flag, and remove other flags except T |
1062 | | - $temp = array( '+', 'H' ); |
1063 | | - if ( in_array( 'T', $flags ) ) { |
1064 | | - $temp[] = 'T'; |
| 1102 | + $temp = array( '+' => true, 'H' => true ); |
| 1103 | + if ( isset( $flags['T'] ) ) { |
| 1104 | + $temp['T'] = true; |
1065 | 1105 | } |
1066 | | - if ( in_array( 'D', $flags ) ) { |
1067 | | - $temp[] = 'D'; |
| 1106 | + if ( isset( $flags['D'] ) ) { |
| 1107 | + $temp['D'] = true; |
1068 | 1108 | } |
1069 | 1109 | $flags = $temp; |
1070 | 1110 | } else { |
1071 | | - if ( in_array( 'A', $flags ) ) { |
1072 | | - $flags[] = '+'; |
1073 | | - $flags[] = 'S'; |
| 1111 | + if ( isset( $flags['A'] ) ) { |
| 1112 | + $flags['+'] = true; |
| 1113 | + $flags['S'] = true; |
1074 | 1114 | } |
1075 | | - if ( in_array( 'D', $flags ) ) { |
1076 | | - $flags = array_diff( $flags, array( 'S' ) ); |
| 1115 | + if ( isset( $flags['D'] ) ) { |
| 1116 | + unset( $flags['S'] ); |
1077 | 1117 | } |
1078 | | - $flags_temp = array(); |
1079 | | - foreach ( $variants as $variant ) { |
1080 | | - // try to find flags like "zh-hans", "zh-hant" |
1081 | | - // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" |
1082 | | - if ( in_array( $variant, $flags ) ) |
1083 | | - $flags_temp[] = $variant; |
| 1118 | + // try to find flags like "zh-hans", "zh-hant" |
| 1119 | + // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" |
| 1120 | + $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants ); |
| 1121 | + if ( $variantFlags ) { |
| 1122 | + $variantFlags = array_flip( $variantFlags ); |
| 1123 | + $flags = array(); |
1084 | 1124 | } |
1085 | | - if ( count( $flags_temp ) !== 0 ) { |
1086 | | - $flags = $flags_temp; |
1087 | | - } |
1088 | 1125 | } |
1089 | | - if ( count( $flags ) == 0 ) { |
1090 | | - $flags = array( 'S' ); |
1091 | | - } |
1092 | | - $this->mRules = $rules; |
| 1126 | + $this->mVariantFlags = $variantFlags; |
| 1127 | + $this->mRules = $text; |
1093 | 1128 | $this->mFlags = $flags; |
1094 | 1129 | } |
1095 | 1130 | |
— | — | @@ -1101,41 +1136,20 @@ |
1102 | 1137 | $flags = $this->mFlags; |
1103 | 1138 | $bidtable = array(); |
1104 | 1139 | $unidtable = array(); |
1105 | | - $markup = $this->mConverter->mMarkup; |
1106 | 1140 | $variants = $this->mConverter->mVariants; |
| 1141 | + $varsep_pattern = $this->mConverter->getVarSeparatorPattern(); |
1107 | 1142 | |
1108 | | - // varsep_pattern for preg_split: |
1109 | | - // text should be splited by ";" only if a valid variant |
1110 | | - // name exist after the markup, for example: |
1111 | | - // -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\ |
1112 | | - // <span style="font-size:120%;">yyy</span>;}- |
1113 | | - // we should split it as: |
1114 | | - // array( |
1115 | | - // [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>' |
1116 | | - // [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>' |
1117 | | - // [2] => '' |
1118 | | - // ) |
1119 | | - $varsep_pattern = '/' . $markup['varsep'] . '\s*' . '(?='; |
1120 | | - foreach ( $variants as $variant ) { |
1121 | | - // zh-hans:xxx;zh-hant:yyy |
1122 | | - $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|'; |
1123 | | - // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz |
1124 | | - $varsep_pattern .= '[^;]*?' . $markup['unidsep'] . '\s*' . $variant |
1125 | | - . '\s*' . $markup['codesep'] . '|'; |
1126 | | - } |
1127 | | - $varsep_pattern .= '\s*$)/'; |
1128 | | - |
1129 | 1143 | $choice = preg_split( $varsep_pattern, $rules ); |
1130 | 1144 | |
1131 | 1145 | foreach ( $choice as $c ) { |
1132 | | - $v = explode( $markup['codesep'], $c, 2 ); |
| 1146 | + $v = explode( ':', $c, 2 ); |
1133 | 1147 | if ( count( $v ) != 2 ) { |
1134 | 1148 | // syntax error, skip |
1135 | 1149 | continue; |
1136 | 1150 | } |
1137 | 1151 | $to = trim( $v[1] ); |
1138 | 1152 | $v = trim( $v[0] ); |
1139 | | - $u = explode( $markup['unidsep'], $v, 2 ); |
| 1153 | + $u = explode( '=>', $v, 2 ); |
1140 | 1154 | // if $to is empty, strtr() could return a wrong result |
1141 | 1155 | if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) { |
1142 | 1156 | $bidtable[$v] = $to; |
— | — | @@ -1152,7 +1166,7 @@ |
1153 | 1167 | } |
1154 | 1168 | } |
1155 | 1169 | // syntax error, pass |
1156 | | - if ( !array_key_exists( $v, $this->mConverter->mVariantNames ) ) { |
| 1170 | + if ( !isset( $this->mConverter->mVariantNames[$v] ) ) { |
1157 | 1171 | $bidtable = array(); |
1158 | 1172 | $unidtable = array(); |
1159 | 1173 | break; |
— | — | @@ -1225,7 +1239,12 @@ |
1226 | 1240 | * @private |
1227 | 1241 | */ |
1228 | 1242 | function generateConvTable() { |
1229 | | - $flags = $this->mFlags; |
| 1243 | + // Special case optimisation |
| 1244 | + if ( !$this->mBidtable && !$this->mUnidtable ) { |
| 1245 | + $this->mConvTable = array(); |
| 1246 | + return; |
| 1247 | + } |
| 1248 | + |
1230 | 1249 | $bidtable = $this->mBidtable; |
1231 | 1250 | $unidtable = $this->mUnidtable; |
1232 | 1251 | $manLevel = $this->mConverter->mManualLevel; |
— | — | @@ -1235,7 +1254,7 @@ |
1236 | 1255 | /* for bidirectional array |
1237 | 1256 | fill in the missing variants, if any, |
1238 | 1257 | with fallbacks */ |
1239 | | - if ( !array_key_exists( $v, $bidtable ) ) { |
| 1258 | + if ( !isset( $bidtable[$v] ) ) { |
1240 | 1259 | $variantFallbacks = |
1241 | 1260 | $this->mConverter->getVariantFallbacks( $v ); |
1242 | 1261 | $vf = $this->getTextInBidtable( $variantFallbacks ); |
— | — | @@ -1244,7 +1263,7 @@ |
1245 | 1264 | } |
1246 | 1265 | } |
1247 | 1266 | |
1248 | | - if ( array_key_exists( $v, $bidtable ) ) { |
| 1267 | + if ( isset( $bidtable[$v] ) ) { |
1249 | 1268 | foreach ( $vmarked as $vo ) { |
1250 | 1269 | // use syntax: -{A|zh:WordZh;zh-tw:WordTw}- |
1251 | 1270 | // or -{H|zh:WordZh;zh-tw:WordTw}- |
— | — | @@ -1261,11 +1280,14 @@ |
1262 | 1281 | $vmarked[] = $v; |
1263 | 1282 | } |
1264 | 1283 | /*for unidirectional array fill to convert tables */ |
1265 | | - if ( ( $manLevel[$v] == 'bidirectional' |
1266 | | - || $manLevel[$v] == 'unidirectional' ) |
1267 | | - && array_key_exists( $v, $unidtable ) ) { |
1268 | | - $ct = $this->mConvTable[$v]; |
1269 | | - $this->mConvTable[$v] = array_merge( $ct, $unidtable[$v] ); |
| 1284 | + if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' ) |
| 1285 | + && isset( $unidtable[$v] ) ) |
| 1286 | + { |
| 1287 | + if ( isset( $this->mConvTable[$v] ) ) { |
| 1288 | + $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] ); |
| 1289 | + } else { |
| 1290 | + $this->mConvTable[$v] = $unidtable[$v]; |
| 1291 | + } |
1270 | 1292 | } |
1271 | 1293 | } |
1272 | 1294 | } |
— | — | @@ -1285,10 +1307,9 @@ |
1286 | 1308 | |
1287 | 1309 | // convert to specified variant |
1288 | 1310 | // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}- |
1289 | | - if ( count( array_diff( $flags, $variants ) ) == 0 |
1290 | | - and count( $flags ) != 0 ) { |
| 1311 | + if ( $this->mVariantFlags ) { |
1291 | 1312 | // check if current variant in flags |
1292 | | - if ( in_array( $variant, $flags ) ) { |
| 1313 | + if ( isset( $this->mVariantFlags[$variant] ) ) { |
1293 | 1314 | // then convert <text to convert> to current language |
1294 | 1315 | $this->mRules = $this->mConverter->autoConvert( $this->mRules, |
1295 | 1316 | $variant ); |
— | — | @@ -1298,7 +1319,7 @@ |
1299 | 1320 | $this->mConverter->getVariantFallbacks( $variant ); |
1300 | 1321 | foreach ( $variantFallbacks as $variantFallback ) { |
1301 | 1322 | // if current variant's fallback exist in flags |
1302 | | - if ( in_array( $variantFallback, $flags ) ) { |
| 1323 | + if ( isset( $this->mVariantFlags[$variantFallback] ) ) { |
1303 | 1324 | // then convert <text to convert> to fallback language |
1304 | 1325 | $this->mRules = |
1305 | 1326 | $this->mConverter->autoConvert( $this->mRules, |
— | — | @@ -1307,59 +1328,74 @@ |
1308 | 1329 | } |
1309 | 1330 | } |
1310 | 1331 | } |
1311 | | - $this->mFlags = $flags = array( 'R' ); |
| 1332 | + $this->mFlags = $flags = array( 'R' => true ); |
1312 | 1333 | } |
1313 | 1334 | |
1314 | | - if ( !in_array( 'R', $flags ) || !in_array( 'N', $flags ) ) { |
| 1335 | + if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) { |
1315 | 1336 | // decode => HTML entities modified by Sanitizer::removeHTMLtags |
1316 | 1337 | $this->mRules = str_replace( '=>', '=>', $this->mRules ); |
1317 | | - |
1318 | 1338 | $this->parseRules(); |
1319 | 1339 | } |
1320 | 1340 | $rules = $this->mRules; |
1321 | 1341 | |
1322 | | - if ( count( $this->mBidtable ) == 0 |
1323 | | - && count( $this->mUnidtable ) == 0 ) { |
1324 | | - if ( in_array( '+', $flags ) || in_array( '-', $flags ) ) { |
| 1342 | + if ( !$this->mBidtable && !$this->mUnidtable ) { |
| 1343 | + if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) { |
1325 | 1344 | // fill all variants if text in -{A/H/-|text} without rules |
1326 | 1345 | foreach ( $this->mConverter->mVariants as $v ) { |
1327 | 1346 | $this->mBidtable[$v] = $rules; |
1328 | 1347 | } |
1329 | | - } elseif ( !in_array( 'N', $flags ) && !in_array( 'T', $flags ) ) { |
1330 | | - $this->mFlags = $flags = array( 'R' ); |
| 1348 | + } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) { |
| 1349 | + $this->mFlags = $flags = array( 'R' => true ); |
1331 | 1350 | } |
1332 | 1351 | } |
1333 | 1352 | |
1334 | | - if ( in_array( 'R', $flags ) ) { |
1335 | | - // if we don't do content convert, still strip the -{}- tags |
1336 | | - $this->mRuleDisplay = $rules; |
1337 | | - } elseif ( in_array( 'N', $flags ) ) { |
1338 | | - // proces N flag: output current variant name |
1339 | | - $this->mRuleDisplay = |
1340 | | - $this->mConverter->mVariantNames[ trim( $rules ) ]; |
1341 | | - } elseif ( in_array( 'D', $flags ) ) { |
1342 | | - // proces D flag: output rules description |
1343 | | - $this->mRuleDisplay = $this->getRulesDesc(); |
1344 | | - } elseif ( in_array( 'H', $flags ) || in_array( '-', $flags ) ) { |
1345 | | - // proces H,- flag or T only: output nothing |
1346 | | - $this->mRuleDisplay = ''; |
1347 | | - } elseif ( in_array( 'S', $flags ) ) { |
1348 | | - $this->mRuleDisplay = $this->getRuleConvertedStr( $variant ); |
1349 | | - } else { |
| 1353 | + $this->mRuleDisplay = false; |
| 1354 | + foreach ( $flags as $flag => $unused ) { |
| 1355 | + switch ( $flag ) { |
| 1356 | + case 'R': |
| 1357 | + // if we don't do content convert, still strip the -{}- tags |
| 1358 | + $this->mRuleDisplay = $rules; |
| 1359 | + break; |
| 1360 | + case 'N': |
| 1361 | + // process N flag: output current variant name |
| 1362 | + $ruleVar = trim( $rules ); |
| 1363 | + if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) { |
| 1364 | + $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar]; |
| 1365 | + } else { |
| 1366 | + $this->mRuleDisplay = ''; |
| 1367 | + } |
| 1368 | + break; |
| 1369 | + case 'D': |
| 1370 | + // process D flag: output rules description |
| 1371 | + $this->mRuleDisplay = $this->getRulesDesc(); |
| 1372 | + break; |
| 1373 | + case 'H': |
| 1374 | + // process H,- flag or T only: output nothing |
| 1375 | + $this->mRuleDisplay = ''; |
| 1376 | + break; |
| 1377 | + case '-': |
| 1378 | + $this->mRulesAction = 'remove'; |
| 1379 | + $this->mRuleDisplay = ''; |
| 1380 | + break; |
| 1381 | + case '+': |
| 1382 | + $this->mRulesAction = 'add'; |
| 1383 | + $this->mRuleDisplay = ''; |
| 1384 | + break; |
| 1385 | + case 'S': |
| 1386 | + $this->mRuleDisplay = $this->getRuleConvertedStr( $variant ); |
| 1387 | + break; |
| 1388 | + case 'T': |
| 1389 | + $this->mRuleTitle = $this->getRuleConvertedStr( $variant ); |
| 1390 | + $this->mRuleDisplay = ''; |
| 1391 | + break; |
| 1392 | + default: |
| 1393 | + // ignore unknown flags (but see error case below) |
| 1394 | + } |
| 1395 | + } |
| 1396 | + if ( $this->mRuleDisplay === false ) { |
1350 | 1397 | $this->mRuleDisplay = $this->mManualCodeError; |
1351 | 1398 | } |
1352 | | - // process T flag |
1353 | | - if ( in_array( 'T', $flags ) ) { |
1354 | | - $this->mRuleTitle = $this->getRuleConvertedStr( $variant ); |
1355 | | - } |
1356 | 1399 | |
1357 | | - if ( in_array( '-', $flags ) ) { |
1358 | | - $this->mRulesAction = 'remove'; |
1359 | | - } |
1360 | | - if ( in_array( '+', $flags ) ) { |
1361 | | - $this->mRulesAction = 'add'; |
1362 | | - } |
1363 | | - |
1364 | 1400 | $this->generateConvTable(); |
1365 | 1401 | } |
1366 | 1402 | |
Index: trunk/phase3/languages/messages/MessagesEn.php |
— | — | @@ -1381,6 +1381,7 @@ |
1382 | 1382 | 'post-expand-template-argument-category' => 'Pages containing omitted template arguments', |
1383 | 1383 | 'parser-template-loop-warning' => 'Template loop detected: [[$1]]', |
1384 | 1384 | 'parser-template-recursion-depth-warning' => 'Template recursion depth limit exceeded ($1)', |
| 1385 | +'language-converter-depth-warning' => 'Language converter depth limit exceeded ($1)', |
1385 | 1386 | |
1386 | 1387 | # "Undo" feature |
1387 | 1388 | 'undo-success' => 'The edit can be undone. |
Index: trunk/phase3/languages/classes/LanguageGan.php |
— | — | @@ -11,7 +11,6 @@ |
12 | 12 | function __construct($langobj, $maincode, |
13 | 13 | $variants=array(), |
14 | 14 | $variantfallbacks=array(), |
15 | | - $markup=array(), |
16 | 15 | $flags = array(), |
17 | 16 | $manualLevel = array() ) { |
18 | 17 | $this->mDescCodeSep = ':'; |
— | — | @@ -19,7 +18,6 @@ |
20 | 19 | parent::__construct($langobj, $maincode, |
21 | 20 | $variants, |
22 | 21 | $variantfallbacks, |
23 | | - $markup, |
24 | 22 | $flags, |
25 | 23 | $manualLevel); |
26 | 24 | $names = array( |
— | — | @@ -117,7 +115,7 @@ |
118 | 116 | |
119 | 117 | $this->mConverter = new GanConverter( $this, 'gan', |
120 | 118 | $variants, $variantfallbacks, |
121 | | - array(),array(), |
| 119 | + array(), |
122 | 120 | $ml); |
123 | 121 | |
124 | 122 | $wgHooks['ArticleSaveComplete'][] = $this->mConverter; |
— | — | @@ -149,4 +147,4 @@ |
150 | 148 | $ret = array_unique( explode('|', $terms) ); |
151 | 149 | return $ret; |
152 | 150 | } |
153 | | -} |
\ No newline at end of file |
| 151 | +} |
Index: trunk/phase3/languages/classes/LanguageZh.php |
— | — | @@ -11,7 +11,6 @@ |
12 | 12 | function __construct($langobj, $maincode, |
13 | 13 | $variants=array(), |
14 | 14 | $variantfallbacks=array(), |
15 | | - $markup=array(), |
16 | 15 | $flags = array(), |
17 | 16 | $manualLevel = array() ) { |
18 | 17 | $this->mDescCodeSep = ':'; |
— | — | @@ -19,7 +18,6 @@ |
20 | 19 | parent::__construct($langobj, $maincode, |
21 | 20 | $variants, |
22 | 21 | $variantfallbacks, |
23 | | - $markup, |
24 | 22 | $flags, |
25 | 23 | $manualLevel); |
26 | 24 | $names = array( |
— | — | @@ -153,7 +151,7 @@ |
154 | 152 | |
155 | 153 | $this->mConverter = new ZhConverter( $this, 'zh', |
156 | 154 | $variants, $variantfallbacks, |
157 | | - array(),array(), |
| 155 | + array(), |
158 | 156 | $ml); |
159 | 157 | |
160 | 158 | $wgHooks['ArticleSaveComplete'][] = $this->mConverter; |
Index: trunk/phase3/languages/classes/LanguageKk.php |
— | — | @@ -21,10 +21,9 @@ |
22 | 22 | function __construct($langobj, $maincode, |
23 | 23 | $variants=array(), |
24 | 24 | $variantfallbacks=array(), |
25 | | - $markup=array(), |
26 | 25 | $flags = array()) { |
27 | 26 | parent::__construct( $langobj, $maincode, |
28 | | - $variants, $variantfallbacks, $markup, $flags ); |
| 27 | + $variants, $variantfallbacks, $flags ); |
29 | 28 | |
30 | 29 | // No point delaying this since they're in code. |
31 | 30 | // Waiting until loadDefaultTables() means they never get loaded |
Index: trunk/phase3/languages/classes/LanguageSr.php |
— | — | @@ -165,12 +165,11 @@ |
166 | 166 | 'sr-el' => 'sr', |
167 | 167 | ); |
168 | 168 | |
169 | | - $marker = array();//don't mess with these, leave them as they are |
170 | 169 | $flags = array( |
171 | 170 | 'S' => 'S', 'писмо' => 'S', 'pismo' => 'S', |
172 | 171 | 'W' => 'W', 'реч' => 'W', 'reč' => 'W', 'ријеч' => 'W', 'riječ' => 'W' |
173 | 172 | ); |
174 | | - $this->mConverter = new SrConverter($this, 'sr', $variants, $variantfallbacks, $marker, $flags); |
| 173 | + $this->mConverter = new SrConverter($this, 'sr', $variants, $variantfallbacks, $flags); |
175 | 174 | $wgHooks['ArticleSaveComplete'][] = $this->mConverter; |
176 | 175 | } |
177 | 176 | |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -721,6 +721,7 @@ |
722 | 722 | for image divs. |
723 | 723 | * (bug 22096) IE50Fixes.css and IE55Fixes.css have been dropped from the Monobook |
724 | 724 | and Chick skins |
| 725 | +* Fixed bug involving unclosed "-{" markup in the language converter |
725 | 726 | |
726 | 727 | == API changes in 1.16 == |
727 | 728 | |