Index: trunk/phase3/tests/phpunit/languages/LanguageSrTest.php |
— | — | @@ -0,0 +1,165 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * PHPUnit tests for the Serbian language. |
| 5 | + * The language can be represented using two scripts: |
| 6 | + * - Latin (SR_el) |
| 7 | + * - Cyrillic (SR_ec) |
| 8 | + * Both representations seem to be bijective, hence MediaWiki can convert |
| 9 | + * from one script to the other. |
| 10 | + * |
| 11 | + * @author Antoine Musso <hashar at free dot fr> |
| 12 | + * @copyright Copyright © 2011, Antoine Musso <hashar at free dot fr> |
| 13 | + * @file |
| 14 | + */ |
| 15 | + |
| 16 | +require_once dirname(dirname(__FILE__)). '/bootstrap.php'; |
| 17 | + |
| 18 | +/** Tests for MediaWiki languages/classes/LanguageSr.php */ |
| 19 | +class LanguageSrTest extends MediaWikiTestCase { |
| 20 | + /* Language object. Initialized before each test */ |
| 21 | + private $lang; |
| 22 | + |
| 23 | + function setUp() { |
| 24 | + $this->lang = Language::factory( 'Sr' ); |
| 25 | + } |
| 26 | + function tearDown() { |
| 27 | + unset( $this->lang ); |
| 28 | + } |
| 29 | + |
| 30 | + ##### TESTS ####################################################### |
| 31 | + |
| 32 | + function testEasyConversions( ) { |
| 33 | + $this->assertCyrillic( |
| 34 | + 'шђчћжШЂЧЋЖ', |
| 35 | + 'Cyrillic guessing characters' |
| 36 | + ); |
| 37 | + $this->assertLatin( |
| 38 | + 'šđč枊ĐČĆŽ', |
| 39 | + 'Latin guessing characters' |
| 40 | + ); |
| 41 | + } |
| 42 | + |
| 43 | + function testMixedConversions() { |
| 44 | + $this->assertCyrillic( |
| 45 | + 'шђчћжШЂЧЋЖ - šđčćž', |
| 46 | + 'Mostly cyrillic characters' |
| 47 | + ); |
| 48 | + $this->assertLatin( |
| 49 | + 'šđč枊ĐČĆŽ - шђчћж', |
| 50 | + 'Mostly latin characters' |
| 51 | + ); |
| 52 | + } |
| 53 | + |
| 54 | + function testSameAmountOfLatinAndCyrillicGetConverted() { |
| 55 | + $this->assertConverted( |
| 56 | + '4 latin: šđčć | 4 cyrillic: шђчћ', |
| 57 | + 'sr-ec' |
| 58 | + ); |
| 59 | + $this->assertConverted( |
| 60 | + '4 latin: šđčć | 4 cyrillic: шђчћ', |
| 61 | + 'sr-el' |
| 62 | + ); |
| 63 | + } |
| 64 | + |
| 65 | + /** |
| 66 | + * @author Nikola Smolenski |
| 67 | + */ |
| 68 | + function testConversionToCyrillic() { |
| 69 | + $this->assertEquals( 'абвг', |
| 70 | + $this->convertToCyrillic( 'abvg' ) |
| 71 | + ); |
| 72 | + $this->assertEquals( 'абвг', |
| 73 | + $this->convertToCyrillic( 'абвг' ) |
| 74 | + ); |
| 75 | + $this->assertEquals( 'abvgшђжчћ', |
| 76 | + $this->convertToCyrillic( 'abvgшђжчћ' ) |
| 77 | + ); |
| 78 | + $this->assertEquals( 'абвгшђжчћ', |
| 79 | + $this->convertToCyrillic( 'абвгšđžčć' ) |
| 80 | + ); |
| 81 | + //Roman numerals are not converted |
| 82 | + $this->assertEquals( 'а I б II в III г IV шђжчћ', |
| 83 | + $this->convertToCyrillic( 'a I b II v III g IV šđžčć' ) |
| 84 | + ); |
| 85 | + } |
| 86 | + |
| 87 | + function testConversionToLatin() { |
| 88 | + $this->assertEquals( 'abcd', |
| 89 | + $this->convertToLatin( 'abcd' ) |
| 90 | + ); |
| 91 | + $this->assertEquals( 'abcd', |
| 92 | + $this->convertToLatin( 'абцд' ) |
| 93 | + ); |
| 94 | + $this->assertEquals( 'abcdšđžčć', |
| 95 | + $this->convertToLatin( 'abcdшђжчћ' ) |
| 96 | + ); |
| 97 | + $this->assertEquals( 'абцдšđžčć', |
| 98 | + $this->convertToLatin( 'абцдšđžčć' ) |
| 99 | + ); |
| 100 | + |
| 101 | + } |
| 102 | + |
| 103 | + ##### HELPERS ##################################################### |
| 104 | + /** |
| 105 | + * Wrapper to verify text stays the same after applying conversion |
| 106 | + * @param $text string Text to convert |
| 107 | + * @param $variant string Language variant 'sr-ec' or 'sr-el' |
| 108 | + * @param $msg string Optional message |
| 109 | + */ |
| 110 | + function assertUnConverted( $text, $variant, $msg = '' ) { |
| 111 | + $this->assertEquals( |
| 112 | + $text, |
| 113 | + $this->convertTo( $text, $variant ), |
| 114 | + $msg |
| 115 | + ); |
| 116 | + } |
| 117 | + /** |
| 118 | + * Wrapper to verify a text is different once converted to a variant. |
| 119 | + * @param $text string Text to convert |
| 120 | + * @param $variant string Language variant 'sr-ec' or 'sr-el' |
| 121 | + * @param $msg string Optional message |
| 122 | + */ |
| 123 | + function assertConverted( $text, $variant, $msg = '' ) { |
| 124 | + $this->assertNotEquals( |
| 125 | + $text, |
| 126 | + $this->convertTo( $text, $variant ), |
| 127 | + $msg |
| 128 | + ); |
| 129 | + } |
| 130 | + |
| 131 | + /** |
| 132 | + * Verify the given Cyrillic text is not converted when using |
| 133 | + * the Cyrillic variant and converted to Latin when using |
| 134 | + * the Latin variant. |
| 135 | + */ |
| 136 | + function assertCyrillic( $text, $msg = '' ) { |
| 137 | + $this->assertUnConverted( $text, 'sr-ec', $msg ); |
| 138 | + $this->assertConverted( $text, 'sr-el', $msg ); |
| 139 | + } |
| 140 | + /** |
| 141 | + * Verify the given Latin text is not converted when using |
| 142 | + * the Latin variant and converted to Cyrillic when using |
| 143 | + * the Cyrillic variant. |
| 144 | + */ |
| 145 | + function assertLatin( $text, $msg = '' ) { |
| 146 | + $this->assertUnConverted( $text, 'sr-el', $msg ); |
| 147 | + $this->assertConverted( $text, 'sr-ec', $msg ); |
| 148 | + } |
| 149 | + |
| 150 | + |
| 151 | + /** Wrapper for the LanguageConverter::convertTo() method */ |
| 152 | + function convertTo( $text, $variant ) { |
| 153 | + return $this |
| 154 | + ->lang |
| 155 | + ->mConverter |
| 156 | + ->convertTo( |
| 157 | + $text, $variant |
| 158 | + ); |
| 159 | + } |
| 160 | + function convertToCyrillic( $text ) { |
| 161 | + return $this->convertTo( $text, 'sr-ec' ); |
| 162 | + } |
| 163 | + function convertToLatin( $text ) { |
| 164 | + return $this->convertTo( $text, 'sr-el' ); |
| 165 | + } |
| 166 | +} |
Property changes on: trunk/phase3/tests/phpunit/languages/LanguageSrTest.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 167 | + native |
Index: trunk/phase3/RELEASE-NOTES-1.19 |
— | — | @@ -166,6 +166,7 @@ |
167 | 167 | * (bug 30217) Make pt-br a fallback of pt. |
168 | 168 | * (bug 31193) Set fallback language of Assamese from Bengali to English. |
169 | 169 | * Update date format for dsb and hsb: month names need the genitive. |
| 170 | +* (bug 28643) Serbian variant conversion improvements (Nikola Smolenski) |
170 | 171 | |
171 | 172 | === Other changes in 1.19 === |
172 | 173 | * jquery.mwPrototypes module was renamed to jquery.mwExtension. |
Index: trunk/phase3/languages/classes/LanguageSr.php |
— | — | @@ -173,6 +173,32 @@ |
174 | 174 | |
175 | 175 | return $ret; |
176 | 176 | } |
| 177 | + |
| 178 | + /** |
| 179 | + * Guess if a text is written in Cyrillic or Latin. |
| 180 | + * Overrides LanguageConverter::guessVariant() |
| 181 | + * |
| 182 | + * @param string $text The text to be checked |
| 183 | + * @param string $variant Language code of the variant to be checked for |
| 184 | + * @return bool true if $text appears to be written in $variant |
| 185 | + * |
| 186 | + * @author Nikola Smolenski <smolensk@eunet.rs> |
| 187 | + * @since 1.19 |
| 188 | + */ |
| 189 | + public function guessVariant( $text, $variant ) { |
| 190 | + $numCyrillic = preg_match_all("/[шђчћжШЂЧЋЖ]/u", $text, $dummy); |
| 191 | + $numLatin = preg_match_all("/[šđč枊ĐČĆŽ]/u", $text, $dummy); |
| 192 | + |
| 193 | + if( $variant == 'sr-ec' ) { |
| 194 | + return (boolean) ($numCyrillic > $numLatin); |
| 195 | + } else if( $variant == 'sr-el' ) { |
| 196 | + return (boolean) ($numLatin > $numCyrillic); |
| 197 | + } else { |
| 198 | + return false; |
| 199 | + } |
| 200 | + |
| 201 | + } |
| 202 | + |
177 | 203 | } |
178 | 204 | |
179 | 205 | /** |
Index: trunk/phase3/languages/LanguageConverter.php |
— | — | @@ -322,6 +322,10 @@ |
323 | 323 | } |
324 | 324 | } |
325 | 325 | |
| 326 | + if( $this->guessVariant( $text, $toVariant ) ) { |
| 327 | + return $text; |
| 328 | + } |
| 329 | + |
326 | 330 | /* we convert everything except: |
327 | 331 | 1. HTML markups (anything between < and >) |
328 | 332 | 2. HTML entities |
— | — | @@ -571,7 +575,7 @@ |
572 | 576 | */ |
573 | 577 | public function convertTo( $text, $variant ) { |
574 | 578 | global $wgDisableLangConversion; |
575 | | - if ( $wgDisableLangConversion ) { |
| 579 | + if ( $wgDisableLangConversion || $this->guessVariant( $text, $variant ) ) { |
576 | 580 | return $text; |
577 | 581 | } |
578 | 582 | return $this->recursiveConvertTopLevel( $text, $variant ); |
— | — | @@ -773,6 +777,20 @@ |
774 | 778 | } |
775 | 779 | |
776 | 780 | /** |
| 781 | + * Guess if a text is written in a variant. This should be implemented in subclasses. |
| 782 | + * |
| 783 | + * @param string $text the text to be checked |
| 784 | + * @param string $variant language code of the variant to be checked for |
| 785 | + * @return bool true if $text appears to be written in $variant, false if not |
| 786 | + * |
| 787 | + * @author Nikola Smolenski <smolensk@eunet.rs> |
| 788 | + * @since 1.19 |
| 789 | + */ |
| 790 | + public function guessVariant($text, $variant) { |
| 791 | + return false; |
| 792 | + } |
| 793 | + |
| 794 | + /** |
777 | 795 | * Load default conversion tables. |
778 | 796 | * This method must be implemented in derived class. |
779 | 797 | * |
Property changes on: trunk/phase3/languages/LanguageConverter.php |
___________________________________________________________________ |
Added: svn:mergeinfo |
780 | 798 | Merged /branches/new-installer/phase3/languages/LanguageConverter.php:r43664-66004 |
781 | 799 | Merged /branches/REL1_15/phase3/languages/LanguageConverter.php:r51646 |
782 | 800 | Merged /branches/REL1_17/phase3/languages/LanguageConverter.php:r81445,81448 |
783 | 801 | Merged /branches/nikola/phase3/languages/LanguageConverter.php:r85106-103326 |
784 | 802 | Merged /branches/sqlite/languages/LanguageConverter.php:r58211-58321 |