Index: trunk/phase3/tests/phpunit/includes/GlobalTest.php |
— | — | @@ -632,6 +632,133 @@ |
633 | 633 | |
634 | 634 | } |
635 | 635 | |
| 636 | + /** |
| 637 | + * Test @see wfBCP47(). |
| 638 | + * Please note that BCP 47 explicitly states that language codes are |
| 639 | + * case-insensitive; there are some exceptions to the rule :) |
| 640 | + * This test verifies our formatting against the all-lower-case and |
| 641 | + * all-upper-case forms of each language code. |
| 642 | + * |
| 643 | + * @see http://tools.ietf.org/html/bcp47 |
| 644 | + * @dataProvider provideLanguageCodes() |
| 645 | + */ |
| 646 | + function testBCP47( $code, $expected ) { |
| 647 | + $code = strtolower( $code ); |
| 648 | + $this->assertEquals( $expected, wfBCP47($code), |
| 649 | + "Applying BCP47 standard to lower case '$code'" |
| 650 | + ); |
| 651 | + |
| 652 | + $code = strtoupper( $code ); |
| 653 | + $this->assertEquals( $expected, wfBCP47($code), |
| 654 | + "Applying BCP47 standard to upper case '$code'" |
| 655 | + ); |
| 656 | + } |
| 657 | + |
| 658 | + /** |
| 659 | + * Array format is ($code, $expected) |
| 660 | + */ |
| 661 | + function provideLanguageCodes() { |
| 662 | + return array( |
| 663 | + // Extracted from BCP47 (list not exhaustive) |
| 664 | + # 2.1.1 |
| 665 | + array( 'en-ca-x-ca' , 'en-CA-x-ca' ), |
| 666 | + array( 'sgn-be-fr' , 'sgn-BE-FR' ), |
| 667 | + array( 'az-latn-x-latn', 'az-Latn-x-latn' ), |
| 668 | + # 2.2 |
| 669 | + array( 'sr-Latn-RS', 'sr-Latn-RS' ), |
| 670 | + array( 'az-arab-ir', 'az-Arab-IR' ), |
| 671 | + |
| 672 | + # 2.2.5 |
| 673 | + array( 'sl-nedis' , 'sl-nedis' ), |
| 674 | + array( 'de-ch-1996', 'de-CH-1996' ), |
| 675 | + |
| 676 | + # 2.2.6 |
| 677 | + array( |
| 678 | + 'en-latn-gb-boont-r-extended-sequence-x-private', |
| 679 | + 'en-Latn-GB-boont-r-extended-sequence-x-private' |
| 680 | + ), |
| 681 | + |
| 682 | + // Examples from BCP47 Appendix A |
| 683 | + # Simple language subtag: |
| 684 | + array( 'DE', 'de' ), |
| 685 | + array( 'fR', 'fr' ), |
| 686 | + array( 'ja', 'ja' ), |
| 687 | + |
| 688 | + # Language subtag plus script subtag: |
| 689 | + array( 'zh-hans', 'zh-Hans'), |
| 690 | + array( 'sr-cyrl', 'sr-Cyrl'), |
| 691 | + array( 'sr-latn', 'sr-Latn'), |
| 692 | + |
| 693 | + # Extended language subtags and their primary language subtag |
| 694 | + # counterparts: |
| 695 | + array( 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ), |
| 696 | + array( 'cmn-hans-cn' , 'cmn-Hans-CN' ), |
| 697 | + array( 'zh-yue-hk' , 'zh-yue-HK' ), |
| 698 | + array( 'yue-hk' , 'yue-HK' ), |
| 699 | + |
| 700 | + # Language-Script-Region: |
| 701 | + array( 'zh-hans-cn', 'zh-Hans-CN' ), |
| 702 | + array( 'sr-latn-RS', 'sr-Latn-RS' ), |
| 703 | + |
| 704 | + # Language-Variant: |
| 705 | + array( 'sl-rozaj' , 'sl-rozaj' ), |
| 706 | + array( 'sl-rozaj-biske', 'sl-rozaj-biske' ), |
| 707 | + array( 'sl-nedis' , 'sl-nedis' ), |
| 708 | + |
| 709 | + # Language-Region-Variant: |
| 710 | + array( 'de-ch-1901' , 'de-CH-1901' ), |
| 711 | + array( 'sl-it-nedis' , 'sl-IT-nedis' ), |
| 712 | + |
| 713 | + # Language-Script-Region-Variant: |
| 714 | + array( 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ), |
| 715 | + |
| 716 | + # Language-Region: |
| 717 | + array( 'de-de' , 'de-DE' ), |
| 718 | + array( 'en-us' , 'en-US' ), |
| 719 | + array( 'es-419', 'es-419'), |
| 720 | + |
| 721 | + # Private use subtags: |
| 722 | + array( 'de-ch-x-phonebk' , 'de-CH-x-phonebk' ), |
| 723 | + array( 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ), |
| 724 | + /** |
| 725 | + * The previous test does not match the example given in the BCP: |
| 726 | + * az-Arab-x-AZE-derbend |
| 727 | + * AZE being a private-use subtag, we format it in lower case; to match |
| 728 | + * the BCP example literally, the test above should probably be: |
| 729 | + #array( 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ), |
| 730 | + */ |
| 731 | + |
| 732 | + # Private use registry values: |
| 733 | + array( 'x-whatever', 'x-whatever' ), |
| 734 | + array( 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ), |
| 735 | + array( 'de-qaaa' , 'de-Qaaa' ), |
| 736 | + array( 'sr-latn-qm', 'sr-Latn-QM' ), |
| 737 | + array( 'sr-qaaa-rs', 'sr-Qaaa-RS' ), |
| 738 | + |
| 739 | + # Tags that use extensions |
| 740 | + array( 'en-us-u-islamcal', 'en-US-u-islamcal' ), |
| 741 | + array( 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ), |
| 742 | + array( 'en-a-myext-b-another', 'en-a-myext-b-another' ), |
| 743 | + |
| 744 | + # Invalid: |
| 745 | + // de-419-DE |
| 746 | + // a-DE |
| 747 | + // ar-a-aaa-b-bbb-a-ccc |
| 748 | + |
| 749 | + /* |
| 750 | + // ISO 15924 : |
| 751 | + array( 'sr-Cyrl', 'sr-Cyrl' ), |
| 752 | + array( 'SR-lATN', 'sr-Latn' ), # FIXME fix our function? |
| 753 | + array( 'fr-latn', 'fr-Latn' ), |
| 754 | + // Use lowercase for single segment |
| 755 | + // ISO 3166-1-alpha-2 code |
| 756 | + array( 'US', 'us' ), # USA |
| 757 | + array( 'uS', 'us' ), # USA |
| 758 | + array( 'Fr', 'fr' ), # France |
| 759 | + array( 'va', 'va' ), # Holy See (Vatican City State) |
| 760 | + */); |
| 761 | + } |
| 762 | + |
636 | 763 | /* TODO: many more! */ |
637 | 764 | } |
638 | 765 | |
Index: trunk/phase3/includes/GlobalFunctions.php |
— | — | @@ -3397,6 +3397,7 @@ |
3398 | 3398 | |
3399 | 3399 | /** |
3400 | 3400 | * Get the normalised IETF language tag |
| 3401 | + * See unit test for examples. |
3401 | 3402 | * @param $code String: The language code. |
3402 | 3403 | * @return $langCode String: The language code which complies with BCP 47 standards. |
3403 | 3404 | */ |
— | — | @@ -3404,12 +3405,15 @@ |
3405 | 3406 | $codeSegment = explode( '-', $code ); |
3406 | 3407 | foreach ( $codeSegment as $segNo => $seg ) { |
3407 | 3408 | if ( count( $codeSegment ) > 0 ) { |
| 3409 | + // when the previous segment is 'x', this is a private-use segment and should be lower case |
| 3410 | + if( $segNo > 0 && strtolower( $codeSegment[($segNo - 1)] ) == 'x') { |
| 3411 | + $codeBCP[$segNo] = strtolower( $seg ); |
3408 | 3412 | // ISO 3166 country code |
3409 | | - if ( ( strlen( $seg ) == 2 ) && ( $segNo > 0 ) ) { |
| 3413 | + } elseif ( ( strlen( $seg ) == 2 ) && ( $segNo > 0 ) ) { |
3410 | 3414 | $codeBCP[$segNo] = strtoupper( $seg ); |
3411 | 3415 | // ISO 15924 script code |
3412 | 3416 | } elseif ( ( strlen( $seg ) == 4 ) && ( $segNo > 0 ) ) { |
3413 | | - $codeBCP[$segNo] = ucfirst( $seg ); |
| 3417 | + $codeBCP[$segNo] = ucfirst( strtolower( $seg ) ); |
3414 | 3418 | // Use lowercase for other cases |
3415 | 3419 | } else { |
3416 | 3420 | $codeBCP[$segNo] = strtolower( $seg ); |