r1612 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r1611 | r1612 | r1613 >
Date: 09:39, 30 August 2003
Author: vibber
Status: old
Tags:
Comment:
Break UTF-8 support class from the case conversion tables; grab tables from memcached if available
Modified paths:
  • /trunk/phase3/includes/Setup.php (modified) (history)
  • /trunk/phase3/includes/Utf8Case.php (modified) (history)
  • /trunk/phase3/languages/Language.php (modified) (history)
  • /trunk/phase3/languages/LanguageAr.php (modified) (history)
  • /trunk/phase3/languages/LanguageEo.php (modified) (history)
  • /trunk/phase3/languages/LanguageHe.php (modified) (history)
  • /trunk/phase3/languages/LanguageHi.php (modified) (history)
  • /trunk/phase3/languages/LanguageHu.php (modified) (history)
  • /trunk/phase3/languages/LanguageJa.php (modified) (history)
  • /trunk/phase3/languages/LanguageKo.php (modified) (history)
  • /trunk/phase3/languages/LanguageMl.php (modified) (history)
  • /trunk/phase3/languages/LanguageUtf8.php (added) (history)

Diff [purge]

Index: trunk/phase3/includes/Setup.php
@@ -7,7 +7,6 @@
88
99 global $IP;
1010 include_once( "$IP/GlobalFunctions.php" );
11 -include_once( "$IP/Language.php" );
1211 include_once( "$IP/Namespace.php" );
1312 include_once( "$IP/Skin.php" );
1413 include_once( "$IP/OutputPage.php" );
@@ -17,7 +16,7 @@
1817 include_once( "$IP/LinkCache.php" );
1918 include_once( "$IP/Title.php" );
2019 include_once( "$IP/Article.php" );
21 -require( "$IP/MemCachedClient.inc.php" );
 20+include_once( "$IP/MemCachedClient.inc.php" );
2221
2322 wfDebug( "\n\n" );
2423
@@ -37,10 +36,12 @@
3837 $wgMemc->set_debug( $wgMemCachedDebug );
3938 }
4039
 40+include_once( "$IP/Language.php" );
 41+
4142 $wgOut = new OutputPage();
4243 $wgLangClass = "Language" . ucfirst( $wgLanguageCode );
4344 if( ! class_exists( $wgLangClass ) ) {
44 - include_once( "$IP/Utf8Case.php" );
 45+ include_once( "$IP/LanguageUtf8.php" );
4546 $wgLangClass = "LanguageUtf8";
4647 }
4748 $wgLang = new $wgLangClass();
Index: trunk/phase3/includes/Utf8Case.php
@@ -1,13 +1,12 @@
22 <?
3 -$wgInputEncoding = "utf-8";
4 -$wgOutputEncoding = "utf-8";
5 -
63 # Simple 1:1 upper/lowercase switching arrays for utf-8 text
74 # Won't get context-sensitive things yet
85
96 # Hack for bugs in ucfirst() and company
10 -# TODO: store this in shared memory or something
117
 8+# These are pulled from memcached if possible, as this is faster than filling
 9+# up a big array manually. See also languages/LanguageUtf8.php
 10+
1211 $wikiUpperChars = array(
1312 "a" => "A",
1413 "b" => "B",
@@ -1494,49 +1493,4 @@
14951494 "\xf0\x90\x90\xa5" => "\xf0\x90\x91\x8d"
14961495 );
14971496
1498 -# Base stuff useful to all UTF-8 based language files
1499 -class LanguageUtf8 extends Language {
1500 -
1501 - function ucfirst( $string ) {
1502 - # For most languages, this is a wrapper for ucfirst()
1503 - # But that doesn't work right in a UTF-8 locale
1504 - global $wikiUpperChars, $wikiLowerChars;
1505 - return preg_replace (
1506 - "/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
1507 - "strtr ( \"\$1\" , \$wikiUpperChars )",
1508 - $string );
1509 - }
1510 -
1511 - function stripForSearch( $string ) {
1512 - # MySQL fulltext index doesn't grok utf-8, so we
1513 - # need to fold cases and convert to hex
1514 - global $wikiLowerChars;
1515 - return preg_replace(
1516 - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
1517 - "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
1518 - $string );
1519 - }
1520 -
1521 - function fallback8bitEncoding() {
1522 - # Windows codepage 1252 is a superset of iso 8859-1
1523 - # override this to use difference source encoding to
1524 - # translate incoming 8-bit URLs.
1525 - return "windows-1252";
1526 - }
1527 -
1528 - function checkTitleEncoding( $s ) {
1529 - global $wgInputEncoding;
1530 -
1531 - # Check for non-UTF-8 URLs
1532 - $ishigh = preg_match( '/[\x80-\xff]/', $s);
1533 - if(!$ishigh) return $s;
1534 -
1535 - $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
1536 - '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
1537 - if( $isutf8 ) return $s;
1538 -
1539 - return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
1540 - }
1541 -}
1542 -
15431497 ?>
\ No newline at end of file
Index: trunk/phase3/languages/LanguageUtf8.php
@@ -0,0 +1,60 @@
 2+<?
 3+
 4+$wgInputEncoding = "utf-8";
 5+$wgOutputEncoding = "utf-8";
 6+
 7+$wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
 8+$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );
 9+
 10+if(($wikiUpperChars === false) || ($wikiLowerChars === false)) {
 11+ include_once("$IP/Utf8Case.php");
 12+ $wgMemc->set( $key1, $wikiUpperChars );
 13+ $wgMemc->set( $key2, $wikiLowerChars );
 14+}
 15+
 16+# Base stuff useful to all UTF-8 based language files
 17+class LanguageUtf8 extends Language {
 18+
 19+ function ucfirst( $string ) {
 20+ # For most languages, this is a wrapper for ucfirst()
 21+ # But that doesn't work right in a UTF-8 locale
 22+ global $wikiUpperChars, $wikiLowerChars;
 23+ return preg_replace (
 24+ "/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
 25+ "strtr ( \"\$1\" , \$wikiUpperChars )",
 26+ $string );
 27+ }
 28+
 29+ function stripForSearch( $string ) {
 30+ # MySQL fulltext index doesn't grok utf-8, so we
 31+ # need to fold cases and convert to hex
 32+ global $wikiLowerChars;
 33+ return preg_replace(
 34+ "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
 35+ "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
 36+ $string );
 37+ }
 38+
 39+ function fallback8bitEncoding() {
 40+ # Windows codepage 1252 is a superset of iso 8859-1
 41+ # override this to use difference source encoding to
 42+ # translate incoming 8-bit URLs.
 43+ return "windows-1252";
 44+ }
 45+
 46+ function checkTitleEncoding( $s ) {
 47+ global $wgInputEncoding;
 48+
 49+ # Check for non-UTF-8 URLs
 50+ $ishigh = preg_match( '/[\x80-\xff]/', $s);
 51+ if(!$ishigh) return $s;
 52+
 53+ $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
 54+ '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
 55+ if( $isutf8 ) return $s;
 56+
 57+ return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
 58+ }
 59+}
 60+
 61+?>
Property changes on: trunk/phase3/languages/LanguageUtf8.php
___________________________________________________________________
Added: svn:eol-style
162 + native
Added: svn:keywords
263 + Author Date Id Revision
Index: trunk/phase3/languages/LanguageJa.php
@@ -1,6 +1,6 @@
22 <?php
33 global $IP;
4 -include_once( "$IP/Utf8Case.php" );
 4+include_once( "$IP/LanguageUtf8.php" );
55
66 # The names of the namespaces can be set here, but the numbers
77 # are magical, so don't change or move them! The Namespace class
Index: trunk/phase3/languages/LanguageHe.php
@@ -1,6 +1,6 @@
22 <?
33
4 -include_once("Utf8Case.php");
 4+include_once("LanguageUtf8.php");
55
66 # NOTE: To turn off "Current Events" in the sidebar,
77 # set "currentevents" => "-"
Index: trunk/phase3/languages/LanguageHi.php
@@ -1,6 +1,6 @@
22 <?
33
4 -include( "Utf8Case.php" );
 4+include( "LanguageUtf8.php" );
55
66 # NOTE: To turn off "Current Events" in the sidebar,
77 # set "currentevents" => "-"
Index: trunk/phase3/languages/LanguageAr.php
@@ -1,6 +1,6 @@
22 <?
33 # See language.doc
4 -include_once("Utf8Case.php");
 4+include_once("LanguageUtf8.php");
55
66 class LanguageAr extends LanguageUtf8 {
77 # TODO: TRANSLATION!
Index: trunk/phase3/languages/LanguageEo.php
@@ -1,7 +1,5 @@
22 <?
3 -include("Utf8Case.php");
4 -$wgInputEncoding = "utf-8";
5 -$wgOutputEncoding = "utf-8";
 3+include("LanguageUtf8.php");
64 $wgEditEncoding = "x";
75
86 # See language.doc
Index: trunk/phase3/languages/LanguageMl.php
@@ -1,7 +1,7 @@
22 <?
33 # See language.doc
44
5 -include_once( "Utf8Case.php" );
 5+include_once( "LanguageUtf8.php" );
66
77 class LanguageMl extends LanguageUtf8 {
88 # Inherit everything
Index: trunk/phase3/languages/LanguageKo.php
@@ -1,6 +1,6 @@
22 <?
33 global $IP;
4 -include_once( "$IP/Utf8Case.php" );
 4+include_once( "$IP/LanguageUtf8.php" );
55
66 # The names of the namespaces can be set here, but the numbers
77 # are magical, so don't change or move them! The Namespace class
Index: trunk/phase3/languages/LanguageHu.php
@@ -1,6 +1,6 @@
22 <?
33
4 -include_once("Utf8Case.php");
 4+include_once("LanguageUtf8.php");
55
66 # NOTE: To turn off "Current Events" in the sidebar,
77 # set "currentevents" => "-"
Index: trunk/phase3/languages/Language.php
@@ -276,7 +276,7 @@
277277 "Recentchangeslinked" => "",
278278 "Movepage" => "",
279279 "Booksources" => "External book sources",
280 -"Categories" => "Page categories",
 280+"Categories" => "Page categories"
281281 );
282282
283283 /* private */ $wgSysopSpecialPagesEn = array(

Status & tagging log