r60599 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r60598 | r60599 | r60600 >
Date:08:28, 4 January 2010
Author:tstarling
Status:deferred (Comments)
Tags:
Comment:
Fix for bug 9413 and the related Malayalam issue reported on wikitech-l.
* Added $wgFixArchaicUnicode, which, if enabled, converts some deprecated Unicode sequences in Arabic and Malayalam text to their Unicode 5.1 equivalents.
* Added generateNormalizerData.php to generate the relevant data files. Added the generated data files also.
* Made most things call the new wrapper method $wgContLang->normalize() instead of UtfNormal::cleanUp(), so that Unicode normalization can be customised on a per-language basis.
* Added some generic support for conversion tables to Language so that subclasses can easily implement these kinds of transformations.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/DefaultSettings.php (modified) (history)
  • /trunk/phase3/includes/WebRequest.php (modified) (history)
  • /trunk/phase3/includes/Xml.php (modified) (history)
  • /trunk/phase3/includes/api/ApiResult.php (modified) (history)
  • /trunk/phase3/languages/Language.php (modified) (history)
  • /trunk/phase3/languages/classes/LanguageAr.php (modified) (history)
  • /trunk/phase3/languages/classes/LanguageMl.php (added) (history)
  • /trunk/phase3/maintenance/cleanupImages.php (modified) (history)
  • /trunk/phase3/maintenance/cleanupTitles.php (modified) (history)
  • /trunk/phase3/maintenance/cleanupWatchlist.php (modified) (history)
  • /trunk/phase3/maintenance/dumpTextPass.php (modified) (history)
  • /trunk/phase3/maintenance/language/generateNormalizerData.php (added) (history)
  • /trunk/phase3/maintenance/preprocessorFuzzTest.php (modified) (history)
  • /trunk/phase3/serialized/normalize-ar.ser (added) (history)
  • /trunk/phase3/serialized/normalize-ml.ser (added) (history)

Diff [purge]

Index: trunk/phase3/maintenance/cleanupWatchlist.php
@@ -52,9 +52,10 @@
5353 }
5454
5555 protected function processRow( $row ) {
 56+ global $wgContLang;
5657 $current = Title::makeTitle( $row->wl_namespace, $row->wl_title );
5758 $display = $current->getPrefixedText();
58 - $verified = UtfNormal::cleanUp( $display );
 59+ $verified = $wgContLang->normalize( $display );
5960 $title = Title::newFromText( $verified );
6061
6162 if( $row->wl_user == 0 || is_null( $title ) || !$title->equals( $current ) ) {
Index: trunk/phase3/maintenance/cleanupTitles.php
@@ -37,8 +37,9 @@
3838 }
3939
4040 protected function processRow( $row ) {
 41+ global $wgContLang;
4142 $display = Title::makeName( $row->page_namespace, $row->page_title );
42 - $verified = UtfNormal::cleanUp( $display );
 43+ $verified = $wgContLang->normalize( $display );
4344 $title = Title::newFromText( $verified );
4445
4546 if( !is_null( $title )
Index: trunk/phase3/maintenance/cleanupImages.php
@@ -65,7 +65,7 @@
6666 $cleaned = $wgContLang->checkTitleEncoding( $cleaned );
6767
6868 // Many of remainder look like non-normalized unicode
69 - $cleaned = UtfNormal::cleanUp( $cleaned );
 69+ $cleaned = $wgContLang->normalize( $cleaned );
7070
7171 $title = Title::makeTitleSafe( NS_FILE, $cleaned );
7272
Index: trunk/phase3/maintenance/language/generateNormalizerData.php
@@ -0,0 +1,137 @@
 2+<?php
 3+
 4+require_once( dirname( __FILE__ ) . '/../Maintenance.php' );
 5+
 6+require_once( dirname( __FILE__ ) . '/../../includes/normal/UtfNormalUtil.php' );
 7+
 8+/**
 9+ * Generates normalizer data files for Arabic and Malayalam.
 10+ * For NFC see includes/normal.
 11+ */
 12+class GenerateNormalizerData extends Maintenance {
 13+ var $dataFile;
 14+
 15+ public function __construct() {
 16+ parent::__construct();
 17+ $this->addOption( 'unicode-data-file', 'The local location of the data file ' .
 18+ 'from http://unicode.org/Public/UNIDATA/UnicodeData.txt', false, true );
 19+ }
 20+
 21+ public function execute() {
 22+ if ( !$this->hasOption( 'unicode-data-file' ) ) {
 23+ $this->dataFile = 'UnicodeData.txt';
 24+ if ( !file_exists( $this->dataFile ) ) {
 25+ $this->error( "Unable to find UnicodeData.txt. Please specify its location with --unicode-data-file=<FILE>" );
 26+ exit( 1 );
 27+ }
 28+ } else {
 29+ $this->dataFile = $this->getOption( 'unicode-data-file' );
 30+ if ( !file_exists( $this->dataFile ) ) {
 31+ $this->error( 'Unable to find the specified data file.' );
 32+ exit( 1 );
 33+ }
 34+ }
 35+
 36+ $this->generateArabic();
 37+ $this->generateMalayalam();
 38+ }
 39+
 40+ function generateArabic() {
 41+ $file = fopen( $this->dataFile, 'r' );
 42+ if ( !$file ) {
 43+ $this->error( 'Unable to open the data file.' );
 44+ exit( 1 );
 45+ }
 46+
 47+ // For the file format, see http://www.unicode.org/reports/tr44/
 48+ $fieldNames = array(
 49+ 'Code',
 50+ 'Name',
 51+ 'General_Category',
 52+ 'Canonical_Combining_Class',
 53+ 'Bidi_Class',
 54+ 'Decomposition_Type_Mapping',
 55+ 'Numeric_Type_Value',
 56+ 'Bidi_Mirrored',
 57+ 'Unicode_1_Name',
 58+ 'ISO_Comment',
 59+ 'Simple_Uppercase_Mapping',
 60+ 'Simple_Lowercase_Mapping',
 61+ 'Simple_Titlecase_Mapping'
 62+ );
 63+
 64+ $pairs = array();
 65+
 66+ $lineNum = 0;
 67+ while ( false !== ( $line = fgets( $file ) ) ) {
 68+ ++$lineNum;
 69+
 70+ # Strip comments
 71+ $line = trim( substr( $line, 0, strcspn( $line, '#' ) ) );
 72+ if ( $line === '' ) {
 73+ continue;
 74+ }
 75+
 76+ # Split fields
 77+ $numberedData = explode( ';', $line );
 78+ $data = array();
 79+ foreach ( $fieldNames as $number => $name ) {
 80+ $data[$name] = $numberedData[$number];
 81+ }
 82+
 83+ $code = base_convert( $data['Code'], 16, 10 );
 84+ if ( ( $code >= 0xFB50 && $code <= 0xFDFF ) # Arabic presentation forms A
 85+ || ( $code >= 0xFE70 && $code <= 0xFEFF ) ) # Arabic presentation forms B
 86+ {
 87+ if ( $data['Decomposition_Type_Mapping'] === '' ) {
 88+ // No decomposition
 89+ continue;
 90+ }
 91+ if ( !preg_match( '/^ *(<\w*>) +([0-9A-F ]*)$/',
 92+ $data['Decomposition_Type_Mapping'], $m ) )
 93+ {
 94+ $this->error( "Can't parse Decomposition_Type/Mapping on line $lineNum" );
 95+ $this->error( $line );
 96+ continue;
 97+ }
 98+
 99+ $source = hexSequenceToUtf8( $data['Code'] );
 100+ $dest = hexSequenceToUtf8( $m[2] );
 101+ $pairs[$source] = $dest;
 102+ }
 103+ }
 104+
 105+ global $IP;
 106+ file_put_contents( "$IP/serialized/normalize-ar.ser", serialize( $pairs ) );
 107+ echo "ar: " . count( $pairs ) . " pairs written.\n";
 108+ }
 109+
 110+ function generateMalayalam() {
 111+ $hexPairs = array(
 112+ # From http://unicode.org/versions/Unicode5.1.0/#Malayalam_Chillu_Characters
 113+ '0D23 0D4D 200D' => '0D7A',
 114+ '0D28 0D4D 200D' => '0D7B',
 115+ '0D30 0D4D 200D' => '0D7C',
 116+ '0D32 0D4D 200D' => '0D7D',
 117+ '0D33 0D4D 200D' => '0D7E',
 118+
 119+ # From http://permalink.gmane.org/gmane.science.linguistics.wikipedia.technical/46413
 120+ '0D15 0D4D 200D' => '0D7F',
 121+ );
 122+
 123+ $pairs = array();
 124+ foreach ( $hexPairs as $hexSource => $hexDest ) {
 125+ $source = hexSequenceToUtf8( $hexSource );
 126+ $dest = hexSequenceToUtf8( $hexDest );
 127+ $pairs[$source] = $dest;
 128+ }
 129+
 130+ global $IP;
 131+ file_put_contents( "$IP/serialized/normalize-ml.ser", serialize( $pairs ) );
 132+ echo "ml: " . count( $pairs ) . " pairs written.\n";
 133+ }
 134+}
 135+
 136+$maintClass = 'GenerateNormalizerData';
 137+require_once( DO_MAINTENANCE );
 138+
Property changes on: trunk/phase3/maintenance/language/generateNormalizerData.php
___________________________________________________________________
Added: svn:eol-style
1139 + native
Index: trunk/phase3/maintenance/preprocessorFuzzTest.php
@@ -102,7 +102,8 @@
103103 // This resolves a few differences between the old preprocessor and the
104104 // XML-based one, which doesn't like illegals and converts line endings.
105105 // It's done by the MW UI, so it's a reasonably legitimate thing to do.
106 - $s = UtfNormal::cleanUp( $s );
 106+ global $wgContLang;
 107+ $s = $wgContLang->normalize( $s );
107108 return $s;
108109 }
109110
Index: trunk/phase3/maintenance/dumpTextPass.php
@@ -236,6 +236,7 @@
237237 * May throw a database error if, say, the server dies during query.
238238 */
239239 private function getTextDb( $id ) {
 240+ global $wgContLang;
240241 $id = intval( $id );
241242 $row = $this->db->selectRow( 'text',
242243 array( 'old_text', 'old_flags' ),
@@ -246,7 +247,7 @@
247248 return false;
248249 }
249250 $stripped = str_replace( "\r", "", $text );
250 - $normalized = UtfNormal::cleanUp( $stripped );
 251+ $normalized = $wgContLang->normalize( $stripped );
251252 return $normalized;
252253 }
253254
@@ -321,6 +322,8 @@
322323 }
323324
324325 private function getTextSpawnedOnce( $id ) {
 326+ global $wgContLang;
 327+
325328 $ok = fwrite( $this->spawnWrite, "$id\n" );
326329 //$this->progress( ">> $id" );
327330 if( !$ok ) return false;
@@ -351,7 +354,7 @@
352355
353356 // Do normalization in the dump thread...
354357 $stripped = str_replace( "\r", "", $text );
355 - $normalized = UtfNormal::cleanUp( $stripped );
 358+ $normalized = $wgContLang->normalize( $stripped );
356359 return $normalized;
357360 }
358361
Index: trunk/phase3/includes/Xml.php
@@ -56,7 +56,7 @@
5757
5858 /**
5959 * Format an XML element as with self::element(), but run text through the
60 - * UtfNormal::cleanUp() validator first to ensure that no invalid UTF-8
 60+ * $wgContLang->normalize() validator first to ensure that no invalid UTF-8
6161 * is passed.
6262 *
6363 * @param $element String:
@@ -65,12 +65,13 @@
6666 * @return string
6767 */
6868 public static function elementClean( $element, $attribs = array(), $contents = '') {
 69+ global $wgContLang;
6970 if( $attribs ) {
7071 $attribs = array_map( array( 'UtfNormal', 'cleanUp' ), $attribs );
7172 }
7273 if( $contents ) {
7374 wfProfileIn( __METHOD__ . '-norm' );
74 - $contents = UtfNormal::cleanUp( $contents );
 75+ $contents = $wgContLang->normalize( $contents );
7576 wfProfileOut( __METHOD__ . '-norm' );
7677 }
7778 return self::element( $element, $attribs, $contents );
Index: trunk/phase3/includes/api/ApiResult.php
@@ -304,7 +304,8 @@
305305 {
306306 if(!is_string($s))
307307 return;
308 - $s = UtfNormal::cleanUp($s);
 308+ global $wgContLang;
 309+ $s = $wgContLang->normalize($s);
309310 }
310311
311312 public function execute() {
Index: trunk/phase3/includes/WebRequest.php
@@ -203,7 +203,8 @@
204204 $data[$key] = $this->normalizeUnicode( $val );
205205 }
206206 } else {
207 - $data = UtfNormal::cleanUp( $data );
 207+ global $wgContLang;
 208+ $data = $wgContLang->normalize( $data );
208209 }
209210 return $data;
210211 }
@@ -600,6 +601,7 @@
601602 * @return string or NULL if no such file.
602603 */
603604 public function getFileName( $key ) {
 605+ global $wgContLang;
604606 if( !isset( $_FILES[$key] ) ) {
605607 return null;
606608 }
@@ -608,7 +610,7 @@
609611 # Safari sends filenames in HTML-encoded Unicode form D...
610612 # Horrid and evil! Let's try to make some kind of sense of it.
611613 $name = Sanitizer::decodeCharReferences( $name );
612 - $name = UtfNormal::cleanUp( $name );
 614+ $name = $wgContLang->normalize( $name );
613615 wfDebug( "WebRequest::getFileName() '" . $_FILES[$key]['name'] . "' normalized to '$name'\n" );
614616 return $name;
615617 }
Index: trunk/phase3/includes/DefaultSettings.php
@@ -867,6 +867,19 @@
868868 $wgOutputEncoding = 'UTF-8';
869869 $wgEditEncoding = '';
870870
 871+/**
 872+ * Set this to true to clean up archaic Unicode sequences in Arabic and
 873+ * Malayalam text. Currently only works if $wgLanguageCode is set to Arabic
 874+ * or Malayalam.
 875+ *
 876+ * Enabling this is generally a good idea for new wikis, since it fixes a few
 877+ * technical problems to do with editing these languages. However, if it's
 878+ * enabled on an existing wiki, pages which contain the problematic characters
 879+ * in their page titles may become inaccessible. Running maintenance/cleanupTitles.php
 880+ * after enabling it may fix this.
 881+ */
 882+$wgFixArchaicUnicode = false;
 883+
871884 /**
872885 * Locale for LC_CTYPE, to work around http://bugs.php.net/bug.php?id=45132
873886 * For Unix-like operating systems, set this to a locale that has a UTF-8
Index: trunk/phase3/serialized/normalize-ar.ser
@@ -0,0 +1 @@
 2+a:731:{s:3:"ﭐ";s:2:"ٱ";s:3:"ﭑ";s:2:"ٱ";s:3:"ﭒ";s:2:"ٻ";s:3:"ﭓ";s:2:"ٻ";s:3:"ﭔ";s:2:"ٻ";s:3:"ﭕ";s:2:"ٻ";s:3:"ﭖ";s:2:"پ";s:3:"ﭗ";s:2:"پ";s:3:"ﭘ";s:2:"پ";s:3:"ﭙ";s:2:"پ";s:3:"ﭚ";s:2:"ڀ";s:3:"ﭛ";s:2:"ڀ";s:3:"ﭜ";s:2:"ڀ";s:3:"ﭝ";s:2:"ڀ";s:3:"ﭞ";s:2:"ٺ";s:3:"ﭟ";s:2:"ٺ";s:3:"ﭠ";s:2:"ٺ";s:3:"ﭡ";s:2:"ٺ";s:3:"ﭢ";s:2:"ٿ";s:3:"ﭣ";s:2:"ٿ";s:3:"ﭤ";s:2:"ٿ";s:3:"ﭥ";s:2:"ٿ";s:3:"ﭦ";s:2:"ٹ";s:3:"ﭧ";s:2:"ٹ";s:3:"ﭨ";s:2:"ٹ";s:3:"ﭩ";s:2:"ٹ";s:3:"ﭪ";s:2:"ڤ";s:3:"ﭫ";s:2:"ڤ";s:3:"ﭬ";s:2:"ڤ";s:3:"ﭭ";s:2:"ڤ";s:3:"ﭮ";s:2:"ڦ";s:3:"ﭯ";s:2:"ڦ";s:3:"ﭰ";s:2:"ڦ";s:3:"ﭱ";s:2:"ڦ";s:3:"ﭲ";s:2:"ڄ";s:3:"ﭳ";s:2:"ڄ";s:3:"ﭴ";s:2:"ڄ";s:3:"ﭵ";s:2:"ڄ";s:3:"ﭶ";s:2:"ڃ";s:3:"ﭷ";s:2:"ڃ";s:3:"ﭸ";s:2:"ڃ";s:3:"ﭹ";s:2:"ڃ";s:3:"ﭺ";s:2:"چ";s:3:"ﭻ";s:2:"چ";s:3:"ﭼ";s:2:"چ";s:3:"ﭽ";s:2:"چ";s:3:"ﭾ";s:2:"ڇ";s:3:"ﭿ";s:2:"ڇ";s:3:"ﮀ";s:2:"ڇ";s:3:"ﮁ";s:2:"ڇ";s:3:"ﮂ";s:2:"ڍ";s:3:"ﮃ";s:2:"ڍ";s:3:"ﮄ";s:2:"ڌ";s:3:"ﮅ";s:2:"ڌ";s:3:"ﮆ";s:2:"ڎ";s:3:"ﮇ";s:2:"ڎ";s:3:"ﮈ";s:2:"ڈ";s:3:"ﮉ";s:2:"ڈ";s:3:"ﮊ";s:2:"ژ";s:3:"ﮋ";s:2:"ژ";s:3:"ﮌ";s:2:"ڑ";s:3:"ﮍ";s:2:"ڑ";s:3:"ﮎ";s:2:"ک";s:3:"ﮏ";s:2:"ک";s:3:"ﮐ";s:2:"ک";s:3:"ﮑ";s:2:"ک";s:3:"ﮒ";s:2:"گ";s:3:"ﮓ";s:2:"گ";s:3:"ﮔ";s:2:"گ";s:3:"ﮕ";s:2:"گ";s:3:"ﮖ";s:2:"ڳ";s:3:"ﮗ";s:2:"ڳ";s:3:"ﮘ";s:2:"ڳ";s:3:"ﮙ";s:2:"ڳ";s:3:"ﮚ";s:2:"ڱ";s:3:"ﮛ";s:2:"ڱ";s:3:"ﮜ";s:2:"ڱ";s:3:"ﮝ";s:2:"ڱ";s:3:"ﮞ";s:2:"ں";s:3:"ﮟ";s:2:"ں";s:3:"ﮠ";s:2:"ڻ";s:3:"ﮡ";s:2:"ڻ";s:3:"ﮢ";s:2:"ڻ";s:3:"ﮣ";s:2:"ڻ";s:3:"ﮤ";s:2:"ۀ";s:3:"ﮥ";s:2:"ۀ";s:3:"ﮦ";s:2:"ہ";s:3:"ﮧ";s:2:"ہ";s:3:"ﮨ";s:2:"ہ";s:3:"ﮩ";s:2:"ہ";s:3:"ﮪ";s:2:"ھ";s:3:"ﮫ";s:2:"ھ";s:3:"ﮬ";s:2:"ھ";s:3:"ﮭ";s:2:"ھ";s:3:"ﮮ";s:2:"ے";s:3:"ﮯ";s:2:"ے";s:3:"ﮰ";s:2:"ۓ";s:3:"ﮱ";s:2:"ۓ";s:3:"ﯓ";s:2:"ڭ";s:3:"ﯔ";s:2:"ڭ";s:3:"ﯕ";s:2:"ڭ";s:3:"ﯖ";s:2:"ڭ";s:3:"ﯗ";s:2:"ۇ";s:3:"ﯘ";s:2:"ۇ";s:3:"ﯙ";s:2:"ۆ";s:3:"ﯚ";s:2:"ۆ";s:3:"ﯛ";s:2:"ۈ";s:3:"ﯜ";s:2:"ۈ";s:3:"ﯝ";s:2:"ٷ";s:3:"ﯞ";s:2:"ۋ";s:3:"ﯟ";s:2:"ۋ";s:3:"ﯠ";s:2:"ۅ";s:3:"ﯡ";s:2:"ۅ";s:3:"ﯢ";s:2:"ۉ";s:3:"ﯣ";s:2:"ۉ";s:3:"ﯤ";s:2:"ې";s:3:"ﯥ";s:2:"ې";s:3:"ﯦ";s:2:"ې";s:3:"ﯧ";s:2:"ې";s:3:"ﯨ";s:2:"ى";s:3:"ﯩ";s:2:"ى";s:3:"ﯪ";s:4:"ئا";s:3:"ﯫ";s:4:"ئا";s:3:"ﯬ";s:4:"ئە";s:3
:"ﯭ";s:4:"ئە";s:3:"ﯮ";s:4:"ئو";s:3:"ﯯ";s:4:"ئو";s:3:"ﯰ";s:4:"ئۇ";s:3:"ﯱ";s:4:"ئۇ";s:3:"ﯲ";s:4:"ئۆ";s:3:"ﯳ";s:4:"ئۆ";s:3:"ﯴ";s:4:"ئۈ";s:3:"ﯵ";s:4:"ئۈ";s:3:"ﯶ";s:4:"ئې";s:3:"ﯷ";s:4:"ئې";s:3:"ﯸ";s:4:"ئې";s:3:"ﯹ";s:4:"ئى";s:3:"ﯺ";s:4:"ئى";s:3:"ﯻ";s:4:"ئى";s:3:"ﯼ";s:2:"ی";s:3:"ﯽ";s:2:"ی";s:3:"ﯾ";s:2:"ی";s:3:"ﯿ";s:2:"ی";s:3:"ﰀ";s:4:"ئج";s:3:"ﰁ";s:4:"ئح";s:3:"ﰂ";s:4:"ئم";s:3:"ﰃ";s:4:"ئى";s:3:"ﰄ";s:4:"ئي";s:3:"ﰅ";s:4:"بج";s:3:"ﰆ";s:4:"بح";s:3:"ﰇ";s:4:"بخ";s:3:"ﰈ";s:4:"بم";s:3:"ﰉ";s:4:"بى";s:3:"ﰊ";s:4:"بي";s:3:"ﰋ";s:4:"تج";s:3:"ﰌ";s:4:"تح";s:3:"ﰍ";s:4:"تخ";s:3:"ﰎ";s:4:"تم";s:3:"ﰏ";s:4:"تى";s:3:"ﰐ";s:4:"تي";s:3:"ﰑ";s:4:"ثج";s:3:"ﰒ";s:4:"ثم";s:3:"ﰓ";s:4:"ثى";s:3:"ﰔ";s:4:"ثي";s:3:"ﰕ";s:4:"جح";s:3:"ﰖ";s:4:"جم";s:3:"ﰗ";s:4:"حج";s:3:"ﰘ";s:4:"حم";s:3:"ﰙ";s:4:"خج";s:3:"ﰚ";s:4:"خح";s:3:"ﰛ";s:4:"خم";s:3:"ﰜ";s:4:"سج";s:3:"ﰝ";s:4:"سح";s:3:"ﰞ";s:4:"سخ";s:3:"ﰟ";s:4:"سم";s:3:"ﰠ";s:4:"صح";s:3:"ﰡ";s:4:"صم";s:3:"ﰢ";s:4:"ضج";s:3:"ﰣ";s:4:"ضح";s:3:"ﰤ";s:4:"ضخ";s:3:"ﰥ";s:4:"ضم";s:3:"ﰦ";s:4:"طح";s:3:"ﰧ";s:4:"طم";s:3:"ﰨ";s:4:"ظم";s:3:"ﰩ";s:4:"عج";s:3:"ﰪ";s:4:"عم";s:3:"ﰫ";s:4:"غج";s:3:"ﰬ";s:4:"غم";s:3:"ﰭ";s:4:"فج";s:3:"ﰮ";s:4:"فح";s:3:"ﰯ";s:4:"فخ";s:3:"ﰰ";s:4:"فم";s:3:"ﰱ";s:4:"فى";s:3:"ﰲ";s:4:"في";s:3:"ﰳ";s:4:"قح";s:3:"ﰴ";s:4:"قم";s:3:"ﰵ";s:4:"قى";s:3:"ﰶ";s:4:"قي";s:3:"ﰷ";s:4:"كا";s:3:"ﰸ";s:4:"كج";s:3:"ﰹ";s:4:"كح";s:3:"ﰺ";s:4:"كخ";s:3:"ﰻ";s:4:"كل";s:3:"ﰼ";s:4:"كم";s:3:"ﰽ";s:4:"كى";s:3:"ﰾ";s:4:"كي";s:3:"ﰿ";s:4:"لج";s:3:"ﱀ";s:4:"لح";s:3:"ﱁ";s:4:"لخ";s:3:"ﱂ";s:4:"لم";s:3:"ﱃ";s:4:"لى";s:3:"ﱄ";s:4:"لي";s:3:"ﱅ";s:4:"مج";s:3:"ﱆ";s:4:"مح";s:3:"ﱇ";s:4:"مخ";s:3:"ﱈ";s:4:"مم";s:3:"ﱉ";s:4:"مى";s:3:"ﱊ";s:4:"مي";s:3:"ﱋ";s:4:"نج";s:3:"ﱌ";s:4:"نح";s:3:"ﱍ";s:4:"نخ";s:3:"ﱎ";s:4:"نم";s:3:"ﱏ";s:4:"نى";s:3:"ﱐ";s:4:"ني";s:3:"ﱑ";s:4:"هج";s:3:"ﱒ";s:4:"هم";s:3:"ﱓ";s:4:"هى";s:3:"ﱔ";s:4:"هي";s:3:"ﱕ";s:4:"يج";s:3:"ﱖ";s:4:"يح";s:3:"ﱗ";s:4:"يخ";s:3:"ﱘ";s:4:"يم";s:3:"ﱙ";s:4:"يى";s:3:"ﱚ";s:4:"يي";s:3:"ﱛ";s:4:"ذٰ";s:3:"ﱜ";s:4:"رٰ";s:3:"ﱝ";s:4:"ىٰ";s:3:"ﱞ";s:5:" ٌّ";s:3:"ﱟ";s:5:" ٍّ";s:3:"ﱠ";s:5:" َّ";s:3:"ﱡ";s:5:" ُّ";s:3:"ﱢ";s:5:" 
ِّ";s:3:"ﱣ";s:5:" ّٰ";s:3:"ﱤ";s:4:"ئر";s:3:"ﱥ";s:4:"ئز";s:3:"ﱦ";s:4:"ئم";s:3:"ﱧ";s:4:"ئن";s:3:"ﱨ";s:4:"ئى";s:3:"ﱩ";s:4:"ئي";s:3:"ﱪ";s:4:"بر";s:3:"ﱫ";s:4:"بز";s:3:"ﱬ";s:4:"بم";s:3:"ﱭ";s:4:"بن";s:3:"ﱮ";s:4:"بى";s:3:"ﱯ";s:4:"بي";s:3:"ﱰ";s:4:"تر";s:3:"ﱱ";s:4:"تز";s:3:"ﱲ";s:4:"تم";s:3:"ﱳ";s:4:"تن";s:3:"ﱴ";s:4:"تى";s:3:"ﱵ";s:4:"تي";s:3:"ﱶ";s:4:"ثر";s:3:"ﱷ";s:4:"ثز";s:3:"ﱸ";s:4:"ثم";s:3:"ﱹ";s:4:"ثن";s:3:"ﱺ";s:4:"ثى";s:3:"ﱻ";s:4:"ثي";s:3:"ﱼ";s:4:"فى";s:3:"ﱽ";s:4:"في";s:3:"ﱾ";s:4:"قى";s:3:"ﱿ";s:4:"قي";s:3:"ﲀ";s:4:"كا";s:3:"ﲁ";s:4:"كل";s:3:"ﲂ";s:4:"كم";s:3:"ﲃ";s:4:"كى";s:3:"ﲄ";s:4:"كي";s:3:"ﲅ";s:4:"لم";s:3:"ﲆ";s:4:"لى";s:3:"ﲇ";s:4:"لي";s:3:"ﲈ";s:4:"ما";s:3:"ﲉ";s:4:"مم";s:3:"ﲊ";s:4:"نر";s:3:"ﲋ";s:4:"نز";s:3:"ﲌ";s:4:"نم";s:3:"ﲍ";s:4:"نن";s:3:"ﲎ";s:4:"نى";s:3:"ﲏ";s:4:"ني";s:3:"ﲐ";s:4:"ىٰ";s:3:"ﲑ";s:4:"ير";s:3:"ﲒ";s:4:"يز";s:3:"ﲓ";s:4:"يم";s:3:"ﲔ";s:4:"ين";s:3:"ﲕ";s:4:"يى";s:3:"ﲖ";s:4:"يي";s:3:"ﲗ";s:4:"ئج";s:3:"ﲘ";s:4:"ئح";s:3:"ﲙ";s:4:"ئخ";s:3:"ﲚ";s:4:"ئم";s:3:"ﲛ";s:4:"ئه";s:3:"ﲜ";s:4:"بج";s:3:"ﲝ";s:4:"بح";s:3:"ﲞ";s:4:"بخ";s:3:"ﲟ";s:4:"بم";s:3:"ﲠ";s:4:"به";s:3:"ﲡ";s:4:"تج";s:3:"ﲢ";s:4:"تح";s:3:"ﲣ";s:4:"تخ";s:3:"ﲤ";s:4:"تم";s:3:"ﲥ";s:4:"ته";s:3:"ﲦ";s:4:"ثم";s:3:"ﲧ";s:4:"جح";s:3:"ﲨ";s:4:"جم";s:3:"ﲩ";s:4:"حج";s:3:"ﲪ";s:4:"حم";s:3:"ﲫ";s:4:"خج";s:3:"ﲬ";s:4:"خم";s:3:"ﲭ";s:4:"سج";s:3:"ﲮ";s:4:"سح";s:3:"ﲯ";s:4:"سخ";s:3:"ﲰ";s:4:"سم";s:3:"ﲱ";s:4:"صح";s:3:"ﲲ";s:4:"صخ";s:3:"ﲳ";s:4:"صم";s:3:"ﲴ";s:4:"ضج";s:3:"ﲵ";s:4:"ضح";s:3:"ﲶ";s:4:"ضخ";s:3:"ﲷ";s:4:"ضم";s:3:"ﲸ";s:4:"طح";s:3:"ﲹ";s:4:"ظم";s:3:"ﲺ";s:4:"عج";s:3:"ﲻ";s:4:"عم";s:3:"ﲼ";s:4:"غج";s:3:"ﲽ";s:4:"غم";s:3:"ﲾ";s:4:"فج";s:3:"ﲿ";s:4:"فح";s:3:"ﳀ";s:4:"فخ";s:3:"ﳁ";s:4:"فم";s:3:"ﳂ";s:4:"قح";s:3:"ﳃ";s:4:"قم";s:3:"ﳄ";s:4:"كج";s:3:"ﳅ";s:4:"كح";s:3:"ﳆ";s:4:"كخ";s:3:"ﳇ";s:4:"كل";s:3:"ﳈ";s:4:"كم";s:3:"ﳉ";s:4:"لج";s:3:"ﳊ";s:4:"لح";s:3:"ﳋ";s:4:"لخ";s:3:"ﳌ";s:4:"لم";s:3:"ﳍ";s:4:"له";s:3:"ﳎ";s:4:"مج";s:3:"ﳏ";s:4:"مح";s:3:"ﳐ";s:4:"مخ";s:3:"ﳑ";s:4:"مم";s:3:"ﳒ";s:4:"نج";s:3:"ﳓ";s:4:"نح";s:3:"ﳔ";s:4:"نخ";s:3:"ﳕ";s:4:"نم";s:3:"ﳖ";s:4:"نه";s:3:"ﳗ";s:4:"هج";s:3:"ﳘ
";s:4:"هم";s:3:"ﳙ";s:4:"هٰ";s:3:"ﳚ";s:4:"يج";s:3:"ﳛ";s:4:"يح";s:3:"ﳜ";s:4:"يخ";s:3:"ﳝ";s:4:"يم";s:3:"ﳞ";s:4:"يه";s:3:"ﳟ";s:4:"ئم";s:3:"ﳠ";s:4:"ئه";s:3:"ﳡ";s:4:"بم";s:3:"ﳢ";s:4:"به";s:3:"ﳣ";s:4:"تم";s:3:"ﳤ";s:4:"ته";s:3:"ﳥ";s:4:"ثم";s:3:"ﳦ";s:4:"ثه";s:3:"ﳧ";s:4:"سم";s:3:"ﳨ";s:4:"سه";s:3:"ﳩ";s:4:"شم";s:3:"ﳪ";s:4:"شه";s:3:"ﳫ";s:4:"كل";s:3:"ﳬ";s:4:"كم";s:3:"ﳭ";s:4:"لم";s:3:"ﳮ";s:4:"نم";s:3:"ﳯ";s:4:"نه";s:3:"ﳰ";s:4:"يم";s:3:"ﳱ";s:4:"يه";s:3:"ﳲ";s:6:"ـَّ";s:3:"ﳳ";s:6:"ـُّ";s:3:"ﳴ";s:6:"ـِّ";s:3:"ﳵ";s:4:"طى";s:3:"ﳶ";s:4:"طي";s:3:"ﳷ";s:4:"عى";s:3:"ﳸ";s:4:"عي";s:3:"ﳹ";s:4:"غى";s:3:"ﳺ";s:4:"غي";s:3:"ﳻ";s:4:"سى";s:3:"ﳼ";s:4:"سي";s:3:"ﳽ";s:4:"شى";s:3:"ﳾ";s:4:"شي";s:3:"ﳿ";s:4:"حى";s:3:"ﴀ";s:4:"حي";s:3:"ﴁ";s:4:"جى";s:3:"ﴂ";s:4:"جي";s:3:"ﴃ";s:4:"خى";s:3:"ﴄ";s:4:"خي";s:3:"ﴅ";s:4:"صى";s:3:"ﴆ";s:4:"صي";s:3:"ﴇ";s:4:"ضى";s:3:"ﴈ";s:4:"ضي";s:3:"ﴉ";s:4:"شج";s:3:"ﴊ";s:4:"شح";s:3:"ﴋ";s:4:"شخ";s:3:"ﴌ";s:4:"شم";s:3:"ﴍ";s:4:"شر";s:3:"ﴎ";s:4:"سر";s:3:"ﴏ";s:4:"صر";s:3:"ﴐ";s:4:"ضر";s:3:"ﴑ";s:4:"طى";s:3:"ﴒ";s:4:"طي";s:3:"ﴓ";s:4:"عى";s:3:"ﴔ";s:4:"عي";s:3:"ﴕ";s:4:"غى";s:3:"ﴖ";s:4:"غي";s:3:"ﴗ";s:4:"سى";s:3:"ﴘ";s:4:"سي";s:3:"ﴙ";s:4:"شى";s:3:"ﴚ";s:4:"شي";s:3:"ﴛ";s:4:"حى";s:3:"ﴜ";s:4:"حي";s:3:"ﴝ";s:4:"جى";s:3:"ﴞ";s:4:"جي";s:3:"ﴟ";s:4:"خى";s:3:"ﴠ";s:4:"خي";s:3:"ﴡ";s:4:"صى";s:3:"ﴢ";s:4:"صي";s:3:"ﴣ";s:4:"ضى";s:3:"ﴤ";s:4:"ضي";s:3:"ﴥ";s:4:"شج";s:3:"ﴦ";s:4:"شح";s:3:"ﴧ";s:4:"شخ";s:3:"ﴨ";s:4:"شم";s:3:"ﴩ";s:4:"شر";s:3:"ﴪ";s:4:"سر";s:3:"ﴫ";s:4:"صر";s:3:"ﴬ";s:4:"ضر";s:3:"ﴭ";s:4:"شج";s:3:"ﴮ";s:4:"شح";s:3:"ﴯ";s:4:"شخ";s:3:"ﴰ";s:4:"شم";s:3:"ﴱ";s:4:"سه";s:3:"ﴲ";s:4:"شه";s:3:"ﴳ";s:4:"طم";s:3:"ﴴ";s:4:"سج";s:3:"ﴵ";s:4:"سح";s:3:"ﴶ";s:4:"سخ";s:3:"ﴷ";s:4:"شج";s:3:"ﴸ";s:4:"شح";s:3:"ﴹ";s:4:"شخ";s:3:"ﴺ";s:4:"طم";s:3:"ﴻ";s:4:"ظم";s:3:"ﴼ";s:4:"اً";s:3:"ﴽ";s:4:"اً";s:3:"ﵐ";s:6:"تجم";s:3:"ﵑ";s:6:"تحج";s:3:"ﵒ";s:6:"تحج";s:3:"ﵓ";s:6:"تحم";s:3:"ﵔ";s:6:"تخم";s:3:"ﵕ";s:6:"تمج";s:3:"ﵖ";s:6:"تمح";s:3:"ﵗ";s:6:"تمخ";s:3:"ﵘ";s:6:"جمح";s:3:"ﵙ";s:6:"جمح";s:3:"ﵚ";s:6:"حمي";s:3:"ﵛ";s:6:"حمى";s:3:"ﵜ";s:6:"سحج";s:3:"ﵝ";s:6:"سجح";s:3:"ﵞ";s:6:"سجى"
;s:3:"ﵟ";s:6:"سمح";s:3:"ﵠ";s:6:"سمح";s:3:"ﵡ";s:6:"سمج";s:3:"ﵢ";s:6:"سمم";s:3:"ﵣ";s:6:"سمم";s:3:"ﵤ";s:6:"صحح";s:3:"ﵥ";s:6:"صحح";s:3:"ﵦ";s:6:"صمم";s:3:"ﵧ";s:6:"شحم";s:3:"ﵨ";s:6:"شحم";s:3:"ﵩ";s:6:"شجي";s:3:"ﵪ";s:6:"شمخ";s:3:"ﵫ";s:6:"شمخ";s:3:"ﵬ";s:6:"شمم";s:3:"ﵭ";s:6:"شمم";s:3:"ﵮ";s:6:"ضحى";s:3:"ﵯ";s:6:"ضخم";s:3:"ﵰ";s:6:"ضخم";s:3:"ﵱ";s:6:"طمح";s:3:"ﵲ";s:6:"طمح";s:3:"ﵳ";s:6:"طمم";s:3:"ﵴ";s:6:"طمي";s:3:"ﵵ";s:6:"عجم";s:3:"ﵶ";s:6:"عمم";s:3:"ﵷ";s:6:"عمم";s:3:"ﵸ";s:6:"عمى";s:3:"ﵹ";s:6:"غمم";s:3:"ﵺ";s:6:"غمي";s:3:"ﵻ";s:6:"غمى";s:3:"ﵼ";s:6:"فخم";s:3:"ﵽ";s:6:"فخم";s:3:"ﵾ";s:6:"قمح";s:3:"ﵿ";s:6:"قمم";s:3:"ﶀ";s:6:"لحم";s:3:"ﶁ";s:6:"لحي";s:3:"ﶂ";s:6:"لحى";s:3:"ﶃ";s:6:"لجج";s:3:"ﶄ";s:6:"لجج";s:3:"ﶅ";s:6:"لخم";s:3:"ﶆ";s:6:"لخم";s:3:"ﶇ";s:6:"لمح";s:3:"ﶈ";s:6:"لمح";s:3:"ﶉ";s:6:"محج";s:3:"ﶊ";s:6:"محم";s:3:"ﶋ";s:6:"محي";s:3:"ﶌ";s:6:"مجح";s:3:"ﶍ";s:6:"مجم";s:3:"ﶎ";s:6:"مخج";s:3:"ﶏ";s:6:"مخم";s:3:"ﶒ";s:6:"مجخ";s:3:"ﶓ";s:6:"همج";s:3:"ﶔ";s:6:"همم";s:3:"ﶕ";s:6:"نحم";s:3:"ﶖ";s:6:"نحى";s:3:"ﶗ";s:6:"نجم";s:3:"ﶘ";s:6:"نجم";s:3:"ﶙ";s:6:"نجى";s:3:"ﶚ";s:6:"نمي";s:3:"ﶛ";s:6:"نمى";s:3:"ﶜ";s:6:"يمم";s:3:"ﶝ";s:6:"يمم";s:3:"ﶞ";s:6:"بخي";s:3:"ﶟ";s:6:"تجي";s:3:"ﶠ";s:6:"تجى";s:3:"ﶡ";s:6:"تخي";s:3:"ﶢ";s:6:"تخى";s:3:"ﶣ";s:6:"تمي";s:3:"ﶤ";s:6:"تمى";s:3:"ﶥ";s:6:"جمي";s:3:"ﶦ";s:6:"جحى";s:3:"ﶧ";s:6:"جمى";s:3:"ﶨ";s:6:"سخى";s:3:"ﶩ";s:6:"صحي";s:3:"ﶪ";s:6:"شحي";s:3:"ﶫ";s:6:"ضحي";s:3:"ﶬ";s:6:"لجي";s:3:"ﶭ";s:6:"لمي";s:3:"ﶮ";s:6:"يحي";s:3:"ﶯ";s:6:"يجي";s:3:"ﶰ";s:6:"يمي";s:3:"ﶱ";s:6:"ممي";s:3:"ﶲ";s:6:"قمي";s:3:"ﶳ";s:6:"نحي";s:3:"ﶴ";s:6:"قمح";s:3:"ﶵ";s:6:"لحم";s:3:"ﶶ";s:6:"عمي";s:3:"ﶷ";s:6:"كمي";s:3:"ﶸ";s:6:"نجح";s:3:"ﶹ";s:6:"مخي";s:3:"ﶺ";s:6:"لجم";s:3:"ﶻ";s:6:"كمم";s:3:"ﶼ";s:6:"لجم";s:3:"ﶽ";s:6:"نجح";s:3:"ﶾ";s:6:"جحي";s:3:"ﶿ";s:6:"حجي";s:3:"ﷀ";s:6:"مجي";s:3:"ﷁ";s:6:"فمي";s:3:"ﷂ";s:6:"بحي";s:3:"ﷃ";s:6:"كمم";s:3:"ﷄ";s:6:"عجم";s:3:"ﷅ";s:6:"صمم";s:3:"ﷆ";s:6:"سخي";s:3:"ﷇ";s:6:"نجي";s:3:"ﷰ";s:6:"صلے";s:3:"ﷱ";s:6:"قلے";s:3:"ﷲ";s:8:"الله";s:3:"ﷳ";s:8:"اكبر";s:3:"ﷴ";s:8:"محمد";s:3:"ﷵ";s:8:"صلعم";s:3:"ﷶ";s:8:"رسول";s:3:"ﷷ";s:8:"ع
ليه";s:3:"ﷸ";s:8:"وسلم";s:3:"ﷹ";s:6:"صلى";s:3:"ﷺ";s:33:"صلى الله عليه وسلم";s:3:"ﷻ";s:15:"جل جلاله";s:3:"﷼";s:8:"ریال";s:3:"ﹰ";s:3:" ً";s:3:"ﹱ";s:4:"ـً";s:3:"ﹲ";s:3:" ٌ";s:3:"ﹴ";s:3:" ٍ";s:3:"ﹶ";s:3:" َ";s:3:"ﹷ";s:4:"ـَ";s:3:"ﹸ";s:3:" ُ";s:3:"ﹹ";s:4:"ـُ";s:3:"ﹺ";s:3:" ِ";s:3:"ﹻ";s:4:"ـِ";s:3:"ﹼ";s:3:" ّ";s:3:"ﹽ";s:4:"ـّ";s:3:"ﹾ";s:3:" ْ";s:3:"ﹿ";s:4:"ـْ";s:3:"ﺀ";s:2:"ء";s:3:"ﺁ";s:2:"آ";s:3:"ﺂ";s:2:"آ";s:3:"ﺃ";s:2:"أ";s:3:"ﺄ";s:2:"أ";s:3:"ﺅ";s:2:"ؤ";s:3:"ﺆ";s:2:"ؤ";s:3:"ﺇ";s:2:"إ";s:3:"ﺈ";s:2:"إ";s:3:"ﺉ";s:2:"ئ";s:3:"ﺊ";s:2:"ئ";s:3:"ﺋ";s:2:"ئ";s:3:"ﺌ";s:2:"ئ";s:3:"ﺍ";s:2:"ا";s:3:"ﺎ";s:2:"ا";s:3:"ﺏ";s:2:"ب";s:3:"ﺐ";s:2:"ب";s:3:"ﺑ";s:2:"ب";s:3:"ﺒ";s:2:"ب";s:3:"ﺓ";s:2:"ة";s:3:"ﺔ";s:2:"ة";s:3:"ﺕ";s:2:"ت";s:3:"ﺖ";s:2:"ت";s:3:"ﺗ";s:2:"ت";s:3:"ﺘ";s:2:"ت";s:3:"ﺙ";s:2:"ث";s:3:"ﺚ";s:2:"ث";s:3:"ﺛ";s:2:"ث";s:3:"ﺜ";s:2:"ث";s:3:"ﺝ";s:2:"ج";s:3:"ﺞ";s:2:"ج";s:3:"ﺟ";s:2:"ج";s:3:"ﺠ";s:2:"ج";s:3:"ﺡ";s:2:"ح";s:3:"ﺢ";s:2:"ح";s:3:"ﺣ";s:2:"ح";s:3:"ﺤ";s:2:"ح";s:3:"ﺥ";s:2:"خ";s:3:"ﺦ";s:2:"خ";s:3:"ﺧ";s:2:"خ";s:3:"ﺨ";s:2:"خ";s:3:"ﺩ";s:2:"د";s:3:"ﺪ";s:2:"د";s:3:"ﺫ";s:2:"ذ";s:3:"ﺬ";s:2:"ذ";s:3:"ﺭ";s:2:"ر";s:3:"ﺮ";s:2:"ر";s:3:"ﺯ";s:2:"ز";s:3:"ﺰ";s:2:"ز";s:3:"ﺱ";s:2:"س";s:3:"ﺲ";s:2:"س";s:3:"ﺳ";s:2:"س";s:3:"ﺴ";s:2:"س";s:3:"ﺵ";s:2:"ش";s:3:"ﺶ";s:2:"ش";s:3:"ﺷ";s:2:"ش";s:3:"ﺸ";s:2:"ش";s:3:"ﺹ";s:2:"ص";s:3:"ﺺ";s:2:"ص";s:3:"ﺻ";s:2:"ص";s:3:"ﺼ";s:2:"ص";s:3:"ﺽ";s:2:"ض";s:3:"ﺾ";s:2:"ض";s:3:"ﺿ";s:2:"ض";s:3:"ﻀ";s:2:"ض";s:3:"ﻁ";s:2:"ط";s:3:"ﻂ";s:2:"ط";s:3:"ﻃ";s:2:"ط";s:3:"ﻄ";s:2:"ط";s:3:"ﻅ";s:2:"ظ";s:3:"ﻆ";s:2:"ظ";s:3:"ﻇ";s:2:"ظ";s:3:"ﻈ";s:2:"ظ";s:3:"ﻉ";s:2:"ع";s:3:"ﻊ";s:2:"ع";s:3:"ﻋ";s:2:"ع";s:3:"ﻌ";s:2:"ع";s:3:"ﻍ";s:2:"غ";s:3:"ﻎ";s:2:"غ";s:3:"ﻏ";s:2:"غ";s:3:"ﻐ";s:2:"غ";s:3:"ﻑ";s:2:"ف";s:3:"ﻒ";s:2:"ف";s:3:"ﻓ";s:2:"ف";s:3:"ﻔ";s:2:"ف";s:3:"ﻕ";s:2:"ق";s:3:"ﻖ";s:2:"ق";s:3:"ﻗ";s:2:"ق";s:3:"ﻘ";s:2:"ق";s:3:"ﻙ";s:2:"ك";s:3:"ﻚ";s:2:"ك";s:3:"ﻛ";s:2:"ك";s:3:"ﻜ";s:2:"ك";s:3:"ﻝ";s:2:"ل";s:3:"ﻞ";s:2:"ل";s:3:"ﻟ";s:2:"ل";s:3:"ﻠ";s:2:"ل";s:3:"ﻡ";s:2:"م";s:3:"ﻢ";s:2:"م";s:3:"ﻣ";s:2:"م";s:3:"ﻤ";s:2:"م";s:3:"ﻥ";s:2:"ن";s:3:"ﻦ";s:2
:"ن";s:3:"ﻧ";s:2:"ن";s:3:"ﻨ";s:2:"ن";s:3:"ﻩ";s:2:"ه";s:3:"ﻪ";s:2:"ه";s:3:"ﻫ";s:2:"ه";s:3:"ﻬ";s:2:"ه";s:3:"ﻭ";s:2:"و";s:3:"ﻮ";s:2:"و";s:3:"ﻯ";s:2:"ى";s:3:"ﻰ";s:2:"ى";s:3:"ﻱ";s:2:"ي";s:3:"ﻲ";s:2:"ي";s:3:"ﻳ";s:2:"ي";s:3:"ﻴ";s:2:"ي";s:3:"ﻵ";s:4:"لآ";s:3:"ﻶ";s:4:"لآ";s:3:"ﻷ";s:4:"لأ";s:3:"ﻸ";s:4:"لأ";s:3:"ﻹ";s:4:"لإ";s:3:"ﻺ";s:4:"لإ";s:3:"ﻻ";s:4:"لا";s:3:"ﻼ";s:4:"لا";}
\ No newline at end of file
Index: trunk/phase3/serialized/normalize-ml.ser
@@ -0,0 +1 @@
 2+a:6:{s:9:"ണ്‍";s:3:"ൺ";s:9:"ന്‍";s:3:"ൻ";s:9:"ര്‍";s:3:"ർ";s:9:"ല്‍";s:3:"ൽ";s:9:"ള്‍";s:3:"ൾ";s:9:"ക്‍";s:3:"ൿ";}
\ No newline at end of file
Index: trunk/phase3/languages/Language.php
@@ -62,6 +62,11 @@
6363 var $minSearchLength;
6464 var $mExtendedSpecialPageAliases;
6565
 66+ /**
 67+ * ReplacementArray object caches
 68+ */
 69+ var $transformData = array();
 70+
6671 static public $dataCache;
6772 static public $mLangObjCache = array();
6873
@@ -1866,6 +1871,36 @@
18671872 }
18681873
18691874 /**
 1875+ * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 1876+ * also cleans up certain backwards-compatible sequences, converting them
 1877+ * to the modern Unicode equivalent.
 1878+ *
 1879+ * This is language-specific for performance reasons only.
 1880+ */
 1881+ function normalize( $s ) {
 1882+ return UtfNormal::cleanUp( $s );
 1883+ }
 1884+
 1885+ /**
 1886+ * Transform a string using serialized data stored in the given file (which
 1887+ * must be in the serialized subdirectory of $IP). The file contains pairs
 1888+ * mapping source characters to destination characters.
 1889+ *
 1890+ * The data is cached in process memory. This will go faster if you have the
 1891+ * FastStringSearch extension.
 1892+ */
 1893+ function transformUsingPairFile( $file, $string ) {
 1894+ if ( !isset( $this->transformData[$file] ) ) {
 1895+ $data = wfGetPrecompiledData( $file );
 1896+ if ( $data === false ) {
 1897+ throw new MWException( __METHOD__.": The transformation file $file is missing" );
 1898+ }
 1899+ $this->transformData[$file] = new ReplacementArray( $data );
 1900+ }
 1901+ return $this->transformData[$file]->replace( $string );
 1902+ }
 1903+
 1904+ /**
18701905 * For right-to-left language support
18711906 *
18721907 * @return bool
Index: trunk/phase3/languages/classes/LanguageAr.php
@@ -6,6 +6,7 @@
77 * @author Niklas Laxström
88 */
99 class LanguageAr extends Language {
 10+ var $normalizeArray;
1011
1112 function convertPlural( $count, $forms ) {
1213 if ( !count($forms) ) { return ''; }
@@ -26,4 +27,20 @@
2728 }
2829 return $forms[$index];
2930 }
 31+
 32+ /**
 33+ * Temporary hack for bug 9413: replace Arabic presentation forms with their
 34+ * standard equivalents.
 35+ *
 36+ * FIXME: This is language-specific for now only to avoid the negative
 37+ * performance impact of enabling it for all languages.
 38+ */
 39+ function normalize( $s ) {
 40+ global $wgFixArchaicUnicode;
 41+ $s = parent::normalize( $s );
 42+ if ( $wgFixArchaicUnicode ) {
 43+ $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s );
 44+ }
 45+ return $s;
 46+ }
3047 }
Index: trunk/phase3/languages/classes/LanguageMl.php
@@ -0,0 +1,22 @@
 2+<?php
 3+
 4+class LanguageMl extends Language {
 5+ /**
 6+ * Temporary hack for the issue described at
 7+ * http://permalink.gmane.org/gmane.science.linguistics.wikipedia.technical/46396
 8+ * Convert Unicode 5.0 style Malayalam input to Unicode 5.1. Similar to
 9+ * bug 9413. Also fixes miscellaneous problems due to mishandling of ZWJ,
 10+ * e.g. bug 11162.
 11+ *
 12+ * FIXME: This is language-specific for now only to avoid the negative
 13+ * performance impact of enabling it for all languages.
 14+ */
 15+ function normalize( $s ) {
 16+ global $wgFixArchaicUnicode;
 17+ $s = parent::normalize( $s );
 18+ if ( $wgFixArchaicUnicode ) {
 19+ $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s );
 20+ }
 21+ return $s;
 22+ }
 23+}
Property changes on: trunk/phase3/languages/classes/LanguageMl.php
___________________________________________________________________
Added: svn:eol-style
124 + native
Index: trunk/phase3/RELEASE-NOTES
@@ -283,6 +283,9 @@
284284 * (bug 19791) Add URL of file source as comment to thumbs (for ImageMagick)
285285 * (bug 21946) Sorted wikitables do not properly handle minus signs
286286 * (bug 18885) Red links for media files do not support shared repositories
 287+* Added $wgFixArchaicUnicode, which, if enabled, converts some deprecated
 288+ Unicode sequences in Arabic and Malayalam text to their Unicode 5.1
 289+ equivalents.
287290
288291 === Bug fixes in 1.16 ===
289292

Follow-up revisions

Revision | Commit summary | Author | Date
r61282 | Fixes for r60599:... | tstarling | 01:50, 20 January 2010
r70211 | Remove the require for UtfNormal.php... | platonides | 22:35, 30 July 2010
r89478 | Follow up r60599. Make normalizeUnicode() work even if $wgContLang is still n... | platonides | 21:19, 4 June 2011

Comments

#Comment by Bryan (talk | contribs)   11:32, 4 January 2010

If the only drawback with this is that existing page titles become inaccessible, can't the installer set it to true in LocalSettings (but keep it false in DefaultSettings)?

#Comment by Tim Starling (talk | contribs)   12:10, 4 January 2010

In the case of Malayalam, there is an additional drawback in that not all clients support the new encoding; this is discussed in the mailing list thread I cited.

#Comment by Alnokta (talk | contribs)   15:11, 5 January 2010

I think that there is no reason not to always convert Arabic presentation forms; there is no need to pollute the text with them. So $wgFixArchaicUnicode should be true by default, *at least for Arabic*.

#Comment by Tim Starling (talk | contribs)   01:50, 20 January 2010

Better now?

#Comment by Alnokta (talk | contribs)   08:26, 20 January 2010

Much better. Thank you!

Status & tagging log