r69643 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r69642‎ | r69643 | r69644 >
Date:21:17, 20 July 2010
Author:mah
Status:deferred
Tags:
Comment:
Slightly different, but similar to brion's tests. I wrote this before I discovered brion's tests. Maybe they'll be useful to someone some day.
Modified paths:
  • /trunk/phase3/includes/normal/UtfNormalTest2.php (added) (history)

Diff [purge]

Index: trunk/phase3/includes/normal/UtfNormalTest2.php
@@ -0,0 +1,223 @@
 2+#!/usr/bin/php
 3+<?php
 4+
// Refuse to run under a web SAPI: this script is meant for the command line only.
if ( php_sapi_name() !== 'cli' ) {
	die( "Run me from the command line please.\n" );
}

// From http://unicode.org/Public/UNIDATA/NormalizationTest.txt
// The data file defaults to NormalizationTest.txt in the current directory;
// a different path may be passed as the first command-line argument.
$file = isset( $argv[1] ) ? $argv[1] : "NormalizationTest.txt";
$sep = ';';     // field separator, read as a global by getRow()
$comment = "#"; // comment marker, read as a global by getRow()
$f = fopen( $file, "r" );
if ( $f === false ) {
	// Nothing can be tested without the data file, so stop here instead of
	// falling through to the final "done." message.
	fwrite( STDERR, "Could not open $file\n" );
	exit( 1 );
}
 14+
 15+/**
 16+ * The following section will be used for testing different normalization methods.
 17+ * - Pure PHP
 18+ ~ no assertion errors
 19+ ~ 6.25 minutes
 20+
 21+ * - php_utfnormal.so or intl extension: both are wrappers around
 22+ libicu so we list the version of libicu when making the
 23+ comparison
 24+
 25+ * - libicu Ubuntu 3.8.1-3ubuntu1.1 php 5.2.6-3ubuntu4.5
 26+ ~ 2200 assertion errors
 27+ ~ 5 seconds
 28+ ~ output: http://paste2.org/p/921566
 29+
 30+ * - libicu Ubuntu 4.2.1-3 php 5.3.2-1ubuntu4.2
 31+ ~ 1384 assertion errors
 32+ ~ 15 seconds
 33+ ~ output: http://paste2.org/p/921435
 34+
 35+ * - libicu Debian 4.4.1-5 php 5.3.2-1ubuntu4.2
 36+ ~ no assertion errors
 37+ ~ 13 seconds
 38+
 39+ * - Tests comparing pure PHP output with libicu output were added
 40+ later and slow down the runtime.
 41+ */
 42+
 43+require_once("./UtfNormal.php");
/*
 * Thin wrappers around the static UtfNormal methods, one pair per
 * normalization form. The plain variant calls the method with its default
 * arguments; the "_php" variant passes "php" as the second argument, which
 * the test loop treats as the pure-PHP implementation
 * (NOTE(review): confirm against UtfNormal.php).
 */

function normalize_form_c( $c ) {
	$normalized = UtfNormal::toNFC( $c );
	return $normalized;
}

function normalize_form_c_php( $c ) {
	$normalized = UtfNormal::toNFC( $c, "php" );
	return $normalized;
}

function normalize_form_d( $c ) {
	$normalized = UtfNormal::toNFD( $c );
	return $normalized;
}

function normalize_form_d_php( $c ) {
	$normalized = UtfNormal::toNFD( $c, "php" );
	return $normalized;
}

function normalize_form_kc( $c ) {
	$normalized = UtfNormal::toNFKC( $c );
	return $normalized;
}

function normalize_form_kc_php( $c ) {
	$normalized = UtfNormal::toNFKC( $c, "php" );
	return $normalized;
}

function normalize_form_kd( $c ) {
	$normalized = UtfNormal::toNFKD( $c );
	return $normalized;
}

function normalize_form_kd_php( $c ) {
	$normalized = UtfNormal::toNFKD( $c, "php" );
	return $normalized;
}
 52+
// Enable assertions and route failures to my_assert() instead of PHP warnings.
assert_options( ASSERT_ACTIVE, 1 );
assert_options( ASSERT_WARNING, 0 );
// ASSERT_QUIET_EVAL was removed in PHP 8.0; referencing the constant
// unconditionally is a fatal error there, so only set it where it exists.
if ( defined( 'ASSERT_QUIET_EVAL' ) ) {
	assert_options( constant( 'ASSERT_QUIET_EVAL' ), 1 );
}
assert_options( ASSERT_CALLBACK, 'my_assert' );
 57+
/**
 * Assertion-failure callback, registered via assert_options( ASSERT_CALLBACK ).
 *
 * Prints the failed expression together with the current data-file line number
 * and element 5 of the current row, both read from globals maintained by the
 * main loop. For a six-column row, element 5 is the trailing comment text
 * that getRow() appends.
 *
 * @param $file String: script file containing the failed assertion (unused)
 * @param $line Integer: script line of the failed assertion (unused)
 * @param $code String: the assertion expression that failed
 */
function my_assert( $file, $line, $code ) {
	// $count was previously declared global here but never used.
	global $col, $lineNo;
	echo "Assertion that '$code' failed on line $lineNo ($col[5])\n";
}
 62+
$count = 0;  // number of six-column data rows tested so far
$lineNo = 0; // current line of the data file; read by my_assert()

// Normalization forms in the order they are cross-checked, each mapped to the
// wrapper that computes it. The wrapper for the "php" variant has the same
// name with a "_php" suffix.
$forms = array(
	'NFC' => 'normalize_form_c',
	'NFD' => 'normalize_form_d',
	'NFKD' => 'normalize_form_kd',
	'NFKC' => 'normalize_form_kc',
);

// Conformance rules as array( form, index into $col of the expected value,
// 1-based source columns whose normalization must equal that value ).
$rules = array(
	array( 'NFC', 1, array( 1, 2, 3 ) ),        # c2 == NFC(c1) == NFC(c2) == NFC(c3)
	array( 'NFC', 3, array( 4, 5 ) ),           # c4 == NFC(c4) == NFC(c5)
	array( 'NFD', 2, array( 1, 2, 3 ) ),        # c3 == NFD(c1) == NFD(c2) == NFD(c3)
	array( 'NFD', 4, array( 4, 5 ) ),           # c5 == NFD(c4) == NFD(c5)
	array( 'NFKC', 3, array( 1, 2, 3, 4, 5 ) ), # c4 == NFKC(c1) == ... == NFKC(c5)
	array( 'NFKD', 4, array( 1, 2, 3, 4, 5 ) ), # c5 == NFKD(c1) == ... == NFKD(c5)
);

if ( $f !== false ) {
	while ( ( $col = getRow( $f ) ) !== false ) {
		$lineNo++;

		// Only rows with five data columns plus the trailing comment are test cases.
		if ( count( $col ) != 6 ) {
			continue;
		}
		$count++;
		if ( $count % 100 === 0 ) {
			echo "Count: $count\n";
		}

		// Failures are reported by comparing directly and calling my_assert(),
		// not through assert( '<string>' ): PHP 8 no longer evaluates string
		// assertions (a non-empty string is simply truthy), and assert() is
		// skipped entirely when zend.assertions is disabled, so every check
		// would silently pass. The messages are unchanged.

		# verify that the pure PHP version is correct
		$result = array();
		foreach ( $forms as $form => $native ) {
			$purePhp = $native . '_php';
			for ( $i = 1; $i <= 5; $i++ ) {
				$result[$form][$i] = $native( $col[$i - 1] );
				if ( $result[$form][$i] !== $purePhp( $col[$i - 1] ) ) {
					my_assert( __FILE__, __LINE__,
						'$' . $form . 'c' . $i . ' === $' . $form . 'c' . $i . 'p' );
				}
			}
		}

		# verify the conformance invariants against the expected columns
		foreach ( $rules as $rule ) {
			list( $form, $expected, $sources ) = $rule;
			foreach ( $sources as $i ) {
				if ( $col[$expected] !== $result[$form][$i] ) {
					my_assert( __FILE__, __LINE__,
						'$col[' . $expected . '] === $' . $form . 'c' . $i );
				}
			}
		}
	}
	// Release the data-file handle once every row has been read.
	fclose( $f );
}
echo "done.\n";
 175+
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 * Compare against http://en.wikipedia.org/wiki/UTF-8#Description
 *
 * @param $c Integer: code point
 * @return String of one to four bytes, or false when $c is negative, a
 *   UTF-16 surrogate (U+D800..U+DFFF) or greater than U+10FFFF
 */
function unichr( $c ) {
	// Surrogates and negative values have no valid UTF-8 encoding; encoding
	// them anyway would produce ill-formed byte sequences.
	if ( $c < 0 || ( $c >= 0xD800 && $c <= 0xDFFF ) ) {
		return false;
	}

	if ( $c <= 0x7F ) {
		// 0xxxxxxx
		return chr( $c );
	}
	if ( $c <= 0x7FF ) {
		// 110xxxxx 10xxxxxx
		return chr( 0xC0 | ( $c >> 6 ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	}
	if ( $c <= 0xFFFF ) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		return chr( 0xE0 | ( $c >> 12 ) )
			. chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	}
	if ( $c <= 0x10FFFF ) {
		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		return chr( 0xF0 | ( $c >> 18 ) )
			. chr( 0x80 | ( ( $c >> 12 ) & 0x3F ) )
			. chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	}

	return false;
}
 193+
/**
 * Convert a whitespace-separated list of hexadecimal code points
 * (e.g. "0044 0323") into the corresponding UTF-8 string.
 *
 * @param $c String: hexadecimal code points separated by whitespace
 * @return String: concatenation of unichr() applied to each code point
 */
function unistr( $c ) {
	// Split on runs of whitespace and drop empty tokens: hexdec( "" ) is 0, so
	// a leading, trailing or doubled space would otherwise inject a NUL byte.
	$tokens = preg_split( '/\s+/', $c, -1, PREG_SPLIT_NO_EMPTY );
	return implode( "", array_map( "unichr", array_map( "hexdec", $tokens ) ) );
}
 197+
/**
 * Read one line from the data file and split it into columns.
 *
 * Uses the globals $comment (comment marker) and $sep (field separator).
 *
 * @param $f Resource: open file handle
 * @return Array or false:
 *   - false at end of file;
 *   - array( $line ) when the line starts with the comment marker;
 *   - otherwise every non-blank field converted with unistr(), followed by
 *     one final element holding the comment text ("" if the line has none).
 *     If the comment contains ")", only the text after the last ") " is kept;
 *     otherwise the comment is kept whole, marker included.
 */
function getRow( $f ) {
	global $comment, $sep;

	$row = fgets( $f );
	if ( $row === false ) {
		return false;
	}
	$row = rtrim( $row );

	$pos = strpos( $row, $comment );
	if ( $pos === 0 ) {
		// Whole-line comment: hand it back untouched as a single column.
		return array( $row );
	}

	$c = "";
	if ( $pos !== false ) {
		// Use the LAST ")" of the comment: the parenthesised sample characters
		// may themselves contain ")", e.g. "# (...; ); ); ) FULLWIDTH RIGHT
		// PARENTHESIS", and the first one would cut the text too early.
		$close = strrpos( $row, ")" );
		if ( $close !== false && $close > $pos ) {
			// Skip the ")" and the space that follows it.
			$c = (string)substr( $row, $close + 2 );
		} else {
			$c = substr( $row, $pos );
		}
		$row = substr( $row, 0, $pos );
	}

	$ret = array();
	foreach ( explode( $sep, $row ) as $ent ) {
		// The blank remainder after the final separator is not a field.
		if ( trim( $ent ) !== "" ) {
			$ret[] = unistr( $ent );
		}
	}
	$ret[] = $c;

	return $ret;
}
\ No newline at end of file
Property changes on: trunk/phase3/includes/normal/UtfNormalTest2.php
___________________________________________________________________
Added: svn:eol-syle
1225 + native

Follow-up revisions

RevisionCommit summaryAuthorDate
r69891Follow up r69643. Remove unused global.platonides18:26, 25 July 2010

Status & tagging log