Index: trunk/phase3/includes/normal/UtfNormalMemStress.php |
— | — | @@ -0,0 +1,109 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Approximate benchmark for some basic operations. |
| 5 | + * Runs large chunks of text through cleanup with a lowish memory limit, |
| 6 | + * to test regression on mem usage (bug 28146) |
| 7 | + * |
| 8 | + * Copyright © 2004-2011 Brion Vibber <brion@wikimedia.org> |
| 9 | + * http://www.mediawiki.org/ |
| 10 | + * |
| 11 | + * This program is free software; you can redistribute it and/or modify |
| 12 | + * it under the terms of the GNU General Public License as published by |
| 13 | + * the Free Software Foundation; either version 2 of the License, or |
| 14 | + * (at your option) any later version. |
| 15 | + * |
| 16 | + * This program is distributed in the hope that it will be useful, |
| 17 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 19 | + * GNU General Public License for more details. |
| 20 | + * |
| 21 | + * You should have received a copy of the GNU General Public License along |
| 22 | + * with this program; if not, write to the Free Software Foundation, Inc., |
| 23 | + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 24 | + * http://www.gnu.org/copyleft/gpl.html |
| 25 | + * |
| 26 | + * @file |
| 27 | + * @ingroup UtfNormal |
| 28 | + */ |
| 29 | + |
// Refuse to run outside the CLI before doing anything else. This check used
// to sit after the dl() call, so a non-CLI request that managed to populate
// $_SERVER['argv'] could still reach the extension loader.
if ( php_sapi_name() != 'cli' ) {
	die( "Run me from the command line please.\n" );
}

// Passing --icu on the command line asks for the php_utfnormal extension to
// be loaded before the normalizer sources are included.
if ( isset( $_SERVER['argv'] ) && in_array( '--icu', $_SERVER['argv'], true ) ) {
	// dl() is not available in every build/configuration and returns false
	// when loading fails; say so explicitly instead of carrying on silently.
	if ( !function_exists( 'dl' ) || !dl( 'php_utfnormal.so' ) ) {
		fwrite( STDERR, "Warning: could not load php_utfnormal.so; continuing without it.\n" );
	}
}

require_once 'UtfNormalUtil.php';
require_once 'UtfNormal.php';

// Number of timed runs per operation; the shortest run is the one reported.
define( 'BENCH_CYCLES', 1 );
// Minimum size, in bytes, that each sample text is inflated to (10 MiB).
define( 'BIGSIZE', 1024 * 1024 * 10 );
// Deliberately tight ceiling: the payload itself plus 120 MiB of headroom,
// so a memory-usage regression shows up as an out-of-memory failure.
ini_set( 'memory_limit', (string)( BIGSIZE + 120 * 1024 * 1024 ) );

// Sample texts (paths relative to the working directory) => display label.
$testfiles = array(
	'testdata/washington.txt' => 'English text',
	'testdata/berlin.txt' => 'German text',
	'testdata/bulgakov.txt' => 'Russian text',
	'testdata/tokyo.txt' => 'Japanese text',
	'testdata/young.txt' => 'Korean text'
);

$normalizer = new UtfNormal;
UtfNormal::loadData();
foreach ( $testfiles as $file => $desc ) {
	benchmarkTest( $normalizer, $file, $desc );
}
| 57 | + |
| 58 | +# ------- |
| 59 | + |
/**
 * Inflate one sample file to at least BIGSIZE bytes and push it through each
 * operation under test, printing one result line per operation.
 *
 * A missing, unreadable or empty file is reported and skipped: with no bytes
 * to double, the inflation loop below could never reach BIGSIZE and would
 * spin forever.
 *
 * @param $u Object whose methods are invoked by name via benchmarkForm()
 * @param $filename String: path of the sample text to load
 * @param $desc String: human-readable label printed next to the filename
 */
function benchmarkTest( &$u, $filename, $desc ) {
	print "Testing $filename ($desc)...\n";

	$data = is_readable( $filename ) ? file_get_contents( $filename ) : false;
	if ( $data === false || $data === '' ) {
		print "  Skipping $filename: file is missing, unreadable or empty.\n";
		return;
	}

	// Keep doubling the text until it is at least BIGSIZE bytes long.
	while ( strlen( $data ) < BIGSIZE ) {
		$data .= $data;
	}
	echo "Data is " . strlen( $data ) . " bytes.\n";

	// Each entry is either a single method name, or an array of method
	// names to be applied one after another to the previous step's output.
	$forms = array(
		'quickIsNFCVerify',
		'cleanUp',
	);
	foreach ( $forms as $form ) {
		if ( is_array( $form ) ) {
			$str = $data;
			foreach ( $form as $step ) {
				$str = benchmarkForm( $u, $str, $step );
			}
		} else {
			benchmarkForm( $u, $data, $form );
		}
	}
}
| 84 | + |
/**
 * Current wall-clock time in seconds, including the fractional part.
 *
 * @return float Seconds since the Unix epoch
 */
function benchTime() {
	// microtime( true ) yields the float directly; no need to split and
	// re-add the "usec sec" string form by hand.
	return microtime( true );
}
| 89 | + |
/**
 * Time one operation on the given data and print a single result line.
 *
 * The method named by $form is invoked on $u, at least once and otherwise
 * BENCH_CYCLES times; the shortest run is the one reported, together with
 * the throughput and whether the returned value is identical to the input.
 *
 * @param $u Object providing the method named by $form
 * @param $data String: input handed to the method (received by reference)
 * @param $form String: name of the method to invoke on $u
 * @return mixed Whatever the last invocation of the method returned
 */
function benchmarkForm( &$u, &$data, $form ) {
	// Always run at least once so that $out and the timings exist even if
	// BENCH_CYCLES has been set to zero or a negative value.
	$cycles = max( 1, (int)BENCH_CYCLES );
	$deltas = array();
	$out = null;

	for ( $i = 0; $i < $cycles; $i++ ) {
		$start = benchTime();
		$out = $u->$form( $data, UtfNormal::$utfCanonicalDecomp );
		$deltas[] = ( benchTime() - $start );
	}
	$delta = min( $deltas ); # Take shortest time

	// On very small inputs the timer can report no elapsed time at all;
	// do not divide by zero in that case.
	if ( $delta > 0 ) {
		$rate = number_format( intval( strlen( $data ) / $delta ) );
	} else {
		$rate = 'n/a';
	}

	// Strict comparison: a non-string result is always reported as changed.
	$same = ( $data === $out );

	printf( " %20s %6.1fms %12s bytes/s (%s)\n",
		$form,
		$delta * 1000.0,
		$rate,
		( $same ? 'no change' : 'changed' ) );
	return $out;
}