r33490 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r33489‎ | r33490 | r33491 >
Date:16:09, 17 April 2008
Author:tstarling
Status:old
Tags:
Comment:
Basic interface to the ICU text transformation/transliteration service.
Modified paths:
  • /trunk/extensions/transliterate (added) (history)
  • /trunk/extensions/transliterate/CREDITS (added) (history)
  • /trunk/extensions/transliterate/README (added) (history)
  • /trunk/extensions/transliterate/config.m4 (added) (history)
  • /trunk/extensions/transliterate/php_transliterate.h (added) (history)
  • /trunk/extensions/transliterate/tests (added) (history)
  • /trunk/extensions/transliterate/tests/001.phpt (added) (history)
  • /trunk/extensions/transliterate/transliterate.cpp (added) (history)

Diff [purge]

Index: trunk/extensions/transliterate/tests/001.phpt
@@ -0,0 +1,10 @@
 2+--TEST--
 3+Basic transliterate extension check
 4+--SKIPIF--
 5+<?php if (!extension_loaded("transliterate")) print "skip"; ?>
 6+--FILE--
 7+<?php
 8+echo transliterate_with_id('Any-Latin','ウィキペディア');
 9+?>
 10+--EXPECT--
 11+u~ikipedia
Property changes on: trunk/extensions/transliterate/tests/001.phpt
___________________________________________________________________
Added: svn:eol-style
112 + native
Index: trunk/extensions/transliterate/transliterate.cpp
@@ -0,0 +1,151 @@
 2+
 3+extern "C" {
 4+#ifdef HAVE_CONFIG_H
 5+#include "config.h"
 6+#endif
 7+
 8+#include "php.h"
 9+#include "php_ini.h"
 10+#include "ext/standard/info.h"
 11+#include "php_transliterate.h"
 12+}
 13+
 14+#include <unicode/translit.h>
 15+
 16+extern "C" {
 17+
 18+/* True global resources - no need for thread safety here */
 19+static int le_transliterate;
 20+
 21+/* {{{ transliterate_functions[]
 22+ * Table of the PHP-callable functions this extension registers, closed by an all-NULL entry. */
 23+zend_function_entry transliterate_functions[] = {
 24+ PHP_FE(transliterate_with_id, NULL)
 25+ /* PHP_FE(transliterate_with_rules, NULL) -- disabled; no such function is defined in this file */
 26+ {NULL, NULL, NULL}
 27+};
 28+/* }}} */
 29+
 30+/* {{{ transliterate_module_entry
 31+ * Module descriptor: extension name, function table, lifecycle hooks and version string. */
 32+zend_module_entry transliterate_module_entry = {
 33+#if ZEND_MODULE_API_NO >= 20010901
 34+ STANDARD_MODULE_HEADER,
 35+#endif
 36+ "transliterate",
 37+ transliterate_functions,
 38+ PHP_MINIT(transliterate),
 39+ PHP_MSHUTDOWN(transliterate),
 40+ NULL, /* RINIT: no per-request startup hook */
 41+ NULL, /* RSHUTDOWN: no per-request shutdown hook */
 42+ PHP_MINFO(transliterate),
 43+#if ZEND_MODULE_API_NO >= 20010901
 44+ "0.1", /* Version */
 45+#endif
 46+ STANDARD_MODULE_PROPERTIES
 47+};
 48+/* }}} */
 49+
 50+#ifdef COMPILE_DL_TRANSLITERATE
 51+ZEND_GET_MODULE(transliterate)
 52+#endif
 53+
 54+/* }}} */
 55+
 56+/* {{{ PHP_MINIT_FUNCTION
 57+ * Module startup hook; there is nothing to set up, so it only reports SUCCESS. */
 58+PHP_MINIT_FUNCTION(transliterate)
 59+{
 60+ return SUCCESS;
 61+}
 62+/* }}} */
 63+
 64+/* {{{ PHP_MSHUTDOWN_FUNCTION
 65+ * Module shutdown hook; there is nothing to release, so it only reports SUCCESS. */
 66+PHP_MSHUTDOWN_FUNCTION(transliterate)
 67+{
 68+ return SUCCESS;
 69+}
 70+/* }}} */
 71+
 72+/* {{{ PHP_MINFO_FUNCTION
 73+ * Module info hook: prints a one-row table reporting "ICU transliteration support" as "enabled". */
 74+PHP_MINFO_FUNCTION(transliterate)
 75+{
 76+ php_info_print_table_start();
 77+ php_info_print_table_row(2, "ICU transliteration support", "enabled");
 78+ php_info_print_table_end();
 79+}
 80+/* }}} */
 81+
 82+
 83+/* {{{ proto string transliterate_with_id(string transID, string source)
 84+ Transliterate with a given ICU transform ID */
 85+PHP_FUNCTION(transliterate_with_id)
 86+{
 87+ char *transID = NULL, *source = NULL, *output;
 88+ int transIDLength, sourceLength, tempLength, outputLength;
 89+
 90+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss",
 91+ &transID, &transIDLength, &source, &sourceLength) == FAILURE)
 92+ {
 93+ RETURN_FALSE;
 94+ }
 95+
 96+ try {
 97+ /* Open the transliterator */
 98+ UErrorCode error;
 99+ UParseError parseError;
 100+ UnicodeString uTransID(transID, transIDLength, "UTF-8");
 101+ Transliterator * trans = Transliterator::createInstance(
 102+ transID, UTRANS_FORWARD, parseError, error);
 103+ if (U_FAILURE(error)) {
 104+ if (error == U_INVALID_ID) {
 105+ php_error(E_WARNING, "transliterate_with_id: Invalid transliterator ID");
 106+ } else {
 107+ php_error(E_WARNING, "transliterate_with_id: Transliterator::createInstance returned %s",
 108+ u_errorName(error));
 109+ }
 110+ delete trans;
 111+ RETURN_FALSE;
 112+ }
 113+
 114+ /* Convert the string */
 115+ UnicodeString buffer(source, sourceLength, "UTF-8");
 116+ trans->transliterate(buffer);
 117+
 118+ delete trans;
 119+
 120+ /* Write it out to an emalloc'd buffer */
 121+ tempLength = buffer.length() + 1;
 122+ if (tempLength <= 0) {
 123+ php_error(E_WARNING, "transliterate_with_id: output buffer too large (>2GB)");
 124+ RETURN_FALSE;
 125+ }
 126+ output = (char*)emalloc(tempLength);
 127+ outputLength = buffer.extract(0, buffer.length(), output, tempLength, "UTF-8");
 128+
 129+ /* If the buffer wasn't big enough, expand it to the correct size and try again */
 130+ if (outputLength > tempLength) {
 131+ output = (char*)erealloc(output, outputLength + 1);
 132+ buffer.extract(0, buffer.length(), output, outputLength + 1, "UTF-8");
 133+ }
 134+
 135+ RETURN_STRINGL(output, outputLength, 0);
 136+ } catch (...) {
 137+ }
 138+ php_error(E_WARNING, "transliterate_with_id: unexpected C++ exception");
 139+ RETURN_FALSE;
 140+}
 141+/* }}} */
 142+
 143+} // end extern "C"
 144+
 145+/*
 146+ * Local variables:
 147+ * tab-width: 4
 148+ * c-basic-offset: 4
 149+ * End:
 150+ * vim600: noet sw=4 ts=4 fdm=marker
 151+ * vim<600: noet sw=4 ts=4
 152+ */
Property changes on: trunk/extensions/transliterate/transliterate.cpp
___________________________________________________________________
Added: svn:eol-style
1153 + native
Index: trunk/extensions/transliterate/config.m4
@@ -0,0 +1,56 @@
 2+dnl $Id$
 3+dnl config.m4 for extension transliterate
 4+
 5+dnl Locates the ICU headers and libraries and adds them to the
 6+dnl build of the transliterate extension. Lines starting with
 7+dnl 'dnl' are comments.
 8+
 9+PHP_ARG_WITH(transliterate, for ICU transliteration support,
 10+[ --with-transliterate Include ICU transliteration support])
 11+
 12+if test "$PHP_TRANSLITERATE" != "no"; then
 13+ dnl Find the ICU installation prefix and store it in ICU_DIR.
 14+
 15+ dnl # --with-transliterate -> use the given path if it holds the header, else try SEARCH_PATH
 16+ SEARCH_PATH="/usr/local /usr"
 17+ SEARCH_FOR="/include/unicode/translit.h"
 18+ if test -r $PHP_TRANSLITERATE/$SEARCH_FOR; then # path given as parameter
 19+ ICU_DIR=$PHP_TRANSLITERATE
 20+ else # search default path list
 21+ AC_MSG_CHECKING([for ICU files in default path])
 22+ for i in $SEARCH_PATH ; do # no break, so the last matching prefix wins
 23+ if test -r $i/$SEARCH_FOR; then
 24+ ICU_DIR=$i
 25+ AC_MSG_RESULT(found in $i)
 26+ fi
 27+ done
 28+ fi
 29+
 30+ if test -z "$ICU_DIR"; then
 31+ AC_MSG_RESULT([not found])
 32+ AC_MSG_ERROR([Please reinstall the ICU header files])
 33+ fi
 34+
 35+ dnl # --with-transliterate -> add include path
 36+ PHP_ADD_INCLUDE($ICU_DIR/include)
 37+
 38+ dnl # --with-transliterate -> check for lib and symbol presence. NOTE(review): LIBSYMBOL is a mangled C++ name containing icu_3_6, so this check looks tied to ICU 3.6 -- confirm before building against another ICU version.
 39+ LIBNAME=icuuc
 40+ LIBSYMBOL=_ZN7icu_3_613UnicodeStringC1EPKciS2_
 41+
 42+ PHP_CHECK_LIBRARY($LIBNAME,$LIBSYMBOL,
 43+ [
 44+ PHP_ADD_LIBRARY_WITH_PATH(icuuc, $ICU_DIR/lib, TRANSLITERATE_SHARED_LIBADD)
 45+ PHP_ADD_LIBRARY_WITH_PATH(icui18n, $ICU_DIR/lib, TRANSLITERATE_SHARED_LIBADD)
 46+ PHP_ADD_LIBRARY_WITH_PATH(icudata, $ICU_DIR/lib, TRANSLITERATE_SHARED_LIBADD)
 47+ AC_DEFINE(HAVE_TRANSLITERATELIB,1,[ ])
 48+ ],[
 49+ AC_MSG_ERROR([wrong ICU lib version or lib not found])
 50+ ],[
 51+ -L$ICU_DIR/lib -ldl
 52+ ])
 53+
 54+ PHP_SUBST(TRANSLITERATE_SHARED_LIBADD)
 55+
 56+ PHP_NEW_EXTENSION(transliterate, transliterate.cpp, $ext_shared)
 57+fi
Property changes on: trunk/extensions/transliterate/config.m4
___________________________________________________________________
Added: svn:eol-style
158 + native
Index: trunk/extensions/transliterate/CREDITS
@@ -0,0 +1,2 @@
 2+transliterate
 3+Tim Starling
Property changes on: trunk/extensions/transliterate/CREDITS
___________________________________________________________________
Added: svn:eol-style
14 + native
Index: trunk/extensions/transliterate/README
@@ -0,0 +1,15 @@
 2+This is an interface to the ICU text transformation/transliteration service.
 3+
 4+For some reason some PHP installations need a CXX environment variable to
 5+configure properly:
 6+
 7+CXX=g++ ./configure
 8+
 9+Not sure if that's my fault or not.
 10+
 11+Typical usage:
 12+
 13+ $latin = transliterate_with_id('Any-Latin', $foreign);
 14+
 15+Observed timing is ~30ms on the first call and ~3ms thereafter, plus ~1.5us per
 16+character.
Property changes on: trunk/extensions/transliterate/README
___________________________________________________________________
Added: svn:eol-style
117 + native
Index: trunk/extensions/transliterate/php_transliterate.h
@@ -0,0 +1,53 @@
 2+/*
 3+ +----------------------------------------------------------------------+
 4+ | PHP Version 5 |
 5+ +----------------------------------------------------------------------+
 6+ | Copyright (c) 1997-2007 The PHP Group |
 7+ +----------------------------------------------------------------------+
 8+ | This source file is subject to version 3.01 of the PHP license, |
 9+ | that is bundled with this package in the file LICENSE, and is |
 10+ | available through the world-wide-web at the following url: |
 11+ | http://www.php.net/license/3_01.txt |
 12+ | If you did not receive a copy of the PHP license and are unable to |
 13+ | obtain it through the world-wide-web, please send a note to |
 14+ | license@php.net so we can mail you a copy immediately. |
 15+ +----------------------------------------------------------------------+
 16+ | Author: Tim Starling |
 17+ +----------------------------------------------------------------------+
 18+*/
 19+
 20+/* $Id: header,v 1.16.2.1.2.1 2007/01/01 19:32:09 iliaa Exp $ */
 21+
 22+#ifndef PHP_TRANSLITERATE_H
 23+#define PHP_TRANSLITERATE_H
 24+/* Module descriptor for the transliterate extension, plus the pointer macro that refers to it */
 25+extern zend_module_entry transliterate_module_entry;
 26+#define phpext_transliterate_ptr &transliterate_module_entry
 27+/* Export decoration: __declspec(dllexport) when PHP_WIN32 is defined, empty otherwise */
 28+#ifdef PHP_WIN32
 29+#define PHP_TRANSLITERATE_API __declspec(dllexport)
 30+#else
 31+#define PHP_TRANSLITERATE_API
 32+#endif
 33+/* ZTS builds additionally pull in TSRM.h */
 34+#ifdef ZTS
 35+#include "TSRM.h"
 36+#endif
 37+/* Module hooks: startup, shutdown and info output */
 38+PHP_MINIT_FUNCTION(transliterate);
 39+PHP_MSHUTDOWN_FUNCTION(transliterate);
 40+PHP_MINFO_FUNCTION(transliterate);
 41+/* PHP-callable function exported by the extension */
 42+PHP_FUNCTION(transliterate_with_id);
 43+
 44+#endif /* PHP_TRANSLITERATE_H */
 45+
 46+
 47+/*
 48+ * Local variables:
 49+ * tab-width: 4
 50+ * c-basic-offset: 4
 51+ * End:
 52+ * vim600: noet sw=4 ts=4 fdm=marker
 53+ * vim<600: noet sw=4 ts=4
 54+ */
Property changes on: trunk/extensions/transliterate/php_transliterate.h
___________________________________________________________________
Added: svn:eol-style
155 + native

Status & tagging log