r114650 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r114649‎ | r114650 | r114651 >
Date:08:53, 2 April 2012
Author:vvv
Status:deferred
Tags:
Comment:
Add the initial implementation of the Unicode support.
Modified paths:
  • /trunk/php/luasandbox/config.m4 (modified) (history)
  • /trunk/php/luasandbox/data_conversion.c (modified) (history)
  • /trunk/php/luasandbox/library.c (modified) (history)
  • /trunk/php/luasandbox/luasandbox_unicode.h (added) (history)
  • /trunk/php/luasandbox/m4/ac_check_icu.m4 (added) (history)
  • /trunk/php/luasandbox/ustring.c (added) (history)

Diff [purge]

Index: trunk/php/luasandbox/ustring.c
@@ -0,0 +1,1038 @@
 2+#ifdef HAVE_CONFIG_H
 3+#include "config.h"
 4+#endif
 5+
 6+#include <lua.h>
 7+#include <lauxlib.h>
 8+
 9+#include <unicode/utf.h>
 10+#include <unicode/uchar.h>
 11+#include <unicode/ustring.h>
 12+
 13+#include "php.h"
 14+#include "php_luasandbox.h"
 15+#include "luasandbox_unicode.h"
 16+
 17+#define LUASANDBOX_CHECK_ICU_ERROR(errorCode, cleanupCode) { \
 18+ if( U_FAILURE(errorCode) ) { \
 19+ char _luasandbox_errmsg[1024]; \
 20+ snprintf( _luasandbox_errmsg, 1024, "Unicode handling error: %s", u_errorName(errorCode) ); \
 21+ lua_pushstring( L, _luasandbox_errmsg ); \
 22+ cleanupCode; \
 23+ lua_error(L); \
 24+ } \
 25+ errorCode = U_ZERO_ERROR; \
 26+ }
 27+
 28+/****************** Prototypes ******************/
 29+
 30+int luasandbox_ustr_create(lua_State * L);
 31+int luasandbox_ustr_len(lua_State * L);
 32+int luasandbox_ustr_concat(lua_State * L);
 33+int luasandbox_ustr_eq(lua_State * L);
 34+int luasandbox_ustr_index(lua_State * L);
 35+
 36+int luasandbox_ustr_ucfirst(lua_State * L);
 37+int luasandbox_ustr_uc(lua_State * L);
 38+int luasandbox_ustr_lc(lua_State * L);
 39+int luasandbox_ustr_tc(lua_State * L);
 40+int luasandbox_ustr_trim(lua_State * L);
 41+int luasandbox_ustr_sub(lua_State * L);
 42+int luasandbox_ustr_pos(lua_State * L);
 43+int luasandbox_ustr_replace(lua_State * L);
 44+int luasandbox_ustr_split(lua_State * L);
 45+
 46+/****************** Registration of functions ******************/
 47+
 48+static luaL_Reg luasandbox_ustr_functions[] = {
 49+ { "len", luasandbox_ustr_len },
 50+ { "ucfirst", luasandbox_ustr_ucfirst },
 51+ { "uc", luasandbox_ustr_uc },
 52+ { "lc", luasandbox_ustr_lc },
 53+ { "tc", luasandbox_ustr_tc },
 54+ { "trim", luasandbox_ustr_trim },
 55+ { "sub", luasandbox_ustr_sub },
 56+ { "pos", luasandbox_ustr_pos },
 57+ { "replace", luasandbox_ustr_replace },
 58+ { "split", luasandbox_ustr_split },
 59+ NULL
 60+};
 61+
 62+/** {{{ luasandbox_install_unicode_functions
 63+ *
 64+ * Installs the unicode module into the global namespace.
 65+ */
 66+void luasandbox_install_unicode_functions(lua_State * L)
 67+{
 68+ luaL_newmetatable( L, "luasandbox_ustr" );
 69+
 70+ lua_pushstring( L, "__len" );
 71+ lua_pushcfunction( L, luasandbox_ustr_len );
 72+ lua_rawset( L, -3 );
 73+
 74+ lua_pushstring( L, "__concat" );
 75+ lua_pushcfunction( L, luasandbox_ustr_concat );
 76+ lua_rawset( L, -3 );
 77+
 78+ lua_pushstring( L, "__eq" );
 79+ lua_pushcfunction( L, luasandbox_ustr_eq );
 80+ lua_rawset( L, -3 );
 81+
 82+ lua_pushstring( L, "__index" );
 83+ lua_pushcfunction( L, luasandbox_ustr_index );
 84+ lua_rawset( L, -3 );
 85+
 86+ lua_pushcfunction( L, luasandbox_ustr_create );
 87+ lua_setglobal( L, "u" );
 88+
 89+ luaL_register( L, "ustring", luasandbox_ustr_functions );
 90+}
 91+/* }}} */
 92+
 93+/****************** Common functions ******************/
 94+
 95+/** {{{ luasandbox_init_ustr
 96+ *
 97+ * Initializes a ustring header and assigns the metatable to it.
 98+ */
 99+luasandbox_ustr_header *luasandbox_init_ustr(lua_State * L, size_t len)
 100+{
 101+ luasandbox_ustr_header *result;
 102+
 103+ result = (luasandbox_ustr_header*) lua_newuserdata( L, sizeof(luasandbox_ustr_header) + len );
 104+ result->raw_len = len;
 105+
 106+ luaL_getmetatable( L, "luasandbox_ustr" );
 107+ lua_setmetatable( L, -2 );
 108+
 109+ return result;
 110+}
 111+/* }}} */
 112+
 113+/** {{{ luasandbox_push_ustr
 114+ *
 115+ * Constructs the ustring object from a UTF-8 string. Validates the string and
 116+ * raises an error if the string is invalid.
 117+ */
 118+luasandbox_ustr_header *luasandbox_push_ustr(lua_State * L, uint8_t *str, size_t len)
 119+{
 120+ luasandbox_ustr_header *header;
 121+ int32_t i, cp_len;
 122+
 123+ // Validate the string + calculate length
 124+ for( i = cp_len = 0; i < len; cp_len++ ) {
 125+ UChar32 cur;
 126+
 127+ U8_NEXT( str, i, len, cur );
 128+ if( cur < 0 ) {
 129+ lua_pushstring( L, "Invalid UTF-8 supplied" );
 130+ lua_error( L );
 131+ }
 132+ }
 133+
 134+ header = luasandbox_init_ustr( L, len );
 135+ header->cp_len = cp_len;
 136+ memcpy( LUASANDBOX_USTR_RAW(header), str, len );
 137+
 138+ return header;
 139+}
 140+/* }}} */
 141+
 142+/** {{{ luasandbox_isustr
 143+ *
 144+ * Checks if the the object on the stack is a ustring.
 145+ */
 146+int luasandbox_isustr(lua_State * L, int idx)
 147+{
 148+ int result;
 149+
 150+ if( lua_type( L, idx ) != LUA_TUSERDATA )
 151+ return FALSE;
 152+
 153+ if( !lua_getmetatable( L, idx ) )
 154+ return FALSE;
 155+
 156+ luaL_getmetatable( L, "luasandbox_ustr" );
 157+
 158+ result = lua_equal( L, -1, -2 );
 159+ lua_pop( L, 2 );
 160+ return result;
 161+}
 162+/* }}} */
 163+
 164+/** {{{ luasandbox_checkustring
 165+ *
 166+ * Checks whether the specified object on the stack is a ustring
 167+ * or an object which may be converted to it. Returns the pointer
 168+ * to the ustring's header.
 169+ */
 170+luasandbox_ustr_header* luasandbox_checkustring(lua_State * L, int idx)
 171+{
 172+ if ( lua_type( L, idx ) == LUA_TSTRING || lua_type( L, idx ) == LUA_TNUMBER ) {
 173+ // A usual string. Magically convert it to ustring.
 174+ lua_checkstack( L, 2 );
 175+ lua_pushvalue( L, idx );
 176+ luasandbox_ustr_create(L);
 177+ lua_replace( L, idx );
 178+ lua_pop( L, 1 );
 179+ }
 180+
 181+ return luaL_checkudata( L, idx, "luasandbox_ustr" );
 182+}
 183+/* }}} */
 184+
 185+/** {{{ luasandbox_checkustring
 186+ *
 187+ * Returns the pointer to the string itself and sets raw_len
 188+ * to the length of string in bytes.
 189+ */
 190+const uint8_t* luasandbox_getustr(lua_State * L, int idx, size_t* raw_len)
 191+{
 192+ luasandbox_ustr_header *header;
 193+ header = luasandbox_checkustring( L, idx );
 194+ *raw_len = header->raw_len;
 195+ return LUASANDBOX_USTR_RAW(header);
 196+}
 197+/* }}} */
 198+
 199+/** {{{ luasandbox_ustr_index_to_offset
 200+ *
 201+ * Converts a Lua index (starting with 1) to a C offset (starting with 0).
 202+ * Handles negative indexes as indexes numbered from the end of the string.
 203+ */
 204+int32_t luasandbox_ustr_index_to_offset(lua_State * L, luasandbox_ustr_header *str, int32_t idx, int check_limits)
 205+{
 206+ if( !idx || check_limits && (idx > str->cp_len || -idx > str->cp_len) ) {
 207+ lua_pushfstring( L, "Trying to access invalid index %d for string with length %d", idx, str->cp_len );
 208+ lua_error( L );
 209+ }
 210+
 211+ if( idx > 0 ) {
 212+ return idx - 1;
 213+ } else {
 214+ return str->cp_len + idx;
 215+ }
 216+}
 217+/* }}} */
 218+
 219+/****************** Conversions ******************/
 220+
 221+/** {{{ luasandbox_convert_toUTF16
 222+ *
 223+ * Converts the specified ustring to UTF-16, and pushes
 224+ * the resulting UTF-16 string on the top of the stack.
 225+ */
 226+void luasandbox_convert_toUTF16(lua_State * L, int idx)
 227+{
 228+ luasandbox_ustr_header *header;
 229+ UChar *utf16_string;
 230+ int32_t result_len;
 231+ UErrorCode error_code = U_ZERO_ERROR;
 232+
 233+ header = luasandbox_checkustring( L, idx );
 234+
 235+ utf16_string = emalloc( header->raw_len * 2 );
 236+ u_strFromUTF8( utf16_string, header->raw_len, &result_len,
 237+ LUASANDBOX_USTR_RAW(header), header->raw_len, &error_code );
 238+ LUASANDBOX_CHECK_ICU_ERROR( error_code, efree( utf16_string ) );
 239+
 240+ lua_pushlstring( L, (char*)utf16_string, result_len * 2 );
 241+ efree( utf16_string );
 242+}
 243+/* }}} */
 244+
 245+/** {{{ luasandbox_convert_fromUTF16
 246+ *
 247+ * Converts the specified UTF-16 string to UTF-8, and pushes
 248+ * the resulting ustring on the top of the stack.
 249+ */
 250+void luasandbox_convert_fromUTF16(lua_State * L, int idx)
 251+{
 252+ luasandbox_ustr_header *header;
 253+ uint8_t *utf8_string;
 254+ UChar *utf16_string;
 255+ size_t orig_len;
 256+ int32_t result_len;
 257+ UErrorCode error_code = U_ZERO_ERROR;
 258+
 259+ utf16_string = (UChar*) lua_tolstring( L, idx, &orig_len );
 260+
 261+ utf8_string = emalloc( orig_len );
 262+ u_strToUTF8( utf8_string, orig_len, &result_len,
 263+ utf16_string, orig_len / 2, &error_code );
 264+ LUASANDBOX_CHECK_ICU_ERROR( error_code, efree( utf8_string ) );
 265+
 266+ luasandbox_push_ustr( L, utf8_string, result_len );
 267+ efree( utf8_string );
 268+}
 269+/* }}} */
 270+
 271+/****************** Operators ******************/
 272+
 273+/** {{{ luasandbox_ustr_create
 274+ *
 275+ * Initializes the Unicode string from the string on the top of the stack.
 276+ */
 277+int luasandbox_ustr_create(lua_State * L)
 278+{
 279+ uint8_t *str;
 280+ size_t raw_len = 0;
 281+
 282+ str = luaL_checklstring( L, -1, &raw_len );
 283+ luasandbox_push_ustr( L, str, raw_len );
 284+ return 1;
 285+}
 286+/* }}} */
 287+
 288+/** {{{ luasandbox_ustr_len
 289+ *
 290+ * Lua function providing the length of the string.
 291+ */
 292+int luasandbox_ustr_len(lua_State * L)
 293+{
 294+ luasandbox_ustr_header *header;
 295+
 296+ header = luaL_checkudata( L, 1, "luasandbox_ustr" );
 297+
 298+ lua_pushinteger( L, header->cp_len );
 299+ return 1;
 300+}
 301+/* }}} */
 302+
 303+/** {{{ luasandbox_ustr_concat
 304+ *
 305+ * Lua function handling the concatention operator.
 306+ */
 307+int luasandbox_ustr_concat(lua_State * L)
 308+{
 309+ luasandbox_ustr_header *s1, *s2, *newhdr;
 310+ int32_t new_len;
 311+ void* newstr;
 312+
 313+ s1 = luasandbox_checkustring( L, 1 );
 314+ s2 = luasandbox_checkustring( L, 2 );
 315+
 316+ new_len = s1->raw_len + s2->raw_len;
 317+ newhdr = luasandbox_init_ustr( L, new_len );
 318+ newhdr->cp_len = s1->cp_len + s2->cp_len;
 319+ newstr = LUASANDBOX_USTR_RAW(newhdr);
 320+ memcpy( newstr, LUASANDBOX_USTR_RAW(s1), s1->raw_len );
 321+ memcpy( newstr + s1->raw_len, LUASANDBOX_USTR_RAW(s2), s2->raw_len );
 322+
 323+ return 1;
 324+}
 325+/* }}} */
 326+
 327+/** {{{ luasandbox_ustr_eq
 328+ *
 329+ * Lua function providing the equality operator.
 330+ */
 331+int luasandbox_ustr_eq(lua_State * L)
 332+{
 333+ luasandbox_ustr_header *s1, *s2;
 334+
 335+ s1 = luasandbox_checkustring( L, 1 );
 336+ s2 = luasandbox_checkustring( L, 2 );
 337+
 338+ if( s1->cp_len != s2->cp_len || s1->raw_len != s2->raw_len ) {
 339+ lua_pushboolean( L, FALSE );
 340+ return 1;
 341+ }
 342+
 343+ lua_pushboolean( L, !memcmp( LUASANDBOX_USTR_RAW(s1), LUASANDBOX_USTR_RAW(s2), s1->raw_len ) );
 344+ return 1;
 345+}
 346+/* }}} */
 347+
 348+/** {{{ luasandbox_ustr_index
 349+ *
 350+ * Lua function providing the index operator.
 351+ * Provides access both to class methods and
 352+ * per-position access to string characters.
 353+ */
 354+int luasandbox_ustr_index(lua_State * L)
 355+{
 356+ luasandbox_ustr_header *str;
 357+ uint8_t *raw;
 358+
 359+ str = luaL_checkudata( L, 1, "luasandbox_ustr" );
 360+ raw = LUASANDBOX_USTR_RAW(str);
 361+
 362+ if( lua_type( L, 2 ) == LUA_TNUMBER ) {
 363+ // If it is a number, treat as accessing string by position
 364+ int32_t i, idx, curidx, offset;
 365+ uint8_t* result_pos;
 366+ UChar32 cur, result;
 367+
 368+ idx = lua_tointeger( L, 2 );
 369+ offset = luasandbox_ustr_index_to_offset( L, str, idx, TRUE );
 370+
 371+ for( i = curidx = 0; ; curidx++ ) {
 372+ UChar32 tmp;
 373+
 374+ U8_GET_UNSAFE( raw, i, result );
 375+ if( curidx == offset ) {
 376+ result_pos = raw + i;
 377+ break;
 378+ }
 379+ U8_NEXT_UNSAFE( raw, i, tmp );
 380+ }
 381+
 382+ lua_pushlstring( L, result_pos, U8_LENGTH( result ) );
 383+ return 1;
 384+ } else {
 385+ // Otherwise treat it as an access to member functions
 386+ lua_getglobal( L, "ustring" );
 387+ lua_pushvalue( L, 2 );
 388+ lua_gettable( L, -2 );
 389+ return 1;
 390+ }
 391+}
 392+/* }}} */
 393+
 394+/****************** Library ******************/
 395+
 396+/** {{{ luasandbox_ustr_ucfirst
 397+ *
 398+ * Lua function:
 399+ * ustring ucfirst( ustring str )
 400+ * Converts the first code point of str to upper case.
 401+ */
 402+int luasandbox_ustr_ucfirst(lua_State * L)
 403+{
 404+ luasandbox_ustr_header *header;
 405+ uint8_t *utf_string;
 406+ size_t raw_len;
 407+ UChar32 first, newfirst;
 408+ int offset = 0;
 409+
 410+ header = luaL_checkudata( L, 1, "luasandbox_ustr" );
 411+ utf_string = LUASANDBOX_USTR_RAW( header );
 412+ raw_len = header->raw_len;
 413+
 414+ if( !raw_len ) {
 415+ lua_pushstring( L, "" );
 416+ return 1;
 417+ }
 418+
 419+ U8_GET_UNSAFE( utf_string, 0, first );
 420+
 421+ newfirst = u_toupper( first );
 422+
 423+ // The actions depend upon whether the lengths of symbol match
 424+ if( U8_LENGTH(first) == U8_LENGTH(newfirst) ) {
 425+ // Just replace the symbol
 426+ luasandbox_ustr_header *newstr;
 427+ uint8_t *result;
 428+
 429+ newstr = lua_newuserdata( L, LUASANDBOX_USTR_TOTALLEN(header) );
 430+ luaL_getmetatable( L, "luasandbox_ustr" );
 431+ lua_setmetatable( L, -2 );
 432+
 433+ memcpy( newstr, header, LUASANDBOX_USTR_TOTALLEN(header) );
 434+ result = LUASANDBOX_USTR_RAW(newstr);
 435+ U8_APPEND_UNSAFE( result, offset, newfirst );
 436+ } else {
 437+ // I have tested this code in cases when len(old) < len(new),
 438+ // but I am unaware of any cases when those lengths do not match.
 439+ // It should have happened with eszett, but since capital eszett is
 440+ // considered substandard, u_toupper does not convert it.
 441+ size_t oldlen = U8_LENGTH(first),
 442+ newlen = U8_LENGTH(newfirst);
 443+ size_t delta = newlen - oldlen;
 444+
 445+ uint8_t *result;
 446+ size_t new_len;
 447+
 448+ result = emalloc( raw_len + delta );
 449+ memcpy( result + newlen, utf_string + oldlen, raw_len - oldlen );
 450+ U8_APPEND_UNSAFE( result, offset, newfirst );
 451+ new_len = raw_len + delta;
 452+
 453+ luasandbox_push_ustr( L, result, new_len );
 454+ efree( result );
 455+ }
 456+
 457+ return 1;
 458+}
 459+/* }}} */
 460+
 461+#define LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER 1
 462+#define LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER 2
 463+#define LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE 3
 464+
 465+/** {{{ luasandbox_ustr_change_case
 466+ *
 467+ * Backend function for uc(), lc() and tc(). Converts string into UTF-16,
 468+ * passes it to ICU function and then converts back to UTF-8. This is required
 469+ * since casing algorithms are rather non-trivial and may be even locale-dependant.
 470+ */
 471+static int luasandbox_ustr_change_case(lua_State * L, int action)
 472+{
 473+ UChar *utf16_orig, *utf16_result;
 474+ size_t orig_length, x;
 475+ int32_t result_len;
 476+ UErrorCode errorCode = U_ZERO_ERROR;
 477+
 478+ luasandbox_convert_toUTF16( L, 1 );
 479+ utf16_orig = (UChar*)lua_tolstring( L, -1, &orig_length );
 480+
 481+ utf16_result = emalloc( orig_length * 2 );
 482+ switch( action ) {
 483+ case LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER:
 484+ result_len = u_strToUpper( utf16_result, orig_length, utf16_orig, orig_length / 2, "", &errorCode );
 485+ break;
 486+ case LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER:
 487+ result_len = u_strToLower( utf16_result, orig_length, utf16_orig, orig_length / 2, "", &errorCode );
 488+ break;
 489+ case LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE:
 490+ result_len = u_strToTitle( utf16_result, orig_length, utf16_orig, orig_length / 2, NULL, "", &errorCode );
 491+ break;
 492+ }
 493+ LUASANDBOX_CHECK_ICU_ERROR( errorCode, efree(utf16_result) );
 494+ lua_pop( L, 1 ); // Pop UTF-16 string out of the stack
 495+
 496+ // Back to UTF-8
 497+ lua_pushlstring( L, utf16_result, result_len * 2 );
 498+ luasandbox_convert_fromUTF16( L, -1 );
 499+ lua_replace( L, -2 );
 500+ efree( utf16_result );
 501+
 502+ return 1;
 503+}
 504+/* }}} */
 505+
 506+int luasandbox_ustr_uc(lua_State * L)
 507+{
 508+ luasandbox_ustr_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER );
 509+}
 510+
 511+int luasandbox_ustr_lc(lua_State * L)
 512+{
 513+ luasandbox_ustr_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER );
 514+}
 515+
 516+int luasandbox_ustr_tc(lua_State * L)
 517+{
 518+ luasandbox_ustr_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE );
 519+}
 520+
 521+/** {{{ luasandbox_utf8_trim_lua
 522+ *
 523+ * Lua function:
 524+ * ustring trim( ustring str )
 525+ * Removes all the whitespace from the beginning and end of the string.
 526+ */
 527+int luasandbox_ustr_trim(lua_State * L)
 528+{
 529+ luasandbox_ustr_header *header, *newheader;
 530+ uint8_t *utf_string, *result;
 531+ size_t new_len;
 532+ UChar32 cur;
 533+ uint32_t i = 0, ltrim_len = 0, rtrim_len = 0, ltrim_len_cp = 0, rtrim_len_cp = 0;
 534+
 535+ header = luasandbox_checkustring( L, 1 );
 536+ utf_string = LUASANDBOX_USTR_RAW(header);
 537+
 538+ // Left side
 539+ while( i < header->raw_len ) {
 540+ U8_NEXT_UNSAFE( utf_string, i, cur );
 541+
 542+ if( u_isWhitespace( cur ) || u_isUWhiteSpace( cur ) ) {
 543+ ltrim_len = i;
 544+ ltrim_len_cp++;
 545+ } else {
 546+ break;
 547+ }
 548+ }
 549+ // Right side
 550+ while( i < header->raw_len ) {
 551+ U8_NEXT_UNSAFE( utf_string, i, cur );
 552+
 553+ if( u_isWhitespace( cur ) || u_isUWhiteSpace( cur ) ) {
 554+ rtrim_len += U8_LENGTH( cur );
 555+ rtrim_len_cp++;
 556+ } else {
 557+ rtrim_len = 0;
 558+ rtrim_len_cp = 0;
 559+ }
 560+ }
 561+
 562+ new_len = header->raw_len - ltrim_len - rtrim_len;
 563+ newheader = luasandbox_init_ustr( L, new_len );
 564+ newheader->cp_len = header->cp_len - ltrim_len_cp - rtrim_len_cp;
 565+ memcpy( LUASANDBOX_USTR_RAW(newheader), utf_string + ltrim_len, new_len );
 566+
 567+ return 1;
 568+}
 569+/* }}} */
 570+
 571+/** {{{ luasandbox_ustr_sub
 572+ *
 573+ * Lua function:
 574+ * ustring sub( ustring str, int offset[, int length] )
 575+ * Returns the substring of str. Starts from the offset,
 576+ * and returns at most length code points.
 577+ */
 578+int luasandbox_ustr_sub(lua_State * L)
 579+{
 580+ luasandbox_ustr_header *header;
 581+ uint8_t *utf_string, *result;
 582+ size_t len;
 583+
 584+ int32_t i = 0, idx = 0, target = 0, target_len;
 585+ int32_t target_start, target_end = -1;
 586+ int found = 0;
 587+ UChar32 cur;
 588+
 589+ header = luasandbox_checkustring( L, 1 );
 590+ utf_string = LUASANDBOX_USTR_RAW(header);
 591+ target = luaL_checkinteger( L, 2 );
 592+ if( lua_type( L, 3 ) == LUA_TNUMBER ) {
 593+ target_len = lua_tointeger( L, 3 );
 594+ } else {
 595+ target_len = -1;
 596+ }
 597+
 598+ target = luasandbox_ustr_index_to_offset( L, header, target, TRUE );
 599+
 600+ // Find the start symbol
 601+ while( i < header->raw_len ) {
 602+ if( idx == target ) {
 603+ found = TRUE;
 604+ break;
 605+ }
 606+
 607+ U8_NEXT_UNSAFE( utf_string, i, cur );
 608+ idx++;
 609+ }
 610+
 611+ // If start symbol index is larger than string size, return null
 612+ if( !found ) {
 613+ lua_pushstring( L, "" );
 614+ return 1;
 615+ }
 616+
 617+ target_start = i;
 618+ idx = 0;
 619+
 620+ // Find the end position
 621+ while( i < header->raw_len ) {
 622+ if( idx == target_len ) {
 623+ target_end = i;
 624+ break;
 625+ }
 626+
 627+ U8_NEXT_UNSAFE( utf_string, i, cur );
 628+ idx++;
 629+ }
 630+
 631+ if( target_end == -1 ) {
 632+ target_end = header->raw_len;
 633+ }
 634+
 635+ luasandbox_push_ustr( L, utf_string + target_start, target_end - target_start );
 636+ return 1;
 637+}
 638+/* }}} */
 639+
 640+/****************** Substring search and related operators. Beware. ******************/
 641+
 642+typedef struct {
 643+ UChar32* string; // UTF-32 representation of the needle string
 644+ int32_t* table; // KMP table
 645+ int32_t length; // Length of the needle string in code points
 646+ int32_t raw_length; // Length of the needle string in UTF-8 bytes
 647+ int singleCharMode; // Whether the needle string is a single character
 648+} ustr_needle_string;
 649+
 650+#define UTF8_SEARCH_STATUS_FOUND 1
 651+#define UTF8_SEARCH_STATUS_NOTFOUND 0
 652+
 653+typedef struct {
 654+ int32_t status; // Status of the search
 655+ int32_t raw_index; // Index in bytes
 656+ int32_t cp_index; // Index in codepoints
 657+} ustr_search_result;
 658+
 659+/** {{{ luasandbox_ustr_search_prepare
 660+ *
 661+ * Preprocesses the string so a search may be performed on it using KMP algorithm.
 662+ */
 663+static ustr_needle_string* luasandbox_ustr_search_prepare(uint8_t* utf_string, int32_t raw_len)
 664+{
 665+ ustr_needle_string* str;
 666+ int32_t i, idx;
 667+ UChar32 cur;
 668+ UErrorCode errorCode = U_ZERO_ERROR;
 669+ int32_t cnd = 0;
 670+
 671+ // Here we use the worst-case allocation
 672+ str = emalloc( sizeof( ustr_needle_string ) );
 673+ memset( str, 0, sizeof( ustr_needle_string ) );
 674+ str->string = emalloc( raw_len * 4 );
 675+ str->raw_length = raw_len;
 676+
 677+ // Convert UTF-8 to UTF-32 for search purposes
 678+ for( i = idx = 0; i < raw_len; idx++ ) {
 679+ U8_NEXT_UNSAFE( utf_string, i, cur );
 680+ str->string[idx] = cur;
 681+ }
 682+ str->length = idx;
 683+
 684+ // KMP cannot handle single character search
 685+ // (or it can, but my implementation cannot)
 686+ // Use special case handler
 687+ str->singleCharMode = str->length == 1;
 688+ if( str->singleCharMode )
 689+ return str;
 690+
 691+ // Fill the search prefix table
 692+ str->table = emalloc( str->length * sizeof(int32_t) );
 693+ str->table[0] = -1; // Yes, UChar32 is a signed type. "U" is for "Unicode", not for "unsigned"
 694+ str->table[1] = 0;
 695+ for( i = 2; i < str->length; i++ ) {
 696+ if( str->string[i - 1] == str->string[cnd] ) {
 697+ cnd++;
 698+ str->table[i] = cnd;
 699+ } else if( cnd > 0 ) {
 700+ cnd = str->table[cnd];
 701+ i--;
 702+ } else {
 703+ str->table[i] = 0;
 704+ }
 705+ }
 706+
 707+ return str;
 708+}
 709+
 710+/** {{{ luasandbox_ustr_search_free
 711+ *
 712+ * Frees the memory allocated for the preprocessed needle string.
 713+ */
 714+void luasandbox_ustr_search_free(ustr_needle_string *needle)
 715+{
 716+ if( needle->table )
 717+ efree( needle->table );
 718+ efree( needle->string );
 719+ efree( needle );
 720+}
 721+
 722+#define UTF8_SEARCH_OFFSET_NONE 0
 723+#define UTF8_SEARCH_OFFSET_RAW 1
 724+#define UTF8_SEARCH_OFFSET_CP 2
 725+
 726+/** {{{ luasandbox_ustr_search
 727+ *
 728+ * Performs search of a substring in a string using the Knuth-Morris-Pratt algorithm.
 729+ * Allows different types of start offset. The needle string must be preprocessed.
 730+ */
 731+ustr_search_result luasandbox_ustr_search(uint8_t *haystack, int32_t haystack_len, int offset_type, int offset, ustr_needle_string* needle) {
 732+ int i, j, idx; // Raw offset in haystack, CP offset in needle, CP offset in haystack
 733+ UChar32 cur;
 734+ ustr_search_result result;
 735+
 736+ // Defaults
 737+ result.raw_index = -1;
 738+ result.cp_index = -1;
 739+
 740+ // If we are given raw offset, start with it
 741+ if( offset_type == UTF8_SEARCH_OFFSET_RAW ) {
 742+ i = offset;
 743+ } else {
 744+ i = 0;
 745+ }
 746+
 747+ if( needle->singleCharMode ) {
 748+ // Handle special case of single character
 749+ for( idx = 0; i < haystack_len; idx++ ) {
 750+ U8_NEXT_UNSAFE( haystack, i, cur );
 751+
 752+ if( offset_type == UTF8_SEARCH_OFFSET_CP && idx < offset )
 753+ continue;
 754+
 755+ if( needle->string[0] == cur ) {
 756+ result.status = UTF8_SEARCH_STATUS_FOUND;
 757+ result.cp_index = idx;
 758+ result.raw_index = i - needle->raw_length;
 759+ return result;
 760+ }
 761+ }
 762+ } else {
 763+ // Otherwise use KMP search
 764+ for( j = idx = 0; i < haystack_len; idx++ ) {
 765+ U8_NEXT_UNSAFE( haystack, i, cur );
 766+
 767+ if( offset_type == UTF8_SEARCH_OFFSET_CP && idx < offset )
 768+ continue;
 769+
 770+ while( j > 0 && needle->string[j] != cur ) {
 771+ j = needle->table[j];
 772+ }
 773+ if( needle->string[j] == cur )
 774+ j++;
 775+ if( j == needle->length ) {
 776+ result.status = UTF8_SEARCH_STATUS_FOUND;
 777+ result.cp_index = (idx+1) - needle->length;
 778+ result.raw_index = i - needle->raw_length;
 779+ return result;
 780+ }
 781+ }
 782+ }
 783+
 784+ result.status = UTF8_SEARCH_STATUS_NOTFOUND;
 785+ return result;
 786+}
 787+/* }}} */
 788+
 789+/** {{{ luasandbox_ustr_pos
 790+ *
 791+ * Lua function
 792+ * int pos( ustring haystack, ustring needle[, int offset] )
 793+ * Searches for a substring in a string. Returns an offset
 794+ * according to Lua conventions (starting with 1).
 795+ */
 796+int luasandbox_ustr_pos(lua_State * L)
 797+{
 798+ luasandbox_ustr_header *header_haystack, *header_needle;
 799+ uint8_t *haystack, *needle_raw;
 800+ ustr_needle_string *needle;
 801+ int32_t offset;
 802+ ustr_search_result result;
 803+
 804+ header_haystack = luasandbox_checkustring( L, 1 );
 805+ header_needle = luasandbox_checkustring( L, 2 );
 806+
 807+ haystack = LUASANDBOX_USTR_RAW(header_haystack);
 808+ needle_raw = LUASANDBOX_USTR_RAW(header_needle);
 809+ if( lua_type( L, 3 ) == LUA_TNUMBER ) {
 810+ offset = lua_tointeger( L, 3 );
 811+ } else {
 812+ offset = 1;
 813+ }
 814+
 815+ offset = luasandbox_ustr_index_to_offset( L, header_haystack, offset, TRUE );
 816+
 817+ if( !header_needle->raw_len ) {
 818+ lua_pushstring( L, "The needle parameter may not be empty" );
 819+ lua_error( L );
 820+ }
 821+
 822+ needle = luasandbox_ustr_search_prepare( needle_raw, header_needle->raw_len );
 823+
 824+ result = luasandbox_ustr_search( haystack, header_haystack->raw_len, UTF8_SEARCH_OFFSET_CP, offset, needle );
 825+ luasandbox_ustr_search_free( needle );
 826+
 827+ switch( result.status ) {
 828+ case UTF8_SEARCH_STATUS_FOUND:
 829+ lua_pushinteger( L, result.cp_index + 1 );
 830+ return 1;
 831+ case UTF8_SEARCH_STATUS_NOTFOUND:
 832+ lua_pushinteger( L, -1 );
 833+ return 1;
 834+ }
 835+}
 836+/* }}} */
 837+
 838+/** {{{ luasandbox_ustr_replace
 839+ *
 840+ * Lua function:
 841+ * replace( ustring haystack, ustring needle, ustring replacement[, int offset[, int limit]] )
 842+ * Replaces at most limit occurances of needle in haystack with replacement,
 843+ * starting at offset.
 844+ */
 845+int luasandbox_ustr_replace(lua_State * L)
 846+{
 847+ luasandbox_ustr_header *header_haystack, *header_needle, *header_replacement, *header_result;
 848+ uint8_t *haystack, *needle_raw, *replacement, *result;
 849+ size_t haystack_len, needle_len, replacement_len, result_len;
 850+ ustr_needle_string *needle;
 851+ ustr_search_result cur;
 852+ int32_t i, offset, offset_src, offset_dest, matches_num, limit;
 853+ int32_t *matches;
 854+ int offset_mode;
 855+
 856+ header_haystack = luasandbox_checkustring( L, 1 );
 857+ header_needle = luasandbox_checkustring( L, 2 );
 858+ header_replacement = luasandbox_checkustring( L, 3 );
 859+
 860+ haystack = LUASANDBOX_USTR_RAW(header_haystack);
 861+ haystack_len = header_haystack->raw_len;
 862+ needle_raw = LUASANDBOX_USTR_RAW(header_needle);
 863+ needle_len = header_needle->raw_len;
 864+ replacement = LUASANDBOX_USTR_RAW(header_replacement);
 865+ replacement_len = header_replacement->raw_len;
 866+
 867+ if( lua_type( L, 4 ) == LUA_TNUMBER ) {
 868+ offset = lua_tointeger( L, 4 );
 869+ offset = luasandbox_ustr_index_to_offset( L, header_haystack, offset, TRUE );
 870+ offset_mode = UTF8_SEARCH_OFFSET_CP;
 871+ } else {
 872+ offset = 0;
 873+ offset_mode = UTF8_SEARCH_OFFSET_RAW;
 874+ }
 875+ limit = ( lua_type( L, 5 ) == LUA_TNUMBER ) ?
 876+ luaL_checkinteger( L, 5 ) :
 877+ -1;
 878+
 879+ if( !needle_len ) {
 880+ lua_pushstring( L, "The needle parameter may not be empty" );
 881+ lua_error( L );
 882+ }
 883+
 884+ needle = luasandbox_ustr_search_prepare( needle_raw, needle_len );
 885+
 886+ // As usually, just use worst-case scenario for memory allocation
 887+ matches = emalloc( ( haystack_len / needle_len + 1 ) * sizeof(int32_t) );
 888+
 889+ // Find all substrings to repalce
 890+ matches_num = 0;
 891+ for(;;) {
 892+ if( limit > 0 && matches_num >= limit ) {
 893+ break;
 894+ }
 895+
 896+ cur = luasandbox_ustr_search( haystack, haystack_len, offset_mode, offset, needle );
 897+
 898+ if( cur.status == UTF8_SEARCH_STATUS_FOUND ) {
 899+ matches[matches_num] = cur.raw_index;
 900+ matches_num++;
 901+ offset = cur.raw_index + needle->raw_length;
 902+ offset_mode = UTF8_SEARCH_OFFSET_RAW;
 903+ } else {
 904+ break;
 905+ }
 906+ }
 907+ luasandbox_ustr_search_free( needle );
 908+
 909+ if( !matches_num ) {
 910+ lua_pushvalue( L, 1 );
 911+ return 1;
 912+ }
 913+
 914+ // Initialize the resulting string
 915+ result_len = haystack_len + ( replacement_len - needle_len ) * matches_num;
 916+ header_result = luasandbox_init_ustr( L, result_len );
 917+ header_result->cp_len = header_haystack->cp_len +
 918+ ( header_replacement->raw_len - header_needle->raw_len ) * matches_num;
 919+ result = LUASANDBOX_USTR_RAW(header_result);
 920+
 921+ // Replace all substrings
 922+ memcpy( result, haystack, matches[i] );
 923+ offset_src = offset_dest = matches[i];
 924+ for( i = 0; i < matches_num; i++ ) {
 925+ int32_t postfix_len;
 926+
 927+ memcpy( result + offset_dest, replacement, replacement_len );
 928+ offset_src += needle_len;
 929+ offset_dest += replacement_len;
 930+
 931+ if( i == matches_num - 1 ) {
 932+ postfix_len = haystack_len - offset_src;
 933+ } else {
 934+ postfix_len = matches[i+1] - offset_src;
 935+ }
 936+
 937+ memcpy( result + offset_dest, haystack + offset_src, postfix_len );
 938+ offset_src += postfix_len;
 939+ offset_dest += postfix_len;
 940+ }
 941+
 942+ efree( matches );
 943+
 944+ return 1;
 945+}
 946+/* }}} */
 947+
 948+/** {{{ luasandbox_ustr_split
 949+ *
 950+ * Lua function:
 951+ * split( ustring haystack, ustring separator[, int limit] )
 952+ *
 953+ */
 954+int luasandbox_ustr_split(lua_State * L)
 955+{
 956+ luasandbox_ustr_header *header_haystack, *header_needle;
 957+ uint8_t *haystack, *needle_raw;
 958+ size_t haystack_len, needle_len;
 959+ ustr_needle_string *needle;
 960+ ustr_search_result cur;
 961+ int32_t i, offset, matches_num, limit;
 962+ int32_t *matches;
 963+
 964+ header_haystack = luasandbox_checkustring( L, 1 );
 965+ header_needle = luasandbox_checkustring( L, 2 );
 966+
 967+ haystack = LUASANDBOX_USTR_RAW(header_haystack);
 968+ needle_raw = LUASANDBOX_USTR_RAW(header_needle);
 969+ haystack_len = header_haystack->raw_len;
 970+ needle_len = header_needle->raw_len;
 971+
 972+ limit = ( lua_tointeger( L, 3 ) == LUA_TNUMBER ) ?
 973+ luaL_checkinteger( L, 3 ) :
 974+ -1;
 975+
 976+ if( !needle_len ) {
 977+ lua_pushstring( L, "The needle parameter may not be empty" );
 978+ lua_error( L );
 979+ }
 980+
 981+ needle = luasandbox_ustr_search_prepare( needle_raw, needle_len );
 982+ if( !needle ) {
 983+ LUASANDBOX_UNICODE_INVALID_FAIL();
 984+ }
 985+
 986+ // As usually, just use worst-case scenario for memory allocation
 987+ matches = emalloc( ( haystack_len / needle_len + 1 ) * sizeof(int32_t) );
 988+
 989+ // Find all substrings to split
 990+ matches_num = 0;
 991+ offset = 0;
 992+ for(;;) {
 993+ if( limit > 0 && matches_num >= limit ) {
 994+ break;
 995+ }
 996+
 997+ cur = luasandbox_ustr_search( haystack, haystack_len, UTF8_SEARCH_OFFSET_RAW, offset, needle );
 998+
 999+ if( cur.status == UTF8_SEARCH_STATUS_FOUND ) {
 1000+ matches[matches_num] = cur.raw_index;
 1001+ matches_num++;
 1002+ offset = cur.raw_index + needle->raw_length;
 1003+ } else {
 1004+ break;
 1005+ }
 1006+ }
 1007+ luasandbox_ustr_search_free( needle );
 1008+
 1009+ lua_createtable( L, matches_num + 1, 0 );
 1010+
 1011+ if( !matches_num ) {
 1012+ lua_pushlstring( L, haystack, haystack_len );
 1013+ lua_rawseti( L, -2, 1 );
 1014+ return 1;
 1015+ }
 1016+
 1017+ // Push all matches into the table
 1018+ lua_pushlstring( L, haystack, matches[0] );
 1019+ lua_rawseti( L, -2, 1 );
 1020+ offset = matches[0];
 1021+ for( i = 0; i < matches_num; i++ ) {
 1022+ int32_t bit_len;
 1023+
 1024+ offset += needle_len;
 1025+
 1026+ if( i == matches_num - 1 ) {
 1027+ bit_len = haystack_len - offset;
 1028+ } else {
 1029+ bit_len = matches[i+1] - offset;
 1030+ }
 1031+
 1032+ lua_pushlstring( L, haystack + offset, bit_len );
 1033+ lua_rawseti( L, -2, i + 2 );
 1034+ offset += bit_len;
 1035+ }
 1036+
 1037+ return 1;
 1038+}
 1039+/* }}} */
Property changes on: trunk/php/luasandbox/ustring.c
___________________________________________________________________
Added: svn:eol-style
11040 + native
Index: trunk/php/luasandbox/m4/ac_check_icu.m4
@@ -0,0 +1,62 @@
 2+dnl @synopsis AC_CHECK_ICU(version, action-if, action-if-not)
 3+dnl
 4+dnl @summary check for ICU of sufficient version by looking at icu-config
 5+dnl
 6+dnl Defines ICU_LIBS, ICU_CFLAGS, ICU_CXXFLAGS. See icu-config(1) man
 7+dnl page.
 8+dnl
 9+dnl @category InstalledPackages
 10+dnl @author Akos Maroy <darkeye@tyrell.hu>
 11+dnl @version 2005-09-20
 12+dnl @license AllPermissive
 13+
dnl AC_CHECK_ICU(version, action-if-found, action-if-not-found)
dnl
dnl Looks up icu-config, unless ICU_CONFIG is already set, and checks that
dnl the version it reports is at least the requested one.  On success
dnl ICU_CFLAGS, ICU_CXXFLAGS and ICU_LIBS are filled in from icu-config;
dnl when the version is too old they are set to empty strings.  The three
dnl variables are AC_SUBSTed whenever icu-config was found.  When no
dnl action-if-not-found is given, a failed check aborts configure.
AC_DEFUN([AC_CHECK_ICU], [
  succeeded=no

  if test -z "$ICU_CONFIG"; then
    AC_PATH_PROG(ICU_CONFIG, icu-config, no)
  fi

  if test "$ICU_CONFIG" = "no" ; then
    echo "*** The icu-config script could not be found. Make sure it is"
    echo "*** in your path, and that ICU is properly installed."
    echo "*** Or see http://ibm.com/software/globalization/icu/"
  else
    ICU_VERSION=`$ICU_CONFIG --version`
    AC_MSG_CHECKING(for ICU >= $1)
    dnl NOTE: expr compares operands that are not plain integers as
    dnl strings, so dotted versions are compared lexically here.  The
    dnl operands are quoted so that an empty version cannot turn the
    dnl expression into a syntax error.
    VERSION_CHECK=`expr "$ICU_VERSION" \>\= "$1"`
    if test "$VERSION_CHECK" = "1" ; then
      AC_MSG_RESULT(yes)
      succeeded=yes

      AC_MSG_CHECKING(ICU_CFLAGS)
      ICU_CFLAGS=`$ICU_CONFIG --cflags`
      AC_MSG_RESULT($ICU_CFLAGS)

      AC_MSG_CHECKING(ICU_CXXFLAGS)
      ICU_CXXFLAGS=`$ICU_CONFIG --cxxflags`
      AC_MSG_RESULT($ICU_CXXFLAGS)

      AC_MSG_CHECKING(ICU_LIBS)
      ICU_LIBS=`$ICU_CONFIG --ldflags`
      AC_MSG_RESULT($ICU_LIBS)
    else
      ICU_CFLAGS=""
      ICU_CXXFLAGS=""
      ICU_LIBS=""
      ## If we have a custom action on failure, don't print errors, but
      ## do set a variable so people can do so.
      ifelse([$3], ,echo "can't find ICU >= $1",)
    fi

    AC_SUBST(ICU_CFLAGS)
    AC_SUBST(ICU_CXXFLAGS)
    AC_SUBST(ICU_LIBS)
  fi

  if test "$succeeded" = yes; then
    ifelse([$2], , :, [$2])
  else
    ifelse([$3], , AC_MSG_ERROR([Library requirements (ICU) not met.]), [$3])
  fi
])
Index: trunk/php/luasandbox/config.m4
@@ -9,6 +9,10 @@
1010 if test "$PHP_LUASANDBOX" != "no"; then
1111 dnl Include pkg-config macros definitions:
1212 m4_include([m4/pkg.m4])
 13+
 14+ dnl ICU did not support pkg-config until recently; the current WM version
 15+ dnl probably does not support it either
 16+ m4_include([m4/ac_check_icu.m4])
1317 PKG_PROG_PKG_CONFIG
1418
1519 dnl We need lua or fallback to luajit.
@@ -19,12 +23,17 @@
2024 ])
2125 ])
2226
 27+ AC_CHECK_ICU( [4.0] )
 28+
2329 dnl LUA_LIBS and LUA_CFLAGS interprets them:
2430 PHP_EVAL_INCLINE($LUA_CFLAGS)
2531 PHP_EVAL_LIBLINE($LUA_LIBS, LUASANDBOX_SHARED_LIBADD)
26 -
 32+
 33+ PHP_EVAL_INCLINE($ICU_CFLAGS)
 34+ PHP_EVAL_LIBLINE($ICU_LIBS, LUASANDBOX_SHARED_LIBADD)
 35+
2736 PHP_EVAL_LIBLINE("-lrt", LUASANDBOX_SHARED_LIBADD)
2837
2938 PHP_SUBST(LUASANDBOX_SHARED_LIBADD)
30 - PHP_NEW_EXTENSION(luasandbox, alloc.c data_conversion.c library.c luasandbox.c timer.c, $ext_shared)
 39+ PHP_NEW_EXTENSION(luasandbox, alloc.c data_conversion.c library.c luasandbox.c timer.c ustring.c, $ext_shared)
3140 fi
Index: trunk/php/luasandbox/library.c
@@ -14,6 +14,7 @@
1515
1616 #include "php.h"
1717 #include "php_luasandbox.h"
 18+#include "luasandbox_unicode.h"
1819
1920 static HashTable * luasandbox_lib_get_allowed_globals(TSRMLS_D);
2021
@@ -128,6 +129,9 @@
129130 lua_pushcfunction(L, luasandbox_math_randomseed);
130131 lua_setfield(L, -2, "randomseed");
131132 lua_pop(L, 1);
 133+
 134+ // Install string-related functions
 135+ luasandbox_install_unicode_functions(L);
132136 }
133137 /* }}} */
134138
Index: trunk/php/luasandbox/luasandbox_unicode.h
@@ -0,0 +1,30 @@
 2+#ifndef LUASANDBOX_UNICODE_H
 3+#define LUASANDBOX_UNICODE_H
 4+
 5+#include <stdint.h>
 6+#include <lua.h>
 7+
/**
 * Header of a Unicode string object.
 *
 * Unicode strings are input and stored as UTF-8. The accessor macros below
 * address the memory that starts immediately after this header.
 */
typedef struct {
	size_t raw_len; // Byte length in UTF-8
	int32_t cp_len; // Number of code points
} luasandbox_ustr_header;

/**
 * Pointer (as uint8_t*) to the first byte following the header.
 *
 * The header pointer is cast to uint8_t* before the offset is added:
 * arithmetic on void* is a compiler extension, not standard C.
 */
#define LUASANDBOX_USTR_RAW(header) \
	( (uint8_t*)(header) + sizeof(luasandbox_ustr_header) )

/**
 * Size in bytes of the header plus the raw_len bytes recorded in it.
 * The argument is parenthesised so that any pointer expression is accepted.
 */
#define LUASANDBOX_USTR_TOTALLEN(header) \
	( sizeof(luasandbox_ustr_header) + (header)->raw_len )
 18+
 19+void luasandbox_install_unicode_functions(lua_State * L);
 20+
 21+luasandbox_ustr_header *luasandbox_init_ustr(lua_State * L, size_t len);
 22+luasandbox_ustr_header *luasandbox_push_ustr(lua_State * L, uint8_t *str, size_t len);
 23+int luasandbox_isustr(lua_State * L, int idx);
 24+luasandbox_ustr_header* luasandbox_checkustring(lua_State * L, int idx);
 25+const uint8_t* luasandbox_getustr(lua_State * L, int idx, size_t* raw_len);
 26+int32_t luasandbox_ustr_index_to_offset(lua_State * L, luasandbox_ustr_header *str, int32_t idx, int check_limits);
 27+
 28+void luasandbox_convert_toUTF16(lua_State * L, int idx);
 29+void luasandbox_convert_fromUTF16(lua_State * L, int idx);
 30+
 31+#endif
Property changes on: trunk/php/luasandbox/luasandbox_unicode.h
___________________________________________________________________
Added: svn:eol-style
132 + native
Added: svn:keywords
233 + Author Date Id Rev URL
Index: trunk/php/luasandbox/data_conversion.c
@@ -10,6 +10,7 @@
1111
1212 #include "php.h"
1313 #include "php_luasandbox.h"
 14+#include "luasandbox_unicode.h"
1415
1516 static void luasandbox_lua_to_array(HashTable *ht, lua_State *L, int index,
1617 zval * sandbox_zval, HashTable * recursionGuard TSRMLS_DC);
@@ -305,6 +306,13 @@
306307 break;
307308 }
308309 case LUA_TUSERDATA:
 310+ if(luasandbox_isustr(L, index)) {
 311+ const uint8_t *str;
 312+ size_t length;
 313+ str = luasandbox_getustr(L, index, &length);
 314+ ZVAL_STRINGL(z, str, length, 1);
 315+ break;
 316+ }
309317 case LUA_TTHREAD:
310318 case LUA_TLIGHTUSERDATA:
311319 default:

Status & tagging log