Index: trunk/extensions/FastStringSearch/tests/001.phpt |
— | — | @@ -0,0 +1,65 @@ |
| 2 | +--TEST-- |
| 3 | +Basic fast string search tests |
| 4 | +--SKIPIF-- |
| 5 | +<?php if (!extension_loaded("fss")) print "skip"; ?> |
| 6 | +--FILE-- |
| 7 | +<?php |
| 8 | +$fss = fss_prep_search(array('hello', 'hi')); |
| 9 | +var_dump(fss_exec_search($fss, 'hhhhello')); |
| 10 | +var_dump(fss_exec_search($fss, 'hellohello', 1)); |
| 11 | +var_dump(fss_exec_search($fss, 'hellohihello', 1)); |
| 12 | +var_dump(fss_exec_search($fss, 'adfjshfkjs')); |
| 13 | + |
| 14 | +fss_free($fss); |
| 15 | + |
| 16 | +$fss = fss_prep_search('hello'); |
| 17 | +var_dump(fss_exec_search($fss, 'hhhhello')); |
| 18 | +var_dump(fss_exec_search($fss, 'hellohello', 1)); |
| 19 | +var_dump(fss_exec_search($fss, 'adfjshfkjs')); |
| 20 | + |
| 21 | +var_dump(fss_exec_replace($fss, 'helloabchelloaa')); |
| 22 | + |
| 23 | +fss_free($fss); |
| 24 | + |
| 25 | +$fss = fss_prep_replace(array( |
| 26 | + 'abc' => 'def', |
| 27 | + 'ab' => 'X', |
| 28 | +)); |
| 29 | + |
| 30 | +var_dump(fss_exec_replace($fss, 'ddabcababcaaaab')); |
| 31 | +?> |
| 32 | +--EXPECT-- |
| 33 | +array(2) { |
| 34 | + [0]=> |
| 35 | + int(3) |
| 36 | + [1]=> |
| 37 | + int(5) |
| 38 | +} |
| 39 | +array(2) { |
| 40 | + [0]=> |
| 41 | + int(5) |
| 42 | + [1]=> |
| 43 | + int(5) |
| 44 | +} |
| 45 | +array(2) { |
| 46 | + [0]=> |
| 47 | + int(5) |
| 48 | + [1]=> |
| 49 | + int(2) |
| 50 | +} |
| 51 | +bool(false) |
| 52 | +array(2) { |
| 53 | + [0]=> |
| 54 | + int(3) |
| 55 | + [1]=> |
| 56 | + int(5) |
| 57 | +} |
| 58 | +array(2) { |
| 59 | + [0]=> |
| 60 | + int(5) |
| 61 | + [1]=> |
| 62 | + int(5) |
| 63 | +} |
| 64 | +bool(false) |
| 65 | +string(5) "abcaa" |
| 66 | +string(13) "dddefXdefaaaX" |
Property changes on: trunk/extensions/FastStringSearch/tests/001.phpt |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 67 | + native |
Index: trunk/extensions/FastStringSearch/kwset.h |
— | — | @@ -0,0 +1,57 @@ |
| 2 | +/* kwset.h - header declaring the keyword set library. |
| 3 | + Copyright (C) 1989, 1998, 2005 Free Software Foundation, Inc. |
| 4 | + |
| 5 | + This program is free software; you can redistribute it and/or modify |
| 6 | + it under the terms of the GNU General Public License as published by |
| 7 | + the Free Software Foundation; either version 2, or (at your option) |
| 8 | + any later version. |
| 9 | + |
| 10 | + This program is distributed in the hope that it will be useful, |
| 11 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | + GNU General Public License for more details. |
| 14 | + |
| 15 | + You should have received a copy of the GNU General Public License |
| 16 | + along with this program; if not, write to the Free Software |
| 17 | + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA |
| 18 | + 02110-1301, USA. */ |
| 19 | + |
| 20 | +/* Written August 1989 by Mike Haertel. |
| 21 | + The author may be reached (Email) at the address mike@ai.mit.edu, |
| 22 | + or (US mail) as Mike Haertel c/o Free Software Foundation. */ |
| 23 | + |
| 24 | +struct kwsmatch |
| 25 | +{ |
| 26 | + int index; /* Index number of matching keyword. */ |
| 27 | + size_t offset[1]; /* Offset of each submatch. */ |
| 28 | + size_t size[1]; /* Length of each submatch. */ |
| 29 | +}; |
| 30 | + |
| 31 | +typedef void* kwset_t; |
| 32 | + |
| 33 | +/* Return an opaque pointer to a newly allocated keyword set, or NULL |
| 34 | + if enough memory cannot be obtained. The argument if non-NULL |
| 35 | + specifies a table of character translations to be applied to all |
| 36 | + pattern and search text. */ |
| 37 | +extern kwset_t kwsalloc(char const *); |
| 38 | + |
| 39 | +/* Incrementally extend the keyword set to include the given string. |
| 40 | + Return NULL for success, or an error message. Remember an index |
| 41 | + number for each keyword included in the set. */ |
| 42 | +extern const char *kwsincr(kwset_t, char const *, size_t); |
| 43 | + |
| 44 | +/* When the keyword set has been completely built, prepare it for |
| 45 | + use. Return NULL for success, or an error message. */ |
| 46 | +extern const char *kwsprep(kwset_t); |
| 47 | + |
| 48 | +/* Search through the given buffer for a member of the keyword set. |
| 49 | + Return a pointer to the leftmost longest match found, or NULL if |
| 50 | + no match is found. If foundlen is non-NULL, store the length of |
| 51 | + the matching substring in the integer it points to. Similarly, |
| 52 | + if foundindex is non-NULL, store the index of the particular |
| 53 | + keyword found therein. */ |
| 54 | +extern size_t kwsexec(kwset_t, char const *, size_t, struct kwsmatch *); |
| 55 | + |
| 56 | +/* Deallocate the given keyword set and all its associated storage. */ |
| 57 | +extern void kwsfree(kwset_t); |
| 58 | + |
Property changes on: trunk/extensions/FastStringSearch/kwset.h |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 59 | + native |
Index: trunk/extensions/FastStringSearch/config.m4 |
— | — | @@ -0,0 +1,63 @@ |
| 2 | +dnl $Id$ |
| 3 | +dnl config.m4 for extension fss |
| 4 | + |
| 5 | +dnl Comments in this file start with the string 'dnl'. |
| 6 | +dnl Remove where necessary. This file will not work |
| 7 | +dnl without editing. |
| 8 | + |
| 9 | +dnl If your extension references something external, use with: |
| 10 | + |
| 11 | +dnl PHP_ARG_WITH(fss, for fss support, |
| 12 | +dnl Make sure that the comment is aligned: |
| 13 | +dnl [ --with-fss Include fss support]) |
| 14 | + |
| 15 | +dnl Otherwise use enable: |
| 16 | + |
| 17 | +PHP_ARG_ENABLE(fss, whether to enable fss support, |
| 18 | +dnl Make sure that the comment is aligned: |
| 19 | +[ --enable-fss Enable fss support]) |
| 20 | + |
| 21 | +if test "$PHP_FSS" != "no"; then |
| 22 | + dnl Write more examples of tests here... |
| 23 | + |
| 24 | + dnl # --with-fss -> check with-path |
| 25 | + dnl SEARCH_PATH="/usr/local /usr" # you might want to change this |
| 26 | + dnl SEARCH_FOR="/include/fss.h" # you most likely want to change this |
| 27 | + dnl if test -r $PHP_FSS/$SEARCH_FOR; then # path given as parameter |
| 28 | + dnl FSS_DIR=$PHP_FSS |
| 29 | + dnl else # search default path list |
| 30 | + dnl AC_MSG_CHECKING([for fss files in default path]) |
| 31 | + dnl for i in $SEARCH_PATH ; do |
| 32 | + dnl if test -r $i/$SEARCH_FOR; then |
| 33 | + dnl FSS_DIR=$i |
| 34 | + dnl AC_MSG_RESULT(found in $i) |
| 35 | + dnl fi |
| 36 | + dnl done |
| 37 | + dnl fi |
| 38 | + dnl |
| 39 | + dnl if test -z "$FSS_DIR"; then |
| 40 | + dnl AC_MSG_RESULT([not found]) |
| 41 | + dnl AC_MSG_ERROR([Please reinstall the fss distribution]) |
| 42 | + dnl fi |
| 43 | + |
| 44 | + dnl # --with-fss -> add include path |
| 45 | + dnl PHP_ADD_INCLUDE($FSS_DIR/include) |
| 46 | + |
| 47 | + dnl # --with-fss -> check for lib and symbol presence |
| 48 | + dnl LIBNAME=fss # you may want to change this |
| 49 | + dnl LIBSYMBOL=fss # you most likely want to change this |
| 50 | + |
| 51 | + dnl PHP_CHECK_LIBRARY($LIBNAME,$LIBSYMBOL, |
| 52 | + dnl [ |
| 53 | + dnl PHP_ADD_LIBRARY_WITH_PATH($LIBNAME, $FSS_DIR/lib, FSS_SHARED_LIBADD) |
| 54 | + dnl AC_DEFINE(HAVE_FSSLIB,1,[ ]) |
| 55 | + dnl ],[ |
| 56 | + dnl AC_MSG_ERROR([wrong fss lib version or lib not found]) |
| 57 | + dnl ],[ |
| 58 | + dnl -L$FSS_DIR/lib -lm -ldl |
| 59 | + dnl ]) |
| 60 | + dnl |
| 61 | + dnl PHP_SUBST(FSS_SHARED_LIBADD) |
| 62 | + |
| 63 | + PHP_NEW_EXTENSION(fss, fss.c kwset.c obstack.c, $ext_shared) |
| 64 | +fi |
Property changes on: trunk/extensions/FastStringSearch/config.m4 |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 65 | + native |
Index: trunk/extensions/FastStringSearch/obstack.c |
— | — | @@ -0,0 +1,598 @@ |
| 2 | +/* obstack.c - subroutines used implicitly by object stack macros |
| 3 | + Copyright (C) 1988-1994,96,97,98,99 Free Software Foundation, Inc. |
| 4 | + |
| 5 | + This file is part of the GNU C Library. Its master source is NOT part of |
| 6 | + the C library, however. The master source lives in /gd/gnu/lib. |
| 7 | + |
| 8 | + The GNU C Library is free software; you can redistribute it and/or |
| 9 | + modify it under the terms of the GNU Library General Public License as |
| 10 | + published by the Free Software Foundation; either version 2 of the |
| 11 | + License, or (at your option) any later version. |
| 12 | + |
| 13 | + The GNU C Library is distributed in the hope that it will be useful, |
| 14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | + Library General Public License for more details. |
| 17 | + |
| 18 | + You should have received a copy of the GNU Library General Public |
| 19 | + License along with the GNU C Library; see the file COPYING.LIB. If not, |
| 20 | + write to the Free Software Foundation, Inc., |
| 21 | + 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ |
| 22 | + |
| 23 | +#ifdef HAVE_CONFIG_H |
| 24 | +#include <config.h> |
| 25 | +#endif |
| 26 | + |
| 27 | +#include "obstack.h" |
| 28 | + |
| 29 | +/* NOTE BEFORE MODIFYING THIS FILE: This version number must be |
| 30 | + incremented whenever callers compiled using an old obstack.h can no |
| 31 | + longer properly call the functions in this obstack.c. */ |
| 32 | +#define OBSTACK_INTERFACE_VERSION 1 |
| 33 | + |
| 34 | +/* Comment out all this code if we are using the GNU C Library, and are not |
| 35 | + actually compiling the library itself, and the installed library |
| 36 | + supports the same library interface we do. This code is part of the GNU |
| 37 | + C Library, but also included in many other GNU distributions. Compiling |
| 38 | + and linking in this code is a waste when using the GNU C library |
| 39 | + (especially if it is a shared library). Rather than having every GNU |
| 40 | + program understand `configure --with-gnu-libc' and omit the object |
| 41 | + files, it is simpler to just do this in the source for each such file. */ |
| 42 | + |
| 43 | +#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */ |
| 44 | +#if !defined (_LIBC) && defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ > 1 |
| 45 | +#include <gnu-versions.h> |
| 46 | +#if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION |
| 47 | +#define ELIDE_CODE |
| 48 | +#endif |
| 49 | +#endif |
| 50 | + |
| 51 | + |
| 52 | +#ifndef ELIDE_CODE |
| 53 | + |
| 54 | + |
| 55 | +#if defined (__STDC__) && __STDC__ |
| 56 | +#define POINTER void * |
| 57 | +#else |
| 58 | +#define POINTER char * |
| 59 | +#endif |
| 60 | + |
| 61 | +/* Determine default alignment. */ |
| 62 | +struct fooalign {char x; double d;}; |
| 63 | +#define DEFAULT_ALIGNMENT \ |
| 64 | + ((PTR_INT_TYPE) ((char *) &((struct fooalign *) 0)->d - (char *) 0)) |
| 65 | +/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT. |
| 66 | + But in fact it might be less smart and round addresses to as much as |
| 67 | + DEFAULT_ROUNDING. So we prepare for it to do that. */ |
| 68 | +union fooround {long x; double d;}; |
| 69 | +#define DEFAULT_ROUNDING (sizeof (union fooround)) |
| 70 | + |
| 71 | +/* When we copy a long block of data, this is the unit to do it with. |
| 72 | + On some machines, copying successive ints does not work; |
| 73 | + in such a case, redefine COPYING_UNIT to `long' (if that works) |
| 74 | + or `char' as a last resort. */ |
| 75 | +#ifndef COPYING_UNIT |
| 76 | +#define COPYING_UNIT int |
| 77 | +#endif |
| 78 | + |
| 79 | + |
| 80 | +/* The functions allocating more room by calling `obstack_chunk_alloc' |
| 81 | + jump to the handler pointed to by `obstack_alloc_failed_handler'. |
| 82 | + This can be set to a user defined function which should either |
| 83 | + abort gracefully or use longjump - but shouldn't return. This |
| 84 | + variable by default points to the internal function |
| 85 | + `print_and_abort'. */ |
| 86 | +#if defined (__STDC__) && __STDC__ |
| 87 | +static void print_and_abort (void); |
| 88 | +void (*obstack_alloc_failed_handler) (void) = print_and_abort; |
| 89 | +#else |
| 90 | +static void print_and_abort (); |
| 91 | +void (*obstack_alloc_failed_handler) () = print_and_abort; |
| 92 | +#endif |
| 93 | + |
| 94 | +/* Exit value used when `print_and_abort' is used. */ |
| 95 | +#if defined __GNU_LIBRARY__ || defined HAVE_STDLIB_H |
| 96 | +#include <stdlib.h> |
| 97 | +#endif |
| 98 | +#ifndef EXIT_FAILURE |
| 99 | +#define EXIT_FAILURE 1 |
| 100 | +#endif |
| 101 | +int obstack_exit_failure = EXIT_FAILURE; |
| 102 | + |
| 103 | +/* The non-GNU-C macros copy the obstack into this global variable |
| 104 | + to avoid multiple evaluation. */ |
| 105 | + |
| 106 | +struct obstack *_obstack; |
| 107 | + |
| 108 | +/* Define a macro that either calls functions with the traditional malloc/free |
| 109 | + calling interface, or calls functions with the mmalloc/mfree interface |
| 110 | + (that adds an extra first argument), based on the state of use_extra_arg. |
| 111 | + For free, do not use ?:, since some compilers, like the MIPS compilers, |
| 112 | + do not allow (expr) ? void : void. */ |
| 113 | + |
| 114 | +#if defined (__STDC__) && __STDC__ |
| 115 | +#define CALL_CHUNKFUN(h, size) \ |
| 116 | + (((h) -> use_extra_arg) \ |
| 117 | + ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ |
| 118 | + : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size))) |
| 119 | + |
| 120 | +#define CALL_FREEFUN(h, old_chunk) \ |
| 121 | + do { \ |
| 122 | + if ((h) -> use_extra_arg) \ |
| 123 | + (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ |
| 124 | + else \ |
| 125 | + (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \ |
| 126 | + } while (0) |
| 127 | +#else |
| 128 | +#define CALL_CHUNKFUN(h, size) \ |
| 129 | + (((h) -> use_extra_arg) \ |
| 130 | + ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ |
| 131 | + : (*(struct _obstack_chunk *(*) ()) (h)->chunkfun) ((size))) |
| 132 | + |
| 133 | +#define CALL_FREEFUN(h, old_chunk) \ |
| 134 | + do { \ |
| 135 | + if ((h) -> use_extra_arg) \ |
| 136 | + (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ |
| 137 | + else \ |
| 138 | + (*(void (*) ()) (h)->freefun) ((old_chunk)); \ |
| 139 | + } while (0) |
| 140 | +#endif |
| 141 | + |
| 142 | + |
| 143 | +/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default). |
| 144 | + Objects start on multiples of ALIGNMENT (0 means use default). |
| 145 | + CHUNKFUN is the function to use to allocate chunks, |
| 146 | + and FREEFUN the function to free them. |
| 147 | + |
| 148 | + Return nonzero if successful, calls obstack_alloc_failed_handler if |
| 149 | + allocation fails. */ |
| 150 | + |
| 151 | +int |
| 152 | +_obstack_begin (h, size, alignment, chunkfun, freefun) |
| 153 | + struct obstack *h; |
| 154 | + int size; |
| 155 | + int alignment; |
| 156 | +#if defined (__STDC__) && __STDC__ |
| 157 | + POINTER (*chunkfun) (long); |
| 158 | + void (*freefun) (void *); |
| 159 | +#else |
| 160 | + POINTER (*chunkfun) (); |
| 161 | + void (*freefun) (); |
| 162 | +#endif |
| 163 | +{ |
| 164 | + register struct _obstack_chunk *chunk; /* points to new chunk */ |
| 165 | + |
| 166 | + if (alignment == 0) |
| 167 | + alignment = (int) DEFAULT_ALIGNMENT; |
| 168 | + if (size == 0) |
| 169 | + /* Default size is what GNU malloc can fit in a 4096-byte block. */ |
| 170 | + { |
| 171 | + /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc. |
| 172 | + Use the values for range checking, because if range checking is off, |
| 173 | + the extra bytes won't be missed terribly, but if range checking is on |
| 174 | + and we used a larger request, a whole extra 4096 bytes would be |
| 175 | + allocated. |
| 176 | + |
| 177 | + These number are irrelevant to the new GNU malloc. I suspect it is |
| 178 | + less sensitive to the size of the request. */ |
| 179 | + int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1)) |
| 180 | + + 4 + DEFAULT_ROUNDING - 1) |
| 181 | + & ~(DEFAULT_ROUNDING - 1)); |
| 182 | + size = 4096 - extra; |
| 183 | + } |
| 184 | + |
| 185 | +#if defined (__STDC__) && __STDC__ |
| 186 | + h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun; |
| 187 | + h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; |
| 188 | +#else |
| 189 | + h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun; |
| 190 | + h->freefun = freefun; |
| 191 | +#endif |
| 192 | + h->chunk_size = size; |
| 193 | + h->alignment_mask = alignment - 1; |
| 194 | + h->use_extra_arg = 0; |
| 195 | + |
| 196 | + chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); |
| 197 | + if (!chunk) |
| 198 | + (*obstack_alloc_failed_handler) (); |
| 199 | + h->next_free = h->object_base = chunk->contents; |
| 200 | + h->chunk_limit = chunk->limit |
| 201 | + = (char *) chunk + h->chunk_size; |
| 202 | + chunk->prev = 0; |
| 203 | + /* The initial chunk now contains no empty object. */ |
| 204 | + h->maybe_empty_object = 0; |
| 205 | + h->alloc_failed = 0; |
| 206 | + return 1; |
| 207 | +} |
| 208 | + |
| 209 | +int |
| 210 | +_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg) |
| 211 | + struct obstack *h; |
| 212 | + int size; |
| 213 | + int alignment; |
| 214 | +#if defined (__STDC__) && __STDC__ |
| 215 | + POINTER (*chunkfun) (POINTER, long); |
| 216 | + void (*freefun) (POINTER, POINTER); |
| 217 | +#else |
| 218 | + POINTER (*chunkfun) (); |
| 219 | + void (*freefun) (); |
| 220 | +#endif |
| 221 | + POINTER arg; |
| 222 | +{ |
| 223 | + register struct _obstack_chunk *chunk; /* points to new chunk */ |
| 224 | + |
| 225 | + if (alignment == 0) |
| 226 | + alignment = (int) DEFAULT_ALIGNMENT; |
| 227 | + if (size == 0) |
| 228 | + /* Default size is what GNU malloc can fit in a 4096-byte block. */ |
| 229 | + { |
| 230 | + /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc. |
| 231 | + Use the values for range checking, because if range checking is off, |
| 232 | + the extra bytes won't be missed terribly, but if range checking is on |
| 233 | + and we used a larger request, a whole extra 4096 bytes would be |
| 234 | + allocated. |
| 235 | + |
| 236 | + These number are irrelevant to the new GNU malloc. I suspect it is |
| 237 | + less sensitive to the size of the request. */ |
| 238 | + int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1)) |
| 239 | + + 4 + DEFAULT_ROUNDING - 1) |
| 240 | + & ~(DEFAULT_ROUNDING - 1)); |
| 241 | + size = 4096 - extra; |
| 242 | + } |
| 243 | + |
| 244 | +#if defined(__STDC__) && __STDC__ |
| 245 | + h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun; |
| 246 | + h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; |
| 247 | +#else |
| 248 | + h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun; |
| 249 | + h->freefun = freefun; |
| 250 | +#endif |
| 251 | + h->chunk_size = size; |
| 252 | + h->alignment_mask = alignment - 1; |
| 253 | + h->extra_arg = arg; |
| 254 | + h->use_extra_arg = 1; |
| 255 | + |
| 256 | + chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); |
| 257 | + if (!chunk) |
| 258 | + (*obstack_alloc_failed_handler) (); |
| 259 | + h->next_free = h->object_base = chunk->contents; |
| 260 | + h->chunk_limit = chunk->limit |
| 261 | + = (char *) chunk + h->chunk_size; |
| 262 | + chunk->prev = 0; |
| 263 | + /* The initial chunk now contains no empty object. */ |
| 264 | + h->maybe_empty_object = 0; |
| 265 | + h->alloc_failed = 0; |
| 266 | + return 1; |
| 267 | +} |
| 268 | + |
| 269 | +/* Allocate a new current chunk for the obstack *H |
| 270 | + on the assumption that LENGTH bytes need to be added |
| 271 | + to the current object, or a new object of length LENGTH allocated. |
| 272 | + Copies any partial object from the end of the old chunk |
| 273 | + to the beginning of the new one. */ |
| 274 | + |
| 275 | +void |
| 276 | +_obstack_newchunk (h, length) |
| 277 | + struct obstack *h; |
| 278 | + int length; |
| 279 | +{ |
| 280 | + register struct _obstack_chunk *old_chunk = h->chunk; |
| 281 | + register struct _obstack_chunk *new_chunk; |
| 282 | + register long new_size; |
| 283 | + register long obj_size = h->next_free - h->object_base; |
| 284 | + register long i; |
| 285 | + long already; |
| 286 | + |
| 287 | + /* Compute size for new chunk. */ |
| 288 | + new_size = (obj_size + length) + (obj_size >> 3) + 100; |
| 289 | + if (new_size < h->chunk_size) |
| 290 | + new_size = h->chunk_size; |
| 291 | + |
| 292 | + /* Allocate and initialize the new chunk. */ |
| 293 | + new_chunk = CALL_CHUNKFUN (h, new_size); |
| 294 | + if (!new_chunk) |
| 295 | + (*obstack_alloc_failed_handler) (); |
| 296 | + h->chunk = new_chunk; |
| 297 | + new_chunk->prev = old_chunk; |
| 298 | + new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size; |
| 299 | + |
| 300 | + /* Move the existing object to the new chunk. |
| 301 | + Word at a time is fast and is safe if the object |
| 302 | + is sufficiently aligned. */ |
| 303 | + if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT) |
| 304 | + { |
| 305 | + for (i = obj_size / sizeof (COPYING_UNIT) - 1; |
| 306 | + i >= 0; i--) |
| 307 | + ((COPYING_UNIT *)new_chunk->contents)[i] |
| 308 | + = ((COPYING_UNIT *)h->object_base)[i]; |
| 309 | + /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT, |
| 310 | + but that can cross a page boundary on a machine |
| 311 | + which does not do strict alignment for COPYING_UNITS. */ |
| 312 | + already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT); |
| 313 | + } |
| 314 | + else |
| 315 | + already = 0; |
| 316 | + /* Copy remaining bytes one by one. */ |
| 317 | + for (i = already; i < obj_size; i++) |
| 318 | + new_chunk->contents[i] = h->object_base[i]; |
| 319 | + |
| 320 | + /* If the object just copied was the only data in OLD_CHUNK, |
| 321 | + free that chunk and remove it from the chain. |
| 322 | + But not if that chunk might contain an empty object. */ |
| 323 | + if (h->object_base == old_chunk->contents && ! h->maybe_empty_object) |
| 324 | + { |
| 325 | + new_chunk->prev = old_chunk->prev; |
| 326 | + CALL_FREEFUN (h, old_chunk); |
| 327 | + } |
| 328 | + |
| 329 | + h->object_base = new_chunk->contents; |
| 330 | + h->next_free = h->object_base + obj_size; |
| 331 | + /* The new chunk certainly contains no empty object yet. */ |
| 332 | + h->maybe_empty_object = 0; |
| 333 | +} |
| 334 | + |
| 335 | +/* Return nonzero if object OBJ has been allocated from obstack H. |
| 336 | + This is here for debugging. |
| 337 | + If you use it in a program, you are probably losing. */ |
| 338 | + |
| 339 | +#if defined (__STDC__) && __STDC__ |
| 340 | +/* Suppress -Wmissing-prototypes warning. We don't want to declare this in |
| 341 | + obstack.h because it is just for debugging. */ |
| 342 | +int _obstack_allocated_p (struct obstack *h, POINTER obj); |
| 343 | +#endif |
| 344 | + |
| 345 | +int |
| 346 | +_obstack_allocated_p (h, obj) |
| 347 | + struct obstack *h; |
| 348 | + POINTER obj; |
| 349 | +{ |
| 350 | + register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ |
| 351 | + register struct _obstack_chunk *plp; /* point to previous chunk if any */ |
| 352 | + |
| 353 | + lp = (h)->chunk; |
| 354 | + /* We use >= rather than > since the object cannot be exactly at |
| 355 | + the beginning of the chunk but might be an empty object exactly |
| 356 | + at the end of an adjacent chunk. */ |
| 357 | + while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj)) |
| 358 | + { |
| 359 | + plp = lp->prev; |
| 360 | + lp = plp; |
| 361 | + } |
| 362 | + return lp != 0; |
| 363 | +} |
| 364 | + |
| 365 | +/* Free objects in obstack H, including OBJ and everything allocate |
| 366 | + more recently than OBJ. If OBJ is zero, free everything in H. */ |
| 367 | + |
| 368 | +#undef obstack_free |
| 369 | + |
| 370 | +/* This function has two names with identical definitions. |
| 371 | + This is the first one, called from non-ANSI code. */ |
| 372 | + |
| 373 | +void |
| 374 | +_obstack_free (h, obj) |
| 375 | + struct obstack *h; |
| 376 | + POINTER obj; |
| 377 | +{ |
| 378 | + register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ |
| 379 | + register struct _obstack_chunk *plp; /* point to previous chunk if any */ |
| 380 | + |
| 381 | + lp = h->chunk; |
| 382 | + /* We use >= because there cannot be an object at the beginning of a chunk. |
| 383 | + But there can be an empty object at that address |
| 384 | + at the end of another chunk. */ |
| 385 | + while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj)) |
| 386 | + { |
| 387 | + plp = lp->prev; |
| 388 | + CALL_FREEFUN (h, lp); |
| 389 | + lp = plp; |
| 390 | + /* If we switch chunks, we can't tell whether the new current |
| 391 | + chunk contains an empty object, so assume that it may. */ |
| 392 | + h->maybe_empty_object = 1; |
| 393 | + } |
| 394 | + if (lp) |
| 395 | + { |
| 396 | + h->object_base = h->next_free = (char *) (obj); |
| 397 | + h->chunk_limit = lp->limit; |
| 398 | + h->chunk = lp; |
| 399 | + } |
| 400 | + else if (obj != 0) |
| 401 | + /* obj is not in any of the chunks! */ |
| 402 | + abort (); |
| 403 | +} |
| 404 | + |
| 405 | +/* This function is used from ANSI code. */ |
| 406 | + |
| 407 | +void |
| 408 | +obstack_free (h, obj) |
| 409 | + struct obstack *h; |
| 410 | + POINTER obj; |
| 411 | +{ |
| 412 | + register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ |
| 413 | + register struct _obstack_chunk *plp; /* point to previous chunk if any */ |
| 414 | + |
| 415 | + lp = h->chunk; |
| 416 | + /* We use >= because there cannot be an object at the beginning of a chunk. |
| 417 | + But there can be an empty object at that address |
| 418 | + at the end of another chunk. */ |
| 419 | + while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj)) |
| 420 | + { |
| 421 | + plp = lp->prev; |
| 422 | + CALL_FREEFUN (h, lp); |
| 423 | + lp = plp; |
| 424 | + /* If we switch chunks, we can't tell whether the new current |
| 425 | + chunk contains an empty object, so assume that it may. */ |
| 426 | + h->maybe_empty_object = 1; |
| 427 | + } |
| 428 | + if (lp) |
| 429 | + { |
| 430 | + h->object_base = h->next_free = (char *) (obj); |
| 431 | + h->chunk_limit = lp->limit; |
| 432 | + h->chunk = lp; |
| 433 | + } |
| 434 | + else if (obj != 0) |
| 435 | + /* obj is not in any of the chunks! */ |
| 436 | + abort (); |
| 437 | +} |
| 438 | + |
| 439 | +int |
| 440 | +_obstack_memory_used (h) |
| 441 | + struct obstack *h; |
| 442 | +{ |
| 443 | + register struct _obstack_chunk* lp; |
| 444 | + register int nbytes = 0; |
| 445 | + |
| 446 | + for (lp = h->chunk; lp != 0; lp = lp->prev) |
| 447 | + { |
| 448 | + nbytes += lp->limit - (char *) lp; |
| 449 | + } |
| 450 | + return nbytes; |
| 451 | +} |
| 452 | + |
| 453 | +/* Define the error handler. */ |
| 454 | +#ifndef _ |
| 455 | +# ifdef HAVE_LIBINTL_H |
| 456 | +# include <libintl.h> |
| 457 | +# ifndef _ |
| 458 | +# define _(Str) gettext (Str) |
| 459 | +# endif |
| 460 | +# else |
| 461 | +# define _(Str) (Str) |
| 462 | +# endif |
| 463 | +#endif |
| 464 | +#if defined _LIBC && defined USE_IN_LIBIO |
| 465 | +# include <libio/iolibio.h> |
| 466 | +# define fputs(s, f) _IO_fputs (s, f) |
| 467 | +#endif |
| 468 | + |
| 469 | +static void |
| 470 | +print_and_abort () |
| 471 | +{ |
| 472 | + fputs (_("memory exhausted"), stderr); |
| 473 | + fputc ('\n', stderr); |
| 474 | + exit (obstack_exit_failure); |
| 475 | +} |
| 476 | + |
| 477 | +#if 0 |
| 478 | +/* These are now turned off because the applications do not use it |
| 479 | + and it uses bcopy via obstack_grow, which causes trouble on sysV. */ |
| 480 | + |
| 481 | +/* Now define the functional versions of the obstack macros. |
| 482 | + Define them to simply use the corresponding macros to do the job. */ |
| 483 | + |
| 484 | +#if defined (__STDC__) && __STDC__ |
| 485 | +/* These function definitions do not work with non-ANSI preprocessors; |
| 486 | + they won't pass through the macro names in parentheses. */ |
| 487 | + |
| 488 | +/* The function names appear in parentheses in order to prevent |
| 489 | + the macro-definitions of the names from being expanded there. */ |
| 490 | + |
| 491 | +POINTER (obstack_base) (obstack) |
| 492 | + struct obstack *obstack; |
| 493 | +{ |
| 494 | + return obstack_base (obstack); |
| 495 | +} |
| 496 | + |
| 497 | +POINTER (obstack_next_free) (obstack) |
| 498 | + struct obstack *obstack; |
| 499 | +{ |
| 500 | + return obstack_next_free (obstack); |
| 501 | +} |
| 502 | + |
| 503 | +int (obstack_object_size) (obstack) |
| 504 | + struct obstack *obstack; |
| 505 | +{ |
| 506 | + return obstack_object_size (obstack); |
| 507 | +} |
| 508 | + |
| 509 | +int (obstack_room) (obstack) |
| 510 | + struct obstack *obstack; |
| 511 | +{ |
| 512 | + return obstack_room (obstack); |
| 513 | +} |
| 514 | + |
| 515 | +int (obstack_make_room) (obstack, length) |
| 516 | + struct obstack *obstack; |
| 517 | + int length; |
| 518 | +{ |
| 519 | + return obstack_make_room (obstack, length); |
| 520 | +} |
| 521 | + |
| 522 | +void (obstack_grow) (obstack, pointer, length) |
| 523 | + struct obstack *obstack; |
| 524 | + POINTER pointer; |
| 525 | + int length; |
| 526 | +{ |
| 527 | + obstack_grow (obstack, pointer, length); |
| 528 | +} |
| 529 | + |
| 530 | +void (obstack_grow0) (obstack, pointer, length) |
| 531 | + struct obstack *obstack; |
| 532 | + POINTER pointer; |
| 533 | + int length; |
| 534 | +{ |
| 535 | + obstack_grow0 (obstack, pointer, length); |
| 536 | +} |
| 537 | + |
| 538 | +void (obstack_1grow) (obstack, character) |
| 539 | + struct obstack *obstack; |
| 540 | + int character; |
| 541 | +{ |
| 542 | + obstack_1grow (obstack, character); |
| 543 | +} |
| 544 | + |
| 545 | +void (obstack_blank) (obstack, length) |
| 546 | + struct obstack *obstack; |
| 547 | + int length; |
| 548 | +{ |
| 549 | + obstack_blank (obstack, length); |
| 550 | +} |
| 551 | + |
| 552 | +void (obstack_1grow_fast) (obstack, character) |
| 553 | + struct obstack *obstack; |
| 554 | + int character; |
| 555 | +{ |
| 556 | + obstack_1grow_fast (obstack, character); |
| 557 | +} |
| 558 | + |
| 559 | +void (obstack_blank_fast) (obstack, length) |
| 560 | + struct obstack *obstack; |
| 561 | + int length; |
| 562 | +{ |
| 563 | + obstack_blank_fast (obstack, length); |
| 564 | +} |
| 565 | + |
| 566 | +POINTER (obstack_finish) (obstack) |
| 567 | + struct obstack *obstack; |
| 568 | +{ |
| 569 | + return obstack_finish (obstack); |
| 570 | +} |
| 571 | + |
| 572 | +POINTER (obstack_alloc) (obstack, length) |
| 573 | + struct obstack *obstack; |
| 574 | + int length; |
| 575 | +{ |
| 576 | + return obstack_alloc (obstack, length); |
| 577 | +} |
| 578 | + |
| 579 | +POINTER (obstack_copy) (obstack, pointer, length) |
| 580 | + struct obstack *obstack; |
| 581 | + POINTER pointer; |
| 582 | + int length; |
| 583 | +{ |
| 584 | + return obstack_copy (obstack, pointer, length); |
| 585 | +} |
| 586 | + |
| 587 | +POINTER (obstack_copy0) (obstack, pointer, length) |
| 588 | + struct obstack *obstack; |
| 589 | + POINTER pointer; |
| 590 | + int length; |
| 591 | +{ |
| 592 | + return obstack_copy0 (obstack, pointer, length); |
| 593 | +} |
| 594 | + |
| 595 | +#endif /* __STDC__ */ |
| 596 | + |
| 597 | +#endif /* 0 */ |
| 598 | + |
| 599 | +#endif /* !ELIDE_CODE */ |
Property changes on: trunk/extensions/FastStringSearch/obstack.c |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 600 | + native |
Index: trunk/extensions/FastStringSearch/php_fss.h |
— | — | @@ -0,0 +1,49 @@ |
| 2 | +/* |
| 3 | + * PHP extension for fast string search routines |
| 4 | + * Copyright Tim Starling, 2006 |
| 5 | + * License: DWTFYWWI |
| 6 | +*/ |
| 7 | + |
| 8 | +#ifndef PHP_FSS_H |
| 9 | +#define PHP_FSS_H |
| 10 | + |
| 11 | +extern zend_module_entry fss_module_entry; |
| 12 | +#define phpext_fss_ptr &fss_module_entry |
| 13 | + |
| 14 | +#ifdef PHP_WIN32 |
| 15 | +#define PHP_FSS_API __declspec(dllexport) |
| 16 | +#else |
| 17 | +#define PHP_FSS_API |
| 18 | +#endif |
| 19 | + |
| 20 | +#ifdef ZTS |
| 21 | +#include "TSRM.h" |
| 22 | +#endif |
| 23 | + |
| 24 | +PHP_MINIT_FUNCTION(fss); |
| 25 | +PHP_MSHUTDOWN_FUNCTION(fss); |
| 26 | +PHP_MINFO_FUNCTION(fss); |
| 27 | + |
| 28 | +PHP_FUNCTION(fss_prep_search); |
| 29 | +PHP_FUNCTION(fss_exec_search); |
| 30 | +PHP_FUNCTION(fss_prep_replace); |
| 31 | +PHP_FUNCTION(fss_exec_replace); |
| 32 | +PHP_FUNCTION(fss_free); |
| 33 | + |
| 34 | +#ifdef ZTS |
| 35 | +#define FSS_G(v) TSRMG(fss_globals_id, zend_fss_globals *, v) |
| 36 | +#else |
| 37 | +#define FSS_G(v) (fss_globals.v) |
| 38 | +#endif |
| 39 | + |
| 40 | +#endif /* PHP_FSS_H */ |
| 41 | + |
| 42 | + |
| 43 | +/* |
| 44 | + * Local variables: |
| 45 | + * tab-width: 4 |
| 46 | + * c-basic-offset: 4 |
| 47 | + * End: |
| 48 | + * vim600: noet sw=4 ts=4 fdm=marker |
| 49 | + * vim<600: noet sw=4 ts=4 |
| 50 | + */ |
Property changes on: trunk/extensions/FastStringSearch/php_fss.h |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 51 | + native |
Index: trunk/extensions/FastStringSearch/fss.c |
— | — | @@ -0,0 +1,343 @@ |
| 2 | +/* |
| 3 | + * PHP extension for fast string search routines |
| 4 | + * Copyright Tim Starling, 2006 |
| 5 | + * License: DWTFYWWI |
| 6 | +*/ |
| 7 | + |
| 8 | +#ifdef HAVE_CONFIG_H |
| 9 | +#include "config.h" |
| 10 | +#endif |
| 11 | + |
| 12 | +#include "php.h" |
| 13 | +#include "php_ini.h" |
| 14 | +#include "ext/standard/info.h" |
| 15 | +#include "php_fss.h" |
| 16 | +#include "ext/standard/php_smart_str.h" |
| 17 | +#include "kwset.h" |
| 18 | + |
| 19 | +typedef struct { |
| 20 | + kwset_t set; |
| 21 | + int replace_size; |
| 22 | + zval * replace[1]; |
| 23 | +} fss_resource_t; |
| 24 | + |
| 25 | +static void _php_fss_close(zend_rsrc_list_entry *rsrc TSRMLS_DC); |
| 26 | + |
| 27 | +/* True global resources - no need for thread safety here */ |
| 28 | +static int le_fss; |
| 29 | + |
| 30 | +/* {{{ fss_functions[] |
| 31 | + */ |
| 32 | +zend_function_entry fss_functions[] = { |
| 33 | + PHP_FE(fss_prep_search, NULL) |
| 34 | + PHP_FE(fss_exec_search, NULL) |
| 35 | + PHP_FE(fss_prep_replace, NULL) |
| 36 | + PHP_FE(fss_exec_replace, NULL) |
| 37 | + PHP_FE(fss_free, NULL) |
| 38 | + {NULL, NULL, NULL} /* Must be the last line in fss_functions[] */ |
| 39 | +}; |
| 40 | +/* }}} */ |
| 41 | + |
| 42 | +/* {{{ fss_module_entry |
| 43 | + */ |
| 44 | +zend_module_entry fss_module_entry = { |
| 45 | +#if ZEND_MODULE_API_NO >= 20010901 |
| 46 | + STANDARD_MODULE_HEADER, |
| 47 | +#endif |
| 48 | + "fss", |
| 49 | + fss_functions, |
| 50 | + PHP_MINIT(fss), |
| 51 | + PHP_MSHUTDOWN(fss), |
| 52 | + NULL, /* RINIT */ |
| 53 | + NULL, /* RSHUTDOWN */ |
| 54 | + PHP_MINFO(fss), |
| 55 | +#if ZEND_MODULE_API_NO >= 20010901 |
| 56 | + "0.1.1", |
| 57 | +#endif |
| 58 | + STANDARD_MODULE_PROPERTIES |
| 59 | +}; |
| 60 | +/* }}} */ |
| 61 | + |
| 62 | +#ifdef COMPILE_DL_FSS |
| 63 | +ZEND_GET_MODULE(fss) |
| 64 | +#endif |
| 65 | + |
| 66 | + |
| 67 | + |
| 68 | +/* {{{ PHP_MINIT_FUNCTION |
| 69 | + */ |
| 70 | +PHP_MINIT_FUNCTION(fss) |
| 71 | +{ |
| 72 | + le_fss = zend_register_list_destructors_ex(_php_fss_close, NULL, "fss", module_number); |
| 73 | + return SUCCESS; |
| 74 | +} |
| 75 | +/* }}} */ |
| 76 | + |
| 77 | +/* {{{ PHP_MSHUTDOWN_FUNCTION |
| 78 | + */ |
| 79 | +PHP_MSHUTDOWN_FUNCTION(fss) |
| 80 | +{ |
| 81 | + return SUCCESS; |
| 82 | +} |
| 83 | +/* }}} */ |
| 84 | + |
| 85 | +/* {{{ PHP_MINFO_FUNCTION |
| 86 | + */ |
| 87 | +PHP_MINFO_FUNCTION(fss) |
| 88 | +{ |
| 89 | + php_info_print_table_start(); |
| 90 | + php_info_print_table_header(2, "Fast string search support", "enabled"); |
| 91 | + php_info_print_table_end(); |
| 92 | +} |
| 93 | +/* }}} */ |
| 94 | + |
| 95 | + |
| 96 | +/* {{{ proto resource fss_prep_search(mixed needle) |
| 97 | + Prepare a string search */ |
| 98 | +PHP_FUNCTION(fss_prep_search) |
| 99 | +{ |
| 100 | + int argc = ZEND_NUM_ARGS(); |
| 101 | + zval *needle = NULL, **elem; |
| 102 | + fss_resource_t * res; |
| 103 | + HashTable * hash; |
| 104 | + HashPosition hpos; |
| 105 | + const char *error; |
| 106 | + |
| 107 | + if (zend_parse_parameters(argc TSRMLS_CC, "z", &needle) == FAILURE) |
| 108 | + return; |
| 109 | + |
| 110 | + res = emalloc(sizeof(fss_resource_t)); |
| 111 | + res->set = kwsalloc(NULL); |
| 112 | + res->replace_size = 0; |
| 113 | + |
| 114 | + if (Z_TYPE_P(needle) == IS_ARRAY) { |
| 115 | + hash = Z_ARRVAL_P(needle); |
| 116 | + for (zend_hash_internal_pointer_reset_ex(hash, &hpos); |
| 117 | + zend_hash_get_current_data_ex(hash, (void**)&elem, &hpos) == SUCCESS; |
| 118 | + zend_hash_move_forward_ex(hash, &hpos)) |
| 119 | + { |
| 120 | + convert_to_string_ex(elem); |
| 121 | + /* Don't add zero-length strings, that will cause infinite loops in |
| 122 | + search routines */ |
| 123 | + if (!Z_STRLEN_PP(elem)) { |
| 124 | + continue; |
| 125 | + } |
| 126 | + error = kwsincr(res->set, Z_STRVAL_PP(elem), Z_STRLEN_PP(elem)); |
| 127 | + if (error) { |
| 128 | + php_error(E_WARNING, "fss_prep_search: %s", error); |
| 129 | + } |
| 130 | + } |
| 131 | + } else { |
| 132 | + convert_to_string_ex(&needle); |
| 133 | + error = kwsincr(res->set, Z_STRVAL_P(needle), Z_STRLEN_P(needle)); |
| 134 | + if (error) { |
| 135 | + php_error(E_WARNING, "fss_prep_search: %s", error); |
| 136 | + } |
| 137 | + } |
| 138 | + kwsprep(res->set); |
| 139 | + ZEND_REGISTER_RESOURCE(return_value, res, le_fss); |
| 140 | +} |
| 141 | +/* }}} */ |
| 142 | + |
| 143 | +/* {{{ proto array fss_exec_search(resource handle, string haystack [, int offset]) |
| 144 | + Execute a string search, return the first match */ |
| 145 | +PHP_FUNCTION(fss_exec_search) |
| 146 | +{ |
| 147 | + char *haystack = NULL; |
| 148 | + int argc = ZEND_NUM_ARGS(); |
| 149 | + int handle_id = -1; |
| 150 | + int haystack_len; |
| 151 | + int offset = 0; |
| 152 | + zval *handle = NULL, *temp; |
| 153 | + fss_resource_t * res; |
| 154 | + struct kwsmatch m; |
| 155 | + size_t match_pos; |
| 156 | + |
| 157 | + if (zend_parse_parameters(argc TSRMLS_CC, "rs|l", &handle, &haystack, &haystack_len, &offset) == FAILURE) |
| 158 | + return; |
| 159 | + |
| 160 | + ZEND_FETCH_RESOURCE(res, fss_resource_t*, &handle, handle_id, "fss", le_fss); |
| 161 | + match_pos = kwsexec(res->set, haystack + offset, haystack_len - offset, &m); |
| 162 | + |
| 163 | + if (match_pos == (size_t)-1) { |
| 164 | + RETURN_FALSE; |
| 165 | + } |
| 166 | + |
| 167 | + array_init(return_value); |
| 168 | + |
| 169 | + MAKE_STD_ZVAL(temp); |
| 170 | + ZVAL_LONG(temp, m.offset[0] + offset); |
| 171 | + zend_hash_next_index_insert(HASH_OF(return_value), (void *)&temp, sizeof(zval *), NULL); |
| 172 | + |
| 173 | + MAKE_STD_ZVAL(temp); |
| 174 | + ZVAL_LONG(temp, m.size[0]); |
| 175 | + zend_hash_next_index_insert(HASH_OF(return_value), (void *)&temp, sizeof(zval *), NULL); |
| 176 | +} |
| 177 | +/* }}} */ |
| 178 | + |
| 179 | +/* {{{ proto resource fss_prep_replace(array replace_pairs) |
| 180 | + Prepare a search and replace operation */ |
| 181 | +PHP_FUNCTION(fss_prep_replace) |
| 182 | +{ |
| 183 | + int argc = ZEND_NUM_ARGS(); |
| 184 | + zval *replace_pairs = NULL, **value; |
| 185 | + HashTable * hash; |
| 186 | + HashPosition hpos; |
| 187 | + fss_resource_t * res; |
| 188 | + const char *error; |
| 189 | + int i; |
| 190 | + char *string_key; |
| 191 | + char buffer[40]; |
| 192 | + uint string_key_len; |
| 193 | + ulong num_key; |
| 194 | + |
| 195 | + |
| 196 | + if (zend_parse_parameters(argc TSRMLS_CC, "a", &replace_pairs) == FAILURE) |
| 197 | + return; |
| 198 | + |
| 199 | + hash = Z_ARRVAL_P(replace_pairs); |
| 200 | + |
| 201 | + /* fss_resource_t has an open-ended array, we allocate enough memory for the |
| 202 | + header plus all the array elements, plus one extra element for good measure */ |
| 203 | + res = safe_emalloc(sizeof(zval*), hash->nNumOfElements, sizeof(fss_resource_t)); |
| 204 | + res->set = kwsalloc(NULL); |
| 205 | + res->replace_size = hash->nNumOfElements; |
| 206 | + |
| 207 | + for (zend_hash_internal_pointer_reset_ex(hash, &hpos), i = 0; |
| 208 | + zend_hash_get_current_data_ex(hash, (void**)&value, &hpos) == SUCCESS; |
| 209 | + zend_hash_move_forward_ex(hash, &hpos), ++i) |
| 210 | + { |
| 211 | + /* Convert numeric keys to string */ |
| 212 | + if (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, |
| 213 | + &hpos) == HASH_KEY_IS_LONG) |
| 214 | + { |
| 215 | + sprintf(buffer, "%u", num_key); |
| 216 | + string_key = buffer; |
| 217 | + string_key_len = strlen(buffer); |
| 218 | + } else { |
| 219 | + /* Minus one for null */ |
| 220 | + string_key_len--; |
| 221 | + } |
| 222 | + |
| 223 | + /* Don't add zero-length strings, that will cause infinite loops in |
| 224 | + search routines */ |
| 225 | + if (!string_key_len) { |
| 226 | + res->replace[i] = NULL; |
| 227 | + continue; |
| 228 | + } |
| 229 | + |
| 230 | + /* Add the key to the search tree */ |
| 231 | + error = kwsincr(res->set, string_key, string_key_len); |
| 232 | + if (error) { |
| 233 | + res->replace[i] = NULL; |
| 234 | + php_error(E_WARNING, "fss_prep_replace: %s", error); |
| 235 | + continue; |
| 236 | + } |
| 237 | + |
| 238 | + /* Add the value to the replace array */ |
| 239 | + convert_to_string_ex(value); |
| 240 | + ZVAL_ADDREF(*value); |
| 241 | + res->replace[i] = *value; |
| 242 | + } |
| 243 | + kwsprep(res->set); |
| 244 | + ZEND_REGISTER_RESOURCE(return_value, res, le_fss); |
| 245 | +} |
| 246 | +/* }}} */ |
| 247 | + |
| 248 | +/* {{{ proto string fss_exec_replace(resource handle, string subject) |
| 249 | + Execute a search and replace operation */ |
| 250 | +PHP_FUNCTION(fss_exec_replace) |
| 251 | +{ |
| 252 | + char *subject = NULL; |
| 253 | + int argc = ZEND_NUM_ARGS(); |
| 254 | + int handle_id = -1; |
| 255 | + int subject_len; |
| 256 | + zval *handle = NULL; |
| 257 | + size_t match_pos = 0; |
| 258 | + fss_resource_t * res; |
| 259 | + struct kwsmatch m; |
| 260 | + smart_str result = {0}; |
| 261 | + zval *temp; |
| 262 | + |
| 263 | + if (zend_parse_parameters(argc TSRMLS_CC, "rs", &handle, &subject, &subject_len) == FAILURE) |
| 264 | + return; |
| 265 | + |
| 266 | + ZEND_FETCH_RESOURCE(res, fss_resource_t*, &handle, handle_id, "fss", le_fss); |
| 267 | + |
| 268 | + while (subject_len > 0 && |
| 269 | + (size_t)-1 != (match_pos = kwsexec(res->set, subject, subject_len, &m))) |
| 270 | + { |
| 271 | + /* Output the leading portion */ |
| 272 | + smart_str_appendl(&result, subject, match_pos); |
| 273 | + |
| 274 | + /* Output the replacement portion |
| 275 | + The index may be above the size of the replacement array if the |
| 276 | + object was prepared as a search object instead of a replacement |
| 277 | + object. In that case, we replace the item with an empty string |
| 278 | + */ |
| 279 | + if (m.index < res->replace_size) { |
| 280 | + temp = res->replace[m.index]; |
| 281 | + if (temp) { |
| 282 | + smart_str_appendl(&result, Z_STRVAL_P(temp), Z_STRLEN_P(temp)); |
| 283 | + } |
| 284 | + } |
| 285 | + |
| 286 | + /* Increment and continue */ |
| 287 | + subject_len -= match_pos + m.size[0]; |
| 288 | + subject += match_pos + m.size[0]; |
| 289 | + } |
| 290 | + /* Output the remaining portion */ |
| 291 | + if (subject_len > 0) { |
| 292 | + smart_str_appendl(&result, subject, subject_len); |
| 293 | + } |
| 294 | + /* Return the result */ |
| 295 | + smart_str_0(&result); |
| 296 | + RETVAL_STRINGL(result.c, result.len, 0); |
| 297 | +} |
| 298 | +/* }}} */ |
| 299 | + |
| 300 | +/* {{{ proto void fss_free(resource handle) |
| 301 | + Free an FSS object */ |
| 302 | +PHP_FUNCTION(fss_free) |
| 303 | +{ |
| 304 | + int argc = ZEND_NUM_ARGS(); |
| 305 | + int handle_id = -1; |
| 306 | + zval *handle = NULL; |
| 307 | + fss_resource_t * res; |
| 308 | + |
| 309 | + if (zend_parse_parameters(argc TSRMLS_CC, "r", &handle) == FAILURE) |
| 310 | + return; |
| 311 | + |
| 312 | + ZEND_FETCH_RESOURCE(res, fss_resource_t*, &handle, handle_id, "fss", le_fss); |
| 313 | + if (handle) { |
| 314 | + zend_list_delete(Z_RESVAL_P(handle)); |
| 315 | + } |
| 316 | +} |
| 317 | +/* }}} */ |
| 318 | + |
| 319 | +/* {{{ _php_fss_close |
| 320 | + List destructor for FSS handles */ |
| 321 | +static void _php_fss_close(zend_rsrc_list_entry *rsrc TSRMLS_DC) |
| 322 | +{ |
| 323 | + int i; |
| 324 | + fss_resource_t * res = (fss_resource_t *)rsrc->ptr; |
| 325 | + /* Destroy the replace strings */ |
| 326 | + for (i = 0; i < res->replace_size; i++) { |
| 327 | + zval_ptr_dtor(&res->replace[i]); |
| 328 | + } |
| 329 | + /* Destroy the kwset structure */ |
| 330 | + kwsfree(res->set); |
| 331 | + /* Destroy the resource structure itself */ |
| 332 | + efree(res); |
| 333 | +} |
| 334 | +/* }}} */ |
| 335 | + |
| 336 | + |
| 337 | +/* |
| 338 | + * Local variables: |
| 339 | + * tab-width: 4 |
| 340 | + * c-basic-offset: 4 |
| 341 | + * End: |
| 342 | + * vim600: noet sw=4 ts=4 fdm=marker |
| 343 | + * vim<600: noet sw=4 ts=4 |
| 344 | + */ |
Property changes on: trunk/extensions/FastStringSearch/fss.c |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 345 | + native |
Index: trunk/extensions/FastStringSearch/obstack.h |
— | — | @@ -0,0 +1,593 @@ |
| 2 | +/* obstack.h - object stack macros |
| 3 | + Copyright (C) 1988,89,90,91,92,93,94,96,97,98,99 Free Software Foundation, Inc. |
| 4 | + |
| 5 | + This file is part of the GNU C Library. Its master source is NOT part of |
| 6 | + the C library, however. The master source lives in /gd/gnu/lib. |
| 7 | + |
| 8 | + The GNU C Library is free software; you can redistribute it and/or |
| 9 | + modify it under the terms of the GNU Library General Public License as |
| 10 | + published by the Free Software Foundation; either version 2 of the |
| 11 | + License, or (at your option) any later version. |
| 12 | + |
| 13 | + The GNU C Library is distributed in the hope that it will be useful, |
| 14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | + Library General Public License for more details. |
| 17 | + |
| 18 | + You should have received a copy of the GNU Library General Public |
| 19 | + License along with the GNU C Library; see the file COPYING.LIB. If not, |
| 20 | + write to the Free Software Foundation, Inc., |
| 21 | + 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ |
| 22 | + |
| 23 | +/* Summary: |
| 24 | + |
| 25 | +All the apparent functions defined here are macros. The idea |
| 26 | +is that you would use these pre-tested macros to solve a |
| 27 | +very specific set of problems, and they would run fast. |
| 28 | +Caution: no side-effects in arguments please!! They may be |
| 29 | +evaluated MANY times!! |
| 30 | + |
| 31 | +These macros operate a stack of objects. Each object starts life |
| 32 | +small, and may grow to maturity. (Consider building a word syllable |
| 33 | +by syllable.) An object can move while it is growing. Once it has |
| 34 | +been "finished" it never changes address again. So the "top of the |
| 35 | +stack" is typically an immature growing object, while the rest of the |
| 36 | +stack is of mature, fixed size and fixed address objects. |
| 37 | + |
| 38 | +These routines grab large chunks of memory, using a function you |
| 39 | +supply, called `obstack_chunk_alloc'. On occasion, they free chunks, |
| 40 | +by calling `obstack_chunk_free'. You must define them and declare |
| 41 | +them before using any obstack macros. |
| 42 | + |
| 43 | +Each independent stack is represented by a `struct obstack'. |
| 44 | +Each of the obstack macros expects a pointer to such a structure |
| 45 | +as the first argument. |
| 46 | + |
| 47 | +One motivation for this package is the problem of growing char strings |
| 48 | +in symbol tables. Unless you are "fascist pig with a read-only mind" |
| 49 | +--Gosper's immortal quote from HAKMEM item 154, out of context--you |
| 50 | +would not like to put any arbitrary upper limit on the length of your |
| 51 | +symbols. |
| 52 | + |
| 53 | +In practice this often means you will build many short symbols and a |
| 54 | +few long symbols. At the time you are reading a symbol you don't know |
| 55 | +how long it is. One traditional method is to read a symbol into a |
| 56 | +buffer, realloc()ating the buffer every time you try to read a symbol |
| 57 | +that is longer than the buffer. This is beaut, but you still will |
| 58 | +want to copy the symbol from the buffer to a more permanent |
| 59 | +symbol-table entry say about half the time. |
| 60 | + |
| 61 | +With obstacks, you can work differently. Use one obstack for all symbol |
| 62 | +names. As you read a symbol, grow the name in the obstack gradually. |
| 63 | +When the name is complete, finalize it. Then, if the symbol exists already, |
| 64 | +free the newly read name. |
| 65 | + |
| 66 | +The way we do this is to take a large chunk, allocating memory from |
| 67 | +low addresses. When you want to build a symbol in the chunk you just |
| 68 | +add chars above the current "high water mark" in the chunk. When you |
| 69 | +have finished adding chars, because you got to the end of the symbol, |
| 70 | +you know how long the chars are, and you can create a new object. |
| 71 | +Mostly the chars will not burst over the highest address of the chunk, |
| 72 | +because you would typically expect a chunk to be (say) 100 times as |
| 73 | +long as an average object. |
| 74 | + |
| 75 | +In case that isn't clear, when we have enough chars to make up |
| 76 | +the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed) |
| 77 | +so we just point to it where it lies. No moving of chars is |
| 78 | +needed and this is the second win: potentially long strings need |
| 79 | +never be explicitly shuffled. Once an object is formed, it does not |
| 80 | +change its address during its lifetime. |
| 81 | + |
| 82 | +When the chars burst over a chunk boundary, we allocate a larger |
| 83 | +chunk, and then copy the partly formed object from the end of the old |
| 84 | +chunk to the beginning of the new larger chunk. We then carry on |
| 85 | +accreting characters to the end of the object as we normally would. |
| 86 | + |
| 87 | +A special macro is provided to add a single char at a time to a |
| 88 | +growing object. This allows the use of register variables, which |
| 89 | +break the ordinary 'growth' macro. |
| 90 | + |
| 91 | +Summary: |
| 92 | + We allocate large chunks. |
| 93 | + We carve out one object at a time from the current chunk. |
| 94 | + Once carved, an object never moves. |
| 95 | + We are free to append data of any size to the currently |
| 96 | + growing object. |
| 97 | + Exactly one object is growing in an obstack at any one time. |
| 98 | + You can run one obstack per control block. |
| 99 | + You may have as many control blocks as you dare. |
| 100 | + Because of the way we do it, you can `unwind' an obstack |
| 101 | + back to a previous state. (You may remove objects much |
| 102 | + as you would with a stack.) |
| 103 | +*/ |
| 104 | + |
| 105 | + |
| 106 | +/* Don't do the contents of this file more than once. */ |
| 107 | + |
| 108 | +#ifndef _OBSTACK_H |
| 109 | +#define _OBSTACK_H 1 |
| 110 | + |
| 111 | +#ifdef __cplusplus |
| 112 | +extern "C" { |
| 113 | +#endif |
| 114 | + |
| 115 | +/* We use subtraction of (char *) 0 instead of casting to int |
| 116 | + because on word-addressable machines a simple cast to int |
| 117 | + may ignore the byte-within-word field of the pointer. */ |
| 118 | + |
| 119 | +#ifndef __PTR_TO_INT |
| 120 | +# define __PTR_TO_INT(P) ((P) - (char *) 0) |
| 121 | +#endif |
| 122 | + |
| 123 | +#ifndef __INT_TO_PTR |
| 124 | +# define __INT_TO_PTR(P) ((P) + (char *) 0) |
| 125 | +#endif |
| 126 | + |
| 127 | +/* We need the type of the resulting object. If __PTRDIFF_TYPE__ is |
| 128 | + defined, as with GNU C, use that; that way we don't pollute the |
| 129 | + namespace with <stddef.h>'s symbols. Otherwise, if <stddef.h> is |
| 130 | + available, include it and use ptrdiff_t. In traditional C, long is |
| 131 | + the best that we can do. */ |
| 132 | + |
| 133 | +#ifdef __PTRDIFF_TYPE__ |
| 134 | +# define PTR_INT_TYPE __PTRDIFF_TYPE__ |
| 135 | +#else |
| 136 | +# ifdef HAVE_STDDEF_H |
| 137 | +# include <stddef.h> |
| 138 | +# define PTR_INT_TYPE ptrdiff_t |
| 139 | +# else |
| 140 | +# define PTR_INT_TYPE long |
| 141 | +# endif |
| 142 | +#endif |
| 143 | + |
| 144 | +#if defined _LIBC || defined HAVE_STRING_H |
| 145 | +# include <string.h> |
| 146 | +# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N)) |
| 147 | +#else |
| 148 | +# ifdef memcpy |
| 149 | +# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N)) |
| 150 | +# else |
| 151 | +# define _obstack_memcpy(To, From, N) bcopy ((From), (To), (N)) |
| 152 | +# endif |
| 153 | +#endif |
| 154 | + |
| 155 | +struct _obstack_chunk /* Lives at front of each chunk. */ |
| 156 | +{ |
| 157 | + char *limit; /* 1 past end of this chunk */ |
| 158 | + struct _obstack_chunk *prev; /* address of prior chunk or NULL */ |
| 159 | + char contents[4]; /* objects begin here */ |
| 160 | +}; |
| 161 | + |
| 162 | +struct obstack /* control current object in current chunk */ |
| 163 | +{ |
| 164 | + long chunk_size; /* preferred size to allocate chunks in */ |
| 165 | + struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */ |
| 166 | + char *object_base; /* address of object we are building */ |
| 167 | + char *next_free; /* where to add next char to current object */ |
| 168 | + char *chunk_limit; /* address of char after current chunk */ |
| 169 | + PTR_INT_TYPE temp; /* Temporary for some macros. */ |
| 170 | + int alignment_mask; /* Mask of alignment for each object. */ |
| 171 | +#if defined __STDC__ && __STDC__ |
| 172 | + /* These prototypes vary based on `use_extra_arg', and we use |
| 173 | + casts to the prototypeless function type in all assignments, |
| 174 | + but having prototypes here quiets -Wstrict-prototypes. */ |
| 175 | + struct _obstack_chunk *(*chunkfun) (void *, long); |
| 176 | + void (*freefun) (void *, struct _obstack_chunk *); |
| 177 | + void *extra_arg; /* first arg for chunk alloc/dealloc funcs */ |
| 178 | +#else |
| 179 | + struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */ |
| 180 | + void (*freefun) (); /* User's function to free a chunk. */ |
| 181 | + char *extra_arg; /* first arg for chunk alloc/dealloc funcs */ |
| 182 | +#endif |
| 183 | + unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */ |
| 184 | + unsigned maybe_empty_object:1;/* There is a possibility that the current |
| 185 | + chunk contains a zero-length object. This |
| 186 | + prevents freeing the chunk if we allocate |
| 187 | + a bigger chunk to replace it. */ |
| 188 | + unsigned alloc_failed:1; /* No longer used, as we now call the failed |
| 189 | + handler on error, but retained for binary |
| 190 | + compatibility. */ |
| 191 | +}; |
| 192 | + |
| 193 | +/* Declare the external functions we use; they are in obstack.c. */ |
| 194 | + |
| 195 | +#if defined __STDC__ && __STDC__ |
| 196 | +extern void _obstack_newchunk (struct obstack *, int); |
| 197 | +extern void _obstack_free (struct obstack *, void *); |
| 198 | +extern int _obstack_begin (struct obstack *, int, int, |
| 199 | + void *(*) (long), void (*) (void *)); |
| 200 | +extern int _obstack_begin_1 (struct obstack *, int, int, |
| 201 | + void *(*) (void *, long), |
| 202 | + void (*) (void *, void *), void *); |
| 203 | +extern int _obstack_memory_used (struct obstack *); |
| 204 | +#else |
| 205 | +extern void _obstack_newchunk (); |
| 206 | +extern void _obstack_free (); |
| 207 | +extern int _obstack_begin (); |
| 208 | +extern int _obstack_begin_1 (); |
| 209 | +extern int _obstack_memory_used (); |
| 210 | +#endif |
| 211 | + |
| 212 | +#if defined __STDC__ && __STDC__ |
| 213 | + |
| 214 | +/* Do the function-declarations after the structs |
| 215 | + but before defining the macros. */ |
| 216 | + |
| 217 | +void obstack_init (struct obstack *obstack); |
| 218 | + |
| 219 | +void * obstack_alloc (struct obstack *obstack, int size); |
| 220 | + |
| 221 | +void * obstack_copy (struct obstack *obstack, void *address, int size); |
| 222 | +void * obstack_copy0 (struct obstack *obstack, void *address, int size); |
| 223 | + |
| 224 | +void obstack_free (struct obstack *obstack, void *block); |
| 225 | + |
| 226 | +void obstack_blank (struct obstack *obstack, int size); |
| 227 | + |
| 228 | +void obstack_grow (struct obstack *obstack, void *data, int size); |
| 229 | +void obstack_grow0 (struct obstack *obstack, void *data, int size); |
| 230 | + |
| 231 | +void obstack_1grow (struct obstack *obstack, int data_char); |
| 232 | +void obstack_ptr_grow (struct obstack *obstack, void *data); |
| 233 | +void obstack_int_grow (struct obstack *obstack, int data); |
| 234 | + |
| 235 | +void * obstack_finish (struct obstack *obstack); |
| 236 | + |
| 237 | +int obstack_object_size (struct obstack *obstack); |
| 238 | + |
| 239 | +int obstack_room (struct obstack *obstack); |
| 240 | +void obstack_make_room (struct obstack *obstack, int size); |
| 241 | +void obstack_1grow_fast (struct obstack *obstack, int data_char); |
| 242 | +void obstack_ptr_grow_fast (struct obstack *obstack, void *data); |
| 243 | +void obstack_int_grow_fast (struct obstack *obstack, int data); |
| 244 | +void obstack_blank_fast (struct obstack *obstack, int size); |
| 245 | + |
| 246 | +void * obstack_base (struct obstack *obstack); |
| 247 | +void * obstack_next_free (struct obstack *obstack); |
| 248 | +int obstack_alignment_mask (struct obstack *obstack); |
| 249 | +int obstack_chunk_size (struct obstack *obstack); |
| 250 | +int obstack_memory_used (struct obstack *obstack); |
| 251 | + |
| 252 | +#endif /* __STDC__ */ |
| 253 | + |
| 254 | +/* Non-ANSI C cannot really support alternative functions for these macros, |
| 255 | + so we do not declare them. */ |
| 256 | + |
| 257 | +/* Error handler called when `obstack_chunk_alloc' failed to allocate |
| 258 | + more memory. This can be set to a user defined function which |
| 259 | + should either abort gracefully or use longjump - but shouldn't |
| 260 | + return. The default action is to print a message and abort. */ |
| 261 | +#if defined __STDC__ && __STDC__ |
| 262 | +extern void (*obstack_alloc_failed_handler) (void); |
| 263 | +#else |
| 264 | +extern void (*obstack_alloc_failed_handler) (); |
| 265 | +#endif |
| 266 | + |
| 267 | +/* Exit value used when `print_and_abort' is used. */ |
| 268 | +extern int obstack_exit_failure; |
| 269 | + |
| 270 | +/* Pointer to beginning of object being allocated or to be allocated next. |
| 271 | + Note that this might not be the final address of the object |
| 272 | + because a new chunk might be needed to hold the final size. */ |
| 273 | + |
| 274 | +#define obstack_base(h) ((h)->object_base) |
| 275 | + |
| 276 | +/* Size for allocating ordinary chunks. */ |
| 277 | + |
| 278 | +#define obstack_chunk_size(h) ((h)->chunk_size) |
| 279 | + |
| 280 | +/* Pointer to next byte not yet allocated in current chunk. */ |
| 281 | + |
| 282 | +#define obstack_next_free(h) ((h)->next_free) |
| 283 | + |
| 284 | +/* Mask specifying low bits that should be clear in address of an object. */ |
| 285 | + |
| 286 | +#define obstack_alignment_mask(h) ((h)->alignment_mask) |
| 287 | + |
| 288 | +/* To prevent prototype warnings provide complete argument list in |
| 289 | + standard C version. */ |
| 290 | +#if defined __STDC__ && __STDC__ |
| 291 | + |
| 292 | +# define obstack_init(h) \ |
| 293 | + _obstack_begin ((h), 0, 0, \ |
| 294 | + (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free) |
| 295 | + |
| 296 | +# define obstack_begin(h, size) \ |
| 297 | + _obstack_begin ((h), (size), 0, \ |
| 298 | + (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free) |
| 299 | + |
| 300 | +# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ |
| 301 | + _obstack_begin ((h), (size), (alignment), \ |
| 302 | + (void *(*) (long)) (chunkfun), (void (*) (void *)) (freefun)) |
| 303 | + |
| 304 | +# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ |
| 305 | + _obstack_begin_1 ((h), (size), (alignment), \ |
| 306 | + (void *(*) (void *, long)) (chunkfun), \ |
| 307 | + (void (*) (void *, void *)) (freefun), (arg)) |
| 308 | + |
| 309 | +# define obstack_chunkfun(h, newchunkfun) \ |
| 310 | + ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun)) |
| 311 | + |
| 312 | +# define obstack_freefun(h, newfreefun) \ |
| 313 | + ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun)) |
| 314 | + |
| 315 | +#else |
| 316 | + |
| 317 | +# define obstack_init(h) \ |
| 318 | + _obstack_begin ((h), 0, 0, \ |
| 319 | + (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free) |
| 320 | + |
| 321 | +# define obstack_begin(h, size) \ |
| 322 | + _obstack_begin ((h), (size), 0, \ |
| 323 | + (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free) |
| 324 | + |
| 325 | +# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ |
| 326 | + _obstack_begin ((h), (size), (alignment), \ |
| 327 | + (void *(*) ()) (chunkfun), (void (*) ()) (freefun)) |
| 328 | + |
| 329 | +# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ |
| 330 | + _obstack_begin_1 ((h), (size), (alignment), \ |
| 331 | + (void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg)) |
| 332 | + |
| 333 | +# define obstack_chunkfun(h, newchunkfun) \ |
| 334 | + ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun)) |
| 335 | + |
| 336 | +# define obstack_freefun(h, newfreefun) \ |
| 337 | + ((h) -> freefun = (void (*)()) (newfreefun)) |
| 338 | + |
| 339 | +#endif |
| 340 | + |
| 341 | +#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar) |
| 342 | + |
| 343 | +#define obstack_blank_fast(h,n) ((h)->next_free += (n)) |
| 344 | + |
| 345 | +#define obstack_memory_used(h) _obstack_memory_used (h) |
| 346 | + |
| 347 | +#if defined __GNUC__ && defined __STDC__ && __STDC__ |
| 348 | +/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and |
| 349 | + does not implement __extension__. But that compiler doesn't define |
| 350 | + __GNUC_MINOR__. */ |
| 351 | +# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__) |
| 352 | +# define __extension__ |
| 353 | +# endif |
| 354 | + |
| 355 | +/* For GNU C, if not -traditional, |
| 356 | + we can define these macros to compute all args only once |
| 357 | + without using a global variable. |
| 358 | + Also, we can avoid using the `temp' slot, to make faster code. */ |
| 359 | + |
| 360 | +# define obstack_object_size(OBSTACK) \ |
| 361 | + __extension__ \ |
| 362 | + ({ struct obstack *__o = (OBSTACK); \ |
| 363 | + (unsigned) (__o->next_free - __o->object_base); }) |
| 364 | + |
| 365 | +# define obstack_room(OBSTACK) \ |
| 366 | + __extension__ \ |
| 367 | + ({ struct obstack *__o = (OBSTACK); \ |
| 368 | + (unsigned) (__o->chunk_limit - __o->next_free); }) |
| 369 | + |
| 370 | +# define obstack_make_room(OBSTACK,length) \ |
| 371 | +__extension__ \ |
| 372 | +({ struct obstack *__o = (OBSTACK); \ |
| 373 | + int __len = (length); \ |
| 374 | + if (__o->chunk_limit - __o->next_free < __len) \ |
| 375 | + _obstack_newchunk (__o, __len); \ |
| 376 | + (void) 0; }) |
| 377 | + |
| 378 | +# define obstack_empty_p(OBSTACK) \ |
| 379 | + __extension__ \ |
| 380 | + ({ struct obstack *__o = (OBSTACK); \ |
| 381 | + (__o->chunk->prev == 0 && __o->next_free - __o->chunk->contents == 0); }) |
| 382 | + |
| 383 | +# define obstack_grow(OBSTACK,where,length) \ |
| 384 | +__extension__ \ |
| 385 | +({ struct obstack *__o = (OBSTACK); \ |
| 386 | + int __len = (length); \ |
| 387 | + if (__o->next_free + __len > __o->chunk_limit) \ |
| 388 | + _obstack_newchunk (__o, __len); \ |
| 389 | + _obstack_memcpy (__o->next_free, (char *) (where), __len); \ |
| 390 | + __o->next_free += __len; \ |
| 391 | + (void) 0; }) |
| 392 | + |
| 393 | +# define obstack_grow0(OBSTACK,where,length) \ |
| 394 | +__extension__ \ |
| 395 | +({ struct obstack *__o = (OBSTACK); \ |
| 396 | + int __len = (length); \ |
| 397 | + if (__o->next_free + __len + 1 > __o->chunk_limit) \ |
| 398 | + _obstack_newchunk (__o, __len + 1); \ |
| 399 | + _obstack_memcpy (__o->next_free, (char *) (where), __len); \ |
| 400 | + __o->next_free += __len; \ |
| 401 | + *(__o->next_free)++ = 0; \ |
| 402 | + (void) 0; }) |
| 403 | + |
| 404 | +# define obstack_1grow(OBSTACK,datum) \ |
| 405 | +__extension__ \ |
| 406 | +({ struct obstack *__o = (OBSTACK); \ |
| 407 | + if (__o->next_free + 1 > __o->chunk_limit) \ |
| 408 | + _obstack_newchunk (__o, 1); \ |
| 409 | + *(__o->next_free)++ = (datum); \ |
| 410 | + (void) 0; }) |
| 411 | + |
| 412 | +/* These assume that the obstack alignment is good enough for pointers or ints, |
| 413 | + and that the data added so far to the current object |
| 414 | + shares that much alignment. */ |
| 415 | + |
| 416 | +# define obstack_ptr_grow(OBSTACK,datum) \ |
| 417 | +__extension__ \ |
| 418 | +({ struct obstack *__o = (OBSTACK); \ |
| 419 | + if (__o->next_free + sizeof (void *) > __o->chunk_limit) \ |
| 420 | + _obstack_newchunk (__o, sizeof (void *)); \ |
| 421 | + *((void **)__o->next_free)++ = ((void *)datum); \ |
| 422 | + (void) 0; }) |
| 423 | + |
| 424 | +# define obstack_int_grow(OBSTACK,datum) \ |
| 425 | +__extension__ \ |
| 426 | +({ struct obstack *__o = (OBSTACK); \ |
| 427 | + if (__o->next_free + sizeof (int) > __o->chunk_limit) \ |
| 428 | + _obstack_newchunk (__o, sizeof (int)); \ |
| 429 | + *((int *)__o->next_free)++ = ((int)datum); \ |
| 430 | + (void) 0; }) |
| 431 | + |
| 432 | +# define obstack_ptr_grow_fast(h,aptr) (*((void **) (h)->next_free)++ = (void *)aptr) |
| 433 | +# define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint) |
| 434 | + |
| 435 | +# define obstack_blank(OBSTACK,length) \ |
| 436 | +__extension__ \ |
| 437 | +({ struct obstack *__o = (OBSTACK); \ |
| 438 | + int __len = (length); \ |
| 439 | + if (__o->chunk_limit - __o->next_free < __len) \ |
| 440 | + _obstack_newchunk (__o, __len); \ |
| 441 | + __o->next_free += __len; \ |
| 442 | + (void) 0; }) |
| 443 | + |
| 444 | +# define obstack_alloc(OBSTACK,length) \ |
| 445 | +__extension__ \ |
| 446 | +({ struct obstack *__h = (OBSTACK); \ |
| 447 | + obstack_blank (__h, (length)); \ |
| 448 | + obstack_finish (__h); }) |
| 449 | + |
| 450 | +# define obstack_copy(OBSTACK,where,length) \ |
| 451 | +__extension__ \ |
| 452 | +({ struct obstack *__h = (OBSTACK); \ |
| 453 | + obstack_grow (__h, (where), (length)); \ |
| 454 | + obstack_finish (__h); }) |
| 455 | + |
| 456 | +# define obstack_copy0(OBSTACK,where,length) \ |
| 457 | +__extension__ \ |
| 458 | +({ struct obstack *__h = (OBSTACK); \ |
| 459 | + obstack_grow0 (__h, (where), (length)); \ |
| 460 | + obstack_finish (__h); }) |
| 461 | + |
| 462 | +/* The local variable is named __o1 to avoid a name conflict |
| 463 | + when obstack_blank is called. */ |
| 464 | +# define obstack_finish(OBSTACK) \ |
| 465 | +__extension__ \ |
| 466 | +({ struct obstack *__o1 = (OBSTACK); \ |
| 467 | + void *value; \ |
| 468 | + value = (void *) __o1->object_base; \ |
| 469 | + if (__o1->next_free == value) \ |
| 470 | + __o1->maybe_empty_object = 1; \ |
| 471 | + __o1->next_free \ |
| 472 | + = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\ |
| 473 | + & ~ (__o1->alignment_mask)); \ |
| 474 | + if (__o1->next_free - (char *)__o1->chunk \ |
| 475 | + > __o1->chunk_limit - (char *)__o1->chunk) \ |
| 476 | + __o1->next_free = __o1->chunk_limit; \ |
| 477 | + __o1->object_base = __o1->next_free; \ |
| 478 | + value; }) |
| 479 | + |
| 480 | +# define obstack_free(OBSTACK, OBJ) \ |
| 481 | +__extension__ \ |
| 482 | +({ struct obstack *__o = (OBSTACK); \ |
| 483 | + void *__obj = (OBJ); \ |
| 484 | + if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \ |
| 485 | + __o->next_free = __o->object_base = (char *)__obj; \ |
| 486 | + else (obstack_free) (__o, __obj); }) |
| 487 | + |
| 488 | +#else /* not __GNUC__ or not __STDC__ */ |
| 489 | + |
| 490 | +# define obstack_object_size(h) \ |
| 491 | + (unsigned) ((h)->next_free - (h)->object_base) |
| 492 | + |
| 493 | +# define obstack_room(h) \ |
| 494 | + (unsigned) ((h)->chunk_limit - (h)->next_free) |
| 495 | + |
| 496 | +# define obstack_empty_p(h) \ |
| 497 | + ((h)->chunk->prev == 0 && (h)->next_free - (h)->chunk->contents == 0) |
| 498 | + |
| 499 | +/* Note that the call to _obstack_newchunk is enclosed in (..., 0) |
| 500 | + so that we can avoid having void expressions |
| 501 | + in the arms of the conditional expression. |
| 502 | + Casting the third operand to void was tried before, |
| 503 | + but some compilers won't accept it. */ |
| 504 | + |
| 505 | +# define obstack_make_room(h,length) \ |
| 506 | +( (h)->temp = (length), \ |
| 507 | + (((h)->next_free + (h)->temp > (h)->chunk_limit) \ |
| 508 | + ? (_obstack_newchunk ((h), (h)->temp), 0) : 0)) |
| 509 | + |
| 510 | +# define obstack_grow(h,where,length) \ |
| 511 | +( (h)->temp = (length), \ |
| 512 | + (((h)->next_free + (h)->temp > (h)->chunk_limit) \ |
| 513 | + ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \ |
| 514 | + _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \ |
| 515 | + (h)->next_free += (h)->temp) |
| 516 | + |
| 517 | +# define obstack_grow0(h,where,length) \ |
| 518 | +( (h)->temp = (length), \ |
| 519 | + (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \ |
| 520 | + ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \ |
| 521 | + _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \ |
| 522 | + (h)->next_free += (h)->temp, \ |
| 523 | + *((h)->next_free)++ = 0) |
| 524 | + |
| 525 | +# define obstack_1grow(h,datum) \ |
| 526 | +( (((h)->next_free + 1 > (h)->chunk_limit) \ |
| 527 | + ? (_obstack_newchunk ((h), 1), 0) : 0), \ |
| 528 | + (*((h)->next_free)++ = (datum))) |
| 529 | + |
| 530 | +# define obstack_ptr_grow(h,datum) \ |
| 531 | +( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \ |
| 532 | + ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \ |
| 533 | + (*((char **) (((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *) datum))) |
| 534 | + |
| 535 | +# define obstack_int_grow(h,datum) \ |
| 536 | +( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \ |
| 537 | + ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \ |
| 538 | + (*((int *) (((h)->next_free+=sizeof(int))-sizeof(int))) = ((int) datum))) |
| 539 | + |
| 540 | +# define obstack_ptr_grow_fast(h,aptr) (*((char **) (h)->next_free)++ = (char *) aptr) |
| 541 | +# define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint) |
| 542 | + |
| 543 | +# define obstack_blank(h,length) \ |
| 544 | +( (h)->temp = (length), \ |
| 545 | + (((h)->chunk_limit - (h)->next_free < (h)->temp) \ |
| 546 | + ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \ |
| 547 | + ((h)->next_free += (h)->temp)) |
| 548 | + |
| 549 | +# define obstack_alloc(h,length) \ |
| 550 | + (obstack_blank ((h), (length)), obstack_finish ((h))) |
| 551 | + |
| 552 | +# define obstack_copy(h,where,length) \ |
| 553 | + (obstack_grow ((h), (where), (length)), obstack_finish ((h))) |
| 554 | + |
| 555 | +# define obstack_copy0(h,where,length) \ |
| 556 | + (obstack_grow0 ((h), (where), (length)), obstack_finish ((h))) |
| 557 | + |
| 558 | +# define obstack_finish(h) \ |
| 559 | +( ((h)->next_free == (h)->object_base \ |
| 560 | + ? (((h)->maybe_empty_object = 1), 0) \ |
| 561 | + : 0), \ |
| 562 | + (h)->temp = __PTR_TO_INT ((h)->object_base), \ |
| 563 | + (h)->next_free \ |
| 564 | + = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \ |
| 565 | + & ~ ((h)->alignment_mask)), \ |
| 566 | + (((h)->next_free - (char *) (h)->chunk \ |
| 567 | + > (h)->chunk_limit - (char *) (h)->chunk) \ |
| 568 | + ? ((h)->next_free = (h)->chunk_limit) : 0), \ |
| 569 | + (h)->object_base = (h)->next_free, \ |
| 570 | + __INT_TO_PTR ((h)->temp)) |
| 571 | + |
| 572 | +# if defined __STDC__ && __STDC__ |
| 573 | +# define obstack_free(h,obj) \ |
| 574 | +( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \ |
| 575 | + (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\ |
| 576 | + ? (int) ((h)->next_free = (h)->object_base \ |
| 577 | + = (h)->temp + (char *) (h)->chunk) \ |
| 578 | + : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0))) |
| 579 | +# else |
| 580 | +# define obstack_free(h,obj) \ |
| 581 | +( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \ |
| 582 | + (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\ |
| 583 | + ? (int) ((h)->next_free = (h)->object_base \ |
| 584 | + = (h)->temp + (char *) (h)->chunk) \ |
| 585 | + : (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0))) |
| 586 | +# endif |
| 587 | + |
| 588 | +#endif /* not __GNUC__ or not __STDC__ */ |
| 589 | + |
| 590 | +#ifdef __cplusplus |
| 591 | +} /* C++ */ |
| 592 | +#endif |
| 593 | + |
| 594 | +#endif /* obstack.h */ |
Property changes on: trunk/extensions/FastStringSearch/obstack.h |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 595 | + native |
Index: trunk/extensions/FastStringSearch/CREDITS |
— | — | @@ -0,0 +1,2 @@ |
| 2 | +fss |
| 3 | +Tim Starling |
Property changes on: trunk/extensions/FastStringSearch/CREDITS |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 4 | + native |
Index: trunk/extensions/FastStringSearch/EXPERIMENTAL |
Property changes on: trunk/extensions/FastStringSearch/EXPERIMENTAL |
___________________________________________________________________ |
Added: svn:eol-style |
2 | 5 | + native |
Index: trunk/extensions/FastStringSearch/README |
— | — | @@ -0,0 +1,52 @@ |
| 2 | +This is a PHP extension for fast string search and replace. It is used by |
| 3 | +LangaugeConverter.php. It supports multiple search terms. We use it as a |
| 4 | +replacement for PHP's strtr, which is extremely slow in certain cases. |
| 5 | +Chinese script conversion is one of those cases. This extension uses a |
| 6 | +Commentz-Walter style algorithm for multiple search terms, or a Boyer-Moore |
| 7 | +algorithm for single search terms. |
| 8 | + |
| 9 | +Several source files were taken from GNU grep, and are under the GNU General |
| 10 | +Public License. The PHP license is incompatible, so a PHP binary containing this |
| 11 | +extension is probably not redistributable under any license. You can use such a |
| 12 | +build privately, however. The source files can be distributed under their |
| 13 | +respective licenses. |
| 14 | + |
| 15 | +The interface synopsis is as follows. To prepare a string search: |
| 16 | + |
| 17 | +$fss = fss_prep_search( 'Hello' ); |
| 18 | + |
| 19 | +or |
| 20 | + |
| 21 | +$fss = fss_prep_search( array( 'Hello', 'Hi' ) ); |
| 22 | + |
| 23 | +To search a string, pass the previously prepared object to fss_exec_search, |
| 24 | +along with the subject string (the "haystack"): |
| 25 | + |
| 26 | +$result = fss_exec_search( $fss, 'xxx Hello xxx' ); |
| 27 | + |
| 28 | +This will return an array with the first element being the string offset of the |
| 29 | +match, and the second element being the length of the match. If no matches are |
| 30 | +found, false is returned. The first match is returned, and if multiple search |
| 31 | +strings match at the same location, the longest will be returned. This function |
| 32 | +also accepts an optional third parameter giving the offset at which to start the |
| 33 | +search. |
| 34 | + |
| 35 | +Replacements are performed like this: |
| 36 | + |
| 37 | +$fss = fss_prep_replace( array( 'from' => 'to' ) ); |
| 38 | +$text = fss_exec_replace( $fss, $text ); |
| 39 | + |
| 40 | +The interpretation of the replacement array is exactly the same as in strtr: the |
| 41 | +longest match is always used, and parts of the string which have already been |
| 42 | +replaced are not processed again. |
| 43 | + |
| 44 | +You can free an FSS result with: |
| 45 | + |
| 46 | +fss_free( $fss ); |
| 47 | + |
| 48 | +This is not generally necessary, since PHP will clean up the memory when all |
| 49 | +references are released. |
| 50 | + |
| 51 | +If you use an FSS object prepared with fss_prep_search() in fss_exec_replace(), |
| 52 | +all strings matched will be removed, i.e. replaced by an empty string. |
| 53 | + |
Property changes on: trunk/extensions/FastStringSearch/README |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 54 | + native |
Index: trunk/extensions/FastStringSearch/kwset.c |
— | — | @@ -0,0 +1,786 @@ |
| 2 | +/* kwset.c - search for any of a set of keywords. |
| 3 | + Copyright 1989, 1998, 2000, 2005 Free Software Foundation, Inc. |
| 4 | + |
| 5 | + This program is free software; you can redistribute it and/or modify |
| 6 | + it under the terms of the GNU General Public License as published by |
| 7 | + the Free Software Foundation; either version 2, or (at your option) |
| 8 | + any later version. |
| 9 | + |
| 10 | + This program is distributed in the hope that it will be useful, |
| 11 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | + GNU General Public License for more details. |
| 14 | + |
| 15 | + You should have received a copy of the GNU General Public License |
| 16 | + along with this program; if not, write to the Free Software |
| 17 | + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA |
| 18 | + 02110-1301, USA. */ |
| 19 | + |
| 20 | +/* Written August 1989 by Mike Haertel. |
| 21 | + The author may be reached (Email) at the address mike@ai.mit.edu, |
| 22 | + or (US mail) as Mike Haertel c/o Free Software Foundation. */ |
| 23 | + |
| 24 | +/* The algorithm implemented by these routines bears a startling resemblence |
| 25 | + to one discovered by Beate Commentz-Walter, although it is not identical. |
| 26 | + See "A String Matching Algorithm Fast on the Average," Technical Report, |
| 27 | + IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900 |
| 28 | + Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient |
| 29 | + String Matching: An Aid to Bibliographic Search," CACM June 1975, |
| 30 | + Vol. 18, No. 6, which describes the failure function used below. */ |
| 31 | + |
| 32 | +#ifdef HAVE_CONFIG_H |
| 33 | +# include <config.h> |
| 34 | +#endif |
| 35 | +#include <sys/types.h> |
| 36 | +#include <limits.h> |
| 37 | +#include "php.h" |
| 38 | +#include "kwset.h" |
| 39 | +#include "obstack.h" |
| 40 | + |
| 41 | +void *kws_emalloc_wrapper(size_t size); |
| 42 | +void kws_efree_wrapper(void *ptr); |
| 43 | + |
| 44 | +#define NCHAR (UCHAR_MAX + 1) |
| 45 | +#define obstack_chunk_alloc kws_emalloc_wrapper |
| 46 | +#define obstack_chunk_free kws_efree_wrapper |
| 47 | + |
| 48 | +#define U(c) ((unsigned char) (c)) |
| 49 | + |
| 50 | +/* Balanced tree of edges and labels leaving a given trie node. */ |
| 51 | +struct tree |
| 52 | +{ |
| 53 | + struct tree *llink; /* Left link; MUST be first field. */ |
| 54 | + struct tree *rlink; /* Right link (to larger labels). */ |
| 55 | + struct trie *trie; /* Trie node pointed to by this edge. */ |
| 56 | + unsigned char label; /* Label on this edge. */ |
| 57 | + char balance; /* Difference in depths of subtrees. */ |
| 58 | +}; |
| 59 | + |
| 60 | +/* Node of a trie representing a set of reversed keywords. */ |
| 61 | +struct trie |
| 62 | +{ |
| 63 | + unsigned int accepting; /* Word index of accepted word, or zero. */ |
| 64 | + struct tree *links; /* Tree of edges leaving this node. */ |
| 65 | + struct trie *parent; /* Parent of this node. */ |
| 66 | + struct trie *next; /* List of all trie nodes in level order. */ |
| 67 | + struct trie *fail; /* Aho-Corasick failure function. */ |
| 68 | + int depth; /* Depth of this node from the root. */ |
| 69 | + int shift; /* Shift function for search failures. */ |
| 70 | + int maxshift; /* Max shift of self and descendents. */ |
| 71 | +}; |
| 72 | + |
| 73 | +/* Structure returned opaquely to the caller, containing everything. */ |
| 74 | +struct kwset |
| 75 | +{ |
| 76 | + struct obstack obstack; /* Obstack for node allocation. */ |
| 77 | + int words; /* Number of words in the trie. */ |
| 78 | + struct trie *trie; /* The trie itself. */ |
| 79 | + int mind; /* Minimum depth of an accepting node. */ |
| 80 | + int maxd; /* Maximum depth of any node. */ |
| 81 | + unsigned char delta[NCHAR]; /* Delta table for rapid search. */ |
| 82 | + struct trie *next[NCHAR]; /* Table of children of the root. */ |
| 83 | + char *target; /* Target string if there's only one. */ |
| 84 | + int mind2; /* Used in Boyer-Moore search for one string. */ |
| 85 | + char const *trans; /* Character translation table. */ |
| 86 | +}; |
| 87 | + |
| 88 | +void *kws_emalloc_wrapper(size_t size) |
| 89 | +{ |
| 90 | + return emalloc(size); |
| 91 | +} |
| 92 | + |
| 93 | +void kws_efree_wrapper(void *ptr) |
| 94 | +{ |
| 95 | + efree(ptr); |
| 96 | +} |
| 97 | + |
| 98 | +/* Allocate and initialize a keyword set object, returning an opaque |
| 99 | + pointer to it. Return NULL if memory is not available. */ |
| 100 | +kwset_t |
| 101 | +kwsalloc (char const *trans) |
| 102 | +{ |
| 103 | + struct kwset *kwset; |
| 104 | + |
| 105 | + kwset = (struct kwset *) kws_emalloc_wrapper(sizeof (struct kwset)); |
| 106 | + if (!kwset) |
| 107 | + return NULL; |
| 108 | + |
| 109 | + obstack_init(&kwset->obstack); |
| 110 | + kwset->words = 0; |
| 111 | + kwset->trie |
| 112 | + = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie)); |
| 113 | + if (!kwset->trie) |
| 114 | + { |
| 115 | + kwsfree((kwset_t) kwset); |
| 116 | + return NULL; |
| 117 | + } |
| 118 | + kwset->trie->accepting = 0; |
| 119 | + kwset->trie->links = NULL; |
| 120 | + kwset->trie->parent = NULL; |
| 121 | + kwset->trie->next = NULL; |
| 122 | + kwset->trie->fail = NULL; |
| 123 | + kwset->trie->depth = 0; |
| 124 | + kwset->trie->shift = 0; |
| 125 | + kwset->mind = INT_MAX; |
| 126 | + kwset->maxd = -1; |
| 127 | + kwset->target = NULL; |
| 128 | + kwset->trans = trans; |
| 129 | + |
| 130 | + return (kwset_t) kwset; |
| 131 | +} |
| 132 | + |
| 133 | +/* This upper bound is valid for CHAR_BIT >= 4 and |
| 134 | + exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */ |
| 135 | +#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2) |
| 136 | + |
| 137 | +/* Add the given string to the contents of the keyword set. Return NULL |
| 138 | + for success, an error message otherwise. */ |
| 139 | +const char * |
| 140 | +kwsincr (kwset_t kws, char const *text, size_t len) |
| 141 | +{ |
| 142 | + struct kwset *kwset; |
| 143 | + register struct trie *trie; |
| 144 | + register unsigned char label; |
| 145 | + register struct tree *link; |
| 146 | + register int depth; |
| 147 | + struct tree *links[DEPTH_SIZE]; |
| 148 | + enum { L, R } dirs[DEPTH_SIZE]; |
| 149 | + struct tree *t, *r, *l, *rl, *lr; |
| 150 | + |
| 151 | + kwset = (struct kwset *) kws; |
| 152 | + trie = kwset->trie; |
| 153 | + text += len; |
| 154 | + |
| 155 | + /* Descend the trie (built of reversed keywords) character-by-character, |
| 156 | + installing new nodes when necessary. */ |
| 157 | + while (len--) |
| 158 | + { |
| 159 | + label = kwset->trans ? kwset->trans[U(*--text)] : *--text; |
| 160 | + |
| 161 | + /* Descend the tree of outgoing links for this trie node, |
| 162 | + looking for the current character and keeping track |
| 163 | + of the path followed. */ |
| 164 | + link = trie->links; |
| 165 | + links[0] = (struct tree *) &trie->links; |
| 166 | + dirs[0] = L; |
| 167 | + depth = 1; |
| 168 | + |
| 169 | + while (link && label != link->label) |
| 170 | + { |
| 171 | + links[depth] = link; |
| 172 | + if (label < link->label) |
| 173 | + dirs[depth++] = L, link = link->llink; |
| 174 | + else |
| 175 | + dirs[depth++] = R, link = link->rlink; |
| 176 | + } |
| 177 | + |
| 178 | + /* The current character doesn't have an outgoing link at |
| 179 | + this trie node, so build a new trie node and install |
| 180 | + a link in the current trie node's tree. */ |
| 181 | + if (!link) |
| 182 | + { |
| 183 | + link = (struct tree *) obstack_alloc(&kwset->obstack, |
| 184 | + sizeof (struct tree)); |
| 185 | + if (!link) |
| 186 | + return "memory exhausted"; |
| 187 | + link->llink = NULL; |
| 188 | + link->rlink = NULL; |
| 189 | + link->trie = (struct trie *) obstack_alloc(&kwset->obstack, |
| 190 | + sizeof (struct trie)); |
| 191 | + if (!link->trie) |
| 192 | + { |
| 193 | + obstack_free(&kwset->obstack, link); |
| 194 | + return "memory exhausted"; |
| 195 | + } |
| 196 | + link->trie->accepting = 0; |
| 197 | + link->trie->links = NULL; |
| 198 | + link->trie->parent = trie; |
| 199 | + link->trie->next = NULL; |
| 200 | + link->trie->fail = NULL; |
| 201 | + link->trie->depth = trie->depth + 1; |
| 202 | + link->trie->shift = 0; |
| 203 | + link->label = label; |
| 204 | + link->balance = 0; |
| 205 | + |
| 206 | + /* Install the new tree node in its parent. */ |
| 207 | + if (dirs[--depth] == L) |
| 208 | + links[depth]->llink = link; |
| 209 | + else |
| 210 | + links[depth]->rlink = link; |
| 211 | + |
| 212 | + /* Back up the tree fixing the balance flags. */ |
| 213 | + while (depth && !links[depth]->balance) |
| 214 | + { |
| 215 | + if (dirs[depth] == L) |
| 216 | + --links[depth]->balance; |
| 217 | + else |
| 218 | + ++links[depth]->balance; |
| 219 | + --depth; |
| 220 | + } |
| 221 | + |
| 222 | + /* Rebalance the tree by pointer rotations if necessary. */ |
| 223 | + if (depth && ((dirs[depth] == L && --links[depth]->balance) |
| 224 | + || (dirs[depth] == R && ++links[depth]->balance))) |
| 225 | + { |
| 226 | + switch (links[depth]->balance) |
| 227 | + { |
| 228 | + case (char) -2: |
| 229 | + switch (dirs[depth + 1]) |
| 230 | + { |
| 231 | + case L: |
| 232 | + r = links[depth], t = r->llink, rl = t->rlink; |
| 233 | + t->rlink = r, r->llink = rl; |
| 234 | + t->balance = r->balance = 0; |
| 235 | + break; |
| 236 | + case R: |
| 237 | + r = links[depth], l = r->llink, t = l->rlink; |
| 238 | + rl = t->rlink, lr = t->llink; |
| 239 | + t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl; |
| 240 | + l->balance = t->balance != 1 ? 0 : -1; |
| 241 | + r->balance = t->balance != (char) -1 ? 0 : 1; |
| 242 | + t->balance = 0; |
| 243 | + break; |
| 244 | + default: |
| 245 | + abort (); |
| 246 | + } |
| 247 | + break; |
| 248 | + case 2: |
| 249 | + switch (dirs[depth + 1]) |
| 250 | + { |
| 251 | + case R: |
| 252 | + l = links[depth], t = l->rlink, lr = t->llink; |
| 253 | + t->llink = l, l->rlink = lr; |
| 254 | + t->balance = l->balance = 0; |
| 255 | + break; |
| 256 | + case L: |
| 257 | + l = links[depth], r = l->rlink, t = r->llink; |
| 258 | + lr = t->llink, rl = t->rlink; |
| 259 | + t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl; |
| 260 | + l->balance = t->balance != 1 ? 0 : -1; |
| 261 | + r->balance = t->balance != (char) -1 ? 0 : 1; |
| 262 | + t->balance = 0; |
| 263 | + break; |
| 264 | + default: |
| 265 | + abort (); |
| 266 | + } |
| 267 | + break; |
| 268 | + default: |
| 269 | + abort (); |
| 270 | + } |
| 271 | + |
| 272 | + if (dirs[depth - 1] == L) |
| 273 | + links[depth - 1]->llink = t; |
| 274 | + else |
| 275 | + links[depth - 1]->rlink = t; |
| 276 | + } |
| 277 | + } |
| 278 | + |
| 279 | + trie = link->trie; |
| 280 | + } |
| 281 | + |
| 282 | + /* Mark the node we finally reached as accepting, encoding the |
| 283 | + index number of this word in the keyword set so far. */ |
| 284 | + if (!trie->accepting) |
| 285 | + trie->accepting = 1 + 2 * kwset->words; |
| 286 | + ++kwset->words; |
| 287 | + |
| 288 | + /* Keep track of the longest and shortest string of the keyword set. */ |
| 289 | + if (trie->depth < kwset->mind) |
| 290 | + kwset->mind = trie->depth; |
| 291 | + if (trie->depth > kwset->maxd) |
| 292 | + kwset->maxd = trie->depth; |
| 293 | + |
| 294 | + return NULL; |
| 295 | +} |
| 296 | + |
| 297 | +/* Enqueue the trie nodes referenced from the given tree in the |
| 298 | + given queue. */ |
| 299 | +static void |
| 300 | +enqueue (struct tree *tree, struct trie **last) |
| 301 | +{ |
| 302 | + if (!tree) |
| 303 | + return; |
| 304 | + enqueue(tree->llink, last); |
| 305 | + enqueue(tree->rlink, last); |
| 306 | + (*last) = (*last)->next = tree->trie; |
| 307 | +} |
| 308 | + |
| 309 | +/* Compute the Aho-Corasick failure function for the trie nodes referenced |
| 310 | + from the given tree, given the failure function for their parent as |
| 311 | + well as a last resort failure node. */ |
| 312 | +static void |
| 313 | +treefails (register struct tree const *tree, struct trie const *fail, |
| 314 | + struct trie *recourse) |
| 315 | +{ |
| 316 | + register struct tree *link; |
| 317 | + |
| 318 | + if (!tree) |
| 319 | + return; |
| 320 | + |
| 321 | + treefails(tree->llink, fail, recourse); |
| 322 | + treefails(tree->rlink, fail, recourse); |
| 323 | + |
| 324 | + /* Find, in the chain of fails going back to the root, the first |
| 325 | + node that has a descendent on the current label. */ |
| 326 | + while (fail) |
| 327 | + { |
| 328 | + link = fail->links; |
| 329 | + while (link && tree->label != link->label) |
| 330 | + if (tree->label < link->label) |
| 331 | + link = link->llink; |
| 332 | + else |
| 333 | + link = link->rlink; |
| 334 | + if (link) |
| 335 | + { |
| 336 | + tree->trie->fail = link->trie; |
| 337 | + return; |
| 338 | + } |
| 339 | + fail = fail->fail; |
| 340 | + } |
| 341 | + |
| 342 | + tree->trie->fail = recourse; |
| 343 | +} |
| 344 | + |
| 345 | +/* Set delta entries for the links of the given tree such that |
| 346 | + the preexisting delta value is larger than the current depth. */ |
| 347 | +static void |
| 348 | +treedelta (register struct tree const *tree, |
| 349 | + register unsigned int depth, |
| 350 | + unsigned char delta[]) |
| 351 | +{ |
| 352 | + if (!tree) |
| 353 | + return; |
| 354 | + treedelta(tree->llink, depth, delta); |
| 355 | + treedelta(tree->rlink, depth, delta); |
| 356 | + if (depth < delta[tree->label]) |
| 357 | + delta[tree->label] = depth; |
| 358 | +} |
| 359 | + |
| 360 | +/* Return true if A has every label in B. */ |
| 361 | +static int |
| 362 | +hasevery (register struct tree const *a, register struct tree const *b) |
| 363 | +{ |
| 364 | + if (!b) |
| 365 | + return 1; |
| 366 | + if (!hasevery(a, b->llink)) |
| 367 | + return 0; |
| 368 | + if (!hasevery(a, b->rlink)) |
| 369 | + return 0; |
| 370 | + while (a && b->label != a->label) |
| 371 | + if (b->label < a->label) |
| 372 | + a = a->llink; |
| 373 | + else |
| 374 | + a = a->rlink; |
| 375 | + return !!a; |
| 376 | +} |
| 377 | + |
| 378 | +/* Compute a vector, indexed by character code, of the trie nodes |
| 379 | + referenced from the given tree. */ |
| 380 | +static void |
| 381 | +treenext (struct tree const *tree, struct trie *next[]) |
| 382 | +{ |
| 383 | + if (!tree) |
| 384 | + return; |
| 385 | + treenext(tree->llink, next); |
| 386 | + treenext(tree->rlink, next); |
| 387 | + next[tree->label] = tree->trie; |
| 388 | +} |
| 389 | + |
| 390 | +/* Compute the shift for each trie node, as well as the delta |
| 391 | + table and next cache for the given keyword set. */ |
| 392 | +const char * |
| 393 | +kwsprep (kwset_t kws) |
| 394 | +{ |
| 395 | + register struct kwset *kwset; |
| 396 | + register int i; |
| 397 | + register struct trie *curr; |
| 398 | + register char const *trans; |
| 399 | + unsigned char delta[NCHAR]; |
| 400 | + |
| 401 | + kwset = (struct kwset *) kws; |
| 402 | + |
| 403 | + /* Initial values for the delta table; will be changed later. The |
| 404 | + delta entry for a given character is the smallest depth of any |
| 405 | + node at which an outgoing edge is labeled by that character. */ |
| 406 | + memset(delta, kwset->mind < UCHAR_MAX ? kwset->mind : UCHAR_MAX, NCHAR); |
| 407 | + |
| 408 | + /* Check if we can use the simple boyer-moore algorithm, instead |
| 409 | + of the hairy commentz-walter algorithm. */ |
| 410 | + if (kwset->words == 1 && kwset->trans == NULL) |
| 411 | + { |
| 412 | + char c; |
| 413 | + |
| 414 | + /* Looking for just one string. Extract it from the trie. */ |
| 415 | + kwset->target = obstack_alloc(&kwset->obstack, kwset->mind); |
| 416 | + if (!kwset->target) |
| 417 | + return "memory exhausted"; |
| 418 | + for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i) |
| 419 | + { |
| 420 | + kwset->target[i] = curr->links->label; |
| 421 | + curr = curr->links->trie; |
| 422 | + } |
| 423 | + /* Build the Boyer Moore delta. Boy that's easy compared to CW. */ |
| 424 | + for (i = 0; i < kwset->mind; ++i) |
| 425 | + delta[U(kwset->target[i])] = kwset->mind - (i + 1); |
| 426 | + /* Find the minimal delta2 shift that we might make after |
| 427 | + a backwards match has failed. */ |
| 428 | + c = kwset->target[kwset->mind - 1]; |
| 429 | + for (i = kwset->mind - 2; i >= 0; --i) |
| 430 | + if (kwset->target[i] == c) |
| 431 | + break; |
| 432 | + kwset->mind2 = kwset->mind - (i + 1); |
| 433 | + } |
| 434 | + else |
| 435 | + { |
| 436 | + register struct trie *fail; |
| 437 | + struct trie *last, *next[NCHAR]; |
| 438 | + |
| 439 | + /* Traverse the nodes of the trie in level order, simultaneously |
| 440 | + computing the delta table, failure function, and shift function. */ |
| 441 | + for (curr = last = kwset->trie; curr; curr = curr->next) |
| 442 | + { |
| 443 | + /* Enqueue the immediate descendents in the level order queue. */ |
| 444 | + enqueue(curr->links, &last); |
| 445 | + |
| 446 | + curr->shift = kwset->mind; |
| 447 | + curr->maxshift = kwset->mind; |
| 448 | + |
| 449 | + /* Update the delta table for the descendents of this node. */ |
| 450 | + treedelta(curr->links, curr->depth, delta); |
| 451 | + |
| 452 | + /* Compute the failure function for the decendents of this node. */ |
| 453 | + treefails(curr->links, curr->fail, kwset->trie); |
| 454 | + |
| 455 | + /* Update the shifts at each node in the current node's chain |
| 456 | + of fails back to the root. */ |
| 457 | + for (fail = curr->fail; fail; fail = fail->fail) |
| 458 | + { |
| 459 | + /* If the current node has some outgoing edge that the fail |
| 460 | + doesn't, then the shift at the fail should be no larger |
| 461 | + than the difference of their depths. */ |
| 462 | + if (!hasevery(fail->links, curr->links)) |
| 463 | + if (curr->depth - fail->depth < fail->shift) |
| 464 | + fail->shift = curr->depth - fail->depth; |
| 465 | + |
| 466 | + /* If the current node is accepting then the shift at the |
| 467 | + fail and its descendents should be no larger than the |
| 468 | + difference of their depths. */ |
| 469 | + if (curr->accepting && fail->maxshift > curr->depth - fail->depth) |
| 470 | + fail->maxshift = curr->depth - fail->depth; |
| 471 | + } |
| 472 | + } |
| 473 | + |
| 474 | + /* Traverse the trie in level order again, fixing up all nodes whose |
| 475 | + shift exceeds their inherited maxshift. */ |
| 476 | + for (curr = kwset->trie->next; curr; curr = curr->next) |
| 477 | + { |
| 478 | + if (curr->maxshift > curr->parent->maxshift) |
| 479 | + curr->maxshift = curr->parent->maxshift; |
| 480 | + if (curr->shift > curr->maxshift) |
| 481 | + curr->shift = curr->maxshift; |
| 482 | + } |
| 483 | + |
| 484 | + /* Create a vector, indexed by character code, of the outgoing links |
| 485 | + from the root node. */ |
| 486 | + for (i = 0; i < NCHAR; ++i) |
| 487 | + next[i] = NULL; |
| 488 | + treenext(kwset->trie->links, next); |
| 489 | + |
| 490 | + if ((trans = kwset->trans) != NULL) |
| 491 | + for (i = 0; i < NCHAR; ++i) |
| 492 | + kwset->next[i] = next[U(trans[i])]; |
| 493 | + else |
| 494 | + memcpy(kwset->next, next, NCHAR * sizeof(struct trie *)); |
| 495 | + } |
| 496 | + |
| 497 | + /* Fix things up for any translation table. */ |
| 498 | + if ((trans = kwset->trans) != NULL) |
| 499 | + for (i = 0; i < NCHAR; ++i) |
| 500 | + kwset->delta[i] = delta[U(trans[i])]; |
| 501 | + else |
| 502 | + memcpy(kwset->delta, delta, NCHAR); |
| 503 | + |
| 504 | + return NULL; |
| 505 | +} |
| 506 | + |
| 507 | +/* Fast boyer-moore search. */ |
| 508 | +static size_t |
| 509 | +bmexec (kwset_t kws, char const *text, size_t size) |
| 510 | +{ |
| 511 | + struct kwset const *kwset; |
| 512 | + register unsigned char const *d1; |
| 513 | + register char const *ep, *sp, *tp; |
| 514 | + register int d, gc, i, len, md2; |
| 515 | + |
| 516 | + kwset = (struct kwset const *) kws; |
| 517 | + len = kwset->mind; |
| 518 | + |
| 519 | + if (len == 0) |
| 520 | + return 0; |
| 521 | + if (len > size) |
| 522 | + return -1; |
| 523 | + if (len == 1) |
| 524 | + { |
| 525 | + tp = memchr (text, kwset->target[0], size); |
| 526 | + return tp ? tp - text : -1; |
| 527 | + } |
| 528 | + |
| 529 | + d1 = kwset->delta; |
| 530 | + sp = kwset->target + len; |
| 531 | + gc = U(sp[-2]); |
| 532 | + md2 = kwset->mind2; |
| 533 | + tp = text + len; |
| 534 | + |
| 535 | + /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */ |
| 536 | + if (size > 12 * len) |
| 537 | + /* 11 is not a bug, the initial offset happens only once. */ |
| 538 | + for (ep = text + size - 11 * len;;) |
| 539 | + { |
| 540 | + while (tp <= ep) |
| 541 | + { |
| 542 | + d = d1[U(tp[-1])], tp += d; |
| 543 | + d = d1[U(tp[-1])], tp += d; |
| 544 | + if (d == 0) |
| 545 | + goto found; |
| 546 | + d = d1[U(tp[-1])], tp += d; |
| 547 | + d = d1[U(tp[-1])], tp += d; |
| 548 | + d = d1[U(tp[-1])], tp += d; |
| 549 | + if (d == 0) |
| 550 | + goto found; |
| 551 | + d = d1[U(tp[-1])], tp += d; |
| 552 | + d = d1[U(tp[-1])], tp += d; |
| 553 | + d = d1[U(tp[-1])], tp += d; |
| 554 | + if (d == 0) |
| 555 | + goto found; |
| 556 | + d = d1[U(tp[-1])], tp += d; |
| 557 | + d = d1[U(tp[-1])], tp += d; |
| 558 | + } |
| 559 | + break; |
| 560 | + found: |
| 561 | + if (U(tp[-2]) == gc) |
| 562 | + { |
| 563 | + for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i) |
| 564 | + ; |
| 565 | + if (i > len) |
| 566 | + return tp - len - text; |
| 567 | + } |
| 568 | + tp += md2; |
| 569 | + } |
| 570 | + |
| 571 | + /* Now we have only a few characters left to search. We |
| 572 | + carefully avoid ever producing an out-of-bounds pointer. */ |
| 573 | + ep = text + size; |
| 574 | + d = d1[U(tp[-1])]; |
| 575 | + while (d <= ep - tp) |
| 576 | + { |
| 577 | + d = d1[U((tp += d)[-1])]; |
| 578 | + if (d != 0) |
| 579 | + continue; |
| 580 | + if (U(tp[-2]) == gc) |
| 581 | + { |
| 582 | + for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i) |
| 583 | + ; |
| 584 | + if (i > len) |
| 585 | + return tp - len - text; |
| 586 | + } |
| 587 | + d = md2; |
| 588 | + } |
| 589 | + |
| 590 | + return -1; |
| 591 | +} |
| 592 | + |
| 593 | +/* Hairy multiple string search. */ |
| 594 | +static size_t |
| 595 | +cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch) |
| 596 | +{ |
| 597 | + struct kwset const *kwset; |
| 598 | + struct trie * const *next; |
| 599 | + struct trie const *trie; |
| 600 | + struct trie const *accept; |
| 601 | + char const *beg, *lim, *mch, *lmch; |
| 602 | + register unsigned char c; |
| 603 | + register unsigned char const *delta; |
| 604 | + register int d; |
| 605 | + register char const *end, *qlim; |
| 606 | + register struct tree const *tree; |
| 607 | + register char const *trans; |
| 608 | + |
| 609 | +#ifdef lint |
| 610 | + accept = NULL; |
| 611 | +#endif |
| 612 | + |
| 613 | + /* Initialize register copies and look for easy ways out. */ |
| 614 | + kwset = (struct kwset *) kws; |
| 615 | + if (len < kwset->mind) |
| 616 | + return -1; |
| 617 | + next = kwset->next; |
| 618 | + delta = kwset->delta; |
| 619 | + trans = kwset->trans; |
| 620 | + lim = text + len; |
| 621 | + end = text; |
| 622 | + if ((d = kwset->mind) != 0) |
| 623 | + mch = NULL; |
| 624 | + else |
| 625 | + { |
| 626 | + mch = text, accept = kwset->trie; |
| 627 | + goto match; |
| 628 | + } |
| 629 | + |
| 630 | + if (len >= 4 * kwset->mind) |
| 631 | + qlim = lim - 4 * kwset->mind; |
| 632 | + else |
| 633 | + qlim = NULL; |
| 634 | + |
| 635 | + while (lim - end >= d) |
| 636 | + { |
| 637 | + if (qlim && end <= qlim) |
| 638 | + { |
| 639 | + end += d - 1; |
| 640 | + while ((d = delta[c = *end]) && end < qlim) |
| 641 | + { |
| 642 | + end += d; |
| 643 | + end += delta[U(*end)]; |
| 644 | + end += delta[U(*end)]; |
| 645 | + } |
| 646 | + ++end; |
| 647 | + } |
| 648 | + else |
| 649 | + d = delta[c = (end += d)[-1]]; |
| 650 | + if (d) |
| 651 | + continue; |
| 652 | + beg = end - 1; |
| 653 | + trie = next[c]; |
| 654 | + if (trie->accepting) |
| 655 | + { |
| 656 | + mch = beg; |
| 657 | + accept = trie; |
| 658 | + } |
| 659 | + d = trie->shift; |
| 660 | + while (beg > text) |
| 661 | + { |
| 662 | + c = trans ? trans[U(*--beg)] : *--beg; |
| 663 | + tree = trie->links; |
| 664 | + while (tree && c != tree->label) |
| 665 | + if (c < tree->label) |
| 666 | + tree = tree->llink; |
| 667 | + else |
| 668 | + tree = tree->rlink; |
| 669 | + if (tree) |
| 670 | + { |
| 671 | + trie = tree->trie; |
| 672 | + if (trie->accepting) |
| 673 | + { |
| 674 | + mch = beg; |
| 675 | + accept = trie; |
| 676 | + } |
| 677 | + } |
| 678 | + else |
| 679 | + break; |
| 680 | + d = trie->shift; |
| 681 | + } |
| 682 | + if (mch) |
| 683 | + goto match; |
| 684 | + } |
| 685 | + return -1; |
| 686 | + |
| 687 | + match: |
| 688 | + /* Given a known match, find the longest possible match anchored |
| 689 | + at or before its starting point. This is nearly a verbatim |
| 690 | + copy of the preceding main search loops. */ |
| 691 | + if (lim - mch > kwset->maxd) |
| 692 | + lim = mch + kwset->maxd; |
| 693 | + lmch = 0; |
| 694 | + d = 1; |
| 695 | + while (lim - end >= d) |
| 696 | + { |
| 697 | + if ((d = delta[c = (end += d)[-1]]) != 0) |
| 698 | + continue; |
| 699 | + beg = end - 1; |
| 700 | + if (!(trie = next[c])) |
| 701 | + { |
| 702 | + d = 1; |
| 703 | + continue; |
| 704 | + } |
| 705 | + if (trie->accepting && beg <= mch) |
| 706 | + { |
| 707 | + lmch = beg; |
| 708 | + accept = trie; |
| 709 | + } |
| 710 | + d = trie->shift; |
| 711 | + while (beg > text) |
| 712 | + { |
| 713 | + c = trans ? trans[U(*--beg)] : *--beg; |
| 714 | + tree = trie->links; |
| 715 | + while (tree && c != tree->label) |
| 716 | + if (c < tree->label) |
| 717 | + tree = tree->llink; |
| 718 | + else |
| 719 | + tree = tree->rlink; |
| 720 | + if (tree) |
| 721 | + { |
| 722 | + trie = tree->trie; |
| 723 | + if (trie->accepting && beg <= mch) |
| 724 | + { |
| 725 | + lmch = beg; |
| 726 | + accept = trie; |
| 727 | + } |
| 728 | + } |
| 729 | + else |
| 730 | + break; |
| 731 | + d = trie->shift; |
| 732 | + } |
| 733 | + if (lmch) |
| 734 | + { |
| 735 | + mch = lmch; |
| 736 | + goto match; |
| 737 | + } |
| 738 | + if (!d) |
| 739 | + d = 1; |
| 740 | + } |
| 741 | + |
| 742 | + if (kwsmatch) |
| 743 | + { |
| 744 | + kwsmatch->index = accept->accepting / 2; |
| 745 | + kwsmatch->offset[0] = mch - text; |
| 746 | + kwsmatch->size[0] = accept->depth; |
| 747 | + } |
| 748 | + return mch - text; |
| 749 | +} |
| 750 | + |
| 751 | +/* Search through the given text for a match of any member of the |
| 752 | + given keyword set. Return a pointer to the first character of |
| 753 | + the matching substring, or NULL if no match is found. If FOUNDLEN |
| 754 | + is non-NULL store in the referenced location the length of the |
| 755 | + matching substring. Similarly, if FOUNDIDX is non-NULL, store |
| 756 | + in the referenced location the index number of the particular |
| 757 | + keyword matched. */ |
| 758 | +size_t |
| 759 | +kwsexec (kwset_t kws, char const *text, size_t size, |
| 760 | + struct kwsmatch *kwsmatch) |
| 761 | +{ |
| 762 | + struct kwset const *kwset = (struct kwset *) kws; |
| 763 | + if (kwset->words == 1 && kwset->trans == NULL) |
| 764 | + { |
| 765 | + size_t ret = bmexec (kws, text, size); |
| 766 | + if (kwsmatch != NULL && ret != (size_t) -1) |
| 767 | + { |
| 768 | + kwsmatch->index = 0; |
| 769 | + kwsmatch->offset[0] = ret; |
| 770 | + kwsmatch->size[0] = kwset->mind; |
| 771 | + } |
| 772 | + return ret; |
| 773 | + } |
| 774 | + else |
| 775 | + return cwexec(kws, text, size, kwsmatch); |
| 776 | +} |
| 777 | + |
| 778 | +/* Free the components of the given keyword set. */ |
| 779 | +void |
| 780 | +kwsfree (kwset_t kws) |
| 781 | +{ |
| 782 | + struct kwset *kwset; |
| 783 | + |
| 784 | + kwset = (struct kwset *) kws; |
| 785 | + obstack_free(&kwset->obstack, NULL); |
| 786 | + kws_efree_wrapper(kws); |
| 787 | +} |
Property changes on: trunk/extensions/FastStringSearch/kwset.c |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 788 | + native |
Property changes on: trunk/extensions/FastStringSearch |
___________________________________________________________________ |
Added: svn:ignore |
2 | 789 | + .deps |
*.lo |
*.la |