r15830 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r15829‎ | r15830 | r15831 >
Date:14:11, 26 July 2006
Author:tstarling
Status:old
Tags:
Comment:
Fast string search PHP extension.
Modified paths:
  • /trunk/extensions/FastStringSearch (added) (history)
  • /trunk/extensions/FastStringSearch/CREDITS (added) (history)
  • /trunk/extensions/FastStringSearch/EXPERIMENTAL (added) (history)
  • /trunk/extensions/FastStringSearch/README (added) (history)
  • /trunk/extensions/FastStringSearch/config.m4 (added) (history)
  • /trunk/extensions/FastStringSearch/fss.c (added) (history)
  • /trunk/extensions/FastStringSearch/kwset.c (added) (history)
  • /trunk/extensions/FastStringSearch/kwset.h (added) (history)
  • /trunk/extensions/FastStringSearch/obstack.c (added) (history)
  • /trunk/extensions/FastStringSearch/obstack.h (added) (history)
  • /trunk/extensions/FastStringSearch/php_fss.h (added) (history)
  • /trunk/extensions/FastStringSearch/tests (added) (history)
  • /trunk/extensions/FastStringSearch/tests/001.phpt (added) (history)

Diff [purge]

Index: trunk/extensions/FastStringSearch/tests/001.phpt
@@ -0,0 +1,65 @@
 2+--TEST--
 3+Basic fast string search tests
 4+--SKIPIF--
 5+<?php if (!extension_loaded("fss")) print "skip"; ?>
 6+--FILE--
 7+<?php
 8+$fss = fss_prep_search(array('hello', 'hi'));
 9+var_dump(fss_exec_search($fss, 'hhhhello'));
 10+var_dump(fss_exec_search($fss, 'hellohello', 1));
 11+var_dump(fss_exec_search($fss, 'hellohihello', 1));
 12+var_dump(fss_exec_search($fss, 'adfjshfkjs'));
 13+
 14+fss_free($fss);
 15+
 16+$fss = fss_prep_search('hello');
 17+var_dump(fss_exec_search($fss, 'hhhhello'));
 18+var_dump(fss_exec_search($fss, 'hellohello', 1));
 19+var_dump(fss_exec_search($fss, 'adfjshfkjs'));
 20+
 21+var_dump(fss_exec_replace($fss, 'helloabchelloaa'));
 22+
 23+fss_free($fss);
 24+
 25+$fss = fss_prep_replace(array(
 26+ 'abc' => 'def',
 27+ 'ab' => 'X',
 28+));
 29+
 30+var_dump(fss_exec_replace($fss, 'ddabcababcaaaab'));
 31+?>
 32+--EXPECT--
 33+array(2) {
 34+ [0]=>
 35+ int(3)
 36+ [1]=>
 37+ int(5)
 38+}
 39+array(2) {
 40+ [0]=>
 41+ int(5)
 42+ [1]=>
 43+ int(5)
 44+}
 45+array(2) {
 46+ [0]=>
 47+ int(5)
 48+ [1]=>
 49+ int(2)
 50+}
 51+bool(false)
 52+array(2) {
 53+ [0]=>
 54+ int(3)
 55+ [1]=>
 56+ int(5)
 57+}
 58+array(2) {
 59+ [0]=>
 60+ int(5)
 61+ [1]=>
 62+ int(5)
 63+}
 64+bool(false)
 65+string(5) "abcaa"
 66+string(13) "dddefXdefaaaX"
Property changes on: trunk/extensions/FastStringSearch/tests/001.phpt
___________________________________________________________________
Added: svn:eol-style
167 + native
Index: trunk/extensions/FastStringSearch/kwset.h
@@ -0,0 +1,57 @@
 2+/* kwset.h - header declaring the keyword set library.
 3+ Copyright (C) 1989, 1998, 2005 Free Software Foundation, Inc.
 4+
 5+ This program is free software; you can redistribute it and/or modify
 6+ it under the terms of the GNU General Public License as published by
 7+ the Free Software Foundation; either version 2, or (at your option)
 8+ any later version.
 9+
 10+ This program is distributed in the hope that it will be useful,
 11+ but WITHOUT ANY WARRANTY; without even the implied warranty of
 12+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13+ GNU General Public License for more details.
 14+
 15+ You should have received a copy of the GNU General Public License
 16+ along with this program; if not, write to the Free Software
 17+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
 18+ 02110-1301, USA. */
 19+
 20+/* Written August 1989 by Mike Haertel.
 21+ The author may be reached (Email) at the address mike@ai.mit.edu,
 22+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
 23+
 24+struct kwsmatch
 25+{
 26+ int index; /* Index number of matching keyword. */
 27+ size_t offset[1]; /* Offset of each submatch. */
 28+ size_t size[1]; /* Length of each submatch. */
 29+};
 30+
 31+typedef void* kwset_t;
 32+
 33+/* Return an opaque pointer to a newly allocated keyword set, or NULL
 34+ if enough memory cannot be obtained. The argument if non-NULL
 35+ specifies a table of character translations to be applied to all
 36+ pattern and search text. */
 37+extern kwset_t kwsalloc(char const *);
 38+
 39+/* Incrementally extend the keyword set to include the given string.
 40+ Return NULL for success, or an error message. Remember an index
 41+ number for each keyword included in the set. */
 42+extern const char *kwsincr(kwset_t, char const *, size_t);
 43+
 44+/* When the keyword set has been completely built, prepare it for
 45+ use. Return NULL for success, or an error message. */
 46+extern const char *kwsprep(kwset_t);
 47+
 48+/* Search through the given buffer for a member of the keyword set.
 49+ Return a pointer to the leftmost longest match found, or NULL if
 50+ no match is found. If foundlen is non-NULL, store the length of
 51+ the matching substring in the integer it points to. Similarly,
 52+ if foundindex is non-NULL, store the index of the particular
 53+ keyword found therein. */
 54+extern size_t kwsexec(kwset_t, char const *, size_t, struct kwsmatch *);
 55+
 56+/* Deallocate the given keyword set and all its associated storage. */
 57+extern void kwsfree(kwset_t);
 58+
Property changes on: trunk/extensions/FastStringSearch/kwset.h
___________________________________________________________________
Added: svn:eol-style
159 + native
Index: trunk/extensions/FastStringSearch/config.m4
@@ -0,0 +1,63 @@
 2+dnl $Id$
 3+dnl config.m4 for extension fss
 4+
 5+dnl Comments in this file start with the string 'dnl'.
 6+dnl Remove where necessary. This file will not work
 7+dnl without editing.
 8+
 9+dnl If your extension references something external, use with:
 10+
 11+dnl PHP_ARG_WITH(fss, for fss support,
 12+dnl Make sure that the comment is aligned:
 13+dnl [ --with-fss Include fss support])
 14+
 15+dnl Otherwise use enable:
 16+
 17+PHP_ARG_ENABLE(fss, whether to enable fss support,
 18+dnl Make sure that the comment is aligned:
 19+[ --enable-fss Enable fss support])
 20+
 21+if test "$PHP_FSS" != "no"; then
 22+ dnl Write more examples of tests here...
 23+
 24+ dnl # --with-fss -> check with-path
 25+ dnl SEARCH_PATH="/usr/local /usr" # you might want to change this
 26+ dnl SEARCH_FOR="/include/fss.h" # you most likely want to change this
 27+ dnl if test -r $PHP_FSS/$SEARCH_FOR; then # path given as parameter
 28+ dnl FSS_DIR=$PHP_FSS
 29+ dnl else # search default path list
 30+ dnl AC_MSG_CHECKING([for fss files in default path])
 31+ dnl for i in $SEARCH_PATH ; do
 32+ dnl if test -r $i/$SEARCH_FOR; then
 33+ dnl FSS_DIR=$i
 34+ dnl AC_MSG_RESULT(found in $i)
 35+ dnl fi
 36+ dnl done
 37+ dnl fi
 38+ dnl
 39+ dnl if test -z "$FSS_DIR"; then
 40+ dnl AC_MSG_RESULT([not found])
 41+ dnl AC_MSG_ERROR([Please reinstall the fss distribution])
 42+ dnl fi
 43+
 44+ dnl # --with-fss -> add include path
 45+ dnl PHP_ADD_INCLUDE($FSS_DIR/include)
 46+
 47+ dnl # --with-fss -> check for lib and symbol presence
 48+ dnl LIBNAME=fss # you may want to change this
 49+ dnl LIBSYMBOL=fss # you most likely want to change this
 50+
 51+ dnl PHP_CHECK_LIBRARY($LIBNAME,$LIBSYMBOL,
 52+ dnl [
 53+ dnl PHP_ADD_LIBRARY_WITH_PATH($LIBNAME, $FSS_DIR/lib, FSS_SHARED_LIBADD)
 54+ dnl AC_DEFINE(HAVE_FSSLIB,1,[ ])
 55+ dnl ],[
 56+ dnl AC_MSG_ERROR([wrong fss lib version or lib not found])
 57+ dnl ],[
 58+ dnl -L$FSS_DIR/lib -lm -ldl
 59+ dnl ])
 60+ dnl
 61+ dnl PHP_SUBST(FSS_SHARED_LIBADD)
 62+
 63+ PHP_NEW_EXTENSION(fss, fss.c kwset.c obstack.c, $ext_shared)
 64+fi
Property changes on: trunk/extensions/FastStringSearch/config.m4
___________________________________________________________________
Added: svn:eol-style
165 + native
Index: trunk/extensions/FastStringSearch/obstack.c
@@ -0,0 +1,598 @@
 2+/* obstack.c - subroutines used implicitly by object stack macros
 3+ Copyright (C) 1988-1994,96,97,98,99 Free Software Foundation, Inc.
 4+
 5+ This file is part of the GNU C Library. Its master source is NOT part of
 6+ the C library, however. The master source lives in /gd/gnu/lib.
 7+
 8+ The GNU C Library is free software; you can redistribute it and/or
 9+ modify it under the terms of the GNU Library General Public License as
 10+ published by the Free Software Foundation; either version 2 of the
 11+ License, or (at your option) any later version.
 12+
 13+ The GNU C Library is distributed in the hope that it will be useful,
 14+ but WITHOUT ANY WARRANTY; without even the implied warranty of
 15+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 16+ Library General Public License for more details.
 17+
 18+ You should have received a copy of the GNU Library General Public
 19+ License along with the GNU C Library; see the file COPYING.LIB. If not,
 20+ write to the Free Software Foundation, Inc.,
 21+ 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
 22+
 23+#ifdef HAVE_CONFIG_H
 24+#include <config.h>
 25+#endif
 26+
 27+#include "obstack.h"
 28+
 29+/* NOTE BEFORE MODIFYING THIS FILE: This version number must be
 30+ incremented whenever callers compiled using an old obstack.h can no
 31+ longer properly call the functions in this obstack.c. */
 32+#define OBSTACK_INTERFACE_VERSION 1
 33+
 34+/* Comment out all this code if we are using the GNU C Library, and are not
 35+ actually compiling the library itself, and the installed library
 36+ supports the same library interface we do. This code is part of the GNU
 37+ C Library, but also included in many other GNU distributions. Compiling
 38+ and linking in this code is a waste when using the GNU C library
 39+ (especially if it is a shared library). Rather than having every GNU
 40+ program understand `configure --with-gnu-libc' and omit the object
 41+ files, it is simpler to just do this in the source for each such file. */
 42+
 43+#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */
 44+#if !defined (_LIBC) && defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ > 1
 45+#include <gnu-versions.h>
 46+#if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION
 47+#define ELIDE_CODE
 48+#endif
 49+#endif
 50+
 51+
 52+#ifndef ELIDE_CODE
 53+
 54+
 55+#if defined (__STDC__) && __STDC__
 56+#define POINTER void *
 57+#else
 58+#define POINTER char *
 59+#endif
 60+
 61+/* Determine default alignment. */
 62+struct fooalign {char x; double d;};
 63+#define DEFAULT_ALIGNMENT \
 64+ ((PTR_INT_TYPE) ((char *) &((struct fooalign *) 0)->d - (char *) 0))
 65+/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
 66+ But in fact it might be less smart and round addresses to as much as
 67+ DEFAULT_ROUNDING. So we prepare for it to do that. */
 68+union fooround {long x; double d;};
 69+#define DEFAULT_ROUNDING (sizeof (union fooround))
 70+
 71+/* When we copy a long block of data, this is the unit to do it with.
 72+ On some machines, copying successive ints does not work;
 73+ in such a case, redefine COPYING_UNIT to `long' (if that works)
 74+ or `char' as a last resort. */
 75+#ifndef COPYING_UNIT
 76+#define COPYING_UNIT int
 77+#endif
 78+
 79+
 80+/* The functions allocating more room by calling `obstack_chunk_alloc'
 81+ jump to the handler pointed to by `obstack_alloc_failed_handler'.
 82+ This can be set to a user defined function which should either
 83+ abort gracefully or use longjump - but shouldn't return. This
 84+ variable by default points to the internal function
 85+ `print_and_abort'. */
 86+#if defined (__STDC__) && __STDC__
 87+static void print_and_abort (void);
 88+void (*obstack_alloc_failed_handler) (void) = print_and_abort;
 89+#else
 90+static void print_and_abort ();
 91+void (*obstack_alloc_failed_handler) () = print_and_abort;
 92+#endif
 93+
 94+/* Exit value used when `print_and_abort' is used. */
 95+#if defined __GNU_LIBRARY__ || defined HAVE_STDLIB_H
 96+#include <stdlib.h>
 97+#endif
 98+#ifndef EXIT_FAILURE
 99+#define EXIT_FAILURE 1
 100+#endif
 101+int obstack_exit_failure = EXIT_FAILURE;
 102+
 103+/* The non-GNU-C macros copy the obstack into this global variable
 104+ to avoid multiple evaluation. */
 105+
 106+struct obstack *_obstack;
 107+
 108+/* Define a macro that either calls functions with the traditional malloc/free
 109+ calling interface, or calls functions with the mmalloc/mfree interface
 110+ (that adds an extra first argument), based on the state of use_extra_arg.
 111+ For free, do not use ?:, since some compilers, like the MIPS compilers,
 112+ do not allow (expr) ? void : void. */
 113+
 114+#if defined (__STDC__) && __STDC__
 115+#define CALL_CHUNKFUN(h, size) \
 116+ (((h) -> use_extra_arg) \
 117+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
 118+ : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size)))
 119+
 120+#define CALL_FREEFUN(h, old_chunk) \
 121+ do { \
 122+ if ((h) -> use_extra_arg) \
 123+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
 124+ else \
 125+ (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \
 126+ } while (0)
 127+#else
 128+#define CALL_CHUNKFUN(h, size) \
 129+ (((h) -> use_extra_arg) \
 130+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
 131+ : (*(struct _obstack_chunk *(*) ()) (h)->chunkfun) ((size)))
 132+
 133+#define CALL_FREEFUN(h, old_chunk) \
 134+ do { \
 135+ if ((h) -> use_extra_arg) \
 136+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
 137+ else \
 138+ (*(void (*) ()) (h)->freefun) ((old_chunk)); \
 139+ } while (0)
 140+#endif
 141+
 142+
 143+/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
 144+ Objects start on multiples of ALIGNMENT (0 means use default).
 145+ CHUNKFUN is the function to use to allocate chunks,
 146+ and FREEFUN the function to free them.
 147+
 148+ Return nonzero if successful, calls obstack_alloc_failed_handler if
 149+ allocation fails. */
 150+
 151+int
 152+_obstack_begin (h, size, alignment, chunkfun, freefun)
 153+ struct obstack *h;
 154+ int size;
 155+ int alignment;
 156+#if defined (__STDC__) && __STDC__
 157+ POINTER (*chunkfun) (long);
 158+ void (*freefun) (void *);
 159+#else
 160+ POINTER (*chunkfun) ();
 161+ void (*freefun) ();
 162+#endif
 163+{
 164+ register struct _obstack_chunk *chunk; /* points to new chunk */
 165+
 166+ if (alignment == 0)
 167+ alignment = (int) DEFAULT_ALIGNMENT;
 168+ if (size == 0)
 169+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
 170+ {
 171+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
 172+ Use the values for range checking, because if range checking is off,
 173+ the extra bytes won't be missed terribly, but if range checking is on
 174+ and we used a larger request, a whole extra 4096 bytes would be
 175+ allocated.
 176+
 177+ These number are irrelevant to the new GNU malloc. I suspect it is
 178+ less sensitive to the size of the request. */
 179+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
 180+ + 4 + DEFAULT_ROUNDING - 1)
 181+ & ~(DEFAULT_ROUNDING - 1));
 182+ size = 4096 - extra;
 183+ }
 184+
 185+#if defined (__STDC__) && __STDC__
 186+ h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;
 187+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
 188+#else
 189+ h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
 190+ h->freefun = freefun;
 191+#endif
 192+ h->chunk_size = size;
 193+ h->alignment_mask = alignment - 1;
 194+ h->use_extra_arg = 0;
 195+
 196+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
 197+ if (!chunk)
 198+ (*obstack_alloc_failed_handler) ();
 199+ h->next_free = h->object_base = chunk->contents;
 200+ h->chunk_limit = chunk->limit
 201+ = (char *) chunk + h->chunk_size;
 202+ chunk->prev = 0;
 203+ /* The initial chunk now contains no empty object. */
 204+ h->maybe_empty_object = 0;
 205+ h->alloc_failed = 0;
 206+ return 1;
 207+}
 208+
 209+int
 210+_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
 211+ struct obstack *h;
 212+ int size;
 213+ int alignment;
 214+#if defined (__STDC__) && __STDC__
 215+ POINTER (*chunkfun) (POINTER, long);
 216+ void (*freefun) (POINTER, POINTER);
 217+#else
 218+ POINTER (*chunkfun) ();
 219+ void (*freefun) ();
 220+#endif
 221+ POINTER arg;
 222+{
 223+ register struct _obstack_chunk *chunk; /* points to new chunk */
 224+
 225+ if (alignment == 0)
 226+ alignment = (int) DEFAULT_ALIGNMENT;
 227+ if (size == 0)
 228+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
 229+ {
 230+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
 231+ Use the values for range checking, because if range checking is off,
 232+ the extra bytes won't be missed terribly, but if range checking is on
 233+ and we used a larger request, a whole extra 4096 bytes would be
 234+ allocated.
 235+
 236+ These number are irrelevant to the new GNU malloc. I suspect it is
 237+ less sensitive to the size of the request. */
 238+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
 239+ + 4 + DEFAULT_ROUNDING - 1)
 240+ & ~(DEFAULT_ROUNDING - 1));
 241+ size = 4096 - extra;
 242+ }
 243+
 244+#if defined(__STDC__) && __STDC__
 245+ h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun;
 246+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
 247+#else
 248+ h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
 249+ h->freefun = freefun;
 250+#endif
 251+ h->chunk_size = size;
 252+ h->alignment_mask = alignment - 1;
 253+ h->extra_arg = arg;
 254+ h->use_extra_arg = 1;
 255+
 256+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
 257+ if (!chunk)
 258+ (*obstack_alloc_failed_handler) ();
 259+ h->next_free = h->object_base = chunk->contents;
 260+ h->chunk_limit = chunk->limit
 261+ = (char *) chunk + h->chunk_size;
 262+ chunk->prev = 0;
 263+ /* The initial chunk now contains no empty object. */
 264+ h->maybe_empty_object = 0;
 265+ h->alloc_failed = 0;
 266+ return 1;
 267+}
 268+
 269+/* Allocate a new current chunk for the obstack *H
 270+ on the assumption that LENGTH bytes need to be added
 271+ to the current object, or a new object of length LENGTH allocated.
 272+ Copies any partial object from the end of the old chunk
 273+ to the beginning of the new one. */
 274+
 275+void
 276+_obstack_newchunk (h, length)
 277+ struct obstack *h;
 278+ int length;
 279+{
 280+ register struct _obstack_chunk *old_chunk = h->chunk;
 281+ register struct _obstack_chunk *new_chunk;
 282+ register long new_size;
 283+ register long obj_size = h->next_free - h->object_base;
 284+ register long i;
 285+ long already;
 286+
 287+ /* Compute size for new chunk. */
 288+ new_size = (obj_size + length) + (obj_size >> 3) + 100;
 289+ if (new_size < h->chunk_size)
 290+ new_size = h->chunk_size;
 291+
 292+ /* Allocate and initialize the new chunk. */
 293+ new_chunk = CALL_CHUNKFUN (h, new_size);
 294+ if (!new_chunk)
 295+ (*obstack_alloc_failed_handler) ();
 296+ h->chunk = new_chunk;
 297+ new_chunk->prev = old_chunk;
 298+ new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
 299+
 300+ /* Move the existing object to the new chunk.
 301+ Word at a time is fast and is safe if the object
 302+ is sufficiently aligned. */
 303+ if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
 304+ {
 305+ for (i = obj_size / sizeof (COPYING_UNIT) - 1;
 306+ i >= 0; i--)
 307+ ((COPYING_UNIT *)new_chunk->contents)[i]
 308+ = ((COPYING_UNIT *)h->object_base)[i];
 309+ /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
 310+ but that can cross a page boundary on a machine
 311+ which does not do strict alignment for COPYING_UNITS. */
 312+ already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
 313+ }
 314+ else
 315+ already = 0;
 316+ /* Copy remaining bytes one by one. */
 317+ for (i = already; i < obj_size; i++)
 318+ new_chunk->contents[i] = h->object_base[i];
 319+
 320+ /* If the object just copied was the only data in OLD_CHUNK,
 321+ free that chunk and remove it from the chain.
 322+ But not if that chunk might contain an empty object. */
 323+ if (h->object_base == old_chunk->contents && ! h->maybe_empty_object)
 324+ {
 325+ new_chunk->prev = old_chunk->prev;
 326+ CALL_FREEFUN (h, old_chunk);
 327+ }
 328+
 329+ h->object_base = new_chunk->contents;
 330+ h->next_free = h->object_base + obj_size;
 331+ /* The new chunk certainly contains no empty object yet. */
 332+ h->maybe_empty_object = 0;
 333+}
 334+
 335+/* Return nonzero if object OBJ has been allocated from obstack H.
 336+ This is here for debugging.
 337+ If you use it in a program, you are probably losing. */
 338+
 339+#if defined (__STDC__) && __STDC__
 340+/* Suppress -Wmissing-prototypes warning. We don't want to declare this in
 341+ obstack.h because it is just for debugging. */
 342+int _obstack_allocated_p (struct obstack *h, POINTER obj);
 343+#endif
 344+
 345+int
 346+_obstack_allocated_p (h, obj)
 347+ struct obstack *h;
 348+ POINTER obj;
 349+{
 350+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
 351+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
 352+
 353+ lp = (h)->chunk;
 354+ /* We use >= rather than > since the object cannot be exactly at
 355+ the beginning of the chunk but might be an empty object exactly
 356+ at the end of an adjacent chunk. */
 357+ while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
 358+ {
 359+ plp = lp->prev;
 360+ lp = plp;
 361+ }
 362+ return lp != 0;
 363+}
 364+
 365+/* Free objects in obstack H, including OBJ and everything allocate
 366+ more recently than OBJ. If OBJ is zero, free everything in H. */
 367+
 368+#undef obstack_free
 369+
 370+/* This function has two names with identical definitions.
 371+ This is the first one, called from non-ANSI code. */
 372+
 373+void
 374+_obstack_free (h, obj)
 375+ struct obstack *h;
 376+ POINTER obj;
 377+{
 378+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
 379+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
 380+
 381+ lp = h->chunk;
 382+ /* We use >= because there cannot be an object at the beginning of a chunk.
 383+ But there can be an empty object at that address
 384+ at the end of another chunk. */
 385+ while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
 386+ {
 387+ plp = lp->prev;
 388+ CALL_FREEFUN (h, lp);
 389+ lp = plp;
 390+ /* If we switch chunks, we can't tell whether the new current
 391+ chunk contains an empty object, so assume that it may. */
 392+ h->maybe_empty_object = 1;
 393+ }
 394+ if (lp)
 395+ {
 396+ h->object_base = h->next_free = (char *) (obj);
 397+ h->chunk_limit = lp->limit;
 398+ h->chunk = lp;
 399+ }
 400+ else if (obj != 0)
 401+ /* obj is not in any of the chunks! */
 402+ abort ();
 403+}
 404+
 405+/* This function is used from ANSI code. */
 406+
 407+void
 408+obstack_free (h, obj)
 409+ struct obstack *h;
 410+ POINTER obj;
 411+{
 412+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
 413+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
 414+
 415+ lp = h->chunk;
 416+ /* We use >= because there cannot be an object at the beginning of a chunk.
 417+ But there can be an empty object at that address
 418+ at the end of another chunk. */
 419+ while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
 420+ {
 421+ plp = lp->prev;
 422+ CALL_FREEFUN (h, lp);
 423+ lp = plp;
 424+ /* If we switch chunks, we can't tell whether the new current
 425+ chunk contains an empty object, so assume that it may. */
 426+ h->maybe_empty_object = 1;
 427+ }
 428+ if (lp)
 429+ {
 430+ h->object_base = h->next_free = (char *) (obj);
 431+ h->chunk_limit = lp->limit;
 432+ h->chunk = lp;
 433+ }
 434+ else if (obj != 0)
 435+ /* obj is not in any of the chunks! */
 436+ abort ();
 437+}
 438+
 439+int
 440+_obstack_memory_used (h)
 441+ struct obstack *h;
 442+{
 443+ register struct _obstack_chunk* lp;
 444+ register int nbytes = 0;
 445+
 446+ for (lp = h->chunk; lp != 0; lp = lp->prev)
 447+ {
 448+ nbytes += lp->limit - (char *) lp;
 449+ }
 450+ return nbytes;
 451+}
 452+
 453+/* Define the error handler. */
 454+#ifndef _
 455+# ifdef HAVE_LIBINTL_H
 456+# include <libintl.h>
 457+# ifndef _
 458+# define _(Str) gettext (Str)
 459+# endif
 460+# else
 461+# define _(Str) (Str)
 462+# endif
 463+#endif
 464+#if defined _LIBC && defined USE_IN_LIBIO
 465+# include <libio/iolibio.h>
 466+# define fputs(s, f) _IO_fputs (s, f)
 467+#endif
 468+
 469+static void
 470+print_and_abort ()
 471+{
 472+ fputs (_("memory exhausted"), stderr);
 473+ fputc ('\n', stderr);
 474+ exit (obstack_exit_failure);
 475+}
 476+
 477+#if 0
 478+/* These are now turned off because the applications do not use it
 479+ and it uses bcopy via obstack_grow, which causes trouble on sysV. */
 480+
 481+/* Now define the functional versions of the obstack macros.
 482+ Define them to simply use the corresponding macros to do the job. */
 483+
 484+#if defined (__STDC__) && __STDC__
 485+/* These function definitions do not work with non-ANSI preprocessors;
 486+ they won't pass through the macro names in parentheses. */
 487+
 488+/* The function names appear in parentheses in order to prevent
 489+ the macro-definitions of the names from being expanded there. */
 490+
 491+POINTER (obstack_base) (obstack)
 492+ struct obstack *obstack;
 493+{
 494+ return obstack_base (obstack);
 495+}
 496+
 497+POINTER (obstack_next_free) (obstack)
 498+ struct obstack *obstack;
 499+{
 500+ return obstack_next_free (obstack);
 501+}
 502+
 503+int (obstack_object_size) (obstack)
 504+ struct obstack *obstack;
 505+{
 506+ return obstack_object_size (obstack);
 507+}
 508+
 509+int (obstack_room) (obstack)
 510+ struct obstack *obstack;
 511+{
 512+ return obstack_room (obstack);
 513+}
 514+
 515+int (obstack_make_room) (obstack, length)
 516+ struct obstack *obstack;
 517+ int length;
 518+{
 519+ return obstack_make_room (obstack, length);
 520+}
 521+
 522+void (obstack_grow) (obstack, pointer, length)
 523+ struct obstack *obstack;
 524+ POINTER pointer;
 525+ int length;
 526+{
 527+ obstack_grow (obstack, pointer, length);
 528+}
 529+
 530+void (obstack_grow0) (obstack, pointer, length)
 531+ struct obstack *obstack;
 532+ POINTER pointer;
 533+ int length;
 534+{
 535+ obstack_grow0 (obstack, pointer, length);
 536+}
 537+
 538+void (obstack_1grow) (obstack, character)
 539+ struct obstack *obstack;
 540+ int character;
 541+{
 542+ obstack_1grow (obstack, character);
 543+}
 544+
 545+void (obstack_blank) (obstack, length)
 546+ struct obstack *obstack;
 547+ int length;
 548+{
 549+ obstack_blank (obstack, length);
 550+}
 551+
 552+void (obstack_1grow_fast) (obstack, character)
 553+ struct obstack *obstack;
 554+ int character;
 555+{
 556+ obstack_1grow_fast (obstack, character);
 557+}
 558+
 559+void (obstack_blank_fast) (obstack, length)
 560+ struct obstack *obstack;
 561+ int length;
 562+{
 563+ obstack_blank_fast (obstack, length);
 564+}
 565+
 566+POINTER (obstack_finish) (obstack)
 567+ struct obstack *obstack;
 568+{
 569+ return obstack_finish (obstack);
 570+}
 571+
 572+POINTER (obstack_alloc) (obstack, length)
 573+ struct obstack *obstack;
 574+ int length;
 575+{
 576+ return obstack_alloc (obstack, length);
 577+}
 578+
 579+POINTER (obstack_copy) (obstack, pointer, length)
 580+ struct obstack *obstack;
 581+ POINTER pointer;
 582+ int length;
 583+{
 584+ return obstack_copy (obstack, pointer, length);
 585+}
 586+
 587+POINTER (obstack_copy0) (obstack, pointer, length)
 588+ struct obstack *obstack;
 589+ POINTER pointer;
 590+ int length;
 591+{
 592+ return obstack_copy0 (obstack, pointer, length);
 593+}
 594+
 595+#endif /* __STDC__ */
 596+
 597+#endif /* 0 */
 598+
 599+#endif /* !ELIDE_CODE */
Property changes on: trunk/extensions/FastStringSearch/obstack.c
___________________________________________________________________
Added: svn:eol-style
1600 + native
Index: trunk/extensions/FastStringSearch/php_fss.h
@@ -0,0 +1,49 @@
 2+/*
 3+ * PHP extension for fast string search routines
 4+ * Copyright Tim Starling, 2006
 5+ * License: DWTFYWWI
 6+*/
 7+
 8+#ifndef PHP_FSS_H
 9+#define PHP_FSS_H
 10+
 11+extern zend_module_entry fss_module_entry;
 12+#define phpext_fss_ptr &fss_module_entry
 13+
 14+#ifdef PHP_WIN32
 15+#define PHP_FSS_API __declspec(dllexport)
 16+#else
 17+#define PHP_FSS_API
 18+#endif
 19+
 20+#ifdef ZTS
 21+#include "TSRM.h"
 22+#endif
 23+
 24+PHP_MINIT_FUNCTION(fss);
 25+PHP_MSHUTDOWN_FUNCTION(fss);
 26+PHP_MINFO_FUNCTION(fss);
 27+
 28+PHP_FUNCTION(fss_prep_search);
 29+PHP_FUNCTION(fss_exec_search);
 30+PHP_FUNCTION(fss_prep_replace);
 31+PHP_FUNCTION(fss_exec_replace);
 32+PHP_FUNCTION(fss_free);
 33+
 34+#ifdef ZTS
 35+#define FSS_G(v) TSRMG(fss_globals_id, zend_fss_globals *, v)
 36+#else
 37+#define FSS_G(v) (fss_globals.v)
 38+#endif
 39+
 40+#endif /* PHP_FSS_H */
 41+
 42+
 43+/*
 44+ * Local variables:
 45+ * tab-width: 4
 46+ * c-basic-offset: 4
 47+ * End:
 48+ * vim600: noet sw=4 ts=4 fdm=marker
 49+ * vim<600: noet sw=4 ts=4
 50+ */
Property changes on: trunk/extensions/FastStringSearch/php_fss.h
___________________________________________________________________
Added: svn:eol-style
151 + native
Index: trunk/extensions/FastStringSearch/fss.c
@@ -0,0 +1,343 @@
 2+/*
 3+ * PHP extension for fast string search routines
 4+ * Copyright Tim Starling, 2006
 5+ * License: DWTFYWWI
 6+*/
 7+
 8+#ifdef HAVE_CONFIG_H
 9+#include "config.h"
 10+#endif
 11+
 12+#include "php.h"
 13+#include "php_ini.h"
 14+#include "ext/standard/info.h"
 15+#include "php_fss.h"
 16+#include "ext/standard/php_smart_str.h"
 17+#include "kwset.h"
 18+
 19+typedef struct {
 20+ kwset_t set;
 21+ int replace_size;
 22+ zval * replace[1];
 23+} fss_resource_t;
 24+
 25+static void _php_fss_close(zend_rsrc_list_entry *rsrc TSRMLS_DC);
 26+
 27+/* True global resources - no need for thread safety here */
 28+static int le_fss;
 29+
 30+/* {{{ fss_functions[]
 31+ */
 32+zend_function_entry fss_functions[] = {
 33+ PHP_FE(fss_prep_search, NULL)
 34+ PHP_FE(fss_exec_search, NULL)
 35+ PHP_FE(fss_prep_replace, NULL)
 36+ PHP_FE(fss_exec_replace, NULL)
 37+ PHP_FE(fss_free, NULL)
 38+ {NULL, NULL, NULL} /* Must be the last line in fss_functions[] */
 39+};
 40+/* }}} */
 41+
 42+/* {{{ fss_module_entry
 43+ */
 44+zend_module_entry fss_module_entry = {
 45+#if ZEND_MODULE_API_NO >= 20010901
 46+ STANDARD_MODULE_HEADER,
 47+#endif
 48+ "fss",
 49+ fss_functions,
 50+ PHP_MINIT(fss),
 51+ PHP_MSHUTDOWN(fss),
 52+ NULL, /* RINIT */
 53+ NULL, /* RSHUTDOWN */
 54+ PHP_MINFO(fss),
 55+#if ZEND_MODULE_API_NO >= 20010901
 56+ "0.1.1",
 57+#endif
 58+ STANDARD_MODULE_PROPERTIES
 59+};
 60+/* }}} */
 61+
 62+#ifdef COMPILE_DL_FSS
 63+ZEND_GET_MODULE(fss)
 64+#endif
 65+
 66+
 67+
 68+/* {{{ PHP_MINIT_FUNCTION
 69+ */
 70+PHP_MINIT_FUNCTION(fss)
 71+{
 72+ le_fss = zend_register_list_destructors_ex(_php_fss_close, NULL, "fss", module_number);
 73+ return SUCCESS;
 74+}
 75+/* }}} */
 76+
 77+/* {{{ PHP_MSHUTDOWN_FUNCTION
 78+ */
 79+PHP_MSHUTDOWN_FUNCTION(fss)
 80+{
 81+ return SUCCESS;
 82+}
 83+/* }}} */
 84+
 85+/* {{{ PHP_MINFO_FUNCTION
 86+ */
 87+PHP_MINFO_FUNCTION(fss)
 88+{
 89+ php_info_print_table_start();
 90+ php_info_print_table_header(2, "Fast string search support", "enabled");
 91+ php_info_print_table_end();
 92+}
 93+/* }}} */
 94+
 95+
 96+/* {{{ proto resource fss_prep_search(mixed needle)
 97+ Prepare a string search */
 98+PHP_FUNCTION(fss_prep_search)
 99+{
 100+ int argc = ZEND_NUM_ARGS();
 101+ zval *needle = NULL, **elem;
 102+ fss_resource_t * res;
 103+ HashTable * hash;
 104+ HashPosition hpos;
 105+ const char *error;
 106+
 107+ if (zend_parse_parameters(argc TSRMLS_CC, "z", &needle) == FAILURE)
 108+ return;
 109+
 110+ res = emalloc(sizeof(fss_resource_t));
 111+ res->set = kwsalloc(NULL);
 112+ res->replace_size = 0;
 113+
 114+ if (Z_TYPE_P(needle) == IS_ARRAY) {
 115+ hash = Z_ARRVAL_P(needle);
 116+ for (zend_hash_internal_pointer_reset_ex(hash, &hpos);
 117+ zend_hash_get_current_data_ex(hash, (void**)&elem, &hpos) == SUCCESS;
 118+ zend_hash_move_forward_ex(hash, &hpos))
 119+ {
 120+ convert_to_string_ex(elem);
 121+ /* Don't add zero-length strings, that will cause infinite loops in
 122+ search routines */
 123+ if (!Z_STRLEN_PP(elem)) {
 124+ continue;
 125+ }
 126+ error = kwsincr(res->set, Z_STRVAL_PP(elem), Z_STRLEN_PP(elem));
 127+ if (error) {
 128+ php_error(E_WARNING, "fss_prep_search: %s", error);
 129+ }
 130+ }
 131+ } else {
 132+ convert_to_string_ex(&needle);
 133+ error = kwsincr(res->set, Z_STRVAL_P(needle), Z_STRLEN_P(needle));
 134+ if (error) {
 135+ php_error(E_WARNING, "fss_prep_search: %s", error);
 136+ }
 137+ }
 138+ kwsprep(res->set);
 139+ ZEND_REGISTER_RESOURCE(return_value, res, le_fss);
 140+}
 141+/* }}} */
 142+
 143+/* {{{ proto array fss_exec_search(resource handle, string haystack [, int offset])
 144+ Execute a string search, return the first match */
 145+PHP_FUNCTION(fss_exec_search)
 146+{
 147+ char *haystack = NULL;
 148+ int argc = ZEND_NUM_ARGS();
 149+ int handle_id = -1;
 150+ int haystack_len;
 151+ int offset = 0;
 152+ zval *handle = NULL, *temp;
 153+ fss_resource_t * res;
 154+ struct kwsmatch m;
 155+ size_t match_pos;
 156+
 157+ if (zend_parse_parameters(argc TSRMLS_CC, "rs|l", &handle, &haystack, &haystack_len, &offset) == FAILURE)
 158+ return;
 159+
 160+ ZEND_FETCH_RESOURCE(res, fss_resource_t*, &handle, handle_id, "fss", le_fss);
 161+ match_pos = kwsexec(res->set, haystack + offset, haystack_len - offset, &m);
 162+
 163+ if (match_pos == (size_t)-1) {
 164+ RETURN_FALSE;
 165+ }
 166+
 167+ array_init(return_value);
 168+
 169+ MAKE_STD_ZVAL(temp);
 170+ ZVAL_LONG(temp, m.offset[0] + offset);
 171+ zend_hash_next_index_insert(HASH_OF(return_value), (void *)&temp, sizeof(zval *), NULL);
 172+
 173+ MAKE_STD_ZVAL(temp);
 174+ ZVAL_LONG(temp, m.size[0]);
 175+ zend_hash_next_index_insert(HASH_OF(return_value), (void *)&temp, sizeof(zval *), NULL);
 176+}
 177+/* }}} */
 178+
 179+/* {{{ proto resource fss_prep_replace(array replace_pairs)
 180+ Prepare a search and replace operation */
 181+PHP_FUNCTION(fss_prep_replace)
 182+{
 183+ int argc = ZEND_NUM_ARGS();
 184+ zval *replace_pairs = NULL, **value;
 185+ HashTable * hash;
 186+ HashPosition hpos;
 187+ fss_resource_t * res;
 188+ const char *error;
 189+ int i;
 190+ char *string_key;
 191+ char buffer[40];
 192+ uint string_key_len;
 193+ ulong num_key;
 194+
 195+
 196+ if (zend_parse_parameters(argc TSRMLS_CC, "a", &replace_pairs) == FAILURE)
 197+ return;
 198+
 199+ hash = Z_ARRVAL_P(replace_pairs);
 200+
 201+ /* fss_resource_t has an open-ended array, we allocate enough memory for the
 202+ header plus all the array elements, plus one extra element for good measure */
 203+ res = safe_emalloc(sizeof(zval*), hash->nNumOfElements, sizeof(fss_resource_t));
 204+ res->set = kwsalloc(NULL);
 205+ res->replace_size = hash->nNumOfElements;
 206+
 207+ for (zend_hash_internal_pointer_reset_ex(hash, &hpos), i = 0;
 208+ zend_hash_get_current_data_ex(hash, (void**)&value, &hpos) == SUCCESS;
 209+ zend_hash_move_forward_ex(hash, &hpos), ++i)
 210+ {
 211+ /* Convert numeric keys to string */
 212+ if (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0,
 213+ &hpos) == HASH_KEY_IS_LONG)
 214+ {
 215+ sprintf(buffer, "%u", num_key);
 216+ string_key = buffer;
 217+ string_key_len = strlen(buffer);
 218+ } else {
 219+ /* Minus one for null */
 220+ string_key_len--;
 221+ }
 222+
 223+ /* Don't add zero-length strings, that will cause infinite loops in
 224+ search routines */
 225+ if (!string_key_len) {
 226+ res->replace[i] = NULL;
 227+ continue;
 228+ }
 229+
 230+ /* Add the key to the search tree */
 231+ error = kwsincr(res->set, string_key, string_key_len);
 232+ if (error) {
 233+ res->replace[i] = NULL;
 234+ php_error(E_WARNING, "fss_prep_replace: %s", error);
 235+ continue;
 236+ }
 237+
 238+ /* Add the value to the replace array */
 239+ convert_to_string_ex(value);
 240+ ZVAL_ADDREF(*value);
 241+ res->replace[i] = *value;
 242+ }
 243+ kwsprep(res->set);
 244+ ZEND_REGISTER_RESOURCE(return_value, res, le_fss);
 245+}
 246+/* }}} */
 247+
 248+/* {{{ proto string fss_exec_replace(resource handle, string subject)
 249+ Execute a search and replace operation */
 250+PHP_FUNCTION(fss_exec_replace)
 251+{
 252+ char *subject = NULL;
 253+ int argc = ZEND_NUM_ARGS();
 254+ int handle_id = -1;
 255+ int subject_len;
 256+ zval *handle = NULL;
 257+ size_t match_pos = 0;
 258+ fss_resource_t * res;
 259+ struct kwsmatch m;
 260+ smart_str result = {0};
 261+ zval *temp;
 262+
 263+ if (zend_parse_parameters(argc TSRMLS_CC, "rs", &handle, &subject, &subject_len) == FAILURE)
 264+ return;
 265+
 266+ ZEND_FETCH_RESOURCE(res, fss_resource_t*, &handle, handle_id, "fss", le_fss);
 267+
 268+ while (subject_len > 0 &&
 269+ (size_t)-1 != (match_pos = kwsexec(res->set, subject, subject_len, &m)))
 270+ {
 271+ /* Output the leading portion */
 272+ smart_str_appendl(&result, subject, match_pos);
 273+
 274+ /* Output the replacement portion
 275+ The index may be above the size of the replacement array if the
 276+ object was prepared as a search object instead of a replacement
 277+ object. In that case, we replace the item with an empty string
 278+ */
 279+ if (m.index < res->replace_size) {
 280+ temp = res->replace[m.index];
 281+ if (temp) {
 282+ smart_str_appendl(&result, Z_STRVAL_P(temp), Z_STRLEN_P(temp));
 283+ }
 284+ }
 285+
 286+ /* Increment and continue */
 287+ subject_len -= match_pos + m.size[0];
 288+ subject += match_pos + m.size[0];
 289+ }
 290+ /* Output the remaining portion */
 291+ if (subject_len > 0) {
 292+ smart_str_appendl(&result, subject, subject_len);
 293+ }
 294+ /* Return the result */
 295+ smart_str_0(&result);
 296+ RETVAL_STRINGL(result.c, result.len, 0);
 297+}
 298+/* }}} */
 299+
 300+/* {{{ proto void fss_free(resource handle)
 301+ Free an FSS object */
 302+PHP_FUNCTION(fss_free)
 303+{
 304+ int argc = ZEND_NUM_ARGS();
 305+ int handle_id = -1;
 306+ zval *handle = NULL;
 307+ fss_resource_t * res;
 308+
 309+ if (zend_parse_parameters(argc TSRMLS_CC, "r", &handle) == FAILURE)
 310+ return;
 311+
 312+ ZEND_FETCH_RESOURCE(res, fss_resource_t*, &handle, handle_id, "fss", le_fss);
 313+ if (handle) {
 314+ zend_list_delete(Z_RESVAL_P(handle));
 315+ }
 316+}
 317+/* }}} */
 318+
 319+/* {{{ _php_fss_close
 320+ List destructor for FSS handles */
 321+static void _php_fss_close(zend_rsrc_list_entry *rsrc TSRMLS_DC)
 322+{
 323+ int i;
 324+ fss_resource_t * res = (fss_resource_t *)rsrc->ptr;
 325+ /* Destroy the replace strings */
 326+ for (i = 0; i < res->replace_size; i++) {
 327+ zval_ptr_dtor(&res->replace[i]);
 328+ }
 329+ /* Destroy the kwset structure */
 330+ kwsfree(res->set);
 331+ /* Destroy the resource structure itself */
 332+ efree(res);
 333+}
 334+/* }}} */
 335+
 336+
 337+/*
 338+ * Local variables:
 339+ * tab-width: 4
 340+ * c-basic-offset: 4
 341+ * End:
 342+ * vim600: noet sw=4 ts=4 fdm=marker
 343+ * vim<600: noet sw=4 ts=4
 344+ */
Property changes on: trunk/extensions/FastStringSearch/fss.c
___________________________________________________________________
Added: svn:eol-style
1345 + native
Index: trunk/extensions/FastStringSearch/obstack.h
@@ -0,0 +1,593 @@
 2+/* obstack.h - object stack macros
 3+ Copyright (C) 1988,89,90,91,92,93,94,96,97,98,99 Free Software Foundation, Inc.
 4+
 5+ This file is part of the GNU C Library. Its master source is NOT part of
 6+ the C library, however. The master source lives in /gd/gnu/lib.
 7+
 8+ The GNU C Library is free software; you can redistribute it and/or
 9+ modify it under the terms of the GNU Library General Public License as
 10+ published by the Free Software Foundation; either version 2 of the
 11+ License, or (at your option) any later version.
 12+
 13+ The GNU C Library is distributed in the hope that it will be useful,
 14+ but WITHOUT ANY WARRANTY; without even the implied warranty of
 15+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 16+ Library General Public License for more details.
 17+
 18+ You should have received a copy of the GNU Library General Public
 19+ License along with the GNU C Library; see the file COPYING.LIB. If not,
 20+ write to the Free Software Foundation, Inc.,
 21+ 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
 22+
 23+/* Summary:
 24+
 25+All the apparent functions defined here are macros. The idea
 26+is that you would use these pre-tested macros to solve a
 27+very specific set of problems, and they would run fast.
 28+Caution: no side-effects in arguments please!! They may be
 29+evaluated MANY times!!
 30+
 31+These macros operate a stack of objects. Each object starts life
 32+small, and may grow to maturity. (Consider building a word syllable
 33+by syllable.) An object can move while it is growing. Once it has
 34+been "finished" it never changes address again. So the "top of the
 35+stack" is typically an immature growing object, while the rest of the
 36+stack is of mature, fixed size and fixed address objects.
 37+
 38+These routines grab large chunks of memory, using a function you
 39+supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
 40+by calling `obstack_chunk_free'. You must define them and declare
 41+them before using any obstack macros.
 42+
 43+Each independent stack is represented by a `struct obstack'.
 44+Each of the obstack macros expects a pointer to such a structure
 45+as the first argument.
 46+
 47+One motivation for this package is the problem of growing char strings
 48+in symbol tables. Unless you are "fascist pig with a read-only mind"
 49+--Gosper's immortal quote from HAKMEM item 154, out of context--you
 50+would not like to put any arbitrary upper limit on the length of your
 51+symbols.
 52+
 53+In practice this often means you will build many short symbols and a
 54+few long symbols. At the time you are reading a symbol you don't know
 55+how long it is. One traditional method is to read a symbol into a
 56+buffer, realloc()ating the buffer every time you try to read a symbol
 57+that is longer than the buffer. This is beaut, but you still will
 58+want to copy the symbol from the buffer to a more permanent
 59+symbol-table entry say about half the time.
 60+
 61+With obstacks, you can work differently. Use one obstack for all symbol
 62+names. As you read a symbol, grow the name in the obstack gradually.
 63+When the name is complete, finalize it. Then, if the symbol exists already,
 64+free the newly read name.
 65+
 66+The way we do this is to take a large chunk, allocating memory from
 67+low addresses. When you want to build a symbol in the chunk you just
 68+add chars above the current "high water mark" in the chunk. When you
 69+have finished adding chars, because you got to the end of the symbol,
 70+you know how long the chars are, and you can create a new object.
 71+Mostly the chars will not burst over the highest address of the chunk,
 72+because you would typically expect a chunk to be (say) 100 times as
 73+long as an average object.
 74+
 75+In case that isn't clear, when we have enough chars to make up
 76+the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
 77+so we just point to it where it lies. No moving of chars is
 78+needed and this is the second win: potentially long strings need
 79+never be explicitly shuffled. Once an object is formed, it does not
 80+change its address during its lifetime.
 81+
 82+When the chars burst over a chunk boundary, we allocate a larger
 83+chunk, and then copy the partly formed object from the end of the old
 84+chunk to the beginning of the new larger chunk. We then carry on
 85+accreting characters to the end of the object as we normally would.
 86+
 87+A special macro is provided to add a single char at a time to a
 88+growing object. This allows the use of register variables, which
 89+break the ordinary 'growth' macro.
 90+
 91+Summary:
 92+ We allocate large chunks.
 93+ We carve out one object at a time from the current chunk.
 94+ Once carved, an object never moves.
 95+ We are free to append data of any size to the currently
 96+ growing object.
 97+ Exactly one object is growing in an obstack at any one time.
 98+ You can run one obstack per control block.
 99+ You may have as many control blocks as you dare.
 100+ Because of the way we do it, you can `unwind' an obstack
 101+ back to a previous state. (You may remove objects much
 102+ as you would with a stack.)
 103+*/
 104+
 105+
 106+/* Don't do the contents of this file more than once. */
 107+
 108+#ifndef _OBSTACK_H
 109+#define _OBSTACK_H 1
 110+
 111+#ifdef __cplusplus
 112+extern "C" {
 113+#endif
 114+
 115+/* We use subtraction of (char *) 0 instead of casting to int
 116+ because on word-addressable machines a simple cast to int
 117+ may ignore the byte-within-word field of the pointer. */
 118+
 119+#ifndef __PTR_TO_INT
 120+# define __PTR_TO_INT(P) ((P) - (char *) 0)
 121+#endif
 122+
 123+#ifndef __INT_TO_PTR
 124+# define __INT_TO_PTR(P) ((P) + (char *) 0)
 125+#endif
 126+
 127+/* We need the type of the resulting object. If __PTRDIFF_TYPE__ is
 128+ defined, as with GNU C, use that; that way we don't pollute the
 129+ namespace with <stddef.h>'s symbols. Otherwise, if <stddef.h> is
 130+ available, include it and use ptrdiff_t. In traditional C, long is
 131+ the best that we can do. */
 132+
 133+#ifdef __PTRDIFF_TYPE__
 134+# define PTR_INT_TYPE __PTRDIFF_TYPE__
 135+#else
 136+# ifdef HAVE_STDDEF_H
 137+# include <stddef.h>
 138+# define PTR_INT_TYPE ptrdiff_t
 139+# else
 140+# define PTR_INT_TYPE long
 141+# endif
 142+#endif
 143+
 144+#if defined _LIBC || defined HAVE_STRING_H
 145+# include <string.h>
 146+# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
 147+#else
 148+# ifdef memcpy
 149+# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
 150+# else
 151+# define _obstack_memcpy(To, From, N) bcopy ((From), (To), (N))
 152+# endif
 153+#endif
 154+
 155+struct _obstack_chunk /* Lives at front of each chunk. */
 156+{
 157+ char *limit; /* 1 past end of this chunk */
 158+ struct _obstack_chunk *prev; /* address of prior chunk or NULL */
 159+ char contents[4]; /* objects begin here */
 160+};
 161+
 162+struct obstack /* control current object in current chunk */
 163+{
 164+ long chunk_size; /* preferred size to allocate chunks in */
 165+ struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */
 166+ char *object_base; /* address of object we are building */
 167+ char *next_free; /* where to add next char to current object */
 168+ char *chunk_limit; /* address of char after current chunk */
 169+ PTR_INT_TYPE temp; /* Temporary for some macros. */
 170+ int alignment_mask; /* Mask of alignment for each object. */
 171+#if defined __STDC__ && __STDC__
 172+ /* These prototypes vary based on `use_extra_arg', and we use
 173+ casts to the prototypeless function type in all assignments,
 174+ but having prototypes here quiets -Wstrict-prototypes. */
 175+ struct _obstack_chunk *(*chunkfun) (void *, long);
 176+ void (*freefun) (void *, struct _obstack_chunk *);
 177+ void *extra_arg; /* first arg for chunk alloc/dealloc funcs */
 178+#else
 179+ struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */
 180+ void (*freefun) (); /* User's function to free a chunk. */
 181+ char *extra_arg; /* first arg for chunk alloc/dealloc funcs */
 182+#endif
 183+ unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
 184+ unsigned maybe_empty_object:1;/* There is a possibility that the current
 185+ chunk contains a zero-length object. This
 186+ prevents freeing the chunk if we allocate
 187+ a bigger chunk to replace it. */
 188+ unsigned alloc_failed:1; /* No longer used, as we now call the failed
 189+ handler on error, but retained for binary
 190+ compatibility. */
 191+};
 192+
 193+/* Declare the external functions we use; they are in obstack.c. */
 194+
 195+#if defined __STDC__ && __STDC__
 196+extern void _obstack_newchunk (struct obstack *, int);
 197+extern void _obstack_free (struct obstack *, void *);
 198+extern int _obstack_begin (struct obstack *, int, int,
 199+ void *(*) (long), void (*) (void *));
 200+extern int _obstack_begin_1 (struct obstack *, int, int,
 201+ void *(*) (void *, long),
 202+ void (*) (void *, void *), void *);
 203+extern int _obstack_memory_used (struct obstack *);
 204+#else
 205+extern void _obstack_newchunk ();
 206+extern void _obstack_free ();
 207+extern int _obstack_begin ();
 208+extern int _obstack_begin_1 ();
 209+extern int _obstack_memory_used ();
 210+#endif
 211+
 212+#if defined __STDC__ && __STDC__
 213+
 214+/* Do the function-declarations after the structs
 215+ but before defining the macros. */
 216+
 217+void obstack_init (struct obstack *obstack);
 218+
 219+void * obstack_alloc (struct obstack *obstack, int size);
 220+
 221+void * obstack_copy (struct obstack *obstack, void *address, int size);
 222+void * obstack_copy0 (struct obstack *obstack, void *address, int size);
 223+
 224+void obstack_free (struct obstack *obstack, void *block);
 225+
 226+void obstack_blank (struct obstack *obstack, int size);
 227+
 228+void obstack_grow (struct obstack *obstack, void *data, int size);
 229+void obstack_grow0 (struct obstack *obstack, void *data, int size);
 230+
 231+void obstack_1grow (struct obstack *obstack, int data_char);
 232+void obstack_ptr_grow (struct obstack *obstack, void *data);
 233+void obstack_int_grow (struct obstack *obstack, int data);
 234+
 235+void * obstack_finish (struct obstack *obstack);
 236+
 237+int obstack_object_size (struct obstack *obstack);
 238+
 239+int obstack_room (struct obstack *obstack);
 240+void obstack_make_room (struct obstack *obstack, int size);
 241+void obstack_1grow_fast (struct obstack *obstack, int data_char);
 242+void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
 243+void obstack_int_grow_fast (struct obstack *obstack, int data);
 244+void obstack_blank_fast (struct obstack *obstack, int size);
 245+
 246+void * obstack_base (struct obstack *obstack);
 247+void * obstack_next_free (struct obstack *obstack);
 248+int obstack_alignment_mask (struct obstack *obstack);
 249+int obstack_chunk_size (struct obstack *obstack);
 250+int obstack_memory_used (struct obstack *obstack);
 251+
 252+#endif /* __STDC__ */
 253+
 254+/* Non-ANSI C cannot really support alternative functions for these macros,
 255+ so we do not declare them. */
 256+
 257+/* Error handler called when `obstack_chunk_alloc' failed to allocate
 258+ more memory. This can be set to a user defined function which
 259+ should either abort gracefully or use longjump - but shouldn't
 260+ return. The default action is to print a message and abort. */
 261+#if defined __STDC__ && __STDC__
 262+extern void (*obstack_alloc_failed_handler) (void);
 263+#else
 264+extern void (*obstack_alloc_failed_handler) ();
 265+#endif
 266+
 267+/* Exit value used when `print_and_abort' is used. */
 268+extern int obstack_exit_failure;
 269+
 270+/* Pointer to beginning of object being allocated or to be allocated next.
 271+ Note that this might not be the final address of the object
 272+ because a new chunk might be needed to hold the final size. */
 273+
 274+#define obstack_base(h) ((h)->object_base)
 275+
 276+/* Size for allocating ordinary chunks. */
 277+
 278+#define obstack_chunk_size(h) ((h)->chunk_size)
 279+
 280+/* Pointer to next byte not yet allocated in current chunk. */
 281+
 282+#define obstack_next_free(h) ((h)->next_free)
 283+
 284+/* Mask specifying low bits that should be clear in address of an object. */
 285+
 286+#define obstack_alignment_mask(h) ((h)->alignment_mask)
 287+
 288+/* To prevent prototype warnings provide complete argument list in
 289+ standard C version. */
 290+#if defined __STDC__ && __STDC__
 291+
 292+# define obstack_init(h) \
 293+ _obstack_begin ((h), 0, 0, \
 294+ (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free)
 295+
 296+# define obstack_begin(h, size) \
 297+ _obstack_begin ((h), (size), 0, \
 298+ (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free)
 299+
 300+# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
 301+ _obstack_begin ((h), (size), (alignment), \
 302+ (void *(*) (long)) (chunkfun), (void (*) (void *)) (freefun))
 303+
 304+# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
 305+ _obstack_begin_1 ((h), (size), (alignment), \
 306+ (void *(*) (void *, long)) (chunkfun), \
 307+ (void (*) (void *, void *)) (freefun), (arg))
 308+
 309+# define obstack_chunkfun(h, newchunkfun) \
 310+ ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun))
 311+
 312+# define obstack_freefun(h, newfreefun) \
 313+ ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun))
 314+
 315+#else
 316+
 317+# define obstack_init(h) \
 318+ _obstack_begin ((h), 0, 0, \
 319+ (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
 320+
 321+# define obstack_begin(h, size) \
 322+ _obstack_begin ((h), (size), 0, \
 323+ (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
 324+
 325+# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
 326+ _obstack_begin ((h), (size), (alignment), \
 327+ (void *(*) ()) (chunkfun), (void (*) ()) (freefun))
 328+
 329+# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
 330+ _obstack_begin_1 ((h), (size), (alignment), \
 331+ (void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
 332+
 333+# define obstack_chunkfun(h, newchunkfun) \
 334+ ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun))
 335+
 336+# define obstack_freefun(h, newfreefun) \
 337+ ((h) -> freefun = (void (*)()) (newfreefun))
 338+
 339+#endif
 340+
 341+#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar)
 342+
 343+#define obstack_blank_fast(h,n) ((h)->next_free += (n))
 344+
 345+#define obstack_memory_used(h) _obstack_memory_used (h)
 346+
 347+#if defined __GNUC__ && defined __STDC__ && __STDC__
 348+/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and
 349+ does not implement __extension__. But that compiler doesn't define
 350+ __GNUC_MINOR__. */
 351+# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__)
 352+# define __extension__
 353+# endif
 354+
 355+/* For GNU C, if not -traditional,
 356+ we can define these macros to compute all args only once
 357+ without using a global variable.
 358+ Also, we can avoid using the `temp' slot, to make faster code. */
 359+
 360+# define obstack_object_size(OBSTACK) \
 361+ __extension__ \
 362+ ({ struct obstack *__o = (OBSTACK); \
 363+ (unsigned) (__o->next_free - __o->object_base); })
 364+
 365+# define obstack_room(OBSTACK) \
 366+ __extension__ \
 367+ ({ struct obstack *__o = (OBSTACK); \
 368+ (unsigned) (__o->chunk_limit - __o->next_free); })
 369+
 370+# define obstack_make_room(OBSTACK,length) \
 371+__extension__ \
 372+({ struct obstack *__o = (OBSTACK); \
 373+ int __len = (length); \
 374+ if (__o->chunk_limit - __o->next_free < __len) \
 375+ _obstack_newchunk (__o, __len); \
 376+ (void) 0; })
 377+
 378+# define obstack_empty_p(OBSTACK) \
 379+ __extension__ \
 380+ ({ struct obstack *__o = (OBSTACK); \
 381+ (__o->chunk->prev == 0 && __o->next_free - __o->chunk->contents == 0); })
 382+
 383+# define obstack_grow(OBSTACK,where,length) \
 384+__extension__ \
 385+({ struct obstack *__o = (OBSTACK); \
 386+ int __len = (length); \
 387+ if (__o->next_free + __len > __o->chunk_limit) \
 388+ _obstack_newchunk (__o, __len); \
 389+ _obstack_memcpy (__o->next_free, (char *) (where), __len); \
 390+ __o->next_free += __len; \
 391+ (void) 0; })
 392+
 393+# define obstack_grow0(OBSTACK,where,length) \
 394+__extension__ \
 395+({ struct obstack *__o = (OBSTACK); \
 396+ int __len = (length); \
 397+ if (__o->next_free + __len + 1 > __o->chunk_limit) \
 398+ _obstack_newchunk (__o, __len + 1); \
 399+ _obstack_memcpy (__o->next_free, (char *) (where), __len); \
 400+ __o->next_free += __len; \
 401+ *(__o->next_free)++ = 0; \
 402+ (void) 0; })
 403+
 404+# define obstack_1grow(OBSTACK,datum) \
 405+__extension__ \
 406+({ struct obstack *__o = (OBSTACK); \
 407+ if (__o->next_free + 1 > __o->chunk_limit) \
 408+ _obstack_newchunk (__o, 1); \
 409+ *(__o->next_free)++ = (datum); \
 410+ (void) 0; })
 411+
 412+/* These assume that the obstack alignment is good enough for pointers or ints,
 413+ and that the data added so far to the current object
 414+ shares that much alignment. */
 415+
 416+# define obstack_ptr_grow(OBSTACK,datum) \
 417+__extension__ \
 418+({ struct obstack *__o = (OBSTACK); \
 419+ if (__o->next_free + sizeof (void *) > __o->chunk_limit) \
 420+ _obstack_newchunk (__o, sizeof (void *)); \
 421+ *((void **)__o->next_free)++ = ((void *)datum); \
 422+ (void) 0; })
 423+
 424+# define obstack_int_grow(OBSTACK,datum) \
 425+__extension__ \
 426+({ struct obstack *__o = (OBSTACK); \
 427+ if (__o->next_free + sizeof (int) > __o->chunk_limit) \
 428+ _obstack_newchunk (__o, sizeof (int)); \
 429+ *((int *)__o->next_free)++ = ((int)datum); \
 430+ (void) 0; })
 431+
 432+# define obstack_ptr_grow_fast(h,aptr) (*((void **) (h)->next_free)++ = (void *)aptr)
 433+# define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint)
 434+
 435+# define obstack_blank(OBSTACK,length) \
 436+__extension__ \
 437+({ struct obstack *__o = (OBSTACK); \
 438+ int __len = (length); \
 439+ if (__o->chunk_limit - __o->next_free < __len) \
 440+ _obstack_newchunk (__o, __len); \
 441+ __o->next_free += __len; \
 442+ (void) 0; })
 443+
 444+# define obstack_alloc(OBSTACK,length) \
 445+__extension__ \
 446+({ struct obstack *__h = (OBSTACK); \
 447+ obstack_blank (__h, (length)); \
 448+ obstack_finish (__h); })
 449+
 450+# define obstack_copy(OBSTACK,where,length) \
 451+__extension__ \
 452+({ struct obstack *__h = (OBSTACK); \
 453+ obstack_grow (__h, (where), (length)); \
 454+ obstack_finish (__h); })
 455+
 456+# define obstack_copy0(OBSTACK,where,length) \
 457+__extension__ \
 458+({ struct obstack *__h = (OBSTACK); \
 459+ obstack_grow0 (__h, (where), (length)); \
 460+ obstack_finish (__h); })
 461+
 462+/* The local variable is named __o1 to avoid a name conflict
 463+ when obstack_blank is called. */
 464+# define obstack_finish(OBSTACK) \
 465+__extension__ \
 466+({ struct obstack *__o1 = (OBSTACK); \
 467+ void *value; \
 468+ value = (void *) __o1->object_base; \
 469+ if (__o1->next_free == value) \
 470+ __o1->maybe_empty_object = 1; \
 471+ __o1->next_free \
 472+ = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
 473+ & ~ (__o1->alignment_mask)); \
 474+ if (__o1->next_free - (char *)__o1->chunk \
 475+ > __o1->chunk_limit - (char *)__o1->chunk) \
 476+ __o1->next_free = __o1->chunk_limit; \
 477+ __o1->object_base = __o1->next_free; \
 478+ value; })
 479+
 480+# define obstack_free(OBSTACK, OBJ) \
 481+__extension__ \
 482+({ struct obstack *__o = (OBSTACK); \
 483+ void *__obj = (OBJ); \
 484+ if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \
 485+ __o->next_free = __o->object_base = (char *)__obj; \
 486+ else (obstack_free) (__o, __obj); })
 487+
 488+#else /* not __GNUC__ or not __STDC__ */
 489+
 490+# define obstack_object_size(h) \
 491+ (unsigned) ((h)->next_free - (h)->object_base)
 492+
 493+# define obstack_room(h) \
 494+ (unsigned) ((h)->chunk_limit - (h)->next_free)
 495+
 496+# define obstack_empty_p(h) \
 497+ ((h)->chunk->prev == 0 && (h)->next_free - (h)->chunk->contents == 0)
 498+
 499+/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
 500+ so that we can avoid having void expressions
 501+ in the arms of the conditional expression.
 502+ Casting the third operand to void was tried before,
 503+ but some compilers won't accept it. */
 504+
 505+# define obstack_make_room(h,length) \
 506+( (h)->temp = (length), \
 507+ (((h)->next_free + (h)->temp > (h)->chunk_limit) \
 508+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0))
 509+
 510+# define obstack_grow(h,where,length) \
 511+( (h)->temp = (length), \
 512+ (((h)->next_free + (h)->temp > (h)->chunk_limit) \
 513+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
 514+ _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \
 515+ (h)->next_free += (h)->temp)
 516+
 517+# define obstack_grow0(h,where,length) \
 518+( (h)->temp = (length), \
 519+ (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \
 520+ ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \
 521+ _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \
 522+ (h)->next_free += (h)->temp, \
 523+ *((h)->next_free)++ = 0)
 524+
 525+# define obstack_1grow(h,datum) \
 526+( (((h)->next_free + 1 > (h)->chunk_limit) \
 527+ ? (_obstack_newchunk ((h), 1), 0) : 0), \
 528+ (*((h)->next_free)++ = (datum)))
 529+
 530+# define obstack_ptr_grow(h,datum) \
 531+( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
 532+ ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
 533+ (*((char **) (((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *) datum)))
 534+
 535+# define obstack_int_grow(h,datum) \
 536+( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
 537+ ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
 538+ (*((int *) (((h)->next_free+=sizeof(int))-sizeof(int))) = ((int) datum)))
 539+
 540+# define obstack_ptr_grow_fast(h,aptr) (*((char **) (h)->next_free)++ = (char *) aptr)
 541+# define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint)
 542+
 543+# define obstack_blank(h,length) \
 544+( (h)->temp = (length), \
 545+ (((h)->chunk_limit - (h)->next_free < (h)->temp) \
 546+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
 547+ ((h)->next_free += (h)->temp))
 548+
 549+# define obstack_alloc(h,length) \
 550+ (obstack_blank ((h), (length)), obstack_finish ((h)))
 551+
 552+# define obstack_copy(h,where,length) \
 553+ (obstack_grow ((h), (where), (length)), obstack_finish ((h)))
 554+
 555+# define obstack_copy0(h,where,length) \
 556+ (obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
 557+
 558+# define obstack_finish(h) \
 559+( ((h)->next_free == (h)->object_base \
 560+ ? (((h)->maybe_empty_object = 1), 0) \
 561+ : 0), \
 562+ (h)->temp = __PTR_TO_INT ((h)->object_base), \
 563+ (h)->next_free \
 564+ = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \
 565+ & ~ ((h)->alignment_mask)), \
 566+ (((h)->next_free - (char *) (h)->chunk \
 567+ > (h)->chunk_limit - (char *) (h)->chunk) \
 568+ ? ((h)->next_free = (h)->chunk_limit) : 0), \
 569+ (h)->object_base = (h)->next_free, \
 570+ __INT_TO_PTR ((h)->temp))
 571+
 572+# if defined __STDC__ && __STDC__
 573+# define obstack_free(h,obj) \
 574+( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \
 575+ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
 576+ ? (int) ((h)->next_free = (h)->object_base \
 577+ = (h)->temp + (char *) (h)->chunk) \
 578+ : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
 579+# else
 580+# define obstack_free(h,obj) \
 581+( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \
 582+ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
 583+ ? (int) ((h)->next_free = (h)->object_base \
 584+ = (h)->temp + (char *) (h)->chunk) \
 585+ : (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0)))
 586+# endif
 587+
 588+#endif /* not __GNUC__ or not __STDC__ */
 589+
 590+#ifdef __cplusplus
 591+} /* C++ */
 592+#endif
 593+
 594+#endif /* obstack.h */
Property changes on: trunk/extensions/FastStringSearch/obstack.h
___________________________________________________________________
Added: svn:eol-style
1595 + native
Index: trunk/extensions/FastStringSearch/CREDITS
@@ -0,0 +1,2 @@
 2+fss
 3+Tim Starling
Property changes on: trunk/extensions/FastStringSearch/CREDITS
___________________________________________________________________
Added: svn:eol-style
14 + native
Index: trunk/extensions/FastStringSearch/EXPERIMENTAL
Property changes on: trunk/extensions/FastStringSearch/EXPERIMENTAL
___________________________________________________________________
Added: svn:eol-style
25 + native
Index: trunk/extensions/FastStringSearch/README
@@ -0,0 +1,52 @@
 2+This is a PHP extension for fast string search and replace. It is used by
 3+LangaugeConverter.php. It supports multiple search terms. We use it as a
 4+replacement for PHP's strtr, which is extremely slow in certain cases.
 5+Chinese script conversion is one of those cases. This extension uses a
 6+Commentz-Walter style algorithm for multiple search terms, or a Boyer-Moore
 7+algorithm for single search terms.
 8+
 9+Several source files were taken from GNU grep, and are under the GNU General
 10+Public License. The PHP license is incompatible, so a PHP binary containing this
 11+extension is probably not redistributable under any license. You can use such a
 12+build privately, however. The source files can be distributed under their
 13+respective licenses.
 14+
 15+The interface synopsis is as follows. To prepare a string search:
 16+
 17+$fss = fss_prep_search( 'Hello' );
 18+
 19+or
 20+
 21+$fss = fss_prep_search( array( 'Hello', 'Hi' ) );
 22+
 23+To search a string, pass the previously prepared object to fss_exec_search,
 24+along with the subject string (the "haystack"):
 25+
 26+$result = fss_exec_search( $fss, 'xxx Hello xxx' );
 27+
 28+This will return an array with the first element being the string offset of the
 29+match, and the second element being the length of the match. If no matches are
 30+found, false is returned. The first match is returned, and if multiple search
 31+strings match at the same location, the longest will be returned. This function
 32+also accepts an optional third parameter giving the offset at which to start the
 33+search.
 34+
 35+Replacements are performed like this:
 36+
 37+$fss = fss_prep_replace( array( 'from' => 'to' ) );
 38+$text = fss_exec_replace( $fss, $text );
 39+
 40+The interpretation of the replacement array is exactly the same as in strtr: the
 41+longest match is always used, and parts of the string which have already been
 42+replaced are not processed again.
 43+
 44+You can free an FSS result with:
 45+
 46+fss_free( $fss );
 47+
 48+This is not generally necessary, since PHP will clean up the memory when all
 49+references are released.
 50+
 51+If you use an FSS object prepared with fss_prep_search() in fss_exec_replace(),
 52+all strings matched will be removed, i.e. replaced by an empty string.
 53+
Property changes on: trunk/extensions/FastStringSearch/README
___________________________________________________________________
Added: svn:eol-style
154 + native
Index: trunk/extensions/FastStringSearch/kwset.c
@@ -0,0 +1,786 @@
 2+/* kwset.c - search for any of a set of keywords.
 3+ Copyright 1989, 1998, 2000, 2005 Free Software Foundation, Inc.
 4+
 5+ This program is free software; you can redistribute it and/or modify
 6+ it under the terms of the GNU General Public License as published by
 7+ the Free Software Foundation; either version 2, or (at your option)
 8+ any later version.
 9+
 10+ This program is distributed in the hope that it will be useful,
 11+ but WITHOUT ANY WARRANTY; without even the implied warranty of
 12+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13+ GNU General Public License for more details.
 14+
 15+ You should have received a copy of the GNU General Public License
 16+ along with this program; if not, write to the Free Software
 17+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
 18+ 02110-1301, USA. */
 19+
 20+/* Written August 1989 by Mike Haertel.
 21+ The author may be reached (Email) at the address mike@ai.mit.edu,
 22+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
 23+
 24+/* The algorithm implemented by these routines bears a startling resemblence
 25+ to one discovered by Beate Commentz-Walter, although it is not identical.
 26+ See "A String Matching Algorithm Fast on the Average," Technical Report,
 27+ IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
 28+ Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient
 29+ String Matching: An Aid to Bibliographic Search," CACM June 1975,
 30+ Vol. 18, No. 6, which describes the failure function used below. */
 31+
 32+#ifdef HAVE_CONFIG_H
 33+# include <config.h>
 34+#endif
 35+#include <sys/types.h>
 36+#include <limits.h>
 37+#include "php.h"
 38+#include "kwset.h"
 39+#include "obstack.h"
 40+
 41+void *kws_emalloc_wrapper(size_t size);
 42+void kws_efree_wrapper(void *ptr);
 43+
 44+#define NCHAR (UCHAR_MAX + 1)
 45+#define obstack_chunk_alloc kws_emalloc_wrapper
 46+#define obstack_chunk_free kws_efree_wrapper
 47+
 48+#define U(c) ((unsigned char) (c))
 49+
 50+/* Balanced tree of edges and labels leaving a given trie node. */
 51+struct tree
 52+{
 53+ struct tree *llink; /* Left link; MUST be first field. */
 54+ struct tree *rlink; /* Right link (to larger labels). */
 55+ struct trie *trie; /* Trie node pointed to by this edge. */
 56+ unsigned char label; /* Label on this edge. */
 57+ char balance; /* Difference in depths of subtrees. */
 58+};
 59+
 60+/* Node of a trie representing a set of reversed keywords. */
 61+struct trie
 62+{
 63+ unsigned int accepting; /* Word index of accepted word, or zero. */
 64+ struct tree *links; /* Tree of edges leaving this node. */
 65+ struct trie *parent; /* Parent of this node. */
 66+ struct trie *next; /* List of all trie nodes in level order. */
 67+ struct trie *fail; /* Aho-Corasick failure function. */
 68+ int depth; /* Depth of this node from the root. */
 69+ int shift; /* Shift function for search failures. */
 70+ int maxshift; /* Max shift of self and descendents. */
 71+};
 72+
 73+/* Structure returned opaquely to the caller, containing everything. */
 74+struct kwset
 75+{
 76+ struct obstack obstack; /* Obstack for node allocation. */
 77+ int words; /* Number of words in the trie. */
 78+ struct trie *trie; /* The trie itself. */
 79+ int mind; /* Minimum depth of an accepting node. */
 80+ int maxd; /* Maximum depth of any node. */
 81+ unsigned char delta[NCHAR]; /* Delta table for rapid search. */
 82+ struct trie *next[NCHAR]; /* Table of children of the root. */
 83+ char *target; /* Target string if there's only one. */
 84+ int mind2; /* Used in Boyer-Moore search for one string. */
 85+ char const *trans; /* Character translation table. */
 86+};
 87+
 88+void *kws_emalloc_wrapper(size_t size)
 89+{
 90+ return emalloc(size);
 91+}
 92+
 93+void kws_efree_wrapper(void *ptr)
 94+{
 95+ efree(ptr);
 96+}
 97+
 98+/* Allocate and initialize a keyword set object, returning an opaque
 99+ pointer to it. Return NULL if memory is not available. */
 100+kwset_t
 101+kwsalloc (char const *trans)
 102+{
 103+ struct kwset *kwset;
 104+
 105+ kwset = (struct kwset *) kws_emalloc_wrapper(sizeof (struct kwset));
 106+ if (!kwset)
 107+ return NULL;
 108+
 109+ obstack_init(&kwset->obstack);
 110+ kwset->words = 0;
 111+ kwset->trie
 112+ = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
 113+ if (!kwset->trie)
 114+ {
 115+ kwsfree((kwset_t) kwset);
 116+ return NULL;
 117+ }
 118+ kwset->trie->accepting = 0;
 119+ kwset->trie->links = NULL;
 120+ kwset->trie->parent = NULL;
 121+ kwset->trie->next = NULL;
 122+ kwset->trie->fail = NULL;
 123+ kwset->trie->depth = 0;
 124+ kwset->trie->shift = 0;
 125+ kwset->mind = INT_MAX;
 126+ kwset->maxd = -1;
 127+ kwset->target = NULL;
 128+ kwset->trans = trans;
 129+
 130+ return (kwset_t) kwset;
 131+}
 132+
 133+/* This upper bound is valid for CHAR_BIT >= 4 and
 134+ exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
 135+#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2)
 136+
 137+/* Add the given string to the contents of the keyword set. Return NULL
 138+ for success, an error message otherwise. */
 139+const char *
 140+kwsincr (kwset_t kws, char const *text, size_t len)
 141+{
 142+ struct kwset *kwset;
 143+ register struct trie *trie;
 144+ register unsigned char label;
 145+ register struct tree *link;
 146+ register int depth;
 147+ struct tree *links[DEPTH_SIZE];
 148+ enum { L, R } dirs[DEPTH_SIZE];
 149+ struct tree *t, *r, *l, *rl, *lr;
 150+
 151+ kwset = (struct kwset *) kws;
 152+ trie = kwset->trie;
 153+ text += len;
 154+
 155+ /* Descend the trie (built of reversed keywords) character-by-character,
 156+ installing new nodes when necessary. */
 157+ while (len--)
 158+ {
 159+ label = kwset->trans ? kwset->trans[U(*--text)] : *--text;
 160+
 161+ /* Descend the tree of outgoing links for this trie node,
 162+ looking for the current character and keeping track
 163+ of the path followed. */
 164+ link = trie->links;
 165+ links[0] = (struct tree *) &trie->links;
 166+ dirs[0] = L;
 167+ depth = 1;
 168+
 169+ while (link && label != link->label)
 170+ {
 171+ links[depth] = link;
 172+ if (label < link->label)
 173+ dirs[depth++] = L, link = link->llink;
 174+ else
 175+ dirs[depth++] = R, link = link->rlink;
 176+ }
 177+
 178+ /* The current character doesn't have an outgoing link at
 179+ this trie node, so build a new trie node and install
 180+ a link in the current trie node's tree. */
 181+ if (!link)
 182+ {
 183+ link = (struct tree *) obstack_alloc(&kwset->obstack,
 184+ sizeof (struct tree));
 185+ if (!link)
 186+ return "memory exhausted";
 187+ link->llink = NULL;
 188+ link->rlink = NULL;
 189+ link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
 190+ sizeof (struct trie));
 191+ if (!link->trie)
 192+ {
 193+ obstack_free(&kwset->obstack, link);
 194+ return "memory exhausted";
 195+ }
 196+ link->trie->accepting = 0;
 197+ link->trie->links = NULL;
 198+ link->trie->parent = trie;
 199+ link->trie->next = NULL;
 200+ link->trie->fail = NULL;
 201+ link->trie->depth = trie->depth + 1;
 202+ link->trie->shift = 0;
 203+ link->label = label;
 204+ link->balance = 0;
 205+
 206+ /* Install the new tree node in its parent. */
 207+ if (dirs[--depth] == L)
 208+ links[depth]->llink = link;
 209+ else
 210+ links[depth]->rlink = link;
 211+
 212+ /* Back up the tree fixing the balance flags. */
 213+ while (depth && !links[depth]->balance)
 214+ {
 215+ if (dirs[depth] == L)
 216+ --links[depth]->balance;
 217+ else
 218+ ++links[depth]->balance;
 219+ --depth;
 220+ }
 221+
 222+ /* Rebalance the tree by pointer rotations if necessary. */
 223+ if (depth && ((dirs[depth] == L && --links[depth]->balance)
 224+ || (dirs[depth] == R && ++links[depth]->balance)))
 225+ {
 226+ switch (links[depth]->balance)
 227+ {
 228+ case (char) -2:
 229+ switch (dirs[depth + 1])
 230+ {
 231+ case L:
 232+ r = links[depth], t = r->llink, rl = t->rlink;
 233+ t->rlink = r, r->llink = rl;
 234+ t->balance = r->balance = 0;
 235+ break;
 236+ case R:
 237+ r = links[depth], l = r->llink, t = l->rlink;
 238+ rl = t->rlink, lr = t->llink;
 239+ t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
 240+ l->balance = t->balance != 1 ? 0 : -1;
 241+ r->balance = t->balance != (char) -1 ? 0 : 1;
 242+ t->balance = 0;
 243+ break;
 244+ default:
 245+ abort ();
 246+ }
 247+ break;
 248+ case 2:
 249+ switch (dirs[depth + 1])
 250+ {
 251+ case R:
 252+ l = links[depth], t = l->rlink, lr = t->llink;
 253+ t->llink = l, l->rlink = lr;
 254+ t->balance = l->balance = 0;
 255+ break;
 256+ case L:
 257+ l = links[depth], r = l->rlink, t = r->llink;
 258+ lr = t->llink, rl = t->rlink;
 259+ t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
 260+ l->balance = t->balance != 1 ? 0 : -1;
 261+ r->balance = t->balance != (char) -1 ? 0 : 1;
 262+ t->balance = 0;
 263+ break;
 264+ default:
 265+ abort ();
 266+ }
 267+ break;
 268+ default:
 269+ abort ();
 270+ }
 271+
 272+ if (dirs[depth - 1] == L)
 273+ links[depth - 1]->llink = t;
 274+ else
 275+ links[depth - 1]->rlink = t;
 276+ }
 277+ }
 278+
 279+ trie = link->trie;
 280+ }
 281+
 282+ /* Mark the node we finally reached as accepting, encoding the
 283+ index number of this word in the keyword set so far. */
 284+ if (!trie->accepting)
 285+ trie->accepting = 1 + 2 * kwset->words;
 286+ ++kwset->words;
 287+
 288+ /* Keep track of the longest and shortest string of the keyword set. */
 289+ if (trie->depth < kwset->mind)
 290+ kwset->mind = trie->depth;
 291+ if (trie->depth > kwset->maxd)
 292+ kwset->maxd = trie->depth;
 293+
 294+ return NULL;
 295+}
 296+
 297+/* Enqueue the trie nodes referenced from the given tree in the
 298+ given queue. */
 299+static void
 300+enqueue (struct tree *tree, struct trie **last)
 301+{
 302+ if (!tree)
 303+ return;
 304+ enqueue(tree->llink, last);
 305+ enqueue(tree->rlink, last);
 306+ (*last) = (*last)->next = tree->trie;
 307+}
 308+
 309+/* Compute the Aho-Corasick failure function for the trie nodes referenced
 310+ from the given tree, given the failure function for their parent as
 311+ well as a last resort failure node. */
 312+static void
 313+treefails (register struct tree const *tree, struct trie const *fail,
 314+ struct trie *recourse)
 315+{
 316+ register struct tree *link;
 317+
 318+ if (!tree)
 319+ return;
 320+
 321+ treefails(tree->llink, fail, recourse);
 322+ treefails(tree->rlink, fail, recourse);
 323+
 324+ /* Find, in the chain of fails going back to the root, the first
 325+ node that has a descendent on the current label. */
 326+ while (fail)
 327+ {
 328+ link = fail->links;
 329+ while (link && tree->label != link->label)
 330+ if (tree->label < link->label)
 331+ link = link->llink;
 332+ else
 333+ link = link->rlink;
 334+ if (link)
 335+ {
 336+ tree->trie->fail = link->trie;
 337+ return;
 338+ }
 339+ fail = fail->fail;
 340+ }
 341+
 342+ tree->trie->fail = recourse;
 343+}
 344+
 345+/* Set delta entries for the links of the given tree such that
 346+ the preexisting delta value is larger than the current depth. */
 347+static void
 348+treedelta (register struct tree const *tree,
 349+ register unsigned int depth,
 350+ unsigned char delta[])
 351+{
 352+ if (!tree)
 353+ return;
 354+ treedelta(tree->llink, depth, delta);
 355+ treedelta(tree->rlink, depth, delta);
 356+ if (depth < delta[tree->label])
 357+ delta[tree->label] = depth;
 358+}
 359+
 360+/* Return true if A has every label in B. */
 361+static int
 362+hasevery (register struct tree const *a, register struct tree const *b)
 363+{
 364+ if (!b)
 365+ return 1;
 366+ if (!hasevery(a, b->llink))
 367+ return 0;
 368+ if (!hasevery(a, b->rlink))
 369+ return 0;
 370+ while (a && b->label != a->label)
 371+ if (b->label < a->label)
 372+ a = a->llink;
 373+ else
 374+ a = a->rlink;
 375+ return !!a;
 376+}
 377+
 378+/* Compute a vector, indexed by character code, of the trie nodes
 379+ referenced from the given tree. */
 380+static void
 381+treenext (struct tree const *tree, struct trie *next[])
 382+{
 383+ if (!tree)
 384+ return;
 385+ treenext(tree->llink, next);
 386+ treenext(tree->rlink, next);
 387+ next[tree->label] = tree->trie;
 388+}
 389+
 390+/* Compute the shift for each trie node, as well as the delta
 391+ table and next cache for the given keyword set. */
 392+const char *
 393+kwsprep (kwset_t kws)
 394+{
 395+ register struct kwset *kwset;
 396+ register int i;
 397+ register struct trie *curr;
 398+ register char const *trans;
 399+ unsigned char delta[NCHAR];
 400+
 401+ kwset = (struct kwset *) kws;
 402+
 403+ /* Initial values for the delta table; will be changed later. The
 404+ delta entry for a given character is the smallest depth of any
 405+ node at which an outgoing edge is labeled by that character. */
 406+ memset(delta, kwset->mind < UCHAR_MAX ? kwset->mind : UCHAR_MAX, NCHAR);
 407+
 408+ /* Check if we can use the simple boyer-moore algorithm, instead
 409+ of the hairy commentz-walter algorithm. */
 410+ if (kwset->words == 1 && kwset->trans == NULL)
 411+ {
 412+ char c;
 413+
 414+ /* Looking for just one string. Extract it from the trie. */
 415+ kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
 416+ if (!kwset->target)
 417+ return "memory exhausted";
 418+ for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
 419+ {
 420+ kwset->target[i] = curr->links->label;
 421+ curr = curr->links->trie;
 422+ }
 423+ /* Build the Boyer Moore delta. Boy that's easy compared to CW. */
 424+ for (i = 0; i < kwset->mind; ++i)
 425+ delta[U(kwset->target[i])] = kwset->mind - (i + 1);
 426+ /* Find the minimal delta2 shift that we might make after
 427+ a backwards match has failed. */
 428+ c = kwset->target[kwset->mind - 1];
 429+ for (i = kwset->mind - 2; i >= 0; --i)
 430+ if (kwset->target[i] == c)
 431+ break;
 432+ kwset->mind2 = kwset->mind - (i + 1);
 433+ }
 434+ else
 435+ {
 436+ register struct trie *fail;
 437+ struct trie *last, *next[NCHAR];
 438+
 439+ /* Traverse the nodes of the trie in level order, simultaneously
 440+ computing the delta table, failure function, and shift function. */
 441+ for (curr = last = kwset->trie; curr; curr = curr->next)
 442+ {
 443+ /* Enqueue the immediate descendents in the level order queue. */
 444+ enqueue(curr->links, &last);
 445+
 446+ curr->shift = kwset->mind;
 447+ curr->maxshift = kwset->mind;
 448+
 449+ /* Update the delta table for the descendents of this node. */
 450+ treedelta(curr->links, curr->depth, delta);
 451+
 452+ /* Compute the failure function for the decendents of this node. */
 453+ treefails(curr->links, curr->fail, kwset->trie);
 454+
 455+ /* Update the shifts at each node in the current node's chain
 456+ of fails back to the root. */
 457+ for (fail = curr->fail; fail; fail = fail->fail)
 458+ {
 459+ /* If the current node has some outgoing edge that the fail
 460+ doesn't, then the shift at the fail should be no larger
 461+ than the difference of their depths. */
 462+ if (!hasevery(fail->links, curr->links))
 463+ if (curr->depth - fail->depth < fail->shift)
 464+ fail->shift = curr->depth - fail->depth;
 465+
 466+ /* If the current node is accepting then the shift at the
 467+ fail and its descendents should be no larger than the
 468+ difference of their depths. */
 469+ if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
 470+ fail->maxshift = curr->depth - fail->depth;
 471+ }
 472+ }
 473+
 474+ /* Traverse the trie in level order again, fixing up all nodes whose
 475+ shift exceeds their inherited maxshift. */
 476+ for (curr = kwset->trie->next; curr; curr = curr->next)
 477+ {
 478+ if (curr->maxshift > curr->parent->maxshift)
 479+ curr->maxshift = curr->parent->maxshift;
 480+ if (curr->shift > curr->maxshift)
 481+ curr->shift = curr->maxshift;
 482+ }
 483+
 484+ /* Create a vector, indexed by character code, of the outgoing links
 485+ from the root node. */
 486+ for (i = 0; i < NCHAR; ++i)
 487+ next[i] = NULL;
 488+ treenext(kwset->trie->links, next);
 489+
 490+ if ((trans = kwset->trans) != NULL)
 491+ for (i = 0; i < NCHAR; ++i)
 492+ kwset->next[i] = next[U(trans[i])];
 493+ else
 494+ memcpy(kwset->next, next, NCHAR * sizeof(struct trie *));
 495+ }
 496+
 497+ /* Fix things up for any translation table. */
 498+ if ((trans = kwset->trans) != NULL)
 499+ for (i = 0; i < NCHAR; ++i)
 500+ kwset->delta[i] = delta[U(trans[i])];
 501+ else
 502+ memcpy(kwset->delta, delta, NCHAR);
 503+
 504+ return NULL;
 505+}
 506+
 507+/* Fast boyer-moore search. */
 508+static size_t
 509+bmexec (kwset_t kws, char const *text, size_t size)
 510+{
 511+ struct kwset const *kwset;
 512+ register unsigned char const *d1;
 513+ register char const *ep, *sp, *tp;
 514+ register int d, gc, i, len, md2;
 515+
 516+ kwset = (struct kwset const *) kws;
 517+ len = kwset->mind;
 518+
 519+ if (len == 0)
 520+ return 0;
 521+ if (len > size)
 522+ return -1;
 523+ if (len == 1)
 524+ {
 525+ tp = memchr (text, kwset->target[0], size);
 526+ return tp ? tp - text : -1;
 527+ }
 528+
 529+ d1 = kwset->delta;
 530+ sp = kwset->target + len;
 531+ gc = U(sp[-2]);
 532+ md2 = kwset->mind2;
 533+ tp = text + len;
 534+
 535+ /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
 536+ if (size > 12 * len)
 537+ /* 11 is not a bug, the initial offset happens only once. */
 538+ for (ep = text + size - 11 * len;;)
 539+ {
 540+ while (tp <= ep)
 541+ {
 542+ d = d1[U(tp[-1])], tp += d;
 543+ d = d1[U(tp[-1])], tp += d;
 544+ if (d == 0)
 545+ goto found;
 546+ d = d1[U(tp[-1])], tp += d;
 547+ d = d1[U(tp[-1])], tp += d;
 548+ d = d1[U(tp[-1])], tp += d;
 549+ if (d == 0)
 550+ goto found;
 551+ d = d1[U(tp[-1])], tp += d;
 552+ d = d1[U(tp[-1])], tp += d;
 553+ d = d1[U(tp[-1])], tp += d;
 554+ if (d == 0)
 555+ goto found;
 556+ d = d1[U(tp[-1])], tp += d;
 557+ d = d1[U(tp[-1])], tp += d;
 558+ }
 559+ break;
 560+ found:
 561+ if (U(tp[-2]) == gc)
 562+ {
 563+ for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
 564+ ;
 565+ if (i > len)
 566+ return tp - len - text;
 567+ }
 568+ tp += md2;
 569+ }
 570+
 571+ /* Now we have only a few characters left to search. We
 572+ carefully avoid ever producing an out-of-bounds pointer. */
 573+ ep = text + size;
 574+ d = d1[U(tp[-1])];
 575+ while (d <= ep - tp)
 576+ {
 577+ d = d1[U((tp += d)[-1])];
 578+ if (d != 0)
 579+ continue;
 580+ if (U(tp[-2]) == gc)
 581+ {
 582+ for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
 583+ ;
 584+ if (i > len)
 585+ return tp - len - text;
 586+ }
 587+ d = md2;
 588+ }
 589+
 590+ return -1;
 591+}
 592+
 593+/* Hairy multiple string search. */
 594+static size_t
 595+cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
 596+{
 597+ struct kwset const *kwset;
 598+ struct trie * const *next;
 599+ struct trie const *trie;
 600+ struct trie const *accept;
 601+ char const *beg, *lim, *mch, *lmch;
 602+ register unsigned char c;
 603+ register unsigned char const *delta;
 604+ register int d;
 605+ register char const *end, *qlim;
 606+ register struct tree const *tree;
 607+ register char const *trans;
 608+
 609+#ifdef lint
 610+ accept = NULL;
 611+#endif
 612+
 613+ /* Initialize register copies and look for easy ways out. */
 614+ kwset = (struct kwset *) kws;
 615+ if (len < kwset->mind)
 616+ return -1;
 617+ next = kwset->next;
 618+ delta = kwset->delta;
 619+ trans = kwset->trans;
 620+ lim = text + len;
 621+ end = text;
 622+ if ((d = kwset->mind) != 0)
 623+ mch = NULL;
 624+ else
 625+ {
 626+ mch = text, accept = kwset->trie;
 627+ goto match;
 628+ }
 629+
 630+ if (len >= 4 * kwset->mind)
 631+ qlim = lim - 4 * kwset->mind;
 632+ else
 633+ qlim = NULL;
 634+
 635+ while (lim - end >= d)
 636+ {
 637+ if (qlim && end <= qlim)
 638+ {
 639+ end += d - 1;
 640+ while ((d = delta[c = *end]) && end < qlim)
 641+ {
 642+ end += d;
 643+ end += delta[U(*end)];
 644+ end += delta[U(*end)];
 645+ }
 646+ ++end;
 647+ }
 648+ else
 649+ d = delta[c = (end += d)[-1]];
 650+ if (d)
 651+ continue;
 652+ beg = end - 1;
 653+ trie = next[c];
 654+ if (trie->accepting)
 655+ {
 656+ mch = beg;
 657+ accept = trie;
 658+ }
 659+ d = trie->shift;
 660+ while (beg > text)
 661+ {
 662+ c = trans ? trans[U(*--beg)] : *--beg;
 663+ tree = trie->links;
 664+ while (tree && c != tree->label)
 665+ if (c < tree->label)
 666+ tree = tree->llink;
 667+ else
 668+ tree = tree->rlink;
 669+ if (tree)
 670+ {
 671+ trie = tree->trie;
 672+ if (trie->accepting)
 673+ {
 674+ mch = beg;
 675+ accept = trie;
 676+ }
 677+ }
 678+ else
 679+ break;
 680+ d = trie->shift;
 681+ }
 682+ if (mch)
 683+ goto match;
 684+ }
 685+ return -1;
 686+
 687+ match:
 688+ /* Given a known match, find the longest possible match anchored
 689+ at or before its starting point. This is nearly a verbatim
 690+ copy of the preceding main search loops. */
 691+ if (lim - mch > kwset->maxd)
 692+ lim = mch + kwset->maxd;
 693+ lmch = 0;
 694+ d = 1;
 695+ while (lim - end >= d)
 696+ {
 697+ if ((d = delta[c = (end += d)[-1]]) != 0)
 698+ continue;
 699+ beg = end - 1;
 700+ if (!(trie = next[c]))
 701+ {
 702+ d = 1;
 703+ continue;
 704+ }
 705+ if (trie->accepting && beg <= mch)
 706+ {
 707+ lmch = beg;
 708+ accept = trie;
 709+ }
 710+ d = trie->shift;
 711+ while (beg > text)
 712+ {
 713+ c = trans ? trans[U(*--beg)] : *--beg;
 714+ tree = trie->links;
 715+ while (tree && c != tree->label)
 716+ if (c < tree->label)
 717+ tree = tree->llink;
 718+ else
 719+ tree = tree->rlink;
 720+ if (tree)
 721+ {
 722+ trie = tree->trie;
 723+ if (trie->accepting && beg <= mch)
 724+ {
 725+ lmch = beg;
 726+ accept = trie;
 727+ }
 728+ }
 729+ else
 730+ break;
 731+ d = trie->shift;
 732+ }
 733+ if (lmch)
 734+ {
 735+ mch = lmch;
 736+ goto match;
 737+ }
 738+ if (!d)
 739+ d = 1;
 740+ }
 741+
 742+ if (kwsmatch)
 743+ {
 744+ kwsmatch->index = accept->accepting / 2;
 745+ kwsmatch->offset[0] = mch - text;
 746+ kwsmatch->size[0] = accept->depth;
 747+ }
 748+ return mch - text;
 749+}
 750+
 751+/* Search through the given text for a match of any member of the
 752+ given keyword set. Return a pointer to the first character of
 753+ the matching substring, or NULL if no match is found. If FOUNDLEN
 754+ is non-NULL store in the referenced location the length of the
 755+ matching substring. Similarly, if FOUNDIDX is non-NULL, store
 756+ in the referenced location the index number of the particular
 757+ keyword matched. */
 758+size_t
 759+kwsexec (kwset_t kws, char const *text, size_t size,
 760+ struct kwsmatch *kwsmatch)
 761+{
 762+ struct kwset const *kwset = (struct kwset *) kws;
 763+ if (kwset->words == 1 && kwset->trans == NULL)
 764+ {
 765+ size_t ret = bmexec (kws, text, size);
 766+ if (kwsmatch != NULL && ret != (size_t) -1)
 767+ {
 768+ kwsmatch->index = 0;
 769+ kwsmatch->offset[0] = ret;
 770+ kwsmatch->size[0] = kwset->mind;
 771+ }
 772+ return ret;
 773+ }
 774+ else
 775+ return cwexec(kws, text, size, kwsmatch);
 776+}
 777+
 778+/* Free the components of the given keyword set. */
 779+void
 780+kwsfree (kwset_t kws)
 781+{
 782+ struct kwset *kwset;
 783+
 784+ kwset = (struct kwset *) kws;
 785+ obstack_free(&kwset->obstack, NULL);
 786+ kws_efree_wrapper(kws);
 787+}
Property changes on: trunk/extensions/FastStringSearch/kwset.c
___________________________________________________________________
Added: svn:eol-style
1788 + native
Property changes on: trunk/extensions/FastStringSearch
___________________________________________________________________
Added: svn:ignore
2789 + .deps
*.lo
*.la