r45936 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r45935‎ | r45936 | r45937 >
Date:22:12, 20 January 2009
Author:werdna
Status:deferred
Tags:
Comment:
Revert to r39167. This is the last version I can find which compiles correctly.
Modified paths:
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/README (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/affunctions.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/afstring.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/aftypes.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/ast.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/check.cpp (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/equiv.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/evaluate.cpp (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/expr.cpp (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/filter_evaluator.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/fray.cc (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/fray.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/conversion.h (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/create.h (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/operators.h (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/visitors.h (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/makefile (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/parser.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/parserdefs.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/request.cpp (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/request.h (added) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/syntax_check.cpp (modified) (history)
  • /branches/change-tagging/extensions/AbuseFilter/parser_native/xml.cpp (modified) (history)

Diff [purge]

Index: branches/change-tagging/extensions/AbuseFilter/parser_native/filter_evaluator.h
@@ -0,0 +1,89 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef FILTER_EVALUATOR_H
 14+#define FILTER_EVALUATOR_H
 15+
 16+#include <string>
 17+#include <map>
 18+
 19+#include <unicode/uchar.h>
 20+
 21+#include "aftypes.h"
 22+#include "parser.h"
 23+#include "affunctions.h"
 24+
 25+namespace afp {
 26+
 27+template<typename charT>
 28+struct basic_filter_evaluator {
 29+ basic_filter_evaluator();
 30+
 31+ bool evaluate(basic_fray<charT> const &filter) const;
 32+
 33+ void add_variable(
 34+ basic_fray<charT> const &key,
 35+ basic_datum<charT> value);
 36+
 37+ void clear_variables();
 38+
 39+private:
 40+ basic_expressor<charT> e;
 41+};
 42+
 43+typedef basic_filter_evaluator<char> filter_evaluator;
 44+typedef basic_filter_evaluator<UChar32> u32filter_evaluator;
 45+
 46+template<typename charT>
 47+basic_filter_evaluator<charT>::basic_filter_evaluator()
 48+{
 49+ e.add_function(make_astring<charT, char>("length"), af_length<charT>);
 50+ e.add_function(make_astring<charT, char>("lcase"), af_lcase<charT>);
 51+ e.add_function(make_astring<charT, char>("ccnorm"), af_ccnorm<charT>);
 52+ e.add_function(make_astring<charT, char>("rmdoubles"), af_rmdoubles<charT>);
 53+ e.add_function(make_astring<charT, char>("specialratio"), af_specialratio<charT>);
 54+ e.add_function(make_astring<charT, char>("rmspecials"), af_rmspecials<charT>);
 55+ e.add_function(make_astring<charT, char>("norm"), af_norm<charT>);
 56+ e.add_function(make_astring<charT, char>("count"), af_count<charT>);
 57+}
 58+
 59+template<typename charT>
 60+void
 61+basic_filter_evaluator<charT>::clear_variables()
 62+{
 63+ e.clear_variables();
 64+}
 65+
 66+template<typename charT>
 67+bool
 68+basic_filter_evaluator<charT>::evaluate(
 69+ basic_fray<charT> const &filter) const
 70+{
 71+ try {
 72+ return e.evaluate(filter).toBool();
 73+ } catch (std::exception &e) {
 74+ std::cerr << "can't evaluate filter: " << e.what() << '\n';
 75+ return false;
 76+ }
 77+}
 78+
 79+template<typename charT>
 80+void
 81+basic_filter_evaluator<charT>::add_variable(
 82+ basic_fray<charT> const &key,
 83+ basic_datum<charT> value)
 84+{
 85+ e.add_variable(key, value);
 86+}
 87+
 88+} // namespace afp
 89+
 90+#endif /* !FILTER_EVALUATOR_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/README
@@ -8,14 +8,10 @@
99 * Debian Linux (Intel Xeon, AMD Opteron) with GCC 4.3.1
1010 * Debian Linux (Intel Xeon) with Intel C++ 10.1.015
1111
12 -parser_native requires Boost (www.boost.org) 1.35.0 or higher. Earlier versions
 12+parser_filter requires Boost (www.boost.org) 1.35.0 or higher. Earlier versions
1313 will generate a parser which does not work.
1414
15 -When compiled with GCC 4.3.1 (and perhaps later, but not earlier), Boost 1.35
16 -will not work due to a bug in the date_time library. Use GCC 4.2 or earlier
17 -instead of this compiler.
18 -
19 -parser_native also requires GNU MP (gmplib.org), with C++ support enabled
 15+parser_filter also requires GNU MP (gmplib.org), with C++ support enabled
2016 (--enable-cxx).
2117
2218 To compile it, copy makefile.config.example to makefile.config, edit it, and
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/check.cpp
@@ -14,7 +14,7 @@
1515 #include "afstring.h"
1616
1717 int main( int argc, char** argv ) {
18 - afp::filter_evaluator f;
 18+ afp::u32filter_evaluator f;
1919 bool result = false;
2020
2121 for(int i=0;i<=100;i++) {
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/expr.cpp
@@ -37,7 +37,7 @@
3838 return 1;
3939 }
4040
41 - afp::expressor e;
 41+ afp::u32expressor e;
4242
4343 e.add_variable(make_u32fray("ONE"), afp::u32datum::from_int(1));
4444 e.add_variable(make_u32fray("TWO"), afp::u32datum::from_int(2));
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/aftypes.h
@@ -0,0 +1,192 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef AFTYPES_H
 14+#define AFTYPES_H
 15+
 16+#include <string>
 17+#include <vector>
 18+#include <iostream>
 19+#include <sstream>
 20+#include <ios>
 21+#include <iostream>
 22+#include <cassert>
 23+#include <algorithm>
 24+#include <cmath>
 25+
 26+#include <boost/lexical_cast.hpp>
 27+#include <boost/variant.hpp>
 28+
 29+#include <unicode/uchar.h>
 30+
 31+#include <gmpxx.h>
 32+
 33+#include "fray.h"
 34+
 35+namespace afp {
 36+
 37+/*
 38+ * ABUSEFILTER VARIABLE STORAGE
 39+ * ============================
 40+ *
 41+ * datum is the AFP variable type. It is runtime polymorphic, storing objects
 42+ * of string, integer or floating point type. It provides the usual operator
 43+ * overloads, except that operator>>(istream, datum) is not provided.
 44+ *
 45+ * A datum automatically converts between types as required, using the
 46+ * following rules:
 47+ *
 48+ * - arithmetic operations convert arguments to doubles if either argument is
 49+ * a double, otherwise to ints.
 50+ * - converting a string to a numeric type attempts to parse the string as an
 51+ * integer. if this is not possible, the value 0 is used.
 52+ * - type-blind compare (operator==, compare()) does a lexical comparison if
 53+ * both arguments are strings, otherwise an arithmetic comparison.
 54+ * - type-sensitive compare always returns false if the types are different;
 55+ * otherwise, it is identical to a type-blind compare.
 56+ * - ordinal comparisons always convert their arguments to arithmetic types,
 57+ * even if both are strings.
 58+ *
 59+ * Internally, datum is implemented using a boost::variant object. This is
 60+ * entirely stack-based, avoiding memory allocation overhead when manipulating
 61+ * datum objects.
 62+ */
 63+
 64+template<typename charT>
 65+struct basic_datum {
 66+ typedef basic_fray<charT> string_t;
 67+ typedef mpz_class integer_t;
 68+ typedef mpf_class float_t;
 69+
 70+ basic_datum();
 71+ basic_datum(basic_datum<charT> const &oldData);
 72+
 73+ // Type forcing construction functions
 74+ static basic_datum<charT> from_string(string_t const &v);
 75+ static basic_datum<charT> from_string_convert(string_t const &v);
 76+ static basic_datum<charT> from_int(integer_t const &v);
 77+ static basic_datum<charT> from_double(float_t const &v);
 78+
 79+ // Assignment operator
 80+ basic_datum<charT> &operator= (const basic_datum<charT> & other);
 81+
 82+ basic_datum<charT> &operator+=(basic_datum<charT> const &other);
 83+ basic_datum<charT> &operator-=(basic_datum<charT> const &other);
 84+ basic_datum<charT> &operator*=(basic_datum<charT> const &other);
 85+ basic_datum<charT> &operator/=(basic_datum<charT> const &other);
 86+ basic_datum<charT> &operator%=(basic_datum<charT> const &other);
 87+ bool operator!() const;
 88+ basic_datum<charT> operator+() const;
 89+ basic_datum<charT> operator-() const;
 90+
 91+ bool compare(basic_datum<charT> const &other) const;
 92+ bool compare_with_type(basic_datum<charT> const &other) const;
 93+ bool less_than(basic_datum<charT> const &other) const;
 94+
 95+ string_t toString() const;
 96+ integer_t toInt() const;
 97+ float_t toFloat() const;
 98+ bool toBool() const {
 99+ return !!toInt().get_si();
 100+ }
 101+
 102+ template<typename traits>
 103+ void
 104+ print_to(std::basic_ostream<charT, traits> &s) const {
 105+ s << value_;
 106+ }
 107+
 108+protected:
 109+ explicit basic_datum(integer_t const &);
 110+ explicit basic_datum(float_t const &);
 111+ explicit basic_datum(string_t const &);
 112+
 113+ typedef boost::variant<integer_t, string_t, float_t> valuetype;
 114+ valuetype value_;
 115+};
 116+
/*
 * Base class for errors raised by the filter parser and evaluator; carries
 * a human-readable message.
 *
 * NOTE(review): the std::exception base is private (the default for
 * `class`), so `catch (std::exception &)` handlers do not match objects of
 * this type; callers must catch afp::exception explicitly.
 */
class exception : private std::exception {
	std::string what_;

public:
	exception(std::string const &what)
		: what_(what)
	{
	}

	~exception() throw()
	{
	}

	/* The message supplied at construction. */
	char const *what() const throw()
	{
		return what_.c_str();
	}
};
 130+
 131+}
 132+
 133+#include "datum/create.h"
 134+#include "datum/conversion.h"
 135+#include "datum/operators.h"
 136+
 137+namespace afp {
 138+
 139+template<typename charT>
 140+basic_datum<charT>::basic_datum() {
 141+}
 142+
 143+template<typename charT>
 144+basic_datum<charT>::basic_datum(basic_datum<charT> const &other)
 145+ : value_(other.value_)
 146+{
 147+}
 148+
 149+template<typename charT>
 150+basic_datum<charT>::basic_datum(integer_t const &i)
 151+ : value_(i)
 152+{
 153+}
 154+
 155+template<typename charT>
 156+basic_datum<charT>::basic_datum(float_t const &d)
 157+ : value_(d)
 158+{
 159+}
 160+
 161+template<typename charT>
 162+basic_datum<charT>::basic_datum(typename basic_datum<charT>::string_t const &v)
 163+ : value_(v)
 164+{
 165+}
 166+
 167+template<typename charT>
 168+bool
 169+basic_datum<charT>::compare(basic_datum<charT> const &other) const {
 170+ return boost::apply_visitor(datum_impl::compare_visitor<charT, std::equal_to>(), value_, other.value_);
 171+}
 172+
 173+template<typename charT>
 174+bool
 175+basic_datum<charT>::compare_with_type(basic_datum<charT> const &other) const {
 176+ if (value_.which() != other.value_.which())
 177+ return false;
 178+
 179+ return boost::apply_visitor(datum_impl::compare_visitor<charT, std::equal_to>(), value_, other.value_);
 180+}
 181+
 182+template<typename charT>
 183+bool
 184+basic_datum<charT>::less_than(basic_datum<charT> const &other) const {
 185+ return boost::apply_visitor(datum_impl::arith_compare_visitor<charT, std::less>(), value_, other.value_);
 186+}
 187+
 188+typedef basic_datum<char> datum;
 189+typedef basic_datum<UChar32> u32datum;
 190+
 191+} // namespace afp
 192+
 193+#endif /* !AFTYPES_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/afstring.h
@@ -0,0 +1,175 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+#ifndef AFSTRING_H
 13+#define AFSTRING_H
 14+
 15+#include <string>
 16+
 17+#include <unicode/uchar.h>
 18+
 19+#include <boost/regex/pending/unicode_iterator.hpp>
 20+
 21+#include "fray.h"
 22+
 23+typedef std::basic_string<UChar32> u32string;
 24+typedef std::basic_istream<UChar32> u32istream;
 25+typedef std::basic_ostream<UChar32> u32ostream;
 26+typedef std::basic_iostream<UChar32> u32iostream;
 27+typedef std::basic_istringstream<UChar32> u32istringstream;
 28+typedef std::basic_ostringstream<UChar32> u32ostringstream;
 29+typedef std::basic_stringstream<UChar32> u32stringstream;
 30+typedef basic_fray<UChar32> u32fray;
 31+
 32+template<typename iterator, int i> struct u32_conv_type;
 33+
 34+template<typename iterator> struct u32_conv_type<iterator, 1> {
 35+ typedef boost::u8_to_u32_iterator<iterator, UChar32> type;
 36+};
 37+
 38+template<typename iterator> struct u32_conv_type<iterator, 2> {
 39+ typedef boost::u16_to_u32_iterator<iterator, UChar32> type;
 40+};
 41+
 42+template<typename iterator, int i> struct u8_conv_type;
 43+
 44+template<typename iterator> struct u8_conv_type<iterator, 4> {
 45+ typedef boost::u32_to_u8_iterator<iterator, char> type;
 46+};
 47+
 48+/*
 49+ * Convert UTF-8 or UTF-16 strings to u32frays.
 50+ */
 51+template<typename charT>
 52+u32fray
 53+make_u32fray(basic_fray<charT> const &v)
 54+{
 55+ std::vector<UChar32> result;
 56+ result.reserve(v.size() / 3);
 57+
 58+ typedef typename u32_conv_type<
 59+ typename basic_fray<charT>::iterator,
 60+ sizeof(charT)>::type conv_type;
 61+
 62+ std::copy(conv_type(v.begin()), conv_type(v.end()),
 63+ std::back_inserter(result));
 64+
 65+ return u32fray(&result[0], result.size());
 66+}
 67+
 68+template<typename charT>
 69+u32fray
 70+make_u32fray(charT const *v)
 71+{
 72+ return make_u32fray(basic_fray<charT>(v));
 73+}
 74+
 75+template<typename charT>
 76+fray
 77+make_u8fray(basic_fray<charT> const &v)
 78+{
 79+ std::vector<char> result;
 80+ result.reserve(v.size() * 4);
 81+
 82+ typedef typename u8_conv_type<
 83+ typename basic_fray<charT>::iterator,
 84+ sizeof(charT)>::type conv_type;
 85+
 86+ std::copy(conv_type(v.begin()), conv_type(v.end()),
 87+ std::back_inserter(result));
 88+
 89+ return fray(&result[0], result.size());
 90+}
 91+
 92+template<typename charT>
 93+fray
 94+make_u8fray(charT const *v)
 95+{
 96+ return make_u8fray(basic_fray<charT>(v));
 97+}
 98+
 99+template<typename fromT, typename toT>
 100+struct ustring_convertor;
 101+
 102+template<>
 103+struct ustring_convertor<char, UChar32> {
 104+ static u32fray convert(fray const &from) {
 105+ return make_u32fray(from);
 106+ }
 107+};
 108+
 109+template<>
 110+struct ustring_convertor<char, char> {
 111+ static fray convert(fray const &from) {
 112+ return from;
 113+ }
 114+};
 115+
 116+template<typename To, typename From>
 117+basic_fray<To>
 118+make_astring(basic_fray<From> const &from)
 119+{
 120+ return ustring_convertor<From, To>::convert(from);
 121+}
 122+
 123+template<typename To, typename From>
 124+basic_fray<To>
 125+make_astring(From const *from)
 126+{
 127+ return make_astring<To>(basic_fray<From>(from));
 128+}
 129+
/* Thrown by u32lexical_cast when the underlying boost::lexical_cast fails. */
struct bad_u32lexical_cast : std::runtime_error {
	bad_u32lexical_cast()
		: std::runtime_error("bad_u32lexical_cast: source type could not be interpreted as target")
	{
	}
};
 134+
 135+template<typename T>
 136+struct u32lexical_cast_type_map {
 137+ typedef T to_type;
 138+ typedef T from_type;
 139+
 140+ static T map_from(T const &s) {
 141+ return s;
 142+ }
 143+
 144+ static T map_to(T const &s) {
 145+ return s;
 146+ }
 147+};
 148+
 149+template<>
 150+struct u32lexical_cast_type_map<u32fray> {
 151+ typedef fray from_type;
 152+ typedef u32fray to_type;
 153+
 154+ static from_type map_from(u32fray const &s) {
 155+ return make_u8fray(s);
 156+ }
 157+
 158+ static to_type map_to(fray const &s) {
 159+ return make_u32fray(s);
 160+ }
 161+};
 162+
 163+template<typename charT, typename To, typename From>
 164+To
 165+u32lexical_cast(From const &f) {
 166+ try {
 167+ return
 168+ u32lexical_cast_type_map<To>::map_to(
 169+ boost::lexical_cast<typename u32lexical_cast_type_map<To>::from_type>(
 170+ u32lexical_cast_type_map<From>::map_from(f)));
 171+ } catch (boost::bad_lexical_cast &e) {
 172+ throw bad_u32lexical_cast();
 173+ }
 174+}
 175+
 176+#endif /* !AFSTRING_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/syntax_check.cpp
@@ -26,7 +26,7 @@
2727 fray filter(ss.str());
2828
2929 try {
30 - afp::filter_evaluator f;
 30+ afp::u32filter_evaluator f;
3131 f.evaluate(make_u32fray(filter));
3232 } catch (afp::exception &excep) {
3333 std::cout << "PARSERR: " << excep.what() << std::endl;
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/affunctions.h
@@ -0,0 +1,461 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef AFFUNCTIONS_H
 14+#define AFFUNCTIONS_H
 15+
 16+#include <map>
 17+#include <vector>
 18+#include <algorithm>
 19+#include <fstream>
 20+#include <sstream>
 21+#include <ios>
 22+#include <iostream>
 23+
 24+#include <unicode/uchar.h>
 25+
 26+#include <boost/format.hpp>
 27+
 28+#include "aftypes.h"
 29+#include "equiv.h"
 30+
 31+namespace afp {
 32+
 33+template<typename charT>
 34+int match(charT const *, charT const *);
 35+
 36+template<typename charT>
 37+basic_datum<charT>
 38+af_length (std::vector<basic_datum<charT> > const &args);
 39+
 40+template<typename charT>
 41+basic_datum<charT>
 42+af_ccnorm (std::vector<basic_datum<charT> > const &args);
 43+
 44+template<typename charT>
 45+basic_datum<charT>
 46+af_rmdoubles (std::vector<basic_datum<charT> > const &args);
 47+
 48+template<typename charT>
 49+basic_datum<charT>
 50+af_specialratio (std::vector<basic_datum<charT> > const &args);
 51+
 52+template<typename charT>
 53+basic_datum<charT>
 54+af_rmspecials (std::vector<basic_datum<charT> > const &args);
 55+
 56+template<typename charT>
 57+basic_datum<charT>
 58+af_norm (std::vector<basic_datum<charT> > const &args);
 59+
 60+template<typename charT>
 61+basic_datum<charT>
 62+af_count (std::vector<basic_datum<charT> > const &args);
 63+
 64+template<typename charT>
 65+basic_fray<charT>
 66+confusable_character_normalise(basic_fray<charT> const &orig);
 67+
 68+template<typename charT>
 69+basic_fray<charT>
 70+rmdoubles(basic_fray<charT> const &orig);
 71+
 72+template<typename charT>
 73+basic_fray<charT>
 74+rmspecials(basic_fray<charT> const &orig);
 75+
 76+struct too_many_arguments_exception : afp::exception {
 77+ too_many_arguments_exception(char const *what)
 78+ : afp::exception(what) {}
 79+};
 80+
 81+struct too_few_arguments_exception : afp::exception {
 82+ too_few_arguments_exception(char const *what)
 83+ : afp::exception(what) {}
 84+};
 85+
 86+namespace {
 87+
 88+void
 89+check_args(std::string const &fname, int args, int min, int max = 0)
 90+{
 91+ if (max == 0)
 92+ max = min;
 93+ if (args < min) {
 94+ std::string s = str(boost::format(
 95+ "too few arguments for function %s (got %d, expected %d)")
 96+ % fname % args % min);
 97+ throw too_few_arguments_exception(s.c_str());
 98+ } else if (args > max) {
 99+ std::string s = str(boost::format(
 100+ "too many arguments for function %s (got %d, expected %d)")
 101+ % fname % args % min);
 102+ throw too_many_arguments_exception(s.c_str());
 103+ }
 104+}
 105+
 106+} // anonymous namespace
 107+
 108+template<typename charT>
 109+basic_datum<charT>
 110+af_count(std::vector<basic_datum<charT> > const &args) {
 111+ check_args("count", args.size(), 1, 2);
 112+
 113+ basic_fray<charT> needle, haystack;
 114+
 115+ if (args.size() < 2) {
 116+ needle = make_astring<charT, char>(",");
 117+ haystack = args[0].toString();
 118+ } else {
 119+ needle = args[0].toString();
 120+ haystack = args[1].toString();
 121+ }
 122+
 123+ size_t last_pos = 0;
 124+ unsigned int count = 0;
 125+
 126+ while (last_pos != haystack.npos) {
 127+ count++;
 128+ last_pos = haystack.find(needle, last_pos + needle.size());
 129+ }
 130+
 131+ // One extra was added, but one extra is needed if only one arg was supplied.
 132+ if (args.size() >= 2)
 133+ count--;
 134+
 135+ return basic_datum<charT>::from_int((long int)count);
 136+}
 137+
 138+template<typename charT>
 139+basic_datum<charT>
 140+af_norm(std::vector<basic_datum<charT> > const &args) {
 141+ check_args("norm", args.size(), 1);
 142+
 143+ basic_fray<charT> orig = args[0].toString();
 144+
 145+ int lastchr = 0;
 146+ equiv_set const &equivs = equiv_set::instance();
 147+ std::vector<charT> result;
 148+ result.reserve(orig.size());
 149+
 150+ for (std::size_t i = 0; i < orig.size(); ++i) {
 151+ int chr = equivs.get(orig[i]);
 152+
 153+ if (chr != lastchr && u_isalnum(chr))
 154+ result.push_back(chr);
 155+
 156+ lastchr = chr;
 157+ }
 158+
 159+ return basic_datum<charT>::from_string(basic_fray<charT>(&result[0], result.size()));
 160+}
 161+
 162+template<typename charT>
 163+basic_fray<charT>
 164+rmdoubles(basic_fray<charT> const &orig) {
 165+ int lastchr = 0;
 166+ std::vector<charT> result;
 167+ result.reserve(orig.size());
 168+
 169+ for (std::size_t i = 0; i < orig.size(); ++i) {
 170+ if (orig[i] != lastchr)
 171+ result.push_back(orig[i]);
 172+
 173+ lastchr = orig[i];
 174+ }
 175+
 176+ return basic_fray<charT>(&result[0], result.size());
 177+}
 178+
 179+template<typename charT>
 180+basic_datum<charT>
 181+af_specialratio(std::vector<basic_datum<charT> > const &args) {
 182+ check_args("specialratio", args.size(), 1);
 183+
 184+ basic_fray<charT> orig = args[0].toString();
 185+ int len = 0;
 186+ int specialcount = 0;
 187+
 188+ for (std::size_t i = 0; i < orig.size(); ++i) {
 189+ len++;
 190+ if (!u_isalnum(orig[i]))
 191+ specialcount++;
 192+ }
 193+
 194+ double ratio = (float)specialcount / len;
 195+
 196+ return basic_datum<charT>::from_double(ratio);
 197+}
 198+
 199+template<typename charT>
 200+basic_datum<charT>
 201+af_rmspecials(std::vector<basic_datum<charT> > const &args) {
 202+ check_args("rmspecials", args.size(), 1);
 203+ return basic_datum<charT>::from_string(rmspecials(args[0].toString()));
 204+}
 205+
 206+template<typename charT>
 207+basic_fray<charT>
 208+rmspecials(basic_fray<charT> const &orig) {
 209+ std::vector<charT> result;
 210+ result.reserve(orig.size());
 211+
 212+ for (std::size_t i = 0; i < orig.size(); ++i) {
 213+ if (u_isalnum(orig[i]))
 214+ result.push_back(orig[i]);
 215+ }
 216+
 217+ return basic_fray<charT>(&result[0], result.size());
 218+}
 219+
 220+template<typename charT>
 221+basic_datum<charT>
 222+af_ccnorm(std::vector<basic_datum<charT> > const &args) {
 223+ check_args("ccnorm", args.size(), 1);
 224+ return basic_datum<charT>::from_string(confusable_character_normalise(args[0].toString()));
 225+}
 226+
 227+template<typename charT>
 228+basic_datum<charT>
 229+af_rmdoubles(std::vector<basic_datum<charT> > const &args) {
 230+ check_args("ccnorm", args.size(), 1);
 231+ return basic_datum<charT>::from_string(rmdoubles(args[0].toString()));
 232+}
 233+
 234+template<typename charT>
 235+basic_datum<charT>
 236+af_length(std::vector<basic_datum<charT> > const &args) {
 237+ check_args("ccnorm", args.size(), 1);
 238+ return basic_datum<charT>::from_int(args[0].toString().size());
 239+}
 240+
 241+template<typename charT>
 242+basic_datum<charT>
 243+af_lcase(std::vector<basic_datum<charT> > const &args) {
 244+ check_args("ccnorm", args.size(), 1);
 245+ std::vector<charT> result;
 246+ basic_fray<charT> const orig = args[0].toString();
 247+ result.reserve(orig.size());
 248+
 249+ for (std::size_t i = 0; i < orig.size(); ++i)
 250+ result.push_back(u_tolower(orig[i]));
 251+
 252+ return basic_datum<charT>::from_string(basic_fray<charT>(&result[0], result.size()));
 253+}
 254+
 255+template<typename charT>
 256+basic_fray<charT>
 257+confusable_character_normalise(basic_fray<charT> const &orig) {
 258+ equiv_set const &equivs = equiv_set::instance();
 259+ std::vector<charT> result;
 260+ result.reserve(orig.size());
 261+
 262+ for (std::size_t i = 0; i < orig.size(); ++i)
 263+ result.push_back(equivs.get(orig[i]));
 264+
 265+ return basic_fray<charT>(&result[0], result.size());
 266+}
 267+
 268+template<typename charT>
 269+basic_datum<charT>
 270+f_in(basic_datum<charT> const &a, basic_datum<charT> const &b)
 271+{
 272+ basic_fray<charT> sa = a.toString(), sb = b.toString();
 273+ return basic_datum<charT>::from_int(std::search(sb.begin(), sb.end(), sa.begin(), sa.end()) != sb.end());
 274+}
 275+
 276+template<typename charT>
 277+basic_datum<charT>
 278+f_like(basic_datum<charT> const &str, basic_datum<charT> const &pattern)
 279+{
 280+ return basic_datum<charT>::from_int(match(pattern.toString().c_str(), str.toString().c_str()));
 281+}
 282+
 283+template<typename charT>
 284+basic_datum<charT>
 285+f_regex(basic_datum<charT> const &str, basic_datum<charT> const &pattern)
 286+{
 287+ basic_fray<charT> f = pattern.toString();
 288+ boost::u32regex r = boost::make_u32regex(f.begin(), f.end(),
 289+ boost::regex_constants::perl);
 290+ basic_fray<charT> s = str.toString();
 291+ return basic_datum<charT>::from_int(boost::u32regex_match(
 292+ s.begin(), s.end(), r));
 293+}
 294+
 295+template<typename charT>
 296+basic_datum<charT>
 297+f_int(std::vector<basic_datum<charT> > const &args)
 298+{
 299+ check_args("int", args.size(), 1);
 300+ return basic_datum<charT>::from_int(args[0].toInt());
 301+}
 302+
 303+template<typename charT>
 304+basic_datum<charT>
 305+f_string(std::vector<basic_datum<charT> > const &args)
 306+{
 307+ check_args("string", args.size(), 1);
 308+ return basic_datum<charT>::from_string(args[0].toString());
 309+}
 310+
 311+template<typename charT>
 312+basic_datum<charT>
 313+f_float(std::vector<basic_datum<charT> > const &args)
 314+{
 315+ check_args("float", args.size(), 1);
 316+ return basic_datum<charT>::from_double(args[0].toFloat());
 317+}
 318+
 319+/* $NetBSD: fnmatch.c,v 1.21 2005/12/24 21:11:16 perry Exp $ */
 320+
 321+/*
 322+ * Copyright (c) 1989, 1993, 1994
 323+ * The Regents of the University of California. All rights reserved.
 324+ *
 325+ * This code is derived from software contributed to Berkeley by
 326+ * Guido van Rossum.
 327+ *
 328+ * Redistribution and use in source and binary forms, with or without
 329+ * modification, are permitted provided that the following conditions
 330+ * are met:
 331+ * 1. Redistributions of source code must retain the above copyright
 332+ * notice, this list of conditions and the following disclaimer.
 333+ * 2. Redistributions in binary form must reproduce the above copyright
 334+ * notice, this list of conditions and the following disclaimer in the
 335+ * documentation and/or other materials provided with the distribution.
 336+ * 3. Neither the name of the University nor the names of its contributors
 337+ * may be used to endorse or promote products derived from this software
 338+ * without specific prior written permission.
 339+ *
 340+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 341+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 342+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 343+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 344+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 345+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 346+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 347+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 348+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 349+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 350+ * SUCH DAMAGE.
 351+ */
 352+
 353+/*
 354+ * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
 355+ * Compares a filename or pathname to a pattern.
 356+ */
 357+
 358+#include <ctype.h>
 359+#include <string.h>
 360+
#define	EOS	'\0'

template<typename charT>
const charT *rangematch(const charT *, int);

/*
 * Glob-style matcher derived from NetBSD fnmatch().
 *
 * Returns 1 when the whole of `string` matches `pattern`, 0 otherwise.
 * Supported pattern syntax:
 *   ?      any single character
 *   *      any (possibly empty) sequence of characters
 *   [...]  bracket expression, see rangematch()
 *   \c     the character c taken literally; a trailing backslash matches a
 *          literal backslash
 * Both arguments must be EOS-terminated.
 */
template<typename charT>
int
match(charT const *pattern, charT const *string)
{
	charT c;

	for (;;)
		switch (c = *pattern++) {
		case EOS:
			/* Pattern exhausted: succeed only if the string is too. */
			return (*string == EOS ? 1 : 0);
		case '?':
			if (*string == EOS)
				return (0);
			++string;
			break;
		case '*':
			c = *pattern;
			/* Collapse multiple stars. */
			while (c == '*')
				c = *++pattern;

			/* A star at the end of the pattern matches any remainder. */
			if (c == EOS) {
				return (1);
			}

			/* General case, use recursion. */
			while (*string != EOS) {
				if (match(pattern, string))
					return (1);
				++string;
			}
			return (0);
		case '[':
			if (*string == EOS)
				return (0);
			/* rangematch() returns the pattern position after ']'. */
			if ((pattern =
			    rangematch(pattern, *string)) == NULL)
				return (0);
			++string;
			break;
		case '\\':
			if ((c = *pattern++) == EOS) {
				c = '\\';
				--pattern;
			}
			/* FALLTHROUGH */
		default:
			if (c != *string++)
				return (0);
			break;
		}
	/* NOTREACHED */
}

/*
 * Match the character `test` against the bracket expression whose body
 * begins at `pattern` (just after the opening '[').
 *
 * Returns a pointer to the pattern character following the closing ']' on
 * a match, or NULL when the character does not match or the expression is
 * unterminated.
 */
template<typename charT>
const charT *
rangematch(charT const *pattern, int test)
{
	int negate, ok;
	charT c, c2;

	/*
	 * A bracket expression starting with an unquoted circumflex
	 * character produces unspecified results (IEEE 1003.2-1992,
	 * 3.13.2). This implementation treats it like '!', for
	 * consistency with the regular expression syntax.
	 * J.T. Conklin (conklin@ngai.kaleida.com)
	 */
	if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
		++pattern;

	for (ok = 0; (c = *pattern++) != ']';) {
		if (c == '\\')
			c = *pattern++;
		if (c == EOS)
			return (NULL);
		/* "lo-hi" range, unless the '-' is the last character. */
		if (*pattern == '-'
		    && (c2 = (*(pattern+1))) != EOS &&
		    c2 != ']') {
			pattern += 2;
			if (c2 == '\\')
				c2 = *pattern++;
			if (c2 == EOS)
				return (NULL);
			if (c <= test && test <= c2)
				ok = 1;
		} else if (c == test)
			ok = 1;
	}
	/* A negated expression succeeds exactly when nothing matched. */
	return (ok == negate ? NULL : pattern);
}
 459+
 460+} // namespace afp
 461+
 462+#endif /* !AFFUNCTIONS_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/request.cpp
@@ -41,7 +41,7 @@
4242 }
4343
4444 boost::optional<string_t> read(std::size_t max_len = 0) {
45 - std::vector<char, std::allocator<char> > ret;
 45+ std::vector<char, boost::pool_allocator<char> > ret;
4646 ret.reserve(1024); /* probably enough for most inputs */
4747
4848 if (first_)
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/makefile
@@ -9,60 +9,43 @@
1010
1111 include makefile.config
1212
13 -CPPFLAGS = -Iinclude $(EXTRA_CPPFLAGS)
 13+CPPFLAGS = -I. -Iinclude $(EXTRA_CPPFLAGS)
1414 LDFLAGS = $(EXTRA_LDFLAGS)
1515
16 -LIBS = -lboost_regex$(BOOST_TAG) -lboost_date_time$(BOOST_TAG) -lgmp -lgmpxx -licuuc -licui18n -licudata
 16+LIBS = -lboost_regex$(BOOST_TAG) -lgmp -lgmpxx -licuuc -licui18n -licudata
1717
18 -af_expr_objs = \
 18+expr: CPPFLAGS+=-DTEST_PARSER
 19+
 20+af_expr_objs = \
1921 af_expr-eval.o \
20 - af_expr-parser.o \
21 - af_expr-ast.o \
22 - af_expr-filter_evaluator.o \
2322 af_expr-equiv.o \
2423 af_expr-request.o
2524
26 -af_parser_objs = \
 25+af_parser_objs = \
2726 af_parser-main.o \
2827 af_parser-request.o \
29 - af_parser-parser.o \
30 - af_parser-ast.o \
31 - af_parser-filter_evaluator.o \
3228 af_parser-equiv.o
3329
34 -check_objs = \
35 - check-check.o \
36 - check-parser.o \
37 - check-ast.o \
38 - check-filter_evaluator.o \
 30+check_objs = \
 31+ check-check.o \
3932 check-equiv.o
4033
41 -syntax_check_objs = \
 34+syntax_check_objs = \
4235 syntax_check-equiv.o \
43 - syntax_check-parser.o \
44 - syntax_check-ast.o \
45 - syntax_check-filter_evaluator.o \
4636 syntax_check-syntax_check.o
4737
48 -expr_objs = \
49 - expr-equiv.o \
50 - expr-parser.o \
51 - expr-ast.o \
 38+expr_objs = \
 39+ expr-equiv.o \
5240 expr-expr.o
5341
54 -xml_objs = \
55 - xml-parser.o \
56 - xml-ast.o \
 42+xml_objs = \
5743 xml-xml.o
5844
59 -evaluate_objs = \
 45+evaluate_objs = \
6046 evaluate-equiv.o \
61 - evaluate-parser.o \
62 - evaluate-ast.o \
63 - evaluate-filter_evaluator.o \
6447 evaluate-evaluate.o
6548
66 -maketest_objs = \
 49+maketest_objs = \
6750 maketest-maketest.o
6851
6952 progs = check af_parser syntax_check af_expr expr evaluate xml maketest
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/ast.h
@@ -0,0 +1,450 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef AST_H
 14+#define AST_H
 15+
 16+#include "parserdefs.h"
 17+
 18+namespace {
 19+
/*
 * Convert the first `ndigits` characters of `str` to an integer, reading
 * each one as a hexadecimal digit (either case).
 *
 * Characters that are not hexadecimal digits contribute zero, exactly as
 * before.  The value is accumulated in an unsigned integer so that an
 * eight-digit input with the top bit set (e.g. "ffffffff") wraps instead
 * of overflowing a signed int, which is undefined behaviour.  A zero or
 * negative `ndigits` yields 0.
 */
template<typename charT>
int
hex2int(charT const *str, int ndigits)
{
	unsigned int acc = 0;

	for (int i = 0; i < ndigits; ++i) {
		charT const ch = str[i];
		unsigned int digit = 0;

		if (ch >= 'a' && ch <= 'f')
			digit = 10u + static_cast<unsigned int>(ch - 'a');
		else if (ch >= 'A' && ch <= 'F')
			digit = 10u + static_cast<unsigned int>(ch - 'A');
		else if (ch >= '0' && ch <= '9')
			digit = static_cast<unsigned int>(ch - '0');

		acc = acc * 0x10u + digit;
	}

	return static_cast<int>(acc);
}
 40+
 41+}
 42+
 43+namespace afp {
 44+
 45+template<typename T> struct parser_grammar;
 46+
 /*
  * Computes the value of a parsed expression tree.
  *
  * charT is the character type of the expression text; iterator is the
  * tree iterator type whose nodes expose `value` and `children`.
  * tree_eval() dispatches on the node's parser id to one of the
  * ast_eval_* helpers, which evaluate their operands through tree_eval().
  */
 47+template<typename charT, typename iterator>
 48+struct ast_evaluator {
 /* Grammar whose `variables` and `functions` tables are looked up during
  * evaluation.  Stored as a reference, so it must outlive this object. */
 49+ parser_grammar<charT> const &grammar_;
 50+
 51+ ast_evaluator(parser_grammar<charT> const &grammar);
 52+
 /* Evaluate the node the iterator refers to. */
 53+ basic_datum<charT> tree_eval(iterator const &);
 54+
 /* One helper per node kind.  Helpers taking a charT receive the first
  * character of the operator text; helpers taking a basic_fray receive
  * the whole node text. */
 55+ basic_datum<charT> ast_eval_basic(charT, iterator const &);
 56+ basic_datum<charT> ast_eval_variable(basic_fray<charT> const &);
 57+ basic_datum<charT> ast_eval_in(charT, iterator const &, iterator const &);
 58+ basic_datum<charT> ast_eval_bool(charT, iterator const &, iterator const &);
 59+ basic_datum<charT> ast_eval_plus(charT, iterator const &, iterator const &);
 60+ basic_datum<charT> ast_eval_mult(charT, iterator const &, iterator const &);
 61+ basic_datum<charT> ast_eval_pow(iterator const &, iterator const &);
 62+ basic_datum<charT> ast_eval_string(basic_fray<charT> const &);
 63+ basic_datum<charT> ast_eval_num(basic_fray<charT> const &);
 64+ basic_datum<charT> ast_eval_ord(basic_fray<charT> const &, iterator const &, iterator const &);
 65+ basic_datum<charT> ast_eval_eq(basic_fray<charT> const &, iterator const &, iterator const &);
 66+ basic_datum<charT> ast_eval_tern(iterator const &, iterator const &, iterator const &);
 67+ basic_datum<charT> ast_eval_function(basic_fray<charT> const &, iterator, iterator const &);
 68+
 69+};
 70+
 /*
  * Bind the evaluator to `grammar`.  Only a reference is kept; the grammar
  * is not copied.
  */
 71+template<typename charT, typename iterator>
 72+ast_evaluator<charT, iterator>::ast_evaluator(parser_grammar<charT> const &grammar)
 73+ : grammar_(grammar)
 74+{
 75+}
 76+
 77+template<typename charT, typename iterator>
 78+basic_datum<charT>
 79+ast_evaluator<charT, iterator>::ast_eval_in(charT oper, iterator const &a, iterator const &b)
 80+{
 81+ switch (oper) {
 82+ case 'i':
 83+ return f_in(tree_eval(a), tree_eval(b));
 84+ case 'c':
 85+ return f_in(tree_eval(b), tree_eval(a));
 86+ case 'l':
 87+ case 'm':
 88+ return f_like(tree_eval(a), tree_eval(b));
 89+ case 'r':
 90+ return f_regex(tree_eval(a), tree_eval(b));
 91+ default:
 92+ abort();
 93+ }
 94+}
 95+
 96+template<typename charT, typename iterator>
 97+basic_datum<charT>
 98+ast_evaluator<charT, iterator>::ast_eval_bool(charT oper, iterator const &a, iterator const &b)
 99+{
 100+ switch (oper) {
 101+ case '&':
 102+ if (tree_eval(a).toBool())
 103+ if (tree_eval(b).toBool())
 104+ return basic_datum<charT>::from_int(1);
 105+ return basic_datum<charT>::from_int(0);
 106+
 107+ case '|':
 108+ if (tree_eval(a).toBool())
 109+ return basic_datum<charT>::from_int(1);
 110+ else
 111+ if (tree_eval(b).toBool())
 112+ return basic_datum<charT>::from_int(1);
 113+ return basic_datum<charT>::from_int(0);
 114+
 115+ case '^':
 116+ {
 117+ int va = tree_eval(a).toBool(), vb = tree_eval(b).toBool();
 118+ if ((va && !vb) || (!va && vb))
 119+ return basic_datum<charT>::from_int(1);
 120+ return basic_datum<charT>::from_int(0);
 121+ }
 122+ }
 123+
 124+ abort();
 125+}
 126+
 127+template<typename charT, typename iterator>
 128+basic_datum<charT>
 129+ast_evaluator<charT, iterator>::ast_eval_plus(charT oper, iterator const &a, iterator const &b)
 130+{
 131+ switch (oper) {
 132+ case '+':
 133+ return tree_eval(a) + tree_eval(b);
 134+
 135+ case '-':
 136+ return tree_eval(a) - tree_eval(b);
 137+
 138+ default:
 139+ abort();
 140+ }
 141+}
 142+
 143+template<typename charT, typename iterator>
 144+basic_datum<charT>
 145+ast_evaluator<charT, iterator>::ast_eval_mult(charT oper, iterator const &a, iterator const &b)
 146+{
 147+ switch (oper) {
 148+ case '*':
 149+ return tree_eval(a) * tree_eval(b);
 150+ case '/':
 151+ return tree_eval(a) / tree_eval(b);
 152+ case '%':
 153+ return tree_eval(a) % tree_eval(b);
 154+ default:
 155+ abort();
 156+ }
 157+}
 158+
 159+template<typename charT, typename iterator>
 160+basic_datum<charT>
 161+ast_evaluator<charT, iterator>::ast_eval_ord(basic_fray<charT> const &oper, iterator const &a, iterator const &b)
 162+{
 163+ switch (oper.size()) {
 164+ case 1:
 165+ switch (oper[0]) {
 166+ case '<':
 167+ return basic_datum<charT>::from_int(tree_eval(a) < tree_eval(b));
 168+ case '>':
 169+ return basic_datum<charT>::from_int(tree_eval(a) > tree_eval(b));
 170+ default:
 171+ abort();
 172+ }
 173+
 174+ case 2:
 175+ switch(oper[0]) {
 176+ case '<':
 177+ return basic_datum<charT>::from_int(tree_eval(a) <= tree_eval(b));
 178+ case '>':
 179+ return basic_datum<charT>::from_int(tree_eval(a) >= tree_eval(b));
 180+ default:
 181+ abort();
 182+ }
 183+
 184+ default:
 185+ abort();
 186+ }
 187+}
 188+
 189+template<typename charT, typename iterator>
 190+basic_datum<charT>
 191+ast_evaluator<charT, iterator>::ast_eval_eq(basic_fray<charT> const &oper, iterator const &a, iterator const &b)
 192+{
 193+ switch (oper.size()) {
 194+ case 1: /* = */
 195+ return basic_datum<charT>::from_int(tree_eval(a) == tree_eval(b));
 196+ case 2: /* != /= == */
 197+ switch (oper[0]) {
 198+ case '!':
 199+ case '/':
 200+ return basic_datum<charT>::from_int(tree_eval(a) != tree_eval(b));
 201+ case '=':
 202+ return basic_datum<charT>::from_int(tree_eval(a) == tree_eval(b));
 203+ default:
 204+ abort();
 205+ }
 206+ case 3: /* === !== */
 207+ switch (oper[0]) {
 208+ case '=':
 209+ return basic_datum<charT>::from_int(tree_eval(a).compare_with_type(tree_eval(b)));
 210+ case '!':
 211+ return basic_datum<charT>::from_int(!tree_eval(a).compare_with_type(tree_eval(b)));
 212+ default:
 213+ abort();
 214+ }
 215+ default:
 216+ abort();
 217+ }
 218+}
 219+
 220+template<typename charT, typename iterator>
 221+basic_datum<charT>
 222+ast_evaluator<charT, iterator>::ast_eval_pow(iterator const &a, iterator const &b)
 223+{
 224+ return pow(tree_eval(a), tree_eval(b));
 225+}
 226+
 227+template<typename charT, typename iterator>
 228+basic_datum<charT>
 229+ast_evaluator<charT, iterator>::ast_eval_string(basic_fray<charT> const &s)
 230+{
 231+ std::vector<charT> ret;
 232+ ret.reserve(int(s.size() * 1.2));
 233+
 234+ for (std::size_t i = 0, end = s.size(); i < end; ++i) {
 235+ if (s[i] != '\\') {
 236+ ret.push_back(s[i]);
 237+ continue;
 238+ }
 239+
 240+ if (i+1 == end)
 241+ break;
 242+
 243+ switch (s[i + 1]) {
 244+ case 't':
 245+ ret.push_back('\t');
 246+ break;
 247+ case 'n':
 248+ ret.push_back('\n');
 249+ break;
 250+ case 'r':
 251+ ret.push_back('\r');
 252+ break;
 253+ case 'b':
 254+ ret.push_back('\b');
 255+ break;
 256+ case 'a':
 257+ ret.push_back('\a');
 258+ break;
 259+ case 'f':
 260+ ret.push_back('\f');
 261+ break;
 262+ case 'v':
 263+ ret.push_back('\v');
 264+ break;
 265+ case 'x':
 266+ if (i + 3 >= end)
 267+ break;
 268+ ret.push_back(hex2int(s.data() + i + 2, 2));
 269+ i += 2;
 270+ break;
 271+
 272+ case 'u':
 273+ if (i + 5 >= end)
 274+ break;
 275+ ret.push_back(hex2int(s.data() + i + 2, 4));
 276+ i += 4;
 277+ break;
 278+
 279+ case 'U':
 280+ if (i + 9 >= end)
 281+ break;
 282+ ret.push_back(hex2int(s.data() + i + 2, 8));
 283+ i += 8;
 284+ break;
 285+
 286+ default:
 287+ ret.push_back(s[i + 1]);
 288+ break;
 289+ }
 290+
 291+ i++;
 292+ }
 293+
 294+ return basic_datum<charT>::from_string(basic_fray<charT>(ret.begin(), ret.end()));
 295+}
 296+
 297+template<typename charT, typename iterator>
 298+basic_datum<charT>
 299+ast_evaluator<charT, iterator>::ast_eval_tern(iterator const &cond, iterator const &iftrue, iterator const &iffalse)
 300+{
 301+ if (tree_eval(cond).toBool())
 302+ return tree_eval(iftrue);
 303+ else
 304+ return tree_eval(iffalse);
 305+}
 306+
 307+template<typename charT, typename iterator>
 308+basic_datum<charT>
 309+ast_evaluator<charT, iterator>::ast_eval_num(basic_fray<charT> const &s)
 310+{
 311+ if (s.find('.') != basic_fray<charT>::npos) {
 312+ return basic_datum<charT>::from_double(
 313+ typename basic_datum<charT>::float_t(
 314+ make_u8fray(s).c_str()));
 315+ }
 316+
 317+ int base;
 318+ int trim = 1;
 319+ switch (s[s.size() - 1]) {
 320+ case 'x':
 321+ base = 16;
 322+ break;
 323+ case 'o':
 324+ base = 8;
 325+ break;
 326+ case 'b':
 327+ base = 2;
 328+ break;
 329+ default:
 330+ base = 10;
 331+ trim = 0;
 332+ break;
 333+ }
 334+
 335+ fray t(make_u8fray(s));
 336+ std::string str(t.begin(), t.end() - trim);
 337+ return basic_datum<charT>::from_int(
 338+ typename basic_datum<charT>::integer_t(str, base));
 339+}
 340+
 341+template<typename charT, typename iterator>
 342+basic_datum<charT>
 343+ast_evaluator<charT, iterator>::ast_eval_function(basic_fray<charT> const &f, iterator abegin, iterator const &aend)
 344+{
 345+ std::vector<basic_datum<charT> > args;
 346+
 347+ for (; abegin != aend; ++abegin)
 348+ args.push_back(tree_eval(abegin));
 349+
 350+ boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)> *fptr;
 351+ if ((fptr = find(grammar_.functions, f.c_str())) == NULL)
 352+ return basic_datum<charT>::from_string(basic_fray<charT>());
 353+ else
 354+ return (*fptr)(args);
 355+}
 356+
 357+template<typename charT, typename iterator>
 358+basic_datum<charT>
 359+ast_evaluator<charT, iterator>::ast_eval_basic(charT op, iterator const &val)
 360+{
 361+ switch (op) {
 362+ case '!':
 363+ if (tree_eval(val).toBool())
 364+ return basic_datum<charT>::from_int(0);
 365+ else
 366+ return basic_datum<charT>::from_int(1);
 367+
 368+ case '-':
 369+ return -tree_eval(val);
 370+
 371+ case '+':
 372+ return tree_eval(val);
 373+ default:
 374+ abort();
 375+ }
 376+}
 377+
 378+template<typename charT, typename iterator>
 379+basic_datum<charT>
 380+ast_evaluator<charT, iterator>::ast_eval_variable(basic_fray<charT> const &v)
 381+{
 382+ basic_datum<charT> const *var;
 383+ if ((var = find(grammar_.variables, v.c_str())) == NULL)
 384+ return basic_datum<charT>::from_string(basic_fray<charT>());
 385+ else
 386+ return *var;
 387+}
 388+
 389+template<typename charT, typename iterator>
 390+basic_datum<charT>
 391+ast_evaluator<charT, iterator>::tree_eval(iterator const &i)
 392+{
 393+ switch (i->value.id().to_long()) {
 394+ case pid_value:
 395+ return ast_eval_num(
 396+ basic_fray<charT>(i->value.begin(), i->value.end()));
 397+
 398+ case pid_string:
 399+ return ast_eval_string(basic_fray<charT>(i->value.begin(), i->value.end()));
 400+
 401+ case pid_basic:
 402+ return ast_eval_basic(*i->value.begin(), i->children.begin());
 403+
 404+ case pid_variable:
 405+ return ast_eval_variable(basic_fray<charT>(i->value.begin(), i->value.end()));
 406+
 407+ case pid_function:
 408+ return ast_eval_function(
 409+ basic_fray<charT>(i->value.begin(), i->value.end()),
 410+ i->children.begin(), i->children.end());
 411+
 412+ case pid_in_expr:
 413+ return ast_eval_in(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
 414+
 415+ case pid_bool_expr:
 416+ return ast_eval_bool(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
 417+
 418+ case pid_plus_expr:
 419+ return ast_eval_plus(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
 420+
 421+ case pid_mult_expr:
 422+ return ast_eval_mult(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
 423+
 424+ case pid_pow_expr:
 425+ return ast_eval_pow(i->children.begin(), i->children.begin() + 1);
 426+
 427+ case pid_ord_expr:
 428+ return ast_eval_ord(
 429+ basic_fray<charT>(i->value.begin(), i->value.end()),
 430+ i->children.begin(), i->children.begin() + 1);
 431+
 432+ case pid_eq_expr:
 433+ return ast_eval_eq(
 434+ basic_fray<charT>(i->value.begin(), i->value.end()),
 435+ i->children.begin(), i->children.begin() + 1);
 436+
 437+ case pid_tern_expr:
 438+ return ast_eval_tern(
 439+ i->children.begin(),
 440+ i->children.begin() + 1,
 441+ i->children.begin() + 2);
 442+
 443+ default:
 444+ throw parse_error(
 445+ str(boost::format("internal error: unmatched expr type %d") % i->value.id().to_long()));
 446+ }
 447+}
 448+
 449+} // namespace afp
 450+
 451+#endif /* !AST_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/request.h
@@ -0,0 +1,34 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef REQUEST_H
 14+#define REQUEST_H
 15+
 16+#include <string>
 17+#include <istream>
 18+
 19+#include "filter_evaluator.h"
 20+#include "afstring.h"
 21+
 22+namespace afp {
 23+
 /*
  * A single filter request: a filter evaluator together with the filter
  * text it is to be run on.
  * NOTE(review): the input format read by load() and the meaning of the
  * two bool results are defined in the implementation file -- confirm
  * there before relying on them.
  */
 24+struct request {
 /* Populate the request from the given stream. */
 25+ bool load(std::istream &);
 /* Run the loaded filter. */
 26+ bool evaluate(void);
 27+
 28+private:
 29+ u32filter_evaluator f;
 30+ u32fray filter;
 31+};
 32+
 33+} // namespace afp
 34+
 35+#endif /* !REQUEST_H */
Property changes on: branches/change-tagging/extensions/AbuseFilter/parser_native/request.h
___________________________________________________________________
Added: svn:eol-style
136 + native
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/equiv.h
@@ -0,0 +1,34 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef EQUIV_H
 14+#define EQUIV_H
 15+
 16+#include <map>
 17+
 18+#include <boost/noncopyable.hpp>
 19+
 20+namespace afp {
 21+
 /*
  * An integer-to-integer mapping with a single shared instance.
  * The constructor is private and the type is noncopyable, so the only
  * way to obtain one is through instance().
  * NOTE(review): presumably maps a character code to its canonical
  * equivalent -- confirm against the implementation.
  */
 22+struct equiv_set : boost::noncopyable {
 /* Access the shared instance. */
 23+ static equiv_set const &instance();
 24+
 /* Look up the value associated with the given key. */
 25+ int get(int) const;
 26+
 27+private:
 28+ equiv_set();
 29+
 30+ std::map<int, int> equivs_;
 31+};
 32+
 33+} // namespace afp
 34+
 35+#endif /* !EQUIV_H */
Property changes on: branches/change-tagging/extensions/AbuseFilter/parser_native/equiv.h
___________________________________________________________________
Added: svn:eol-style
136 + native
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/xml.cpp
@@ -43,7 +43,7 @@
4444 return 1;
4545 }
4646
47 - afp::expressor e;
 47+ afp::u32expressor e;
4848
4949 e.add_variable(make_u32fray("ONE"), afp::u32datum::from_int(1));
5050 e.add_variable(make_u32fray("TWO"), afp::u32datum::from_int(2));
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/parser.h
@@ -0,0 +1,509 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef EXPRESSOR_H
 14+#define EXPRESSOR_H
 15+
 16+#include <string>
 17+#include <vector>
 18+#include <stdexcept>
 19+#include <iostream>
 20+
 21+#include <boost/noncopyable.hpp>
 22+#include <boost/function.hpp>
 23+#include <boost/spirit/core.hpp>
 24+#include <boost/spirit/utility/confix.hpp>
 25+#include <boost/spirit/utility/chset.hpp>
 26+#include <boost/spirit/utility/loops.hpp>
 27+#include <boost/spirit/tree/ast.hpp>
 28+#include <boost/spirit/tree/tree_to_xml.hpp>
 29+#include <boost/spirit/symbols.hpp>
 30+#include <boost/spirit/utility/escape_char.hpp>
 31+#include <boost/function.hpp>
 32+#include <boost/noncopyable.hpp>
 33+#include <boost/format.hpp>
 34+#include <boost/regex/icu.hpp>
 35+
 36+#include <unicode/uchar.h>
 37+
 38+#include "aftypes.h"
 39+#include "afstring.h"
 40+#include "affunctions.h"
 41+#include "fray.h"
 42+#include "ast.h"
 43+#include "parserdefs.h"
 44+
 45+namespace afp {
 46+
 47+template<typename T> struct parser_grammar;
 48+
 /*
  * User-facing interface to the expression parser for character type
  * charT.  Variables and functions are registered by name, after which
  * expressions can be evaluated or dumped as XML.  The object owns its
  * grammar (allocated in the constructor, released in the destructor)
  * and is therefore noncopyable.
  */
 49+template<typename charT>
 50+struct basic_expressor : boost::noncopyable {
 /* Signature of a user-defined function: receives the evaluated
  * arguments and returns the result. */
 51+ typedef boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)> func_t;
 52+
 53+ basic_expressor();
 54+ ~basic_expressor();
 55+
 /* Parse and evaluate expr; throws parse_error if it does not parse. */
 56+ basic_datum<charT> evaluate(basic_fray<charT> const &expr) const;
 /* Parse expr and write its syntax tree to strm as XML; throws
  * parse_error if it does not parse. */
 57+ void print_xml(std::ostream &strm, basic_fray<charT> const &expr) const;
 58+
 59+ void add_variable(basic_fray<charT> const &name, basic_datum<charT> const &value);
 60+ void add_function(basic_fray<charT> const &name, func_t value);
 61+
 /* Remove every registered variable and/or function, including the
  * ones the constructor registers. */
 62+ void clear();
 63+ void clear_functions();
 64+ void clear_variables();
 65+
 66+private:
 /* Owned; created with new in the constructor. */
 67+ parser_grammar<charT> *grammar_;
 68+};
 69+
 70+typedef basic_expressor<char> expressor;
 71+typedef basic_expressor<UChar32> u32expressor;
 72+
 73+using namespace boost::spirit;
 74+
 75+/*
 76+ * ABUSEFILTER EXPRESSION PARSER
 77+ * =============================
 78+ *
 79+ * This is the basic expression parser. It doesn't contain any AF logic
 80+ * itself, but rather presents an interface for the user to add custom
 81+ * functions and variables.
 82+ *
 83+ * The interface to the parser is the 'expressor' class. Use it like this:
 84+ *
 85+ * expressor e;
 86+ * e.add_variable("ONE", 1);
 87+ * e.add_function("f", myfunc);
 88+ * e.evaluate("ONE + 2"); -- returns 3
 89+ *
 90+ * Custom functions should have the following prototype:
 91+ *
 92+ * afp::basic_datum<charT> func(std::vector<afp::basic_datum<charT> > const &args);
 93+ *
 94+ * Functions must return a value; they cannot be void. The arguments passed to
 95+ * the function are stored in the 'args' array in left-to-right order.
 96+ *
 97+ * The parser implements a C-like grammar with some differences. The following
 98+ * operators are available:
 99+ *
 100+ * a & b true if a and b are both true
 101+ * a | b true if either a or b is true
 102+ * a ^ b true if either a or b is true, but not if both are true
 103+ * a + b arithmetic
 104+ * a - b
 105+ * a * b
 106+ * a / b
 107+ * a % b
 108+ * a ** b power-of (a^b)
 109+ * a in b true if the string "b" contains the substring "a"
 110+ * a contains b true if the string "a" contains the substring "b"
 111+ * a like b true if a matches the Unix glob b
 112+ * a matches b '' ''
 113+ * a rlike b true if a matches the Perl regex b
 114+ * a regex b '' ''
 115+ * !a true if a is false
 116+ * (a) same value as a
 117+ * a ? b : c if a is true, returns the value of b, otherwise c
 118+ * a == b comparison operators
 119+ * a != b
 120+ * a < b
 121+ * a <= b
 122+ * a > b
 123+ * a >= b
 124+ * a === b returns true if a==b and both are the same type
 125+ * a !== b return true if a != b or they are different types
 126+ *
 127+ * The parser uses afp::datum for its variables. This means it supports
 128+ * strings, ints and floats, with automatic conversion between types.
 129+ *
 130+ * String constants are C-style. The standard C escapes \a \b \f \t \r \n \v are
 131+ * supported. \xHH encodes a 1-byte Unicode character, \uHHHH encodes a 2-byte
 132+ * Unicode character, and \UHHHHHHHH encodes a 4-byte Unicode character.
 133+ *
 134+ * Numeric constants can be integers (e.g. 1), or floating-point numbers (e.g.
 135+ * 1., .1, 1.2).
 136+ *
 137+ * Function calls are f(arg1, arg2, ...).
 138+ */
 139+
 140+/*
 141+ * The grammar itself.
 142+ */
 /*
  * Spirit grammar for filter expressions over character type charT.
  * Holds the user-supplied variable and function tables and the operator
  * symbol tables; the nested definition<> builds the rules, from literals
  * up to the ternary operator, which is the start rule.
  */
 143+template<typename charT>
 144+struct parser_grammar : public grammar<parser_grammar<charT> >
 145+{
 146+ /* User-defined variables. */
 147+ symbols<basic_datum<charT>, charT > variables;
 148+
 149+ void add_variable(basic_fray<charT> const &name, basic_datum<charT> const &value) {
 150+ variables.add(name.c_str(), value);
 151+ }
 152+
 153+ /* User-defined functions. */
 154+ symbols<boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)>, charT > functions;
 155+
 156+ void add_function(
 157+ basic_fray<charT> const &name,
 158+ boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)> func) {
 159+ functions.add(name.c_str(), func);
 160+ }
 161+
 /* Operator tables, one per precedence level.  Only the keys are used
  * for matching; every entry is stored with the value 0. */
 162+ symbols<int, charT> eq_opers, ord_opers, plus_opers, mult_opers, in_opers, bool_opers;
 163+
 164+ parser_grammar() {
 165+ eq_opers.add("=", 0);
 166+ eq_opers.add("==", 0);
 167+ eq_opers.add("===", 0);
 168+ eq_opers.add("!=", 0);
 169+ eq_opers.add("!==", 0);
 170+ eq_opers.add("/=", 0);
 171+ ord_opers.add("<", 0);
 172+ ord_opers.add("<=", 0);
 173+ ord_opers.add(">", 0);
 174+ ord_opers.add(">=", 0);
 175+ plus_opers.add("+", 0);
 176+ plus_opers.add("-", 0);
 177+ mult_opers.add("*", 0);
 178+ mult_opers.add("/", 0);
 179+ mult_opers.add("%", 0);
 180+ bool_opers.add("&", 0);
 181+ bool_opers.add("|", 0);
 182+ bool_opers.add("^", 0);
 183+ in_opers.add("in", 0);
 184+ in_opers.add("contains", 0);
 185+ in_opers.add("matches", 0);
 186+ in_opers.add("like", 0);
 187+ in_opers.add("rlike", 0);
 188+ in_opers.add("regex", 0);
 189+ }
 190+
 191+ template<typename ScannerT>
 192+ struct definition
 193+ {
 194+ parser_grammar const &self_;
 195+
 196+ definition(parser_grammar const &self)
 197+ : self_(self)
 198+ {
 199+ /*
 200+ * A literal value. Either a string, a floating
 201+ * point number or an integer.
 202+ */
 203+ value =
 /* The #if 0 section below is compiled out; the hand-written
  * character-set rules that follow it are the ones in use. */
 204+#if 0
 205+ strict_real_p
 206+ | as_lower_d[ leaf_node_d[
 207+ oct_p >> 'o'
 208+ | hex_p >> 'x'
 209+ | bin_p >> 'b'
 210+ | int_p
 211+ ] ]
 212+#endif
 213+ reduced_node_d[ lexeme_d[
 214+ (+chset<>("0-9") >> '.' >> +chset<>("0-9"))
 215+ | ( '.' >> +chset<>("0-9"))
 216+ | (+chset<>("0-9") >> '.' )
 217+ ] ]
 218+ | as_lower_d[ leaf_node_d[
 219+ +chset<>("0-7") >> 'o'
 220+ | +chset<>("0-9a-f") >> 'x'
 221+ | +chset<>("0-1") >> 'b'
 222+ | +chset<>("0-9")
 223+ ] ]
 224+ | string
 225+ ;
 226+
 227+ hexchar = chset<>("a-fA-F0-9")
 228+ ;
 229+
 230+ octchar = chset<>("0-7")
 231+ ;
 232+
 /* One character of a string literal: a \x, \u, \U or \o numeric
  * escape, a backslash followed by any character other than
  * 'x', 'u' or 'o', or any character other than '"' and '\'. */
 233+ c_string_char =
 234+ "\\x" >> hexchar >> hexchar
 235+ | "\\u" >> repeat_p(4)[hexchar]
 236+ | "\\U" >> repeat_p(8)[hexchar]
 237+ | "\\o" >> octchar >> octchar >> octchar
 238+ | "\\" >> anychar_p - (ch_p('x') | 'u' | 'o')
 239+ | anychar_p - (ch_p('"') | '\\')
 240+ ;
 241+
 242+ /*
 243+ * confix_p can't be used here, because it will rewrite
 244+ * *(c_escape_ch_p[x]) into (*c_escape_ch_p)[x]
 245+ */
 246+ string = inner_node_d[
 247+ '"'
 248+ >> leaf_node_d[ *(c_string_char) ]
 249+ >> '"'
 250+ ]
 251+ ;
 252+
 253+ /*
 254+ * A variable. If the variable is found in the
 255+ * user-supplied variable list, we use that.
 256+ * Otherwise, unknown variables (containing uppercase
 257+ * letters and underscore only) are returned as the
 258+ * empty string.
 259+ */
 260+ variable = reduced_node_d[ +(upper_p | '_') ]
 261+ ;
 262+
 263+ /*
 264+ * A function call: func([arg[, arg...]]).
 265+ */
 266+ function =
 267+ (
 268+ root_node_d[ reduced_node_d[
 269+ +(lower_p | '_')
 270+ ] ]
 271+ >> inner_node_d[
 272+ '('
 273+ >> ( tern_expr % discard_node_d[ch_p(',')] )
 274+ >> ')'
 275+ ]
 276+ )
 277+ ;
 278+
 279+ /*
 280+ * A basic atomic value. Either a variable, function
 281+ * or literal, or a negated expression !a, or a
 282+ * parenthesised expression (a).
 283+ */
 284+ basic =
 285+ value
 286+ | variable
 287+ | function
 288+ | inner_node_d[ '(' >> tern_expr >> ')' ]
 289+ | root_node_d[ch_p('!')] >> tern_expr
 290+ | root_node_d[ch_p('+')] >> tern_expr
 291+ | root_node_d[ch_p('-')] >> tern_expr
 292+ ;
 293+
 294+ /*
 295+ * "a in b" operator
 296+ */
 297+ in_expr =
 298+ basic
 299+ >> *( root_node_d[ self.in_opers ] >> basic )
 300+ ;
 301+
 302+ /*
 303+ * power-of. This is right-associative.
 304+ */
 305+ pow_expr =
 306+ in_expr
 307+ >> !( root_node_d[ str_p("**") ] >> pow_expr )
 308+ ;
 309+
 310+ /*
 311+ * Multiplication and operators with the same
 312+ * precedence.
 313+ */
 314+ mult_expr =
 315+ pow_expr
 316+ >> *( root_node_d[ self.mult_opers ] >> pow_expr )
 317+ ;
 318+
 319+ /*
 320+ * Addition and operators with the same precedence.
 321+ */
 322+ plus_expr =
 323+ mult_expr
 324+ >> *( root_node_d[ self.plus_opers ] >> mult_expr )
 325+ ;
 326+
 327+ /*
 328+ * Ordinal comparisons and operators with the same
 329+ * precedence.
 330+ */
 331+ ord_expr =
 332+ plus_expr
 333+ >> *( root_node_d[ self.ord_opers ] >> plus_expr )
 334+ ;
 335+
 336+ /*
 337+ * Equality comparisons.
 338+ */
 339+ eq_expr =
 340+ ord_expr
 341+ >> *( root_node_d[ self.eq_opers ] >> ord_expr )
 342+ ;
 343+
 344+ /*
 345+ * Boolean expressions.
 346+ */
 347+ bool_expr =
 348+ eq_expr
 349+ >> *( root_node_d[ self.bool_opers ] >> eq_expr )
 350+ ;
 351+
 352+ /*
 353+ * The ternary operator. Notice this is
 354+ * right-associative: a ? b ? c : d : e
 355+ * is supported.
 356+ */
 357+ tern_expr =
 358+ bool_expr
 359+ >> !(
 360+ root_node_d[ch_p('?')] >> tern_expr
 361+ >> discard_node_d[ch_p(':')] >> tern_expr
 362+ )
 363+ ;
 364+ }
 365+
 /* The start rule: parsing begins at the ternary expression. */
 366+ rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> >
 367+ const &start() const {
 368+ return tern_expr;
 369+ }
 370+
 /* Rules carrying a parser_tag produce tree nodes tagged with that id. */
 371+ rule<ScannerT> c_string_char, hexchar, octchar;
 372+ rule<ScannerT, parser_context<>, parser_tag<pid_value> > value;
 373+ rule<ScannerT, parser_context<>, parser_tag<pid_variable> > variable;
 374+ rule<ScannerT, parser_context<>, parser_tag<pid_basic> > basic;
 375+ rule<ScannerT, parser_context<>, parser_tag<pid_bool_expr> > bool_expr;
 376+ rule<ScannerT, parser_context<>, parser_tag<pid_ord_expr> > ord_expr;
 377+ rule<ScannerT, parser_context<>, parser_tag<pid_eq_expr> > eq_expr;
 378+ rule<ScannerT, parser_context<>, parser_tag<pid_pow_expr> > pow_expr;
 379+ rule<ScannerT, parser_context<>, parser_tag<pid_mult_expr> > mult_expr;
 380+ rule<ScannerT, parser_context<>, parser_tag<pid_plus_expr> > plus_expr;
 381+ rule<ScannerT, parser_context<>, parser_tag<pid_in_expr> > in_expr;
 382+
 383+ rule<ScannerT, parser_context<>, parser_tag<pid_function> > function;
 384+ rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> > tern_expr;
 385+ rule<ScannerT, parser_context<>, parser_tag<pid_string> > string;
 386+ };
 387+};
 388+
 389+template<typename charT>
 390+basic_expressor<charT>::basic_expressor()
 391+ : grammar_(new parser_grammar<charT>)
 392+{
 393+ /*
 394+ * We provide a couple of standard variables everyone wants.
 395+ */
 396+ add_variable(make_astring<charT>("true"), afp::basic_datum<charT>::from_int(true));
 397+ add_variable(make_astring<charT>("false"), afp::basic_datum<charT>::from_int(false));
 398+
 399+ /*
 400+ * The cast functions.
 401+ */
 402+ add_function(make_astring<charT>("int"), &f_int<charT>);
 403+ add_function(make_astring<charT>("string"), &f_string<charT>);
 404+ add_function(make_astring<charT>("float"), &f_float<charT>);
 405+}
 406+
 /*
  * Release the grammar allocated by the constructor.
  */
 407+template<typename charT>
 408+basic_expressor<charT>::~basic_expressor()
 409+{
 410+ delete grammar_;
 411+}
 412+
 413+/*
 414+ * The user interface to evaluate an expression. It returns the result, or
 415+ * throws an exception if an error occurs.
 416+ */
 417+template<typename charT>
 418+basic_datum<charT>
 419+basic_expressor<charT>::evaluate(basic_fray<charT> const &filter) const
 420+{
 421+ using namespace boost::spirit;
 422+
 423+ typedef typename basic_fray<charT>::const_iterator iterator_t;
 424+
 425+ basic_datum<charT> ret;
 426+
 427+ tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p,
 428+ chset<>("\r\n\t ") | comment_p("/*", "*/"));
 429+
 430+ if (info.full) {
 431+ ast_evaluator<charT, typename tree_match<iterator_t>::tree_iterator> ae(*grammar_);
 432+ return ae.tree_eval(info.trees.begin());
 433+ } else {
 434+ throw parse_error("parsing failed");
 435+ }
 436+}
 437+
 438+template<typename charT>
 439+void
 440+basic_expressor<charT>::print_xml(std::ostream &strm, basic_fray<charT> const &filter) const
 441+{
 442+ using namespace boost::spirit;
 443+
 444+ typedef typename basic_fray<charT>::const_iterator iterator_t;
 445+
 446+ tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p,
 447+ +chset<>("\n\t ") | comment_p("/*", "*/"));
 448+
 449+ if (info.full) {
 450+ std::map<parser_id, std::string> rule_names;
 451+ rule_names[pid_value] = "value";
 452+ rule_names[pid_variable] = "variable";
 453+ rule_names[pid_basic] = "basic";
 454+ rule_names[pid_bool_expr] = "bool_expr";
 455+ rule_names[pid_ord_expr] = "ord_expr";
 456+ rule_names[pid_eq_expr] = "eq_expr";
 457+ rule_names[pid_pow_expr] = "pow_expr";
 458+ rule_names[pid_mult_expr] = "mult_expr";
 459+ rule_names[pid_plus_expr] = "plus_expr";
 460+ rule_names[pid_in_expr] = "in_expr";
 461+ rule_names[pid_function] = "function";
 462+ rule_names[pid_tern_expr] = "tern_expr";
 463+ rule_names[pid_string] = "string";
 464+ tree_to_xml(strm, info.trees, "", rule_names);
 465+ } else {
 466+ throw parse_error("parsing failed");
 467+ }
 468+}
 469+
 /*
  * Remove every registered variable and function.
  */
 470+template<typename charT>
 471+void
 472+basic_expressor<charT>::clear()
 473+{
 474+ clear_variables();
 475+ clear_functions();
 476+}
 477+
 478+template<typename charT>
 479+void
 480+basic_expressor<charT>::clear_variables()
 481+{
 482+ symbols<basic_datum<charT>, charT > variables;
 483+ grammar_->variables = variables;
 484+}
 485+
 486+template<typename charT>
 487+void
 488+basic_expressor<charT>::clear_functions()
 489+{
 490+ symbols<boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)>, charT > functions;
 491+ grammar_->functions = functions;
 492+}
 493+
 /*
  * Register `value` under `name` in the grammar's variable table.
  */
 494+template<typename charT>
 495+void
 496+basic_expressor<charT>::add_variable(basic_fray<charT> const &name, basic_datum<charT> const &value)
 497+{
 498+ grammar_->add_variable(name, value);
 499+}
 500+
 /*
  * Register the callable `value` under `name` in the grammar's function
  * table.
  */
 501+template<typename charT>
 502+void
 503+basic_expressor<charT>::add_function(basic_fray<charT> const &name, func_t value)
 504+{
 505+ grammar_->add_function(name, value);
 506+}
 507+
 508+} // namespace afp
 509+
 510+#endif /* !EXPRESSOR_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/operators.h
@@ -13,59 +13,34 @@
1414 #define DATUM_OPERATORS_H
1515
1616 #include "datum/visitors.h"
17 -#include "functors.h"
1817
1918 namespace afp {
2019
2120 namespace datum_impl {
2221
23 -
 22+/*
 23+ * std::modulus doesn't work with double, so we provide our own.
 24+ */
2425 template<typename T>
25 -struct afpless {
26 - template<typename U>
27 - bool operator() (T a, U b) const {
28 - return a < b;
 26+struct afpmodulus {
 27+ T operator() (T const &a, T const &b) const {
 28+ return a % b;
2929 }
30 -
31 - bool operator() (T, boost::posix_time::ptime const &) const {
32 - throw type_error("operator < not applicable to these types");
33 - }
34 -
35 - bool operator() (boost::posix_time::ptime const &, T) const {
36 - throw type_error("operator < not applicable to these types");
37 - }
38 -
39 - bool operator() (T, boost::posix_time::time_duration const &) const {
40 - throw type_error("operator < not applicable to these types");
41 - }
42 -
43 - bool operator() (boost::posix_time::time_duration const &, T) const {
44 - throw type_error("operator < not applicable to these types");
45 - }
4630 };
4731
4832 template<>
49 -struct afpless<boost::posix_time::ptime> {
50 - template<typename U>
51 - bool operator() (boost::posix_time::ptime const &, U const &b) const {
52 - throw type_error("operator < not applicable to these types");
 33+struct afpmodulus<mpf_class> {
 34+ double operator() (mpf_class const &a, mpf_class const &b) const {
 35+ /* this is less than ideal */
 36+ return std::fmod(a.get_d(), b.get_d());
5337 }
54 -
55 - bool operator() (boost::posix_time::ptime const &a, boost::posix_time::ptime const &b) const {
56 - return a < b;
57 - }
5838 };
5939
60 -template<>
61 -struct afpless<boost::posix_time::time_duration> {
62 - template<typename U>
63 - bool operator() (boost::posix_time::time_duration const &, U const &b) const {
64 - throw type_error("operator < not applicable to these types");
 40+template<typename T>
 41+struct afppower {
 42+ T operator() (T const &a, T const &b) const {
 43+ return std::pow(a,b);
6544 }
66 -
67 - bool operator() (boost::posix_time::time_duration const &a, boost::posix_time::time_duration const &b) const {
68 - return a < b;
69 - }
7045 };
7146
7247 } // namespace datum_impl
@@ -124,8 +99,7 @@
125100 return *this;
126101 }
127102
128 - basic_datum<charT> result = boost::apply_visitor(
129 - datum_impl::arith_visitor<charT, functor::plus>(), value_, other.value_);
 103+ basic_datum<charT> result = boost::apply_visitor(datum_impl::arith_visitor<charT, std::plus>(), value_, other.value_);
130104 *this = result;
131105 return *this;
132106 }
@@ -134,8 +108,7 @@
135109 basic_datum<charT> &
136110 basic_datum<charT>::operator-=(basic_datum<charT> const &other)
137111 {
138 - basic_datum<charT> result = boost::apply_visitor(
139 - datum_impl::arith_visitor<charT, functor::minus>(), value_, other.value_);
 112+ basic_datum<charT> result = boost::apply_visitor(datum_impl::arith_visitor<charT, std::minus>(), value_, other.value_);
140113 *this = result;
141114 return *this;
142115 }
@@ -144,8 +117,7 @@
145118 basic_datum<charT> &
146119 basic_datum<charT>::operator*=(basic_datum<charT> const &other)
147120 {
148 - basic_datum<charT> result = boost::apply_visitor(
149 - datum_impl::arith_visitor<charT, functor::multiply>(), value_, other.value_);
 121+ basic_datum<charT> result = boost::apply_visitor(datum_impl::arith_visitor<charT, std::multiplies>(), value_, other.value_);
150122 *this = result;
151123 return *this;
152124 }
@@ -154,8 +126,7 @@
155127 basic_datum<charT>&
156128 basic_datum<charT>::operator/=(basic_datum<charT> const &other)
157129 {
158 - basic_datum<charT> result = boost::apply_visitor(
159 - datum_impl::arith_visitor<charT, functor::divide>(), value_, other.value_);
 130+ basic_datum<charT> result = boost::apply_visitor(datum_impl::arith_visitor<charT, std::divides>(), value_, other.value_);
160131 *this = result;
161132 return *this;
162133 }
@@ -165,7 +136,7 @@
166137 basic_datum<charT>::operator%=(basic_datum<charT> const &other)
167138 {
168139 basic_datum<charT> result = boost::apply_visitor(
169 - datum_impl::arith_visitor<charT, functor::modulus>(), value_, other.value_);
 140+ datum_impl::arith_visitor<charT, datum_impl::afpmodulus>(), value_, other.value_);
170141 *this = result;
171142 return *this;
172143 }
@@ -181,7 +152,7 @@
182153 basic_datum<charT>
183154 basic_datum<charT>::operator-() const
184155 {
185 - return boost::apply_visitor(datum_impl::unary_arith_visitor<charT, functor::negate>(), value_);
 156+ return boost::apply_visitor(datum_impl::arith_visitor<charT, std::negate>(), value_);
186157 }
187158
188159 template<typename charT>
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/visitors.h
@@ -12,11 +12,8 @@
1313 #ifndef DATUM_VISITORS_H
1414 #define DATUM_VISITORS_H
1515
16 -#include <boost/date_time/time_facet.hpp>
17 -
1816 #include "datum/create.h"
1917 #include "afstring.h"
20 -#include "functors.h"
2118
2219 namespace afp {
2320 namespace datum_impl {
@@ -82,18 +79,6 @@
8380 basic_fray<charT> operator() (T const &v) const {
8481 return u32lexical_cast<charT, basic_fray<charT> >(v);
8582 }
86 -
87 - basic_fray<charT> operator() (typename basic_datum<charT>::datetime_t const &v) const {
88 - using namespace boost::date_time;
89 - using namespace boost::posix_time;
90 - typedef boost::date_time::time_facet<typename basic_datum<charT>::datetime_t, char> facet_t;
91 - facet_t *output_facet = new facet_t();
92 - std::ostringstream strm;
93 - strm.imbue(std::locale(std::locale::classic(), output_facet));
94 - output_facet->format("%Y-%m-%d %H:%M:%S");
95 - strm << v;
96 - return make_u32fray(('"' + strm.str() + "\"d").c_str());
97 - }
9883 };
9984
10085 template<typename charT>
@@ -111,14 +96,6 @@
11297 return typename basic_datum<charT>::integer_t(o);
11398 }
11499
115 - typename basic_datum<charT>::integer_t operator() (typename basic_datum<charT>::datetime_t const &) const {
116 - throw type_error("cannot coerce types: datetime_t not compatible with integer_t");
117 - }
118 -
119 - typename basic_datum<charT>::integer_t operator() (typename basic_datum<charT>::interval_t const &) const {
120 - throw type_error("cannot coerce types: interval_t not compatible with integer_t");
121 - }
122 -
123100 template<typename T>
124101 typename basic_datum<charT>::integer_t operator() (T const &v) const {
125102 return v;
@@ -139,14 +116,6 @@
140117 return v.get_si();
141118 }
142119
143 - typename basic_datum<charT>::float_t operator() (typename basic_datum<charT>::datetime_t const &) const {
144 - throw type_error("cannot coerce types: datetime_t not compatible with float_t");
145 - }
146 -
147 - typename basic_datum<charT>::float_t operator() (typename basic_datum<charT>::interval_t const &) const {
148 - throw type_error("cannot coerce types: interval_t not compatible with float_t");
149 - }
150 -
151120 template<typename T>
152121 typename basic_datum<charT>::float_t operator() (T const &v) const {
153122 return v;
@@ -157,11 +126,8 @@
158127 * A visitor that performs an arithmetic operation on its arguments,
159128 * after doing appropriate int->double promotion.
160129 */
161 -template<typename charT, template<typename V, typename W> class Operator>
 130+template<typename charT, template<typename V> class Operator>
162131 struct arith_visitor : boost::static_visitor<basic_datum<charT> > {
163 - typedef typename basic_datum<charT>::datetime_t datetime_t;
164 - typedef typename basic_datum<charT>::interval_t interval_t;
165 -
166132 /*
167133 * Anything involving a double returns a double.
168134 * Otherwise, int is returned.
@@ -170,27 +136,27 @@
171137 basic_datum<charT> operator() (T const &a, U const &b) const {
172138 typedef typename from_string_converter<charT, T>::type a_type;
173139 typedef typename from_string_converter<charT, U>::type b_type;
174 - //typedef typename preferred_type<a_type, b_type>::type preferred_type;
175 - typedef typename functor::return_type<a_type, b_type>::type return_type;
 140+ typedef typename preferred_type<a_type, b_type>::type preferred_type;
176141
177 - Operator<a_type, b_type> op;
178 - return create_datum<charT, return_type>::create(op(
 142+ Operator<preferred_type> op;
 143+ return create_datum<charT, preferred_type>::create(op(
179144 from_string_converter<charT, T>::convert(a),
180145 from_string_converter<charT, U>::convert(b)));
181146 }
182 -};
183147
184 -template<typename charT, template<typename V> class Operator>
185 -struct unary_arith_visitor : boost::static_visitor<basic_datum<charT> > {
 148+ /*
 149+ * Unary version.
 150+ */
186151 template<typename T>
187152 basic_datum<charT> operator() (T const &a) const {
188153 typedef typename from_string_converter<charT, T>::type a_type;
189154 typedef typename preferred_type<a_type, a_type>::type preferred_type;
190155
191 - Operator<a_type> op;
 156+ Operator<preferred_type> op;
192157 return create_datum<charT, preferred_type>::create(
193158 op(from_string_converter<charT, T>::convert(a)));
194159 }
 160+
195161 };
196162
197163 /*
@@ -198,7 +164,7 @@
199165 */
200166 template<
201167 typename charT,
202 - template<typename V, typename W> class Operator,
 168+ template<typename V> class Operator,
203169 typename T,
204170 typename U>
205171 struct compare_visitor_impl {
@@ -207,7 +173,7 @@
208174 typedef typename from_string_converter<charT, U>::type b_type;
209175 typedef typename preferred_type<a_type, b_type>::type preferred_type;
210176
211 - Operator<a_type, b_type> op;
 177+ Operator<preferred_type> op;
212178 return op(
213179 from_string_converter<charT, T>::convert(a),
214180 from_string_converter<charT, U>::convert(b));
@@ -217,7 +183,7 @@
218184 /*
219185 * Specialise for string<>string comparisons
220186 */
221 -template<typename charT, template<typename V, typename W> class Operator>
 187+template<typename charT, template<typename V> class Operator>
222188 struct compare_visitor_impl<
223189 charT,
224190 Operator,
@@ -226,12 +192,12 @@
227193 > : boost::static_visitor<bool> {
228194
229195 bool operator() (basic_fray<charT> const &a, basic_fray<charT> const &b) const {
230 - Operator<basic_fray<charT>, basic_fray<charT> > op;
 196+ Operator<basic_fray<charT> > op;
231197 return op(a, b);
232198 }
233199 };
234200
235 -template<typename charT, template<typename V, typename W> class Operator>
 201+template<typename charT, template<typename V> class Operator>
236202 struct compare_visitor : boost::static_visitor<bool> {
237203 template<typename T, typename U>
238204 bool operator() (T const &a, U const &b) const {
@@ -242,15 +208,15 @@
243209 /*
244210 * For comparisons that only work on integers - strings will be converted.
245211 */
246 -template<typename charT, template<typename V, typename W> class Operator>
 212+template<typename charT, template<typename V> class Operator>
247213 struct arith_compare_visitor : boost::static_visitor<bool> {
248214 template<typename T, typename U>
249215 bool operator() (T const &a, U const &b) const {
250216 typedef typename from_string_converter<charT, T>::type a_type;
251217 typedef typename from_string_converter<charT, U>::type b_type;
252 - //typedef typename preferred_type<a_type, b_type>::type preferred_type;
 218+ typedef typename preferred_type<a_type, b_type>::type preferred_type;
253219
254 - Operator<a_type, b_type> op;
 220+ Operator<preferred_type> op;
255221 return op(
256222 from_string_converter<charT, T>::convert(a),
257223 from_string_converter<charT, U>::convert(b));
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/create.h
@@ -38,27 +38,6 @@
3939 }
4040 };
4141
42 -template<typename charT>
43 -struct create_datum<charT, basic_fray<charT> > {
44 - static basic_datum<charT> create(basic_fray<charT> const &v) {
45 - return basic_datum<charT>::from_string(v);
46 - }
47 -};
48 -
49 -template<typename charT>
50 -struct create_datum<charT, typename basic_datum<charT>::datetime_t> {
51 - static basic_datum<charT> create(typename basic_datum<charT>::datetime_t const &v) {
52 - return basic_datum<charT>::from_date(v);
53 - }
54 -};
55 -
56 -template<typename charT>
57 -struct create_datum<charT, typename basic_datum<charT>::interval_t> {
58 - static basic_datum<charT> create(typename basic_datum<charT>::interval_t const &v) {
59 - return basic_datum<charT>::from_interval(v);
60 - }
61 -};
62 -
6342 }
6443
6544 #endif /* !DATUM_CREATE_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/include/datum/conversion.h
@@ -55,20 +55,6 @@
5656 }
5757
5858 template<typename charT>
59 -basic_datum<charT>
60 -basic_datum<charT>::from_date(typename basic_datum<charT>::datetime_t const &v)
61 -{
62 - return basic_datum<charT>(v);
63 -}
64 -
65 -template<typename charT>
66 -basic_datum<charT>
67 -basic_datum<charT>::from_interval(typename basic_datum<charT>::interval_t const &v)
68 -{
69 - return basic_datum<charT>(v);
70 -}
71 -
72 -template<typename charT>
7359 typename basic_datum<charT>::string_t
7460 basic_datum<charT>::toString() const {
7561 return boost::apply_visitor(datum_impl::to_string_visitor<charT>(), value_);
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/parserdefs.h
@@ -0,0 +1,48 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef PARSERDEFS_H
 14+#define PARSERDEFS_H
 15+
/*
 * Numeric identifiers for the grammar rules.
 * NOTE(review): presumably these are the parser ids attached to the
 * grammar's rules, so a parse tree node can be mapped back to a rule
 * name -- confirm against the grammar definition.
 * NOTE(review): the value 11 is not assigned to any rule here; confirm
 * whether the gap is intentional.
 */
#define pid_value 1
#define pid_variable 2
#define pid_basic 3
#define pid_bool_expr 4
#define pid_ord_expr 5
#define pid_eq_expr 6
#define pid_pow_expr 7
#define pid_mult_expr 8
#define pid_plus_expr 9
#define pid_in_expr 10
#define pid_function 12
#define pid_tern_expr 13
#define pid_string 14
 29+
 30+namespace afp {
 31+
/*
 * Exception type carrying a human-readable message, returned verbatim
 * by what().
 */
struct parse_error : std::exception {
	parse_error(std::string const &what) : what_(what) {}
	~parse_error() throw() {}

	/* The message supplied at construction; valid while *this lives. */
	char const *what() const throw() { return what_.c_str(); }

private:
	std::string what_;
};
 46+
 47+} // namespace afp
 48+
 49+#endif /* !PARSERDEFS_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/fray.cc
@@ -0,0 +1,716 @@
 2+/* Copyright (C) 2006-2008 River Tarnell <river@wikimedia.org>. */
 3+/*
 4+ * Permission is granted to anyone to use this software for any purpose,
 5+ * including commercial applications, and to alter it and redistribute it
 6+ * freely. This software is provided 'as-is', without any express or implied
 7+ * warranty.
 8+ */
 9+
 10+/* @(#) $Id$ */
 11+
 12+#include <cassert>
 13+#include <boost/functional/hash/hash.hpp>
 14+#include "fray.h"
 15+
/*
 * Out-of-class definition of basic_fray's static allocator member: the
 * user-supplied allocator type rebound to allocate fray_root objects.
 */
template<typename ch, typename tr, typename alloc>
typename alloc::template rebind<fray_impl::fray_root<ch, tr, alloc> >::other basic_fray<ch, tr, alloc>::_alloc;
/* Out-of-class definition of the static constant basic_fray::npos. */
template<typename ch, typename tr, typename alloc>
typename basic_fray<ch, tr, alloc>::size_type const basic_fray<ch, tr, alloc>::npos;
 20+
 21+namespace fray_impl {
 22+
 23+template<typename ch, typename tr, typename alloc>
 24+alloc fray_root<ch, tr, alloc>::_alloc;
 25+
 26+template<typename ch, typename tr, typename alloc>
 27+fray_root<ch, tr, alloc>::fray_root(ch const *begin, size_type len)
 28+ : _refs(1)
 29+{
 30+ _string = _alloc.allocate(len + sizeof(ch));
 31+ tr::copy(_string, begin, len);
 32+ _string[len] = '\0';
 33+ _end = _string + len;
 34+}
 35+
 36+template<typename ch, typename tr, typename alloc>
 37+fray_root<ch, tr, alloc>::fray_root(size_type len)
 38+ : _refs(1)
 39+ , _string(0)
 40+ , _end(0)
 41+{
 42+ _string = _alloc.allocate(len + sizeof(ch));
 43+ _string[len] = '\0';
 44+ _end = _string + len;
 45+}
 46+
 47+template<typename ch, typename tr, typename alloc>
 48+fray_root<ch, tr, alloc>::~fray_root() {
 49+ assert(_refs == 0);
 50+ _alloc.deallocate(_string, (_end - _string) + sizeof(ch));
 51+}
 52+
 53+template<typename ch, typename tr, typename alloc>
 54+int
 55+fray_root<ch, tr, alloc>::ref(void)
 56+{
 57+ assert(_refs > 0);
 58+ return ++_refs;
 59+}
 60+
 61+template<typename ch, typename tr, typename alloc>
 62+int
 63+fray_root<ch, tr, alloc>::deref(void)
 64+{
 65+ assert(_refs > 0);
 66+ return --_refs;
 67+}
 68+
 69+} // namespace fray_impl
 70+
 71+
 72+template<typename ch, typename tr, typename alloc>
 73+bool
 74+operator!= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b)
 75+{
 76+ return !(a == b);
 77+}
 78+
 79+template<typename ch, typename tr, typename alloc>
 80+bool
 81+operator> (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b)
 82+{
 83+ return !(a < b) && !(a == b);
 84+}
 85+
 86+template<typename ch, typename tr, typename alloc>
 87+bool
 88+operator<= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b)
 89+{
 90+ return (a < b) || (a == b);
 91+}
 92+
 93+template<typename ch, typename tr, typename alloc>
 94+bool
 95+operator>= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b)
 96+{
 97+ return (b < a) || (a == b);
 98+}
 99+
 100+template<typename ch, typename tr, typename alloc>
 101+basic_fray<ch, tr, alloc>::basic_fray()
 102+{
 103+ ch empty[1] = { 0 };
 104+ _root = _alloc.allocate(1);
 105+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(empty, 0);
 106+ _begin = _root->_string;
 107+ _end = _root->_end;
 108+}
 109+
 110+template<typename ch, typename tr, typename alloc>
 111+basic_fray<ch, tr, alloc>::basic_fray(ch const *cstring, size_type len)
 112+{
 113+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(cstring, len);
 114+ _begin = _root->_string;
 115+ _end = _root->_end;
 116+}
 117+
 118+template<typename ch, typename tr, typename alloc>
 119+basic_fray<ch, tr, alloc>::basic_fray(ch const *cstring)
 120+{
 121+int len = tr::length(cstring);
 122+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(cstring, len);
 123+ _begin = _root->_string;
 124+ _end = _root->_end;
 125+}
 126+
 127+template<typename ch, typename tr, typename alloc>
 128+template<typename bstraits, typename bsalloc>
 129+basic_fray<ch, tr, alloc>::basic_fray(std::basic_string<ch, bstraits, bsalloc> const &s)
 130+{
 131+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(s.data(), s.size());
 132+ _begin = _root->_string;
 133+ _end = _root->_end;
 134+}
 135+
 136+template<typename ch, typename tr, typename alloc>
 137+basic_fray<ch, tr, alloc>::basic_fray(
 138+ typename basic_fray<ch, tr, alloc>::size_type n,
 139+ ch c)
 140+{
 141+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(n);
 142+ _begin = _root->_string;
 143+ _end = _root->_end;
 144+ tr::assign(_root->_string, n, c);
 145+}
 146+
 147+template<typename ch, typename tr, typename alloc>
 148+basic_fray<ch, tr, alloc>::basic_fray(basic_fray<ch> const &other)
 149+ : _root(other._root)
 150+ , _begin(other._begin)
 151+ , _end(other._end)
 152+{
 153+ _root->ref();
 154+}
 155+
 156+template<typename ch, typename tr, typename alloc>
 157+basic_fray<ch, tr, alloc>::basic_fray(fray_impl::fray_root<ch, tr, alloc> *root)
 158+ : _root(root)
 159+ , _begin(root->_string)
 160+ , _end(root->_end)
 161+{
 162+}
 163+
 164+template<typename ch, typename tr, typename alloc>
 165+template<typename InputIterator>
 166+basic_fray<ch, tr, alloc>::basic_fray(InputIterator first, InputIterator last)
 167+{
 168+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(last - first);
 169+ _begin = _root->_string;
 170+ _end = _root->_end;
 171+
 172+ ch *s = _root->_string;
 173+ std::copy(first, last, s);
 174+}
 175+
 176+
 177+template<typename ch, typename tr, typename alloc>
 178+void
 179+basic_fray<ch, tr, alloc>::assign (basic_fray<ch, tr, alloc> const &other)
 180+{
 181+ _deref_root();
 182+ _root = other._root;
 183+ _root->ref();
 184+ _begin = other._begin;
 185+ _end = other._end;
 186+}
 187+
 188+template<typename ch, typename tr, typename alloc>
 189+basic_fray<ch, tr, alloc> &
 190+basic_fray<ch, tr, alloc>::operator= (basic_fray<ch, tr, alloc> const &other)
 191+{
 192+ if (this == &other)
 193+ return *this;
 194+ assign(other);
 195+ return *this;
 196+}
 197+
 198+template<typename ch, typename tr, typename alloc>
 199+template<typename bstraits, typename bsalloc>
 200+basic_fray<ch, tr, alloc> &
 201+basic_fray<ch, tr, alloc>::operator= (std::basic_string<ch, bstraits, bsalloc> const &other)
 202+{
 203+ assign(other);
 204+ return *this;
 205+}
 206+
 207+template<typename ch, typename tr, typename alloc>
 208+template<typename bstraits, typename bsalloc>
 209+void
 210+basic_fray<ch, tr, alloc>::assign (std::basic_string<ch, bstraits, bsalloc> const &s)
 211+{
 212+ _deref_root();
 213+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(s.data(), s.size());
 214+ _begin = _root->_string;
 215+ _end = _root->_end;
 216+}
 217+
 218+template<typename ch, typename tr, typename alloc>
 219+basic_fray<ch, tr, alloc> &
 220+basic_fray<ch, tr, alloc>::operator= (ch const *cstring)
 221+{
 222+ assign(cstring);
 223+ return *this;
 224+}
 225+
 226+template<typename ch, typename tr, typename alloc>
 227+void
 228+basic_fray<ch, tr, alloc>::assign (ch const *cstring)
 229+{
 230+ assign(cstring, tr::length(cstring));
 231+}
 232+
 233+template<typename ch, typename tr, typename alloc>
 234+void
 235+basic_fray<ch, tr, alloc>::assign (ch const *cstring, size_type len)
 236+{
 237+ _deref_root();
 238+ _root = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(cstring, len);
 239+ _begin = _root->_string;
 240+ _end = _root->_end;
 241+}
 242+
 243+template<typename ch, typename tr, typename alloc>
 244+void
 245+basic_fray<ch, tr, alloc>::assign(
 246+ typename basic_fray<ch, tr, alloc>::iterator begin,
 247+ typename basic_fray<ch, tr, alloc>::iterator end)
 248+{
 249+ assign(begin._pos, end._pos - begin._pos);
 250+}
 251+
 252+template<typename ch, typename tr, typename alloc>
 253+basic_fray<ch, tr, alloc>
 254+basic_fray<ch, tr, alloc>::substr(
 255+ typename basic_fray<ch, tr, alloc>::size_type off,
 256+ typename basic_fray<ch, tr, alloc>::size_type count) const
 257+{
 258+ if ((count == npos) || count + off > length())
 259+ count = length() - off;
 260+ return basic_fray(_begin + off, count);
 261+}
 262+
 263+template<typename ch, typename tr, typename alloc>
 264+basic_fray<ch, tr, alloc>::~basic_fray() {
 265+ _deref_root();
 266+}
 267+
 268+template<typename ch, typename tr, typename alloc>
 269+template<typename ostr>
 270+void
 271+basic_fray<ch, tr, alloc>::print(std::basic_ostream<ch, ostr> &strm) const {
 272+ strm << std::basic_string<ch, tr>(_begin, _end);
 273+}
 274+
 275+template<typename ch, typename tr, typename alloc>
 276+typename basic_fray<ch, tr, alloc>::size_type
 277+basic_fray<ch, tr, alloc>::length(void) const {
 278+ return _end - _begin;
 279+}
 280+
 281+template<typename ch, typename tr, typename alloc>
 282+typename basic_fray<ch, tr, alloc>::size_type
 283+basic_fray<ch, tr, alloc>::size(void) const {
 284+ return length();
 285+}
 286+
 287+template<typename ch, typename tr, typename alloc>
 288+typename basic_fray<ch, tr, alloc>::iterator
 289+basic_fray<ch, tr, alloc>::begin(void) const {
 290+ return iterator(_begin);
 291+}
 292+
 293+template<typename ch, typename tr, typename alloc>
 294+typename basic_fray<ch, tr, alloc>::iterator
 295+basic_fray<ch, tr, alloc>::end(void) const {
 296+ return iterator(_end);
 297+}
 298+
 299+template<typename ch, typename tr, typename alloc>
 300+typename basic_fray<ch, tr, alloc>::size_type
 301+basic_fray<ch, tr, alloc>::find(ch c, typename basic_fray<ch, tr, alloc>::size_type pos) const
 302+{
 303+ch const *found, *b = _begin + pos;
 304+ found = tr::find(b, _end - b, c);
 305+ if (found == NULL)
 306+ return npos;
 307+
 308+ return found - _begin;
 309+}
 310+
 311+template<typename ch, typename tr, typename alloc>
 312+typename basic_fray<ch, tr, alloc>::size_type
 313+basic_fray<ch, tr, alloc>::find(
 314+ basic_fray<ch, tr, alloc> const &s,
 315+ typename basic_fray<ch, tr, alloc>::size_type pos) const
 316+{
 317+ch const *found, *b = _begin + pos;
 318+ found = std::search(b, _end, s.begin(), s.end());
 319+ if (found == _end)
 320+ return npos;
 321+
 322+ return found - _begin;
 323+}
 324+
 325+template<typename ch, typename tr, typename alloc>
 326+ch const *
 327+basic_fray<ch, tr, alloc>::c_str(void) const
 328+{
 329+ /*
 330+ * If this fray ends with the end of the root, c_str simply returns _begin,
 331+ * because the fray root is always nul terminated. Otherwise, we
 332+ * re-root this fray to a root containing only the contents of this
 333+ * fray.
 334+ *
 335+ * This design saves copying in the common case of c_str() on a full
 336+ * fray, and the degenerate case requires copying with any implementation,
 337+ * because the nul terminator has to be inserted somewhere.
 338+ */
 339+ if (_end == _root->_end)
 340+ return _begin;
 341+
 342+fray_impl::fray_root<ch, tr, alloc> *newroot
 343+ = new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(_begin, length());
 344+ _deref_root();
 345+ _root = newroot;
 346+ return _begin;
 347+}
 348+
 349+template<typename ch, typename tr, typename alloc>
 350+std::basic_string<ch, tr, alloc>
 351+basic_fray<ch, tr, alloc>::str(void) const
 352+{
 353+ return std::basic_string<ch, tr, alloc>(_begin, _end);
 354+}
 355+
 356+template<typename ch, typename tr, typename alloc>
 357+ch const *
 358+basic_fray<ch, tr, alloc>::data(void) const
 359+{
 360+ return _begin;
 361+}
 362+
 363+template<typename ch, typename tr, typename alloc>
 364+ch
 365+basic_fray<ch, tr, alloc>::operator[] (typename basic_fray<ch, tr, alloc>::size_type n) const
 366+{
 367+ return *(begin() + n);
 368+}
 369+
 370+template<typename ch, typename tr, typename alloc>
 371+bool
 372+basic_fray<ch, tr, alloc>::empty(void) const
 373+{
 374+ return length() == 0;
 375+}
 376+
 377+template<typename ch, typename tr, typename alloc>
 378+basic_fray<ch, tr, alloc>
 379+basic_fray<ch, tr, alloc>::append(basic_fray<ch, tr, alloc> const &other) const
 380+{
 381+ return append(other._begin, other._end);
 382+}
 383+
 384+template<typename ch, typename tr, typename alloc>
 385+basic_fray<ch, tr, alloc>
 386+basic_fray<ch, tr, alloc>::append(ch const *cstring) const
 387+{
 388+ return append(cstring, cstring + tr::length(cstring));
 389+}
 390+
 391+template<typename ch, typename tr, typename alloc>
 392+basic_fray<ch, tr, alloc>
 393+basic_fray<ch, tr, alloc>::append(ch c) const
 394+{
 395+ch s[2] = {c, 0};
 396+ return append(s, s + 1);
 397+}
 398+
 399+template<typename ch, typename tr, typename alloc>
 400+basic_fray<ch, tr, alloc>
 401+basic_fray<ch, tr, alloc>::append(ch const *b, ch const *e) const
 402+{
 403+size_type alen = (e - b), newlen = length() + alen;
 404+fray_impl::fray_root<ch, tr, alloc> *newroot =
 405+ new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(newlen);
 406+
 407+ tr::copy(newroot->_string, _begin, length());
 408+ tr::copy(newroot->_string + length(), b, alen);
 409+ newroot->_end = newroot->_string + newlen;
 410+ return basic_fray<ch, tr, alloc>(newroot);
 411+}
 412+
 413+template<typename ch, typename tr, typename alloc>
 414+basic_fray<ch, tr, alloc>
 415+basic_fray<ch, tr, alloc>::prepend(basic_fray<ch, tr, alloc> const &other) const
 416+{
 417+ return prepend(other._begin, other._end);
 418+}
 419+
 420+template<typename ch, typename tr, typename alloc>
 421+basic_fray<ch, tr, alloc>
 422+basic_fray<ch, tr, alloc>::prepend(ch const *cstring) const
 423+{
 424+ return prepend(cstring, cstring + tr::length(cstring));
 425+}
 426+
 427+template<typename ch, typename tr, typename alloc>
 428+basic_fray<ch, tr, alloc>
 429+basic_fray<ch, tr, alloc>::prepend(ch c) const
 430+{
 431+ch s[2] = {c, 0};
 432+ return prepend(s, s + 1);
 433+}
 434+
 435+template<typename ch, typename tr, typename alloc>
 436+basic_fray<ch, tr, alloc>
 437+basic_fray<ch, tr, alloc>::prepend(ch const *b, ch const *e) const
 438+{
 439+size_type alen = (e - b), newlen = length() + alen;
 440+fray_impl::fray_root<ch, tr, alloc> *newroot =
 441+ new (_alloc.allocate(1)) fray_impl::fray_root<ch, tr, alloc>(newlen);
 442+
 443+ tr::copy(newroot->_string, b, alen);
 444+ tr::copy(newroot->_string + alen, _begin, length());
 445+ newroot->_end = newroot->_string + newlen;
 446+ return basic_fray<ch, tr, alloc>(newroot);
 447+}
 448+
 449+template<typename ch, typename tr, typename alloc>
 450+void
 451+basic_fray<ch, tr, alloc>::swap(basic_fray<ch, tr, alloc> &other)
 452+{
 453+ std::swap(_root, other._root);
 454+ std::swap(_begin, other._begin);
 455+ std::swap(_end, other._end);
 456+}
 457+
 458+template<typename ch, typename tr, typename alloc>
 459+void
 460+basic_fray<ch, tr, alloc>::_deref_root(void) const
 461+{
 462+ if (!_root)
 463+ return;
 464+
 465+ if (_root->deref() == 0) {
 466+ _alloc.destroy(_root);
 467+ _alloc.deallocate(_root, 1);
 468+ }
 469+}
 470+
 471+template<typename ch, typename tr, typename alloc>
 472+template<typename tr_>
 473+int
 474+basic_fray<ch, tr, alloc>::compare(basic_fray<ch, tr, alloc> const &other) const
 475+{
 476+int i, alen = length(), blen = other.length();
 477+ i = tr_::compare(_begin, other._begin, std::min(alen, blen));
 478+ if (i == 0)
 479+ return alen - blen; /* shorter string is lesser */
 480+
 481+ return i;
 482+}
 483+
 484+template<typename ch, typename tr, typename alloc, typename ostr>
 485+std::basic_ostream<ch, ostr> &
 486+operator<< (std::basic_ostream<ch, ostr> &strm, basic_fray<ch, tr, alloc> const &s)
 487+{
 488+ s.print(strm);
 489+ return strm;
 490+}
 491+
 492+template<typename ch, typename tr, typename alloc, typename ostr>
 493+std::basic_istream<ch, ostr> &
 494+operator>> (std::basic_istream<ch, ostr> &strm, basic_fray<ch, tr, alloc> &s)
 495+{
 496+ std::basic_string<ch, tr> st;
 497+ strm >> st;
 498+ if (strm)
 499+ s = st;
 500+ return strm;
 501+}
 502+
 503+template<typename ch, typename tr, typename alloc>
 504+basic_fray<ch, tr, alloc>
 505+operator+ (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b)
 506+{
 507+ return a.append(b);
 508+}
 509+
 510+template<typename ch, typename tr, typename alloc, typename tr_, typename alloc_>
 511+basic_fray<ch, tr, alloc>
 512+operator+ (basic_fray<ch, tr, alloc> const &a, std::basic_string<ch, tr_, alloc_> const &b)
 513+{
 514+ return a.append(b.data(), b.data() + b.size());
 515+}
 516+
 517+template<typename ch, typename tr, typename alloc, typename tr_, typename alloc_>
 518+basic_fray<ch, tr, alloc>
 519+operator+ (std::basic_string<ch, tr_, alloc_> const &a, basic_fray<ch, tr, alloc> const &b)
 520+{
 521+ return b.prepend(a.begin(), a.end());
 522+}
 523+
 524+template<typename ch, typename tr, typename alloc>
 525+basic_fray<ch, tr, alloc>
 526+operator+ (basic_fray<ch, tr, alloc> const &a, ch const *cstring)
 527+{
 528+ return a.append(cstring);
 529+}
 530+
 531+template<typename ch, typename tr, typename alloc>
 532+basic_fray<ch, tr, alloc>
 533+operator+ (ch const *cstring, basic_fray<ch, tr, alloc> const &a)
 534+{
 535+ return a.prepend(cstring);
 536+}
 537+
 538+template<typename ch, typename tr, typename alloc>
 539+basic_fray<ch, tr, alloc>
 540+operator+ (basic_fray<ch, tr, alloc> const &s, ch c)
 541+{
 542+ return s.append(c);
 543+}
 544+
 545+template<typename ch, typename tr, typename alloc>
 546+basic_fray<ch, tr, alloc>
 547+operator+ (ch c, basic_fray<ch, tr, alloc> const &s)
 548+{
 549+ return s.prepend(c);
 550+}
 551+
 552+namespace std { /* NOTE(review): this declares a new swap overload inside namespace std, not a specialization of std::swap; the C++ standard does not permit adding overloads there - consider an ADL-visible swap next to basic_fray (change together with the declaration in fray.h) */
 553+ template<typename ch, typename tr, typename alloc>
 554+ void swap(basic_fray<ch, tr, alloc> &a, basic_fray<ch, tr, alloc> &b)
 555+ {
 556+ a.swap(b); /* forwards to the member swap */
 557+ }
 558+}
 559+
 560+template<typename traits, typename ch, typename tr, typename alloc>
 561+int trcompare(basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b) {
 562+ return a.template compare<traits>(b);
 563+}
 564+
 565+template<typename traits, typename ch, typename tr, typename alloc>
 566+int trcompare(basic_fray<ch, tr, alloc> const &a, ch const *b) {
 567+ return a.template compare<traits>(basic_fray<ch, tr, alloc>(b));
 568+}
 569+
 570+template<typename traits, typename ch, typename tr, typename alloc>
 571+int trcompare(ch const *a, basic_fray<ch, tr, alloc> const &b) {
 572+ return -b.template compare<traits>(basic_fray<ch, tr, alloc>(a));
 573+}
 574+
 575+template<typename traits, typename ch>
 576+int trcompare(ch const *a, ch const *b)
 577+{
 578+int i, alen = traits::length(a), blen = traits::length(b);
 579+ i = traits::compare(a, b, std::min(alen, blen));
 580+ if (i == 0)
 581+ return alen - blen; /* shorter string is lesser */
 582+
 583+ return i;
 584+}
 585+
 586+template<typename ch, typename tr, typename alloc>
 587+int compare(basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b) {
 588+ return trcompare<tr>(a, b);
 589+}
 590+
 591+template<typename ch, typename tr, typename alloc>
 592+int compare(basic_fray<ch, tr, alloc> const &a, ch const *b) {
 593+ return trcompare<tr>(a, b);
 594+}
 595+
 596+template<typename ch, typename tr, typename alloc>
 597+int compare(ch const *a, basic_fray<ch, tr, alloc> const &b) {
 598+ return trcompare<tr>(a, b);
 599+}
 600+
 601+template<typename ch>
 602+int compare(ch const *a, ch const *b) {
 603+ return trcompare<std::char_traits<ch> >(a, b);
 604+}
 605+
 606+template<typename ch, typename tr, typename alloc>
 607+bool operator< (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b) {
 608+ return compare(a, b) < 0;
 609+}
 610+
 611+template<typename ch, typename tr, typename alloc>
 612+bool operator< (basic_fray<ch, tr, alloc> const &a, ch const *b) {
 613+ return compare(a, b) < 0;
 614+}
 615+
 616+template<typename ch, typename tr, typename alloc>
 617+bool operator< (ch const *a, basic_fray<ch, tr, alloc> const &b) {
 618+ return compare(a, b) < 0;
 619+}
 620+
 621+template<typename ch, typename tr, typename alloc>
 622+bool
 623+operator== (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b)
 624+{
 625+ return compare(a, b) == 0;
 626+}
 627+
 628+template<typename ch, typename tr, typename alloc>
 629+bool operator== (basic_fray<ch, tr, alloc> const &a, ch const *b) {
 630+ return compare(a, b) == 0;
 631+}
 632+
 633+template<typename ch, typename tr, typename alloc>
 634+bool operator== (ch const *a, basic_fray<ch, tr, alloc> const &b) {
 635+ return compare(a, b) == 0;
 636+}
 637+
 638+template<typename ch, typename tr, typename alloc>
 639+bool operator!= (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b) {
 640+ return !(a == b);
 641+}
 642+
 643+template<typename ch, typename tr, typename alloc>
 644+bool operator!= (ch const *a, basic_fray<ch, tr, alloc> const &b) {
 645+ return !(a == b);
 646+}
 647+
 648+template<typename ch, typename tr, typename alloc>
 649+bool operator!= (basic_fray<ch, tr, alloc> const &a, ch const *b) {
 650+ return !(a == b);
 651+}
 652+
 653+template<typename ch, typename tr, typename alloc>
 654+bool operator> (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b) {
 655+ return compare(a, b) > 0;
 656+}
 657+
 658+template<typename ch, typename tr, typename alloc>
 659+bool operator> (basic_fray<ch, tr, alloc> const &a, ch const *b) {
 660+ return compare(a, b) > 0;
 661+}
 662+
 663+template<typename ch, typename tr, typename alloc>
 664+bool operator> (ch const *a, basic_fray<ch, tr, alloc> const &b) {
 665+ return compare(a, b) > 0;
 666+}
 667+
 668+template<typename ch, typename tr, typename alloc>
 669+bool operator<= (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b) {
 670+ return !(a > b);
 671+}
 672+
 673+template<typename ch, typename tr, typename alloc>
 674+bool operator<= (basic_fray<ch, tr, alloc> const &a, ch const *b) {
 675+ return !(a > b);
 676+}
 677+
 678+template<typename ch, typename tr, typename alloc>
 679+bool operator<= (ch const *a, basic_fray<ch, tr, alloc> const &b) {
 680+ return !(a > b);
 681+}
 682+
 683+template<typename ch, typename tr, typename alloc>
 684+bool operator>= (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b) {
 685+ return !(a < b);
 686+}
 687+
 688+template<typename ch, typename tr, typename alloc>
 689+bool operator>= (basic_fray<ch, tr, alloc> const &a, ch const *b) {
 690+ return !(a < b);
 691+}
 692+
 693+template<typename ch, typename tr, typename alloc>
 694+bool operator>= (ch const *a, basic_fray<ch, tr, alloc> const &b) {
 695+ return !(a < b);
 696+}
 697+
 698+/*
 699+ * For boost.hash.
 700+ */
 701+template<typename ch, typename tr, typename alloc>
 702+std::size_t
 703+hash_value(basic_fray<ch, tr, alloc> const &s)
 704+{
 705+ return boost::hash_range(s.begin(), s.end());
 706+}
 707+
 708+template<typename ch, typename tr, typename alloc> /* getline for frays: read a line into a std::basic_string, then assign it to s */
 709+std::istream & /* NOTE(review): strm is std::basic_istream<ch, tr>; this return type only matches when that is std::istream - confirm, and change together with the declaration in fray.h */
 710+getline(std::basic_istream<ch, tr> &strm, basic_fray<ch, tr, alloc> &s)
 711+{
 712+std::basic_string<ch, tr, alloc> str; /* temporary line buffer */
 713+ getline(strm, str); /* unqualified call on (stream, string) arguments; presumably resolves to std::getline via ADL */
 714+ if (strm) /* only assign when the read succeeded; otherwise s is left untouched */
 715+ s = str;
 716+ return strm;
 717+}
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/fray.h
@@ -0,0 +1,558 @@
 2+/* fray: a refcounted string with cheap substrings */
 3+/* Copyright (C) 2006-2008 River Tarnell <river@wikimedia.org>. */
 4+/*
 5+ * Permission is granted to anyone to use this software for any purpose,
 6+ * including commercial applications, and to alter it and redistribute it
 7+ * freely. This software is provided 'as-is', without any express or implied
 8+ * warranty.
 9+ */
 10+
 11+/* $Id$ */
 12+
 13+#ifndef FRAY_H
 14+#define FRAY_H
 15+
 16+#include <cassert>
 17+#include <boost/functional/hash/hash.hpp>
 18+#include <boost/pool/pool_alloc.hpp>
 19+
 20+/*
 21+ * A fray is a refcounted immutable string providing copy-free (constant time)
 22+ * substrings. Its interface is the same as std::string where possible;
 23+ * operations which are not possible (e.g. non-const operator[]) are not provided.
 24+ *
 25+ * Although a fray is immutable, a fray object can be rebound to a different
 26+ * fray. This is not valid:
 27+ *
 28+ * fray f("test");
 29+ * f[0] = 'g';
 30+ *
 31+ * But this is:
 32+ *
 33+ * fray f("test"), g("foo");
 34+ * f = g;
 35+ *
 36+ * This has the effect of releasing the string held by 'f' and causing it to
 37+ * refer to g instead.
 38+ *
 39+ * Some mutating std::string functions are provided, such as append(); these
 40+ * do NOT modify the fray, but instead return a new fray:
 41+ *
 42+ * fray foo("foo"), bar("bar");
 43+ * fray foobar = foo.append(bar);
 44+ *
 45+ * Crude benchmarking suggests that passing a fray by value has no
 46+ * noticeable speed penalty compared to passing by const reference.
 47+ */
 48+
 49+template<typename ch, typename tr, typename alloc> struct basic_fray;
 50+template<typename ch, typename tr, typename alloc> struct fray_iterator;
 51+
 52+namespace fray_impl {
 53+
 54+/*
 55+ * A fray is simply a pointer to the fray_root, which stores the original
 56+ * string. A fray_root is created by a fray constructed from some other
 57+ * string (e.g. an std::string). The fray_root stores a refcount so that the
 58+ * last user to release it can delete it.
 59+ *
 60+ * The string in the fray_root is always nul-terminated, to support c_str()
 61+ * in constant time when the fray refers to the entire root.
 62+ *
 63+ * A fray_root starts with a refcount of 1 (representing the string which
 64+ * created it).
 65+ */
 66+template<typename ch, typename tr, typename allocator>
 67+struct fray_root {
 68+ typedef typename allocator::size_type size_type;
 69+
 70+ fray_root(ch const *begin, size_type len);
 71+ fray_root(size_type len);
 72+
 73+ ~fray_root();
 74+
 75+ int ref(void);
 76+ int deref(void);
 77+
 78+ int _refs;
 79+ ch *_string, *_end;
 80+
 81+ static allocator _alloc;
 82+};
 83+
 84+} // namespace fray_impl
 85+
 86+/*
 87+ * A fray iterator. Also the const_iterator, since frays are immutable.
 88+ */
 89+template<typename ch,
 90+ typename traits = std::char_traits<ch>,
 91+ typename alloc = boost::pool_allocator<ch>
 92+>
 93+struct fray_iterator {
 94+ typedef typename alloc::size_type size_type;
 95+ typedef typename alloc::difference_type difference_type;
 96+ typedef ch value_type;
 97+ typedef ch const &reference;
 98+ typedef ch const &const_reference;
 99+ typedef ch const *pointer;
 100+ typedef ch const *const_pointer;
 101+ typedef std::random_access_iterator_tag iterator_category;
 102+
 103+ fray_iterator()
 104+ : _pos(NULL)
 105+ {
 106+ }
 107+
 108+ fray_iterator(ch const *pos)
 109+ : _pos(pos)
 110+ {
 111+ }
 112+
 113+ const_reference operator* (void) const {
 114+ return *_pos;
 115+ }
 116+
 117+ fray_iterator &operator++ (void) {
 118+ ++_pos;
 119+ return *this;
 120+ }
 121+
 122+ fray_iterator operator++ (int) {
 123+ fray_iterator ret(*this);
 124+ ++_pos;
 125+ return ret;
 126+ }
 127+
 128+ fray_iterator &operator-- (void) {
 129+ --_pos;
 130+ return *this;
 131+ }
 132+
 133+ fray_iterator operator-- (int) {
 134+ fray_iterator ret(*this);
 135+ --_pos;
 136+ return ret;
 137+ }
 138+
 139+ bool operator< (fray_iterator const &other) const {
 140+ return _pos < other._pos;
 141+ }
 142+
 143+ bool operator== (fray_iterator const &other) const {
 144+ return _pos == other._pos;
 145+ }
 146+
 147+ difference_type operator- (fray_iterator const &other) const {
 148+ return _pos - other._pos;
 149+ }
 150+
 151+ fray_iterator &operator-= (size_type const &n) {
 152+ _pos -= n;
 153+ return *this;
 154+ }
 155+
 156+ fray_iterator operator- (size_type const &n) {
 157+ return fray_iterator(*this) -= n;
 158+ }
 159+
 160+ fray_iterator &operator+= (size_type const &n) {
 161+ _pos += n;
 162+ return *this;
 163+ }
 164+
 165+ fray_iterator operator+ (size_type const &n) const {
 166+ return fray_iterator(*this) += n;
 167+ }
 168+
 169+private:
 170+ ch const *_pos;
 171+
 172+ template<typename ch_, typename tr_, typename alloc_>
 173+ fray_iterator(fray_iterator<ch_, tr_, alloc_> const &); // no impl
 174+
 175+ friend struct basic_fray<ch, traits, alloc>;
 176+};
 177+
 178+template<typename ch, typename tr, typename alloc>
 179+bool operator!= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 180+template<typename ch, typename tr, typename alloc>
 181+bool operator> (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 182+template<typename ch, typename tr, typename alloc>
 183+bool operator<= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 184+template<typename ch, typename tr, typename alloc>
 185+bool operator>= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 186+
 187+template<typename ch, typename tr, typename alloc>
 188+bool operator== (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 189+
 190+template<typename ch, typename tr, typename alloc>
 191+bool operator!= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 192+
 193+template<typename ch, typename tr, typename alloc>
 194+bool operator> (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 195+
 196+template<typename ch, typename tr, typename alloc>
 197+bool operator<= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 198+
 199+template<typename ch, typename tr, typename alloc>
 200+bool operator>= (fray_iterator<ch, tr, alloc> const &a, fray_iterator<ch, tr, alloc> const &b);
 201+
 202+/*
 203+ * A fray reference. _root holds the fray_root. _begin and _end mark the
 204+ * extent of this substring (must be within the bounds of the fray_root).
 205+ */
 206+template<typename ch,
 207+ typename traits = std::char_traits<ch>,
 208+ typename alloc = boost::pool_allocator<ch>
 209+>
 210+struct basic_fray {
 211+ typedef ch value_type;
 212+ typedef typename alloc::size_type size_type;
 213+ typedef ch const &reference;
 214+ typedef ch const &const_reference;
 215+ typedef fray_iterator<ch, traits, alloc> iterator;
 216+ typedef fray_iterator<ch, traits, alloc> const_iterator;
 217+
 218+ static size_type const npos = static_cast<size_type>(-1);
 219+
 220+ /*
 221+ * Create a new, empty fray.
 222+ */
 223+ basic_fray();
 224+
 225+ /*
 226+ * Create a new fray from the characters [cstring, cstring + len).
 227+ */
 228+ basic_fray(ch const *cstring, size_type len);
 229+
 230+ /*
 231+ * Create a new fray from the characters [cstring, cstring + strlen(cstring)).
 232+ */
 233+ basic_fray(ch const *cstring);
 234+
 235+ /*
 236+ * Create a new fray from the characters [s.begin(), s.end()).
 237+ */
 238+ template<typename traits_, typename alloc_>
 239+ basic_fray(std::basic_string<ch, traits_, alloc_> const &s);
 240+
 241+ /*
 242+ * Create a new fray from the iterator pair [first, last).
 243+ */
 244+ template<typename InputIterator>
 245+ basic_fray(InputIterator first, InputIterator last);
 246+
 247+ /*
 248+ * Create a new fray which holds a copy of other.
 249+ */
 250+ basic_fray(basic_fray<ch> const &other);
 251+
 252+ /*
 253+ * Create a new fray holding n copies of c.
 254+ */
 255+ basic_fray(size_type n, ch c);
 256+
 257+ /*
 258+ * Release the resources held by this fray.
 259+ */
 260+ ~basic_fray();
 261+
 262+ /*
 263+ * Replace the contents of this fray with a copy of other.
 264+ */
 265+ basic_fray &operator= (basic_fray<ch, traits, alloc> const &other);
 266+ void assign (basic_fray<ch, traits, alloc> const &other);
 267+
 268+ /*
 269+ * Replace the contents of this fray with [cstring, cstring + strlen(cstring)).
 270+ */
 271+ basic_fray &operator= (ch const *cstring);
 272+ void assign (ch const *cstring);
 273+ void assign (ch const *cstring, size_type len);
 274+
 275+ /*
 276+ * Replace the contents of this fray with [s.begin(), s.end()).
 277+ */
 278+ template<typename traits_, typename alloc_>
 279+ basic_fray &operator= (std::basic_string<ch, traits_, alloc_> const &s);
 280+ template<typename traits_, typename alloc_>
 281+ void assign (std::basic_string<ch, traits_, alloc_> const &s);
 282+
 283+ /*
 284+ * Replace the contents of this fray with [begin, end).
 285+ */
 286+ void assign(iterator begin, iterator end);
 287+
 288+ /*
 289+ * Return a fray holding the bytes [begin + off, begin + off + count).
 290+ * If the substring would extend past the end of the fray, the copy
 291+ * will extend until the end.
 292+ */
 293+ basic_fray substr(size_type off = 0, size_type count = npos) const;
 294+
 295+ /*
 296+ * Output the contents of this fray to the given stream.
 297+ */
 298+ template<typename ostr>
 299+ void print(std::basic_ostream<ch, ostr> &strm) const;
 300+
 301+ /*
 302+ * Return the number of characters in this fray.
 303+ */
 304+ size_type length(void) const;
 305+ size_type size(void) const;
 306+
 307+ /*
 308+ * Return an iterator referring to the beginning of this fray.
 309+ */
 310+ iterator begin(void) const;
 311+
 312+ /*
 313+ * Return an iterator referring to one character past the end of this
 314+ * fray.
 315+ */
 316+ iterator end(void) const;
 317+
 318+ /*
 319+ * Returns the position of the first occurrence of 'c' in *this
 320+ * not before 'pos'.
 321+ */
 322+ size_type find(ch c, size_type pos = 0) const;
 323+
 324+ /*
 325+ * Returns the position of the first occurrence of the string
 326+ * 's' in *this not before 'pos'.
 327+ */
 328+ size_type find(basic_fray const &s, size_type pos = 0) const;
 329+
 330+ /*
 331+ * Equivalent to ::trcompare<tr_>(*this, other).
 332+ */
 333+ template<typename tr_>
 334+ int compare(basic_fray const &other) const;
 335+
 336+ /*
 337+ * Return a C string (nul terminated) with the same contents as
 338+ * this fray. Note: because frays are not nul-terminated internally,
 339+ * this *always* copies the contents. Avoid it if possible.
 340+ *
 341+ * data() is the same but does not copy and is not nul-terminated.
 342+ */
 343+ ch const *c_str(void) const;
 344+ ch const *data(void) const;
 345+
 346+ /*
 347+ * Return an std::basic_string with the same contents as this fray.
 348+ */
 349+ std::basic_string<ch, traits, alloc> str(void) const;
 350+
 351+ /*
 352+ * Return (length() == 0);
 353+ */
 354+ bool empty(void) const;
 355+
 356+ /*
 357+ * Return the character at position n.
 358+ */
 359+ ch operator[] (size_type) const;
 360+
 361+ /*
 362+ * Swap the contents of *this and other. Constant time.
 363+ */
 364+ void swap(basic_fray &other);
 365+
 366+ /*
 367+ * Return a new fray consisting of *this concatenated with other.
 368+ */
 369+ basic_fray append(basic_fray const &other) const;
 370+
 371+ /*
 372+ * Return append(cstring, cstring + traits::length(cstring)).
 373+ */
 374+ basic_fray append(ch const *cstring) const;
 375+
 376+ /*
 377+ * Return append(fray(begin, end));
 378+ */
 379+ basic_fray append(ch const *begin, ch const *end) const;
 380+
 381+ /*
 382+ * Return append(&c, &c + 1);
 383+ */
 384+ basic_fray append(ch c) const;
 385+
 386+ /*
 387+ * Return a new fray consisting of other concatenated with *this.
 388+ */
 389+ basic_fray prepend(basic_fray const &other) const;
 390+
 391+ /*
 392+ * Return prepend(cstring, cstring + traits::length(cstring)).
 393+ */
 394+ basic_fray prepend(ch const *cstring) const;
 395+
 396+ /*
 397+ * Return prepend(fray(begin, end));
 398+ */
 399+ basic_fray prepend(ch const *begin, ch const *end) const;
 400+
 401+ /*
 402+ * Return prepend(&c, &c + 1);
 403+ */
 404+ basic_fray prepend(ch c) const;
 405+
 406+private:
 407+ /*
 408+ * Construct a new fray from an already extant root.
 409+ */
 410+ basic_fray (fray_impl::fray_root<ch, traits, alloc> *);
 411+
 412+ /*
 413+ * Decrement the root's refcount and delete it if 0.
 414+ */
 415+ void _deref_root(void) const;
 416+
 417+ mutable fray_impl::fray_root<ch, traits, alloc> *_root;
 418+ ch const *_begin, *_end;
 419+ static typename alloc::template rebind<fray_impl::fray_root<ch, traits, alloc> >::other
 420+ _alloc;
 421+
 422+ template<typename ch_, typename traits_, typename alloc_>
 423+ basic_fray(basic_fray<ch_, traits_, alloc_> const &other); // no impl
 424+};
 425+
 426+template<typename ch, typename tr, typename alloc, typename ostr>
 427+std::basic_ostream<ch, ostr> &
 428+operator<< (std::basic_ostream<ch, ostr> &strm, basic_fray<ch, tr, alloc> const &s);
 429+
 430+template<typename ch, typename tr, typename alloc, typename ostr>
 431+std::basic_istream<ch, ostr> &
 432+operator>> (std::basic_istream<ch, ostr> &strm, basic_fray<ch, tr, alloc> &s);
 433+
 434+template<typename ch, typename tr, typename alloc>
 435+basic_fray<ch, tr, alloc>
 436+operator+ (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 437+
 438+template<typename ch, typename tr, typename alloc, typename tr_, typename alloc_>
 439+basic_fray<ch, tr, alloc>
 440+operator+ (basic_fray<ch, tr, alloc> const &a, std::basic_string<ch, tr_, alloc_> const &b);
 441+
 442+template<typename ch, typename tr, typename alloc, typename tr_, typename alloc_>
 443+basic_fray<ch, tr, alloc>
 444+operator+ (std::basic_string<ch, tr_, alloc_> const &a, basic_fray<ch, tr, alloc> const &b);
 445+
 446+template<typename ch, typename tr, typename alloc>
 447+basic_fray<ch, tr, alloc>
 448+operator+ (basic_fray<ch, tr, alloc> const &a, ch const *cstring);
 449+
 450+template<typename ch, typename tr, typename alloc>
 451+basic_fray<ch, tr, alloc>
 452+operator+ (ch const *cstring, basic_fray<ch, tr, alloc> const &a);
 453+
 454+template<typename ch, typename tr, typename alloc>
 455+basic_fray<ch, tr, alloc>
 456+operator+ (basic_fray<ch, tr, alloc> const &s, ch c);
 457+
 458+template<typename ch, typename tr, typename alloc>
 459+basic_fray<ch, tr, alloc>
 460+operator+ (ch c, basic_fray<ch, tr, alloc> const &s);
 461+
 462+namespace std {
 463+ template<typename ch, typename tr, typename alloc>
 464+ void swap(basic_fray<ch, tr, alloc> &a, basic_fray<ch, tr, alloc> &b);
 465+}
 466+
 467+typedef basic_fray<char> fray;
 468+typedef basic_fray<wchar_t> wfray;
 469+
 470+template<typename traits, typename ch, typename tr, typename alloc>
 471+int trcompare(basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 472+
 473+template<typename traits, typename ch, typename tr, typename alloc>
 474+int trcompare(basic_fray<ch, tr, alloc> const &a, ch const *b);
 475+
 476+template<typename traits, typename ch, typename tr, typename alloc>
 477+int trcompare(ch const *a, basic_fray<ch, tr, alloc> const &b);
 478+
 479+template<typename traits, typename ch>
 480+int trcompare(ch const *a, ch const *b);
 481+
 482+template<typename ch, typename tr, typename alloc>
 483+int compare(basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 484+
 485+template<typename ch, typename tr, typename alloc>
 486+int compare(basic_fray<ch, tr, alloc> const &a, ch const *b);
 487+
 488+template<typename ch, typename tr, typename alloc>
 489+int compare(ch const *a, basic_fray<ch, tr, alloc> const &b);
 490+
 491+template<typename ch>
 492+int compare(ch const *a, ch const *b);
 493+
 494+template<typename ch, typename tr, typename alloc>
 495+bool operator< (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 496+
 497+template<typename ch, typename tr, typename alloc>
 498+bool operator< (basic_fray<ch, tr, alloc> const &a, ch const *b);
 499+
 500+template<typename ch, typename tr, typename alloc>
 501+bool operator< (ch const *a, basic_fray<ch, tr, alloc> const &b);
 502+
 503+template<typename ch, typename tr, typename alloc>
 504+bool operator== (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 505+
 506+template<typename ch, typename tr, typename alloc>
 507+bool operator== (basic_fray<ch, tr, alloc> const &a, ch const *b);
 508+
 509+template<typename ch, typename tr, typename alloc>
 510+bool operator== (ch const *a, basic_fray<ch, tr, alloc> const &b);
 511+
 512+template<typename ch, typename tr, typename alloc>
 513+bool operator!= (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 514+
 515+template<typename ch, typename tr, typename alloc>
 516+bool operator!= (ch const *a, basic_fray<ch, tr, alloc> const &b);
 517+
 518+template<typename ch, typename tr, typename alloc>
 519+bool operator!= (basic_fray<ch, tr, alloc> const &a, ch const *b);
 520+
 521+template<typename ch, typename tr, typename alloc>
 522+bool operator> (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 523+
 524+template<typename ch, typename tr, typename alloc>
 525+bool operator> (basic_fray<ch, tr, alloc> const &a, ch const *b);
 526+
 527+template<typename ch, typename tr, typename alloc>
 528+bool operator> (ch const *a, basic_fray<ch, tr, alloc> const &b);
 529+
 530+template<typename ch, typename tr, typename alloc>
 531+bool operator<= (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 532+
 533+template<typename ch, typename tr, typename alloc>
 534+bool operator<= (basic_fray<ch, tr, alloc> const &a, ch const *b);
 535+
 536+template<typename ch, typename tr, typename alloc>
 537+bool operator<= (ch const *a, basic_fray<ch, tr, alloc> const &b);
 538+
 539+template<typename ch, typename tr, typename alloc>
 540+bool operator>= (basic_fray<ch, tr, alloc> const &a, basic_fray<ch, tr, alloc> const &b);
 541+
 542+template<typename ch, typename tr, typename alloc>
 543+bool operator>= (basic_fray<ch, tr, alloc> const &a, ch const *b);
 544+
 545+template<typename ch, typename tr, typename alloc>
 546+bool operator>= (ch const *a, basic_fray<ch, tr, alloc> const &b);
 547+
 548+/*
 549+ * For boost.hash.
 550+ */
 551+template<typename ch, typename tr, typename alloc>
 552+std::size_t hash_value(basic_fray<ch, tr, alloc> const &s);
 553+
 554+template<typename ch, typename tr, typename alloc>
 555+std::istream & getline(std::basic_istream<ch, tr> &strm, basic_fray<ch, tr, alloc> &s);
 556+
 557+#include "fray.cc"
 558+
 559+#endif /* !FRAY_H */
Index: branches/change-tagging/extensions/AbuseFilter/parser_native/evaluate.cpp
@@ -12,7 +12,7 @@
1313 #include "filter_evaluator.h"
1414
1515 int main(int argc, char** argv) {
16 - afp::filter_evaluator f;
 16+ afp::u32filter_evaluator f;
1717
1818 if (argc != 2) {
1919 std::cerr << "usage: " << argv[0] << " <filter>\n";

Past revisions this follows up on

Revision | Commit summary | Author | Date
r39167 | svn:eol-style native | ialex | 19:18, 11 August 2008

Status & tagging log