r39001 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r39000‎ | r39001 | r39002 >
Date:14:34, 9 August 2008
Author:river
Status:old
Tags:
Comment:
Unicode conversion, phase 2: parser should be templated on char type
Modified paths:
  • /trunk/extensions/AbuseFilter/parser_native/aftypes.cpp (deleted) (history)
  • /trunk/extensions/AbuseFilter/parser_native/makefile (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parser.cpp (deleted) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parser.h (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/aftypes.cpp
@@ -1,41 +0,0 @@
2 -/*
3 - * Copyright (c) 2008 Andrew Garrett.
4 - * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
5 - * Derived from public domain code contributed by Victor Vasiliev.
6 - *
7 - * Permission is granted to anyone to use this software for any purpose,
8 - * including commercial applications, and to alter it and redistribute it
9 - * freely. This software is provided 'as-is', without any express or
10 - * implied warranty.
11 - */
12 -
13 -#include <sstream>
14 -#include <ios>
15 -#include <iostream>
16 -#include <cassert>
17 -#include <algorithm>
18 -#include <cmath>
19 -
20 -#include <boost/lexical_cast.hpp>
21 -
22 -#include "aftypes.h"
23 -
24 -namespace afp {
25 -
26 -
27 -bool
28 -operator>= (datum const &a, datum const &b) {
29 - return !(a < b);
30 -}
31 -
32 -bool
33 -operator!= (datum const &a, datum const &b) {
34 - return !(a == b);
35 -}
36 -
37 -bool
38 -datum::operator! () const {
39 - return !toBool();
40 -}
41 -
42 -} // namespace afp
Index: trunk/extensions/AbuseFilter/parser_native/parser.cpp
@@ -1,709 +0,0 @@
2 -/*
3 - * Copyright (c) 2008 Andrew Garrett.
4 - * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
5 - * Derived from public domain code contributed by Victor Vasiliev.
6 - *
7 - * Permission is granted to anyone to use this software for any purpose,
8 - * including commercial applications, and to alter it and redistribute it
9 - * freely. This software is provided 'as-is', without any express or
10 - * implied warranty.
11 - */
12 -#include <stdexcept>
13 -#include <iostream>
14 -
15 -#include <boost/spirit.hpp>
16 -#include <boost/spirit/phoenix.hpp>
17 -#include <boost/spirit/phoenix/composite.hpp>
18 -#include <boost/spirit/phoenix/functions.hpp>
19 -#include <boost/spirit/phoenix/operators.hpp>
20 -#include <boost/function.hpp>
21 -#include <boost/noncopyable.hpp>
22 -#include <boost/format.hpp>
23 -#include <boost/regex/icu.hpp>
24 -
25 -#include "aftypes.h"
26 -#include "parser.h"
27 -
28 -using namespace boost::spirit;
29 -using namespace phoenix;
30 -
31 -/*
32 - * ABUSEFILTER EXPRESSION PARSER
33 - * =============================
34 - *
35 - * This is the basic expression parser. It doesn't contain any AF logic
36 - * itself, but rather presents an interface for the user to add custom
37 - * functions and variables.
38 - *
39 - * The interface to the parser is the 'expressor' class. Use it like this:
40 - *
41 - * expressor e;
42 - * e.add_variable("ONE", 1);
43 - * e.evaluate("ONE + 2"); -- returns 3
44 - *
45 - * Custom functions should have the following prototype:
46 - *
47 - * datum (std::vector<afp::datum> const &args);
48 - *
49 - * Functions must return a value; they cannot be void. The arguments passed to
50 - * the function are stored in the 'args' array in left-to-right order.
51 - *
52 - * The parser implements a C-like grammar with some differences. The following
53 - * operators are available:
54 - *
55 - * a & b true if a and b are both true
56 - * a | b true if either a or b is true
57 - * a ^ b true if either a or b is true, but not if both are true
58 - * a + b arithmetic
59 - * a - b
60 - * a * b
61 - * a / b
62 - * a % b
63 - * a ** b power-of (a^b)
64 - * a in b true if the string "b" contains the substring "a"
65 - * !a true if a is false
66 - * (a) same value as a
67 - * a ? b : c if a is true, returns the value of b, otherwise c
68 - * a == b comparison operators
69 - * a != b
70 - * a < b
71 - * a <= b
72 - * a > b
73 - * a >= b
74 - * a === b returns true if a==b and both are the same type
75 - * a !== b return true if a != b or they are different types
76 - *
77 - * The parser uses afp::datum for its variables. This means it supports
78 - * strings, ints and floats, with automatic conversion between types.
79 - */
80 -
81 -namespace px = phoenix;
82 -
83 -namespace afp {
84 -
85 -struct parse_error : std::runtime_error {
86 - parse_error(char const *what) : std::runtime_error(what) {}
87 -};
88 -
89 -/*
90 - * The parser stores the result of each grammar rule in a closure. Most rules
91 - * use the parser_closure, which simply stores a single value.
92 - */
93 -struct parser_closure : boost::spirit::closure<parser_closure, datum>
94 -{
95 - member1 val;
96 -};
97 -
98 -namespace {
99 -
100 -int match(char const *, char const *);
101 -
102 -datum
103 -f_in(datum const &a, datum const &b)
104 -{
105 - std::string sa = a.toString(), sb = b.toString();
106 - return datum::from_int(std::search(sb.begin(), sb.end(), sa.begin(), sa.end()) != sb.end());
107 -}
108 -
109 -datum
110 -f_like(datum const &str, datum const &pattern)
111 -{
112 - return datum::from_int(match(str.toString().c_str(), pattern.toString().c_str()));
113 -}
114 -
115 -datum
116 -f_regex(datum const &str, datum const &pattern)
117 -{
118 - boost::u32regex r = boost::make_u32regex(pattern.toString());
119 - return datum::from_int(boost::u32regex_match(str.toString(), r));
120 -}
121 -
122 -datum
123 -f_ternary(datum const &v, datum const &iftrue, datum const &iffalse)
124 -{
125 - return v.toInt() ? iftrue : iffalse;
126 -}
127 -
128 -datum
129 -f_int(std::vector<datum> const &args)
130 -{
131 - if (args.size() != 1)
132 - throw parse_error("wrong number of arguments to int() (expected 1)");
133 -
134 - return datum::from_int(args[0].toInt());
135 -}
136 -
137 -datum
138 -f_string(std::vector<datum> const &args)
139 -{
140 - if (args.size() != 1)
141 - throw parse_error("wrong number of arguments to string() (expected 1)");
142 -
143 - return datum::from_string(args[0].toString());
144 -}
145 -
146 -datum
147 -f_float(std::vector<datum> const &args)
148 -{
149 - if (args.size() != 1)
150 - throw parse_error("wrong number of arguments to float() (expected 1)");
151 -
152 - return datum::from_double(args[0].toFloat());
153 -}
154 -
155 -datum
156 -f_append(datum const &a, char b)
157 -{
158 - return datum::from_string(a.toString() + b);
159 -}
160 -
161 -datum
162 -f_strip_last(datum const &a)
163 -{
164 - std::string s(a.toString());
165 - s.resize(s.size() - 1);
166 - return datum::from_string(s);
167 -}
168 -
169 -datum
170 -datum_and(datum const &a, datum const &b)
171 -{
172 - return datum::from_int(a.toInt() && b.toInt());
173 -}
174 -
175 -datum
176 -datum_or(datum const &a, datum const &b)
177 -{
178 - return datum::from_int(a.toInt() || b.toInt());
179 -}
180 -
181 -datum
182 -datum_xor(datum const &a, datum const &b)
183 -{
184 - return datum::from_int((bool)a.toInt() ^ (bool)b.toInt());
185 -}
186 -
187 -datum
188 -datum_negate(datum const &a)
189 -{
190 - return datum::from_int(!(a.toBool()));
191 -}
192 -
193 -}
194 -
195 -/*
196 - * This is the closure type for functions. 'val' stores the final result of
197 - * the function call; func and args store the function object and the parsed
198 - * arguments.
199 - */
200 -struct function_closure : boost::spirit::closure<
201 - function_closure,
202 - datum,
203 - boost::function<datum (std::vector<datum>)>,
204 - std::vector<datum> >
205 -{
206 - member1 val;
207 - member2 func;
208 - member3 args;
209 -};
210 -
211 -/*
212 - * The closure for the ?: operator. Parsed as expr ? iftrue : iffalse.
213 - */
214 -struct ternary_closure : boost::spirit::closure<
215 - ternary_closure,
216 - datum,
217 - datum,
218 - datum>
219 -{
220 - member1 val;
221 - member2 iftrue;
222 - member3 iffalse;
223 -};
224 -
225 -/*
226 - * The grammar itself.
227 - */
228 -struct parser_grammar : public grammar<parser_grammar, parser_closure::context_t>
229 -{
230 - /* User-defined variables. */
231 - symbols<datum> variables;
232 -
233 - void add_variable(std::string const &name, datum const &value) {
234 - variables.add(name.c_str(), value);
235 - }
236 -
237 - /* User-defined functions. */
238 - symbols<boost::function<datum (std::vector<datum>)> > functions;
239 -
240 - void add_function(std::string const &name, boost::function<datum (std::vector<datum>)> func) {
241 - functions.add(name.c_str(), func);
242 - }
243 -
244 - template<typename ScannerT>
245 - struct definition
246 - {
247 - typedef rule<ScannerT, parser_closure::context_t> rule_t;
248 -
249 - parser_grammar const &self_;
250 -
251 - /*
252 - * A phoenix actor to append its argument to a container.
253 - */
254 - struct push_back_impl {
255 - template<typename C, typename I>
256 - struct result {
257 - typedef void type;
258 - };
259 -
260 - template<typename C, typename I>
261 - void operator() (C &c, I const &i) const {
262 - c.push_back(i);
263 - }
264 - };
265 -
266 - phoenix::function<push_back_impl> const push_back;
267 -
268 - /*
269 - * A phoenix actor to call a user-defined function given the
270 - * function object and arguments.
271 - */
272 - struct call_function_impl {
273 - template<typename F, typename A>
274 - struct result {
275 - typedef datum type;
276 - };
277 -
278 - template<typename F, typename A>
279 - datum operator() (F const &func, A const &args) const {
280 - return func(args);
281 - }
282 - };
283 -
284 - phoenix::function<call_function_impl> const call_function;
285 -
286 - definition(parser_grammar const &self)
287 - : self_(self)
288 - , push_back(push_back_impl())
289 - , call_function(call_function_impl())
290 - {
291 - /*
292 - * A literal value. Either a string, a floating
293 - * point number or an integer.
294 - */
295 - value =
296 - strict_real_p[value.val = bind(&datum::from_double)(arg1)]
297 - | as_lower_d[
298 - oct_p[value.val = bind(&datum::from_int)(arg1)] >> 'o'
299 - | hex_p[value.val = bind(&datum::from_int)(arg1)] >> 'x'
300 - | bin_p[value.val = bind(&datum::from_int)(arg1)] >> 'b'
301 - | int_p[value.val = bind(&datum::from_int)(arg1)]
302 - ]
303 - /*
304 - * confix_p can't be used here, because it will rewrite
305 - * *(c_escape_ch_p[x]) into (*c_escape_ch_p)[x]
306 - */
307 - | (
308 - ch_p('"')[value.val = bind(&datum::from_string)("")]
309 - >> *((c_escape_ch_p[value.val = bind(&f_append)(value.val, arg1)] - '"'))
310 - >> ch_p('"')[value.val = bind(&f_strip_last)(value.val)]
311 - )
312 - ;
313 -
314 - /*
315 - * A variable. If the variable is found in the
316 - * user-supplied variable list, we use that.
317 - * Otherwise, unknown variables (containing uppercase
318 - * letters and underscore only) are returned as the
319 - * empty string.
320 - */
321 - variable =
322 - self.variables[variable.val = arg1]
323 - | (+ (upper_p | '_') )[variable.val = bind(&datum::from_string)("")]
324 - ;
325 -
326 - /*
327 - * A function call: func([arg[, arg...]]).
328 - */
329 - function =
330 - (
331 - self.functions[function.func = arg1]
332 - >> '('
333 - >> ( tern_expr[push_back(function.args, arg1)] % ',' )
334 - >> ')'
335 - ) [function.val = call_function(function.func, function.args)]
336 - ;
337 -
338 - /*
339 - * A basic atomic value. Either a variable, function
340 - * or literal, or a negated expression !a, or a
341 - * parenthesised expression (a).
342 - */
343 - basic =
344 - ( '(' >> tern_expr[basic.val = arg1] >> ')' )
345 - | ch_p('!') >> tern_expr[basic.val = bind(&datum_negate)(arg1)]
346 - | ch_p('+') >> tern_expr[basic.val = arg1]
347 - | ch_p('-') >> tern_expr[basic.val = -arg1]
348 - | value[basic.val = arg1]
349 - | variable[basic.val = arg1]
350 - | function[basic.val = arg1]
351 - ;
352 -
353 - /*
354 - * "a in b" operator
355 - */
356 - in_expr =
357 - basic[in_expr.val = arg1]
358 - >> *(
359 - "in" >> basic[in_expr.val = bind(&f_in)(in_expr.val, arg1)]
360 - | "contains" >> basic[in_expr.val = bind(&f_in)(arg1, in_expr.val)]
361 - | "like" >> basic[in_expr.val = bind(&f_like)(arg1, in_expr.val)]
362 - | "matches" >> basic[in_expr.val = bind(&f_like)(arg1, in_expr.val)]
363 - | "rlike" >> basic[in_expr.val = bind(&f_regex)(in_expr.val, arg1)]
364 - | "regex" >> basic[in_expr.val = bind(&f_regex)(in_expr.val, arg1)]
365 - )
366 - ;
367 -
368 - /*
369 - * power-of. This is right-associative.
370 - */
371 - pow_expr =
372 - in_expr[pow_expr.val = arg1]
373 - >> !(
374 - "**" >> pow_expr[pow_expr.val = bind(&afp::pow<char>)(pow_expr.val, arg1)]
375 - )
376 - ;
377 -
378 - /*
379 - * Multiplication and operators with the same
380 - * precedence.
381 - */
382 - mult_expr =
383 - pow_expr[mult_expr.val = arg1]
384 - >> *(
385 - '*' >> pow_expr[mult_expr.val *= arg1]
386 - | '/' >> pow_expr[mult_expr.val /= arg1]
387 - | '%' >> pow_expr[mult_expr.val %= arg1]
388 - )
389 - ;
390 -
391 - /*
392 - * Addition and operators with the same precedence.
393 - */
394 - plus_expr =
395 - mult_expr[plus_expr.val = arg1]
396 - >> *(
397 - '+' >> mult_expr[plus_expr.val += arg1]
398 - | '-' >> mult_expr[plus_expr.val -= arg1]
399 - )
400 - ;
401 -
402 - /*
403 - * Ordinal comparisons and operators with the same
404 - * precedence.
405 - */
406 - ord_expr =
407 - plus_expr[ord_expr.val = arg1]
408 - >> *(
409 - "<" >> plus_expr[ord_expr.val = bind(&datum::from_int)(ord_expr.val < arg1)]
410 - | "<=" >> plus_expr[ord_expr.val = bind(&datum::from_int)(ord_expr.val <= arg1)]
411 - | ">" >> plus_expr[ord_expr.val = bind(&datum::from_int)(ord_expr.val > arg1)]
412 - | ">=" >> plus_expr[ord_expr.val = bind(&datum::from_int)(ord_expr.val >= arg1)]
413 - )
414 - ;
415 -
416 - /*
417 - * Equality comparisons.
418 - */
419 - eq_expr =
420 - ord_expr[eq_expr.val = arg1]
421 - >> *(
422 - "=" >> eq_expr[eq_expr.val = bind(&datum::from_int)(eq_expr.val == arg1)]
423 - | "==" >> eq_expr[eq_expr.val = bind(&datum::from_int)(eq_expr.val == arg1)]
424 - | "!=" >> eq_expr[eq_expr.val = bind(&datum::from_int)(eq_expr.val != arg1)]
425 - | "/=" >> eq_expr[eq_expr.val = bind(&datum::from_int)(eq_expr.val != arg1)]
426 - | "===" >> eq_expr[eq_expr.val =
427 - bind(&datum::from_int)(bind(&datum::compare_with_type)(eq_expr.val, arg1))]
428 - | "!==" >> eq_expr[eq_expr.val =
429 - bind(&datum::from_int)(!bind(&datum::compare_with_type)(eq_expr.val, arg1))]
430 - )
431 - ;
432 -
433 - /*
434 - * Boolean expressions.
435 - */
436 - bool_expr =
437 - eq_expr[bool_expr.val = arg1]
438 - >> *(
439 - '&' >> eq_expr[bool_expr.val = bind(datum_and)(bool_expr.val, arg1)]
440 - | '|' >> eq_expr[bool_expr.val = bind(datum_or)(bool_expr.val, arg1)]
441 - | '^' >> eq_expr[bool_expr.val = bind(datum_xor)(bool_expr.val, arg1)]
442 - )
443 - ;
444 -
445 - /*
446 - * The ternary operator. Notice this is
447 - * right-associative: a ? b ? c : d : e
448 - * is supported.
449 - */
450 - tern_expr =
451 - bool_expr[tern_expr.val = arg1]
452 - >> !(
453 - (
454 - "?" >> tern_expr[tern_expr.iftrue = arg1]
455 - >> ":" >> tern_expr[tern_expr.iffalse = arg1]
456 - )[tern_expr.val =
457 - bind(f_ternary)(tern_expr.val, tern_expr.iftrue, tern_expr.iffalse)]
458 - )
459 - ;
460 -
461 - /*
462 - * The root expression type.
463 - */
464 - expr = tern_expr[self.val = arg1];
465 - }
466 -
467 - rule_t const &start() const {
468 - return expr;
469 - }
470 -
471 - rule_t value, variable, basic, bool_expr,
472 - ord_expr, eq_expr, pow_expr, mult_expr, plus_expr, in_expr, expr;
473 - rule<ScannerT, function_closure::context_t> function;
474 - rule<ScannerT, ternary_closure::context_t> tern_expr;
475 - };
476 -};
477 -
478 -expressor::expressor()
479 - : grammar_(new parser_grammar)
480 -{
481 - /*
482 - * We provide a couple of standard variables everyone wants.
483 - */
484 - add_variable("true", afp::datum::from_int(true));
485 - add_variable("false", afp::datum::from_int(false));
486 -
487 - /*
488 - * The cast functions.
489 - */
490 - add_function("int", &f_int);
491 - add_function("string", &f_string);
492 - add_function("float", &f_float);
493 -}
494 -
495 -expressor::~expressor()
496 -{
497 - delete grammar_;
498 -}
499 -
500 -/*
501 - * The user interface to evaluate an expression. It returns the result, or
502 - * throws an exception if an error occurs.
503 - */
504 -datum
505 -expressor::evaluate(std::string const &filter) const
506 -{
507 - datum ret;
508 - parse_info<std::string::const_iterator> info =
509 - parse(filter.begin(), filter.end(), (*grammar_)[var(ret) = arg1],
510 - comment_p("/*", "*/") | chset<>("\n\t "));
511 - if (info.full) {
512 - return ret;
513 - } else {
514 - std::cerr << "stopped at: [" << std::string(info.stop, filter.end()) << "]\n";
515 - throw parse_error("parsing failed");
516 - }
517 -}
518 -
519 -void
520 -expressor::add_variable(std::string const &name, datum value)
521 -{
522 - grammar_->add_variable(name, value);
523 -}
524 -
525 -void
526 -expressor::add_function(std::string const &name, func_t value)
527 -{
528 - grammar_->add_function(name, value);
529 -}
530 -
531 -namespace {
532 -
533 -/* $NetBSD: fnmatch.c,v 1.21 2005/12/24 21:11:16 perry Exp $ */
534 -
535 -/*
536 - * Copyright (c) 1989, 1993, 1994
537 - * The Regents of the University of California. All rights reserved.
538 - *
539 - * This code is derived from software contributed to Berkeley by
540 - * Guido van Rossum.
541 - *
542 - * Redistribution and use in source and binary forms, with or without
543 - * modification, are permitted provided that the following conditions
544 - * are met:
545 - * 1. Redistributions of source code must retain the above copyright
546 - * notice, this list of conditions and the following disclaimer.
547 - * 2. Redistributions in binary form must reproduce the above copyright
548 - * notice, this list of conditions and the following disclaimer in the
549 - * documentation and/or other materials provided with the distribution.
550 - * 3. Neither the name of the University nor the names of its contributors
551 - * may be used to endorse or promote products derived from this software
552 - * without specific prior written permission.
553 - *
554 - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
555 - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
556 - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
557 - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
558 - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
559 - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
560 - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
561 - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
562 - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
563 - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
564 - * SUCH DAMAGE.
565 - */
566 -
567 -/*
568 - * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
569 - * Compares a filename or pathname to a pattern.
570 - */
571 -
572 -#include <ctype.h>
573 -#include <string.h>
574 -
575 -#define EOS '\0'
576 -
577 -const char *rangematch (const char *, int);
578 -
579 -int
580 -match(char const *pattern, char const *string)
581 -{
582 - const char *stringstart;
583 - char c, test;
584 -
585 - for (stringstart = string;;)
586 - switch (c = *pattern++) {
587 - case EOS:
588 - return (*string == EOS ? 1 : 0);
589 - case '?':
590 - if (*string == EOS)
591 - return (0);
592 - ++string;
593 - break;
594 - case '*':
595 - c = *pattern;
596 - /* Collapse multiple stars. */
597 - while (c == '*')
598 - c = *++pattern;
599 -
600 - /* Optimize for pattern with * at end or before /. */
601 - if (c == EOS) {
602 - return (1);
603 - }
604 -
605 - /* General case, use recursion. */
606 - while ((test = *string) != EOS) {
607 - if (match(pattern, string))
608 - return (1);
609 - ++string;
610 - }
611 - return (0);
612 - case '[':
613 - if (*string == EOS)
614 - return (0);
615 - if ((pattern =
616 - rangematch(pattern, *string)) == NULL)
617 - return (0);
618 - ++string;
619 - break;
620 - case '\\':
621 - if ((c = *pattern++) == EOS) {
622 - c = '\\';
623 - --pattern;
624 - }
625 - /* FALLTHROUGH */
626 - default:
627 - if (c != *string++)
628 - return (0);
629 - break;
630 - }
631 - /* NOTREACHED */
632 -}
633 -
634 -const char *
635 -rangematch(char const *pattern, int test)
636 -{
637 - int negate, ok;
638 - char c, c2;
639 -
640 - /*
641 - * A bracket expression starting with an unquoted circumflex
642 - * character produces unspecified results (IEEE 1003.2-1992,
643 - * 3.13.2). This implementation treats it like '!', for
644 - * consistency with the regular expression syntax.
645 - * J.T. Conklin (conklin@ngai.kaleida.com)
646 - */
647 - if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
648 - ++pattern;
649 -
650 - for (ok = 0; (c = *pattern++) != ']';) {
651 - if (c == '\\')
652 - c = *pattern++;
653 - if (c == EOS)
654 - return (NULL);
655 - if (*pattern == '-'
656 - && (c2 = (*(pattern+1))) != EOS &&
657 - c2 != ']') {
658 - pattern += 2;
659 - if (c2 == '\\')
660 - c2 = *pattern++;
661 - if (c2 == EOS)
662 - return (NULL);
663 - if (c <= test && test <= c2)
664 - ok = 1;
665 - } else if (c == test)
666 - ok = 1;
667 - }
668 - return (ok == negate ? NULL : pattern);
669 -}
670 -
671 -} // anonymous namespace
672 -
673 -} // namespace afp
674 -
675 -#ifdef TEST_PARSER
676 -afp::datum
677 -f_add(std::vector<afp::datum> const &args)
678 -{
679 - return args[0] + args[1];
680 -}
681 -
682 -afp::datum
683 -f_norm(std::vector<afp::datum> const &args)
684 -{
685 - return args[0];
686 -}
687 -
688 -int
689 -main(int argc, char **argv)
690 -{
691 - if (argc != 2) {
692 - std::cerr << boost::format("usage: %s <expr>\n")
693 - % argv[0];
694 - return 1;
695 - }
696 -
697 - afp::expressor e;
698 - e.add_variable("ONE", afp::datum::from_int(1));
699 - e.add_variable("TWO", afp::datum::from_int(2));
700 - e.add_variable("THREE", afp::datum::from_int(3));
701 - e.add_function("add", f_add);
702 - e.add_function("norm", f_norm);
703 -
704 - try {
705 - std::cout << e.evaluate(argv[1]) << '\n';
706 - } catch (std::exception &e) {
707 - std::cout << "parsing failed: " << e.what() << '\n';
708 - }
709 -}
710 -#endif
Index: trunk/extensions/AbuseFilter/parser_native/parser.h
@@ -14,31 +14,727 @@
1515
1616 #include <string>
1717 #include <vector>
 18+#include <stdexcept>
 19+#include <iostream>
1820
1921 #include <boost/noncopyable.hpp>
2022 #include <boost/function.hpp>
 23+#include <boost/spirit.hpp>
 24+#include <boost/spirit/phoenix.hpp>
 25+#include <boost/spirit/phoenix/composite.hpp>
 26+#include <boost/spirit/phoenix/functions.hpp>
 27+#include <boost/spirit/phoenix/operators.hpp>
 28+#include <boost/function.hpp>
 29+#include <boost/noncopyable.hpp>
 30+#include <boost/format.hpp>
 31+#include <boost/regex/icu.hpp>
2132
 33+#include <unicode/uchar.h>
 34+
2235 #include "aftypes.h"
2336
2437 namespace afp {
2538
26 -struct parser_grammar;
 39+template<typename T> struct parser_grammar;
2740
28 -struct expressor : boost::noncopyable {
29 - typedef boost::function<datum (std::vector<datum>)> func_t;
 41+template<typename charT>
 42+struct basic_expressor : boost::noncopyable {
 43+ typedef boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)> func_t;
3044
31 - expressor();
32 - ~expressor();
 45+ basic_expressor();
 46+ ~basic_expressor();
3347
34 - datum evaluate(std::string const &expr) const;
 48+ basic_datum<charT> evaluate(std::basic_string<charT> const &expr) const;
3549
36 - void add_variable(std::string const &name, datum value);
37 - void add_function(std::string const &name, func_t value);
 50+ void add_variable(std::basic_string<charT> const &name, basic_datum<charT> const &value);
 51+ void add_function(std::basic_string<charT> const &name, func_t value);
3852
3953 private:
40 - parser_grammar *grammar_;
 54+ parser_grammar<charT> *grammar_;
4155 };
4256
 57+typedef basic_expressor<char> expressor;
 58+typedef basic_expressor<UChar> u32expressor;
 59+
 60+using namespace boost::spirit;
 61+using namespace phoenix;
 62+
 63+/*
 64+ * ABUSEFILTER EXPRESSION PARSER
 65+ * =============================
 66+ *
 67+ * This is the basic expression parser. It doesn't contain any AF logic
 68+ * itself, but rather presents an interface for the user to add custom
 69+ * functions and variables.
 70+ *
 71+ * The interface to the parser is the 'expressor' class. Use it like this:
 72+ *
 73+ * expressor e;
 74+ * e.add_variable("ONE", 1);
 75+ * e.evaluate("ONE + 2"); -- returns 3
 76+ *
 77+ * Custom functions should have the following prototype:
 78+ *
 79+ * datum (std::vector<afp::datum> const &args);
 80+ *
 81+ * Functions must return a value; they cannot be void. The arguments passed to
 82+ * the function are stored in the 'args' array in left-to-right order.
 83+ *
 84+ * The parser implements a C-like grammar with some differences. The following
 85+ * operators are available:
 86+ *
 87+ * a & b true if a and b are both true
 88+ * a | b true if either a or b is true
 89+ * a ^ b true if either a or b is true, but not if both are true
 90+ * a + b arithmetic
 91+ * a - b
 92+ * a * b
 93+ * a / b
 94+ * a % b
 95+ * a ** b power-of (a^b)
 96+ * a in b true if the string "b" contains the substring "a"
 97+ * !a true if a is false
 98+ * (a) same value as a
 99+ * a ? b : c if a is true, returns the value of b, otherwise c
 100+ * a == b comparison operators
 101+ * a != b
 102+ * a < b
 103+ * a <= b
 104+ * a > b
 105+ * a >= b
 106+ * a === b returns true if a==b and both are the same type
 107+ * a !== b return true if a != b or they are different types
 108+ *
 109+ * The parser uses afp::datum for its variables. This means it supports
 110+ * strings, ints and floats, with automatic conversion between types.
 111+ */
 112+
 113+namespace px = phoenix;
 114+
 115+struct parse_error : std::runtime_error {
 116+ parse_error(char const *what) : std::runtime_error(what) {}
 117+};
 118+
 119+/*
 120+ * The parser stores the result of each grammar rule in a closure. Most rules
 121+ * use the parser_closure, which simply stores a single value.
 122+ */
 123+template<typename charT>
 124+struct parser_closure : boost::spirit::closure<parser_closure<charT>, basic_datum<charT> >
 125+{
 126+ typename parser_closure<charT>::member1 val;
 127+};
 128+
 129+namespace {
 130+
 131+template<typename charT>
 132+int match(char const *, char const *);
 133+
 134+template<typename charT>
 135+basic_datum<charT>
 136+f_in(basic_datum<charT> const &a, basic_datum<charT> const &b)
 137+{
 138+ std::string sa = a.toString(), sb = b.toString();
 139+ return basic_datum<charT>::from_int(std::search(sb.begin(), sb.end(), sa.begin(), sa.end()) != sb.end());
 140+}
 141+
 142+template<typename charT>
 143+basic_datum<charT>
 144+f_like(basic_datum<charT> const &str, basic_datum<charT> const &pattern)
 145+{
 146+ return basic_datum<charT>::from_int(match(str.toString().c_str(), pattern.toString().c_str()));
 147+}
 148+
 149+template<typename charT>
 150+basic_datum<charT>
 151+f_regex(basic_datum<charT> const &str, basic_datum<charT> const &pattern)
 152+{
 153+ boost::u32regex r = boost::make_u32regex(pattern.toString());
 154+ return basic_datum<charT>::from_int(boost::u32regex_match(str.toString(), r));
 155+}
 156+
 157+template<typename charT>
 158+basic_datum<charT>
 159+f_ternary(basic_datum<charT> const &v, basic_datum<charT> const &iftrue, basic_datum<charT> const &iffalse)
 160+{
 161+ return v.toInt() ? iftrue : iffalse;
 162+}
 163+
 164+template<typename charT>
 165+basic_datum<charT>
 166+f_int(std::vector<basic_datum<charT> > const &args)
 167+{
 168+ if (args.size() != 1)
 169+ throw parse_error("wrong number of arguments to int() (expected 1)");
 170+
 171+ return basic_datum<charT>::from_int(args[0].toInt());
 172+}
 173+
 174+template<typename charT>
 175+basic_datum<charT>
 176+f_string(std::vector<basic_datum<charT> > const &args)
 177+{
 178+ if (args.size() != 1)
 179+ throw parse_error("wrong number of arguments to string() (expected 1)");
 180+
 181+ return basic_datum<charT>::from_string(args[0].toString());
 182+}
 183+
 184+template<typename charT>
 185+basic_datum<charT>
 186+f_float(std::vector<basic_datum<charT> > const &args)
 187+{
 188+ if (args.size() != 1)
 189+ throw parse_error("wrong number of arguments to float() (expected 1)");
 190+
 191+ return basic_datum<charT>::from_double(args[0].toFloat());
 192+}
 193+
 194+template<typename charT>
 195+basic_datum<charT>
 196+f_append(basic_datum<charT> const &a, char b)
 197+{
 198+ return basic_datum<charT>::from_string(a.toString() + b);
 199+}
 200+
 201+template<typename charT>
 202+basic_datum<charT>
 203+f_strip_last(basic_datum<charT> const &a)
 204+{
 205+ std::string s(a.toString());
 206+ s.resize(s.size() - 1);
 207+ return basic_datum<charT>::from_string(s);
 208+}
 209+
 210+template<typename charT>
 211+basic_datum<charT>
 212+datum_and(basic_datum<charT> const &a, basic_datum<charT> const &b)
 213+{
 214+ return basic_datum<charT>::from_int(a.toInt() && b.toInt());
 215+}
 216+
 217+template<typename charT>
 218+basic_datum<charT>
 219+datum_or(basic_datum<charT> const &a, basic_datum<charT> const &b)
 220+{
 221+ return basic_datum<charT>::from_int(a.toInt() || b.toInt());
 222+}
 223+
 224+template<typename charT>
 225+basic_datum<charT>
 226+datum_xor(basic_datum<charT> const &a, basic_datum<charT> const &b)
 227+{
 228+ return basic_datum<charT>::from_int((bool)a.toInt() ^ (bool)b.toInt());
 229+}
 230+
 231+template<typename charT>
 232+basic_datum<charT>
 233+datum_negate(basic_datum<charT> const &a)
 234+{
 235+ return basic_datum<charT>::from_int(!(a.toBool()));
 236+}
 237+
 238+} // anonymous namespace
 239+
 240+/*
 241+ * This is the closure type for functions. 'val' stores the final result of
 242+ * the function call; func and args store the function object and the parsed
 243+ * arguments.
 244+ */
 245+template<typename charT>
 246+struct function_closure : boost::spirit::closure<
 247+ function_closure<charT>,
 248+ basic_datum<charT>,
 249+ boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)>,
 250+ std::vector<basic_datum<charT> > >
 251+{
 252+ typename function_closure<charT>::member1 val;
 253+ typename function_closure<charT>::member2 func;
 254+ typename function_closure<charT>::member3 args;
 255+};
 256+
 257+/*
 258+ * The closure for the ?: operator. Parsed as expr ? iftrue : iffalse.
 259+ */
 260+template<typename charT>
 261+struct ternary_closure : boost::spirit::closure<
 262+ ternary_closure<charT>,
 263+ basic_datum<charT>,
 264+ basic_datum<charT>,
 265+ basic_datum<charT> >
 266+{
 267+ typename ternary_closure<charT>::member1 val;
 268+ typename ternary_closure<charT>::member2 iftrue;
 269+ typename ternary_closure<charT>::member3 iffalse;
 270+};
 271+
 272+/*
 273+ * The grammar itself. Variables and functions are registered in its symbol tables through add_variable() and add_function().
 274+ */
 275+template<typename charT>
 276+struct parser_grammar : public grammar<parser_grammar<charT>, typename parser_closure<charT>::context_t >
 277+{
 278+ /* User-defined variables. */
 279+ symbols<basic_datum<charT> > variables;
 280+
 281+ void add_variable(std::string const &name, basic_datum<charT> const &value) {
 282+ variables.add(name.c_str(), value);
 283+ }
 284+
 285+ /* User-defined functions. */
 286+ symbols<boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)> > functions;
 287+
 288+ void add_function(std::string const &name, boost::function<basic_datum<charT> (std::vector<basic_datum<charT> >)> func) {
 289+ functions.add(name.c_str(), func);
 290+ }
 291+
 292+ template<typename ScannerT>
 293+ struct definition
 294+ {
 295+ typedef rule<ScannerT, typename parser_closure<charT>::context_t > rule_t;
 296+
 297+ parser_grammar const &self_;
 298+
 299+ /*
 300+ * A phoenix actor to append its argument to a container.
 301+ */
 302+ struct push_back_impl {
 303+ template<typename C, typename I>
 304+ struct result {
 305+ typedef void type;
 306+ };
 307+
 308+ template<typename C, typename I>
 309+ void operator() (C &c, I const &i) const {
 310+ c.push_back(i);
 311+ }
 312+ };
 313+
 314+ phoenix::function<push_back_impl> const push_back;
 315+
 316+ /*
 317+ * A phoenix actor to call a user-defined function given the
 318+ * function object and arguments.
 319+ */
 320+ struct call_function_impl {
 321+ template<typename F, typename A>
 322+ struct result {
 323+ typedef basic_datum<charT> type;
 324+ };
 325+
 326+ template<typename F, typename A>
 327+ basic_datum<charT> operator() (F const &func, A const &args) const {
 328+ return func(args);
 329+ }
 330+ };
 331+
 332+ phoenix::function<call_function_impl> const call_function;
 333+
 334+ definition(parser_grammar const &self)
 335+ : self_(self)
 336+ , push_back(push_back_impl())
 337+ , call_function(call_function_impl())
 338+ {
 339+ std::basic_string<charT> empty_string;
 340+
 341+ /*
 342+ * A literal value. Either a string, a floating
 343+ * point number or an integer.
 344+ */
 345+ value =
 346+ strict_real_p[value.val = bind(&basic_datum<charT>::from_double)(arg1)]
 347+ | as_lower_d[
 348+ oct_p[value.val = bind(&basic_datum<charT>::from_int)(arg1)] >> 'o'
 349+ | hex_p[value.val = bind(&basic_datum<charT>::from_int)(arg1)] >> 'x'
 350+ | bin_p[value.val = bind(&basic_datum<charT>::from_int)(arg1)] >> 'b'
 351+ | int_p[value.val = bind(&basic_datum<charT>::from_int)(arg1)]
 352+ ]
 353+ /*
 354+ * confix_p can't be used here, because it will rewrite
 355+ * *(c_escape_ch_p[x]) into (*c_escape_ch_p)[x]
 356+ */
 357+ | (
 358+ ch_p(charT('"'))[value.val = bind(&basic_datum<charT>::from_string)(empty_string)]
 359+ >> *((c_escape_ch_p[value.val = bind(&f_append<charT>)(value.val, arg1)] - charT('"')))
 360+ >> ch_p(charT('"'))[value.val = bind(&f_strip_last<charT>)(value.val)]
 361+ )
 362+ ;
 363+
 364+ /*
 365+ * A variable. If the variable is found in the
 366+ * user-supplied variable list, we use that.
 367+ * Otherwise, unknown variables (containing uppercase
 368+ * letters and underscore only) are returned as the
 369+ * empty string.
 370+ */
 371+ variable =
 372+ self.variables[variable.val = arg1]
 373+ | (+ (upper_p | '_') )[variable.val = bind(&basic_datum<charT>::from_string)(empty_string)]
 374+ ;
 375+
 376+ /*
 377+ * A function call: func(arg[, arg...]). The comma-separated list below needs at least one argument.
 378+ */
 379+ function =
 380+ (
 381+ self.functions[function.func = arg1]
 382+ >> '('
 383+ >> ( tern_expr[push_back(function.args, arg1)] % ',' )
 384+ >> ')'
 385+ ) [function.val = call_function(function.func, function.args)]
 386+ ;
 387+
 388+ /*
 389+ * A basic atomic value. Either a variable, function
 390+ * or literal, or a negated expression !a, a unary +a or -a, or a
 391+ * parenthesised expression (a). NOTE(review): the operand of !, + and - is a full tern_expr, so !a & b appears to parse as !(a & b) - confirm this is intended.
 392+ */
 393+ basic =
 394+ ( '(' >> tern_expr[basic.val = arg1] >> ')' )
 395+ | ch_p('!') >> tern_expr[basic.val = bind(&datum_negate<charT>)(arg1)]
 396+ | ch_p('+') >> tern_expr[basic.val = arg1]
 397+ | ch_p('-') >> tern_expr[basic.val = -arg1]
 398+ | value[basic.val = arg1]
 399+ | variable[basic.val = arg1]
 400+ | function[basic.val = arg1]
 401+ ;
 402+
 403+ /*
 404+ * "a in b" operator, plus the keyword operators "contains", "like", "matches", "rlike" and "regex". Note the operand order passed to each helper differs per keyword.
 405+ */
 406+ in_expr =
 407+ basic[in_expr.val = arg1]
 408+ >> *(
 409+ "in" >> basic[in_expr.val = bind(&f_in<charT>)(in_expr.val, arg1)]
 410+ | "contains" >> basic[in_expr.val = bind(&f_in<charT>)(arg1, in_expr.val)]
 411+ | "like" >> basic[in_expr.val = bind(&f_like<charT>)(arg1, in_expr.val)]
 412+ | "matches" >> basic[in_expr.val = bind(&f_like<charT>)(arg1, in_expr.val)]
 413+ | "rlike" >> basic[in_expr.val = bind(&f_regex<charT>)(in_expr.val, arg1)]
 414+ | "regex" >> basic[in_expr.val = bind(&f_regex<charT>)(in_expr.val, arg1)]
 415+ )
 416+ ;
 417+
 418+ /*
 419+ * power-of. This is right-associative.
 420+ */
 421+ pow_expr =
 422+ in_expr[pow_expr.val = arg1]
 423+ >> !(
 424+ "**" >> pow_expr[pow_expr.val = bind(&::afp::pow<charT>)(pow_expr.val, arg1)]
 425+ )
 426+ ;
 427+
 428+ /*
 429+ * Multiplication and operators with the same
 430+ * precedence.
 431+ */
 432+ mult_expr =
 433+ pow_expr[mult_expr.val = arg1]
 434+ >> *(
 435+ '*' >> pow_expr[mult_expr.val *= arg1]
 436+ | '/' >> pow_expr[mult_expr.val /= arg1]
 437+ | '%' >> pow_expr[mult_expr.val %= arg1]
 438+ )
 439+ ;
 440+
 441+ /*
 442+ * Addition and operators with the same precedence.
 443+ */
 444+ plus_expr =
 445+ mult_expr[plus_expr.val = arg1]
 446+ >> *(
 447+ '+' >> mult_expr[plus_expr.val += arg1]
 448+ | '-' >> mult_expr[plus_expr.val -= arg1]
 449+ )
 450+ ;
 451+
 452+ /*
 453+ * Ordinal comparisons and operators with the same
 454+ * precedence.
 455+ */
 456+ ord_expr =
 457+ plus_expr[ord_expr.val = arg1]
 458+ >> *(
 459+ /* don't remove the () from (ord_expr.val) - for some reason it confuses
 460+ gcc into thinking the < begins a template list */
 461+ "<" >> plus_expr[ord_expr.val = bind(&basic_datum<charT>::from_int)((ord_expr.val) < arg1)]
 462+ | "<=" >> plus_expr[ord_expr.val = bind(&basic_datum<charT>::from_int)(ord_expr.val <= arg1)]
 463+ | ">" >> plus_expr[ord_expr.val = bind(&basic_datum<charT>::from_int)(ord_expr.val > arg1)]
 464+ | ">=" >> plus_expr[ord_expr.val = bind(&basic_datum<charT>::from_int)(ord_expr.val >= arg1)]
 465+ )
 466+ ;
 467+
 468+ /*
 469+ * Equality comparisons. "=" and "==" share one action, as do "!=" and "/="; "===" and "!==" go through compare_with_type.
 470+ */
 471+ eq_expr =
 472+ ord_expr[eq_expr.val = arg1]
 473+ >> *(
 474+ "=" >> eq_expr[eq_expr.val = bind(&basic_datum<charT>::from_int)(eq_expr.val == arg1)]
 475+ | "==" >> eq_expr[eq_expr.val = bind(&basic_datum<charT>::from_int)(eq_expr.val == arg1)]
 476+ | "!=" >> eq_expr[eq_expr.val = bind(&basic_datum<charT>::from_int)(eq_expr.val != arg1)]
 477+ | "/=" >> eq_expr[eq_expr.val = bind(&basic_datum<charT>::from_int)(eq_expr.val != arg1)]
 478+ | "===" >> eq_expr[eq_expr.val =
 479+ bind(&basic_datum<charT>::from_int)(
 480+ bind(&basic_datum<charT>::compare_with_type)(eq_expr.val, arg1))]
 481+ | "!==" >> eq_expr[eq_expr.val =
 482+ bind(&basic_datum<charT>::from_int)(
 483+ !bind(&basic_datum<charT>::compare_with_type)(eq_expr.val, arg1))]
 484+ )
 485+ ;
 486+
 487+ /*
 488+ * Boolean expressions: '&', '|' and '^' are evaluated by datum_and, datum_or and datum_xor.
 489+ */
 490+ bool_expr =
 491+ eq_expr[bool_expr.val = arg1]
 492+ >> *(
 493+ '&' >> eq_expr[bool_expr.val = bind(datum_and<charT>)(bool_expr.val, arg1)]
 494+ | '|' >> eq_expr[bool_expr.val = bind(datum_or<charT>)(bool_expr.val, arg1)]
 495+ | '^' >> eq_expr[bool_expr.val = bind(datum_xor<charT>)(bool_expr.val, arg1)]
 496+ )
 497+ ;
 498+
 499+ /*
 500+ * The ternary operator. Notice this is
 501+ * right-associative: a ? b ? c : d : e
 502+ * is supported.
 503+ */
 504+ tern_expr =
 505+ bool_expr[tern_expr.val = arg1]
 506+ >> !(
 507+ (
 508+ "?" >> tern_expr[tern_expr.iftrue = arg1]
 509+ >> ":" >> tern_expr[tern_expr.iffalse = arg1]
 510+ )[tern_expr.val =
 511+ bind(f_ternary<charT>)(tern_expr.val, tern_expr.iftrue, tern_expr.iffalse)]
 512+ )
 513+ ;
 514+
 515+ /*
 516+ * The root expression type. Its value is stored in the grammar's own closure (self.val).
 517+ */
 518+ expr = tern_expr[self.val = arg1];
 519+ }
 520+
 521+ rule_t const &start() const {
 522+ return expr;
 523+ }
 524+
 525+ rule_t value, variable, basic, bool_expr,
 526+ ord_expr, eq_expr, pow_expr, mult_expr, plus_expr, in_expr, expr;
 527+ rule<ScannerT, typename function_closure<charT>::context_t > function;
 528+ rule<ScannerT, typename ternary_closure<charT>::context_t > tern_expr;
 529+ };
 530+};
 531+
 532+template<typename charT>
 533+basic_expressor<charT>::basic_expressor()
 534+ : grammar_(new parser_grammar<charT>) /* released with delete in the destructor */
 535+{
 536+ /*
 537+ * We provide a couple of standard variables everyone wants: "true" and "false", stored as the integer data from_int(true) and from_int(false).
 538+ */
 539+ add_variable("true", afp::basic_datum<charT>::from_int(true));
 540+ add_variable("false", afp::basic_datum<charT>::from_int(false));
 541+
 542+ /*
 543+ * The cast functions: int(), string() and float().
 544+ */
 545+ add_function("int", &f_int<charT>);
 546+ add_function("string", &f_string<charT>);
 547+ add_function("float", &f_float<charT>);
 548+}
 549+
 550+template<typename charT>
 551+basic_expressor<charT>::~basic_expressor()
 552+{
 553+ delete grammar_; /* allocated with new in the constructor */
 554+}
 555+
 556+/*
 557+ * The user interface to evaluate an expression. It returns the result, or
 558+ * throws parse_error if the filter is not parsed in full; in that case the text from the stop position to the end is first written to std::cerr.
 559+ */
 560+template<typename charT>
 561+basic_datum<charT>
 562+basic_expressor<charT>::evaluate(std::basic_string<charT> const &filter) const
 563+{
 564+ using namespace boost::spirit;
 565+
 566+ typedef typename std::basic_string<charT>::const_iterator iterator_t;
 567+
 568+ basic_datum<charT> ret; /* receives the grammar's result via the semantic action below */
 569+ parse_info<iterator_t> info =
 570+ parse(filter.begin(), filter.end(), (*grammar_)[var(ret) = arg1],
 571+ comment_p("/*", "*/") | chset<>("\n\t ")); /* skip parser: C-style block comments, newline, tab and space */
 572+ if (info.full) {
 573+ return ret;
 574+ } else {
 575+ std::cerr << "stopped at: [" << std::basic_string<charT>(info.stop, filter.end()) << "]\n";
 576+ throw parse_error("parsing failed");
 577+ }
 578+}
 579+
 580+template<typename charT>
 581+void
 582+basic_expressor<charT>::add_variable(std::basic_string<charT> const &name, basic_datum<charT> const &value)
 583+{
 584+ grammar_->add_variable(name, value); /* forwards to the grammar, which adds the name to its 'variables' symbol table */
 585+}
 586+
 587+template<typename charT>
 588+void
 589+basic_expressor<charT>::add_function(std::basic_string<charT> const &name, func_t value)
 590+{
 591+ grammar_->add_function(name, value); /* forwards to the grammar, which adds the name to its 'functions' symbol table */
 592+}
 593+
 594+namespace {
 595+
 596+/* $NetBSD: fnmatch.c,v 1.21 2005/12/24 21:11:16 perry Exp $ */
 597+
 598+/*
 599+ * Copyright (c) 1989, 1993, 1994
 600+ * The Regents of the University of California. All rights reserved.
 601+ *
 602+ * This code is derived from software contributed to Berkeley by
 603+ * Guido van Rossum.
 604+ *
 605+ * Redistribution and use in source and binary forms, with or without
 606+ * modification, are permitted provided that the following conditions
 607+ * are met:
 608+ * 1. Redistributions of source code must retain the above copyright
 609+ * notice, this list of conditions and the following disclaimer.
 610+ * 2. Redistributions in binary form must reproduce the above copyright
 611+ * notice, this list of conditions and the following disclaimer in the
 612+ * documentation and/or other materials provided with the distribution.
 613+ * 3. Neither the name of the University nor the names of its contributors
 614+ * may be used to endorse or promote products derived from this software
 615+ * without specific prior written permission.
 616+ *
 617+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 618+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 619+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 620+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 621+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 622+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 623+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 624+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 625+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 626+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 627+ * SUCH DAMAGE.
 628+ */
 629+
 630+/*
 631+ * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
 632+ * Compares a filename or pathname to a pattern.
 633+ */
 634+
 635+#include <ctype.h>
 636+#include <string.h>
 637+
 638+#define EOS '\0'
 639+
 640+template<typename charT>
 641+const charT *rangematch(const charT *, int); /* defined after match(); declared here because match() calls it */
 642+
 643+template<typename charT>
 644+int
 645+match(charT const *pattern, charT const *string) /* returns 1 if the EOS-terminated string matches the glob pattern, 0 otherwise */
 646+{
 647+ const charT *stringstart; /* NOTE(review): initialised in the for statement but never read in this function */
 648+ charT c, test; /* NOTE(review): test is only assigned in the '*' loop condition, never read */
 649+
 650+ for (stringstart = string;;)
 651+ switch (c = *pattern++) {
 652+ case EOS:
 653+ return (*string == EOS ? 1 : 0);
 654+ case '?':
 655+ if (*string == EOS)
 656+ return (0);
 657+ ++string;
 658+ break;
 659+ case '*':
 660+ c = *pattern;
 661+ /* Collapse multiple stars. */
 662+ while (c == '*')
 663+ c = *++pattern;
 664+
 665+ /* Optimize for pattern with * at end. */
 666+ if (c == EOS) {
 667+ return (1);
 668+ }
 669+
 670+ /* General case, use recursion: try the rest of the pattern at each remaining position of string. */
 671+ while ((test = *string) != EOS) {
 672+ if (match(pattern, string))
 673+ return (1);
 674+ ++string;
 675+ }
 676+ return (0);
 677+ case '[':
 678+ if (*string == EOS)
 679+ return (0);
 680+ if ((pattern =
 681+ rangematch(pattern, *string)) == NULL)
 682+ return (0);
 683+ ++string;
 684+ break;
 685+ case '\\':
 686+ if ((c = *pattern++) == EOS) { /* a trailing backslash is matched as a literal backslash */
 687+ c = '\\';
 688+ --pattern;
 689+ }
 690+ /* FALLTHROUGH */
 691+ default:
 692+ if (c != *string++)
 693+ return (0);
 694+ break;
 695+ }
 696+ /* NOTREACHED */
 697+}
 698+
 699+template<typename charT>
 700+const charT *
 701+rangematch(charT const *pattern, int test) /* pattern points just past '['; returns a pointer just past the closing ']' if test is accepted, else NULL */
 702+{
 703+ int negate, ok;
 704+ charT c, c2;
 705+
 706+ /*
 707+ * A bracket expression starting with an unquoted circumflex
 708+ * character produces unspecified results (IEEE 1003.2-1992,
 709+ * 3.13.2). This implementation treats it like '!', for
 710+ * consistency with the regular expression syntax.
 711+ * J.T. Conklin (conklin@ngai.kaleida.com)
 712+ */
 713+ if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
 714+ ++pattern;
 715+
 716+ for (ok = 0; (c = *pattern++) != ']';) {
 717+ if (c == '\\') /* backslash escapes the next pattern character */
 718+ c = *pattern++;
 719+ if (c == EOS) /* pattern ended before the closing ']' */
 720+ return (NULL);
 721+ if (*pattern == '-'
 722+ && (c2 = (*(pattern+1))) != EOS &&
 723+ c2 != ']') { /* a range c-c2; a '-' directly before ']' is treated as a literal instead */
 724+ pattern += 2;
 725+ if (c2 == '\\')
 726+ c2 = *pattern++;
 727+ if (c2 == EOS)
 728+ return (NULL);
 729+ if (c <= test && test <= c2)
 730+ ok = 1;
 731+ } else if (c == test)
 732+ ok = 1;
 733+ }
 734+ return (ok == negate ? NULL : pattern); /* succeeds when exactly one of ok and negate is set */
 735+}
 736+
 737+} // anonymous namespace
 738+
43739 } // namespace afp
44740
45741 #endif /* !EXPRESSOR_H */
Index: trunk/extensions/AbuseFilter/parser_native/makefile
@@ -18,7 +18,6 @@
1919
2020 af_expr_objs = \
2121 af_expr-affunctions.o \
22 - af_expr-parser.o \
2322 af_expr-filter_evaluator.o \
2423 af_expr-eval.o \
2524 af_expr-utf8.o \
@@ -28,7 +27,6 @@
2928 af_parser_objs = \
3029 af_parser-affunctions.o \
3130 af_parser-main.o \
32 - af_parser-parser.o \
3331 af_parser-request.o \
3432 af_parser-utf8.o \
3533 af_parser-equiv.o \
@@ -37,7 +35,6 @@
3836 check_objs = \
3937 check-affunctions.o \
4038 check-check.o \
41 - check-parser.o \
4239 check-utf8.o \
4340 check-equiv.o \
4441 check-filter_evaluator.o
@@ -45,13 +42,12 @@
4643 syntax_check_objs = \
4744 syntax_check-affunctions.o \
4845 syntax_check-filter_evaluator.o \
49 - syntax_check-parser.o \
5046 syntax_check-utf8.o \
5147 syntax_check-equiv.o \
5248 syntax_check-syntax_check.o
5349
5450 expr_objs = \
55 - expr-parser.o
 51+ expr-expr.o
5652
5753 progs = check af_parser syntax_check af_expr expr
5854

Status & tagging log