Index: trunk/extensions/AbuseFilter/parser_native/ast.h |
— | — | @@ -12,6 +12,8 @@ |
13 | 13 | #ifndef AST_H |
14 | 14 | #define AST_H |
15 | 15 | |
| 16 | +#include "parserdefs.h" |
| 17 | + |
16 | 18 | namespace { |
17 | 19 | |
18 | 20 | template<typename charT> |
— | — | @@ -32,7 +34,6 @@ |
33 | 35 | str++; |
34 | 36 | } |
35 | 37 | |
36 | | - std::cerr << "hex2int: " << ret << '\n'; |
37 | 38 | return ret; |
38 | 39 | } |
39 | 40 | |
— | — | @@ -381,58 +382,58 @@ |
382 | 383 | ast_evaluator<charT, iterator>::tree_eval(iterator const &i) |
383 | 384 | { |
384 | 385 | switch (i->value.id().to_long()) { |
385 | | - case parser_grammar<charT>::id_value: |
| 386 | + case pid_value: |
386 | 387 | return ast_eval_num( |
387 | 388 | basic_fray<charT>(i->value.begin(), i->value.end())); |
388 | 389 | |
389 | | - case parser_grammar<charT>::id_string: |
| 390 | + case pid_string: |
390 | 391 | return ast_eval_string(basic_fray<charT>(i->value.begin(), i->value.end())); |
391 | 392 | |
392 | | - case parser_grammar<charT>::id_basic: |
| 393 | + case pid_basic: |
393 | 394 | return ast_eval_basic(*i->value.begin(), i->children.begin()); |
394 | 395 | |
395 | | - case parser_grammar<charT>::id_variable: |
| 396 | + case pid_variable: |
396 | 397 | return ast_eval_variable(basic_fray<charT>(i->value.begin(), i->value.end())); |
397 | 398 | |
398 | | - case parser_grammar<charT>::id_function: |
| 399 | + case pid_function: |
399 | 400 | return ast_eval_function( |
400 | 401 | basic_fray<charT>(i->value.begin(), i->value.end()), |
401 | 402 | i->children.begin(), i->children.end()); |
402 | 403 | |
403 | | - case parser_grammar<charT>::id_in_expr: |
| 404 | + case pid_in_expr: |
404 | 405 | return ast_eval_in(*i->value.begin(), i->children.begin(), i->children.begin() + 1); |
405 | 406 | |
406 | | - case parser_grammar<charT>::id_bool_expr: |
| 407 | + case pid_bool_expr: |
407 | 408 | return ast_eval_bool(*i->value.begin(), i->children.begin(), i->children.begin() + 1); |
408 | 409 | |
409 | | - case parser_grammar<charT>::id_plus_expr: |
| 410 | + case pid_plus_expr: |
410 | 411 | return ast_eval_plus(*i->value.begin(), i->children.begin(), i->children.begin() + 1); |
411 | 412 | |
412 | | - case parser_grammar<charT>::id_mult_expr: |
| 413 | + case pid_mult_expr: |
413 | 414 | return ast_eval_mult(*i->value.begin(), i->children.begin(), i->children.begin() + 1); |
414 | 415 | |
415 | | - case parser_grammar<charT>::id_pow_expr: |
| 416 | + case pid_pow_expr: |
416 | 417 | return ast_eval_pow(i->children.begin(), i->children.begin() + 1); |
417 | 418 | |
418 | | - case parser_grammar<charT>::id_ord_expr: |
| 419 | + case pid_ord_expr: |
419 | 420 | return ast_eval_ord( |
420 | 421 | basic_fray<charT>(i->value.begin(), i->value.end()), |
421 | 422 | i->children.begin(), i->children.begin() + 1); |
422 | 423 | |
423 | | - case parser_grammar<charT>::id_eq_expr: |
| 424 | + case pid_eq_expr: |
424 | 425 | return ast_eval_eq( |
425 | 426 | basic_fray<charT>(i->value.begin(), i->value.end()), |
426 | 427 | i->children.begin(), i->children.begin() + 1); |
427 | 428 | |
428 | | - case parser_grammar<charT>::id_tern_expr: |
| 429 | + case pid_tern_expr: |
429 | 430 | return ast_eval_tern( |
430 | 431 | i->children.begin(), |
431 | 432 | i->children.begin() + 1, |
432 | 433 | i->children.begin() + 2); |
433 | 434 | |
434 | 435 | default: |
435 | | - std::cerr << "warning: unmatched expr type " << i->value.id().to_long() << "\n"; |
436 | | - return basic_datum<charT>::from_int(0); |
| 436 | + throw parse_error( |
| 437 | + str(boost::format("internal error: unmatched expr type %d") % i->value.id().to_long())); |
437 | 438 | } |
438 | 439 | } |
439 | 440 | |
Index: trunk/extensions/AbuseFilter/parser_native/parser.h |
— | — | @@ -39,6 +39,7 @@ |
40 | 40 | #include "affunctions.h" |
41 | 41 | #include "fray.h" |
42 | 42 | #include "ast.h" |
| 43 | +#include "parserdefs.h" |
43 | 44 | |
44 | 45 | namespace afp { |
45 | 46 | |
— | — | @@ -82,11 +83,12 @@ |
83 | 84 | * |
84 | 85 | * expressor e; |
85 | 86 | * e.add_variable("ONE", 1); |
| 87 | + * e.add_function("f", myfunc); |
86 | 88 | * e.evaluate("ONE + 2"); -- returns 3 |
87 | 89 | * |
88 | 90 | * Custom functions should have the following prototype: |
89 | 91 | * |
90 | | - * datum (std::vector<afp::datum) const &args); |
| 92 | + * afp::basic_datum<charT> (std::vector<afp::basic_datum<charT> > const &args); |
91 | 93 | * |
92 | 94 | * Functions must return a value; they cannot be void. The arguments passed to |
93 | 95 | * the function are stored in the 'args' array in left-to-right order. |
— | — | @@ -94,56 +96,52 @@ |
95 | 97 | * The parser implements a C-like grammar with some differences. The following |
96 | 98 | * operators are available: |
97 | 99 | * |
98 | | - * a & b true if a and b are both true |
99 | | - * a | b true if either a or b is true |
100 | | - * a ^ b true if either a or b is true, but not if both are true |
101 | | - * a + b arithmetic |
| 100 | + * a & b true if a and b are both true |
| 101 | + * a | b true if either a or b is true |
| 102 | + * a ^ b true if either a or b is true, but not if both are true |
| 103 | + * a + b arithmetic |
102 | 104 | * a - b |
103 | 105 | * a * b |
104 | 106 | * a / b |
105 | 107 | * a % b |
106 | | - * a ** b power-of (a^b) |
107 | | - * a in b true if the string "b" contains the substring "a" |
108 | | - * !a true if a is false |
109 | | - * (a) same value as a |
110 | | - * a ? b : c if a is true, returns the value of b, otherwise c |
111 | | - * a == b comparison operators |
| 108 | + * a ** b power-of (a^b) |
| 109 | + * a in b true if the string "b" contains the substring "a" |
| 110 | + * a contains b true if the string "a" contains the substring "b" |
| 111 | + * a like b true if a matches the Unix glob b |
| 112 | + * a matches b '' '' |
| 113 | + * a rlike b true if a matches the Perl regex b |
| 114 | + * a regex b '' '' |
| 115 | + * !a true if a is false |
| 116 | + * (a) same value as a |
| 117 | + * a ? b : c if a is true, returns the value of b, otherwise c |
| 118 | + * a == b comparison operators |
112 | 119 | * a != b |
113 | 120 | * a < b |
114 | 121 | * a <= b |
115 | 122 | * a > b |
116 | 123 | * a >= b |
117 | | - * a === b returns true if a==b and both are the same type |
118 | | - * a !== b return true if a != b or they are different types |
| 124 | + * a === b returns true if a==b and both are the same type |
| 125 | + * a !== b returns true if a != b or they are different types |
119 | 126 | * |
120 | 127 | * The parser uses afp::datum for its variables. This means it supports |
121 | 128 | * strings, ints and floats, with automatic conversion between types. |
| 129 | + * |
| 130 | + * String constants are C-style. The standard C escapes \a \b \f \t \r \n \v are |
| 131 | + * supported. \xHH encodes a 1-byte Unicode character, \uHHHH encodes a 2-byte |
| 132 | + * Unicode character, and \UHHHHHHHH encodes a 4-byte Unicode character. |
| 133 | + * |
| 134 | + * Numeric constants can be integers (e.g. 1), or floating-point numbers (e.g. |
| 135 | + * 1., .1, 1.2). |
| 136 | + * |
| 137 | + * Function calls are f(arg1, arg2, ...). |
122 | 138 | */ |
123 | 139 | |
124 | | -struct parse_error : std::runtime_error { |
125 | | - parse_error(char const *what) : std::runtime_error(what) {} |
126 | | -}; |
127 | | - |
128 | 140 | /* |
129 | 141 | * The grammar itself. |
130 | 142 | */ |
131 | 143 | template<typename charT> |
132 | 144 | struct parser_grammar : public grammar<parser_grammar<charT> > |
133 | 145 | { |
134 | | - static const int id_value = 1; |
135 | | - static const int id_variable = 2; |
136 | | - static const int id_basic = 3; |
137 | | - static const int id_bool_expr = 4; |
138 | | - static const int id_ord_expr = 5; |
139 | | - static const int id_eq_expr = 6; |
140 | | - static const int id_pow_expr = 7; |
141 | | - static const int id_mult_expr = 8; |
142 | | - static const int id_plus_expr = 9; |
143 | | - static const int id_in_expr = 10; |
144 | | - static const int id_function = 12; |
145 | | - static const int id_tern_expr = 13; |
146 | | - static const int id_string = 14; |
147 | | - |
148 | 146 | /* User-defined variables. */ |
149 | 147 | symbols<basic_datum<charT>, charT > variables; |
150 | 148 | |
— | — | @@ -245,10 +243,7 @@ |
246 | 244 | * letters and underscore only) are returned as the |
247 | 245 | * empty string. |
248 | 246 | */ |
249 | | - variable = longest_d[ |
250 | | - self.variables |
251 | | - | leaf_node_d[ (+ (upper_p | '_') ) ] |
252 | | - ] |
| 247 | + variable = leaf_node_d[ +(upper_p | '_') ] |
253 | 248 | ; |
254 | 249 | |
255 | 250 | /* |
— | — | @@ -256,7 +251,9 @@ |
257 | 252 | */ |
258 | 253 | function = |
259 | 254 | ( |
260 | | - root_node_d[self.functions] |
| 255 | + root_node_d[ leaf_node_d[ |
| 256 | + +(lower_p | '_') |
| 257 | + ] ] |
261 | 258 | >> inner_node_d[ |
262 | 259 | '(' |
263 | 260 | >> ( tern_expr % discard_node_d[ch_p(',')] ) |
— | — | @@ -352,26 +349,26 @@ |
353 | 350 | ; |
354 | 351 | } |
355 | 352 | |
356 | | - rule<ScannerT, parser_context<>, parser_tag<id_tern_expr> > |
| 353 | + rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> > |
357 | 354 | const &start() const { |
358 | 355 | return tern_expr; |
359 | 356 | } |
360 | 357 | |
361 | 358 | rule<ScannerT> c_string_char, hexchar, octchar; |
362 | | - rule<ScannerT, parser_context<>, parser_tag<id_value> > value; |
363 | | - rule<ScannerT, parser_context<>, parser_tag<id_variable> > variable; |
364 | | - rule<ScannerT, parser_context<>, parser_tag<id_basic> > basic; |
365 | | - rule<ScannerT, parser_context<>, parser_tag<id_bool_expr> > bool_expr; |
366 | | - rule<ScannerT, parser_context<>, parser_tag<id_ord_expr> > ord_expr; |
367 | | - rule<ScannerT, parser_context<>, parser_tag<id_eq_expr> > eq_expr; |
368 | | - rule<ScannerT, parser_context<>, parser_tag<id_pow_expr> > pow_expr; |
369 | | - rule<ScannerT, parser_context<>, parser_tag<id_mult_expr> > mult_expr; |
370 | | - rule<ScannerT, parser_context<>, parser_tag<id_plus_expr> > plus_expr; |
371 | | - rule<ScannerT, parser_context<>, parser_tag<id_in_expr> > in_expr; |
| 359 | + rule<ScannerT, parser_context<>, parser_tag<pid_value> > value; |
| 360 | + rule<ScannerT, parser_context<>, parser_tag<pid_variable> > variable; |
| 361 | + rule<ScannerT, parser_context<>, parser_tag<pid_basic> > basic; |
| 362 | + rule<ScannerT, parser_context<>, parser_tag<pid_bool_expr> > bool_expr; |
| 363 | + rule<ScannerT, parser_context<>, parser_tag<pid_ord_expr> > ord_expr; |
| 364 | + rule<ScannerT, parser_context<>, parser_tag<pid_eq_expr> > eq_expr; |
| 365 | + rule<ScannerT, parser_context<>, parser_tag<pid_pow_expr> > pow_expr; |
| 366 | + rule<ScannerT, parser_context<>, parser_tag<pid_mult_expr> > mult_expr; |
| 367 | + rule<ScannerT, parser_context<>, parser_tag<pid_plus_expr> > plus_expr; |
| 368 | + rule<ScannerT, parser_context<>, parser_tag<pid_in_expr> > in_expr; |
372 | 369 | |
373 | | - rule<ScannerT, parser_context<>, parser_tag<id_function> > function; |
374 | | - rule<ScannerT, parser_context<>, parser_tag<id_tern_expr> > tern_expr; |
375 | | - rule<ScannerT, parser_context<>, parser_tag<id_string> > string; |
| 370 | + rule<ScannerT, parser_context<>, parser_tag<pid_function> > function; |
| 371 | + rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> > tern_expr; |
| 372 | + rule<ScannerT, parser_context<>, parser_tag<pid_string> > string; |
376 | 373 | }; |
377 | 374 | }; |
378 | 375 | |
— | — | @@ -414,7 +411,7 @@ |
415 | 412 | basic_datum<charT> ret; |
416 | 413 | |
417 | 414 | tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_, |
418 | | - +chset<>("\n\t ") | comment_p("/*", "*/")); |
| 415 | + chset<>("\r\n\t ") | comment_p("/*", "*/")); |
419 | 416 | |
420 | 417 | if (info.full) { |
421 | 418 | ast_evaluator<charT, typename tree_match<iterator_t>::tree_iterator> ae(*grammar_); |
Index: trunk/extensions/AbuseFilter/parser_native/parserdefs.h |
— | — | @@ -0,0 +1,48 @@ |
| 2 | +/* |
| 3 | + * Copyright (c) 2008 Andrew Garrett. |
| 4 | + * Copyright (c) 2008 River Tarnell <river@wikimedia.org> |
| 5 | + * Derived from public domain code contributed by Victor Vasiliev. |
| 6 | + * |
| 7 | + * Permission is granted to anyone to use this software for any purpose, |
| 8 | + * including commercial applications, and to alter it and redistribute it |
| 9 | + * freely. This software is provided 'as-is', without any express or |
| 10 | + * implied warranty. |
| 11 | + */ |
| 12 | + |
| 13 | +#ifndef PARSERDEFS_H |
| 14 | +#define PARSERDEFS_H |
| 15 | + |
| 16 | +#define pid_value 1 /* Parse-tree node ids: passed to parser_tag<> on the grammar rules and used as case labels in ast_evaluator::tree_eval. */ |
| 17 | +#define pid_variable 2 |
| 18 | +#define pid_basic 3 |
| 19 | +#define pid_bool_expr 4 |
| 20 | +#define pid_ord_expr 5 |
| 21 | +#define pid_eq_expr 6 |
| 22 | +#define pid_pow_expr 7 |
| 23 | +#define pid_mult_expr 8 |
| 24 | +#define pid_plus_expr 9 |
| 25 | +#define pid_in_expr 10 |
| 26 | +#define pid_function 12 /* NOTE(review): 11 is not assigned in this list; the values mirror the former parser_grammar::id_* constants. */ |
| 27 | +#define pid_tern_expr 13 |
| 28 | +#define pid_string 14 |
| 29 | + |
| 30 | +namespace afp { |
| 31 | + |
| 32 | +struct parse_error : std::exception { /* Exception carrying an error message; tree_eval throws it for an unmatched expression type. NOTE(review): no #include <string> / <exception> is visible in this header -- confirm every includer provides them. */ |
| 33 | + parse_error(std::string const &what) /* Stores a copy of the message in what_. */ |
| 34 | + : what_(what) |
| 35 | + {} |
| 36 | + |
| 37 | + ~parse_error() throw() {} /* Declared non-throwing, like what() below. */ |
| 38 | + |
| 39 | + char const *what() const throw() { /* Returns the stored message as a C string. */ |
| 40 | + return what_.c_str(); |
| 41 | + } |
| 42 | + |
| 43 | +private: |
| 44 | + std::string what_; /* The message text supplied at construction. */ |
| 45 | +}; |
| 46 | + |
| 47 | +} // namespace afp |
| 48 | + |
| 49 | +#endif /* !PARSERDEFS_H */ |