Index: trunk/extensions/AbuseFilter/parser_native/parser.cpp |
— | — | @@ -16,6 +16,56 @@ |
17 | 17 | using namespace boost::spirit; |
18 | 18 | using namespace phoenix; |
19 | 19 | |
| 20 | +/* |
| 21 | + * ABUSEFILTER EXPRESSION PARSER |
| 22 | + * ============================= |
| 23 | + * |
| 24 | + * This is the basic expression parser. It doesn't contain any AF logic |
| 25 | + * itself, but rather presents an interface for the user to add custom |
| 26 | + * functions and variables. |
| 27 | + * |
| 28 | + * The interface to the parser is the 'expressor' class. Use it like this: |
| 29 | + * |
| 30 | + * expressor e; |
| 31 | + * e.add_variable("ONE", 1); |
| 32 | + * e.evaluate("ONE + 2"); -- returns 3 |
| 33 | + * |
| 34 | + * Custom functions should have the following prototype: |
| 35 | + * |
| 36 | + * datum (std::vector<afp::datum) const &args); |
| 37 | + * |
| 38 | + * Functions must return a value; they cannot be void. The arguments passed to |
| 39 | + * the function are stored in the 'args' array in left-to-right order. |
| 40 | + * |
| 41 | + * The parser implements a C-like grammar with some differences. The following |
| 42 | + * operators are available: |
| 43 | + * |
| 44 | + * a & b true if a and b are both true |
| 45 | + * a | b true if either a or b is true |
| 46 | + * a ^ b true if either a or b is true, but not if both are true |
| 47 | + * a + b arithmetic |
| 48 | + * a - b |
| 49 | + * a * b |
| 50 | + * a / b |
| 51 | + * a % b |
| 52 | + * a ** b power-of (a^b) |
| 53 | + * a in b true if the string "b" contains the substring "a" |
| 54 | + * !a true if a is false |
| 55 | + * (a) same value as a |
| 56 | + * a ? b : c if a is true, returns the value of b, otherwise c |
| 57 | + * a == b comparison operators |
| 58 | + * a != b |
| 59 | + * a < b |
| 60 | + * a <= b |
| 61 | + * a > b |
| 62 | + * a >= b |
| 63 | + * a === b returns true if a==b and both are the same type |
| 64 | + * a !== b return true if a != b or they are different types |
| 65 | + * |
| 66 | + * The parser uses afp::datum for its variables. This means it supports |
| 67 | + * strings, ints and floats, with automatic conversion between types. |
| 68 | + */ |
| 69 | + |
20 | 70 | namespace px = phoenix; |
21 | 71 | |
22 | 72 | namespace afp { |
— | — | @@ -24,6 +74,10 @@ |
25 | 75 | parse_error(char const *what) : std::runtime_error(what) {} |
26 | 76 | }; |
27 | 77 | |
| 78 | +/* |
| 79 | + * The parser stores the result of each grammar rule in a closure. Most rules |
| 80 | + * use the parser_closure, which simply stores a single value. |
| 81 | + */ |
28 | 82 | struct parser_closure : boost::spirit::closure<parser_closure, datum> |
29 | 83 | { |
30 | 84 | member1 val; |
— | — | @@ -45,6 +99,11 @@ |
46 | 100 | |
47 | 101 | } |
48 | 102 | |
| 103 | +/* |
| 104 | + * This is the closure types for functions. 'val' stores the final result of |
| 105 | + * the function call; func and args store the function object and the parsed |
| 106 | + * arguments. |
| 107 | + */ |
49 | 108 | struct function_closure : boost::spirit::closure< |
50 | 109 | function_closure, |
51 | 110 | datum, |
— | — | @@ -56,6 +115,9 @@ |
57 | 116 | member3 args; |
58 | 117 | }; |
59 | 118 | |
| 119 | +/* |
| 120 | + * The closure for the ?: operator. Parsed as expr ? iftrue : iffalse. |
| 121 | + */ |
60 | 122 | struct ternary_closure : boost::spirit::closure< |
61 | 123 | ternary_closure, |
62 | 124 | datum, |
— | — | @@ -67,15 +129,21 @@ |
68 | 130 | member3 iffalse; |
69 | 131 | }; |
70 | 132 | |
| 133 | +/* |
| 134 | + * The grammar itself. |
| 135 | + */ |
71 | 136 | struct parser_grammar : public grammar<parser_grammar, parser_closure::context_t> |
72 | 137 | { |
| 138 | + /* User-defined variables. */ |
73 | 139 | symbols<datum> variables; |
74 | | - symbols<boost::function<datum (std::vector<datum>)> > functions; |
75 | 140 | |
76 | 141 | void add_variable(std::string const &name, datum const &value) { |
77 | 142 | variables.add(name.c_str(), value); |
78 | 143 | } |
79 | 144 | |
| 145 | + /* User-defined functions. */ |
| 146 | + symbols<boost::function<datum (std::vector<datum>)> > functions; |
| 147 | + |
80 | 148 | void add_function(std::string const &name, boost::function<datum (std::vector<datum>)> func) { |
81 | 149 | functions.add(name.c_str(), func); |
82 | 150 | } |
— | — | @@ -87,6 +155,9 @@ |
88 | 156 | |
89 | 157 | parser_grammar const &self_; |
90 | 158 | |
| 159 | + /* |
| 160 | + * A phoenix actor to append its argument to a container. |
| 161 | + */ |
91 | 162 | struct push_back_impl { |
92 | 163 | template<typename C, typename I> |
93 | 164 | struct result { |
— | — | @@ -101,6 +172,10 @@ |
102 | 173 | |
103 | 174 | phoenix::function<push_back_impl> const push_back; |
104 | 175 | |
| 176 | + /* |
| 177 | + * A phoenix actor to call a user-defined function given the |
| 178 | + * function object and arguments. |
| 179 | + */ |
105 | 180 | struct call_function_impl { |
106 | 181 | template<typename F, typename A> |
107 | 182 | struct result { |
— | — | @@ -120,21 +195,33 @@ |
121 | 196 | , push_back(push_back_impl()) |
122 | 197 | , call_function(call_function_impl()) |
123 | 198 | { |
| 199 | + /* |
| 200 | + * A literal value. Either a string, a floating |
| 201 | + * pointer number or an integer. |
| 202 | + */ |
124 | 203 | value = |
125 | | - real_p[value.val = arg1] |
| 204 | + strict_real_p[value.val = arg1] |
126 | 205 | | int_p[value.val = arg1] |
127 | 206 | | confix_p('"', *c_escape_ch_p, '"')[ |
128 | 207 | value.val = construct_<std::string>(arg1 + 1, arg2 - 1) |
129 | 208 | ] |
130 | 209 | ; |
131 | 210 | |
132 | | - /* a sequence of uppercase letters is a variable */ |
| 211 | + /* |
| 212 | + * A variable. If the variable is found in the |
| 213 | + * user-supplied variable list, we use that. |
| 214 | + * Otherwise, unknown variables (containing uppercase |
| 215 | + * letters and underscore only) are returned as the |
| 216 | + * empty string. |
| 217 | + */ |
133 | 218 | variable = |
134 | 219 | self.variables[variable.val = arg1] |
135 | | - | (+upper_p)[variable.val = ""] |
| 220 | + | (+ (upper_p | '_') )[variable.val = ""] |
136 | 221 | ; |
137 | 222 | |
138 | | - /* func(value) */ |
| 223 | + /* |
| 224 | + * A function call: func([arg[, arg...]]). |
| 225 | + */ |
139 | 226 | function = |
140 | 227 | ( |
141 | 228 | self.functions[function.func = arg1] |
— | — | @@ -144,6 +231,11 @@ |
145 | 232 | ) [function.val = call_function(function.func, function.args)] |
146 | 233 | ; |
147 | 234 | |
| 235 | + /* |
| 236 | + * A basic atomic value. Either a variable, function |
| 237 | + * or literal, or a negated expression !a, or a |
| 238 | + * parenthesised expression (a). |
| 239 | + */ |
148 | 240 | basic = |
149 | 241 | ( '(' >> tern_expr[basic.val = arg1] >> ')' ) |
150 | 242 | | ch_p('!') >> tern_expr[basic.val = !arg1] |
— | — | @@ -152,6 +244,9 @@ |
153 | 245 | | value[basic.val = arg1] |
154 | 246 | ; |
155 | 247 | |
| 248 | + /* |
| 249 | + * "a in b" operator |
| 250 | + */ |
156 | 251 | in_expr = |
157 | 252 | basic[in_expr.val = arg1] |
158 | 253 | >> *( |
— | — | @@ -159,17 +254,32 @@ |
160 | 255 | ) |
161 | 256 | ; |
162 | 257 | |
| 258 | + /* |
| 259 | + * power-of. This is right-associative. |
| 260 | + */ |
| 261 | + pow_expr = |
| 262 | + in_expr[pow_expr.val = arg1] |
| 263 | + >> !( |
| 264 | + "**" >> pow_expr[pow_expr.val = bind(&afp::pow)(pow_expr.val, arg1)] |
| 265 | + ) |
| 266 | + ; |
163 | 267 | |
| 268 | + /* |
| 269 | + * Multiplication and operators with the same |
| 270 | + * precedence. |
| 271 | + */ |
164 | 272 | mult_expr = |
165 | | - in_expr[mult_expr.val = arg1] |
| 273 | + pow_expr[mult_expr.val = arg1] |
166 | 274 | >> *( |
167 | | - '*' >> in_expr[mult_expr.val *= arg1] |
168 | | - | '/' >> in_expr[mult_expr.val /= arg1] |
169 | | - | '%' >> in_expr[mult_expr.val %= arg1] |
170 | | - | "**" >> in_expr[mult_expr.val = bind(&afp::pow)(mult_expr.val,arg1)] |
| 275 | + '*' >> pow_expr[mult_expr.val *= arg1] |
| 276 | + | '/' >> pow_expr[mult_expr.val /= arg1] |
| 277 | + | '%' >> pow_expr[mult_expr.val %= arg1] |
171 | 278 | ) |
172 | 279 | ; |
173 | 280 | |
| 281 | + /* |
| 282 | + * Additional and operators with the same precedence. |
| 283 | + */ |
174 | 284 | plus_expr = |
175 | 285 | mult_expr[plus_expr.val = arg1] |
176 | 286 | >> *( |
— | — | @@ -178,39 +288,54 @@ |
179 | 289 | ) |
180 | 290 | ; |
181 | 291 | |
182 | | - eq_expr = |
183 | | - plus_expr[eq_expr.val = arg1] |
| 292 | + /* |
| 293 | + * Ordinal comparisons and operators with the same |
| 294 | + * precedence. |
| 295 | + */ |
| 296 | + ord_expr = |
| 297 | + plus_expr[ord_expr.val = arg1] |
184 | 298 | >> *( |
185 | | - "<" >> plus_expr[eq_expr.val = eq_expr.val < arg1] |
186 | | - | ">" >> plus_expr[eq_expr.val = eq_expr.val > arg1] |
187 | | - | "<=" >> plus_expr[eq_expr.val = eq_expr.val <= arg1] |
188 | | - | ">=" >> plus_expr[eq_expr.val = eq_expr.val >= arg1] |
| 299 | + "<" >> plus_expr[ord_expr.val = ord_expr.val < arg1] |
| 300 | + | ">" >> plus_expr[ord_expr.val = ord_expr.val > arg1] |
| 301 | + | "<=" >> plus_expr[ord_expr.val = ord_expr.val <= arg1] |
| 302 | + | ">=" >> plus_expr[ord_expr.val = ord_expr.val >= arg1] |
189 | 303 | ) |
190 | 304 | ; |
191 | 305 | |
192 | | - eq2_expr = |
193 | | - eq_expr[eq2_expr.val = arg1] |
| 306 | + /* |
| 307 | + * Equality comparisons. |
| 308 | + */ |
| 309 | + eq_expr = |
| 310 | + ord_expr[eq_expr.val = arg1] |
194 | 311 | >> *( |
195 | | - "==" >> eq_expr[eq2_expr.val = eq2_expr.val == arg1] |
196 | | - | "!=" >> eq_expr[eq2_expr.val = eq2_expr.val != arg1] |
197 | | - | "===" >> eq_expr[eq2_expr.val = |
198 | | - bind(&datum::compare_with_type)(eq2_expr.val, arg1)] |
199 | | - | "!==" >> eq_expr[eq2_expr.val = |
200 | | - !bind(&datum::compare_with_type)(eq2_expr.val, arg1)] |
| 312 | + "==" >> eq_expr[eq_expr.val = eq_expr.val == arg1] |
| 313 | + | "!=" >> eq_expr[eq_expr.val = eq_expr.val != arg1] |
| 314 | + | "===" >> eq_expr[eq_expr.val = |
| 315 | + bind(&datum::compare_with_type)(eq_expr.val, arg1)] |
| 316 | + | "!==" >> eq_expr[eq_expr.val = |
| 317 | + !bind(&datum::compare_with_type)(eq_expr.val, arg1)] |
201 | 318 | ) |
202 | 319 | ; |
203 | 320 | |
| 321 | + /* |
| 322 | + * Boolean expressions. |
| 323 | + */ |
204 | 324 | bool_expr = |
205 | | - eq2_expr[bool_expr.val = arg1] |
| 325 | + eq_expr[bool_expr.val = arg1] |
206 | 326 | >> *( |
207 | | - '&' >> eq2_expr[bool_expr.val = bool_expr.val && arg1] |
208 | | - | '|' >> eq2_expr[bool_expr.val = bool_expr.val || arg1] |
209 | | - | '^' >> eq2_expr[bool_expr.val = |
| 327 | + '&' >> eq_expr[bool_expr.val = bool_expr.val && arg1] |
| 328 | + | '|' >> eq_expr[bool_expr.val = bool_expr.val || arg1] |
| 329 | + | '^' >> eq_expr[bool_expr.val = |
210 | 330 | ((bool_expr.val || arg1) |
211 | 331 | && !(bool_expr.val && arg1)) ] |
212 | 332 | ) |
213 | 333 | ; |
214 | 334 | |
| 335 | + /* |
| 336 | + * The ternary operator. Notice this is |
| 337 | + * right-associative: a ? b ? c : d : e |
| 338 | + * is supported. |
| 339 | + */ |
215 | 340 | tern_expr = |
216 | 341 | bool_expr[tern_expr.val = arg1] |
217 | 342 | >> !( |
— | — | @@ -222,6 +347,9 @@ |
223 | 348 | ) |
224 | 349 | ; |
225 | 350 | |
| 351 | + /* |
| 352 | + * The root expression type. |
| 353 | + */ |
226 | 354 | expr = tern_expr[self.val = arg1]; |
227 | 355 | } |
228 | 356 | |
— | — | @@ -230,7 +358,7 @@ |
231 | 359 | } |
232 | 360 | |
233 | 361 | rule_t value, variable, basic, bool_expr, |
234 | | - eq_expr, eq2_expr, mult_expr, plus_expr, in_expr, expr; |
| 362 | + ord_expr, eq_expr, pow_expr, mult_expr, plus_expr, in_expr, expr; |
235 | 363 | rule<ScannerT, function_closure::context_t> function; |
236 | 364 | rule<ScannerT, ternary_closure::context_t> tern_expr; |
237 | 365 | }; |
— | — | @@ -246,6 +374,10 @@ |
247 | 375 | delete grammar_; |
248 | 376 | } |
249 | 377 | |
| 378 | +/* |
| 379 | + * The user interface to evaluate an expression. It returns the result, or |
| 380 | + * throws an exception if an error occurs. |
| 381 | + */ |
250 | 382 | datum |
251 | 383 | expressor::evaluate(std::string const &filter) const |
252 | 384 | { |