r38877 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r38876‎ | r38877 | r38878 >
Date:10:36, 8 August 2008
Author:river
Status:old
Tags:
Comment:
- document the parser some
- make pow_expr a seperate rule with higher precedence than mult_expr
- rename eq_expr/eq2_expr to ord_expr/eq_expr
- match strict_real_p instead of real_p, otherwise '1 / 2' is 0.5 instead of 0
Modified paths:
  • /trunk/extensions/AbuseFilter/parser_native/parser.cpp (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/parser.cpp
@@ -16,6 +16,56 @@
1717 using namespace boost::spirit;
1818 using namespace phoenix;
1919
 20+/*
 21+ * ABUSEFILTER EXPRESSION PARSER
 22+ * =============================
 23+ *
 24+ * This is the basic expression parser. It doesn't contain any AF logic
 25+ * itself, but rather presents an interface for the user to add custom
 26+ * functions and variables.
 27+ *
 28+ * The interface to the parser is the 'expressor' class. Use it like this:
 29+ *
 30+ * expressor e;
 31+ * e.add_variable("ONE", 1);
 32+ * e.evaluate("ONE + 2"); -- returns 3
 33+ *
 34+ * Custom functions should have the following prototype:
 35+ *
 36+ * datum (std::vector<afp::datum) const &args);
 37+ *
 38+ * Functions must return a value; they cannot be void. The arguments passed to
 39+ * the function are stored in the 'args' array in left-to-right order.
 40+ *
 41+ * The parser implements a C-like grammar with some differences. The following
 42+ * operators are available:
 43+ *
 44+ * a & b true if a and b are both true
 45+ * a | b true if either a or b is true
 46+ * a ^ b true if either a or b is true, but not if both are true
 47+ * a + b arithmetic
 48+ * a - b
 49+ * a * b
 50+ * a / b
 51+ * a % b
 52+ * a ** b power-of (a^b)
 53+ * a in b true if the string "b" contains the substring "a"
 54+ * !a true if a is false
 55+ * (a) same value as a
 56+ * a ? b : c if a is true, returns the value of b, otherwise c
 57+ * a == b comparison operators
 58+ * a != b
 59+ * a < b
 60+ * a <= b
 61+ * a > b
 62+ * a >= b
 63+ * a === b returns true if a==b and both are the same type
 64+ * a !== b return true if a != b or they are different types
 65+ *
 66+ * The parser uses afp::datum for its variables. This means it supports
 67+ * strings, ints and floats, with automatic conversion between types.
 68+ */
 69+
2070 namespace px = phoenix;
2171
2272 namespace afp {
@@ -24,6 +74,10 @@
2575 parse_error(char const *what) : std::runtime_error(what) {}
2676 };
2777
 78+/*
 79+ * The parser stores the result of each grammar rule in a closure. Most rules
 80+ * use the parser_closure, which simply stores a single value.
 81+ */
2882 struct parser_closure : boost::spirit::closure<parser_closure, datum>
2983 {
3084 member1 val;
@@ -45,6 +99,11 @@
46100
47101 }
48102
 103+/*
 104+ * This is the closure types for functions. 'val' stores the final result of
 105+ * the function call; func and args store the function object and the parsed
 106+ * arguments.
 107+ */
49108 struct function_closure : boost::spirit::closure<
50109 function_closure,
51110 datum,
@@ -56,6 +115,9 @@
57116 member3 args;
58117 };
59118
 119+/*
 120+ * The closure for the ?: operator. Parsed as expr ? iftrue : iffalse.
 121+ */
60122 struct ternary_closure : boost::spirit::closure<
61123 ternary_closure,
62124 datum,
@@ -67,15 +129,21 @@
68130 member3 iffalse;
69131 };
70132
 133+/*
 134+ * The grammar itself.
 135+ */
71136 struct parser_grammar : public grammar<parser_grammar, parser_closure::context_t>
72137 {
 138+ /* User-defined variables. */
73139 symbols<datum> variables;
74 - symbols<boost::function<datum (std::vector<datum>)> > functions;
75140
76141 void add_variable(std::string const &name, datum const &value) {
77142 variables.add(name.c_str(), value);
78143 }
79144
 145+ /* User-defined functions. */
 146+ symbols<boost::function<datum (std::vector<datum>)> > functions;
 147+
80148 void add_function(std::string const &name, boost::function<datum (std::vector<datum>)> func) {
81149 functions.add(name.c_str(), func);
82150 }
@@ -87,6 +155,9 @@
88156
89157 parser_grammar const &self_;
90158
 159+ /*
 160+ * A phoenix actor to append its argument to a container.
 161+ */
91162 struct push_back_impl {
92163 template<typename C, typename I>
93164 struct result {
@@ -101,6 +172,10 @@
102173
103174 phoenix::function<push_back_impl> const push_back;
104175
 176+ /*
 177+ * A phoenix actor to call a user-defined function given the
 178+ * function object and arguments.
 179+ */
105180 struct call_function_impl {
106181 template<typename F, typename A>
107182 struct result {
@@ -120,21 +195,33 @@
121196 , push_back(push_back_impl())
122197 , call_function(call_function_impl())
123198 {
 199+ /*
 200+ * A literal value. Either a string, a floating
 201+ * pointer number or an integer.
 202+ */
124203 value =
125 - real_p[value.val = arg1]
 204+ strict_real_p[value.val = arg1]
126205 | int_p[value.val = arg1]
127206 | confix_p('"', *c_escape_ch_p, '"')[
128207 value.val = construct_<std::string>(arg1 + 1, arg2 - 1)
129208 ]
130209 ;
131210
132 - /* a sequence of uppercase letters is a variable */
 211+ /*
 212+ * A variable. If the variable is found in the
 213+ * user-supplied variable list, we use that.
 214+ * Otherwise, unknown variables (containing uppercase
 215+ * letters and underscore only) are returned as the
 216+ * empty string.
 217+ */
133218 variable =
134219 self.variables[variable.val = arg1]
135 - | (+upper_p)[variable.val = ""]
 220+ | (+ (upper_p | '_') )[variable.val = ""]
136221 ;
137222
138 - /* func(value) */
 223+ /*
 224+ * A function call: func([arg[, arg...]]).
 225+ */
139226 function =
140227 (
141228 self.functions[function.func = arg1]
@@ -144,6 +231,11 @@
145232 ) [function.val = call_function(function.func, function.args)]
146233 ;
147234
 235+ /*
 236+ * A basic atomic value. Either a variable, function
 237+ * or literal, or a negated expression !a, or a
 238+ * parenthesised expression (a).
 239+ */
148240 basic =
149241 ( '(' >> tern_expr[basic.val = arg1] >> ')' )
150242 | ch_p('!') >> tern_expr[basic.val = !arg1]
@@ -152,6 +244,9 @@
153245 | value[basic.val = arg1]
154246 ;
155247
 248+ /*
 249+ * "a in b" operator
 250+ */
156251 in_expr =
157252 basic[in_expr.val = arg1]
158253 >> *(
@@ -159,17 +254,32 @@
160255 )
161256 ;
162257
 258+ /*
 259+ * power-of. This is right-associative.
 260+ */
 261+ pow_expr =
 262+ in_expr[pow_expr.val = arg1]
 263+ >> !(
 264+ "**" >> pow_expr[pow_expr.val = bind(&afp::pow)(pow_expr.val, arg1)]
 265+ )
 266+ ;
163267
 268+ /*
 269+ * Multiplication and operators with the same
 270+ * precedence.
 271+ */
164272 mult_expr =
165 - in_expr[mult_expr.val = arg1]
 273+ pow_expr[mult_expr.val = arg1]
166274 >> *(
167 - '*' >> in_expr[mult_expr.val *= arg1]
168 - | '/' >> in_expr[mult_expr.val /= arg1]
169 - | '%' >> in_expr[mult_expr.val %= arg1]
170 - | "**" >> in_expr[mult_expr.val = bind(&afp::pow)(mult_expr.val,arg1)]
 275+ '*' >> pow_expr[mult_expr.val *= arg1]
 276+ | '/' >> pow_expr[mult_expr.val /= arg1]
 277+ | '%' >> pow_expr[mult_expr.val %= arg1]
171278 )
172279 ;
173280
 281+ /*
 282+ * Additional and operators with the same precedence.
 283+ */
174284 plus_expr =
175285 mult_expr[plus_expr.val = arg1]
176286 >> *(
@@ -178,39 +288,54 @@
179289 )
180290 ;
181291
182 - eq_expr =
183 - plus_expr[eq_expr.val = arg1]
 292+ /*
 293+ * Ordinal comparisons and operators with the same
 294+ * precedence.
 295+ */
 296+ ord_expr =
 297+ plus_expr[ord_expr.val = arg1]
184298 >> *(
185 - "<" >> plus_expr[eq_expr.val = eq_expr.val < arg1]
186 - | ">" >> plus_expr[eq_expr.val = eq_expr.val > arg1]
187 - | "<=" >> plus_expr[eq_expr.val = eq_expr.val <= arg1]
188 - | ">=" >> plus_expr[eq_expr.val = eq_expr.val >= arg1]
 299+ "<" >> plus_expr[ord_expr.val = ord_expr.val < arg1]
 300+ | ">" >> plus_expr[ord_expr.val = ord_expr.val > arg1]
 301+ | "<=" >> plus_expr[ord_expr.val = ord_expr.val <= arg1]
 302+ | ">=" >> plus_expr[ord_expr.val = ord_expr.val >= arg1]
189303 )
190304 ;
191305
192 - eq2_expr =
193 - eq_expr[eq2_expr.val = arg1]
 306+ /*
 307+ * Equality comparisons.
 308+ */
 309+ eq_expr =
 310+ ord_expr[eq_expr.val = arg1]
194311 >> *(
195 - "==" >> eq_expr[eq2_expr.val = eq2_expr.val == arg1]
196 - | "!=" >> eq_expr[eq2_expr.val = eq2_expr.val != arg1]
197 - | "===" >> eq_expr[eq2_expr.val =
198 - bind(&datum::compare_with_type)(eq2_expr.val, arg1)]
199 - | "!==" >> eq_expr[eq2_expr.val =
200 - !bind(&datum::compare_with_type)(eq2_expr.val, arg1)]
 312+ "==" >> eq_expr[eq_expr.val = eq_expr.val == arg1]
 313+ | "!=" >> eq_expr[eq_expr.val = eq_expr.val != arg1]
 314+ | "===" >> eq_expr[eq_expr.val =
 315+ bind(&datum::compare_with_type)(eq_expr.val, arg1)]
 316+ | "!==" >> eq_expr[eq_expr.val =
 317+ !bind(&datum::compare_with_type)(eq_expr.val, arg1)]
201318 )
202319 ;
203320
 321+ /*
 322+ * Boolean expressions.
 323+ */
204324 bool_expr =
205 - eq2_expr[bool_expr.val = arg1]
 325+ eq_expr[bool_expr.val = arg1]
206326 >> *(
207 - '&' >> eq2_expr[bool_expr.val = bool_expr.val && arg1]
208 - | '|' >> eq2_expr[bool_expr.val = bool_expr.val || arg1]
209 - | '^' >> eq2_expr[bool_expr.val =
 327+ '&' >> eq_expr[bool_expr.val = bool_expr.val && arg1]
 328+ | '|' >> eq_expr[bool_expr.val = bool_expr.val || arg1]
 329+ | '^' >> eq_expr[bool_expr.val =
210330 ((bool_expr.val || arg1)
211331 && !(bool_expr.val && arg1)) ]
212332 )
213333 ;
214334
 335+ /*
 336+ * The ternary operator. Notice this is
 337+ * right-associative: a ? b ? c : d : e
 338+ * is supported.
 339+ */
215340 tern_expr =
216341 bool_expr[tern_expr.val = arg1]
217342 >> !(
@@ -222,6 +347,9 @@
223348 )
224349 ;
225350
 351+ /*
 352+ * The root expression type.
 353+ */
226354 expr = tern_expr[self.val = arg1];
227355 }
228356
@@ -230,7 +358,7 @@
231359 }
232360
233361 rule_t value, variable, basic, bool_expr,
234 - eq_expr, eq2_expr, mult_expr, plus_expr, in_expr, expr;
 362+ ord_expr, eq_expr, pow_expr, mult_expr, plus_expr, in_expr, expr;
235363 rule<ScannerT, function_closure::context_t> function;
236364 rule<ScannerT, ternary_closure::context_t> tern_expr;
237365 };
@@ -246,6 +374,10 @@
247375 delete grammar_;
248376 }
249377
 378+/*
 379+ * The user interface to evaluate an expression. It returns the result, or
 380+ * throws an exception if an error occurs.
 381+ */
250382 datum
251383 expressor::evaluate(std::string const &filter) const
252384 {

Status & tagging log