r39069 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r39068‎ | r39069 | r39070 >
Date:15:36, 10 August 2008
Author:river
Status:old
Tags:
Comment:
cleanups
Modified paths:
  • /trunk/extensions/AbuseFilter/parser_native/ast.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parser.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parserdefs.h (added) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/ast.h
@@ -12,6 +12,8 @@
1313 #ifndef AST_H
1414 #define AST_H
1515
 16+#include "parserdefs.h"
 17+
1618 namespace {
1719
1820 template<typename charT>
@@ -32,7 +34,6 @@
3335 str++;
3436 }
3537
36 - std::cerr << "hex2int: " << ret << '\n';
3738 return ret;
3839 }
3940
@@ -381,58 +382,58 @@
382383 ast_evaluator<charT, iterator>::tree_eval(iterator const &i)
383384 {
384385 switch (i->value.id().to_long()) {
385 - case parser_grammar<charT>::id_value:
 386+ case pid_value:
386387 return ast_eval_num(
387388 basic_fray<charT>(i->value.begin(), i->value.end()));
388389
389 - case parser_grammar<charT>::id_string:
 390+ case pid_string:
390391 return ast_eval_string(basic_fray<charT>(i->value.begin(), i->value.end()));
391392
392 - case parser_grammar<charT>::id_basic:
 393+ case pid_basic:
393394 return ast_eval_basic(*i->value.begin(), i->children.begin());
394395
395 - case parser_grammar<charT>::id_variable:
 396+ case pid_variable:
396397 return ast_eval_variable(basic_fray<charT>(i->value.begin(), i->value.end()));
397398
398 - case parser_grammar<charT>::id_function:
 399+ case pid_function:
399400 return ast_eval_function(
400401 basic_fray<charT>(i->value.begin(), i->value.end()),
401402 i->children.begin(), i->children.end());
402403
403 - case parser_grammar<charT>::id_in_expr:
 404+ case pid_in_expr:
404405 return ast_eval_in(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
405406
406 - case parser_grammar<charT>::id_bool_expr:
 407+ case pid_bool_expr:
407408 return ast_eval_bool(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
408409
409 - case parser_grammar<charT>::id_plus_expr:
 410+ case pid_plus_expr:
410411 return ast_eval_plus(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
411412
412 - case parser_grammar<charT>::id_mult_expr:
 413+ case pid_mult_expr:
413414 return ast_eval_mult(*i->value.begin(), i->children.begin(), i->children.begin() + 1);
414415
415 - case parser_grammar<charT>::id_pow_expr:
 416+ case pid_pow_expr:
416417 return ast_eval_pow(i->children.begin(), i->children.begin() + 1);
417418
418 - case parser_grammar<charT>::id_ord_expr:
 419+ case pid_ord_expr:
419420 return ast_eval_ord(
420421 basic_fray<charT>(i->value.begin(), i->value.end()),
421422 i->children.begin(), i->children.begin() + 1);
422423
423 - case parser_grammar<charT>::id_eq_expr:
 424+ case pid_eq_expr:
424425 return ast_eval_eq(
425426 basic_fray<charT>(i->value.begin(), i->value.end()),
426427 i->children.begin(), i->children.begin() + 1);
427428
428 - case parser_grammar<charT>::id_tern_expr:
 429+ case pid_tern_expr:
429430 return ast_eval_tern(
430431 i->children.begin(),
431432 i->children.begin() + 1,
432433 i->children.begin() + 2);
433434
434435 default:
435 - std::cerr << "warning: unmatched expr type " << i->value.id().to_long() << "\n";
436 - return basic_datum<charT>::from_int(0);
 436+ throw parse_error(
 437+ str(boost::format("internal error: unmatched expr type %d") % i->value.id().to_long()));
437438 }
438439 }
439440
Index: trunk/extensions/AbuseFilter/parser_native/parser.h
@@ -39,6 +39,7 @@
4040 #include "affunctions.h"
4141 #include "fray.h"
4242 #include "ast.h"
 43+#include "parserdefs.h"
4344
4445 namespace afp {
4546
@@ -82,11 +83,12 @@
8384 *
8485 * expressor e;
8586 * e.add_variable("ONE", 1);
 87+ * e.add_function("f", myfunc);
8688 * e.evaluate("ONE + 2"); -- returns 3
8789 *
8890 * Custom functions should have the following prototype:
8991 *
90 - * datum (std::vector<afp::datum) const &args);
 92+ * afp::basic_datum<charT> (std::vector<afp::basic_datum<charT> > const &args);
9193 *
9294 * Functions must return a value; they cannot be void. The arguments passed to
9395 * the function are stored in the 'args' array in left-to-right order.
@@ -94,56 +96,52 @@
9597 * The parser implements a C-like grammar with some differences. The following
9698 * operators are available:
9799 *
98 - * a & b true if a and b are both true
99 - * a | b true if either a or b is true
100 - * a ^ b true if either a or b is true, but not if both are true
101 - * a + b arithmetic
 100+ * a & b true if a and b are both true
 101+ * a | b true if either a or b is true
 102+ * a ^ b true if either a or b is true, but not if both are true
 103+ * a + b arithmetic
102104 * a - b
103105 * a * b
104106 * a / b
105107 * a % b
106 - * a ** b power-of (a^b)
107 - * a in b true if the string "b" contains the substring "a"
108 - * !a true if a is false
109 - * (a) same value as a
110 - * a ? b : c if a is true, returns the value of b, otherwise c
111 - * a == b comparison operators
 108+ * a ** b power-of (a^b)
 109+ * a in b true if the string "b" contains the substring "a"
 110+ * a contains b true if a contains the string b
 111+ * a like b true if a matches the Unix glob b
 112+ * a matches b '' ''
 113+ * a rlike b true if a matches the Perl regex b
 114+ * a regex b '' ''
 115+ * !a true if a is false
 116+ * (a) same value as a
 117+ * a ? b : c if a is true, returns the value of b, otherwise c
 118+ * a == b comparison operators
112119 * a != b
113120 * a < b
114121 * a <= b
115122 * a > b
116123 * a >= b
117 - * a === b returns true if a==b and both are the same type
118 - * a !== b return true if a != b or they are different types
 124+ * a === b returns true if a==b and both are the same type
 125+ * a !== b returns true if a != b or they are different types
119126 *
120127 * The parser uses afp::datum for its variables. This means it supports
121128 * strings, ints and floats, with automatic conversion between types.
 129+ *
 130+ * String constants are C-style. The standard C escapes \a \b \f \t \r \n \v are
 131+ * supported. \xHH encodes a 1-byte Unicode character, \uHHHH encodes a 2-byte
 132+ * Unicode character, and \UHHHHHHHH encodes a 4-byte Unicode character.
 133+ *
 134+ * Numeric constants can be integers (e.g. 1), or floating-point numbers (e.g.
 135+ * 1., .1, 1.2).
 136+ *
 137+ * Function calls are f(arg1, arg2, ...).
122138 */
123139
124 -struct parse_error : std::runtime_error {
125 - parse_error(char const *what) : std::runtime_error(what) {}
126 -};
127 -
128140 /*
129141 * The grammar itself.
130142 */
131143 template<typename charT>
132144 struct parser_grammar : public grammar<parser_grammar<charT> >
133145 {
134 - static const int id_value = 1;
135 - static const int id_variable = 2;
136 - static const int id_basic = 3;
137 - static const int id_bool_expr = 4;
138 - static const int id_ord_expr = 5;
139 - static const int id_eq_expr = 6;
140 - static const int id_pow_expr = 7;
141 - static const int id_mult_expr = 8;
142 - static const int id_plus_expr = 9;
143 - static const int id_in_expr = 10;
144 - static const int id_function = 12;
145 - static const int id_tern_expr = 13;
146 - static const int id_string = 14;
147 -
148146 /* User-defined variables. */
149147 symbols<basic_datum<charT>, charT > variables;
150148
@@ -245,10 +243,7 @@
246244 * letters and underscore only) are returned as the
247245 * empty string.
248246 */
249 - variable = longest_d[
250 - self.variables
251 - | leaf_node_d[ (+ (upper_p | '_') ) ]
252 - ]
 247+ variable = leaf_node_d[ +(upper_p | '_') ]
253248 ;
254249
255250 /*
@@ -256,7 +251,9 @@
257252 */
258253 function =
259254 (
260 - root_node_d[self.functions]
 255+ root_node_d[ leaf_node_d[
 256+ +(lower_p | '_')
 257+ ] ]
261258 >> inner_node_d[
262259 '('
263260 >> ( tern_expr % discard_node_d[ch_p(',')] )
@@ -352,26 +349,26 @@
353350 ;
354351 }
355352
356 - rule<ScannerT, parser_context<>, parser_tag<id_tern_expr> >
 353+ rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> >
357354 const &start() const {
358355 return tern_expr;
359356 }
360357
361358 rule<ScannerT> c_string_char, hexchar, octchar;
362 - rule<ScannerT, parser_context<>, parser_tag<id_value> > value;
363 - rule<ScannerT, parser_context<>, parser_tag<id_variable> > variable;
364 - rule<ScannerT, parser_context<>, parser_tag<id_basic> > basic;
365 - rule<ScannerT, parser_context<>, parser_tag<id_bool_expr> > bool_expr;
366 - rule<ScannerT, parser_context<>, parser_tag<id_ord_expr> > ord_expr;
367 - rule<ScannerT, parser_context<>, parser_tag<id_eq_expr> > eq_expr;
368 - rule<ScannerT, parser_context<>, parser_tag<id_pow_expr> > pow_expr;
369 - rule<ScannerT, parser_context<>, parser_tag<id_mult_expr> > mult_expr;
370 - rule<ScannerT, parser_context<>, parser_tag<id_plus_expr> > plus_expr;
371 - rule<ScannerT, parser_context<>, parser_tag<id_in_expr> > in_expr;
 359+ rule<ScannerT, parser_context<>, parser_tag<pid_value> > value;
 360+ rule<ScannerT, parser_context<>, parser_tag<pid_variable> > variable;
 361+ rule<ScannerT, parser_context<>, parser_tag<pid_basic> > basic;
 362+ rule<ScannerT, parser_context<>, parser_tag<pid_bool_expr> > bool_expr;
 363+ rule<ScannerT, parser_context<>, parser_tag<pid_ord_expr> > ord_expr;
 364+ rule<ScannerT, parser_context<>, parser_tag<pid_eq_expr> > eq_expr;
 365+ rule<ScannerT, parser_context<>, parser_tag<pid_pow_expr> > pow_expr;
 366+ rule<ScannerT, parser_context<>, parser_tag<pid_mult_expr> > mult_expr;
 367+ rule<ScannerT, parser_context<>, parser_tag<pid_plus_expr> > plus_expr;
 368+ rule<ScannerT, parser_context<>, parser_tag<pid_in_expr> > in_expr;
372369
373 - rule<ScannerT, parser_context<>, parser_tag<id_function> > function;
374 - rule<ScannerT, parser_context<>, parser_tag<id_tern_expr> > tern_expr;
375 - rule<ScannerT, parser_context<>, parser_tag<id_string> > string;
 370+ rule<ScannerT, parser_context<>, parser_tag<pid_function> > function;
 371+ rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> > tern_expr;
 372+ rule<ScannerT, parser_context<>, parser_tag<pid_string> > string;
376373 };
377374 };
378375
@@ -414,7 +411,7 @@
415412 basic_datum<charT> ret;
416413
417414 tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_,
418 - +chset<>("\n\t ") | comment_p("/*", "*/"));
 415+ chset<>("\r\n\t ") | comment_p("/*", "*/"));
419416
420417 if (info.full) {
421418 ast_evaluator<charT, typename tree_match<iterator_t>::tree_iterator> ae(*grammar_);
Index: trunk/extensions/AbuseFilter/parser_native/parserdefs.h
@@ -0,0 +1,48 @@
 2+/*
 3+ * Copyright (c) 2008 Andrew Garrett.
 4+ * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 5+ * Derived from public domain code contributed by Victor Vasiliev.
 6+ *
 7+ * Permission is granted to anyone to use this software for any purpose,
 8+ * including commercial applications, and to alter it and redistribute it
 9+ * freely. This software is provided 'as-is', without any express or
 10+ * implied warranty.
 11+ */
 12+
 13+#ifndef PARSERDEFS_H
 14+#define PARSERDEFS_H
 15+
 16+#define pid_value 1
 17+#define pid_variable 2
 18+#define pid_basic 3
 19+#define pid_bool_expr 4
 20+#define pid_ord_expr 5
 21+#define pid_eq_expr 6
 22+#define pid_pow_expr 7
 23+#define pid_mult_expr 8
 24+#define pid_plus_expr 9
 25+#define pid_in_expr 10
 26+#define pid_function 12
 27+#define pid_tern_expr 13
 28+#define pid_string 14
 29+
 30+namespace afp {
 31+
 32+struct parse_error : std::exception {
 33+ parse_error(std::string const &what)
 34+ : what_(what)
 35+ {}
 36+
 37+ ~parse_error() throw() {}
 38+
 39+ char const *what() const throw() {
 40+ return what_.c_str();
 41+ }
 42+
 43+private:
 44+ std::string what_;
 45+};
 46+
 47+} // namespace afp
 48+
 49+#endif /* !PARSERDEFS_H */

Status & tagging log