r38822 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r38821 | r38822 | r38823 >
Date: 00:01, 8 August 2008
Author: river
Status: old
Tags:
Comment:
- rewrote AFPData to use boost::variant (type safe, no manual memory management)
- rewrote parser to use boost.spirit instead of a hand-written parser
- refactored request loading into 'request' object
- added 'expr', a command-line tool to test the new parser
- some performance fixes for affunctions
Modified paths:
  • /trunk/extensions/AbuseFilter/parser_native/afeval.cpp (deleted) (history)
  • /trunk/extensions/AbuseFilter/parser_native/afeval.h (deleted) (history)
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/aftypes.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/aftypes.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/check.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/eval.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/filter_evaluator.cpp (added) (history)
  • /trunk/extensions/AbuseFilter/parser_native/filter_evaluator.h (added) (history)
  • /trunk/extensions/AbuseFilter/parser_native/main.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/makefile (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parser.cpp (added) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parser.h (added) (history)
  • /trunk/extensions/AbuseFilter/parser_native/request.cpp (added) (history)
  • /trunk/extensions/AbuseFilter/parser_native/request.h (added) (history)
  • /trunk/extensions/AbuseFilter/parser_native/syntax_check.cpp (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/afeval.cpp
@@ -1,374 +0,0 @@
2 -#include "afeval.h"
3 -#include "affunctions.h"
4 -
5 -void FilterEvaluator::reset() {
6 - this->vars.clear();
7 - this->tokens.clear();
8 - this->cur = AFPToken();
9 - this->pos = 0;
10 - this->forceResult = false;
11 -}
12 -
13 -void FilterEvaluator::setVar( string key, AFPData value ) {
14 - this->vars[key] = AFPData(value);
15 -}
16 -
17 -void FilterEvaluator::setVars( map<string,AFPData> values ) {
18 - for (map<string,AFPData>::iterator it=values.begin();it!=values.end();++it) {
19 - this->setVar( it->first, it->second );
20 - }
21 -}
22 -
23 -string FilterEvaluator::evaluateExpression( string code ) {
24 - this->pos = 0;
25 -
26 - if (this->tokenCache.find(code) != this->tokenCache.end()) {
27 - this->tokens = this->tokenCache[code];
28 - } else {
29 - this->tokenCache[code] = this->tokens = af_parse( code );
30 - }
31 -
32 - if (this->tokenCache.size() > 100) {
33 - this->tokenCache.clear();
34 - }
35 -
36 - this->cur = this->tokens[0];
37 -
38 - AFPData result;
39 -
40 - this->doLevelEntry( &result );
41 -
42 - return result.toString();
43 -}
44 -
45 -// TODO: Merge these two functions
46 -
47 -bool FilterEvaluator::evaluateFilter( string code ) {
48 - this->pos = 0;
49 -
50 - if (this->tokenCache.find(code) != this->tokenCache.end()) {
51 - this->tokens = this->tokenCache[code];
52 - } else {
53 - this->tokenCache[code] = this->tokens = af_parse( code );
54 - }
55 -
56 - if (this->tokenCache.size() > 100) {
57 - this->tokenCache.clear();
58 - }
59 -
60 - this->cur = this->tokens[0];
61 -
62 - AFPData result;
63 -
64 - this->doLevelEntry( &result );
65 -
66 - return result.toBool();
67 -}
68 -
69 -bool FilterEvaluator::move() { this->move(1); }
70 -
71 -bool FilterEvaluator::move( int shift ) {
72 - this->pos += shift;
73 -
74 - if (this->pos >= 0 && this->pos < this->tokens.size()) {
75 - this->cur = this->tokens[this->pos];
76 - return true;
77 - } else {
78 - this->pos -= shift;
79 - return false;
80 - }
81 -}
82 -
83 -void FilterEvaluator::doLevelEntry( AFPData* result ) {
84 - this->doLevelSet( result );
85 -
86 - if (this->cur.type != T_NONE) {
87 - throw AFPException( "Unexpected tokens at end." );
88 - }
89 -}
90 -
91 -void FilterEvaluator::doLevelSet( AFPData* result ) {
92 - if (this->cur.type == T_ID) {
93 - string varName = this->cur.value;
94 -
95 - this->move();
96 -
97 - if (this->cur.type == T_OP && this->cur.value == "=") {
98 - this->move();
99 - this->doLevelSet( result );
100 - this->vars[varName] = AFPData(result);
101 - return;
102 - }
103 - this->move(-1);
104 - }
105 -
106 - this->doLevelBoolOps( result );
107 -}
108 -
109 -vector<string> getOpsForType( string type ) {
110 - static map<string,vector<string> > oft;
111 -
112 - if (oft.empty()) {
113 - vector<string> cv;
114 -
115 - cv = vector<string>();
116 - cv.push_back("&");
117 - cv.push_back("|");
118 - cv.push_back("^");
119 - oft["bool"] = cv;
120 -
121 - cv = vector<string>();
122 - cv.push_back( "==" );
123 - cv.push_back( "===" );
124 - cv.push_back( "!=" );
125 - cv.push_back( "!==" );
126 - cv.push_back( "<" );
127 - cv.push_back( ">" );
128 - cv.push_back( "<=" );
129 - cv.push_back( ">=" );
130 - oft["compare"] = cv;
131 -
132 - cv = vector<string>();
133 - cv.push_back( "*" );
134 - cv.push_back( "/" );
135 - cv.push_back( "%" );
136 - oft["mulrel"] = cv;
137 -
138 - cv = vector<string>();
139 - cv.push_back( "+" );
140 - cv.push_back( "-" );
141 - oft["sumrel"] = cv;
142 -
143 - cv = vector<string>();
144 - cv.push_back( "in" );
145 - cv.push_back( "like" );
146 - oft["special"] = cv;
147 - }
148 -
149 - return oft[type];
150 -}
151 -
152 -void FilterEvaluator::doLevelBoolOps( AFPData* result ) {
153 - bool setForce = false;
154 -
155 - this->doLevelCompares( result );
156 -
157 - vector<string> ops = getOpsForType( "bool" );
158 -
159 - while ( this->cur.type == T_OP && isInVector( this->cur.value, ops ) ) {
160 - string op = this->cur.value;
161 - this->move();
162 - AFPData r2;
163 -
164 - if (!this->forceResult && op == "&" && !result->toBool()) {
165 - setForce = true;
166 - this->forceResult = true;
167 - } else if (!this->forceResult && op == "|" && result->toBool()) {
168 - setForce = true;
169 - this->forceResult = true;
170 - }
171 -
172 - this->doLevelCompares( &r2 );
173 -
174 - if (!this->forceResult) {
175 - *result = af_boolOp( *result, r2, op );
176 - } else if (setForce) {
177 - setForce = false;
178 - this->forceResult = false;
179 - }
180 - }
181 -
182 - if (setForce)
183 - this->forceResult = false;
184 -}
185 -
186 -void FilterEvaluator::doLevelCompares( AFPData* result ) {
187 - this->doLevelMulRels( result );
188 - vector<string> ops = getOpsForType( "compare" );
189 -
190 - while (this->cur.type == T_OP && isInVector( this->cur.value, ops ) ) {
191 - string op = this->cur.value;
192 - this->move();
193 - AFPData r2;
194 -
195 - this->doLevelMulRels( &r2 );
196 -
197 - if (!this->forceResult)
198 - *result = af_compareOp( *result, r2, op );
199 - }
200 -}
201 -
202 -void FilterEvaluator::doLevelMulRels( AFPData* result ) {
203 - this->doLevelSumRels( result );
204 - vector<string> ops = getOpsForType( "mulrel" );
205 -
206 - while (this->cur.type == T_OP && isInVector( this->cur.value, ops ) ) {
207 - string op = this->cur.value;
208 - this->move();
209 - AFPData r2;
210 -
211 - this->doLevelSumRels( &r2 );
212 -
213 - if (!this->forceResult)
214 - *result = af_mulRel( *result, r2, op );
215 - }
216 -}
217 -
218 -void FilterEvaluator::doLevelSumRels( AFPData* result ) {
219 - this->doLevelPow( result );
220 - vector<string> ops = getOpsForType( "sumrel" );
221 -
222 - while (this->cur.type == T_OP && isInVector( this->cur.value, ops ) ) {
223 - string op = this->cur.value;
224 - this->move();
225 - AFPData r2;
226 -
227 - this->doLevelPow( &r2 );
228 -
229 - if (op == "+") {
230 - if (!this->forceResult)
231 - *result = af_sum( *result, r2 );
232 - } else if (op == "-") {
233 - if (!this->forceResult)
234 - *result = af_sub( *result, r2 );
235 - }
236 - }
237 -}
238 -
239 -void FilterEvaluator::doLevelPow( AFPData* result ) {
240 - this->doLevelBoolInvert( result );
241 -
242 - while (this->cur.type == T_OP && this->cur.value == "**" ) {
243 - this->move();
244 - AFPData exp;
245 -
246 - this->doLevelBoolInvert( &exp );
247 -
248 - if (!this->forceResult)
249 - *result = af_pow( *result, exp );
250 - }
251 -}
252 -
253 -void FilterEvaluator::doLevelBoolInvert( AFPData* result ) {
254 - if (this->cur.type == T_OP && this->cur.value == "!") {
255 - this->move();
256 - this->doLevelSpecialWords( result );
257 - if (!this->forceResult)
258 - *result = af_boolInvert( *result );
259 - } else {
260 - this->doLevelSpecialWords( result );
261 - }
262 -}
263 -
264 -void FilterEvaluator::doLevelSpecialWords( AFPData* result ) {
265 - this->doLevelUnarys( result );
266 - vector<string> specwords = getOpsForType( "special" );
267 -
268 - if (this->cur.type == T_KEYWORD && isInVector( this->cur.value, specwords )) {
269 - string keyword = this->cur.value;
270 -
271 - this->move();
272 - AFPData r2 = AFPData();
273 - this->doLevelUnarys( &r2 );
274 -
275 - if (!this->forceResult)
276 - *result = af_keyword( keyword, *result, r2 );
277 - }
278 -}
279 -
280 -void FilterEvaluator::doLevelUnarys( AFPData* result ) {
281 - if (this->cur.type == T_OP && (this->cur.value == "+" || this->cur.value == "-") ) {
282 - this->move();
283 - this->doLevelBraces( result );
284 - if (this->cur.value == "-") {
285 - if (!this->forceResult)
286 - *result = af_unaryMinus( *result );
287 - }
288 - } else {
289 - this->doLevelBraces( result );
290 - }
291 -}
292 -
293 -void FilterEvaluator::doLevelBraces( AFPData* result ) {
294 - if (this->cur.type == T_BRACE && this->cur.value == "(") {
295 - this->move();
296 - this->doLevelSet( result );
297 -
298 - if ( !(this->cur.type == T_BRACE && this->cur.value == ")") ) {
299 - throw AFPException( "Expected ')' at pos %d", this->cur.pos );
300 - }
301 - this->move();
302 - } else {
303 - this->doLevelFunction( result );
304 - }
305 -}
306 -
307 -void FilterEvaluator::doLevelFunction( AFPData* result ) {
308 - if ( this->cur.type == T_ID && isFunction( this->cur.value ) ) {
309 - string func = this->cur.value;
310 - this->move();
311 -
312 - if (this->cur.type != T_BRACE || this->cur.value != "(") {
313 - throw AFPException( "Expected (" );
314 - }
315 - this->move();
316 -
317 - vector<AFPData> args = vector<AFPData>();
318 -
319 - if (this->cur.type != T_BRACE || this->cur.value != ")") {
320 - this->move(-1);
321 - do {
322 - this->move();
323 - AFPData r = AFPData();
324 - this->doLevelSet( &r );
325 - args.push_back( r );
326 - } while (this->cur.type == T_COMMA );
327 - }
328 -
329 - if (!this->forceResult)
330 - *result = callFunction( func, args );
331 - this->move();
332 - } else {
333 - this->doLevelAtom( result );
334 - }
335 -}
336 -
337 -void FilterEvaluator::doLevelAtom( AFPData* result ) {
338 - string tok = this->cur.value;
339 -
340 - switch (this->cur.type) {
341 - case T_ID:
342 - if (this->vars.find(tok) != this->vars.end()) {
343 - *result = this->vars[tok];
344 - } else {
345 - *result = AFPData();
346 - }
347 - break;
348 - case T_STRING:
349 - case T_NUMBER:
350 - *result = AFPData( tok );
351 - break;
352 - case T_KEYWORD:
353 - if (tok == "true") {
354 - *result = AFPData( true );
355 - } else if (tok == "false") {
356 - *result = AFPData( false );
357 - } else if (tok == "null") {
358 - *result = AFPData();
359 - } else {
360 - throw AFPException( "Unidentifiable keyword" );
361 - }
362 - break;
363 - case T_BRACE:
364 - if (tok == ")") {
365 - return;
366 - }
367 - break;
368 - case T_COMMA:
369 - return;
370 - break;
371 - default: throw AFPException( "Unexpected token value %s", this->cur.value.c_str() );
372 - }
373 -
374 - this->move();
375 -}
Index: trunk/extensions/AbuseFilter/parser_native/afeval.h
@@ -1,45 +0,0 @@
2 -#ifndef AFEVAL_H
3 -#define AFEVAL_H
4 -
5 -#include "afparser.h"
6 -#include "afutils.h"
7 -// #include "aftypes.h"
8 -#include <map>
9 -
10 -class FilterEvaluator {
11 - public:
12 - void reset();
13 - void setVar( string key, AFPData value );
14 - void setVars( map<string,AFPData> values );
15 - bool evaluateFilter( string code );
16 - string evaluateExpression( string code );
17 - protected:
18 - bool move();
19 - bool move( int shift );
20 - void doLevelEntry( AFPData* result );
21 - void doLevelSet( AFPData* result );
22 - void doLevelBoolOps( AFPData* result );
23 - void doLevelCompares( AFPData* result );
24 - void doLevelMulRels( AFPData* result );
25 - void doLevelSumRels( AFPData* result );
26 - void doLevelPow( AFPData* result );
27 - void doLevelBoolInvert( AFPData* result );
28 - void doLevelSpecialWords( AFPData* result );
29 - void doLevelUnarys( AFPData* result );
30 - void doLevelBraces( AFPData* result );
31 - void doLevelFunction( AFPData* result );
32 - void doLevelAtom( AFPData* result );
33 -
34 - AFPToken cur;
35 - vector<AFPToken> tokens;
36 - map<string, vector<AFPToken> > tokenCache;
37 - unsigned int pos;
38 - map<string,AFPData> vars;
39 - bool forceResult;
40 -};
41 -
42 -// typedef AFPData (*AFPFunction) (vector<AFPData>);
43 -
44 -vector<string> getOpsForType( string type );
45 -
46 -#endif /* !AFEVAL_H */
Index: trunk/extensions/AbuseFilter/parser_native/parser.h
@@ -0,0 +1,29 @@
 2+#ifndef EXPRESSOR_H
 3+#define EXPRESSOR_H
 4+
 5+#include <string>
 6+#include <vector>
 7+
 8+#include <boost/noncopyable.hpp>
 9+#include <boost/function.hpp>
 10+
 11+#include "aftypes.h"
 12+
 13+struct parser_grammar;
 14+
 15+struct expressor : boost::noncopyable {
 16+ typedef boost::function<AFPData (std::vector<AFPData>)> func_t;
 17+
 18+ expressor();
 19+ ~expressor();
 20+
 21+ AFPData evaluate(std::string const &expr) const;
 22+
 23+ void add_variable(std::string const &name, AFPData value);
 24+ void add_function(std::string const &name, func_t value);
 25+
 26+private:
 27+ parser_grammar *grammar_;
 28+};
 29+
 30+#endif /* !EXPRESSOR_H */
Index: trunk/extensions/AbuseFilter/parser_native/filter_evaluator.cpp
@@ -0,0 +1,33 @@
 2+#include "filter_evaluator.h"
 3+#include "parser.h"
 4+#include "affunctions.h"
 5+
 6+filter_evaluator::filter_evaluator()
 7+{
 8+ e.add_function("length", af_length);
 9+ e.add_function("lcase", af_lcase);
 10+ e.add_function("ccnorm", af_ccnorm);
 11+ e.add_function("rmdoubles", af_rmdoubles);
 12+ e.add_function("specialratio", af_specialratio);
 13+ e.add_function("rmspecials", af_rmspecials);
 14+ e.add_function("norm", af_norm);
 15+ e.add_function("count", af_count);
 16+}
 17+
 18+bool
 19+filter_evaluator::evaluate(std::string const &filter) const
 20+{
 21+ try {
 22+ return (bool) e.evaluate(filter);
 23+ } catch (std::exception &e) {
 24+ std::cerr << "can't evaluate filter: " << e.what() << '\n';
 25+ return false;
 26+ }
 27+}
 28+
 29+void
 30+filter_evaluator::add_variable(std::string const &key, AFPData value)
 31+{
 32+ e.add_variable(key, value);
 33+}
 34+
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.cpp
@@ -4,6 +4,7 @@
55 #include <iostream>
66 #include <cassert>
77 #include <algorithm>
 8+#include <cmath>
89 #include <boost/lexical_cast.hpp>
910
1011 AFPToken::AFPToken(unsigned int new_type, string new_value, unsigned int new_pos) {
@@ -13,296 +14,383 @@
1415 }
1516
1617
17 -AFPData::AFPData( unsigned int new_type, void* new_value, size_t new_size ) {
18 - this->makeData( new_type, new_value, new_size, "full constructor" );
 18+AFPData::AFPData(std::string const &var) {
 19+ _init_from_string(var);
1920 }
2021
21 -void AFPData::makeData( unsigned int new_type, void* new_value, size_t new_size, string new_source ) {
22 - this->type = new_type;
23 - this->value = new_value;
24 - this->size = new_size;
25 - this->source = new_source;
26 -
27 - if (this->type > DATATYPE_MAX) {
28 - // Something funky's going on
29 -// cerr << "Something funky. Trying to construct a datum with type " << this->type << ", source is " << new_source << endl;
30 - return;
31 - }
 22+AFPData::AFPData(char const *var)
 23+{
 24+ _init_from_string(var);
3225 }
3326
34 -AFPData::AFPData( string var ) {
35 - this->source = "string constructor";
36 -
 27+void
 28+AFPData::_init_from_string(std::string const &var)
 29+{
3730 // Try integer
3831 try {
39 - long int intval = boost::lexical_cast<long int>(var);
40 - // Valid conversion
41 - long int* val = new long int( intval );
42 - this->makeData( D_INTEGER, (void*)val, sizeof(long int), "string constructor" );
43 - return;
 32+ value_ = boost::lexical_cast<long int>(var);
4433 } catch (boost::bad_lexical_cast &e) {
4534 try {
46 - double fval = boost::lexical_cast<double>(var);
47 - double* val = new double(fval);
48 - this->makeData( D_FLOAT, (void*)val, sizeof(double), "string constructor" );
49 - return;
 35+ value_ = boost::lexical_cast<double>(var);
5036 } catch (boost::bad_lexical_cast &e) {
51 - // Last resort
52 - // Duplicate the string.
53 - string* s = new string(var);
54 - this->makeData( D_STRING, (void*)s, sizeof(string), "string constructor" );
 37+ /* If it's nothing else, it's a string */
 38+ value_ = var;
5539 }
5640 }
5741 }
5842
59 -AFPData::AFPData( AFPData old, unsigned int newType ) {
60 - if (old.type > DATATYPE_MAX) {
61 - // Non-existent type
62 - throw AFPException( "Given junk data" );
63 - }
 43+AFPData::AFPData() {
 44+}
6445
65 - if (old.type == newType) {
66 - void* newVal = 0;
67 -
68 - // Duplicate the contents.
69 - if (old.type == D_STRING) {
70 - string* s = new string();
71 - s->append(old.toString());
72 - newVal = (void*) s;
73 - } else if (old.type == D_INTEGER) {
74 - newVal = (void*) new long int(old.toInt());
75 - } else if (old.type == D_FLOAT) {
76 - newVal = (void*) new double(old.toFloat());
77 - }
78 -
79 - assert(newVal);
 46+AFPData::AFPData(AFPData const &other)
 47+ : value_(other.value_)
 48+{
 49+}
8050
81 - this->makeData( old.type, newVal, old.size, "cast constructor (copy)" );
82 - } else if (newType == 0) {
83 - this->makeData( D_NULL, NULL, 0, "cast constructor - null" );
84 - return;
85 - } else if (newType == D_INTEGER) {
86 - if (old.type==D_FLOAT) {
87 - long int* val = new long int(old.toFloat());
88 - this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - float2int" );
89 - return;
90 - } else if (old.type==D_STRING) {
91 - long int* val = new long int();
92 - istringstream ss(old.toString());
93 -
94 - ss >> *val;
95 -
96 - this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - string2int" );
97 - return;
98 - } else if (old.type==D_NULL) {
99 - long int* val = new long int(0);
100 - this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - null2int" );
101 - }// No other types possible
102 - } else if (newType == D_FLOAT) {
103 - if (old.type==D_INTEGER) {
104 - double* val = new double(old.toInt());
105 - this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - int2float" );
106 - return;
107 - } else if (old.type==D_STRING) {
108 - double* val = new double();
109 - istringstream ss(old.toString());
110 -
111 - ss >> *val;
112 -
113 - this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - string2float" );
114 - return;
115 - } else if (old.type==D_NULL) {
116 - double* val = new double(0);
117 - this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - null2float" );
118 - } // No other types possible
119 - } else if (newType == D_STRING) {
120 - if (old.type == D_INTEGER || old.type == D_FLOAT) {
121 - ostringstream ss;
122 -
123 - if (old.type == D_INTEGER) {
124 - long int val = old.toInt();
125 - ss << val;
126 - } else if (old.type == D_FLOAT) {
127 - double val = old.toFloat();
128 - ss << val;
129 - }
130 -
131 - string* str = new string(ss.str());
132 - this->makeData( D_STRING, (void*)str, sizeof(string), "cast constructor - num2string" );
133 - return;
134 - } else if (old.type==D_NULL) {
135 - string* s = new string("");
136 - this->makeData( D_STRING, (void*)s, sizeof(string), "cast constructor - null2string" );
137 - } // No other types possible
 51+AFPData::AFPData(long int var)
 52+ : value_(var)
 53+{
 54+}
 55+
 56+AFPData::AFPData(double var)
 57+ : value_(var)
 58+{
 59+}
 60+
 61+AFPData::AFPData(float var)
 62+ : value_(var)
 63+{
 64+}
 65+
 66+AFPData::AFPData(bool var)
 67+ : value_((long int) var)
 68+{
 69+}
 70+
 71+AFPData & AFPData::operator= (AFPData const &other) {
 72+ // Protect against self-assignment
 73+ if (this == &other) {
 74+ return *this;
13875 }
13976
140 - if (this->type > DATATYPE_MAX) {
141 - // Non-existent type
142 - throw AFPException( "Created junk data" );
143 - }
 77+ value_ = other.value_;
 78+ return *this;
14479 }
14580
146 -AFPData::AFPData() { this->source = "empty constructor"; this->makeData( 0, NULL, 0, "empty constructor" );}
 81+bool isInVector( string needle, vector<string> haystack ) {
 82+ return std::find(haystack.begin(), haystack.end(), needle) != haystack.end();
 83+}
14784
148 -AFPData::~AFPData() { this->release(); }
 85+/*
 86+ * Convert a string to an integer value.
 87+ */
 88+template<typename T>
 89+struct from_string_converter {
 90+ typedef T type;
14991
150 -void AFPData::release() {
151 - if (this->value == 0x0) {
152 - return;
153 - } else if (this->type > DATATYPE_MAX) {
154 - // Something funky's going on
155 -// cerr << "Something funky. Trying to destruct a datum with type " << this->type << endl;
156 - return;
 92+ static type convert(T const &v) {
 93+ return v;
15794 }
158 -
159 -// cerr << "Freeing " << this->value << " - type " << this->type << " - source " << this->source << endl;
160 -
161 - switch (this->type) {
162 - case D_FLOAT:
163 - delete (double*)this->value;
164 - break;
165 - case D_INTEGER:
166 - delete (long int*)this->value;
167 - break;
168 - case D_STRING:
169 - delete (string*)this->value;
170 - break;
171 -// default:
172 -// delete this->value;
 95+};
 96+
 97+template<>
 98+struct from_string_converter<std::string> {
 99+ typedef long int type;
 100+
 101+ template<typename T>
 102+ static type convert(T const &v) {
 103+ try {
 104+ return boost::lexical_cast<type>(v);
 105+ } catch (boost::bad_lexical_cast &e) {
 106+ return 0;
 107+ }
173108 }
174 -
175 - this->value = 0x0;
176 - this->type = D_NULL;
177 -}
 109+};
178110
179 -AFPData::AFPData( const AFPData & oldData ) {
180 - this->source = "copy constructor";
181 -
182 - if (oldData.type > DATATYPE_MAX) {
183 - // Something funky's going on
184 -// cerr << "Something funky. Trying to copy a datum with type " << oldData.type << ", source " << oldData.source << endl;
185 - return;
 111+/*
 112+ * Conversions from AFPData to other types.
 113+ */
 114+struct to_string_visitor : boost::static_visitor<std::string> {
 115+ std::string operator() (std::string const &v) const {
 116+ return v;
186117 }
187 -
188 - // Duplicate the inner data
189 - void* newVal = 0;
190 -
191 - if (oldData.type == D_STRING) {
192 - string* ival = new string();
193 - *ival = *(string*)oldData.value;
194 - newVal = (void*)ival;
195 - } else if (oldData.type == D_INTEGER) {
196 - long int* ival = new long int;
197 - *ival = *(long int*)oldData.value;
198 - newVal = (void*)ival;
199 - } else if (oldData.type == D_FLOAT) {
200 - double* ival = new double;
201 - *ival = *(double*)oldData.value;
202 - newVal = (void*)ival;
203 - } else if (oldData.type == D_NULL) {
204 - newVal = 0;
205 - } else {
206 -// cerr << "Asked to copy an unknown type " << oldData.type << endl;
 118+
 119+ template<typename T>
 120+ std::string operator() (T const &v) const {
 121+ return boost::lexical_cast<std::string>(v);
207122 }
208 -
209 - this->makeData( oldData.type, newVal, oldData.size, "copy constructor" );
210 -}
 123+};
211124
212 -long int AFPData::toInt() {
213 - if (this->type == D_INTEGER) {
214 - return *(long int*)this->value;
 125+struct to_int_visitor : boost::static_visitor<long int> {
 126+ long int operator() (std::string const &v) const {
 127+ try {
 128+ return boost::lexical_cast<long int>(v);
 129+ } catch (boost::bad_lexical_cast &e) {
 130+ return 0;
 131+ }
215132 }
216 -
217 - AFPData intData(*this,D_INTEGER);
218 -
219 - return intData.toInt();
220 -}
221133
222 -double AFPData::toFloat() {
223 - if (this->type == D_FLOAT) {
224 - return *(double*)this->value;
 134+ long int operator() (double o) const {
 135+ return (long int) o;
225136 }
226137
227 - AFPData floatData(*this,D_FLOAT);
228 -
229 - return floatData.toFloat();
230 -}
 138+ template<typename T>
 139+ long int operator() (T const &v) const {
 140+ return v;
 141+ }
 142+};
231143
232 -bool AFPData::toBool() {
233 - return (bool)this->toInt();
234 -}
 144+struct to_double_visitor : boost::static_visitor<double> {
 145+ double operator() (std::string const &v) const {
 146+ try {
 147+ return boost::lexical_cast<double>(v);
 148+ } catch (boost::bad_lexical_cast &e) {
 149+ return 0;
 150+ }
 151+ }
235152
236 -string AFPData::toString() {
237 - if (this->type == D_STRING) {
238 - return *(string*)this->value;
 153+ template<typename T>
 154+ double operator() (T const &v) const {
 155+ return v;
239156 }
240 -
241 - AFPData stringData(*this,D_STRING);
242 -
243 - return stringData.toString();
244 -}
 157+};
245158
246 -AFPData::AFPData( long int var ) {
247 - long int* i = new long int(var);
248 -
249 - this->makeData( D_INTEGER, i, sizeof(long int), "int constructor" );
 159+std::string
 160+AFPData::toString() const {
 161+ return boost::apply_visitor(to_string_visitor(), value_);
250162 }
251163
252 -AFPData::AFPData( double var ) {
253 - double* d = new double(var);
254 -
255 - this->makeData( D_FLOAT, d, sizeof(double), "double constructor" );
 164+long int
 165+AFPData::toInt() const {
 166+ return boost::apply_visitor(to_int_visitor(), value_);
256167 }
257168
258 -AFPData::AFPData( bool var ) {
259 - long int* i = new long int(var);
260 -
261 - this->makeData( D_INTEGER, i, sizeof(long int), "bool constructor" );
 169+double
 170+AFPData::toFloat() const {
 171+ return boost::apply_visitor(to_double_visitor(), value_);
262172 }
263173
264 -unsigned int AFPData::getType() { return this->type; }
 174+/* Given T and U, find the preferred type for maths (i.e. double, if present) */
 175+template<typename T, typename U>
 176+struct preferred_type {
 177+ typedef T type;
 178+};
265179
266 -AFPData & AFPData::operator= (const AFPData & oldData) {
267 - // Protect against self-assignment
268 - if (this == &oldData) {
269 - return *this;
 180+template<typename T>
 181+struct preferred_type<double, T> {
 182+ typedef double type;
 183+};
 184+
 185+template<typename T>
 186+struct preferred_type<T, double> {
 187+ typedef double type;
 188+};
 189+
 190+template<>
 191+struct preferred_type<double, double> {
 192+ typedef double type;
 193+};
 194+
 195+/*
 196+ * std::modulus doesn't work with double, so we provide our own.
 197+ */
 198+template<typename T>
 199+struct afpmodulus {
 200+ T operator() (T const &a, T const &b) const {
 201+ return a % b;
270202 }
271 -
272 - // Clear it.
273 - this->release();
274 -
275 - // NULLs and INVALID data types need no deep copy
276 - if (oldData.type > DATATYPE_MAX || oldData.type == D_NULL) {
277 - this->makeData( 0, NULL, 0, "assignment operator" );
278 - return *this;
 203+};
 204+
 205+template<>
 206+struct afpmodulus<double> {
 207+ double operator() (double const &a, double const &b) const {
 208+ return std::fmod(a, b);
279209 }
280 -
281 - // Otherwise, do a proper copy.
282 - // Duplicate the inner data
283 - void* newVal = 0;
284 - if (oldData.type == D_STRING) {
285 - string* ival = new string();
286 - *ival = *(string*)oldData.value;
287 - newVal = (void*)ival;
288 - } else if (oldData.type == D_INTEGER) {
289 - long int* ival = new long int;
290 - *ival = *(long int*)oldData.value;
291 - newVal = (void*)ival;
292 - } else if (oldData.type == D_FLOAT) {
293 - double* ival = new double;
294 - *ival = *(double*)oldData.value;
295 - newVal = (void*)ival;
296 - } else if (oldData.type == D_NULL) {
297 - newVal = 0;
298 - } else {
299 -// cerr << "Asked to copy an unknown type " << oldData.type << endl;
 210+};
 211+
 212+/*
 213+ * A visitor that performs an arithmetic operation on its arguments,
 214+ * after doing appropriate int->double promotion.
 215+ */
 216+template<template<typename V> class Operator>
 217+struct arith_visitor : boost::static_visitor<AFPData> {
 218+ /*
 219+ * Anything involving a double returns a double.
 220+ * Otherwise, int is returned.
 221+ */
 222+ template<typename T, typename U>
 223+ AFPData operator() (T const &a, U const &b) const {
 224+ typedef typename from_string_converter<T>::type a_type;
 225+ typedef typename from_string_converter<U>::type b_type;
 226+
 227+ Operator<typename preferred_type<a_type, b_type>::type> op;
 228+ return op(
 229+ from_string_converter<T>::convert(a),
 230+ from_string_converter<U>::convert(b));
300231 }
 232+};
 233+
 234+/*
 235+ * Like arith_visitor, but for equality comparisons.
 236+ */
 237+template<
 238+ template<typename V> class Operator,
 239+ typename T,
 240+ typename U>
 241+struct compare_visitor_impl {
 242+ bool operator() (T const &a, U const &b) const {
 243+ typedef typename from_string_converter<T>::type a_type;
 244+ typedef typename from_string_converter<U>::type b_type;
 245+
 246+ Operator<typename preferred_type<a_type, b_type>::type> op;
 247+ return op(
 248+ from_string_converter<T>::convert(a),
 249+ from_string_converter<U>::convert(b));
 250+ }
 251+};
 252+
 253+/*
 254+ * Specialise for string<>string comparisons
 255+ */
 256+template<template<typename V> class Operator>
 257+struct compare_visitor_impl<Operator, std::string, std::string> : boost::static_visitor<bool> {
 258+ bool operator() (std::string const &a, std::string const &b) const {
 259+ Operator<std::string> op;
 260+ return op(a, b);
 261+ }
 262+};
 263+
 264+template<template<typename V> class Operator>
 265+struct compare_visitor : boost::static_visitor<bool> {
 266+ template<typename T, typename U>
 267+ bool operator() (T const &a, U const &b) const {
 268+ return compare_visitor_impl<Operator, T, U>()(a, b);
 269+ }
 270+};
 271+
 272+/*
 273+ * For comparisons that only work on integers - strings will be converted.
 274+ */
 275+template<template<typename V> class Operator>
 276+struct arith_compare_visitor : boost::static_visitor<AFPData> {
 277+ template<typename T, typename U>
 278+ bool operator() (T const &a, U const &b) const {
 279+ typedef typename from_string_converter<T>::type a_type;
 280+ typedef typename from_string_converter<U>::type b_type;
 281+
 282+ Operator<typename preferred_type<a_type, b_type>::type> op;
 283+ return op(
 284+ from_string_converter<T>::convert(a),
 285+ from_string_converter<U>::convert(b));
 286+ }
 287+};
 288+
 289+AFPData &
 290+AFPData::operator+=(AFPData const &other)
 291+{
 292+ AFPData result = boost::apply_visitor(arith_visitor<std::plus>(), value_, other.value_);
 293+ *this = result;
 294+ return *this;
 295+}
 296+
 297+AFPData &
 298+AFPData::operator-=(AFPData const &other)
 299+{
 300+ AFPData result = boost::apply_visitor(arith_visitor<std::minus>(), value_, other.value_);
 301+ *this = result;
 302+ return *this;
 303+}
 304+
 305+AFPData &
 306+AFPData::operator*=(AFPData const &other)
 307+{
 308+ AFPData result = boost::apply_visitor(arith_visitor<std::multiplies>(), value_, other.value_);
 309+ *this = result;
 310+ return *this;
 311+}
301312
302 - this->makeData( oldData.type, newVal, oldData.size, "assignment operator" );
303 -
 313+AFPData&
 314+AFPData::operator/=(AFPData const &other)
 315+{
 316+ AFPData result = boost::apply_visitor(arith_visitor<std::divides>(), value_, other.value_);
 317+ *this = result;
304318 return *this;
305319 }
306320
307 -bool isInVector( string needle, vector<string> haystack ) {
308 - return std::find(haystack.begin(), haystack.end(), needle) != haystack.end();
 321+AFPData&
 322+AFPData::operator%=(AFPData const &other)
 323+{
 324+ AFPData result = boost::apply_visitor(arith_visitor<afpmodulus>(), value_, other.value_);
 325+ *this = result;
 326+ return *this;
309327 }
 328+
 329+AFPData
 330+operator+(AFPData const &a, AFPData const &b) {
 331+ return AFPData(a) += b;
 332+}
 333+
 334+AFPData
 335+operator-(AFPData const &a, AFPData const &b) {
 336+ return AFPData(a) -= b;
 337+}
 338+
 339+AFPData
 340+operator*(AFPData const &a, AFPData const &b) {
 341+ return AFPData(a) *= b;
 342+}
 343+
 344+AFPData
 345+operator/(AFPData const &a, AFPData const &b) {
 346+ return AFPData(a) /= b;
 347+}
 348+
 349+AFPData
 350+operator%(AFPData const &a, AFPData const &b) {
 351+ return AFPData(a) %= b;
 352+}
 353+
 354+bool
 355+operator==(AFPData const &a, AFPData const &b) {
 356+ return a.compare(b);
 357+}
 358+
 359+bool
 360+AFPData::compare(AFPData const &other) const {
 361+ return boost::apply_visitor(compare_visitor<std::equal_to>(), value_, other.value_);
 362+}
 363+
 364+bool
 365+AFPData::less_than(AFPData const &other) const {
 366+ return boost::apply_visitor(arith_compare_visitor<std::less>(), value_, other.value_);
 367+}
 368+
 369+bool
 370+operator< (AFPData const &a, AFPData const &b) {
 371+ return a.less_than(b);
 372+}
 373+
 374+bool
 375+operator<= (AFPData const &a, AFPData const &b) {
 376+ return a.less_than(b) || a == b;
 377+}
 378+
 379+bool
 380+operator> (AFPData const &a, AFPData const &b) {
 381+ return !(a <= b);
 382+}
 383+
 384+bool
 385+operator>= (AFPData const &a, AFPData const &b) {
 386+ return !(a < b);
 387+}
 388+
 389+bool
 390+operator!= (AFPData const &a, AFPData const &b) {
 391+ return !(a == b);
 392+}
 393+
 394+bool
 395+AFPData::operator! () const {
 396+ return !(int) *this;
 397+}
Index: trunk/extensions/AbuseFilter/parser_native/filter_evaluator.h
@@ -0,0 +1,21 @@
 2+#ifndef FILTER_EVALUATOR_H
 3+#define FILTER_EVALUATOR_H
 4+
 5+#include <string>
 6+#include <map>
 7+
 8+#include "aftypes.h"
 9+#include "parser.h"
 10+
 // Front end for evaluating a filter string to a boolean.
 // Owns a single `expressor` (declared in parser.h); the member function
 // bodies live in filter_evaluator.cpp, which is not part of this hunk.
 11+struct filter_evaluator {
 12+ filter_evaluator();
 13+
 // Evaluates `filter` and returns the result as a bool.
 14+ bool evaluate(std::string const &filter) const;
 15+
 // Makes `value` available to filters under the name `key`.
 // (presumably forwarded to the expressor - confirm in the .cpp)
 16+ void add_variable(std::string const &key, AFPData value);
 17+
 18+private:
 19+ expressor e;
 20+};
 21+
 22+#endif /* !FILTER_EVALUATOR_H */
Index: trunk/extensions/AbuseFilter/parser_native/check.cpp
@@ -1,22 +1,19 @@
2 -#include "afeval.h"
 2+#include "filter_evaluator.h"
33 #include "affunctions.h"
44
55 int main( int argc, char** argv ) {
6 - FilterEvaluator e;
 6+ filter_evaluator f;
77
8 - e.reset();
98 bool result = false;
109
11 - registerBuiltinFunctions();
12 -
1310 for(int i=0;i<=100;i++) {
14 - try {
15 - e.setVar( "foo", AFPData(string("love")) );
16 - result = e.evaluateFilter( "specialratio('foo;') == 0.25" );
17 - } catch (AFPException* excep) {
18 - printf( "Exception: %s\n", excep->what() );
 11+ try {
 12+ f.add_variable( "foo", AFPData(string("love")) );
 13+ result = f.evaluate( "specialratio('foo;') == 0.25" );
 14+ } catch (AFPException* excep) {
 15+ printf( "Exception: %s\n", excep->what() );
 16+ }
1917 }
20 - }
2118
2219 if (result) {
2320 printf("Success!\n");
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.cpp
@@ -11,33 +11,6 @@
1212
1313 #define EQUIVSET_LOC "equivset.txt"
1414
15 -map<string,AFPFunction> af_functions;
16 -map<string,AFPData> functionResultCache;
17 -
18 -AFPData af_length( vector<AFPData> args );
19 -AFPData af_lcase( vector<AFPData> args );
20 -AFPData af_ccnorm( vector<AFPData> args );
21 -AFPData af_rmdoubles( vector<AFPData> args );
22 -AFPData af_specialratio( vector<AFPData> args );
23 -AFPData af_rmspecials( vector<AFPData> args );
24 -AFPData af_norm( vector<AFPData> args );
25 -AFPData af_count( vector<AFPData> args );
26 -
27 -void af_registerfunction( string name, AFPFunction method ) {
28 - af_functions[name] = method;
29 -}
30 -
31 -void registerBuiltinFunctions() {
32 - af_registerfunction( "length", (AFPFunction) &af_length);
33 - af_registerfunction( "lcase", (AFPFunction) &af_lcase );
34 - af_registerfunction( "ccnorm", (AFPFunction) &af_ccnorm );
35 - af_registerfunction( "rmdoubles", (AFPFunction) &af_rmdoubles );
36 - af_registerfunction( "specialratio", (AFPFunction) &af_specialratio );
37 - af_registerfunction( "rmspecials", (AFPFunction) &af_rmspecials );
38 - af_registerfunction( "norm", (AFPFunction) &af_norm );
39 - af_registerfunction( "count", (AFPFunction) &af_count );
40 -}
41 -
4215 AFPData af_count( vector<AFPData> args ) {
4316 if (!args.size()) {
4417 throw AFPException( "Not enough arguments to count" );
@@ -78,7 +51,7 @@
7952
8053 string::const_iterator p, charStart, end;
8154 int chr = 0,lastchr = 0;
82 - map<int,int> equivSet = getEquivSet();
 55+ map<int,int> &equivSet = getEquivSet();
8356 string result;
8457
8558 p = orig.begin();
@@ -117,52 +90,6 @@
11891 return result;
11992 }
12093
121 -vector<AFPData> makeFuncArgList( AFPData arg ) {
122 - vector<AFPData> ret;
123 -
124 - ret.push_back( arg );
125 -
126 - return ret;
127 -}
128 -
129 -AFPData callFunction( string name, vector<AFPData> args ) {
130 - string cacheKey;
131 - bool doCache = false;
132 - if (args.size() == 1) {
133 - doCache = true;
134 - cacheKey = name + args[0].toString();
135 -
136 - if (functionResultCache.find(cacheKey) != functionResultCache.end()) {
137 - // found a result
138 - return functionResultCache[cacheKey];
139 - }
140 - }
141 -
142 - if (functionResultCache.size() > 100) {
143 - functionResultCache.clear();
144 - }
145 -
146 - AFPData result;
147 -
148 - if ( af_functions.find( name ) != af_functions.end() ) {
149 - // Found the function
150 - AFPFunction func = af_functions[name];
151 - result = func(args);
152 -
153 - if (doCache) {
154 - functionResultCache[cacheKey] = result;
155 - }
156 -
157 - return result;
158 - }
159 -}
160 -
161 -AFPData callFunction( string name, AFPData arg ) {
162 - vector<AFPData> arglist = makeFuncArgList( arg );
163 -
164 - return callFunction( name, arglist );
165 -}
166 -
16794 AFPData af_specialratio( vector<AFPData> args ) {
16895 if (!args.size()) {
16996 throw AFPException( "Not enough arguments to specialratio" );
@@ -250,7 +177,7 @@
251178 string confusable_character_normalise( string orig ) {
252179 string::const_iterator p, charStart, end;
253180 int chr;
254 - map<int,int> equivSet = getEquivSet();
 181+ map<int,int> &equivSet = getEquivSet();
255182 string result;
256183
257184 p = orig.begin();
@@ -267,11 +194,8 @@
268195 return result;
269196 }
270197
271 -bool isFunction( string name ) {
272 - return af_functions.find(name) != af_functions.end();
273 -}
274 -
275 -map<int,int> getEquivSet() {
 198+map<int,int> &
 199+getEquivSet() {
276200 static map<int,int> equivSet;
277201 // Map of codepoint:codepoint
278202
@@ -284,7 +208,7 @@
285209
286210 string line;
287211
288 - while (!! getline(eqsFile,line)) {
 212+ while (getline(eqsFile,line)) {
289213 size_t pos = line.find_first_of( ":", 0 );
290214
291215 if (pos != line.npos) {
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.h
@@ -3,7 +3,11 @@
44
55 #include <string>
66 #include <vector>
 7+#include <iostream>
78
 9+#include <boost/variant.hpp>
 10+#include <boost/lexical_cast.hpp>
 11+
812 using namespace std;
913
1014 #define T_NONE 0
@@ -22,48 +26,86 @@
2327
2428 #define DATATYPE_MAX 3
2529
26 -#include <iostream>
27 -
2830 class AFPToken {
29 - public:
30 - AFPToken() {}
31 - AFPToken(unsigned int type, string value, unsigned int pos);
32 - unsigned int type;
33 - string value;
34 - unsigned int pos;
 31+public:
 32+ AFPToken() {}
 33+ AFPToken(unsigned int type, string value, unsigned int pos);
 34+ unsigned int type;
 35+ string value;
 36+ unsigned int pos;
3537 };
3638
3739 class AFPData {
38 - public:
39 - ~AFPData();
40 - AFPData();
41 - AFPData( unsigned int type, void* value, size_t size );
42 - AFPData( string var );
43 - AFPData( AFPData oldData, unsigned int newType );
44 - AFPData( const AFPData & oldData );
 40+public:
 41+ AFPData();
 42+
 43+ /*
 44+ * Generic ctor tries to convert to an int.
 45+ */
 46+ template<typename T>
 47+ AFPData(T const &v)
 48+ : value_(boost::lexical_cast<long int>(v))
 49+ {
 50+ }
 51+
 52+ // Specific type constructors
 53+ AFPData( std::string const &var );
 54+ AFPData( char const *var );
 55+ AFPData( long int var );
 56+ AFPData( float var );
 57+ AFPData( double var );
 58+ AFPData( bool var );
 59+
 60+ AFPData( const AFPData & oldData );
4561
46 - // Assignment operator
47 - AFPData & operator = (const AFPData & other);
 62+ // Assignment operator
 63+ AFPData &operator= (const AFPData & other);
4864
49 - // Specific type constructors
50 - AFPData( long int var );
51 - AFPData( double var );
52 - AFPData( bool var );
 65+ AFPData &operator+=(AFPData const &other);
 66+ AFPData &operator-=(AFPData const &other);
 67+ AFPData &operator*=(AFPData const &other);
 68+ AFPData &operator/=(AFPData const &other);
 69+ AFPData &operator%=(AFPData const &other);
 70+ bool operator!() const;
5371
54 - bool toBool();
55 - string toString();
56 - long int toInt();
57 - double toFloat();
58 - unsigned int getType();
 72+ bool compare(AFPData const &other) const;
 73+ bool compare_with_type(AFPData const &other) const;
 74+ bool less_than(AFPData const &other) const;
 75+
 76+ string toString() const;
 77+ long int toInt() const;
 78+ double toFloat() const;
 79+ bool toBool() const {
 80+ return (bool) toInt();
 81+ }
5982
60 - protected:
61 - void makeData( unsigned int type, void* value, size_t size, string source );
62 - void release();
63 -
64 - unsigned int type;
65 - void* value;
66 - size_t size;
67 - string source;
 83+ operator long int(void) const {
 84+ return toInt();
 85+ }
 86+
 87+ operator double(void) const {
 88+ return toFloat();
 89+ }
 90+
 91+ operator std::string(void) const {
 92+ return toString();
 93+ }
 94+
 95+ operator bool(void) const {
 96+ return (bool) toInt();
 97+ }
 98+
 99+ template<typename char_type, typename traits>
 100+ void
 101+ print_to(std::basic_ostream<char_type, traits> &s) const {
 102+ s << value_;
 103+ }
 104+
 105+protected:
 106+ void _init_from_string(std::string const &);
 107+
 108+ typedef boost::variant<std::string, long int, double> valuetype;
 109+ valuetype value_;
68110 };
69111
70112 class AFPException :exception {
@@ -78,6 +120,26 @@
79121 const char* s;
80122 };
81123
 124+AFPData operator+(AFPData const &a, AFPData const &b);
 125+AFPData operator-(AFPData const &a, AFPData const &b);
 126+AFPData operator*(AFPData const &a, AFPData const &b);
 127+AFPData operator/(AFPData const &a, AFPData const &b);
 128+AFPData operator%(AFPData const &a, AFPData const &b);
 129+
 130+bool operator==(AFPData const &a, AFPData const &b);
 131+bool operator!=(AFPData const &a, AFPData const &b);
 132+bool operator<(AFPData const &a, AFPData const &b);
 133+bool operator>(AFPData const &a, AFPData const &b);
 134+bool operator<=(AFPData const &a, AFPData const &b);
 135+bool operator>=(AFPData const &a, AFPData const &b);
 136+
 // Stream insertion for AFPData: delegates to AFPData::print_to() and returns
 // the stream so insertions can be chained. Works for any basic_ostream
 // character type / traits combination.
 137+template<typename char_type, typename traits>
 138+std::basic_ostream<char_type, traits> &
 139+operator<<(std::basic_ostream<char_type, traits> &s, AFPData const &d) {
 140+ d.print_to(s);
 141+ return s;
 142+}
 143+
82144 bool isInVector( string needle, vector<string> haystack );
83145
84146 #endif /* !AFTYPES_H */
Index: trunk/extensions/AbuseFilter/parser_native/syntax_check.cpp
@@ -1,9 +1,10 @@
2 -#include "afeval.h"
32 #include <cstdlib>
43 #include <string>
54 #include <sstream>
65 #include <iostream>
76
 7+#include "filter_evaluator.h"
 8+
89 int main( int argc, char** argv ) {
910 stringbuf ss( ios::in | ios::out );
1011
@@ -13,8 +14,8 @@
1415 string filter = ss.str();
1516
1617 try {
17 - FilterEvaluator e;
18 - e.evaluateFilter( filter );
 18+ filter_evaluator f;
 19+ f.evaluate(filter);
1920 } catch (AFPException excep) {
2021 cout << "PARSERR: " << excep.what() << endl;
2122 exit(0);
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.h
@@ -5,15 +5,16 @@
66 #include <map>
77 #include <vector>
88
9 -typedef AFPData(*AFPFunction)(vector<AFPData>);
 9+AFPData af_length(std::vector<AFPData> args);
 10+AFPData af_lcase(std::vector<AFPData> args);
 11+AFPData af_ccnorm(std::vector<AFPData> args);
 12+AFPData af_rmdoubles(std::vector<AFPData> args);
 13+AFPData af_specialratio(std::vector<AFPData> args);
 14+AFPData af_rmspecials(std::vector<AFPData> args);
 15+AFPData af_norm(std::vector<AFPData> args);
 16+AFPData af_count(std::vector<AFPData> args);
1017
11 -extern map<string,AFPFunction> af_functions;
12 -
13 -void af_registerfunction( string name, AFPFunction method );
14 -void registerBuiltinFunctions();
15 -AFPData callFunction( string name, vector<AFPData> args );
16 -bool isFunction( string name );
17 -map<int,int> getEquivSet();
 18+map<int,int> &getEquivSet();
1819 int next_utf8_char(std::string::const_iterator & p, std::string::const_iterator & charStart, std::string::const_iterator end);
1920 string codepointToUtf8( int codepoint );
2021 string confusable_character_normalise( string orig );
Index: trunk/extensions/AbuseFilter/parser_native/main.cpp
@@ -1,7 +1,6 @@
2 -#include "afeval.h"
3 -#include "affunctions.h"
42 #include <cstdlib>
53 #include <iostream>
 4+#include <iterator>
65 #include <string>
76 #include <sstream>
87 #include <fstream>
@@ -10,45 +9,33 @@
1110 #include <cstring>
1211
1312 #include <boost/format.hpp>
 13+#include <boost/next_prior.hpp>
1414
15 -string filter;
16 -map<string,AFPData> vars;
 15+#include "request.h"
1716
18 -bool loadRequest(std::istream &);
19 -void clearNulls();
20 -
2117 int main( int argc, char** argv ) {
22 - FilterEvaluator e;
23 - registerBuiltinFunctions();
24 -
2518 while (true) {
26 - bool result;
 19+ request r;
 20+ bool result = false;
2721
2822 try {
29 - // Reset
30 - e.reset();
31 - vars.clear();
32 - filter = "";
33 -
3423 if (argv[1]) {
3524 std::ifstream inf(argv[1]);
3625 if (!inf) {
3726 std::cerr << boost::format("%s: %s: %s\n")
3827 % argv[0] % argv[1] % std::strerror(errno);
39 - return 1;
 28+ return 0;
4029 }
4130
42 - if (!loadRequest(inf))
43 - continue;
 31+ if (!r.load(inf))
 32+ return 0;
4433 } else {
45 - if (!loadRequest(std::cin))
46 - continue;
 34+ if (!r.load(std::cin))
 35+ return 0;
4736 }
4837
49 - e.setVars( vars );
50 - result = e.evaluateFilter( filter );
 38+ result = r.evaluate();
5139 } catch (AFPException &excep) {
52 - cout << "EXCEPTION: " << excep.what() << endl;
5340 cerr << "EXCEPTION: " << excep.what() << endl;
5441 }
5542
@@ -56,42 +43,3 @@
5744 }
5845 }
5946
60 -// Protocol:
61 -// code NULL <key> NULL <value> NULL ... <value> NULL NULL
62 -
63 -bool loadRequest(std::istream &inp) {
64 - stringbuf codesb(ios::out | ios::in);
65 -
66 - // Load the code
67 - cin.get( codesb, '\0' );
68 - cin.get();
69 - filter = codesb.str();
70 -
71 - while (true) {
72 - stringbuf keysb(ios::out | ios::in);
73 - stringbuf valsb(ios::out | ios::in);
74 -
75 - // Double NULL = end
76 - if (cin.peek() == 0) {
77 - cin.get();
78 - break;
79 - } else if (cin.peek() == -1) {
80 - exit(-1);
81 - }
82 -
83 - cin.get( keysb, '\0' );
84 - cin.get();
85 -
86 - if (cin.peek() == 0) {
87 - cin.get();
88 - // Leave blank.
89 - } else {
90 - cin.get( valsb, '\0' );
91 - cin.get();
92 - }
93 -
94 - vars[keysb.str()] = AFPData( valsb.str() );
95 - }
96 -
97 - return true;
98 -}
Index: trunk/extensions/AbuseFilter/parser_native/eval.cpp
@@ -1,29 +1,21 @@
2 -#include "afeval.h"
3 -#include "affunctions.h"
42 #include <cstdlib>
53 #include <iostream>
64 #include <string>
75 #include <sstream>
86 #include <map>
97
10 -string filter;
11 -map<string,AFPData> vars;
 8+#include "filter_evaluator.h"
 9+#include "request.h"
1210
13 -bool loadRequest();
14 -
1511 int main( int argc, char** argv ) {
16 - FilterEvaluator e;
17 - registerBuiltinFunctions();
18 -
 12+ request r;
1913 string result;
2014
2115 try {
22 - e.reset();
23 - if (!loadRequest())
24 - exit(-1);
 16+ if (!r.load(std::cin))
 17+ return 1;
2518
26 - e.setVars( vars );
27 - result = e.evaluateExpression( filter );
 19+ result = r.evaluate();
2820 } catch (AFPException excep) {
2921 cout << "EXCEPTION: " << excep.what() << endl;
3022 cerr << "EXCEPTION: " << excep.what() << endl;
@@ -31,47 +23,3 @@
3224
3325 cout << result << "\0";
3426 }
35 -
36 -// Protocol:
37 -// code NULL <key> NULL <value> NULL ... <value> NULL NULL
38 -
39 -bool loadRequest() {
40 - stringbuf codesb(ios::out | ios::in);
41 -
42 - // Load the code
43 - cin.get( codesb, '\0' );
44 - cin.get();
45 - filter = codesb.str();
46 -
47 - cerr << "Got code " << filter << endl;
48 -
49 - while (true) {
50 - stringbuf keysb(ios::out | ios::in);
51 - stringbuf valsb(ios::out | ios::in);
52 -
53 - // Double NULL = end
54 - if (cin.peek() == 0) {
55 - cin.get();
56 - break;
57 - } else if (cin.peek() == -1) {
58 - exit(-1);
59 - }
60 -
61 - cin.get( keysb, '\0' );
62 - cin.get();
63 -
64 - if (cin.peek() == 0) {
65 - cin.get();
66 - // Leave blank.
67 - } else {
68 - cin.get( valsb, '\0' );
69 - cin.get();
70 - }
71 -
72 - cerr << "Got var " << keysb.str() << "=" << valsb.str() << endl;
73 -
74 - vars[keysb.str()] = AFPData( valsb.str() );
75 - }
76 -
77 - return true;
78 -}
Index: trunk/extensions/AbuseFilter/parser_native/request.cpp
@@ -0,0 +1,69 @@
 2+#include "request.h"
 3+
 4+// Protocol:
 5+// code NULL <key> NULL <value> NULL ... <value> NULL NULL
 6+
 // Reads one request from `inp`.
 //
 // The stream is consumed character by character through an istream_iterator:
 // first the filter text up to a NUL, then repeated NUL-terminated key / value
 // pairs. Each pair is registered with the evaluator via f.add_variable().
 // An empty key marks the end of the request.
 //
 // Returns true once the empty terminating key has been read, and false if the
 // stream ends before that point.
 7+bool
 8+request::load(std::istream &inp) {
 // Whitespace is significant in the payload, so stop operator>> skipping it.
 9+ inp.unsetf(ios_base::skipws);
 10+
 // NOTE(review): `p` (here) and `iters` (below) are never referenced again
 // in this function.
 11+ std::istream_iterator<char> it(inp), p, end;
 12+
 13+ std::pair<std::istream_iterator<char>, std::istream_iterator<char> >
 14+ iters;
 15+
 // Filter source: everything up to the first NUL.
 16+ filter.erase();
 17+ for (; it != end; ++it) {
 18+ if (*it == '\0')
 19+ break;
 20+ filter.push_back(*it);
 21+ }
 22+
 // Stream ended before the terminating NUL: incomplete request.
 23+ if (it == end)
 24+ return false;
 25+
 // Step over the NUL that ended the filter text.
 26+ it++;
 27+
 28+ while (true) {
 29+ std::string key, value;
 30+
 31+ /* read the key */
 32+ for (; it != end; ++it) {
 33+ if (*it == '\0')
 34+ break;
 35+ key.push_back(*it);
 36+ }
 37+
 38+ if (it == end)
 39+ return false;
 40+
 41+ if (key.empty())
 42+ /* empty string means end of input */
 43+ return true;
 44+
 45+ it++;
 46+
 47+ /* read the value */
 48+ for (; it != end; ++it) {
 49+ if (*it == '\0')
 50+ break;
 51+ value.push_back(*it);
 52+ }
 53+
 54+ if (it == end)
 55+ return false;
 56+
 57+ it++;
 58+
 // Values arrive as text and are wrapped in an AFPData built from the string.
 59+ f.add_variable(key, AFPData(value));
 60+ }
 61+
 // Not reached: the loop above only exits through its return statements.
 62+ return true;
 63+}
 64+
 // Evaluates the filter text stored by load() with this request's evaluator
 // and returns the boolean result.
 65+bool
 66+request::evaluate()
 67+{
 68+ return f.evaluate(filter);
 69+}
 70+
Index: trunk/extensions/AbuseFilter/parser_native/makefile
@@ -1,20 +1,48 @@
22 include makefile.config
33
4 -CXX = g++
5 -CXXFLAGS = -O3 -W -Wall
6 -
74 CPPFLAGS = $(EXTRA_CPPFLAGS)
85 LDFLAGS = $(EXTRA_LDFLAGS)
96
107 LIBS = -lboost_regex$(BOOST_TAG) -licuuc -licui18n -licudata -licui18n
118
12 -af_expr_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o eval.o
13 -af_parser_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o
14 -check_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o check.o
15 -syntax_check_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o syntax_check.o
 9+expr: CPPFLAGS+=-DTEST_PARSER
1610
17 -progs = check af_parser syntax_check af_expr
 11+af_expr_objs = \
 12+ af_expr-affunctions.o \
 13+ af_expr-aftypes.o \
 14+ af_expr-parser.o \
 15+ af_expr-filter_evaluator.o \
 16+ af_expr-eval.o \
 17+ af_expr-request.o
1818
 19+af_parser_objs = \
 20+ af_parser-affunctions.o \
 21+ af_parser-aftypes.o \
 22+ af_parser-main.o \
 23+ af_parser-parser.o \
 24+ af_parser-request.o \
 25+ af_parser-filter_evaluator.o
 26+
 27+check_objs = \
 28+ check-affunctions.o \
 29+ check-aftypes.o \
 30+ check-check.o \
 31+ check-parser.o \
 32+ check-filter_evaluator.o
 33+
 34+syntax_check_objs = \
 35+ syntax_check-affunctions.o \
 36+ syntax_check-aftypes.o \
 37+ syntax_check-filter_evaluator.o \
 38+ syntax_check-parser.o \
 39+ syntax_check-syntax_check.o
 40+
 41+expr_objs = \
 42+ expr-aftypes.o \
 43+ expr-parser.o
 44+
 45+progs = check af_parser syntax_check af_expr expr
 46+
1947 all: $(progs)
2048
2149 af_expr: $(af_expr_objs)
@@ -25,10 +53,24 @@
2654 $(CXX) $(CXXFLAGS) -o $@ $(check_objs) $(LDFLAGS) $(LIBS)
2755 syntax_check: $(syntax_check_objs)
2856 $(CXX) $(CXXFLAGS) -o $@ $(syntax_check_objs) $(LDFLAGS) $(LIBS)
 57+expr: $(expr_objs)
 58+ $(CXX) $(CXXFLAGS) -o $@ $(expr_objs) $(LDFLAGS) $(LIBS)
2959
30 -.cpp.o:
31 - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $<
32 -
 60+$(af_expr_objs): af_expr-%.o: %.cpp
 61+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $(@:af_expr-%.o=%.cpp)
 62+
 63+$(af_parser_objs): af_parser-%.o: %.cpp
 64+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $(@:af_parser-%.o=%.cpp)
 65+
 66+$(check_objs): check-%.o: %.cpp
 67+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $(@:check-%.o=%.cpp)
 68+
 69+$(syntax_check_objs): syntax_check-%.o: %.cpp
 70+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $(@:syntax_check-%.o=%.cpp)
 71+
 72+$(expr_objs): expr-%.o: %.cpp
 73+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $(@:expr-%.o=%.cpp)
 74+
3375 clean:
3476 rm -f *.o $(progs)
3577
Index: trunk/extensions/AbuseFilter/parser_native/parser.cpp
@@ -0,0 +1,266 @@
 2+#include <stdexcept>
 3+#include <iostream>
 4+
 5+#include <boost/spirit.hpp>
 6+#include <boost/spirit/phoenix.hpp>
 7+#include <boost/spirit/phoenix/composite.hpp>
 8+#include <boost/spirit/phoenix/functions.hpp>
 9+#include <boost/spirit/phoenix/operators.hpp>
 10+#include <boost/function.hpp>
 11+#include <boost/noncopyable.hpp>
 12+
 13+#include "aftypes.h"
 14+#include "parser.h"
 15+
 16+using namespace boost::spirit;
 17+using namespace phoenix;
 18+
 19+namespace px = phoenix;
 20+
 // Exception type thrown when an expression cannot be parsed; a thin
 // std::runtime_error subclass carrying the supplied message.
 21+struct parse_error : std::runtime_error {
 22+ parse_error(char const *what) : std::runtime_error(what) {}
 23+};
 24+
 // Spirit closure attached to the expression rules: it carries one AFPData
 // slot, `val`, holding the value computed by the rule.
 25+struct parser_closure : boost::spirit::closure<parser_closure, AFPData>
 26+{
 27+ member1 val;
 28+};
 29+
 30+namespace {
 31+
 // Implements the `in` operator: converts both operands to strings and
 // returns an AFPData built from `true` when `a` occurs as a contiguous
 // subsequence of `b` (std::search), `false` otherwise.
 32+AFPData f_in(AFPData const &a, AFPData const &b)
 33+{
 34+ std::string sa = a, sb = b;
 35+ return AFPData(std::search(sb.begin(), sb.end(), sa.begin(), sa.end()) != sb.end());
 36+}
 37+
 38+}
 39+
 // Spirit closure used by the function-call rule. It carries three slots:
 //   val  - the AFPData result of the call,
 //   func - the callable looked up in the function symbol table,
 //   args - the argument values collected while parsing the argument list.
 40+struct function_closure : boost::spirit::closure<
 41+ function_closure,
 42+ AFPData,
 43+ boost::function<AFPData (std::vector<AFPData>)>,
 44+ std::vector<AFPData> >
 45+{
 46+ member1 val;
 47+ member2 func;
 48+ member3 args;
 49+};
 50+
 51+struct parser_grammar : public grammar<parser_grammar, parser_closure::context_t>
 52+{
 53+ symbols<AFPData> variables;
 54+ symbols<boost::function<AFPData (std::vector<AFPData>)> > functions;
 55+
 56+ void add_variable(std::string const &name, AFPData const &value) {
 57+ variables.add(name.c_str(), value);
 58+ }
 59+
 60+ void add_function(std::string const &name, boost::function<AFPData (std::vector<AFPData>)> func) {
 61+ functions.add(name.c_str(), func);
 62+ }
 63+
 64+ template<typename ScannerT>
 65+ struct definition
 66+ {
 67+ typedef rule<ScannerT, parser_closure::context_t> rule_t;
 68+
 69+ parser_grammar const &self_;
 70+
 71+ struct push_back_impl {
 72+ template<typename C, typename I>
 73+ struct result {
 74+ typedef void type;
 75+ };
 76+
 77+ template<typename C, typename I>
 78+ void operator() (C &c, I const &i) const {
 79+ c.push_back(i);
 80+ }
 81+ };
 82+
 83+ phoenix::function<push_back_impl> const push_back;
 84+
 85+ struct call_function_impl {
 86+ template<typename F, typename A>
 87+ struct result {
 88+ typedef AFPData type;
 89+ };
 90+
 91+ template<typename F, typename A>
 92+ AFPData operator() (F const &func, A const &args) const {
 93+ return func(args);
 94+ }
 95+ };
 96+
 97+ phoenix::function<call_function_impl> const call_function;
 98+
 99+ definition(parser_grammar const &self)
 100+ : self_(self)
 101+ , push_back(push_back_impl())
 102+ , call_function(call_function_impl())
 103+ {
 104+ value =
 105+ real_p[value.val = arg1]
 106+ | int_p[value.val = arg1]
 107+ | confix_p('"', *c_escape_ch_p, '"')[
 108+ value.val = construct_<std::string>(arg1 + 1, arg2 - 1)
 109+ ]
 110+ ;
 111+
 112+ /* a sequence of uppercase letters is a variable */
 113+ variable =
 114+ self.variables[variable.val = arg1]
 115+ ;
 116+
 117+ /* func(value) */
 118+ function =
 119+ (
 120+ self.functions[function.func = arg1]
 121+ >> '('
 122+ >> ( bool_expr[push_back(function.args, arg1)] % ',' )
 123+ >> ')'
 124+ ) [function.val = call_function(function.func, function.args)]
 125+ ;
 126+
 127+ basic =
 128+ ( '(' >> bool_expr[basic.val = arg1] >> ')' )
 129+ | ch_p('!') >> bool_expr[basic.val = !arg1]
 130+ | variable[basic.val = arg1]
 131+ | function[basic.val = arg1]
 132+ | value[basic.val = arg1]
 133+ ;
 134+
 135+ in_expr =
 136+ basic[in_expr.val = arg1]
 137+ >> *(
 138+ "in" >> basic[in_expr.val = bind(&f_in)(in_expr.val, arg1)]
 139+ )
 140+ ;
 141+
 142+
 143+ mult_expr =
 144+ in_expr[mult_expr.val = arg1]
 145+ >> *(
 146+ '*' >> in_expr[mult_expr.val *= arg1]
 147+ | '/' >> in_expr[mult_expr.val /= arg1]
 148+ | '%' >> in_expr[mult_expr.val %= arg1]
 149+ )
 150+ ;
 151+
 152+ plus_expr =
 153+ mult_expr[plus_expr.val = arg1]
 154+ >> *(
 155+ '+' >> mult_expr[plus_expr.val += arg1]
 156+ | '-' >> mult_expr[plus_expr.val -= arg1]
 157+ )
 158+ ;
 159+
 160+ eq_expr =
 161+ plus_expr[eq_expr.val = arg1]
 162+ >> *(
 163+ "<" >> plus_expr[eq_expr.val = eq_expr.val < arg1]
 164+ | ">" >> plus_expr[eq_expr.val = eq_expr.val > arg1]
 165+ | "<=" >> plus_expr[eq_expr.val = eq_expr.val <= arg1]
 166+ | ">=" >> plus_expr[eq_expr.val = eq_expr.val >= arg1]
 167+ )
 168+ ;
 169+
 170+ eq2_expr =
 171+ eq_expr[eq2_expr.val = arg1]
 172+ >> *(
 173+ "==" >> eq_expr[eq2_expr.val = eq2_expr.val == arg1]
 174+ | "!=" >> eq_expr[eq2_expr.val = eq2_expr.val != arg1]
 175+ | "===" >> eq_expr[eq2_expr.val = eq2_expr.val == arg1]
 176+ | "!==" >> eq_expr[eq2_expr.val = eq2_expr.val != arg1]
 177+ )
 178+ ;
 179+
 180+ bool_expr =
 181+ eq2_expr[bool_expr.val = arg1]
 182+ >> *(
 183+ '&' >> eq_expr[bool_expr.val = bool_expr.val && arg1]
 184+ | '|' >> eq_expr[bool_expr.val = bool_expr.val || arg1]
 185+ | '^' >> eq_expr[bool_expr.val =
 186+ ((bool_expr.val || arg1)
 187+ && !(bool_expr.val && arg1)) ]
 188+ )
 189+ ;
 190+
 191+ expr = bool_expr[self.val = arg1];
 192+ }
 193+
 194+ rule_t const &start() const {
 195+ return expr;
 196+ }
 197+
 198+ rule_t value, variable, basic, bool_expr,
 199+ eq_expr, eq2_expr, mult_expr, plus_expr, in_expr, not_expr, expr;
 200+ rule<ScannerT, function_closure::context_t> function;
 201+ };
 202+};
 203+
 // Constructs the evaluator with a freshly heap-allocated parser_grammar,
 // which is released in the destructor.
 204+expressor::expressor()
 205+ : grammar_(new parser_grammar)
 206+{
 207+}
 208+
 // Releases the grammar allocated in the constructor.
 // NOTE(review): the class declaration (parser.h) is not shown here - confirm
 // that copying an expressor is disabled, otherwise grammar_ would be deleted
 // twice.
 209+expressor::~expressor()
 210+{
 211+ delete grammar_;
 212+}
 213+
 // Parses and evaluates `filter`, skipping whitespace between tokens
 // (space_p), and returns the value produced by the grammar.
 //
 // If the parser does not consume the whole input, the unparsed remainder is
 // written to std::cerr and parse_error("parsing failed") is thrown.
 214+AFPData
 215+expressor::evaluate(std::string const &filter) const
 216+{
 217+ AFPData ret;
 // The semantic action copies the grammar's closure value into `ret`.
 218+ parse_info<std::string::const_iterator> info =
 219+ parse(filter.begin(), filter.end(), (*grammar_)[var(ret) = arg1], space_p);
 220+ if (info.full) {
 221+ return ret;
 222+ } else {
 223+ std::cerr << "stopped at: [" << std::string(info.stop, filter.end()) << "]\n";
 224+ throw parse_error("parsing failed");
 225+ }
 226+}
 227+
 // Registers `value` under `name` by forwarding to the grammar's variable table.
 228+void
 229+expressor::add_variable(std::string const &name, AFPData value)
 230+{
 231+ grammar_->add_variable(name, value);
 232+}
 233+
 // Registers the callable `value` under `name` by forwarding to the grammar's
 // function table.
 234+void
 235+expressor::add_function(std::string const &name, func_t value)
 236+{
 237+ grammar_->add_function(name, value);
 238+}
 239+
 240+#ifdef TEST_PARSER
 // Test helper (TEST_PARSER builds only): returns the sum of the first two
 // arguments. The argument count is not checked.
 241+AFPData f_add(std::vector<AFPData> const &args)
 242+{
 243+ return args[0] + args[1];
 244+}
 245+
 // Test helper (TEST_PARSER builds only): returns its first argument
 // unchanged. The argument count is not checked.
 246+AFPData f_norm(std::vector<AFPData> const &args)
 247+{
 248+ return args[0];
 249+}
 250+
 // Entry point of the `expr` test tool (compiled only with TEST_PARSER).
 // Registers the variables ONE, TWO, THREE and the functions add / norm,
 // evaluates the expression given as the first command-line argument and
 // prints either the result or "parsing failed: <reason>" to stdout.
 251+int
 252+main(int argc, char **argv)
 253+{
 254+ expressor e;
 255+ e.add_variable("ONE", 1);
 256+ e.add_variable("TWO", 2);
 257+ e.add_variable("THREE", 3);
 258+ e.add_function("add", f_add);
 259+ e.add_function("norm", f_norm);
 260+
 // NOTE(review): argv[1] is used without checking argc, so running the tool
 // with no argument passes a null pointer to evaluate().
 // The catch parameter below is also named `e` and hides the expressor
 // inside the handler.
 261+ try {
 262+ std::cout << e.evaluate(argv[1]) << '\n';
 263+ } catch (std::exception &e) {
 264+ std::cout << "parsing failed: " << e.what() << '\n';
 265+ }
 266+}
 267+#endif
Index: trunk/extensions/AbuseFilter/parser_native/request.h
@@ -0,0 +1,18 @@
 2+#ifndef REQUEST_H
 3+#define REQUEST_H
 4+
 5+#include <string>
 6+#include <istream>
 7+
 8+#include "filter_evaluator.h"
 9+
 // One evaluation request: the filter text plus the evaluator that holds the
 // variables supplied with it.
 10+struct request {
 // Reads the filter and its variables from the stream; returns false when
 // the input is incomplete (see request.cpp).
 11+ bool load(std::istream &);
 // Evaluates the loaded filter and returns its boolean result.
 12+ bool evaluate(void);
 13+
 14+private:
 15+ filter_evaluator f;
 16+ std::string filter;
 17+};
 18+
 19+#endif /* !REQUEST_H */

Status & tagging log