Index: trunk/extensions/AbuseFilter/parser_native/afeval.cpp |
— | — | @@ -7,6 +7,7 @@ |
8 | 8 | this->cur = AFPToken(); |
9 | 9 | this->pos = 0; |
10 | 10 | this->code = ""; |
| 11 | + this->forceResult = false; |
11 | 12 | } |
12 | 13 | |
13 | 14 | void FilterEvaluator::setVar( string key, AFPData value ) { |
— | — | @@ -21,8 +22,18 @@ |
22 | 23 | |
23 | 24 | bool FilterEvaluator::evaluateFilter( string code ) { |
24 | 25 | this->code = code; |
25 | | - this->tokens = af_parse( code ); |
26 | 26 | this->pos = 0; |
| 27 | + |
| 28 | + if (this->tokenCache.find(code) != this->tokenCache.end()) { |
| 29 | + this->tokens = this->tokenCache[code]; |
| 30 | + } else { |
| 31 | + this->tokenCache[code] = this->tokens = af_parse( code ); |
| 32 | + } |
| 33 | + |
| 34 | + if (this->tokenCache.size() > 100) { |
| 35 | + this->tokenCache.clear(); |
| 36 | + } |
| 37 | + |
27 | 38 | this->cur = this->tokens[0]; |
28 | 39 | |
29 | 40 | AFPData result; |
— | — | @@ -50,7 +61,7 @@ |
51 | 62 | this->doLevelSet( result ); |
52 | 63 | |
53 | 64 | if (this->cur.type != T_NONE) { |
54 | | - throw new AFPException( "Unexpected tokens at end." ); |
| 65 | + throw AFPException( "Unexpected tokens at end." ); |
55 | 66 | } |
56 | 67 | } |
57 | 68 | |
— | — | @@ -116,6 +127,8 @@ |
117 | 128 | } |
118 | 129 | |
119 | 130 | void FilterEvaluator::doLevelBoolOps( AFPData* result ) { |
| 131 | + bool setForce = false; |
| 132 | + |
120 | 133 | this->doLevelCompares( result ); |
121 | 134 | |
122 | 135 | vector<string> ops = getOpsForType( "bool" ); |
— | — | @@ -124,9 +137,27 @@ |
125 | 138 | string op = this->cur.value; |
126 | 139 | this->move(); |
127 | 140 | AFPData r2; |
| 141 | + |
| 142 | + if (!this->forceResult && op == "&" && !result->toBool()) { |
| 143 | + setForce = true; |
| 144 | + this->forceResult = true; |
| 145 | + } else if (!this->forceResult && op == "|" && result->toBool()) { |
| 146 | + setForce = true; |
| 147 | + this->forceResult = true; |
| 148 | + } |
| 149 | + |
128 | 150 | this->doLevelCompares( &r2 ); |
129 | | - *result = af_boolOp( result, r2, op ); |
| 151 | + |
| 152 | + if (!this->forceResult) { |
| 153 | + *result = af_boolOp( *result, r2, op ); |
| 154 | + } else if (setForce) { |
| 155 | + setForce = false; |
| 156 | + this->forceResult = false; |
| 157 | + } |
130 | 158 | } |
| 159 | + |
| 160 | + if (setForce) |
| 161 | + this->forceResult = false; |
131 | 162 | } |
132 | 163 | |
133 | 164 | void FilterEvaluator::doLevelCompares( AFPData* result ) { |
— | — | @@ -140,7 +171,8 @@ |
141 | 172 | |
142 | 173 | this->doLevelMulRels( &r2 ); |
143 | 174 | |
144 | | - *result = af_compareOp( *result, r2, op ); |
| 175 | + if (!this->forceResult) |
| 176 | + *result = af_compareOp( *result, r2, op ); |
145 | 177 | } |
146 | 178 | } |
147 | 179 | |
— | — | @@ -154,7 +186,9 @@ |
155 | 187 | AFPData r2; |
156 | 188 | |
157 | 189 | this->doLevelSumRels( &r2 ); |
158 | | - *result = af_mulRel( *result, r2, op ); |
| 190 | + |
| 191 | + if (!this->forceResult) |
| 192 | + *result = af_mulRel( *result, r2, op ); |
159 | 193 | } |
160 | 194 | } |
161 | 195 | |
— | — | @@ -168,10 +202,13 @@ |
169 | 203 | AFPData r2; |
170 | 204 | |
171 | 205 | this->doLevelPow( &r2 ); |
| 206 | + |
172 | 207 | if (op == "+") { |
173 | | - *result = af_sum( *result, r2 ); |
| 208 | + if (!this->forceResult) |
| 209 | + *result = af_sum( *result, r2 ); |
174 | 210 | } else if (op == "-") { |
175 | | - *result = af_sub( *result, r2 ); |
| 211 | + if (!this->forceResult) |
| 212 | + *result = af_sub( *result, r2 ); |
176 | 213 | } |
177 | 214 | } |
178 | 215 | } |
— | — | @@ -184,15 +221,18 @@ |
185 | 222 | AFPData exp; |
186 | 223 | |
187 | 224 | this->doLevelBoolInvert( &exp ); |
188 | | - *result = af_pow( *result, exp ); |
| 225 | + |
| 226 | + if (!this->forceResult) |
| 227 | + *result = af_pow( *result, exp ); |
189 | 228 | } |
190 | 229 | } |
191 | 230 | |
192 | 231 | void FilterEvaluator::doLevelBoolInvert( AFPData* result ) { |
193 | | - if (this->cur.type == T_KEYWORD && this->cur.value == "!") { |
| 232 | + if (this->cur.type == T_OP && this->cur.value == "!") { |
194 | 233 | this->move(); |
195 | 234 | this->doLevelSpecialWords( result ); |
196 | | - *result = af_boolInvert( *result ); |
| 235 | + if (!this->forceResult) |
| 236 | + *result = af_boolInvert( *result ); |
197 | 237 | } else { |
198 | 238 | this->doLevelSpecialWords( result ); |
199 | 239 | } |
— | — | @@ -209,7 +249,8 @@ |
210 | 250 | AFPData r2 = AFPData(); |
211 | 251 | this->doLevelUnarys( &r2 ); |
212 | 252 | |
213 | | - *result = af_keyword( keyword, *result, r2 ); |
| 253 | + if (!this->forceResult) |
| 254 | + *result = af_keyword( keyword, *result, r2 ); |
214 | 255 | } |
215 | 256 | } |
216 | 257 | |
— | — | @@ -218,7 +259,8 @@ |
219 | 260 | this->move(); |
220 | 261 | this->doLevelBraces( result ); |
221 | 262 | if (this->cur.value == "-") { |
222 | | - *result = af_unaryMinus( *result ); |
| 263 | + if (!this->forceResult) |
| 264 | + *result = af_unaryMinus( *result ); |
223 | 265 | } |
224 | 266 | } else { |
225 | 267 | this->doLevelBraces( result ); |
— | — | @@ -231,7 +273,7 @@ |
232 | 274 | this->doLevelSet( result ); |
233 | 275 | |
234 | 276 | if ( !(this->cur.type == T_BRACE && this->cur.value == ")") ) { |
235 | | - throw new AFPException( "Expected ')'" ); |
| 277 | + throw AFPException( "Expected ')' at pos %d", this->cur.pos ); |
236 | 278 | } |
237 | 279 | this->move(); |
238 | 280 | } else { |
— | — | @@ -245,7 +287,7 @@ |
246 | 288 | this->move(); |
247 | 289 | |
248 | 290 | if (this->cur.type != T_BRACE || this->cur.value != "(") { |
249 | | - throw new AFPException( "Expected (" ); |
| 291 | + throw AFPException( "Expected (" ); |
250 | 292 | } |
251 | 293 | this->move(); |
252 | 294 | |
— | — | @@ -261,7 +303,8 @@ |
262 | 304 | } while (this->cur.type == T_COMMA ); |
263 | 305 | } |
264 | 306 | |
265 | | - *result = callFunction( func, args ); |
| 307 | + if (!this->forceResult) |
| 308 | + *result = callFunction( func, args ); |
266 | 309 | this->move(); |
267 | 310 | } else { |
268 | 311 | this->doLevelAtom( result ); |
— | — | @@ -291,7 +334,7 @@ |
292 | 335 | } else if (tok == "null") { |
293 | 336 | *result = AFPData(); |
294 | 337 | } else { |
295 | | - throw new AFPException( "Unidentifiable keyword" ); |
| 338 | + throw AFPException( "Unidentifiable keyword" ); |
296 | 339 | } |
297 | 340 | break; |
298 | 341 | case T_BRACE: |
— | — | @@ -301,7 +344,8 @@ |
302 | 345 | break; |
303 | 346 | case T_COMMA: |
304 | 347 | return; |
305 | | - default: throw new AFPException( "Unexpected token" ); |
| 348 | + break; |
| 349 | + default: throw AFPException( "Unexpected token value %s", this->cur.value.c_str() ); |
306 | 350 | } |
307 | 351 | |
308 | 352 | this->move(); |
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.cpp |
— | — | @@ -5,17 +5,19 @@ |
6 | 6 | #include <ios> |
7 | 7 | #include <iostream> |
8 | 8 | #include <ctype.h> |
9 | | -#include <unicode/unistr.h> |
10 | 9 | |
11 | 10 | #define EQUIVSET_LOC "equivset.txt" |
12 | 11 | |
13 | 12 | map<string,AFPFunction> af_functions; |
| 13 | +map<string,AFPData> functionResultCache; |
14 | 14 | |
15 | 15 | AFPData af_length( vector<AFPData> args ); |
16 | 16 | AFPData af_lcase( vector<AFPData> args ); |
17 | 17 | AFPData af_ccnorm( vector<AFPData> args ); |
18 | 18 | AFPData af_rmdoubles( vector<AFPData> args ); |
19 | 19 | AFPData af_specialratio( vector<AFPData> args ); |
| 20 | +AFPData af_rmspecials( vector<AFPData> args ); |
| 21 | +AFPData af_norm( vector<AFPData> args ); |
20 | 22 | |
21 | 23 | void af_registerfunction( string name, AFPFunction method ) { |
22 | 24 | af_functions[name] = method; |
— | — | @@ -27,11 +29,107 @@ |
28 | 30 | af_registerfunction( "ccnorm", (AFPFunction) &af_ccnorm ); |
29 | 31 | af_registerfunction( "rmdoubles", (AFPFunction) &af_rmdoubles ); |
30 | 32 | af_registerfunction( "specialratio", (AFPFunction) &af_specialratio ); |
| 33 | + af_registerfunction( "rmspecials", (AFPFunction) &af_rmspecials ); |
| 34 | + af_registerfunction( "norm", (AFPFunction) &af_norm ); |
31 | 35 | } |
32 | 36 | |
| 37 | +AFPData af_norm( vector<AFPData> args ) { |
| 38 | + if (!args.size()) { |
| 39 | + throw AFPException( "Not enough arguments to norm" ); |
| 40 | + } |
| 41 | + |
| 42 | + string orig = args[0].toString(); |
| 43 | + |
| 44 | + string::const_iterator p, charStart, end; |
| 45 | + int chr = 0,lastchr = 0; |
| 46 | + map<int,int> equivSet = getEquivSet(); |
| 47 | + string result; |
| 48 | + |
| 49 | + p = orig.begin(); |
| 50 | + end = orig.end(); |
| 51 | + |
| 52 | + while (chr = next_utf8_char( p, charStart, end )) { |
| 53 | + if (equivSet.find(chr) != equivSet.end()) { |
| 54 | + chr = equivSet[chr]; |
| 55 | + } |
| 56 | + |
| 57 | + if (chr != lastchr && isalnum(chr)) { |
| 58 | + result.append(codepointToUtf8(chr)); |
| 59 | + } |
| 60 | + |
| 61 | + lastchr = chr; |
| 62 | + } |
| 63 | + |
| 64 | + return AFPData(result); |
| 65 | +} |
| 66 | + |
| 67 | +string rmdoubles( string orig ) { |
| 68 | + string::const_iterator p, charStart, end; |
| 69 | + int chr,lastchr = 0; |
| 70 | + string result; |
| 71 | + |
| 72 | + p = orig.begin(); |
| 73 | + end = orig.end(); |
| 74 | + while (chr = next_utf8_char( p, charStart, end )) { |
| 75 | + if (chr != lastchr) { |
| 76 | + result.append(codepointToUtf8(chr)); |
| 77 | + } |
| 78 | + |
| 79 | + lastchr = chr; |
| 80 | + } |
| 81 | + |
| 82 | + return result; |
| 83 | +} |
| 84 | + |
| 85 | +vector<AFPData> makeFuncArgList( AFPData arg ) { |
| 86 | + vector<AFPData> ret; |
| 87 | + |
| 88 | + ret.push_back( arg ); |
| 89 | + |
| 90 | + return ret; |
| 91 | +} |
| 92 | + |
| 93 | +AFPData callFunction( string name, vector<AFPData> args ) { |
| 94 | + string cacheKey; |
| 95 | + bool doCache = false; |
| 96 | + if (args.size() == 1) { |
| 97 | + doCache = true; |
| 98 | + cacheKey = name + args[0].toString(); |
| 99 | + |
| 100 | + if (functionResultCache.find(cacheKey) != functionResultCache.end()) { |
| 101 | + // found a result |
| 102 | + return functionResultCache[cacheKey]; |
| 103 | + } |
| 104 | + } |
| 105 | + |
| 106 | + if (functionResultCache.size() > 100) { |
| 107 | + functionResultCache.clear(); |
| 108 | + } |
| 109 | + |
| 110 | + AFPData result; |
| 111 | + |
| 112 | + if ( af_functions.find( name ) != af_functions.end() ) { |
| 113 | + // Found the function |
| 114 | + AFPFunction func = af_functions[name]; |
| 115 | + result = func(args); |
| 116 | + |
| 117 | + if (doCache) { |
| 118 | + functionResultCache[cacheKey] = result; |
| 119 | + } |
| 120 | + |
| 121 | + return result; |
| 122 | + } |
| 123 | +} |
| 124 | + |
| 125 | +AFPData callFunction( string name, AFPData arg ) { |
| 126 | + vector<AFPData> arglist = makeFuncArgList( arg ); |
| 127 | + |
| 128 | + return callFunction( name, arglist ); |
| 129 | +} |
| 130 | + |
33 | 131 | AFPData af_specialratio( vector<AFPData> args ) { |
34 | 132 | if (!args.size()) { |
35 | | - throw new AFPException( "Not enough arguments to specialratio" ); |
| 133 | + throw AFPException( "Not enough arguments to specialratio" ); |
36 | 134 | } |
37 | 135 | |
38 | 136 | string orig = args[0].toString(); |
— | — | @@ -49,61 +147,72 @@ |
50 | 148 | |
51 | 149 | double ratio = (float)(specialcount) / (float)(orig.size()); |
52 | 150 | |
53 | | - return AFPData(ratio); |
| 151 | + return AFPData(ratio); |
54 | 152 | } |
55 | 153 | |
56 | | -AFPData af_ccnorm( vector<AFPData> args ) { |
| 154 | +AFPData af_rmspecials( vector<AFPData> args ) { |
57 | 155 | if (!args.size()) { |
58 | | - throw new AFPException( "Not enough arguments to ccnorm" ); |
| 156 | + throw AFPException( "Not enough arguments to rmspecials" ); |
59 | 157 | } |
60 | 158 | |
61 | | - return AFPData( confusable_character_normalise( args[0].toString() ) ); |
| 159 | + string orig = args[0].toString(); |
| 160 | + string result = rmspecials(orig); |
| 161 | + |
| 162 | + return AFPData(result); |
62 | 163 | } |
63 | 164 | |
64 | | -AFPData af_rmdoubles( vector<AFPData> args ) { |
65 | | - if (!args.size()) { |
66 | | - throw new AFPException( "Not enough arguments to rmdoubles" ); |
67 | | - } |
68 | | - |
69 | | - string orig = args[0].toString(); |
| 165 | +string rmspecials( string orig ) { |
70 | 166 | string::const_iterator p, charStart, end; |
71 | | - int chr,lastchr = 0; |
| 167 | + int chr = 0; |
72 | 168 | string result; |
73 | 169 | |
74 | 170 | p = orig.begin(); |
75 | 171 | end = orig.end(); |
76 | 172 | while (chr = next_utf8_char( p, charStart, end )) { |
77 | | - if (chr != lastchr) { |
| 173 | + if (isalnum(chr)) { |
78 | 174 | result.append(codepointToUtf8(chr)); |
79 | 175 | } |
80 | | - |
81 | | - lastchr = chr; |
82 | 176 | } |
83 | 177 | |
84 | 178 | return result; |
85 | 179 | } |
86 | 180 | |
87 | | -AFPData af_length( vector<AFPData> args ) { |
| 181 | +AFPData af_ccnorm( vector<AFPData> args ) { |
88 | 182 | if (!args.size()) { |
89 | | - throw new AFPException( "Not enough arguments to lcase" ); |
| 183 | + throw AFPException( "Not enough arguments to ccnorm" ); |
90 | 184 | } |
| 185 | + |
| 186 | + return AFPData( confusable_character_normalise( args[0].toString() ) ); |
| 187 | +} |
91 | 188 | |
92 | | - UnicodeString ustr = UnicodeString( (UChar*)args[0].toString().c_str() ); |
93 | | - return AFPData( (long int)ustr.length() ); |
| 189 | +AFPData af_rmdoubles( vector<AFPData> args ) { |
| 190 | + if (!args.size()) { |
| 191 | + throw AFPException( "Not enough arguments to rmdoubles" ); |
| 192 | + } |
| 193 | + |
| 194 | + string result = rmdoubles( args[0].toString() ); |
| 195 | + |
| 196 | + return AFPData(result); |
94 | 197 | } |
95 | 198 | |
| 199 | +AFPData af_length( vector<AFPData> args ) { |
| 200 | + if (!args.size()) { |
| 201 | + throw AFPException( "Not enough arguments to lcase" ); |
| 202 | + } |
| 203 | + |
| 204 | + return AFPData( (long int)args[0].toString().size() ); |
| 205 | +} |
| 206 | + |
96 | 207 | AFPData af_lcase( vector<AFPData> args ) { |
97 | 208 | if (!args.size()) { |
98 | | - throw new AFPException( "Not enough arguments to lcase" ); |
| 209 | + throw AFPException( "Not enough arguments to lcase" ); |
99 | 210 | } |
100 | 211 | |
101 | | - int initlen = args[0].toString().length(); |
102 | | - UnicodeString us = UnicodeString( args[0].toString().c_str() ); |
103 | | - us = us.toLower(); |
104 | | - char* result = (char*)malloc(initlen); |
105 | | - us.extract(0, us.length(), result); |
| 212 | + string s = args[0].toString(); |
106 | 213 | |
107 | | - return AFPData(string(result)); |
| 214 | + transform( s.begin(), s.end(), s.begin(), (int(*)(int)) tolower ); |
| 215 | + |
| 216 | + return AFPData(s); |
108 | 217 | } |
109 | 218 | |
110 | 219 | string confusable_character_normalise( string orig ) { |
— | — | @@ -126,14 +235,6 @@ |
127 | 236 | return result; |
128 | 237 | } |
129 | 238 | |
130 | | -AFPData callFunction( string name, vector<AFPData> args ) { |
131 | | - if ( af_functions.find( name ) != af_functions.end() ) { |
132 | | - // Found the function |
133 | | - AFPFunction func = af_functions[name]; |
134 | | - return func(args); |
135 | | - } |
136 | | -} |
137 | | - |
138 | 239 | bool isFunction( string name ) { |
139 | 240 | return af_functions.find(name) != af_functions.end(); |
140 | 241 | } |
— | — | @@ -146,7 +247,7 @@ |
147 | 248 | ifstream eqsFile( EQUIVSET_LOC ); |
148 | 249 | |
149 | 250 | if (!eqsFile) { |
150 | | - throw new AFPException( "Unable to open equivalence sets!" ); |
| 251 | + throw AFPException( "Unable to open equivalence sets!" ); |
151 | 252 | } |
152 | 253 | |
153 | 254 | string line; |
— | — | @@ -250,5 +351,5 @@ |
251 | 352 | return ret; |
252 | 353 | } |
253 | 354 | |
254 | | - throw new AFPException("Asked for code outside of range ($codepoint)\n"); |
| 355 | + throw AFPException("Asked for code outside of range ($codepoint)\n"); |
255 | 356 | } |
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.h |
— | — | @@ -40,22 +40,27 @@ |
41 | 41 | AFPData( AFPData oldData, unsigned int newType ); |
42 | 42 | AFPData( const AFPData & oldData ); |
43 | 43 | |
| 44 | + // Assignment operator |
| 45 | + AFPData & operator = (const AFPData & other); |
| 46 | + |
44 | 47 | // Specific type constructors |
45 | 48 | AFPData( long int var ); |
46 | 49 | AFPData( double var ); |
47 | 50 | AFPData( bool var ); |
48 | | - |
49 | | - unsigned int type; |
50 | | - void* value; |
51 | | - size_t size; |
52 | 51 | |
53 | 52 | bool toBool(); |
54 | 53 | string toString(); |
55 | 54 | long int toInt(); |
56 | 55 | double toFloat(); |
| 56 | + unsigned int getType(); |
57 | 57 | |
58 | 58 | protected: |
59 | | - void makeData( unsigned int type, void* value, size_t size ); |
| 59 | + void makeData( unsigned int type, void* value, size_t size, string source ); |
| 60 | + |
| 61 | + unsigned int type; |
| 62 | + void* value; |
| 63 | + size_t size; |
| 64 | + string source; |
60 | 65 | }; |
61 | 66 | |
62 | 67 | class AFPException :exception { |
Index: trunk/extensions/AbuseFilter/parser_native/afeval.h |
— | — | @@ -28,9 +28,11 @@ |
29 | 29 | |
30 | 30 | AFPToken cur; |
31 | 31 | vector<AFPToken> tokens; |
| 32 | + map<string, vector<AFPToken> > tokenCache; |
32 | 33 | unsigned int pos; |
33 | 34 | string code; |
34 | 35 | map<string,AFPData> vars; |
| 36 | + bool forceResult; |
35 | 37 | }; |
36 | 38 | |
37 | 39 | // typedef AFPData (*AFPFunction) (vector<AFPData>); |
Index: trunk/extensions/AbuseFilter/parser_native/afparser.cpp |
— | — | @@ -114,7 +114,7 @@ |
115 | 115 | } |
116 | 116 | } |
117 | 117 | |
118 | | - throw new AFPException( "Unclosed string" ); |
| 118 | + throw AFPException( "Unclosed string" ); |
119 | 119 | } |
120 | 120 | |
121 | 121 | // Operators |
— | — | @@ -127,7 +127,7 @@ |
128 | 128 | } |
129 | 129 | |
130 | 130 | if (!isValidOp( s )) { |
131 | | - throw new AFPException( "Invalid operator %s", s ); |
| 131 | + throw AFPException( "Invalid operator %s", s ); |
132 | 132 | } |
133 | 133 | |
134 | 134 | tok = AFPToken( T_OP, s, pos ); |
— | — | @@ -167,7 +167,7 @@ |
168 | 168 | return true; |
169 | 169 | } |
170 | 170 | |
171 | | - throw new AFPException( "Unrecognised token" ); |
| 171 | + throw AFPException( "Unrecognised token" ); |
172 | 172 | } |
173 | 173 | |
174 | 174 | bool isDigitOrDot( char chr ) { |
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.h |
— | — | @@ -14,3 +14,7 @@ |
15 | 15 | int next_utf8_char(std::string::const_iterator & p, std::string::const_iterator & charStart, std::string::const_iterator end); |
16 | 16 | string codepointToUtf8( int codepoint ); |
17 | 17 | string confusable_character_normalise( string orig ); |
| 18 | +vector<AFPData> makeFuncArgList( AFPData arg ); |
| 19 | +AFPData callFunction( string name, AFPData arg ); |
| 20 | +string rmdoubles( string orig ); |
| 21 | +string rmspecials( string orig ); |
Index: trunk/extensions/AbuseFilter/parser_native/main.cpp |
— | — | @@ -1,26 +1,105 @@ |
2 | 2 | #include "afeval.h" |
3 | 3 | #include "affunctions.h" |
| 4 | +#include <libxml++/libxml++.h> |
| 5 | +#include <iostream> |
| 6 | +#include <string> |
| 7 | +#include <sstream> |
| 8 | +#include <map> |
4 | 9 | |
| 10 | +string filter; |
| 11 | +map<string,AFPData> vars; |
| 12 | + |
| 13 | +bool loadRequest(); |
| 14 | + |
5 | 15 | int main( int argc, char** argv ) { |
6 | 16 | FilterEvaluator e; |
| 17 | + registerBuiltinFunctions(); |
| 18 | + |
| 19 | + while (true) { |
| 20 | + e.reset(); |
| 21 | + |
| 22 | + if (!loadRequest()) |
| 23 | + continue; |
| 24 | + |
| 25 | + bool result; |
| 26 | + |
| 27 | + try { |
| 28 | + e.setVars( vars ); |
| 29 | + result = e.evaluateFilter( filter ); |
| 30 | + } catch (AFPException excep) { |
| 31 | + cerr << "EXCEPTION: " << excep.what() << endl; |
| 32 | + } |
| 33 | + |
| 34 | + cout << ( result ? "MATCH\n" : "NOMATCH\n" ); |
| 35 | +// exit(result ? 1 : 0); // Exit 0 means OK, exit 1 means match |
| 36 | + } |
| 37 | +} |
7 | 38 | |
8 | | - e.reset(); |
9 | | - bool result = false; |
| 39 | +/* REQUEST FORMAT: |
| 40 | +<request> |
| 41 | + <vars> |
| 42 | + <var key="varname">value</var> |
| 43 | + </vars> |
| 44 | + <rule> RULE CONTENT </rule> |
| 45 | +</request> */ |
| 46 | + |
| 47 | +bool loadRequest() { |
| 48 | + // Parse the XML. |
| 49 | + xmlpp::DomParser parser; |
| 50 | + parser.set_substitute_entities(); |
| 51 | + |
| 52 | + stringbuf sb(ios::out | ios::in); |
| 53 | + cin.get( sb, '\x04' ); |
| 54 | + cin.get(); |
10 | 55 | |
11 | | - registerBuiltinFunctions(); |
| 56 | + string text = sb.str(); |
12 | 57 | |
13 | | - for(int i=0;i<=1;i++) { |
14 | | - try { |
15 | | - e.setVar( "foo", AFPData(string("love")) ); |
16 | | - result = e.evaluateFilter( "specialratio('foo;') == 0.25" ); |
17 | | - } catch (AFPException* excep) { |
18 | | - printf( "Exception: %s\n", excep->what() ); |
| 58 | + // Remove the NULL |
| 59 | + for( string::iterator it = text.begin(); it!=text.end(); ++it ) { |
| 60 | + if (*it == '\x04') { text.erase(it); } |
19 | 61 | } |
| 62 | + |
| 63 | + if (text.size() < 2) { |
| 64 | + return false; |
20 | 65 | } |
21 | 66 | |
22 | | - if (result) { |
23 | | - printf("Success!\n"); |
24 | | - } else { |
25 | | - printf("OH NOES!\n"); |
| 67 | + istringstream ss(text); |
| 68 | + parser.parse_stream( ss ); |
| 69 | +// parser.parse_file( "xml.test" ); |
| 70 | + xmlpp::Node* rootNode = parser.get_document()->get_root_node(); |
| 71 | + |
| 72 | + // Get vars |
| 73 | + xmlpp::Node::NodeList varNodes = rootNode->get_children( "vars" ); |
| 74 | + |
| 75 | + if (varNodes.begin() == varNodes.end()) { |
| 76 | + throw AFPException( "Request did not contain any vars" ); |
26 | 77 | } |
| 78 | + |
| 79 | + xmlpp::Node::Node* varNode = *(varNodes.begin()); // Get the <vars> element |
| 80 | + varNodes = varNode->get_children( "var" ); // Iterate through <var> child nodes |
| 81 | + for (xmlpp::Node::NodeList::const_iterator it = varNodes.begin(); it!=varNodes.end(); ++it) { |
| 82 | + xmlpp::Element* n = dynamic_cast<xmlpp::Element*>(*it); |
| 83 | + |
| 84 | + string attName = n->get_attribute( "key" )->get_value(); |
| 85 | + if (n->has_child_text()) { |
| 86 | + string attValue = n->get_child_text()->get_content(); |
| 87 | + vars[attName] = AFPData(attValue); |
| 88 | + } else { |
| 89 | + vars[attName] = ""; |
| 90 | + } |
| 91 | + } |
| 92 | + |
| 93 | + //Get code. |
| 94 | + xmlpp::Node::NodeList codeNodes = rootNode->get_children( "rule" ); |
| 95 | + |
| 96 | + if (codeNodes.begin() == codeNodes.end()) { |
| 97 | + throw new AFPException( "Request did not contain any filter" ); |
| 98 | + } |
| 99 | + |
| 100 | + xmlpp::Node* codeNode = *(codeNodes.begin()); |
| 101 | + xmlpp::Element* codeElement = dynamic_cast<xmlpp::Element*>(codeNode); |
| 102 | + |
| 103 | + filter = codeElement->get_child_text()->get_content(); |
| 104 | + |
| 105 | + return true; |
27 | 106 | } |
Index: trunk/extensions/AbuseFilter/parser_native/afutils.cpp |
— | — | @@ -46,7 +46,7 @@ |
47 | 47 | return AFPData( (v1 || v2) && !(v1 && v2) ); |
48 | 48 | } |
49 | 49 | |
50 | | - throw new AFPException( "Invalid boolean operation." ); |
| 50 | + throw AFPException( "Invalid boolean operation." ); |
51 | 51 | } |
52 | 52 | |
53 | 53 | AFPData af_compareOp( AFPData a, AFPData b, string op ) { |
— | — | @@ -56,8 +56,8 @@ |
57 | 57 | float f1 = a.toFloat(); |
58 | 58 | float f2 = b.toFloat(); |
59 | 59 | |
60 | | - unsigned int t1 = a.type; |
61 | | - unsigned int t2 = b.type; |
| 60 | + unsigned int t1 = a.getType(); |
| 61 | + unsigned int t2 = b.getType(); |
62 | 62 | |
63 | 63 | if (op == "==") { |
64 | 64 | return AFPData( s1 == s2 ); |
— | — | @@ -76,7 +76,7 @@ |
77 | 77 | } else if (op == "<=") { |
78 | 78 | return AFPData( f1 <= f2 ); |
79 | 79 | } |
80 | | - throw new AFPException( "Invalid comparison type" ); |
| 80 | + throw AFPException( "Invalid comparison type" ); |
81 | 81 | } |
82 | 82 | |
83 | 83 | AFPData af_mulRel( AFPData a, AFPData b, string op ) { |
— | — | @@ -94,11 +94,11 @@ |
95 | 95 | return AFPData( (double)(i1 % i2) ); |
96 | 96 | } |
97 | 97 | |
98 | | - throw new AFPException( "Invalid multiplication-related operator" ); |
| 98 | + throw AFPException( "Invalid multiplication-related operator" ); |
99 | 99 | } |
100 | 100 | |
101 | 101 | AFPData af_sum( AFPData a, AFPData b ) { |
102 | | - if (a.type == D_STRING || b.type == D_STRING) { |
| 102 | + if (a.getType() == D_STRING || b.getType() == D_STRING) { |
103 | 103 | return AFPData( a.toString() + b.toString() ); |
104 | 104 | } else { |
105 | 105 | return AFPData( a.toFloat() * b.toFloat() ); |
— | — | @@ -135,5 +135,5 @@ |
136 | 136 | return AFPData( result ); |
137 | 137 | } |
138 | 138 | |
139 | | - throw new AFPException( "Unknown keyword %s", keyword ); |
| 139 | + throw AFPException( "Unknown keyword %s", keyword ); |
140 | 140 | } |
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.cpp |
— | — | @@ -11,13 +11,20 @@ |
12 | 12 | |
13 | 13 | |
14 | 14 | AFPData::AFPData( unsigned int new_type, void* new_value, size_t new_size ) { |
15 | | - this->makeData( new_type, new_value, new_size ); |
| 15 | + this->makeData( new_type, new_value, new_size, "full constructor" ); |
16 | 16 | } |
17 | 17 | |
18 | | -void AFPData::makeData( unsigned int new_type, void* new_value, size_t new_size ) { |
19 | | - type = new_type; |
20 | | - value = new_value; |
21 | | - size = new_size; |
| 18 | +void AFPData::makeData( unsigned int new_type, void* new_value, size_t new_size, string new_source ) { |
| 19 | + this->type = new_type; |
| 20 | + this->value = new_value; |
| 21 | + this->size = new_size; |
| 22 | + this->source = new_source; |
| 23 | + |
| 24 | + if (this->type > DATATYPE_MAX) { |
| 25 | + // Something funky's going on |
| 26 | +// cerr << "Something funky. Trying to construct a datum with type " << this->type << ", source is " << new_source << endl; |
| 27 | + return; |
| 28 | + } |
22 | 29 | } |
23 | 30 | |
24 | 31 | AFPData::AFPData( string var ) { |
— | — | @@ -27,31 +34,33 @@ |
28 | 35 | char* last_char; |
29 | 36 | istringstream ss(var); |
30 | 37 | |
| 38 | + this->source = "string constructor"; |
| 39 | + |
31 | 40 | // Try integer |
32 | 41 | if (!!(ss >> intval) && intval != 0) { // 0.25 converts to 0, otherwise. |
33 | 42 | // Valid conversion |
34 | 43 | long int* val = new long int( intval ); |
35 | | - this->makeData( D_INTEGER, (void*)val, sizeof(long int) ); |
| 44 | + this->makeData( D_INTEGER, (void*)val, sizeof(long int), "string constructor" ); |
36 | 45 | return; |
37 | 46 | } |
38 | 47 | |
39 | 48 | if (!!(ss >> fval)) { |
40 | 49 | double* val = new double(fval); |
41 | | - this->makeData( D_FLOAT, (void*)val, sizeof(double) ); |
| 50 | + this->makeData( D_FLOAT, (void*)val, sizeof(double), "string constructor" ); |
42 | 51 | return; |
43 | 52 | } |
44 | 53 | |
45 | 54 | // Last resort |
46 | 55 | // Duplicate the string. |
47 | 56 | string* s = new string(var); |
48 | | - this->makeData( D_STRING, (void*)s, sizeof(string) ); |
| 57 | + this->makeData( D_STRING, (void*)s, sizeof(string), "string constructor" ); |
49 | 58 | return; |
50 | 59 | } |
51 | 60 | |
52 | 61 | AFPData::AFPData( AFPData old, unsigned int newType ) { |
53 | 62 | if (old.type > DATATYPE_MAX) { |
54 | 63 | // Non-existent type |
55 | | - throw new AFPException( "Given junk data" ); |
| 64 | + throw AFPException( "Given junk data" ); |
56 | 65 | } |
57 | 66 | |
58 | 67 | if (old.type == newType) { |
— | — | @@ -59,21 +68,23 @@ |
60 | 69 | |
61 | 70 | // Duplicate the contents. |
62 | 71 | if (old.type == D_STRING) { |
63 | | - newVal = (void*) new string(old.toString()); |
| 72 | + string* s = new string(); |
| 73 | + s->append(old.toString()); |
| 74 | + newVal = (void*) s; |
64 | 75 | } else if (old.type == D_INTEGER) { |
65 | 76 | newVal = (void*) new long int(old.toInt()); |
66 | 77 | } else if (old.type == D_FLOAT) { |
67 | 78 | newVal = (void*) new double(old.toFloat()); |
68 | 79 | } |
69 | 80 | |
70 | | - this->makeData( old.type, newVal, old.size ); |
| 81 | + this->makeData( old.type, newVal, old.size, "cast constructor (copy)" ); |
71 | 82 | } else if (newType == 0) { |
72 | | - this->makeData( D_NULL, NULL, 0 ); |
| 83 | + this->makeData( D_NULL, NULL, 0, "cast constructor - null" ); |
73 | 84 | return; |
74 | 85 | } else if (newType == D_INTEGER) { |
75 | 86 | if (old.type==D_FLOAT) { |
76 | 87 | long int* val = new long int(old.toFloat()); |
77 | | - this->makeData( D_INTEGER, (void*)val, sizeof(long int) ); |
| 88 | + this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - float2int" ); |
78 | 89 | return; |
79 | 90 | } else if (old.type==D_STRING) { |
80 | 91 | long int* val = new long int(); |
— | — | @@ -81,16 +92,16 @@ |
82 | 93 | |
83 | 94 | ss >> *val; |
84 | 95 | |
85 | | - this->makeData( D_INTEGER, (void*)val, sizeof(long int) ); |
| 96 | + this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - string2int" ); |
86 | 97 | return; |
87 | 98 | } else if (old.type==D_NULL) { |
88 | 99 | long int* val = new long int(0); |
89 | | - this->makeData( D_INTEGER, (void*)val, sizeof(long int) ); |
| 100 | + this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - null2int" ); |
90 | 101 | }// No other types possible |
91 | 102 | } else if (newType == D_FLOAT) { |
92 | 103 | if (old.type==D_INTEGER) { |
93 | 104 | double* val = new double(old.toInt()); |
94 | | - this->makeData( D_FLOAT, (void*)val, sizeof(double) ); |
| 105 | + this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - int2float" ); |
95 | 106 | return; |
96 | 107 | } else if (old.type==D_STRING) { |
97 | 108 | double* val = new double(); |
— | — | @@ -98,11 +109,11 @@ |
99 | 110 | |
100 | 111 | ss >> *val; |
101 | 112 | |
102 | | - this->makeData( D_FLOAT, (void*)val, sizeof(double) ); |
| 113 | + this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - string2float" ); |
103 | 114 | return; |
104 | 115 | } else if (old.type==D_NULL) { |
105 | 116 | double* val = new double(0); |
106 | | - this->makeData( D_FLOAT, (void*)val, sizeof(double) ); |
| 117 | + this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - null2float" ); |
107 | 118 | } // No other types possible |
108 | 119 | } else if (newType == D_STRING) { |
109 | 120 | if (old.type == D_INTEGER || old.type == D_FLOAT) { |
— | — | @@ -117,39 +128,81 @@ |
118 | 129 | } |
119 | 130 | |
120 | 131 | string* str = new string(ss.str()); |
121 | | - this->makeData( D_STRING, (void*)str, sizeof(string) ); |
| 132 | + this->makeData( D_STRING, (void*)str, sizeof(string), "cast constructor - num2string" ); |
122 | 133 | return; |
123 | 134 | } else if (old.type==D_NULL) { |
124 | 135 | string* s = new string(""); |
125 | | - this->makeData( D_STRING, (void*)s, sizeof(string) ); |
| 136 | + this->makeData( D_STRING, (void*)s, sizeof(string), "cast constructor - null2string" ); |
126 | 137 | } // No other types possible |
127 | 138 | } |
128 | 139 | |
129 | 140 | if (this->type > DATATYPE_MAX) { |
130 | 141 | // Non-existent type |
131 | | - throw new AFPException( "Created junk data" ); |
| 142 | + throw AFPException( "Created junk data" ); |
132 | 143 | } |
133 | 144 | } |
134 | 145 | |
135 | | -AFPData::AFPData() { this->makeData( 0, NULL, 0 );} |
| 146 | +AFPData::AFPData() { this->source = "empty constructor"; this->makeData( 0, NULL, 0, "empty constructor" );} |
136 | 147 | |
137 | | -AFPData::~AFPData() { /*free(this->value);*/ } |
| 148 | +AFPData::~AFPData() { // Releases the payload behind the untyped value pointer according to the stored type tag
| 149 | + if (this->value == 0x0) { // nothing was allocated (or it was already released)
| 150 | + return; |
| 151 | + } else if (this->type > DATATYPE_MAX) { |
| 152 | + // Type tag is out of range, so the pointee type is unknown: skip the delete rather than guess |
| 153 | +// cerr << "Something funky. Trying to destruct a datum with type " << this->type << endl; |
| 154 | + return; |
| 155 | + } |
| 156 | + |
| 157 | +// cerr << "Freeing " << this->value << " - type " << this->type << " - source " << this->source << endl; |
| 158 | + |
| 159 | + switch (this->type) { // cast back to the concrete type before delete so the matching destructor runs
| 160 | + case D_FLOAT: |
| 161 | + delete (double*)this->value; |
| 162 | + break; |
| 163 | + case D_INTEGER: |
| 164 | + delete (long int*)this->value; |
| 165 | + break; |
| 166 | + case D_STRING: |
| 167 | + delete (string*)this->value; |
| 168 | + break; |
| 169 | +// default: |
| 170 | +// delete this->value; |
| 171 | + } |
| 172 | + |
| 173 | + this->value = 0x0; // mark as released so the early-return guard above triggers if this runs again
| 174 | +} |
138 | 175 | |
139 | 176 | AFPData::AFPData( const AFPData & oldData ) { // Copy constructor: deep-copies the payload of oldData
| 177 | + this->source = "copy constructor"; |
| 178 | + |
| 179 | + if (oldData.type > DATATYPE_MAX) { |
| 180 | + this->makeData( 0, NULL, 0, "copy constructor" ); // Something funky's going on: become a NULL datum (as operator= does) instead of leaving type/value uninitialised |
| 181 | +// cerr << "Something funky. Trying to copy a datum with type " << oldData.type << ", source " << oldData.source << endl; |
| 182 | + return; |
| 183 | + } |
| 184 | + |
140 | 185 | // Duplicate the inner data |
141 | 186 | void* newVal = 0; // stays null for D_NULL and for any type not handled below
142 | 187 | |
143 | 188 | if (oldData.type == D_STRING) { |
144 | | - string* s = new string(""); |
145 | | - s->append(*(string*)oldData.value); |
146 | | - newVal = (void*)s; |
| 189 | + string* ival = new string(); |
| 190 | + *ival = *(string*)oldData.value; |
| 191 | + newVal = (void*)ival; |
147 | 192 | } else if (oldData.type == D_INTEGER) { |
148 | | - newVal = (void*) new long int(*(long int*)oldData.value); |
| 193 | + long int* ival = new long int; |
| 194 | + *ival = *(long int*)oldData.value; |
| 195 | + newVal = (void*)ival; |
149 | 196 | } else if (oldData.type == D_FLOAT) { |
150 | | - newVal = (void*) new double(*(double*)oldData.value); |
| 197 | + double* ival = new double; |
| 198 | + *ival = *(double*)oldData.value; |
| 199 | + newVal = (void*)ival; |
| 200 | + } else if (oldData.type == D_NULL) { |
| 201 | + newVal = 0; |
| 202 | + } else { |
| 203 | +// cerr << "Asked to copy an unknown type " << oldData.type << endl; |
151 | 204 | } |
152 | 205 | |
153 | | - this->makeData( oldData.type, newVal, oldData.size ); |
| 206 | + this->makeData( oldData.type, newVal, oldData.size, "copy constructor" ); |
154 | 207 | } |
155 | 208 | |
156 | 209 | long int AFPData::toInt() { |
— | — | @@ -189,17 +242,57 @@ |
190 | 243 | AFPData::AFPData( long int var ) { |
191 | 244 | long int* i = new long int(var); |
192 | 245 | |
193 | | - this->makeData( D_INTEGER, i, sizeof(long int) ); |
| 246 | + this->makeData( D_INTEGER, i, sizeof(long int), "int constructor" ); |
194 | 247 | } |
195 | 248 | |
196 | 249 | AFPData::AFPData( double var ) { |
197 | 250 | double* d = new double(var); |
198 | 251 | |
199 | | - this->makeData( D_FLOAT, d, sizeof(double) ); |
| 252 | + this->makeData( D_FLOAT, d, sizeof(double), "double constructor" ); |
200 | 253 | } |
201 | 254 | |
202 | 255 | AFPData::AFPData( bool var ) { |
203 | 256 | long int* i = new long int(var); |
204 | 257 | |
205 | | - this->makeData( D_INTEGER, i, sizeof(long int) ); |
| 258 | + this->makeData( D_INTEGER, i, sizeof(long int), "bool constructor" ); |
206 | 259 | } |
| 260 | + |
| 261 | +unsigned int AFPData::getType() { return this->type; } // Accessor for the stored type tag (compared against D_* constants elsewhere in this file)
| 262 | + |
| 263 | +AFPData & AFPData::operator= (const AFPData & oldData) { // Deep-copy assignment; returns *this
| 264 | + // Protect against self-assignment |
| 265 | + if (this == &oldData) { |
| 266 | + return *this; |
| 267 | + } |
| 268 | + |
| 269 | + // NULLs and INVALID data types need no deep copy |
| 270 | + if (oldData.type > DATATYPE_MAX || oldData.type == D_NULL) { |
| 271 | + this->makeData( 0, NULL, 0, "assignment operator" ); |
| 272 | + return *this; |
| 273 | + } |
| 274 | + |
| 275 | + // Otherwise, do a proper copy: duplicate the inner data. |
| 276 | + // NOTE(review): the payload previously held by *this is not released here - confirm makeData() frees it, otherwise it leaks |
| 277 | + void* newVal = 0; // never hand an indeterminate pointer to makeData() if no branch below matches
| 278 | + if (oldData.type == D_STRING) { |
| 279 | + string* ival = new string(); |
| 280 | + *ival = *(string*)oldData.value; |
| 281 | + newVal = (void*)ival; |
| 282 | + } else if (oldData.type == D_INTEGER) { |
| 283 | + long int* ival = new long int; |
| 284 | + *ival = *(long int*)oldData.value; |
| 285 | + newVal = (void*)ival; |
| 286 | + } else if (oldData.type == D_FLOAT) { |
| 287 | + double* ival = new double; |
| 288 | + *ival = *(double*)oldData.value; |
| 289 | + newVal = (void*)ival; |
| 290 | + } else if (oldData.type == D_NULL) { // not reachable: D_NULL already returned above
| 291 | + newVal = 0; |
| 292 | + } else { |
| 293 | +// cerr << "Asked to copy an unknown type " << oldData.type << endl; |
| 294 | + } |
| 295 | + |
| 296 | + this->makeData( oldData.type, newVal, oldData.size, "assignment operator" ); |
| 297 | + |
| 298 | + return *this; |
| 299 | +} |
Index: trunk/extensions/AbuseFilter/parser_native/check |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Index: trunk/extensions/AbuseFilter/parser_native/makefile |
— | — | @@ -1,12 +1,13 @@ |
| 2 | +all: check af_parser |
2 | 3 | |
| 4 | +af_parser: afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o |
| 5 | + g++ -g -o af_parser afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o -lboost_regex -lxml++-2.6 -lxml2 -lglibmm-2.4 -lgobject-2.0 -lsigc-2.0 -lglib-2.0 |
3 | 6 | # check is linked from check.o, so check.o (not main.o) must be its prerequisite; libraries go after the objects that use them |
4 | | -all: check |
5 | | - |
6 | 7 | check: afeval.o affunctions.o afparser.o aftypes.o afutils.o check.o |
7 | | - g++ -g -o check -lboost_regex -licudata afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o |
| 8 | + g++ -g -o check afeval.o affunctions.o afparser.o aftypes.o afutils.o check.o -lboost_regex |
8 | 9 | |
9 | 10 | .cpp.o: |
10 | | - g++ -g -c $< |
| 11 | + g++ -g -c $< -I/usr/include/libxml++-2.6 -I/usr/lib/libxml++-2.6/include -I/usr/include/libxml2 -I/usr/include/glibmm-2.4 -I/usr/lib/glibmm-2.4/include -I/usr/include/sigc++-2.0 -I/usr/lib/sigc++-2.0/include -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include |
11 | 12 | # clean removes every binary that "all" builds |
12 | 13 | clean: |
13 | | - rm -f *.o check |
\ No newline at end of file |
| 14 | + rm -f *.o check af_parser |
Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php |
— | — | @@ -0,0 +1,665 @@ |
| 2 | +<?php |
| 3 | +if ( ! defined( 'MEDIAWIKI' ) ) |
| 4 | + die(); |
| 5 | +/** |
| 6 | +Abuse filter parser. |
| 7 | +Copyright (C) Victor Vasiliev, 2008. Based on ideas by Andrew Garrett. Distributed under GNU GPL v2 terms. |
| 8 | + |
| 9 | +Types of token: |
| 10 | +* T_NONE - special-purpose token |
| 11 | +* T_BRACE - ( or ) |
| 12 | +* T_COMMA - , |
| 13 | +* T_OP - operator like + or ^ |
| 14 | +* T_NUMBER - number |
| 15 | +* T_STRING - string, in "" or '' |
| 16 | +* T_KEYWORD - keyword |
| 17 | +* T_ID - identifier |
| 18 | + |
| 19 | +Levels of parsing: |
| 20 | +* Set (S) - = (assignment to a variable) |
| 21 | +* BoolOps (BO) - &, |, ^ |
| 22 | +* CompOps (CO) - ==, !=, ===, !==, >, <, >=, <= |
| 23 | +* SumRel (SR) - +, - |
| 24 | +* MulRel (MR) - *, /, % |
| 25 | +* Pow (P) - ** |
| 26 | +* BoolNeg (BN) - ! operation |
| 27 | +* SpecialOperators (SO) - in and like |
| 28 | +* Unarys (U) - plus and minus in cases like -5 or -(2 * +2) |
| 29 | +* Braces (B) - ( and ) |
| 30 | +* Functions (F) |
| 31 | +* Atom (A) - return value |
| 32 | +*/ |
| 33 | + |
| 34 | +class AFPToken { // One lexical token produced by AbuseFilterParser::nextToken(): type, value and source position
| 35 | + //Types of token |
| 36 | + const TNone = 'T_NONE'; // emitted when the input is exhausted; parseTokens() stops on it
| 37 | + const TID = 'T_ID'; |
| 38 | + const TKeyword = 'T_KEYWORD'; |
| 39 | + const TString = 'T_STRING'; |
| 40 | + const TNumber = 'T_NUMBER'; |
| 41 | + const TOp = 'T_OP'; |
| 42 | + const TBrace = 'T_BRACE'; |
| 43 | + const TComma = 'T_COMMA'; |
| 44 | + |
| 45 | + var $type; // one of the T* constants above
| 46 | + var $value; // token text, or the decoded string / numeric value
| 47 | + var $pos; // character offset of the token in the filter source
| 48 | + |
| 49 | + public function __construct( $type = self::TNone, $value = null, $pos = 0 ) { // defaults build an empty TNone token
| 50 | + $this->type = $type; |
| 51 | + $this->value = $value; |
| 52 | + $this->pos = $pos; |
| 53 | + } |
| 54 | +} |
| 55 | + |
| 56 | +class AFPData { |
| 57 | + //Datatypes |
| 58 | + const DNumber = 'number'; //any integer or double |
| 59 | + const DString = 'string'; |
| 60 | + const DNull = 'null'; |
| 61 | + const DBool = 'bool'; |
| 62 | + |
| 63 | + var $type; |
| 64 | + var $data; |
| 65 | + |
| 66 | + public function __construct( $type = self::DNull, $val = null ) { |
| 67 | + $this->type = $type; |
| 68 | + $this->data = $val; |
| 69 | + } |
| 70 | + |
| 71 | + public static function newFromPHPVar( $var ) { |
| 72 | + if( is_string( $var ) ) |
| 73 | + return new AFPData( self::DString, $var ); |
| 74 | + elseif( is_int( $var ) || is_float( $var ) ) |
| 75 | + return new AFPData( self::DNumber, $var ); |
| 76 | + elseif( is_bool( $var ) ) |
| 77 | + return new AFPData( self::DBool, $var ); |
| 78 | + elseif( is_null( $var ) ) |
| 79 | + return new AFPData(); |
| 80 | + else |
| 81 | + throw new AFPException( "Data type " . gettype( $var ) . " is not supported by AbuseFilter" ); |
| 82 | + } |
| 83 | + |
| 84 | + public function dup() { |
| 85 | + return new AFPData( $this->type, $this->data ); |
| 86 | + } |
| 87 | + |
| 88 | + public static function castTypes( $orig, $target ) { |
| 89 | + if( $orig->type == $target ) |
| 90 | + return $orig->dup(); |
| 91 | + if( $target == self::DNull ) { |
| 92 | + return new AFPData(); |
| 93 | + } |
| 94 | + if( $target == self::DBool ) { |
| 95 | + return new AFPData( self::DBool, (bool)$orig->data ); |
| 96 | + } |
| 97 | + if( $target == self::DNumber ) { |
| 98 | + return new AFPData( self::DNumber, doubleval( $orig->data ) ); |
| 99 | + } |
| 100 | + if( $target == self::DString ) { |
| 101 | + return new AFPData( self::DString, strval( $orig->data ) ); |
| 102 | + } |
| 103 | + } |
| 104 | + |
| 105 | + public static function boolInvert( $value ) { |
| 106 | + return new AFPData( self::DBool, !$value->toBool() ); |
| 107 | + } |
| 108 | + |
| 109 | + public static function pow( $base, $exponent ) { |
| 110 | + return new AFPData( self::DNumber, pow( $base->toNumber(), $exponent->toNumber() ) ); |
| 111 | + } |
| 112 | + |
| 113 | + public static function keywordIn( $a, $b ) { |
| 114 | + $a = $a->toString(); |
| 115 | + $b = $b->toString(); |
| 116 | + |
| 117 | + if ($a == '' || $b == '') { |
| 118 | + return new AFPData( self::DBool, false ); |
| 119 | + } |
| 120 | + |
| 121 | + return new AFPData( self::DBool, in_string( $a, $b ) ); |
| 122 | + } |
| 123 | + |
| 124 | + public static function keywordLike( $str, $regex ) { |
| 125 | + $str = $str->toString(); |
| 126 | + $regex = $regex->toString() . 'u'; //Append unicode modifier |
| 127 | + wfSuppressWarnings(); |
| 128 | + $result = preg_match( $regex, $str ); |
| 129 | + wfRestoreWarnings(); |
| 130 | + return new AFPData( self::DBool, (bool)$result ); |
| 131 | + } |
| 132 | + |
| 133 | + public static function unaryMinus( $data ) { // Arithmetic negation: returns a new DNumber holding -( $data as number )
| 134 | + return new AFPData( self::DNumber, -$data->toNumber() ); // the minus sign is the whole point: without it "-x" evaluates to x
| 135 | + } |
| 136 | + |
| 137 | + public static function boolOp( $a, $b, $op ) { |
| 138 | + $a = $a->toBool(); |
| 139 | + $b = $b->toBool(); |
| 140 | + if( $op == '|' ) |
| 141 | + return new AFPData( self::DBool, $a || $b ); |
| 142 | + if( $op == '&' ) |
| 143 | + return new AFPData( self::DBool, $a && $b ); |
| 144 | + if( $op == '^' ) |
| 145 | + return new AFPData( self::DBool, $a xor $b ); |
| 146 | + throw new AFPException( "Invalid boolean operation: {$op}" ); |
| 147 | + } |
| 148 | + |
| 149 | + public static function compareOp( $a, $b, $op ) { |
| 150 | + if( $op == '==' ) |
| 151 | + return new AFPData( self::DBool, $a->toString() === $b->toString() ); |
| 152 | + if( $op == '!=' ) |
| 153 | + return new AFPData( self::DBool, $a->toString() !== $b->toString() ); |
| 154 | + if( $op == '===' ) |
| 155 | + return new AFPData( self::DBool, $a->data === $b->data && $a->type == $b->type ); |
| 156 | + if( $op == '!==' ) |
| 157 | + return new AFPData( self::DBool, $a->data !== $b->data || $a->type != $b->type ); |
| 158 | + $a = $a->toString(); |
| 159 | + $b = $b->toString(); |
| 160 | + if( $op == '>' ) |
| 161 | + return new AFPData( self::DBool, $a > $b ); |
| 162 | + if( $op == '<' ) |
| 163 | + return new AFPData( self::DBool, $a < $b ); |
| 164 | + if( $op == '>=' ) |
| 165 | + return new AFPData( self::DBool, $a >= $b ); |
| 166 | + if( $op == '<=' ) |
| 167 | + return new AFPData( self::DBool, $a <= $b ); |
| 168 | + throw new AFPException( "Invalid comprasion operation: {$op}" ); |
| 169 | + } |
| 170 | + |
| 171 | + public static function mulRel( $a, $b, $op ) { |
| 172 | + $a = $a->toNumber(); |
| 173 | + $b = $b->toNumber(); |
| 174 | + if( $op == '*' ) |
| 175 | + return new AFPData( self::DNumber, $a * $b ); |
| 176 | + if( $op == '/' ) |
| 177 | + return new AFPData( self::DNumber, $a / $b ); |
| 178 | + if( $op == '%' ) |
| 179 | + return new AFPData( self::DNumber, $a % $b ); |
| 180 | + throw new AFPException( "Invalid multiplication-related operation: {$op}" ); |
| 181 | + } |
| 182 | + |
| 183 | + public static function sum( $a, $b ) { |
| 184 | + if( $a->type == self::DString || $b->type == self::DString ) |
| 185 | + return new AFPData( self::DString, $a->toString() . $b->toString() ); |
| 186 | + else |
| 187 | + return new AFPData( self::DNumber, $a->toNumber() + $b->toNumber() ); |
| 188 | + } |
| 189 | + |
| 190 | + public static function sub( $a, $b ) { |
| 191 | + return new AFPData( self::DNumber, $a->toNumber() - $b->toNumber() ); |
| 192 | + } |
| 193 | + |
| 194 | + /** Convert shorteners */ |
| 195 | + public function toBool() { |
| 196 | + return self::castTypes( $this, self::DBool )->data; |
| 197 | + } |
| 198 | + |
| 199 | + public function toString() { |
| 200 | + return self::castTypes( $this, self::DString )->data; |
| 201 | + } |
| 202 | + |
| 203 | + public function toNumber() { |
| 204 | + return self::castTypes( $this, self::DNumber )->data; |
| 205 | + } |
| 206 | +} |
| 207 | + |
| 208 | +class AFPException extends MWException {} |
| 209 | + |
| 210 | +class AbuseFilterParser { |
| 211 | + var $mParams, $mVars, $mCode, $mTokens, $mPos, $mCur; |
| 212 | + |
| 213 | + static $mFunctions = array( |
| 214 | + 'lc' => 'funcLc', |
| 215 | + 'len' => 'funcLen', |
| 216 | + 'norm' => 'funcNorm', |
| 217 | + 'simplenorm' => 'funcSimpleNorm', |
| 218 | + 'specialratio' => 'funcSpecialRatio', |
| 219 | + ); |
| 220 | + static $mOps = array( |
| 221 | + '!', '*', '**', '/', '+', '-', '%', '&', '|', '^', |
| 222 | + '<', '>', '>=', '<=', '==', '!=', '=', '===', '!==', |
| 223 | + ); |
| 224 | + static $mKeywords = array( |
| 225 | + 'in', 'like', 'true', 'false', 'null', |
| 226 | + ); |
| 227 | + |
| 228 | + static $parserCache = array(); |
| 229 | + |
| 230 | + static $funcCache = array(); |
| 231 | + |
| 232 | + public function __construct() { |
| 233 | + $this->resetState(); |
| 234 | + } |
| 235 | + |
| 236 | + public function resetState() { |
| 237 | + $this->mParams = array(); |
| 238 | + $this->mCode = ''; |
| 239 | + $this->mTokens = array(); |
| 240 | + $this->mVars = array(); |
| 241 | + $this->mPos = 0; |
| 242 | + } |
| 243 | + |
| 244 | + public function setVar( $name, $var ) { |
| 245 | + $this->mVars[$name] = AFPData::newFromPHPVar( $var ); |
| 246 | + } |
| 247 | + |
| 248 | + public function setVars( $vars ) { |
| 249 | + wfProfileIn( __METHOD__ ); |
| 250 | + foreach( $vars as $name => $var ) { |
| 251 | + $this->setVar( $name, $var ); |
| 252 | + } |
| 253 | + wfProfileOut( __METHOD__ ); |
| 254 | + } |
| 255 | + |
| 256 | + protected function move( $shift = +1 ) { |
| 257 | + $old = $this->mPos; |
| 258 | + $this->mPos += $shift; |
| 259 | + if( $this->mPos >= 0 && $this->mPos < count( $this->mTokens ) ) { |
| 260 | + $this->mCur = $this->mTokens[$this->mPos]; |
| 261 | + return true; |
| 262 | + } |
| 263 | + else { |
| 264 | + $this->mPos = $old; |
| 265 | + return false; |
| 266 | + } |
| 267 | + } |
| 268 | + |
| 269 | + public function parse( $code ) { |
| 270 | + wfProfileIn( __METHOD__ ); |
| 271 | + $this->mCode = $code; |
| 272 | + $this->mTokens = self::parseTokens( $code ); |
| 273 | + $this->mPos = 0; |
| 274 | + $this->mCur = $this->mTokens[0]; |
| 275 | + $result = new AFPData(); |
| 276 | + $this->doLevelEntry( $result ); |
| 277 | + wfProfileOut( __METHOD__ ); |
| 278 | + return $result->toBool(); |
| 279 | + } |
| 280 | + |
| 281 | + /* Levels */ |
| 282 | + |
| 283 | + /** Handles unexpected characters after the expression */ |
| 284 | + protected function doLevelEntry( &$result ) { |
| 285 | + $this->doLevelSet( $result ); |
| 286 | + if( $this->mCur->type != AFPToken::TNone ) { |
| 287 | + throw new AFPException( "Unexpected {$this->mCur->type} at char {$this->mCur->pos}" ); |
| 288 | + } |
| 289 | + } |
| 290 | + |
| 291 | + /** Handles "=" operator: "name = expr" stores a copy of the value in mVars[name]; anything else is passed to the boolean level */ |
| 292 | + protected function doLevelSet( &$result ) { |
| 293 | + wfProfileIn( __METHOD__ ); |
| 294 | + if( $this->mCur->type == AFPToken::TID ) { |
| 295 | + $varname = $this->mCur->value; |
| 296 | + $this->move(); // look one token ahead for "="
| 297 | + if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == '=' ) { |
| 298 | + $this->move(); |
| 299 | + $this->doLevelSet( $result ); // right-hand side may itself be an assignment
| 300 | + $this->mVars[$varname] = $result->dup(); |
| 301 | + wfProfileOut( __METHOD__ ); return; // close the profiling section opened above before leaving early
| 302 | + } |
| 303 | + $this->move( -1 ); // not an assignment: step back onto the identifier
| 304 | + } |
| 305 | + wfProfileOut( __METHOD__ ); |
| 306 | + $this->doLevelBoolOps( $result ); |
| 307 | + } |
| 308 | + |
| 309 | + protected function doLevelBoolOps( &$result ) { |
| 310 | + $this->doLevelCompares( $result ); |
| 311 | + $ops = array( '&', '|', '^' ); |
| 312 | + while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) { |
| 313 | + $op = $this->mCur->value; |
| 314 | + $this->move(); |
| 315 | + $r2 = new AFPData(); |
| 316 | + $this->doLevelCompares( $r2 ); |
| 317 | + wfProfileIn( __METHOD__ ); |
| 318 | + $result = AFPData::boolOp( $result, $r2, $op ); |
| 319 | + wfProfileOut( __METHOD__ ); |
| 320 | + } |
| 321 | + } |
| 322 | + |
| 323 | + protected function doLevelCompares( &$result ) { // Comparison level; its operands are sums, the next tighter level
| 324 | + $this->doLevelSumRels( $result ); // $result is already a by-reference parameter, so no call-time "&" (fatal on PHP >= 5.4)
| 325 | + $ops = array( '==', '===', '!=', '!==', '<', '>', '<=', '>=' ); |
| 326 | + while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) { |
| 327 | + $op = $this->mCur->value; |
| 328 | + $this->move(); |
| 329 | + $r2 = new AFPData(); |
| 330 | + $this->doLevelSumRels( $r2 ); |
| 331 | + wfProfileIn( __METHOD__ ); |
| 332 | + $result = AFPData::compareOp( $result, $r2, $op ); |
| 333 | + wfProfileOut( __METHOD__ ); |
| 334 | + } |
| 335 | + } |
| 336 | + |
| 337 | + protected function doLevelMulRels( &$result ) { // Handles * / %; binds tighter than + and -, so its operands are powers
| 338 | + $this->doLevelPow( $result ); |
| 339 | + wfProfileIn( __METHOD__ ); |
| 340 | + $ops = array( '*', '/', '%' ); |
| 341 | + while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) { |
| 342 | + $op = $this->mCur->value; |
| 343 | + $this->move(); |
| 344 | + $r2 = new AFPData(); |
| 345 | + $this->doLevelPow( $r2 ); |
| 346 | + $result = AFPData::mulRel( $result, $r2, $op ); |
| 347 | + } |
| 348 | + wfProfileOut( __METHOD__ ); |
| 349 | + } |
| 350 | + |
| 351 | + protected function doLevelSumRels( &$result ) { // Handles + and -; operands are products, so 1 + 2 * 3 evaluates to 7
| 352 | + $this->doLevelMulRels( $result ); |
| 353 | + wfProfileIn( __METHOD__ ); |
| 354 | + $ops = array( '+', '-' ); |
| 355 | + while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) { |
| 356 | + $op = $this->mCur->value; |
| 357 | + $this->move(); |
| 358 | + $r2 = new AFPData(); |
| 359 | + $this->doLevelMulRels( $r2 ); |
| 360 | + if( $op == '+' ) |
| 361 | + $result = AFPData::sum( $result, $r2 ); |
| 362 | + if( $op == '-' ) |
| 363 | + $result = AFPData::sub( $result, $r2 ); |
| 364 | + } |
| 365 | + wfProfileOut( __METHOD__ ); |
| 366 | + } |
| 367 | + |
| 368 | + protected function doLevelPow( &$result ) { |
| 369 | + $this->doLevelBoolInvert( $result ); |
| 370 | + wfProfileIn( __METHOD__ ); |
| 371 | + while( $this->mCur->type == AFPToken::TOp && $this->mCur->value == '**' ) { |
| 372 | + $this->move(); |
| 373 | + $expanent = new AFPData(); |
| 374 | + $this->doLevelBoolInvert( $expanent ); |
| 375 | + $result = AFPData::pow( $result, $expanent ); |
| 376 | + } |
| 377 | + wfProfileOut( __METHOD__ ); |
| 378 | + } |
| 379 | + |
| 380 | + protected function doLevelBoolInvert( &$result ) { |
| 381 | + if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == '!' ) { |
| 382 | + $this->move(); |
| 383 | + $this->doLevelSpecialWords( $result ); |
| 384 | + wfProfileIn( __METHOD__ ); |
| 385 | + $result = AFPData::boolInvert( $result ); |
| 386 | + wfProfileOut( __METHOD__ ); |
| 387 | + } else { |
| 388 | + $this->doLevelSpecialWords( $result ); |
| 389 | + } |
| 390 | + } |
| 391 | + |
| 392 | + protected function doLevelSpecialWords( &$result ) { |
| 393 | + $this->doLevelUnarys( $result ); |
| 394 | + $specwords = array( 'in', 'like' ); |
| 395 | + if( $this->mCur->type == AFPToken::TKeyword && in_array( $this->mCur->value, $specwords ) ) { |
| 396 | + $func = 'keyword' . ucfirst( $this->mCur->value ); |
| 397 | + $this->move(); |
| 398 | + $r2 = new AFPData(); |
| 399 | + $this->doLevelUnarys( $r2 ); |
| 400 | + wfProfileIn( __METHOD__ ); |
| 401 | + wfProfileIn( __METHOD__."-$func" ); |
| 402 | + $result = AFPData::$func( $result, $r2 ); |
| 403 | + wfProfileOut( __METHOD__."-$func" ); |
| 404 | + wfProfileOut( __METHOD__ ); |
| 405 | + } |
| 406 | + } |
| 407 | + |
| 408 | + protected function doLevelUnarys( &$result ) { |
| 409 | + $op = $this->mCur->value; |
| 410 | + if( $this->mCur->type == AFPToken::TOp && ( $op == "+" || $op == "-" ) ) { |
| 411 | + $this->move(); |
| 412 | + $this->doLevelBraces( $result ); |
| 413 | + wfProfileIn( __METHOD__ ); |
| 414 | + if( $op == '-' ) { |
| 415 | + $result = AFPData::unaryMinus( $result ); |
| 416 | + } |
| 417 | + wfProfileOut( __METHOD__ ); |
| 418 | + } else { |
| 419 | + $this->doLevelBraces( $result ); |
| 420 | + } |
| 421 | + } |
| 422 | + |
| 423 | + protected function doLevelBraces( &$result ) { |
| 424 | + if( $this->mCur->type == AFPToken::TBrace && $this->mCur->value == '(' ) { |
| 425 | + $this->move(); |
| 426 | + $this->doLevelSet( $result ); |
| 427 | + if( !($this->mCur->type == AFPToken::TBrace && $this->mCur->value == ')') ) |
| 428 | + throw new AFPException( "Expected ) at char {$this->mCur->pos}" ); |
| 429 | + $this->move(); |
| 430 | + } else { |
| 431 | + $this->doLevelFunction( $result ); |
| 432 | + } |
| 433 | + } |
| 434 | + |
| 435 | + protected function doLevelFunction( &$result ) { // Evaluates "name( args... )" for names listed in self::$mFunctions; otherwise defers to doLevelAtom()
| 436 | + if( $this->mCur->type == AFPToken::TID && isset( self::$mFunctions[$this->mCur->value] ) ) { |
| 437 | + wfProfileIn( __METHOD__ ); |
| 438 | + $func = self::$mFunctions[$this->mCur->value]; |
| 439 | + $this->move(); |
| 440 | + if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != '(' ) |
| 441 | + throw new AFPException( "Expected ( at char {$this->mCur->pos}" ); |
| 442 | + wfProfileIn( __METHOD__."-loadargs" ); |
| 443 | + $args = array(); |
| 444 | + if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != ')' ) |
| 445 | + do { |
| 446 | + $this->move(); // step past "(" or ","
| 447 | + $r = new AFPData(); |
| 448 | + // Parse each argument as a full expression. doLevelSet() also |
| 449 | + // handles plain atoms, so no separate doLevelAtom() attempt is |
| 450 | + // needed; retrying from the preceding "(" or "," made arguments |
| 451 | + // such as len(-5) or len(lc(x)) fail with "Expected )". |
| 452 | + $this->doLevelSet( $r ); |
| 453 | + // $r now holds the argument value and mCur is the token after it |
| 454 | + $args[] = $r; |
| 455 | + } while( $this->mCur->type == AFPToken::TComma ); |
| 456 | + if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != ')' ) { |
| 457 | + throw new AFPException( "Expected ) at char {$this->mCur->pos}" ); |
| 458 | + } |
| 459 | + wfProfileOut( __METHOD__."-loadargs" ); |
| 460 | + |
| 461 | + wfProfileIn( __METHOD__."-$func" ); |
| 462 | + |
| 463 | + $funcHash = md5($func.serialize($args)); // cache key: function name plus serialized arguments
| 464 | + |
| 465 | + if (isset(self::$funcCache[$funcHash])) { |
| 466 | + $result = self::$funcCache[$funcHash]; |
| 467 | + } else { |
| 468 | + $result = self::$funcCache[$funcHash] = $this->$func( $args ); |
| 469 | + } |
| 470 | + |
| 471 | + if (count(self::$funcCache) > 1000) { // crude size bound: drop the whole cache once it grows past 1000 entries
| 472 | + self::$funcCache = array(); |
| 473 | + } |
| 474 | + |
| 475 | + wfProfileOut( __METHOD__."-$func" ); |
| 476 | + |
| 477 | + $this->move(); // consume ")"
| 478 | + wfProfileOut( __METHOD__ ); |
| 479 | + } else { |
| 480 | + $this->doLevelAtom( $result ); |
| 481 | + } |
| 482 | + } |
| 483 | + |
| 484 | + protected function doLevelAtom( &$result ) { // Reads one atom (variable, string, number, true/false/null) into $result and advances; throws AFPException otherwise
| 485 | + wfProfileIn( __METHOD__ ); |
| 486 | + $tok = $this->mCur->value; |
| 487 | + switch( $this->mCur->type ) { |
| 488 | + case AFPToken::TID: |
| 489 | + if( isset( $this->mVars[$tok] ) ) { |
| 490 | + $result = $this->mVars[$tok]; |
| 491 | + } else { |
| 492 | + $result = new AFPData(); // unknown variables evaluate to null
| 493 | + } |
| 494 | + break; |
| 495 | + case AFPToken::TString: |
| 496 | + $result = new AFPData( AFPData::DString, $tok ); |
| 497 | + break; |
| 498 | + case AFPToken::TNumber: |
| 499 | + $result = new AFPData( AFPData::DNumber, $tok ); |
| 500 | + break; |
| 501 | + case AFPToken::TKeyword: |
| 502 | + if( $tok == "true" ) |
| 503 | + $result = new AFPData( AFPData::DBool, true ); |
| 504 | + elseif( $tok == "false" ) |
| 505 | + $result = new AFPData( AFPData::DBool, false ); |
| 506 | + elseif( $tok == "null" ) |
| 507 | + $result = new AFPData(); |
| 508 | + else |
| 509 | + throw new AFPException( "Unexpected {$this->mCur->type} at char {$this->mCur->pos}" ); |
| 510 | + break; |
| 511 | + case AFPToken::TBrace: |
| 512 | + if( $this->mCur->value == ')' ) { |
| 513 | + wfProfileOut( __METHOD__ ); return; } // Handled at the entry level; close the profiling section before leaving early
| 514 | + default: // also reached by "(" falling through from the TBrace case
| 515 | + throw new AFPException( "Unexpected {$this->mCur->type} at char {$this->mCur->pos}" ); |
| 516 | + } |
| 517 | + $this->move(); |
| 518 | + wfProfileOut( __METHOD__ ); |
| 519 | + } |
| 520 | + |
| 521 | + /* End of levels */ |
| 522 | + |
| 523 | + public static function parseTokens( $code ) { |
| 524 | + $r = array(); |
| 525 | + $len = strlen( $code ); |
| 526 | + $hash = md5(trim($code)); |
| 527 | + |
| 528 | + if (isset(self::$parserCache[$hash])) { |
| 529 | + return self::$parserCache[$hash]; |
| 530 | + } |
| 531 | + |
| 532 | + while( $tok = self::nextToken( $code, $len ) ) { |
| 533 | + list( $val, $type, $code, $pos ) = $tok; |
| 534 | + $r[] = new AFPToken( $type, $val, $pos ); |
| 535 | + if( $type == AFPToken::TNone ) |
| 536 | + break; |
| 537 | + } |
| 538 | + return self::$parserCache[$hash] = $r; |
| 539 | + } |
| 540 | + |
| 541 | + protected static function nextToken( $code, $len ) { // Returns array( value, type, remaining code, position ) for the first token of $code; $len is the full source length
| 542 | + $tok = ''; |
| 543 | + if( strlen( $code ) == 0 ) return array( '', AFPToken::TNone, $code, $len ); |
| 544 | + while( strlen( $code ) != 0 && ctype_space( $code[0] ) ) // length guard: trailing whitespace empties $code, and $code[0] would then be an invalid offset
| 545 | + $code = substr( $code, 1 ); |
| 546 | + $pos = $len - strlen( $code ); // offset of this token in the original source
| 547 | + if( strlen( $code ) == 0 ) return array( '', AFPToken::TNone, $code, $pos ); |
| 548 | + if( $code[0] == ',' ) |
| 549 | + return array( ',', AFPToken::TComma, substr( $code, 1 ), $pos ); |
| 550 | + if( $code[0] == '(' or $code[0] == ')' ) |
| 551 | + return array( $code[0], AFPToken::TBrace, substr( $code, 1 ), $pos ); |
| 552 | + if( $code[0] == '"' || $code[0] == "'" ) { |
| 553 | + $type = $code[0]; // the quote character that must close this string
| 554 | + $code = substr( $code, 1 ); |
| 555 | + while( strlen( $code ) != 0 ) { |
| 556 | + if( $code[0] == $type ) { |
| 557 | + return array( $tok, AFPToken::TString, substr( $code, 1 ), $pos ); |
| 558 | + } |
| 559 | + if( $code[0] == '\\' ) { // backslash escape: decode the following character
| 560 | + if( $code[1] == '\\' ) |
| 561 | + $tok .= '\\'; |
| 562 | + elseif( $code[1] == $type ) |
| 563 | + $tok .= $type; |
| 564 | + elseif( $code[1] == 'n' ) |
| 565 | + $tok .= "\n"; |
| 566 | + elseif( $code[1] == 'r' ) |
| 567 | + $tok .= "\r"; |
| 568 | + elseif( $code[1] == 't' ) |
| 569 | + $tok .= "\t"; |
| 570 | + else |
| 571 | + $tok .= $code[1]; |
| 572 | + $code = substr( $code, 2 ); |
| 573 | + } else { |
| 574 | + $tok .= $code[0]; |
| 575 | + $code = substr( $code, 1 ); |
| 576 | + } |
| 577 | + } |
| 578 | + throw new AFPException( "Unclosed string begining at char $pos" ); |
| 579 | + } |
| 580 | + if( ctype_punct( $code[0] ) ) { // operators: take the whole run of punctuation, then validate it against $mOps
| 581 | + $tok .= $code[0]; |
| 582 | + $code = substr( $code, 1 ); |
| 583 | + while( strlen( $code ) != 0 && ctype_punct( $code[0] ) ) { |
| 584 | + $tok .= $code[0]; |
| 585 | + $code = substr( $code, 1 ); |
| 586 | + } |
| 587 | + if( !in_array( $tok, self::$mOps ) ) |
| 588 | + throw new AFPException( "Invalid operator: {$tok} (at char $pos)" ); |
| 589 | + return array( $tok, AFPToken::TOp, $code, $pos ); |
| 590 | + } |
| 591 | + if( ctype_digit( $code[0] ) ) { // numbers: digits and dots; a dot makes the value a float
| 592 | + $tok .= $code[0]; |
| 593 | + $code = substr( $code, 1 ); |
| 594 | + while( strlen( $code ) != 0 && self::isDigitOrDot( $code[0] ) ) { |
| 595 | + $tok .= $code[0]; |
| 596 | + $code = substr( $code, 1 ); |
| 597 | + } |
| 598 | + return array( in_string( '.', $tok ) ? doubleval( $tok ) : intval( $tok ), AFPToken::TNumber, $code, $pos ); |
| 599 | + } |
| 600 | + if( self::isValidIdSymbol( $code[0] ) ) { // identifiers and keywords
| 601 | + while( strlen( $code ) != 0 && self::isValidIdSymbol( $code[0] ) ) { |
| 602 | + $tok .= $code[0]; |
| 603 | + $code = substr( $code, 1 ); |
| 604 | + } |
| 605 | + $type = in_array( $tok, self::$mKeywords ) ? AFPToken::TKeyword : AFPToken::TID; |
| 606 | + return array( $tok, $type, $code, $pos ); |
| 607 | + } |
| 608 | + throw new AFPException( "Unrecognized token \"{$code[0]}\" at char $pos" ); |
| 609 | + } |
| 610 | + |
| 611 | + protected static function isDigitOrDot( $chr ) { |
| 612 | + return ctype_digit( $chr ) || $chr == '.'; |
| 613 | + } |
| 614 | + |
| 615 | + protected static function isValidIdSymbol( $chr ) { |
| 616 | + return ctype_alnum( $chr ) || $chr == '_'; |
| 617 | + } |
| 618 | + |
| 619 | + //Built-in functions |
| 620 | + protected function funcLc( $args ) { // lc(): lower-cases the first argument using the content language; returns a DString
| 621 | + global $wgContLang; |
| 622 | + if( count( $args ) < 1 ) |
| 623 | + throw new AFPException( "No params passed to lc()" ); // AFPException is the class declared in this file
| 624 | + $s = $args[0]->toString(); |
| 625 | + return new AFPData( AFPData::DString, $wgContLang->lc( $s ) ); |
| 626 | + } |
| 627 | + |
| 628 | + protected function funcLen( $args ) { // len(): length of the first argument in UTF-8 characters; returns a DNumber
| 629 | + if( count( $args ) < 1 ) |
| 630 | + throw new AFPException( "No params passed to len()" ); // AFPException is the class declared in this file
| 631 | + $s = $args[0]->toString(); |
| 632 | + return new AFPData( AFPData::DNumber, mb_strlen( $s, 'utf-8' ) ); |
| 633 | + } |
| 634 | + |
| 635 | + protected function funcNorm( $args ) { // norm(): passes the first argument through AbuseFilter::normalise(); returns a DString
| 636 | + if( count( $args ) < 1 ) |
| 637 | + throw new AFPException( "No params passed to norm()" ); // AFPException is the class declared in this file
| 638 | + $s = $args[0]->toString(); |
| 639 | + return new AFPData( AFPData::DString, AbuseFilter::normalise( $s ) ); |
| 640 | + } |
| 641 | + |
| 642 | + protected function funcSimpleNorm( $args ) { // simplenorm(): strips digits and non-word characters, then lower-cases; returns a DString
| 643 | + if( count( $args ) < 1 ) |
| 644 | + throw new AFPException( "No params passed to simplenorm()" ); // AFPException is the class declared in this file
| 645 | + $s = $args[0]->toString(); |
| 646 | + |
| 647 | + $s = preg_replace( '/[\d\W]+/', '', $s ); |
| 648 | + $s = strtolower( $s ); // lower-case the stripped text; the undefined $value used here before always produced ''
| 649 | + return new AFPData( AFPData::DString, $s ); |
| 650 | + } |
| 651 | + |
| 652 | + protected function funcSpecialRatio( $args ) { // specialratio(): share of bytes in the first argument not matched by \w; returns a DNumber (0 for an empty string)
| 653 | + if( count( $args ) < 1 ) |
| 654 | + throw new AFPException( "No params passed to specialratio()" ); // name the function that was actually called
| 655 | + $s = $args[0]->toString(); |
| 656 | + |
| 657 | + if (!strlen($s)) { // avoid dividing by zero below
| 658 | + return new AFPData( AFPData::DNumber, 0 ); |
| 659 | + } |
| 660 | + |
| 661 | + $specialsonly = preg_replace('/\w/', '', $s ); |
| 662 | + $val = (strlen($specialsonly) / strlen($s)); |
| 663 | + |
| 664 | + return new AFPData( AFPData::DNumber, $val ); |
| 665 | + } |
| 666 | +} |
Property changes on: trunk/extensions/AbuseFilter/AbuseFilter.parser.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 667 | + native |
Added: svn:executable |
2 | 668 | + * |
Index: trunk/extensions/AbuseFilter/AbuseFilter.php |
— | — | @@ -28,6 +28,7 @@ |
29 | 29 | |
30 | 30 | $wgAutoloadClasses[ 'AbuseFilter' ] = "$dir/AbuseFilter.class.php"; |
31 | 31 | $wgAutoloadClasses[ 'AbuseFilterParser' ] = "$dir/AbuseFilter.parser.php"; |
| 32 | +$wgAutoloadClasses[ 'AbuseFilterParserNative' ] = "$dir/AbuseFilter.nativeparser.php"; |
32 | 33 | $wgAutoloadClasses[ 'AbuseFilterHooks' ] = "$dir/AbuseFilter.hooks.php"; |
33 | 34 | $wgAutoloadClasses['SpecialAbuseLog'] = "$dir/SpecialAbuseLog.php"; |
34 | 35 | $wgAutoloadClasses['SpecialAbuseFilter'] = "$dir/SpecialAbuseFilter.php"; |
— | — | @@ -54,4 +55,8 @@ |
55 | 56 | |
56 | 57 | // Disable filters if they match more than X edits, constituting more than Y% of the last Z edits |
57 | 58 | $wgAbuseFilterEmergencyDisableThreshold = 0.05; |
58 | | -$wgAbuseFilterEmergencyDisableCount = 5; |
\ No newline at end of file |
| 59 | +$wgAbuseFilterEmergencyDisableCount = 5; |
| 60 | + |
| 61 | +// Abuse filter parser class |
| 62 | +$wgAbuseFilterParserClass = 'AbuseFilterParserNative'; |
| 63 | +$wgAbuseFilterNativeParser = "$dir/parser_native/af_parser"; |
\ No newline at end of file |
Index: trunk/extensions/AbuseFilter/AbuseFilter.nativeparser.php |
— | — | @@ -0,0 +1,86 @@ |
| 2 | +<?php |
| 3 | +if ( ! defined( 'MEDIAWIKI' ) ) |
| 4 | + die(); |
| 5 | + |
| | +/** |
| | + * Filter evaluator that delegates rule evaluation to an external program. |
| | + * |
| | + * The program named by $wgAbuseFilterNativeParser is started on first use |
| | + * and kept running; each parse() call writes one XML request to its stdin |
| | + * and reads a one-line verdict ("MATCH" / "NOMATCH") from its stdout. |
| | + */ |
| 6 | +class AbuseFilterParserNative { |
| | +    // Starts as an empty array so generateXMLRequest() can iterate it |
| | +    // even when no variable has been set yet. |
| 7 | +    var $mVars = array(); |
| 8 | +    var $mProcess, $mPipes; |
| 9 | + |
| | +    /** |
| | +     * Close the pipes and the child process, if one was ever started. |
| | +     */ |
| 10 | +    public function __destruct() { |
| | +        if ( is_array( $this->mPipes ) ) { |
| 11 | +            foreach( $this->mPipes as $pipe ) { |
| | +                if ( is_resource( $pipe ) ) { |
| 12 | +                    fclose($pipe); |
| | +                } |
| 13 | +            } |
| | +        } |
| 14 | + |
| | +        // proc_close() needs a live process resource; skip it when |
| | +        // getNativeParser() was never called or proc_open() failed. |
| | +        if ( is_resource( $this->mProcess ) ) { |
| 15 | +            proc_close( $this->mProcess ); |
| | +        } |
| 16 | +    } |
| 17 | + |
| | +    /** |
| | +     * Store one variable to be sent with every subsequent request. |
| | +     * |
| | +     * @param string $name Variable name (becomes the "key" attribute). |
| | +     * @param mixed $var Variable value. |
| | +     */ |
| 18 | +    public function setVar( $name, $var ) { |
| 19 | +        $this->mVars[$name] = $var; |
| 20 | +    } |
| 21 | + |
| | +    /** |
| | +     * Store several variables at once. |
| | +     * |
| | +     * @param array $vars Map of variable name => value. |
| | +     */ |
| 22 | +    public function setVars( $vars ) { |
| 23 | +        foreach( $vars as $name => $var ) { |
| 24 | +            $this->setVar( $name, $var ); |
| 25 | +        } |
| 26 | +    } |
| 27 | + |
| | +    /** |
| | +     * Start the native parser process if it is not running yet. |
| | +     * |
| | +     * @return array Pipes of the child: index 0 is its stdin, 1 its stdout. |
| | +     * @throws MWException When the process cannot be started. |
| | +     */ |
| 28 | +    public function getNativeParser() { |
| 29 | +        global $wgAbuseFilterNativeParser; |
| 30 | + |
| 31 | +        if (!is_resource($this->mProcess)) { |
| 32 | +            $this->mPipes = array(); |
| 33 | +            $descriptorspec = array( |
| 34 | +                0 => array( 'pipe', 'r' ), |
| 35 | +                1 => array( 'pipe', 'w' ) |
| 36 | +            ); |
| 37 | + |
| 38 | +            $this->mProcess = proc_open( $wgAbuseFilterNativeParser, $descriptorspec, $this->mPipes ); |
| 39 | + |
| 40 | +            if (!is_resource($this->mProcess)) { |
| 41 | +                throw new MWException( "Error using native parser" ); |
| 42 | +            } |
| 45 | +        } |
| 46 | + |
| 47 | +        return $this->mPipes; |
| 48 | +    } |
| 49 | + |
| | +    /** |
| | +     * Evaluate a filter against the stored variables. |
| | +     * |
| | +     * @param string $filter Filter source code. |
| | +     * @return bool True when the native parser answers "MATCH", false on "NOMATCH". |
| | +     * @throws MWException When the parser cannot be started, gives no |
| | +     *  output, or answers with anything else. |
| | +     */ |
| 50 | +    public function parse( $filter ) { |
| 51 | +        $request = $this->generateXMLRequest( $filter ); |
| 52 | + |
| 53 | +        $pipes = $this->getNativeParser(); |
| 54 | + |
| 55 | +        if (is_array($pipes)) { |
| 56 | +            fwrite($pipes[0], $request); |
| | +            // NOTE(review): the EOT byte presumably marks the end of one |
| | +            // request for the native parser - confirm against its reader. |
| 57 | +            fwrite($pipes[0], "\x04"); |
| 58 | +            fflush($pipes[0]); |
| 59 | + |
| 60 | +            // Get response |
| | +            $line = fgets( $pipes[1] ); |
| | +            if ( $line === false ) { |
| | +                // EOF or read error: report it as such instead of |
| | +                // as an "unknown output" with an empty payload. |
| | +                throw new MWException( "No output from native parser" ); |
| | +            } |
| 61 | +            $response = trim( $line ); |
| 62 | + |
| 63 | +            if ($response == "MATCH") { |
| 64 | +                return true; |
| 65 | +            } elseif ($response == "NOMATCH") { |
| 66 | +                return false; |
| 67 | +            } else { |
| 68 | +                throw new MWException( "Unknown output from native parser: $response" ); |
| 69 | +            } |
| 70 | +        } |
| 71 | +    } |
| 72 | + |
| | +    /** |
| | +     * Build the XML document sent to the native parser: a <request> |
| | +     * holding a <vars> list of <var key="..."> elements and one <rule>. |
| | +     * |
| | +     * @param string $filter Filter source code. |
| | +     * @return string Serialised request. |
| | +     */ |
| 73 | +    protected function generateXMLRequest( $filter ) { |
| 74 | +        // Write vars |
| | +        // NOTE(review): utf8_encode() treats its input as ISO-8859-1; if the |
| | +        // values are already UTF-8 this double-encodes them - confirm what |
| | +        // the native parser expects before changing it. |
| 75 | +        $vars = ''; |
| 76 | +        foreach( $this->mVars as $key => $value ) { |
| 77 | +            $vars .= Xml::element( 'var', array( 'key' => $key ), utf8_encode($value) ); |
| 78 | +        } |
| 79 | +        $vars = Xml::tags( 'vars', null, $vars ); |
| 80 | + |
| 81 | +        $code = Xml::element( 'rule', null, utf8_encode($filter) ); |
| 82 | + |
| 83 | +        $request = Xml::tags( 'request', null, $vars . $code ); |
| 84 | + |
| 85 | +        return $request; |
| 86 | +    } |
| 87 | +} |
\ No newline at end of file |
Property changes on: trunk/extensions/AbuseFilter/AbuseFilter.nativeparser.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 88 | + native |