r38305 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r38304‎ | r38305 | r38306 >
Date:16:28, 31 July 2008
Author:werdna
Status:old
Tags:
Comment:
AbuseFilter native parser:
* Revert r38187 for now:
** Introduced a memory leak.
** Used an unnecessary library. The point is taken, and this will be fixed in a few days (using glibc instead).
* Fix logic error in boolean ops.
* Integrate with the PHP abuse filter using AbuseFilterParserNative class.
* Fix memory leak.
* Fix a few miscellaneous bugs
Modified paths:
  • /trunk/extensions/AbuseFilter/AbuseFilter.nativeparser.php (added) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.parser.php (added) (history)
  • /trunk/extensions/AbuseFilter/AbuseFilter.php (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/afeval.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/afeval.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/afparser.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/aftypes.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/aftypes.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/afutils.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/check (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/main.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/makefile (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/afeval.cpp
@@ -7,6 +7,7 @@
88 this->cur = AFPToken();
99 this->pos = 0;
1010 this->code = "";
 11+ this->forceResult = false;
1112 }
1213
1314 void FilterEvaluator::setVar( string key, AFPData value ) {
@@ -21,8 +22,18 @@
2223
2324 bool FilterEvaluator::evaluateFilter( string code ) {
2425 this->code = code;
25 - this->tokens = af_parse( code );
2626 this->pos = 0;
 27+
 28+ if (this->tokenCache.find(code) != this->tokenCache.end()) {
 29+ this->tokens = this->tokenCache[code];
 30+ } else {
 31+ this->tokenCache[code] = this->tokens = af_parse( code );
 32+ }
 33+
 34+ if (this->tokenCache.size() > 100) {
 35+ this->tokenCache.clear();
 36+ }
 37+
2738 this->cur = this->tokens[0];
2839
2940 AFPData result;
@@ -50,7 +61,7 @@
5162 this->doLevelSet( result );
5263
5364 if (this->cur.type != T_NONE) {
54 - throw new AFPException( "Unexpected tokens at end." );
 65+ throw AFPException( "Unexpected tokens at end." );
5566 }
5667 }
5768
@@ -116,6 +127,8 @@
117128 }
118129
119130 void FilterEvaluator::doLevelBoolOps( AFPData* result ) {
 131+ bool setForce = false;
 132+
120133 this->doLevelCompares( result );
121134
122135 vector<string> ops = getOpsForType( "bool" );
@@ -124,9 +137,27 @@
125138 string op = this->cur.value;
126139 this->move();
127140 AFPData r2;
 141+
 142+ if (!this->forceResult && op == "&" && !result->toBool()) {
 143+ setForce = true;
 144+ this->forceResult = true;
 145+ } else if (!this->forceResult && op == "|" && result->toBool()) {
 146+ setForce = true;
 147+ this->forceResult = true;
 148+ }
 149+
128150 this->doLevelCompares( &r2 );
129 - *result = af_boolOp( result, r2, op );
 151+
 152+ if (!this->forceResult) {
 153+ *result = af_boolOp( *result, r2, op );
 154+ } else if (setForce) {
 155+ setForce = false;
 156+ this->forceResult = false;
 157+ }
130158 }
 159+
 160+ if (setForce)
 161+ this->forceResult = false;
131162 }
132163
133164 void FilterEvaluator::doLevelCompares( AFPData* result ) {
@@ -140,7 +171,8 @@
141172
142173 this->doLevelMulRels( &r2 );
143174
144 - *result = af_compareOp( *result, r2, op );
 175+ if (!this->forceResult)
 176+ *result = af_compareOp( *result, r2, op );
145177 }
146178 }
147179
@@ -154,7 +186,9 @@
155187 AFPData r2;
156188
157189 this->doLevelSumRels( &r2 );
158 - *result = af_mulRel( *result, r2, op );
 190+
 191+ if (!this->forceResult)
 192+ *result = af_mulRel( *result, r2, op );
159193 }
160194 }
161195
@@ -168,10 +202,13 @@
169203 AFPData r2;
170204
171205 this->doLevelPow( &r2 );
 206+
172207 if (op == "+") {
173 - *result = af_sum( *result, r2 );
 208+ if (!this->forceResult)
 209+ *result = af_sum( *result, r2 );
174210 } else if (op == "-") {
175 - *result = af_sub( *result, r2 );
 211+ if (!this->forceResult)
 212+ *result = af_sub( *result, r2 );
176213 }
177214 }
178215 }
@@ -184,15 +221,18 @@
185222 AFPData exp;
186223
187224 this->doLevelBoolInvert( &exp );
188 - *result = af_pow( *result, exp );
 225+
 226+ if (!this->forceResult)
 227+ *result = af_pow( *result, exp );
189228 }
190229 }
191230
192231 void FilterEvaluator::doLevelBoolInvert( AFPData* result ) {
193 - if (this->cur.type == T_KEYWORD && this->cur.value == "!") {
 232+ if (this->cur.type == T_OP && this->cur.value == "!") {
194233 this->move();
195234 this->doLevelSpecialWords( result );
196 - *result = af_boolInvert( *result );
 235+ if (!this->forceResult)
 236+ *result = af_boolInvert( *result );
197237 } else {
198238 this->doLevelSpecialWords( result );
199239 }
@@ -209,7 +249,8 @@
210250 AFPData r2 = AFPData();
211251 this->doLevelUnarys( &r2 );
212252
213 - *result = af_keyword( keyword, *result, r2 );
 253+ if (!this->forceResult)
 254+ *result = af_keyword( keyword, *result, r2 );
214255 }
215256 }
216257
@@ -218,7 +259,8 @@
219260 this->move();
220261 this->doLevelBraces( result );
221262 if (this->cur.value == "-") {
222 - *result = af_unaryMinus( *result );
 263+ if (!this->forceResult)
 264+ *result = af_unaryMinus( *result );
223265 }
224266 } else {
225267 this->doLevelBraces( result );
@@ -231,7 +273,7 @@
232274 this->doLevelSet( result );
233275
234276 if ( !(this->cur.type == T_BRACE && this->cur.value == ")") ) {
235 - throw new AFPException( "Expected ')'" );
 277+ throw AFPException( "Expected ')' at pos %d", this->cur.pos );
236278 }
237279 this->move();
238280 } else {
@@ -245,7 +287,7 @@
246288 this->move();
247289
248290 if (this->cur.type != T_BRACE || this->cur.value != "(") {
249 - throw new AFPException( "Expected (" );
 291+ throw AFPException( "Expected (" );
250292 }
251293 this->move();
252294
@@ -261,7 +303,8 @@
262304 } while (this->cur.type == T_COMMA );
263305 }
264306
265 - *result = callFunction( func, args );
 307+ if (!this->forceResult)
 308+ *result = callFunction( func, args );
266309 this->move();
267310 } else {
268311 this->doLevelAtom( result );
@@ -291,7 +334,7 @@
292335 } else if (tok == "null") {
293336 *result = AFPData();
294337 } else {
295 - throw new AFPException( "Unidentifiable keyword" );
 338+ throw AFPException( "Unidentifiable keyword" );
296339 }
297340 break;
298341 case T_BRACE:
@@ -301,7 +344,8 @@
302345 break;
303346 case T_COMMA:
304347 return;
305 - default: throw new AFPException( "Unexpected token" );
 348+ break;
 349+ default: throw AFPException( "Unexpected token value %s", this->cur.value.c_str() );
306350 }
307351
308352 this->move();
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.cpp
@@ -5,17 +5,19 @@
66 #include <ios>
77 #include <iostream>
88 #include <ctype.h>
9 -#include <unicode/unistr.h>
109
1110 #define EQUIVSET_LOC "equivset.txt"
1211
1312 map<string,AFPFunction> af_functions;
 13+map<string,AFPData> functionResultCache;
1414
1515 AFPData af_length( vector<AFPData> args );
1616 AFPData af_lcase( vector<AFPData> args );
1717 AFPData af_ccnorm( vector<AFPData> args );
1818 AFPData af_rmdoubles( vector<AFPData> args );
1919 AFPData af_specialratio( vector<AFPData> args );
 20+AFPData af_rmspecials( vector<AFPData> args );
 21+AFPData af_norm( vector<AFPData> args );
2022
2123 void af_registerfunction( string name, AFPFunction method ) {
2224 af_functions[name] = method;
@@ -27,11 +29,107 @@
2830 af_registerfunction( "ccnorm", (AFPFunction) &af_ccnorm );
2931 af_registerfunction( "rmdoubles", (AFPFunction) &af_rmdoubles );
3032 af_registerfunction( "specialratio", (AFPFunction) &af_specialratio );
 33+ af_registerfunction( "rmspecials", (AFPFunction) &af_rmspecials );
 34+ af_registerfunction( "norm", (AFPFunction) &af_norm );
3135 }
3236
 37+AFPData af_norm( vector<AFPData> args ) {
 38+ if (!args.size()) {
 39+ throw AFPException( "Not enough arguments to norm" );
 40+ }
 41+
 42+ string orig = args[0].toString();
 43+
 44+ string::const_iterator p, charStart, end;
 45+ int chr = 0,lastchr = 0;
 46+ map<int,int> equivSet = getEquivSet();
 47+ string result;
 48+
 49+ p = orig.begin();
 50+ end = orig.end();
 51+
 52+ while (chr = next_utf8_char( p, charStart, end )) {
 53+ if (equivSet.find(chr) != equivSet.end()) {
 54+ chr = equivSet[chr];
 55+ }
 56+
 57+ if (chr != lastchr && isalnum(chr)) {
 58+ result.append(codepointToUtf8(chr));
 59+ }
 60+
 61+ lastchr = chr;
 62+ }
 63+
 64+ return AFPData(result);
 65+}
 66+
 67+string rmdoubles( string orig ) {
 68+ string::const_iterator p, charStart, end;
 69+ int chr,lastchr = 0;
 70+ string result;
 71+
 72+ p = orig.begin();
 73+ end = orig.end();
 74+ while (chr = next_utf8_char( p, charStart, end )) {
 75+ if (chr != lastchr) {
 76+ result.append(codepointToUtf8(chr));
 77+ }
 78+
 79+ lastchr = chr;
 80+ }
 81+
 82+ return result;
 83+}
 84+
 85+vector<AFPData> makeFuncArgList( AFPData arg ) {
 86+ vector<AFPData> ret;
 87+
 88+ ret.push_back( arg );
 89+
 90+ return ret;
 91+}
 92+
 93+AFPData callFunction( string name, vector<AFPData> args ) {
 94+ string cacheKey;
 95+ bool doCache = false;
 96+ if (args.size() == 1) {
 97+ doCache = true;
 98+ cacheKey = name + args[0].toString();
 99+
 100+ if (functionResultCache.find(cacheKey) != functionResultCache.end()) {
 101+ // found a result
 102+ return functionResultCache[cacheKey];
 103+ }
 104+ }
 105+
 106+ if (functionResultCache.size() > 100) {
 107+ functionResultCache.clear();
 108+ }
 109+
 110+ AFPData result;
 111+
 112+ if ( af_functions.find( name ) != af_functions.end() ) {
 113+ // Found the function
 114+ AFPFunction func = af_functions[name];
 115+ result = func(args);
 116+
 117+ if (doCache) {
 118+ functionResultCache[cacheKey] = result;
 119+ }
 120+
 121+ return result;
 122+ }
 123+}
 124+
 125+AFPData callFunction( string name, AFPData arg ) {
 126+ vector<AFPData> arglist = makeFuncArgList( arg );
 127+
 128+ return callFunction( name, arglist );
 129+}
 130+
33131 AFPData af_specialratio( vector<AFPData> args ) {
34132 if (!args.size()) {
35 - throw new AFPException( "Not enough arguments to specialratio" );
 133+ throw AFPException( "Not enough arguments to specialratio" );
36134 }
37135
38136 string orig = args[0].toString();
@@ -49,61 +147,72 @@
50148
51149 double ratio = (float)(specialcount) / (float)(orig.size());
52150
53 - return AFPData(ratio);
 151+ return AFPData(ratio);
54152 }
55153
56 -AFPData af_ccnorm( vector<AFPData> args ) {
 154+AFPData af_rmspecials( vector<AFPData> args ) {
57155 if (!args.size()) {
58 - throw new AFPException( "Not enough arguments to ccnorm" );
 156+ throw AFPException( "Not enough arguments to rmspecials" );
59157 }
60158
61 - return AFPData( confusable_character_normalise( args[0].toString() ) );
 159+ string orig = args[0].toString();
 160+ string result = rmspecials(orig);
 161+
 162+ return AFPData(result);
62163 }
63164
64 -AFPData af_rmdoubles( vector<AFPData> args ) {
65 - if (!args.size()) {
66 - throw new AFPException( "Not enough arguments to rmdoubles" );
67 - }
68 -
69 - string orig = args[0].toString();
 165+string rmspecials( string orig ) {
70166 string::const_iterator p, charStart, end;
71 - int chr,lastchr = 0;
 167+ int chr = 0;
72168 string result;
73169
74170 p = orig.begin();
75171 end = orig.end();
76172 while (chr = next_utf8_char( p, charStart, end )) {
77 - if (chr != lastchr) {
 173+ if (isalnum(chr)) {
78174 result.append(codepointToUtf8(chr));
79175 }
80 -
81 - lastchr = chr;
82176 }
83177
84178 return result;
85179 }
86180
87 -AFPData af_length( vector<AFPData> args ) {
 181+AFPData af_ccnorm( vector<AFPData> args ) {
88182 if (!args.size()) {
89 - throw new AFPException( "Not enough arguments to lcase" );
 183+ throw AFPException( "Not enough arguments to ccnorm" );
90184 }
 185+
 186+ return AFPData( confusable_character_normalise( args[0].toString() ) );
 187+}
91188
92 - UnicodeString ustr = UnicodeString( (UChar*)args[0].toString().c_str() );
93 - return AFPData( (long int)ustr.length() );
 189+AFPData af_rmdoubles( vector<AFPData> args ) {
 190+ if (!args.size()) {
 191+ throw AFPException( "Not enough arguments to rmdoubles" );
 192+ }
 193+
 194+ string result = rmdoubles( args[0].toString() );
 195+
 196+ return AFPData(result);
94197 }
95198
 199+AFPData af_length( vector<AFPData> args ) {
 200+ if (!args.size()) {
 201+ throw AFPException( "Not enough arguments to lcase" );
 202+ }
 203+
 204+ return AFPData( (long int)args[0].toString().size() );
 205+}
 206+
96207 AFPData af_lcase( vector<AFPData> args ) {
97208 if (!args.size()) {
98 - throw new AFPException( "Not enough arguments to lcase" );
 209+ throw AFPException( "Not enough arguments to lcase" );
99210 }
100211
101 - int initlen = args[0].toString().length();
102 - UnicodeString us = UnicodeString( args[0].toString().c_str() );
103 - us = us.toLower();
104 - char* result = (char*)malloc(initlen);
105 - us.extract(0, us.length(), result);
 212+ string s = args[0].toString();
106213
107 - return AFPData(string(result));
 214+ transform( s.begin(), s.end(), s.begin(), (int(*)(int)) tolower );
 215+
 216+ return AFPData(s);
108217 }
109218
110219 string confusable_character_normalise( string orig ) {
@@ -126,14 +235,6 @@
127236 return result;
128237 }
129238
130 -AFPData callFunction( string name, vector<AFPData> args ) {
131 - if ( af_functions.find( name ) != af_functions.end() ) {
132 - // Found the function
133 - AFPFunction func = af_functions[name];
134 - return func(args);
135 - }
136 -}
137 -
138239 bool isFunction( string name ) {
139240 return af_functions.find(name) != af_functions.end();
140241 }
@@ -146,7 +247,7 @@
147248 ifstream eqsFile( EQUIVSET_LOC );
148249
149250 if (!eqsFile) {
150 - throw new AFPException( "Unable to open equivalence sets!" );
 251+ throw AFPException( "Unable to open equivalence sets!" );
151252 }
152253
153254 string line;
@@ -250,5 +351,5 @@
251352 return ret;
252353 }
253354
254 - throw new AFPException("Asked for code outside of range ($codepoint)\n");
 355+ throw AFPException("Asked for code outside of range ($codepoint)\n");
255356 }
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.h
@@ -40,22 +40,27 @@
4141 AFPData( AFPData oldData, unsigned int newType );
4242 AFPData( const AFPData & oldData );
4343
 44+ // Assignment operator
 45+ AFPData & operator = (const AFPData & other);
 46+
4447 // Specific type constructors
4548 AFPData( long int var );
4649 AFPData( double var );
4750 AFPData( bool var );
48 -
49 - unsigned int type;
50 - void* value;
51 - size_t size;
5251
5352 bool toBool();
5453 string toString();
5554 long int toInt();
5655 double toFloat();
 56+ unsigned int getType();
5757
5858 protected:
59 - void makeData( unsigned int type, void* value, size_t size );
 59+ void makeData( unsigned int type, void* value, size_t size, string source );
 60+
 61+ unsigned int type;
 62+ void* value;
 63+ size_t size;
 64+ string source;
6065 };
6166
6267 class AFPException :exception {
Index: trunk/extensions/AbuseFilter/parser_native/afeval.h
@@ -28,9 +28,11 @@
2929
3030 AFPToken cur;
3131 vector<AFPToken> tokens;
 32+ map<string, vector<AFPToken> > tokenCache;
3233 unsigned int pos;
3334 string code;
3435 map<string,AFPData> vars;
 36+ bool forceResult;
3537 };
3638
3739 // typedef AFPData (*AFPFunction) (vector<AFPData>);
Index: trunk/extensions/AbuseFilter/parser_native/afparser.cpp
@@ -114,7 +114,7 @@
115115 }
116116 }
117117
118 - throw new AFPException( "Unclosed string" );
 118+ throw AFPException( "Unclosed string" );
119119 }
120120
121121 // Operators
@@ -127,7 +127,7 @@
128128 }
129129
130130 if (!isValidOp( s )) {
131 - throw new AFPException( "Invalid operator %s", s );
 131+ throw AFPException( "Invalid operator %s", s );
132132 }
133133
134134 tok = AFPToken( T_OP, s, pos );
@@ -167,7 +167,7 @@
168168 return true;
169169 }
170170
171 - throw new AFPException( "Unrecognised token" );
 171+ throw AFPException( "Unrecognised token" );
172172 }
173173
174174 bool isDigitOrDot( char chr ) {
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.h
@@ -14,3 +14,7 @@
1515 int next_utf8_char(std::string::const_iterator & p, std::string::const_iterator & charStart, std::string::const_iterator end);
1616 string codepointToUtf8( int codepoint );
1717 string confusable_character_normalise( string orig );
 18+vector<AFPData> makeFuncArgList( AFPData arg );
 19+AFPData callFunction( string name, AFPData arg );
 20+string rmdoubles( string orig );
 21+string rmspecials( string orig );
Index: trunk/extensions/AbuseFilter/parser_native/main.cpp
@@ -1,26 +1,105 @@
22 #include "afeval.h"
33 #include "affunctions.h"
 4+#include <libxml++/libxml++.h>
 5+#include <iostream>
 6+#include <string>
 7+#include <sstream>
 8+#include <map>
49
 10+string filter;
 11+map<string,AFPData> vars;
 12+
 13+bool loadRequest();
 14+
515 int main( int argc, char** argv ) {
616 FilterEvaluator e;
 17+ registerBuiltinFunctions();
 18+
 19+ while (true) {
 20+ e.reset();
 21+
 22+ if (!loadRequest())
 23+ continue;
 24+
 25+ bool result;
 26+
 27+ try {
 28+ e.setVars( vars );
 29+ result = e.evaluateFilter( filter );
 30+ } catch (AFPException excep) {
 31+ cerr << "EXCEPTION: " << excep.what() << endl;
 32+ }
 33+
 34+ cout << ( result ? "MATCH\n" : "NOMATCH\n" );
 35+// exit(result ? 1 : 0); // Exit 0 means OK, exit 1 means match
 36+ }
 37+}
738
8 - e.reset();
9 - bool result = false;
 39+/* REQUEST FORMAT:
 40+<request>
 41+ <vars>
 42+ <var key="varname">value</var>
 43+ </vars>
 44+ <rule> RULE CONTENT </rule>
 45+</request> */
 46+
 47+bool loadRequest() {
 48+ // Parse the XML.
 49+ xmlpp::DomParser parser;
 50+ parser.set_substitute_entities();
 51+
 52+ stringbuf sb(ios::out | ios::in);
 53+ cin.get( sb, '\x04' );
 54+ cin.get();
1055
11 - registerBuiltinFunctions();
 56+ string text = sb.str();
1257
13 - for(int i=0;i<=1;i++) {
14 - try {
15 - e.setVar( "foo", AFPData(string("love")) );
16 - result = e.evaluateFilter( "specialratio('foo;') == 0.25" );
17 - } catch (AFPException* excep) {
18 - printf( "Exception: %s\n", excep->what() );
 58+ // Remove the NULL
 59+ for( string::iterator it = text.begin(); it!=text.end(); ++it ) {
 60+ if (*it == '\x04') { text.erase(it); }
1961 }
 62+
 63+ if (text.size() < 2) {
 64+ return false;
2065 }
2166
22 - if (result) {
23 - printf("Success!\n");
24 - } else {
25 - printf("OH NOES!\n");
 67+ istringstream ss(text);
 68+ parser.parse_stream( ss );
 69+// parser.parse_file( "xml.test" );
 70+ xmlpp::Node* rootNode = parser.get_document()->get_root_node();
 71+
 72+ // Get vars
 73+ xmlpp::Node::NodeList varNodes = rootNode->get_children( "vars" );
 74+
 75+ if (varNodes.begin() == varNodes.end()) {
 76+ throw AFPException( "Request did not contain any vars" );
2677 }
 78+
 79+ xmlpp::Node::Node* varNode = *(varNodes.begin()); // Get the <vars> element
 80+ varNodes = varNode->get_children( "var" ); // Iterate through <var> child nodes
 81+ for (xmlpp::Node::NodeList::const_iterator it = varNodes.begin(); it!=varNodes.end(); ++it) {
 82+ xmlpp::Element* n = dynamic_cast<xmlpp::Element*>(*it);
 83+
 84+ string attName = n->get_attribute( "key" )->get_value();
 85+ if (n->has_child_text()) {
 86+ string attValue = n->get_child_text()->get_content();
 87+ vars[attName] = AFPData(attValue);
 88+ } else {
 89+ vars[attName] = "";
 90+ }
 91+ }
 92+
 93+ //Get code.
 94+ xmlpp::Node::NodeList codeNodes = rootNode->get_children( "rule" );
 95+
 96+ if (codeNodes.begin() == codeNodes.end()) {
 97+ throw new AFPException( "Request did not contain any filter" );
 98+ }
 99+
 100+ xmlpp::Node* codeNode = *(codeNodes.begin());
 101+ xmlpp::Element* codeElement = dynamic_cast<xmlpp::Element*>(codeNode);
 102+
 103+ filter = codeElement->get_child_text()->get_content();
 104+
 105+ return true;
27106 }
Index: trunk/extensions/AbuseFilter/parser_native/afutils.cpp
@@ -46,7 +46,7 @@
4747 return AFPData( (v1 || v2) && !(v1 && v2) );
4848 }
4949
50 - throw new AFPException( "Invalid boolean operation." );
 50+ throw AFPException( "Invalid boolean operation." );
5151 }
5252
5353 AFPData af_compareOp( AFPData a, AFPData b, string op ) {
@@ -56,8 +56,8 @@
5757 float f1 = a.toFloat();
5858 float f2 = b.toFloat();
5959
60 - unsigned int t1 = a.type;
61 - unsigned int t2 = b.type;
 60+ unsigned int t1 = a.getType();
 61+ unsigned int t2 = b.getType();
6262
6363 if (op == "==") {
6464 return AFPData( s1 == s2 );
@@ -76,7 +76,7 @@
7777 } else if (op == "<=") {
7878 return AFPData( f1 <= f2 );
7979 }
80 - throw new AFPException( "Invalid comparison type" );
 80+ throw AFPException( "Invalid comparison type" );
8181 }
8282
8383 AFPData af_mulRel( AFPData a, AFPData b, string op ) {
@@ -94,11 +94,11 @@
9595 return AFPData( (double)(i1 % i2) );
9696 }
9797
98 - throw new AFPException( "Invalid multiplication-related operator" );
 98+ throw AFPException( "Invalid multiplication-related operator" );
9999 }
100100
101101 AFPData af_sum( AFPData a, AFPData b ) {
102 - if (a.type == D_STRING || b.type == D_STRING) {
 102+ if (a.getType() == D_STRING || b.getType() == D_STRING) {
103103 return AFPData( a.toString() + b.toString() );
104104 } else {
105105 return AFPData( a.toFloat() * b.toFloat() );
@@ -135,5 +135,5 @@
136136 return AFPData( result );
137137 }
138138
139 - throw new AFPException( "Unknown keyword %s", keyword );
 139+ throw AFPException( "Unknown keyword %s", keyword );
140140 }
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.cpp
@@ -11,13 +11,20 @@
1212
1313
1414 AFPData::AFPData( unsigned int new_type, void* new_value, size_t new_size ) {
15 - this->makeData( new_type, new_value, new_size );
 15+ this->makeData( new_type, new_value, new_size, "full constructor" );
1616 }
1717
18 -void AFPData::makeData( unsigned int new_type, void* new_value, size_t new_size ) {
19 - type = new_type;
20 - value = new_value;
21 - size = new_size;
 18+void AFPData::makeData( unsigned int new_type, void* new_value, size_t new_size, string new_source ) {
 19+ this->type = new_type;
 20+ this->value = new_value;
 21+ this->size = new_size;
 22+ this->source = new_source;
 23+
 24+ if (this->type > DATATYPE_MAX) {
 25+ // Something funky's going on
 26+// cerr << "Something funky. Trying to construct a datum with type " << this->type << ", source is " << new_source << endl;
 27+ return;
 28+ }
2229 }
2330
2431 AFPData::AFPData( string var ) {
@@ -27,31 +34,33 @@
2835 char* last_char;
2936 istringstream ss(var);
3037
 38+ this->source = "string constructor";
 39+
3140 // Try integer
3241 if (!!(ss >> intval) && intval != 0) { // 0.25 converts to 0, otherwise.
3342 // Valid conversion
3443 long int* val = new long int( intval );
35 - this->makeData( D_INTEGER, (void*)val, sizeof(long int) );
 44+ this->makeData( D_INTEGER, (void*)val, sizeof(long int), "string constructor" );
3645 return;
3746 }
3847
3948 if (!!(ss >> fval)) {
4049 double* val = new double(fval);
41 - this->makeData( D_FLOAT, (void*)val, sizeof(double) );
 50+ this->makeData( D_FLOAT, (void*)val, sizeof(double), "string constructor" );
4251 return;
4352 }
4453
4554 // Last resort
4655 // Duplicate the string.
4756 string* s = new string(var);
48 - this->makeData( D_STRING, (void*)s, sizeof(string) );
 57+ this->makeData( D_STRING, (void*)s, sizeof(string), "string constructor" );
4958 return;
5059 }
5160
5261 AFPData::AFPData( AFPData old, unsigned int newType ) {
5362 if (old.type > DATATYPE_MAX) {
5463 // Non-existent type
55 - throw new AFPException( "Given junk data" );
 64+ throw AFPException( "Given junk data" );
5665 }
5766
5867 if (old.type == newType) {
@@ -59,21 +68,23 @@
6069
6170 // Duplicate the contents.
6271 if (old.type == D_STRING) {
63 - newVal = (void*) new string(old.toString());
 72+ string* s = new string();
 73+ s->append(old.toString());
 74+ newVal = (void*) s;
6475 } else if (old.type == D_INTEGER) {
6576 newVal = (void*) new long int(old.toInt());
6677 } else if (old.type == D_FLOAT) {
6778 newVal = (void*) new double(old.toFloat());
6879 }
6980
70 - this->makeData( old.type, newVal, old.size );
 81+ this->makeData( old.type, newVal, old.size, "cast constructor (copy)" );
7182 } else if (newType == 0) {
72 - this->makeData( D_NULL, NULL, 0 );
 83+ this->makeData( D_NULL, NULL, 0, "cast constructor - null" );
7384 return;
7485 } else if (newType == D_INTEGER) {
7586 if (old.type==D_FLOAT) {
7687 long int* val = new long int(old.toFloat());
77 - this->makeData( D_INTEGER, (void*)val, sizeof(long int) );
 88+ this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - float2int" );
7889 return;
7990 } else if (old.type==D_STRING) {
8091 long int* val = new long int();
@@ -81,16 +92,16 @@
8293
8394 ss >> *val;
8495
85 - this->makeData( D_INTEGER, (void*)val, sizeof(long int) );
 96+ this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - string2int" );
8697 return;
8798 } else if (old.type==D_NULL) {
8899 long int* val = new long int(0);
89 - this->makeData( D_INTEGER, (void*)val, sizeof(long int) );
 100+ this->makeData( D_INTEGER, (void*)val, sizeof(long int), "cast constructor - null2int" );
90101 }// No other types possible
91102 } else if (newType == D_FLOAT) {
92103 if (old.type==D_INTEGER) {
93104 double* val = new double(old.toInt());
94 - this->makeData( D_FLOAT, (void*)val, sizeof(double) );
 105+ this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - int2float" );
95106 return;
96107 } else if (old.type==D_STRING) {
97108 double* val = new double();
@@ -98,11 +109,11 @@
99110
100111 ss >> *val;
101112
102 - this->makeData( D_FLOAT, (void*)val, sizeof(double) );
 113+ this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - string2float" );
103114 return;
104115 } else if (old.type==D_NULL) {
105116 double* val = new double(0);
106 - this->makeData( D_FLOAT, (void*)val, sizeof(double) );
 117+ this->makeData( D_FLOAT, (void*)val, sizeof(double), "cast constructor - null2float" );
107118 } // No other types possible
108119 } else if (newType == D_STRING) {
109120 if (old.type == D_INTEGER || old.type == D_FLOAT) {
@@ -117,39 +128,81 @@
118129 }
119130
120131 string* str = new string(ss.str());
121 - this->makeData( D_STRING, (void*)str, sizeof(string) );
 132+ this->makeData( D_STRING, (void*)str, sizeof(string), "cast constructor - num2string" );
122133 return;
123134 } else if (old.type==D_NULL) {
124135 string* s = new string("");
125 - this->makeData( D_STRING, (void*)s, sizeof(string) );
 136+ this->makeData( D_STRING, (void*)s, sizeof(string), "cast constructor - null2string" );
126137 } // No other types possible
127138 }
128139
129140 if (this->type > DATATYPE_MAX) {
130141 // Non-existent type
131 - throw new AFPException( "Created junk data" );
 142+ throw AFPException( "Created junk data" );
132143 }
133144 }
134145
135 -AFPData::AFPData() { this->makeData( 0, NULL, 0 );}
 146+AFPData::AFPData() { this->source = "empty constructor"; this->makeData( 0, NULL, 0, "empty constructor" );}
136147
137 -AFPData::~AFPData() { /*free(this->value);*/ }
 148+AFPData::~AFPData() {
 149+ if (this->value == 0x0) {
 150+ return;
 151+ } else if (this->type > DATATYPE_MAX) {
 152+ // Something funky's going on
 153+// cerr << "Something funky. Trying to destruct a datum with type " << this->type << endl;
 154+ return;
 155+ }
 156+
 157+// cerr << "Freeing " << this->value << " - type " << this->type << " - source " << this->source << endl;
 158+
 159+ switch (this->type) {
 160+ case D_FLOAT:
 161+ delete (double*)this->value;
 162+ break;
 163+ case D_INTEGER:
 164+ delete (long int*)this->value;
 165+ break;
 166+ case D_STRING:
 167+ delete (string*)this->value;
 168+ break;
 169+// default:
 170+// delete this->value;
 171+ }
 172+
 173+ this->value = 0x0;
 174+}
138175
139176 AFPData::AFPData( const AFPData & oldData ) {
 177+ this->source = "copy constructor";
 178+
 179+ if (oldData.type > DATATYPE_MAX) {
 180+ // Something funky's going on
 181+// cerr << "Something funky. Trying to copy a datum with type " << oldData.type << ", source " << oldData.source << endl;
 182+ return;
 183+ }
 184+
140185 // Duplicate the inner data
141186 void* newVal;
142187
143188 if (oldData.type == D_STRING) {
144 - string* s = new string("");
145 - s->append(*(string*)oldData.value);
146 - newVal = (void*)s;
 189+ string* ival = new string();
 190+ *ival = *(string*)oldData.value;
 191+ newVal = (void*)ival;
147192 } else if (oldData.type == D_INTEGER) {
148 - newVal = (void*) new long int(*(long int*)oldData.value);
 193+ long int* ival = new long int;
 194+ *ival = *(long int*)oldData.value;
 195+ newVal = (void*)ival;
149196 } else if (oldData.type == D_FLOAT) {
150 - newVal = (void*) new double(*(double*)oldData.value);
 197+ double* ival = new double;
 198+ *ival = *(double*)oldData.value;
 199+ newVal = (void*)ival;
 200+ } else if (oldData.type == D_NULL) {
 201+ newVal = 0;
 202+ } else {
 203+// cerr << "Asked to copy an unknown type " << oldData.type << endl;
151204 }
152205
153 - this->makeData( oldData.type, newVal, oldData.size );
 206+ this->makeData( oldData.type, newVal, oldData.size, "copy constructor" );
154207 }
155208
156209 long int AFPData::toInt() {
@@ -189,17 +242,57 @@
190243 AFPData::AFPData( long int var ) {
191244 long int* i = new long int(var);
192245
193 - this->makeData( D_INTEGER, i, sizeof(long int) );
 246+ this->makeData( D_INTEGER, i, sizeof(long int), "int constructor" );
194247 }
195248
196249 AFPData::AFPData( double var ) {
197250 double* d = new double(var);
198251
199 - this->makeData( D_FLOAT, d, sizeof(double) );
 252+ this->makeData( D_FLOAT, d, sizeof(double), "double constructor" );
200253 }
201254
202255 AFPData::AFPData( bool var ) {
203256 long int* i = new long int(var);
204257
205 - this->makeData( D_INTEGER, i, sizeof(long int) );
 258+ this->makeData( D_INTEGER, i, sizeof(long int), "bool constructor" );
206259 }
 260+
 261+unsigned int AFPData::getType() { return this->type; }
 262+
 263+AFPData & AFPData::operator= (const AFPData & oldData) {
 264+ // Protect against self-assignment
 265+ if (this == &oldData) {
 266+ return *this;
 267+ }
 268+
 269+ // NULLs and INVALID data types need no deep copy
 270+ if (oldData.type > DATATYPE_MAX || oldData.type == D_NULL) {
 271+ this->makeData( 0, NULL, 0, "assignment operator" );
 272+ return *this;
 273+ }
 274+
 275+ // Otherwise, do a proper copy.
 276+ // Duplicate the inner data
 277+ void* newVal;
 278+ if (oldData.type == D_STRING) {
 279+ string* ival = new string();
 280+ *ival = *(string*)oldData.value;
 281+ newVal = (void*)ival;
 282+ } else if (oldData.type == D_INTEGER) {
 283+ long int* ival = new long int;
 284+ *ival = *(long int*)oldData.value;
 285+ newVal = (void*)ival;
 286+ } else if (oldData.type == D_FLOAT) {
 287+ double* ival = new double;
 288+ *ival = *(double*)oldData.value;
 289+ newVal = (void*)ival;
 290+ } else if (oldData.type == D_NULL) {
 291+ newVal = 0;
 292+ } else {
 293+// cerr << "Asked to copy an unknown type " << oldData.type << endl;
 294+ }
 295+
 296+ this->makeData( oldData.type, newVal, oldData.size, "assignment operator" );
 297+
 298+ return *this;
 299+}
Index: trunk/extensions/AbuseFilter/parser_native/check
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: trunk/extensions/AbuseFilter/parser_native/makefile
@@ -1,12 +1,13 @@
 2+all: check af_parser
23
 4+af_parser: afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o
 5+ g++ -g -o af_parser afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o -lboost_regex -lxml++-2.6 -lxml2 -lglibmm-2.4 -lgobject-2.0 -lsigc-2.0 -lglib-2.0
36
4 -all: check
5 -
67 check: afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o
7 - g++ -g -o check -lboost_regex -licudata afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o
 8+ g++ -g -o check -lboost_regex afeval.o affunctions.o afparser.o aftypes.o afutils.o check.o
89
910 .cpp.o:
10 - g++ -g -c $<
 11+ g++ -g -c $< -I/usr/include/libxml++-2.6 -I/usr/lib/libxml++-2.6/include -I/usr/include/libxml2 -I/usr/include/glibmm-2.4 -I/usr/lib/glibmm-2.4/include -I/usr/include/sigc++-2.0 -I/usr/lib/sigc++-2.0/include -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include
1112
1213 clean:
13 - rm -f *.o check
\ No newline at end of file
 14+ rm -f *.o check
Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
@@ -0,0 +1,665 @@
 2+<?php
 3+if ( ! defined( 'MEDIAWIKI' ) )
 4+ die();
 5+/**
 6+Abuse filter parser.
 7+Copyright (C) Victor Vasiliev, 2008. Based on ideas by Andrew Garrett. Distributed under GNU GPL v2 terms.
 8+
 9+Types of token:
 10+* T_NONE - special-purpose token
 11+* T_BRACE - ( or )
 12+* T_COMMA - ,
 13+* T_OP - operator like + or ^
 14+* T_NUMBER - number
 15+* T_STRING - string, in "" or ''
 16+* T_KEYWORD - keyword
 17+* T_ID - identifier
 18+
 19+Levels of parsing:
 20+* Set (S) - the = assignment operator
 21+* BoolOps (BO) - &, |, ^
 22+* CompOps (CO) - ==, !=, ===, !==, >, <, >=, <=
 23+* SumRel (SR) - +, -
 24+* MulRel (MR) - *, /, %
 25+* Pow (P) - **
 26+* BoolNeg (BN) - ! operation
 27+* SpecialOperators (SO) - in and like
 28+* Unarys (U) - plus and minus in cases like -5 or -(2 * +2)
 29+* Braces (B) - ( and )
 30+* Functions (F)
 31+* Atom (A) - return value
 32+*/
 33+
 34+class AFPToken {
 35+ //Types of tken
 36+ const TNone = 'T_NONE';
 37+ const TID = 'T_ID';
 38+ const TKeyword = 'T_KEYWORD';
 39+ const TString = 'T_STRING';
 40+ const TNumber = 'T_NUMBER';
 41+ const TOp = 'T_OP';
 42+ const TBrace = 'T_BRACE';
 43+ const TComma = 'T_COMMA';
 44+
 45+ var $type;
 46+ var $value;
 47+ var $pos;
 48+
 49+ public function __construct( $type = self::TNone, $value = null, $pos = 0 ) {
 50+ $this->type = $type;
 51+ $this->value = $value;
 52+ $this->pos = $pos;
 53+ }
 54+}
 55+
 56+class AFPData {
 57+ //Datatypes
 58+ const DNumber = 'number'; //any integer or double
 59+ const DString = 'string';
 60+ const DNull = 'null';
 61+ const DBool = 'bool';
 62+
 63+ var $type;
 64+ var $data;
 65+
 66+ public function __construct( $type = self::DNull, $val = null ) {
 67+ $this->type = $type;
 68+ $this->data = $val;
 69+ }
 70+
 71+ public static function newFromPHPVar( $var ) {
 72+ if( is_string( $var ) )
 73+ return new AFPData( self::DString, $var );
 74+ elseif( is_int( $var ) || is_float( $var ) )
 75+ return new AFPData( self::DNumber, $var );
 76+ elseif( is_bool( $var ) )
 77+ return new AFPData( self::DBool, $var );
 78+ elseif( is_null( $var ) )
 79+ return new AFPData();
 80+ else
 81+ throw new AFPException( "Data type " . gettype( $var ) . " is not supported by AbuseFilter" );
 82+ }
 83+
 84+ public function dup() {
 85+ return new AFPData( $this->type, $this->data );
 86+ }
 87+
 88+ public static function castTypes( $orig, $target ) {
 89+ if( $orig->type == $target )
 90+ return $orig->dup();
 91+ if( $target == self::DNull ) {
 92+ return new AFPData();
 93+ }
 94+ if( $target == self::DBool ) {
 95+ return new AFPData( self::DBool, (bool)$orig->data );
 96+ }
 97+ if( $target == self::DNumber ) {
 98+ return new AFPData( self::DNumber, doubleval( $orig->data ) );
 99+ }
 100+ if( $target == self::DString ) {
 101+ return new AFPData( self::DString, strval( $orig->data ) );
 102+ }
 103+ }
 104+
 105+ public static function boolInvert( $value ) {
 106+ return new AFPData( self::DBool, !$value->toBool() );
 107+ }
 108+
 109+ public static function pow( $base, $exponent ) {
 110+ return new AFPData( self::DNumber, pow( $base->toNumber(), $exponent->toNumber() ) );
 111+ }
 112+
 113+ public static function keywordIn( $a, $b ) {
 114+ $a = $a->toString();
 115+ $b = $b->toString();
 116+
 117+ if ($a == '' || $b == '') {
 118+ return new AFPData( self::DBool, false );
 119+ }
 120+
 121+ return new AFPData( self::DBool, in_string( $a, $b ) );
 122+ }
 123+
 124+ public static function keywordLike( $str, $regex ) {
 125+ $str = $str->toString();
 126+ $regex = $regex->toString() . 'u'; //Append unicode modifier
 127+ wfSuppressWarnings();
 128+ $result = preg_match( $regex, $str );
 129+ wfRestoreWarnings();
 130+ return new AFPData( self::DBool, (bool)$result );
 131+ }
 132+
 133+ public static function unaryMinus( $data ) {
 134+ return new AFPData( self::DNumber, $data->toNumber() );
 135+ }
 136+
 137+ public static function boolOp( $a, $b, $op ) {
 138+ $a = $a->toBool();
 139+ $b = $b->toBool();
 140+ if( $op == '|' )
 141+ return new AFPData( self::DBool, $a || $b );
 142+ if( $op == '&' )
 143+ return new AFPData( self::DBool, $a && $b );
 144+ if( $op == '^' )
 145+ return new AFPData( self::DBool, $a xor $b );
 146+ throw new AFPException( "Invalid boolean operation: {$op}" );
 147+ }
 148+
 149+ public static function compareOp( $a, $b, $op ) {
 150+ if( $op == '==' )
 151+ return new AFPData( self::DBool, $a->toString() === $b->toString() );
 152+ if( $op == '!=' )
 153+ return new AFPData( self::DBool, $a->toString() !== $b->toString() );
 154+ if( $op == '===' )
 155+ return new AFPData( self::DBool, $a->data === $b->data && $a->type == $b->type );
 156+ if( $op == '!==' )
 157+ return new AFPData( self::DBool, $a->data !== $b->data || $a->type != $b->type );
 158+ $a = $a->toString();
 159+ $b = $b->toString();
 160+ if( $op == '>' )
 161+ return new AFPData( self::DBool, $a > $b );
 162+ if( $op == '<' )
 163+ return new AFPData( self::DBool, $a < $b );
 164+ if( $op == '>=' )
 165+ return new AFPData( self::DBool, $a >= $b );
 166+ if( $op == '<=' )
 167+ return new AFPData( self::DBool, $a <= $b );
 168+ throw new AFPException( "Invalid comprasion operation: {$op}" );
 169+ }
 170+
 171+ public static function mulRel( $a, $b, $op ) {
 172+ $a = $a->toNumber();
 173+ $b = $b->toNumber();
 174+ if( $op == '*' )
 175+ return new AFPData( self::DNumber, $a * $b );
 176+ if( $op == '/' )
 177+ return new AFPData( self::DNumber, $a / $b );
 178+ if( $op == '%' )
 179+ return new AFPData( self::DNumber, $a % $b );
 180+ throw new AFPException( "Invalid multiplication-related operation: {$op}" );
 181+ }
 182+
 183+ public static function sum( $a, $b ) {
 184+ if( $a->type == self::DString || $b->type == self::DString )
 185+ return new AFPData( self::DString, $a->toString() . $b->toString() );
 186+ else
 187+ return new AFPData( self::DNumber, $a->toNumber() + $b->toNumber() );
 188+ }
 189+
 190+ public static function sub( $a, $b ) {
 191+ return new AFPData( self::DNumber, $a->toNumber() - $b->toNumber() );
 192+ }
 193+
 194+ /** Convert shorteners */
 195+ public function toBool() {
 196+ return self::castTypes( $this, self::DBool )->data;
 197+ }
 198+
 199+ public function toString() {
 200+ return self::castTypes( $this, self::DString )->data;
 201+ }
 202+
 203+ public function toNumber() {
 204+ return self::castTypes( $this, self::DNumber )->data;
 205+ }
 206+}
 207+
 208+class AFPException extends MWException {}
 209+
 210+class AbuseFilterParser {
 211+ var $mParams, $mVars, $mCode, $mTokens, $mPos, $mCur;
 212+
 213+ static $mFunctions = array(
 214+ 'lc' => 'funcLc',
 215+ 'len' => 'funcLen',
 216+ 'norm' => 'funcNorm',
 217+ 'simplenorm' => 'funcSimpleNorm',
 218+ 'specialratio' => 'funcSpecialRatio',
 219+ );
 220+ static $mOps = array(
 221+ '!', '*', '**', '/', '+', '-', '%', '&', '|', '^',
 222+ '<', '>', '>=', '<=', '==', '!=', '=', '===', '!==',
 223+ );
 224+ static $mKeywords = array(
 225+ 'in', 'like', 'true', 'false', 'null',
 226+ );
 227+
 228+ static $parserCache = array();
 229+
 230+ static $funcCache = array();
 231+
 232+ public function __construct() {
 233+ $this->resetState();
 234+ }
 235+
 236+ public function resetState() {
 237+ $this->mParams = array();
 238+ $this->mCode = '';
 239+ $this->mTokens = array();
 240+ $this->mVars = array();
 241+ $this->mPos = 0;
 242+ }
 243+
 244+ public function setVar( $name, $var ) {
 245+ $this->mVars[$name] = AFPData::newFromPHPVar( $var );
 246+ }
 247+
 248+ public function setVars( $vars ) {
 249+ wfProfileIn( __METHOD__ );
 250+ foreach( $vars as $name => $var ) {
 251+ $this->setVar( $name, $var );
 252+ }
 253+ wfProfileOut( __METHOD__ );
 254+ }
 255+
 256+ protected function move( $shift = +1 ) {
 257+ $old = $this->mPos;
 258+ $this->mPos += $shift;
 259+ if( $this->mPos >= 0 && $this->mPos < count( $this->mTokens ) ) {
 260+ $this->mCur = $this->mTokens[$this->mPos];
 261+ return true;
 262+ }
 263+ else {
 264+ $this->mPos = $old;
 265+ return false;
 266+ }
 267+ }
 268+
 269+ public function parse( $code ) {
 270+ wfProfileIn( __METHOD__ );
 271+ $this->mCode = $code;
 272+ $this->mTokens = self::parseTokens( $code );
 273+ $this->mPos = 0;
 274+ $this->mCur = $this->mTokens[0];
 275+ $result = new AFPData();
 276+ $this->doLevelEntry( $result );
 277+ wfProfileOut( __METHOD__ );
 278+ return $result->toBool();
 279+ }
 280+
 281+ /* Levels */
 282+
 283+ /** Handles unexpected characters after the expression */
 284+ protected function doLevelEntry( &$result ) {
 285+ $this->doLevelSet( $result );
 286+ if( $this->mCur->type != AFPToken::TNone ) {
 287+ throw new AFPException( "Unexpected {$this->mCur->type} at char {$this->mCur->pos}" );
 288+ }
 289+ }
 290+
 291+ /** Handles "=" operator */
 292+ protected function doLevelSet( &$result ) {
 293+ wfProfileIn( __METHOD__ );
 294+ if( $this->mCur->type == AFPToken::TID ) {
 295+ $varname = $this->mCur->value;
 296+ $this->move();
 297+ if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == '=' ) {
 298+ $this->move();
 299+ $this->doLevelSet( $result );
 300+ $this->mVars[$varname] = $result->dup();
 301+ return;
 302+ }
 303+ $this->move( -1 );
 304+ }
 305+ wfProfileOut( __METHOD__ );
 306+ $this->doLevelBoolOps( $result );
 307+ }
 308+
 309+ protected function doLevelBoolOps( &$result ) {
 310+ $this->doLevelCompares( $result );
 311+ $ops = array( '&', '|', '^' );
 312+ while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) {
 313+ $op = $this->mCur->value;
 314+ $this->move();
 315+ $r2 = new AFPData();
 316+ $this->doLevelCompares( $r2 );
 317+ wfProfileIn( __METHOD__ );
 318+ $result = AFPData::boolOp( $result, $r2, $op );
 319+ wfProfileOut( __METHOD__ );
 320+ }
 321+ }
 322+
 323+ protected function doLevelCompares( &$result ) {
 324+ $this->doLevelMulRels( &$result );
 325+ $ops = array( '==', '===', '!=', '!==', '<', '>', '<=', '>=' );
 326+ while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) {
 327+ $op = $this->mCur->value;
 328+ $this->move();
 329+ $r2 = new AFPData();
 330+ $this->doLevelMulRels( $r2 );
 331+ wfProfileIn( __METHOD__ );
 332+ $result = AFPData::compareOp( $result, $r2, $op );
 333+ wfProfileOut( __METHOD__ );
 334+ }
 335+ }
 336+
 337+ protected function doLevelMulRels( &$result ) {
 338+ $this->doLevelSumRels( &$result );
 339+ wfProfileIn( __METHOD__ );
 340+ $ops = array( '*', '/', '%' );
 341+ while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) {
 342+ $op = $this->mCur->value;
 343+ $this->move();
 344+ $r2 = new AFPData();
 345+ $this->doLevelSumRels( $r2 );
 346+ $result = AFPData::mulRel( $result, $r2, $op );
 347+ }
 348+ wfProfileOut( __METHOD__ );
 349+ }
 350+
 351+ protected function doLevelSumRels( &$result ) {
 352+ $this->doLevelPow( &$result );
 353+ wfProfileIn( __METHOD__ );
 354+ $ops = array( '+', '-' );
 355+ while( $this->mCur->type == AFPToken::TOp && in_array( $this->mCur->value, $ops ) ) {
 356+ $op = $this->mCur->value;
 357+ $this->move();
 358+ $r2 = new AFPData();
 359+ $this->doLevelPow( $r2 );
 360+ if( $op == '+' )
 361+ $result = AFPData::sum( $result, $r2 );
 362+ if( $op == '-' )
 363+ $result = AFPData::sub( $result, $r2 );
 364+ }
 365+ wfProfileOut( __METHOD__ );
 366+ }
 367+
 368+ protected function doLevelPow( &$result ) {
 369+ $this->doLevelBoolInvert( $result );
 370+ wfProfileIn( __METHOD__ );
 371+ while( $this->mCur->type == AFPToken::TOp && $this->mCur->value == '**' ) {
 372+ $this->move();
 373+ $expanent = new AFPData();
 374+ $this->doLevelBoolInvert( $expanent );
 375+ $result = AFPData::pow( $result, $expanent );
 376+ }
 377+ wfProfileOut( __METHOD__ );
 378+ }
 379+
 380+ protected function doLevelBoolInvert( &$result ) {
 381+ if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == '!' ) {
 382+ $this->move();
 383+ $this->doLevelSpecialWords( $result );
 384+ wfProfileIn( __METHOD__ );
 385+ $result = AFPData::boolInvert( $result );
 386+ wfProfileOut( __METHOD__ );
 387+ } else {
 388+ $this->doLevelSpecialWords( $result );
 389+ }
 390+ }
 391+
 392+ protected function doLevelSpecialWords( &$result ) {
 393+ $this->doLevelUnarys( $result );
 394+ $specwords = array( 'in', 'like' );
 395+ if( $this->mCur->type == AFPToken::TKeyword && in_array( $this->mCur->value, $specwords ) ) {
 396+ $func = 'keyword' . ucfirst( $this->mCur->value );
 397+ $this->move();
 398+ $r2 = new AFPData();
 399+ $this->doLevelUnarys( $r2 );
 400+ wfProfileIn( __METHOD__ );
 401+ wfProfileIn( __METHOD__."-$func" );
 402+ $result = AFPData::$func( $result, $r2 );
 403+ wfProfileOut( __METHOD__."-$func" );
 404+ wfProfileOut( __METHOD__ );
 405+ }
 406+ }
 407+
 408+ protected function doLevelUnarys( &$result ) {
 409+ $op = $this->mCur->value;
 410+ if( $this->mCur->type == AFPToken::TOp && ( $op == "+" || $op == "-" ) ) {
 411+ $this->move();
 412+ $this->doLevelBraces( $result );
 413+ wfProfileIn( __METHOD__ );
 414+ if( $op == '-' ) {
 415+ $result = AFPData::unaryMinus( $result );
 416+ }
 417+ wfProfileOut( __METHOD__ );
 418+ } else {
 419+ $this->doLevelBraces( $result );
 420+ }
 421+ }
 422+
 423+ protected function doLevelBraces( &$result ) {
 424+ if( $this->mCur->type == AFPToken::TBrace && $this->mCur->value == '(' ) {
 425+ $this->move();
 426+ $this->doLevelSet( $result );
 427+ if( !($this->mCur->type == AFPToken::TBrace && $this->mCur->value == ')') )
 428+ throw new AFPException( "Expected ) at char {$this->mCur->pos}" );
 429+ $this->move();
 430+ } else {
 431+ $this->doLevelFunction( $result );
 432+ }
 433+ }
 434+
 435+ protected function doLevelFunction( &$result ) {
 436+ if( $this->mCur->type == AFPToken::TID && isset( self::$mFunctions[$this->mCur->value] ) ) {
 437+ wfProfileIn( __METHOD__ );
 438+ $func = self::$mFunctions[$this->mCur->value];
 439+ $this->move();
 440+ if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != '(' )
 441+ throw new AFPEexception( "Expected ( at char {$this->mCur->value}" );
 442+ wfProfileIn( __METHOD__."-loadargs" );
 443+ $args = array();
 444+ if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != ')' )
 445+ do {
 446+ $this->move();
 447+ $r = new AFPData();
 448+ try {
 449+ $this->doLevelAtom( $r );
 450+ } catch (AFPException $e) {
 451+ $this->move( -1 );
 452+ $this->doLevelSet( $r );
 453+ }
 454+ $args[] = $r;
 455+ } while( $this->mCur->type == AFPToken::TComma );
 456+ if( $this->mCur->type != AFPToken::TBrace || $this->mCur->value != ')' ) {
 457+ throw new AFPException( "Expected ) at char {$this->mCur->pos}" );
 458+ }
 459+ wfProfileOut( __METHOD__."-loadargs" );
 460+
 461+ wfProfileIn( __METHOD__."-$func" );
 462+
 463+ $funcHash = md5($func.serialize($args));
 464+
 465+ if (isset(self::$funcCache[$funcHash])) {
 466+ $result = self::$funcCache[$funcHash];
 467+ } else {
 468+ $result = self::$funcCache[$funcHash] = $this->$func( $args );
 469+ }
 470+
 471+ if (count(self::$funcCache) > 1000) {
 472+ self::$funcCache = array();
 473+ }
 474+
 475+ wfProfileOut( __METHOD__."-$func" );
 476+
 477+ $this->move();
 478+ wfProfileOut( __METHOD__ );
 479+ } else {
 480+ $this->doLevelAtom( $result );
 481+ }
 482+ }
 483+
 484+ protected function doLevelAtom( &$result ) {
 485+ wfProfileIn( __METHOD__ );
 486+ $tok = $this->mCur->value;
 487+ switch( $this->mCur->type ) {
 488+ case AFPToken::TID:
 489+ if( isset( $this->mVars[$tok] ) ) {
 490+ $result = $this->mVars[$tok];
 491+ } else {
 492+ $result = new AFPData();
 493+ }
 494+ break;
 495+ case AFPToken::TString:
 496+ $result = new AFPData( AFPData::DString, $tok );
 497+ break;
 498+ case AFPToken::TNumber:
 499+ $result = new AFPData( AFPData::DNumber, $tok );
 500+ break;
 501+ case AFPToken::TKeyword:
 502+ if( $tok == "true" )
 503+ $result = new AFPData( AFPData::DBool, true );
 504+ elseif( $tok == "false" )
 505+ $result = new AFPData( AFPData::DBool, false );
 506+ elseif( $tok == "null" )
 507+ $result = new AFPData();
 508+ else
 509+ throw new AFPException( "Unexpected {$this->mCur->type} at char {$this->mCur->pos}" );
 510+ break;
 511+ case AFPToken::TBrace:
 512+ if( $this->mCur->value == ')' )
 513+ return; // Handled at the entry level
 514+ default:
 515+ throw new AFPException( "Unexpected {$this->mCur->type} at char {$this->mCur->pos}" );
 516+ }
 517+ $this->move();
 518+ wfProfileOut( __METHOD__ );
 519+ }
 520+
 521+ /* End of levels */
 522+
 523+ public static function parseTokens( $code ) {
 524+ $r = array();
 525+ $len = strlen( $code );
 526+ $hash = md5(trim($code));
 527+
 528+ if (isset(self::$parserCache[$hash])) {
 529+ return self::$parserCache[$hash];
 530+ }
 531+
 532+ while( $tok = self::nextToken( $code, $len ) ) {
 533+ list( $val, $type, $code, $pos ) = $tok;
 534+ $r[] = new AFPToken( $type, $val, $pos );
 535+ if( $type == AFPToken::TNone )
 536+ break;
 537+ }
 538+ return self::$parserCache[$hash] = $r;
 539+ }
 540+
 541+ protected static function nextToken( $code, $len ) {
 542+ $tok = '';
 543+ if( strlen( $code ) == 0 ) return array( '', AFPToken::TNone, $code, $len );
 544+ while( ctype_space( $code[0] ) )
 545+ $code = substr( $code, 1 );
 546+ $pos = $len - strlen( $code );
 547+ if( strlen( $code ) == 0 ) return array( '', AFPToken::TNone, $code, $pos );
 548+ if( $code[0] == ',' )
 549+ return array( ',', AFPToken::TComma, substr( $code, 1 ), $pos );
 550+ if( $code[0] == '(' or $code[0] == ')' )
 551+ return array( $code[0], AFPToken::TBrace, substr( $code, 1 ), $pos );
 552+ if( $code[0] == '"' || $code[0] == "'" ) {
 553+ $type = $code[0];
 554+ $code = substr( $code, 1 );
 555+ while( strlen( $code ) != 0 ) {
 556+ if( $code[0] == $type ) {
 557+ return array( $tok, AFPToken::TString, substr( $code, 1 ), $pos );
 558+ }
 559+ if( $code[0] == '\\' ) {
 560+ if( $code[1] == '\\' )
 561+ $tok .= '\\';
 562+ elseif( $code[1] == $type )
 563+ $tok .= $type;
 564+ elseif( $code[1] == 'n' )
 565+ $tok .= "\n";
 566+ elseif( $code[1] == 'r' )
 567+ $tok .= "\r";
 568+ elseif( $code[1] == 't' )
 569+ $tok .= "\t";
 570+ else
 571+ $tok .= $code[1];
 572+ $code = substr( $code, 2 );
 573+ } else {
 574+ $tok .= $code[0];
 575+ $code = substr( $code, 1 );
 576+ }
 577+ }
 578+ throw new AFPException( "Unclosed string begining at char $pos" );
 579+ }
 580+ if( ctype_punct( $code[0] ) ) {
 581+ $tok .= $code[0];
 582+ $code = substr( $code, 1 );
 583+ while( strlen( $code ) != 0 && ctype_punct( $code[0] ) ) {
 584+ $tok .= $code[0];
 585+ $code = substr( $code, 1 );
 586+ }
 587+ if( !in_array( $tok, self::$mOps ) )
 588+ throw new AFPException( "Invalid operator: {$tok} (at char $pos)" );
 589+ return array( $tok, AFPToken::TOp, $code, $pos );
 590+ }
 591+ if( ctype_digit( $code[0] ) ) {
 592+ $tok .= $code[0];
 593+ $code = substr( $code, 1 );
 594+ while( strlen( $code ) != 0 && self::isDigitOrDot( $code[0] ) ) {
 595+ $tok .= $code[0];
 596+ $code = substr( $code, 1 );
 597+ }
 598+ return array( in_string( '.', $tok ) ? doubleval( $tok ) : intval( $tok ), AFPToken::TNumber, $code, $pos );
 599+ }
 600+ if( self::isValidIdSymbol( $code[0] ) ) {
 601+ while( strlen( $code ) != 0 && self::isValidIdSymbol( $code[0] ) ) {
 602+ $tok .= $code[0];
 603+ $code = substr( $code, 1 );
 604+ }
 605+ $type = in_array( $tok, self::$mKeywords ) ? AFPToken::TKeyword : AFPToken::TID;
 606+ return array( $tok, $type, $code, $pos );
 607+ }
 608+ throw new AFPException( "Unrecognized token \"{$code[0]}\" at char $pos" );
 609+ }
 610+
 611+ protected static function isDigitOrDot( $chr ) {
 612+ return ctype_digit( $chr ) || $chr == '.';
 613+ }
 614+
 615+ protected static function isValidIdSymbol( $chr ) {
 616+ return ctype_alnum( $chr ) || $chr == '_';
 617+ }
 618+
 619+ //Built-in functions
 620+ protected function funcLc( $args ) {
 621+ global $wgContLang;
 622+ if( count( $args ) < 1 )
 623+ throw new AFPExpection( "No params passed to lc()" );
 624+ $s = $args[0]->toString();
 625+ return new AFPData( AFPData::DString, $wgContLang->lc( $s ) );
 626+ }
 627+
 628+ protected function funcLen( $args ) {
 629+ if( count( $args ) < 1 )
 630+ throw new AFPExpection( "No params passed to len()" );
 631+ $s = $args[0]->toString();
 632+ return new AFPData( AFPData::DNumber, mb_strlen( $s, 'utf-8' ) );
 633+ }
 634+
 635+ protected function funcNorm( $args ) {
 636+ if( count( $args ) < 1 )
 637+ throw new AFPExpection( "No params passed to norm()" );
 638+ $s = $args[0]->toString();
 639+ return new AFPData( AFPData::DString, AbuseFilter::normalise( $s ) );
 640+ }
 641+
 642+ protected function funcSimpleNorm( $args ) {
 643+ if( count( $args ) < 1 )
 644+ throw new AFPExpection( "No params passed to simplenorm()" );
 645+ $s = $args[0]->toString();
 646+
 647+ $s = preg_replace( '/[\d\W]+/', '', $s );
 648+ $s = strtolower( $value );
 649+ return new AFPData( AFPData::DString, $s );
 650+ }
 651+
 652+ protected function funcSpecialRatio( $args ) {
 653+ if( count( $args ) < 1 )
 654+ throw new AFPExpection( "No params passed to simplenorm()" );
 655+ $s = $args[0]->toString();
 656+
 657+ if (!strlen($s)) {
 658+ return new AFPData( AFPData::DNumber, 0 );
 659+ }
 660+
 661+ $specialsonly = preg_replace('/\w/', '', $s );
 662+ $val = (strlen($specialsonly) / strlen($s));
 663+
 664+ return new AFPData( AFPData::DNumber, $val );
 665+ }
 666+}
Property changes on: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
___________________________________________________________________
Added: svn:eol-style
1667 + native
Added: svn:executable
2668 + *
Index: trunk/extensions/AbuseFilter/AbuseFilter.php
@@ -28,6 +28,7 @@
2929
3030 $wgAutoloadClasses[ 'AbuseFilter' ] = "$dir/AbuseFilter.class.php";
3131 $wgAutoloadClasses[ 'AbuseFilterParser' ] = "$dir/AbuseFilter.parser.php";
 32+$wgAutoloadClasses[ 'AbuseFilterParserNative' ] = "$dir/AbuseFilter.nativeparser.php";
3233 $wgAutoloadClasses[ 'AbuseFilterHooks' ] = "$dir/AbuseFilter.hooks.php";
3334 $wgAutoloadClasses['SpecialAbuseLog'] = "$dir/SpecialAbuseLog.php";
3435 $wgAutoloadClasses['SpecialAbuseFilter'] = "$dir/SpecialAbuseFilter.php";
@@ -54,4 +55,8 @@
5556
5657 // Disable filters if they match more than X edits, constituting more than Y% of the last Z edits
5758 $wgAbuseFilterEmergencyDisableThreshold = 0.05;
58 -$wgAbuseFilterEmergencyDisableCount = 5;
\ No newline at end of file
 59+$wgAbuseFilterEmergencyDisableCount = 5;
 60+
 61+// Abuse filter parser class
 62+$wgAbuseFilterParserClass = 'AbuseFilterParserNative';
 63+$wgAbuseFilterNativeParser = "$dir/parser_native/af_parser";
\ No newline at end of file
Index: trunk/extensions/AbuseFilter/AbuseFilter.nativeparser.php
@@ -0,0 +1,86 @@
 2+<?php
 3+if ( ! defined( 'MEDIAWIKI' ) )
 4+ die();
 5+
 6+class AbuseFilterParserNative {
 7+ var $mVars;
 8+ var $mProcess,$mPipes;
 9+
 10+ public function __destruct() {
 11+ foreach( $this->mPipes as $pipe ) {
 12+ fclose($pipe);
 13+ }
 14+
 15+ proc_close( $this->mProcess );
 16+ }
 17+
 18+ public function setVar( $name, $var ) {
 19+ $this->mVars[$name] = $var;
 20+ }
 21+
 22+ public function setVars( $vars ) {
 23+ foreach( $vars as $name => $var ) {
 24+ $this->setVar( $name, $var );
 25+ }
 26+ }
 27+
 28+ public function getNativeParser() {
 29+ global $wgAbuseFilterNativeParser;
 30+
 31+ if (!is_resource($this->mProcess)) {
 32+ $this->mPipes = array();
 33+ $descriptorspec = array(
 34+ 0 => array( 'pipe', 'r' ),
 35+ 1 => array( 'pipe', 'w' )
 36+ );
 37+
 38+ $this->mProcess = proc_open( $wgAbuseFilterNativeParser, $descriptorspec, $this->mPipes );
 39+
 40+ if (!is_resource($this->mProcess)) {
 41+ throw new MWException( "Error using native parser" );
 42+ }
 43+
 44+ return $this->mPipes;
 45+ }
 46+
 47+ return $this->mPipes;
 48+ }
 49+
 50+ public function parse( $filter ) {
 51+ $request = $this->generateXMLRequest( $filter );
 52+
 53+ $pipes = $this->getNativeParser();
 54+
 55+ if (is_array($pipes)) {
 56+ fwrite($pipes[0], $request);
 57+ fwrite($pipes[0], "\x04");
 58+ fflush($pipes[0]);
 59+
 60+ // Get response
 61+ $response = trim(fgets( $pipes[1] ));
 62+
 63+ if ($response == "MATCH") {
 64+ return true;
 65+ } elseif ($response == "NOMATCH") {
 66+ return false;
 67+ } else {
 68+ throw new MWException( "Unknown output from native parser: $response" );
 69+ }
 70+ }
 71+ }
 72+
 73+ protected function generateXMLRequest( $filter ) {
 74+ // Write vars
 75+ $vars = '';
 76+ foreach( $this->mVars as $key => $value ) {
 77+ $vars .= Xml::element( 'var', array( 'key' => $key ), utf8_encode($value) );
 78+ }
 79+ $vars = Xml::tags( 'vars', null, $vars );
 80+
 81+ $code = Xml::element( 'rule', null, utf8_encode($filter) );
 82+
 83+ $request = Xml::tags( 'request', null, $vars . $code );
 84+
 85+ return $request;
 86+ }
 87+}
\ No newline at end of file
Property changes on: trunk/extensions/AbuseFilter/AbuseFilter.nativeparser.php
___________________________________________________________________
Added: svn:eol-style
188 + native

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r38187 | Support Unicode via ICU in parser_native | vasilievvv | 15:11, 29 July 2008

Status & tagging log