r38832 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r38831‎ | r38832 | r38833 >
Date:03:23, 8 August 2008
Author:river
Status:old
Tags:
Comment:
- remove unused isInVector
- move AFP stuff into its own namespace
- move utf8 and equivset into separate modules
- parser should understand /* */ comments
Modified paths:
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/aftypes.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/aftypes.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/check.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/eval.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/filter_evaluator.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/filter_evaluator.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/main.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/makefile (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parser.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/parser.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/request.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/request.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/syntax_check.cpp (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/parser.h
@@ -9,21 +9,25 @@
1010
1111 #include "aftypes.h"
1212
 13+namespace afp {
 14+
1315 struct parser_grammar;
1416
1517 struct expressor : boost::noncopyable {
16 - typedef boost::function<AFPData (std::vector<AFPData>)> func_t;
 18+ typedef boost::function<datum (std::vector<datum>)> func_t;
1719
1820 expressor();
1921 ~expressor();
2022
21 - AFPData evaluate(std::string const &expr) const;
 23+ datum evaluate(std::string const &expr) const;
2224
23 - void add_variable(std::string const &name, AFPData value);
 25+ void add_variable(std::string const &name, datum value);
2426 void add_function(std::string const &name, func_t value);
2527
2628 private:
2729 parser_grammar *grammar_;
2830 };
2931
 32+} // namespace afp
 33+
3034 #endif /* !EXPRESSOR_H */
Index: trunk/extensions/AbuseFilter/parser_native/filter_evaluator.cpp
@@ -2,6 +2,8 @@
33 #include "parser.h"
44 #include "affunctions.h"
55
 6+namespace afp {
 7+
68 filter_evaluator::filter_evaluator()
79 {
810 e.add_function("length", af_length);
@@ -26,8 +28,9 @@
2729 }
2830
2931 void
30 -filter_evaluator::add_variable(std::string const &key, AFPData value)
 32+filter_evaluator::add_variable(std::string const &key, datum value)
3133 {
3234 e.add_variable(key, value);
3335 }
3436
 37+} // namespace afp
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.cpp
@@ -1,30 +1,27 @@
2 -#include "aftypes.h"
32 #include <sstream>
43 #include <ios>
54 #include <iostream>
65 #include <cassert>
76 #include <algorithm>
87 #include <cmath>
 8+
99 #include <boost/lexical_cast.hpp>
1010
11 -AFPToken::AFPToken(unsigned int new_type, string new_value, unsigned int new_pos) {
12 - type = new_type;
13 - value = new_value;
14 - pos = new_pos;
15 -}
 11+#include "aftypes.h"
1612
 13+namespace afp {
1714
18 -AFPData::AFPData(std::string const &var) {
 15+datum::datum(std::string const &var) {
1916 _init_from_string(var);
2017 }
2118
22 -AFPData::AFPData(char const *var)
 19+datum::datum(char const *var)
2320 {
2421 _init_from_string(var);
2522 }
2623
2724 void
28 -AFPData::_init_from_string(std::string const &var)
 25+datum::_init_from_string(std::string const &var)
2926 {
3027 // Try integer
3128 try {
@@ -39,35 +36,35 @@
4037 }
4138 }
4239
43 -AFPData::AFPData() {
 40+datum::datum() {
4441 }
4542
46 -AFPData::AFPData(AFPData const &other)
 43+datum::datum(datum const &other)
4744 : value_(other.value_)
4845 {
4946 }
5047
51 -AFPData::AFPData(long int var)
 48+datum::datum(long int var)
5249 : value_(var)
5350 {
5451 }
5552
56 -AFPData::AFPData(double var)
 53+datum::datum(double var)
5754 : value_(var)
5855 {
5956 }
6057
61 -AFPData::AFPData(float var)
 58+datum::datum(float var)
6259 : value_(var)
6360 {
6461 }
6562
66 -AFPData::AFPData(bool var)
 63+datum::datum(bool var)
6764 : value_((long int) var)
6865 {
6966 }
7067
71 -AFPData & AFPData::operator= (AFPData const &other) {
 68+datum & datum::operator= (datum const &other) {
7269 // Protect against self-assignment
7370 if (this == &other) {
7471 return *this;
@@ -77,10 +74,6 @@
7875 return *this;
7976 }
8077
81 -bool isInVector(std::string const &needle, std::vector<std::string> const &haystack) {
82 - return std::find(haystack.begin(), haystack.end(), needle) != haystack.end();
83 -}
84 -
8578 /*
8679 * Convert a string to an integer value.
8780 */
@@ -108,7 +101,7 @@
109102 };
110103
111104 /*
112 - * Conversions from AFPData to other types.
 105+ * Conversions from datum to other types.
113106 */
114107 struct to_string_visitor : boost::static_visitor<std::string> {
115108 std::string operator() (std::string const &v) const {
@@ -156,17 +149,17 @@
157150 };
158151
159152 std::string
160 -AFPData::toString() const {
 153+datum::toString() const {
161154 return boost::apply_visitor(to_string_visitor(), value_);
162155 }
163156
164157 long int
165 -AFPData::toInt() const {
 158+datum::toInt() const {
166159 return boost::apply_visitor(to_int_visitor(), value_);
167160 }
168161
169162 double
170 -AFPData::toFloat() const {
 163+datum::toFloat() const {
171164 return boost::apply_visitor(to_double_visitor(), value_);
172165 }
173166
@@ -213,13 +206,13 @@
214207 * after doing appropriate int->double promotion.
215208 */
216209 template<template<typename V> class Operator>
217 -struct arith_visitor : boost::static_visitor<AFPData> {
 210+struct arith_visitor : boost::static_visitor<datum> {
218211 /*
219212 * Anything involving a double returns a double.
220213 * Otherwise, int is returned.
221214 */
222215 template<typename T, typename U>
223 - AFPData operator() (T const &a, U const &b) const {
 216+ datum operator() (T const &a, U const &b) const {
224217 typedef typename from_string_converter<T>::type a_type;
225218 typedef typename from_string_converter<U>::type b_type;
226219
@@ -272,7 +265,7 @@
273266 * For comparisons that only work on integers - strings will be converted.
274267 */
275268 template<template<typename V> class Operator>
276 -struct arith_compare_visitor : boost::static_visitor<AFPData> {
 269+struct arith_compare_visitor : boost::static_visitor<datum> {
277270 template<typename T, typename U>
278271 bool operator() (T const &a, U const &b) const {
279272 typedef typename from_string_converter<T>::type a_type;
@@ -285,83 +278,83 @@
286279 }
287280 };
288281
289 -AFPData &
290 -AFPData::operator+=(AFPData const &other)
 282+datum &
 283+datum::operator+=(datum const &other)
291284 {
292 - AFPData result = boost::apply_visitor(arith_visitor<std::plus>(), value_, other.value_);
 285+ datum result = boost::apply_visitor(arith_visitor<std::plus>(), value_, other.value_);
293286 *this = result;
294287 return *this;
295288 }
296289
297 -AFPData &
298 -AFPData::operator-=(AFPData const &other)
 290+datum &
 291+datum::operator-=(datum const &other)
299292 {
300 - AFPData result = boost::apply_visitor(arith_visitor<std::minus>(), value_, other.value_);
 293+ datum result = boost::apply_visitor(arith_visitor<std::minus>(), value_, other.value_);
301294 *this = result;
302295 return *this;
303296 }
304297
305 -AFPData &
306 -AFPData::operator*=(AFPData const &other)
 298+datum &
 299+datum::operator*=(datum const &other)
307300 {
308 - AFPData result = boost::apply_visitor(arith_visitor<std::multiplies>(), value_, other.value_);
 301+ datum result = boost::apply_visitor(arith_visitor<std::multiplies>(), value_, other.value_);
309302 *this = result;
310303 return *this;
311304 }
312305
313 -AFPData&
314 -AFPData::operator/=(AFPData const &other)
 306+datum&
 307+datum::operator/=(datum const &other)
315308 {
316 - AFPData result = boost::apply_visitor(arith_visitor<std::divides>(), value_, other.value_);
 309+ datum result = boost::apply_visitor(arith_visitor<std::divides>(), value_, other.value_);
317310 *this = result;
318311 return *this;
319312 }
320313
321 -AFPData&
322 -AFPData::operator%=(AFPData const &other)
 314+datum&
 315+datum::operator%=(datum const &other)
323316 {
324 - AFPData result = boost::apply_visitor(arith_visitor<afpmodulus>(), value_, other.value_);
 317+ datum result = boost::apply_visitor(arith_visitor<afpmodulus>(), value_, other.value_);
325318 *this = result;
326319 return *this;
327320 }
328321
329 -AFPData
330 -operator+(AFPData const &a, AFPData const &b) {
331 - return AFPData(a) += b;
 322+datum
 323+operator+(datum const &a, datum const &b) {
 324+ return datum(a) += b;
332325 }
333326
334 -AFPData
335 -operator-(AFPData const &a, AFPData const &b) {
336 - return AFPData(a) -= b;
 327+datum
 328+operator-(datum const &a, datum const &b) {
 329+ return datum(a) -= b;
337330 }
338331
339 -AFPData
340 -operator*(AFPData const &a, AFPData const &b) {
341 - return AFPData(a) *= b;
 332+datum
 333+operator*(datum const &a, datum const &b) {
 334+ return datum(a) *= b;
342335 }
343336
344 -AFPData
345 -operator/(AFPData const &a, AFPData const &b) {
346 - return AFPData(a) /= b;
 337+datum
 338+operator/(datum const &a, datum const &b) {
 339+ return datum(a) /= b;
347340 }
348341
349 -AFPData
350 -operator%(AFPData const &a, AFPData const &b) {
351 - return AFPData(a) %= b;
 342+datum
 343+operator%(datum const &a, datum const &b) {
 344+ return datum(a) %= b;
352345 }
353346
354347 bool
355 -operator==(AFPData const &a, AFPData const &b) {
 348+operator==(datum const &a, datum const &b) {
356349 return a.compare(b);
357350 }
358351
359352 bool
360 -AFPData::compare(AFPData const &other) const {
 353+datum::compare(datum const &other) const {
361354 return boost::apply_visitor(compare_visitor<std::equal_to>(), value_, other.value_);
362355 }
363356
364357 bool
365 -AFPData::compare_with_type(AFPData const &other) const {
 358+datum::compare_with_type(datum const &other) const {
366359 if (value_.which() != other.value_.which())
367360 return false;
368361
@@ -369,36 +362,38 @@
370363 }
371364
372365 bool
373 -AFPData::less_than(AFPData const &other) const {
 366+datum::less_than(datum const &other) const {
374367 return boost::apply_visitor(arith_compare_visitor<std::less>(), value_, other.value_);
375368 }
376369
377370 bool
378 -operator< (AFPData const &a, AFPData const &b) {
 371+operator< (datum const &a, datum const &b) {
379372 return a.less_than(b);
380373 }
381374
382375 bool
383 -operator<= (AFPData const &a, AFPData const &b) {
 376+operator<= (datum const &a, datum const &b) {
384377 return a.less_than(b) || a == b;
385378 }
386379
387380 bool
388 -operator> (AFPData const &a, AFPData const &b) {
 381+operator> (datum const &a, datum const &b) {
389382 return !(a <= b);
390383 }
391384
392385 bool
393 -operator>= (AFPData const &a, AFPData const &b) {
 386+operator>= (datum const &a, datum const &b) {
394387 return !(a < b);
395388 }
396389
397390 bool
398 -operator!= (AFPData const &a, AFPData const &b) {
 391+operator!= (datum const &a, datum const &b) {
399392 return !(a == b);
400393 }
401394
402395 bool
403 -AFPData::operator! () const {
 396+datum::operator! () const {
404397 return !(int) *this;
405398 }
 399+
 400+} // namespace afp
Index: trunk/extensions/AbuseFilter/parser_native/filter_evaluator.h
@@ -7,15 +7,19 @@
88 #include "aftypes.h"
99 #include "parser.h"
1010
 11+namespace afp {
 12+
1113 struct filter_evaluator {
1214 filter_evaluator();
1315
1416 bool evaluate(std::string const &filter) const;
1517
16 - void add_variable(std::string const &key, AFPData value);
 18+ void add_variable(std::string const &key, datum value);
1719
1820 private:
1921 expressor e;
2022 };
2123
 24+} // namespace afp
 25+
2226 #endif /* !FILTER_EVALUATOR_H */
Index: trunk/extensions/AbuseFilter/parser_native/check.cpp
@@ -2,15 +2,15 @@
33 #include "affunctions.h"
44
55 int main( int argc, char** argv ) {
6 - filter_evaluator f;
 6+ afp::filter_evaluator f;
77
88 bool result = false;
99
1010 for(int i=0;i<=100;i++) {
1111 try {
12 - f.add_variable( "foo", AFPData(string("love")) );
 12+ f.add_variable("foo", afp::datum("love"));
1313 result = f.evaluate( "specialratio('foo;') == 0.25" );
14 - } catch (AFPException* excep) {
 14+ } catch (afp::exception* excep) {
1515 printf( "Exception: %s\n", excep->what() );
1616 }
1717 }
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.cpp
@@ -6,18 +6,18 @@
77 #include <iostream>
88 #include <ctype.h>
99
10 -#include <unicode/utf8.h>
11 -#include <unicode/ustring.h>
 10+#include "utf8.h"
 11+#include "equiv.h"
1212
13 -#define EQUIVSET_LOC "equivset.txt"
 13+namespace afp {
1414
15 -AFPData
16 -af_count(std::vector<AFPData> const &args) {
 15+datum
 16+af_count(std::vector<datum> const &args) {
1717 if (!args.size()) {
18 - throw AFPException( "Not enough arguments to count" );
 18+ throw exception( "Not enough arguments to count" );
1919 }
2020
21 - string needle, haystack;
 21+ std::string needle, haystack;
2222
2323 if (args.size() < 2) {
2424 needle = ",";
@@ -40,52 +40,48 @@
4141 count--;
4242 }
4343
44 - return AFPData((long int)count);
 44+ return datum((long int)count);
4545 }
4646
47 -AFPData
48 -af_norm(vector<AFPData> const &args) {
 47+datum
 48+af_norm(std::vector<datum> const &args) {
4949 if (!args.size()) {
50 - throw AFPException( "Not enough arguments to norm" );
 50+ throw exception( "Not enough arguments to norm" );
5151 }
5252
53 - string orig = args[0].toString();
 53+ std::string orig = args[0].toString();
5454
55 - string::const_iterator p, charStart, end;
56 - int chr = 0,lastchr = 0;
57 - map<int,int> const &equivSet = getEquivSet();
58 - string result;
 55+ std::string::const_iterator p, charStart, end;
 56+ int chr = 0, lastchr = 0;
 57+ equiv_set const &equivs = equiv_set::instance();
 58+ std::string result;
5959
6060 p = orig.begin();
6161 end = orig.end();
6262
63 - while (chr = next_utf8_char( p, charStart, end )) {
64 - std::map<int, int>::const_iterator it;
65 - if ((it = equivSet.find(chr)) != equivSet.end()) {
66 - chr = it->second;
67 - }
 63+ while (chr = utf8::next_utf8_char( p, charStart, end )) {
 64+ chr = equivs.get(chr);
6865
69 - if (chr != lastchr && isalnum(chr)) {
70 - result.append(codepointToUtf8(chr));
71 - }
 66+ if (chr != lastchr && isalnum(chr))
 67+ result.append(utf8::codepoint_to_utf8(chr));
7268
7369 lastchr = chr;
7470 }
7571
76 - return AFPData(result);
 72+ return datum(result);
7773 }
7874
79 -string
 75+std::string
8076 rmdoubles(std::string const &orig) {
81 - string::const_iterator p, charStart, end;
 77+ std::string::const_iterator p, charStart, end;
8278 int chr,lastchr = 0;
83 - string result;
 79+ std::string result;
8480
8581 p = orig.begin();
8682 end = orig.end();
87 - while (chr = next_utf8_char( p, charStart, end )) {
 83+ while (chr = utf8::next_utf8_char( p, charStart, end )) {
8884 if (chr != lastchr) {
89 - result.append(codepointToUtf8(chr));
 85+ result.append(utf8::codepoint_to_utf8(chr));
9086 }
9187
9288 lastchr = chr;
@@ -94,277 +90,108 @@
9591 return result;
9692 }
9793
98 -AFPData
99 -af_specialratio(std::vector<AFPData> const &args) {
 94+datum
 95+af_specialratio(std::vector<datum> const &args) {
10096 if (!args.size()) {
101 - throw AFPException( "Not enough arguments to specialratio" );
 97+ throw exception( "Not enough arguments to specialratio" );
10298 }
10399
104 - string orig = args[0].toString();
105 - string::const_iterator p, charStart, end;
106 - int chr,lastchr = 0;
 100+ std::string orig = args[0].toString();
 101+ std::string::const_iterator p, charStart, end;
 102+ int chr;
107103 int specialcount = 0;
108104
109105 p = orig.begin();
110106 end = orig.end();
111 - while (chr = next_utf8_char( p, charStart, end )) {
 107+ while (chr = utf8::next_utf8_char( p, charStart, end )) {
112108 if (!isalnum(chr)) {
113109 specialcount++;
114110 }
115111 }
116112
117 - double ratio = (float)(specialcount) / (float)(utf8_strlen(orig));
 113+ double ratio = (float)(specialcount) / (float)(utf8::utf8_strlen(orig));
118114
119 - return AFPData(ratio);
 115+ return datum(ratio);
120116 }
121117
122 -AFPData
123 -af_rmspecials(std::vector<AFPData> const &args) {
 118+datum
 119+af_rmspecials(std::vector<datum> const &args) {
124120 if (!args.size()) {
125 - throw AFPException( "Not enough arguments to rmspecials" );
 121+ throw exception( "Not enough arguments to rmspecials" );
126122 }
127123
128 - return AFPData(rmspecials(args[0].toString()));
 124+ return datum(rmspecials(args[0].toString()));
129125 }
130126
131127 std::string
132128 rmspecials(std::string const &orig) {
133 - string::const_iterator p, charStart, end;
 129+ std::string::const_iterator p, charStart, end;
134130 int chr = 0;
135 - string result;
 131+ std::string result;
136132
137133 p = orig.begin();
138134 end = orig.end();
139 - while (chr = next_utf8_char( p, charStart, end )) {
 135+ while (chr = utf8::next_utf8_char( p, charStart, end )) {
140136 if (isalnum(chr)) {
141 - result.append(codepointToUtf8(chr));
 137+ result.append(utf8::codepoint_to_utf8(chr));
142138 }
143139 }
144140
145141 return result;
146142 }
147143
148 -AFPData
149 -af_ccnorm(std::vector<AFPData> const &args) {
 144+datum
 145+af_ccnorm(std::vector<datum> const &args) {
150146 if (!args.size()) {
151 - throw AFPException( "Not enough arguments to ccnorm" );
 147+ throw exception( "Not enough arguments to ccnorm" );
152148 }
153149
154 - return AFPData( confusable_character_normalise( args[0].toString() ) );
 150+ return datum( confusable_character_normalise( args[0].toString() ) );
155151 }
156152
157 -AFPData
158 -af_rmdoubles(std::vector<AFPData> const &args) {
 153+datum
 154+af_rmdoubles(std::vector<datum> const &args) {
159155 if (!args.size()) {
160 - throw AFPException( "Not enough arguments to rmdoubles" );
 156+ throw exception( "Not enough arguments to rmdoubles" );
161157 }
162158
163 - return AFPData(rmdoubles(args[0].toString()));
 159+ return datum(rmdoubles(args[0].toString()));
164160 }
165161
166 -AFPData
167 -af_length(std::vector<AFPData> const &args) {
 162+datum
 163+af_length(std::vector<datum> const &args) {
168164 if (!args.size()) {
169 - throw AFPException( "Not enough arguments to lcase" );
 165+ throw exception( "Not enough arguments to lcase" );
170166 }
171167
172 - return AFPData( (long int)utf8_strlen(args[0].toString()) );
 168+ return datum( (long int)utf8::utf8_strlen(args[0].toString()) );
173169 }
174170
175 -AFPData
176 -af_lcase(std::vector<AFPData> const &args) {
 171+datum
 172+af_lcase(std::vector<datum> const &args) {
177173 if (!args.size()) {
178 - throw AFPException( "Not enough arguments to lcase" );
 174+ throw exception( "Not enough arguments to lcase" );
179175 }
180176
181 - return AFPData(utf8_tolower(args[0].toString()));
 177+ return datum(utf8::utf8_tolower(args[0].toString()));
182178 }
183179
184180 std::string
185181 confusable_character_normalise(std::string const &orig) {
186 - string::const_iterator p, charStart, end;
 182+ std::string::const_iterator p, charStart, end;
187183 int chr;
188 - map<int,int> const &equivSet = getEquivSet();
189 - string result;
 184+ equiv_set const &equivs = equiv_set::instance();
 185+ std::string result;
190186
191187 p = orig.begin();
192188 end = orig.end();
193189
194 - while (chr = next_utf8_char( p, charStart, end )) {
195 - map<int, int>::const_iterator it;
196 - if ((it = equivSet.find(chr)) != equivSet.end()) {
197 - chr = it->second;
198 - }
199 -
200 - result.append(codepointToUtf8(chr));
 190+ while (chr = utf8::next_utf8_char( p, charStart, end )) {
 191+ chr = equivs.get(chr);
 192+ result.append(utf8::codepoint_to_utf8(chr));
201193 }
202194
203195 return result;
204196 }
205197
206 -map<int,int> const &
207 -getEquivSet() {
208 - static map<int,int> equivSet;
209 - // Map of codepoint:codepoint
210 -
211 - if (equivSet.empty()) {
212 - ifstream eqsFile( EQUIVSET_LOC );
213 -
214 - if (!eqsFile) {
215 - throw AFPException( "Unable to open equivalence sets!" );
216 - }
217 -
218 - string line;
219 -
220 - while (getline(eqsFile,line)) {
221 - size_t pos = line.find_first_of( ":", 0 );
222 -
223 - if (pos != line.npos) {
224 - // We have a codepoint:codepoint thing.
225 - int actual = 0;
226 - int canonical = 0;
227 -
228 - istringstream actual_buffer(line.substr(0,pos));
229 - istringstream canon_buffer( line.substr(pos+1));
230 - actual_buffer >> actual;
231 - canon_buffer >> canonical;
232 -
233 - if (actual != 0 && canonical != 0) {
234 - equivSet[actual] = canonical;
235 - }
236 - }
237 - }
238 -
239 - eqsFile.close();
240 - }
241 -
242 - return equivSet;
243 -}
244 -
245 -// Weak UTF-8 decoder
246 -// Will return garbage on invalid input (overshort sequences, overlong sequences, etc.)
247 -// Stolen from wikidiff2 extension by Tim Starling (no point in reinventing the wheel)
248 -int
249 -next_utf8_char(std::string::const_iterator & p, std::string::const_iterator & charStart,
250 - std::string::const_iterator end)
251 -{
252 - int c=0;
253 - unsigned char byte;
254 - int bytes = 0;
255 - charStart = p;
256 - if (p == end) {
257 - return 0;
258 - }
259 - do {
260 - byte = (unsigned char)*p;
261 - if (byte < 0x80) {
262 - c = byte;
263 - bytes = 0;
264 - } else if (byte >= 0xc0) {
265 - // Start of UTF-8 character
266 - // If this is unexpected, due to an overshort sequence, we ignore the invalid
267 - // sequence and resynchronise here
268 - if (byte < 0xe0) {
269 - bytes = 1;
270 - c = byte & 0x1f;
271 - } else if (byte < 0xf0) {
272 - bytes = 2;
273 - c = byte & 0x0f;
274 - } else {
275 - bytes = 3;
276 - c = byte & 7;
277 - }
278 - } else if (bytes) {
279 - c <<= 6;
280 - c |= byte & 0x3f;
281 - --bytes;
282 - } else {
283 - // Unexpected continuation, ignore
284 - }
285 - ++p;
286 - } while (bytes && p != end);
287 - return c;
288 -}
289 -
290 -std::size_t
291 -utf8_strlen(std::string const &s)
292 -{
293 -std::size_t ret = 0;
294 - for (std::string::const_iterator it = s.begin(), end = s.end();
295 - it < end; ++it)
296 - {
297 - int skip = 1;
298 -
299 - skip = U8_LENGTH(*it);
300 - if (it + skip >= end)
301 - return ret; /* end of string */
302 -
303 - it += skip;
304 - }
305 -
306 - return ret;
307 -}
308 -
309 -/*
310 - * This could almost certainly be done in a nicer way.
311 - */
312 -std::string
313 -utf8_tolower(std::string const &s)
314 -{
315 - std::vector<UChar> ustring;
316 - UErrorCode error = U_ZERO_ERROR;
317 -
318 - for (int i = 0; i < s.size(); ) {
319 - UChar32 c;
320 - U8_NEXT(s.data(), i, s.size(), c);
321 - ustring.push_back(c);
322 - }
323 -
324 - std::vector<UChar> dest;
325 - u_strToLower(&dest[0], dest.size(), &ustring[0], ustring.size(),
326 - NULL, &error);
327 -
328 - if (U_FAILURE(error))
329 - return s;
330 -
331 - std::vector<unsigned char> u8string;
332 - int i, j;
333 - for (i = 0, j = 0; i < dest.size(); j++) {
334 - U8_APPEND_UNSAFE(&u8string[0], i, dest[j]);
335 - }
336 - return std::string(u8string.begin(), u8string.begin() + i);
337 -}
338 -
339 -// Ported from MediaWiki core function in PHP.
340 -string
341 -codepointToUtf8(int codepoint) {
342 - string ret;
343 -
344 - if(codepoint < 0x80) {
345 - ret.append(1, codepoint);
346 - return ret;
347 - }
348 -
349 - if(codepoint < 0x800) {
350 - ret.append(1, codepoint >> 6 & 0x3f | 0xc0);
351 - ret.append(1, codepoint & 0x3f | 0x80);
352 - return ret;
353 - }
354 -
355 - if(codepoint < 0x10000) {
356 - ret.append(1, codepoint >> 12 & 0x0f | 0xe0);
357 - ret.append(1, codepoint >> 6 & 0x3f | 0x80);
358 - ret.append(1, codepoint & 0x3f | 0x80);
359 - return ret;
360 - }
361 -
362 - if(codepoint < 0x110000) {
363 - ret.append(1, codepoint >> 18 & 0x07 | 0xf0);
364 - ret.append(1, codepoint >> 12 & 0x3f | 0x80);
365 - ret.append(1, codepoint >> 6 & 0x3f | 0x80);
366 - ret.append(1, codepoint & 0x3f | 0x80);
367 - return ret;
368 - }
369 -
370 - throw AFPException("Asked for code outside of range ($codepoint)\n");
371 -}
 198+} // namespace afp
Index: trunk/extensions/AbuseFilter/parser_native/aftypes.h
@@ -8,71 +8,46 @@
99 #include <boost/variant.hpp>
1010 #include <boost/lexical_cast.hpp>
1111
12 -using namespace std;
 12+namespace afp {
1313
14 -#define T_NONE 0
15 -#define T_ID 1
16 -#define T_KEYWORD 2
17 -#define T_STRING 3
18 -#define T_NUMBER 4
19 -#define T_OP 5
20 -#define T_BRACE 6
21 -#define T_COMMA 7
22 -
23 -#define D_NULL 0
24 -#define D_INTEGER 1
25 -#define D_FLOAT 2
26 -#define D_STRING 3
27 -
28 -#define DATATYPE_MAX 3
29 -
30 -class AFPToken {
 14+class datum {
3115 public:
32 - AFPToken() {}
33 - AFPToken(unsigned int type, string value, unsigned int pos);
34 - unsigned int type;
35 - string value;
36 - unsigned int pos;
37 -};
 16+ datum();
3817
39 -class AFPData {
40 -public:
41 - AFPData();
42 -
4318 /*
4419 * Generic ctor tries to convert to an int.
4520 */
4621 template<typename T>
47 - AFPData(T const &v)
 22+ datum(T const &v)
4823 : value_(boost::lexical_cast<long int>(v))
4924 {
5025 }
5126
5227 // Specific type constructors
53 - AFPData( std::string const &var );
54 - AFPData( char const *var );
55 - AFPData( long int var );
56 - AFPData( float var );
57 - AFPData( double var );
58 - AFPData( bool var );
 28+ datum( std::string const &var );
 29+ datum( char const *var );
 30+ datum( long int var );
 31+ datum( float var );
 32+ datum( double var );
 33+ datum( bool var );
5934
60 - AFPData( const AFPData & oldData );
 35+ datum( const datum & oldData );
6136
6237 // Assignment operator
63 - AFPData &operator= (const AFPData & other);
 38+ datum &operator= (const datum & other);
6439
65 - AFPData &operator+=(AFPData const &other);
66 - AFPData &operator-=(AFPData const &other);
67 - AFPData &operator*=(AFPData const &other);
68 - AFPData &operator/=(AFPData const &other);
69 - AFPData &operator%=(AFPData const &other);
 40+ datum &operator+=(datum const &other);
 41+ datum &operator-=(datum const &other);
 42+ datum &operator*=(datum const &other);
 43+ datum &operator/=(datum const &other);
 44+ datum &operator%=(datum const &other);
7045 bool operator!() const;
7146
72 - bool compare(AFPData const &other) const;
73 - bool compare_with_type(AFPData const &other) const;
74 - bool less_than(AFPData const &other) const;
 47+ bool compare(datum const &other) const;
 48+ bool compare_with_type(datum const &other) const;
 49+ bool less_than(datum const &other) const;
7550
76 - string toString() const;
 51+ std::string toString() const;
7752 long int toInt() const;
7853 double toFloat() const;
7954 bool toBool() const {
@@ -108,38 +83,40 @@
10984 valuetype value_;
11085 };
11186
112 -class AFPException :exception {
113 - public:
114 - const char* what() {return this->s;}
115 - AFPException( const char* str ) {s = str;}
116 - AFPException( string str, string var ) { char* s1 = new char[1024]; sprintf( s1, str.c_str(), var.c_str() ); s = s1; }
117 - AFPException( string str, int var ) { char* s1 = new char[1024]; sprintf( s1, str.c_str(), var ); s = s1; }
118 - AFPException( string str, string svar, int ivar ) { char* s1 = new char[1024]; sprintf( s1, str.c_str(), ivar, svar.c_str() ); s = s1; }
119 -
120 - private:
121 - const char* s;
 87+class exception : std::exception {
 88+public:
 89+ exception(std::string const &what)
 90+ : what_(what) {}
 91+ ~exception() throw() {}
 92+
 93+ char const *what() const throw() {
 94+ return what_.c_str();
 95+ }
 96+
 97+private:
 98+ std::string what_;
12299 };
123100
124 -AFPData operator+(AFPData const &a, AFPData const &b);
125 -AFPData operator-(AFPData const &a, AFPData const &b);
126 -AFPData operator*(AFPData const &a, AFPData const &b);
127 -AFPData operator/(AFPData const &a, AFPData const &b);
128 -AFPData operator%(AFPData const &a, AFPData const &b);
 101+datum operator+(datum const &a, datum const &b);
 102+datum operator-(datum const &a, datum const &b);
 103+datum operator*(datum const &a, datum const &b);
 104+datum operator/(datum const &a, datum const &b);
 105+datum operator%(datum const &a, datum const &b);
129106
130 -bool operator==(AFPData const &a, AFPData const &b);
131 -bool operator!=(AFPData const &a, AFPData const &b);
132 -bool operator<(AFPData const &a, AFPData const &b);
133 -bool operator>(AFPData const &a, AFPData const &b);
134 -bool operator<=(AFPData const &a, AFPData const &b);
135 -bool operator>=(AFPData const &a, AFPData const &b);
 107+bool operator==(datum const &a, datum const &b);
 108+bool operator!=(datum const &a, datum const &b);
 109+bool operator<(datum const &a, datum const &b);
 110+bool operator>(datum const &a, datum const &b);
 111+bool operator<=(datum const &a, datum const &b);
 112+bool operator>=(datum const &a, datum const &b);
136113
137114 template<typename char_type, typename traits>
138115 std::basic_ostream<char_type, traits> &
139 -operator<<(std::basic_ostream<char_type, traits> &s, AFPData const &d) {
 116+operator<<(std::basic_ostream<char_type, traits> &s, datum const &d) {
140117 d.print_to(s);
141118 return s;
142119 }
143120
144 -bool isInVector(std::string const &needle, std::vector<std::string> const &haystack);
 121+} // namespace afp
145122
146123 #endif /* !AFTYPES_H */
Index: trunk/extensions/AbuseFilter/parser_native/syntax_check.cpp
@@ -5,21 +5,22 @@
66
77 #include "filter_evaluator.h"
88
9 -int main( int argc, char** argv ) {
10 - stringbuf ss( ios::in | ios::out );
 9+int main(int argc, char** argv)
 10+{
 11+ std::stringbuf ss( std::ios::in | std::ios::out );
1112
1213 // Fill the stringstream
13 - cin.get(ss,'\x04');
 14+ std::cin.get(ss,'\x04');
1415
15 - string filter = ss.str();
 16+ std::string filter = ss.str();
1617
1718 try {
18 - filter_evaluator f;
 19+ afp::filter_evaluator f;
1920 f.evaluate(filter);
20 - } catch (AFPException excep) {
21 - cout << "PARSERR: " << excep.what() << endl;
22 - exit(0);
 21+ } catch (afp::exception &excep) {
 22+ std::cout << "PARSERR: " << excep.what() << std::endl;
 23+ std::exit(0);
2324 }
2425
25 - cout << "SUCCESS" << endl;
 26+ std::cout << "SUCCESS" << std::endl;
2627 }
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.h
@@ -1,28 +1,26 @@
22 #ifndef AFFUNCTIONS_H
33 #define AFFUNCTIONS_H
44
5 -#include "aftypes.h"
65 #include <map>
76 #include <vector>
87
9 -AFPData af_length(std::vector<AFPData> const &args);
10 -AFPData af_lcase(std::vector<AFPData> const &args);
11 -AFPData af_ccnorm(std::vector<AFPData> const &args);
12 -AFPData af_rmdoubles(std::vector<AFPData> const &args);
13 -AFPData af_specialratio(std::vector<AFPData> const &args);
14 -AFPData af_rmspecials(std::vector<AFPData> const &args);
15 -AFPData af_norm(std::vector<AFPData> const &args);
16 -AFPData af_count(std::vector<AFPData> const &args);
 8+#include "aftypes.h"
179
18 -map<int,int> const &getEquivSet();
19 -int next_utf8_char(std::string::const_iterator & p, std::string::const_iterator & charStart, std::string::const_iterator end);
20 -string codepointToUtf8( int codepoint );
21 -string confusable_character_normalise(std::string const &orig);
22 -vector<AFPData> makeFuncArgList( AFPData arg );
23 -AFPData callFunction(string const &name, AFPData arg);
24 -string rmdoubles(string const &orig);
25 -string rmspecials(string const &orig);
26 -std::size_t utf8_strlen(std::string const &s);
27 -std::string utf8_tolower(std::string const &s);
 10+namespace afp {
2811
 12+datum af_length (std::vector<datum> const &args);
 13+datum af_lcase (std::vector<datum> const &args);
 14+datum af_ccnorm (std::vector<datum> const &args);
 15+datum af_rmdoubles (std::vector<datum> const &args);
 16+datum af_specialratio (std::vector<datum> const &args);
 17+datum af_rmspecials (std::vector<datum> const &args);
 18+datum af_norm (std::vector<datum> const &args);
 19+datum af_count (std::vector<datum> const &args);
 20+
 21+std::string confusable_character_normalise(std::string const &orig);
 22+std::string rmdoubles(std::string const &orig);
 23+std::string rmspecials(std::string const &orig);
 24+
 25+} // namespace afp
 26+
2927 #endif /* !AFFUNCTIONS_H */
Index: trunk/extensions/AbuseFilter/parser_native/main.cpp
@@ -15,7 +15,7 @@
1616
1717 int main( int argc, char** argv ) {
1818 while (true) {
19 - request r;
 19+ afp::request r;
2020 bool result = false;
2121
2222 try {
@@ -35,11 +35,11 @@
3636 }
3737
3838 result = r.evaluate();
39 - } catch (AFPException &excep) {
40 - cerr << "EXCEPTION: " << excep.what() << endl;
 39+ } catch (afp::exception &excep) {
 40+ std::cerr << "EXCEPTION: " << excep.what() << std::endl;
4141 }
4242
43 - cout << ( result ? "MATCH\n" : "NOMATCH\n" );
 43+ std::cout << ( result ? "MATCH\n" : "NOMATCH\n" );
4444 }
4545 }
4646
Index: trunk/extensions/AbuseFilter/parser_native/eval.cpp
@@ -1,25 +1,24 @@
22 #include <cstdlib>
33 #include <iostream>
44 #include <string>
5 -#include <sstream>
6 -#include <map>
75
86 #include "filter_evaluator.h"
97 #include "request.h"
108
11 -int main( int argc, char** argv ) {
12 - request r;
13 - string result;
 9+int main(int argc, char** argv)
 10+{
 11+ afp::request r;
 12+ std::string result;
1413
1514 try {
1615 if (!r.load(std::cin))
1716 return 1;
1817
1918 result = r.evaluate();
20 - } catch (AFPException excep) {
21 - cout << "EXCEPTION: " << excep.what() << endl;
22 - cerr << "EXCEPTION: " << excep.what() << endl;
 19+ } catch (afp::exception &excep) {
 20+ std::cout << "EXCEPTION: " << excep.what() << std::endl;
 21+ std::cerr << "EXCEPTION: " << excep.what() << std::endl;
2322 }
2423
25 - cout << result << "\0";
 24+ std::cout << result << "\0";
2625 }
Index: trunk/extensions/AbuseFilter/parser_native/request.cpp
@@ -1,11 +1,13 @@
22 #include "request.h"
33
 4+namespace afp {
 5+
46 // Protocol:
57 // code NULL <key> NULL <value> NULL ... <value> NULL NULL
68
79 bool
810 request::load(std::istream &inp) {
9 - inp.unsetf(ios_base::skipws);
 11+ inp.unsetf(std::ios_base::skipws);
1012
1113 std::istream_iterator<char> it(inp), p, end;
1214
@@ -55,7 +57,7 @@
5658
5759 it++;
5860
59 - f.add_variable(key, AFPData(value));
 61+ f.add_variable(key, datum(value));
6062 }
6163
6264 return true;
@@ -67,3 +69,4 @@
6870 return f.evaluate(filter);
6971 }
7072
 73+} // namespace afp
Index: trunk/extensions/AbuseFilter/parser_native/makefile
@@ -13,6 +13,8 @@
1414 af_expr-parser.o \
1515 af_expr-filter_evaluator.o \
1616 af_expr-eval.o \
 17+ af_expr-utf8.o \
 18+ af_expr-equiv.o \
1719 af_expr-request.o
1820
1921 af_parser_objs = \
@@ -21,6 +23,8 @@
2224 af_parser-main.o \
2325 af_parser-parser.o \
2426 af_parser-request.o \
 27+ af_parser-utf8.o \
 28+ af_parser-equiv.o \
2529 af_parser-filter_evaluator.o
2630
2731 check_objs = \
@@ -28,6 +32,8 @@
2933 check-aftypes.o \
3034 check-check.o \
3135 check-parser.o \
 36+ check-utf8.o \
 37+ check-equiv.o \
3238 check-filter_evaluator.o
3339
3440 syntax_check_objs = \
@@ -35,6 +41,8 @@
3642 syntax_check-aftypes.o \
3743 syntax_check-filter_evaluator.o \
3844 syntax_check-parser.o \
 45+ syntax_check-utf8.o \
 46+ syntax_check-equiv.o \
3947 syntax_check-syntax_check.o
4048
4149 expr_objs = \
Index: trunk/extensions/AbuseFilter/parser_native/parser.cpp
@@ -8,6 +8,7 @@
99 #include <boost/spirit/phoenix/operators.hpp>
1010 #include <boost/function.hpp>
1111 #include <boost/noncopyable.hpp>
 12+#include <boost/format.hpp>
1213
1314 #include "aftypes.h"
1415 #include "parser.h"
@@ -17,30 +18,32 @@
1819
1920 namespace px = phoenix;
2021
 22+namespace afp {
 23+
2124 struct parse_error : std::runtime_error {
2225 parse_error(char const *what) : std::runtime_error(what) {}
2326 };
2427
25 -struct parser_closure : boost::spirit::closure<parser_closure, AFPData>
 28+struct parser_closure : boost::spirit::closure<parser_closure, datum>
2629 {
2730 member1 val;
2831 };
2932
3033 namespace {
3134
32 -AFPData f_in(AFPData const &a, AFPData const &b)
 35+datum f_in(datum const &a, datum const &b)
3336 {
3437 std::string sa = a, sb = b;
35 - return AFPData(std::search(sb.begin(), sb.end(), sa.begin(), sa.end()) != sb.end());
 38+ return datum(std::search(sb.begin(), sb.end(), sa.begin(), sa.end()) != sb.end());
3639 }
3740
3841 }
3942
4043 struct function_closure : boost::spirit::closure<
4144 function_closure,
42 - AFPData,
43 - boost::function<AFPData (std::vector<AFPData>)>,
44 - std::vector<AFPData> >
 45+ datum,
 46+ boost::function<datum (std::vector<datum>)>,
 47+ std::vector<datum> >
4548 {
4649 member1 val;
4750 member2 func;
@@ -49,14 +52,14 @@
5053
5154 struct parser_grammar : public grammar<parser_grammar, parser_closure::context_t>
5255 {
53 - symbols<AFPData> variables;
54 - symbols<boost::function<AFPData (std::vector<AFPData>)> > functions;
 56+ symbols<datum> variables;
 57+ symbols<boost::function<datum (std::vector<datum>)> > functions;
5558
56 - void add_variable(std::string const &name, AFPData const &value) {
 59+ void add_variable(std::string const &name, datum const &value) {
5760 variables.add(name.c_str(), value);
5861 }
5962
60 - void add_function(std::string const &name, boost::function<AFPData (std::vector<AFPData>)> func) {
 63+ void add_function(std::string const &name, boost::function<datum (std::vector<datum>)> func) {
6164 functions.add(name.c_str(), func);
6265 }
6366
@@ -84,11 +87,11 @@
8588 struct call_function_impl {
8689 template<typename F, typename A>
8790 struct result {
88 - typedef AFPData type;
 91+ typedef datum type;
8992 };
9093
9194 template<typename F, typename A>
92 - AFPData operator() (F const &func, A const &args) const {
 95+ datum operator() (F const &func, A const &args) const {
9396 return func(args);
9497 }
9598 };
@@ -173,9 +176,9 @@
174177 "==" >> eq_expr[eq2_expr.val = eq2_expr.val == arg1]
175178 | "!=" >> eq_expr[eq2_expr.val = eq2_expr.val != arg1]
176179 | "===" >> eq_expr[eq2_expr.val =
177 - bind(&AFPData::compare_with_type)(eq2_expr.val, arg1)]
 180+ bind(&datum::compare_with_type)(eq2_expr.val, arg1)]
178181 | "!==" >> eq_expr[eq2_expr.val =
179 - !bind(&AFPData::compare_with_type)(eq2_expr.val, arg1)]
 182+ !bind(&datum::compare_with_type)(eq2_expr.val, arg1)]
180183 )
181184 ;
182185
@@ -198,7 +201,7 @@
199202 }
200203
201204 rule_t value, variable, basic, bool_expr,
202 - eq_expr, eq2_expr, mult_expr, plus_expr, in_expr, not_expr, expr;
 205+ eq_expr, eq2_expr, mult_expr, plus_expr, in_expr, expr;
203206 rule<ScannerT, function_closure::context_t> function;
204207 };
205208 };
@@ -213,12 +216,13 @@
214217 delete grammar_;
215218 }
216219
217 -AFPData
 220+datum
218221 expressor::evaluate(std::string const &filter) const
219222 {
220 - AFPData ret;
 223+ datum ret;
221224 parse_info<std::string::const_iterator> info =
222 - parse(filter.begin(), filter.end(), (*grammar_)[var(ret) = arg1], space_p);
 225+ parse(filter.begin(), filter.end(), (*grammar_)[var(ret) = arg1],
 226+ comment_p("/*", "*/") | chset<>("\n\t "));
223227 if (info.full) {
224228 return ret;
225229 } else {
@@ -228,7 +232,7 @@
229233 }
230234
231235 void
232 -expressor::add_variable(std::string const &name, AFPData value)
 236+expressor::add_variable(std::string const &name, datum value)
233237 {
234238 grammar_->add_variable(name, value);
235239 }
@@ -239,13 +243,17 @@
240244 grammar_->add_function(name, value);
241245 }
242246
 247+} // namespace afp
 248+
243249 #ifdef TEST_PARSER
244 -AFPData f_add(std::vector<AFPData> const &args)
 250+afp::datum
 251+f_add(std::vector<afp::datum> const &args)
245252 {
246253 return args[0] + args[1];
247254 }
248255
249 -AFPData f_norm(std::vector<AFPData> const &args)
 256+afp::datum
 257+f_norm(std::vector<afp::datum> const &args)
250258 {
251259 return args[0];
252260 }
@@ -253,7 +261,13 @@
254262 int
255263 main(int argc, char **argv)
256264 {
257 - expressor e;
 265+ if (argc != 2) {
 266+ std::cerr << boost::format("usage: %s <expr>\n")
 267+ % argv[0];
 268+ return 1;
 269+ }
 270+
 271+ afp::expressor e;
258272 e.add_variable("ONE", 1);
259273 e.add_variable("TWO", 2);
260274 e.add_variable("THREE", 3);
Index: trunk/extensions/AbuseFilter/parser_native/request.h
@@ -6,6 +6,8 @@
77
88 #include "filter_evaluator.h"
99
 10+namespace afp {
 11+
1012 struct request {
1113 bool load(std::istream &);
1214 bool evaluate(void);
@@ -15,4 +17,6 @@
1618 std::string filter;
1719 };
1820
 21+} // namespace afp
 22+
1923 #endif /* !REQUEST_H */

Status & tagging log