r38779 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r38778‎ | r38779 | r38780 >
Date:13:57, 7 August 2008
Author:river
Status:old
Tags:
Comment:
makefile: cleanups
affunctions: change glib to ICU (untested)
eval.cpp: remove obsolete libxml include
Modified paths:
  • /trunk/extensions/AbuseFilter/parser_native/af_parser (deleted) (history)
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/affunctions.h (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/check (deleted) (history)
  • /trunk/extensions/AbuseFilter/parser_native/eval.cpp (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/makefile (modified) (history)
  • /trunk/extensions/AbuseFilter/parser_native/syntax_check (deleted) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/syntax_check
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: trunk/extensions/AbuseFilter/parser_native/af_parser
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: trunk/extensions/AbuseFilter/parser_native/check
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.cpp
@@ -5,8 +5,10 @@
66 #include <ios>
77 #include <iostream>
88 #include <ctype.h>
9 -#include <glibmm/ustring.h>
109
 10+#include <unicode/utf8.h>
 11+#include <unicode/ustring.h>
 12+
1113 #define EQUIVSET_LOC "equivset.txt"
1214
1315 map<string,AFPFunction> af_functions;
@@ -179,7 +181,7 @@
180182 }
181183 }
182184
183 - double ratio = (float)(specialcount) / (float)(((Glib::ustring)orig).size());
 185+ double ratio = (float)(specialcount) / (float)(utf8_strlen(orig));
184186
185187 return AFPData(ratio);
186188 }
@@ -234,7 +236,7 @@
235237 throw AFPException( "Not enough arguments to lcase" );
236238 }
237239
238 - return AFPData( (long int)((Glib::ustring)args[0].toString()).size() );
 240+ return AFPData( (long int)utf8_strlen(args[0].toString()) );
239241 }
240242
241243 AFPData af_lcase( vector<AFPData> args ) {
@@ -242,10 +244,7 @@
243245 throw AFPException( "Not enough arguments to lcase" );
244246 }
245247
246 - Glib::ustring s = args[0].toString();
247 - string s2 = s.lowercase();
248 -
249 - return AFPData(s);
 248+ return AFPData(utf8_tolower(args[0].toString()));
250249 }
251250
252251 string confusable_character_normalise( string orig ) {
@@ -354,6 +353,66 @@
355354 return c;
356355 }
357356
 357+std::size_t
 358+utf8_strlen(std::string const &s)
 359+{
 360+std::size_t ret = 0;
 361+ for (std::string::const_iterator it = s.begin(), end = s.end();
 362+ it < end; ++it)
 363+ {
 364+ int skip = 1;
 365+
 366+ skip = U8_LENGTH(*it);
 367+#if 0
 368+ if (*it >= 0xc0) {
 369+ if (*it < 0xe0)
 370+ skip = 1;
 371+ else if (*it < 0xf0)
 372+ skip = 2;
 373+ else
 374+ skip = 3;
 375+ } else
 376+ skip = 1;
 377+#endif
 378+
 379+ if (it + skip >= end)
 380+ return ret; /* end of string */
 381+
 382+ it += skip;
 383+ }
 384+
 385+ return ret;
 386+}
 387+
 388+/*
 389+ * This could almost certainly be done in a nicer way.
 390+ */
 391+std::string utf8_tolower(std::string const &s)
 392+{
 393+ std::vector<UChar> ustring;
 394+ UErrorCode error = U_ZERO_ERROR;
 395+
 396+ for (int i = 0; i < s.size(); ) {
 397+ UChar32 c;
 398+ U8_NEXT(s.data(), i, s.size(), c);
 399+ ustring.push_back(c);
 400+ }
 401+
 402+ std::vector<UChar> dest;
 403+ u_strToLower(&dest[0], dest.size(), &ustring[0], ustring.size(),
 404+ NULL, &error);
 405+
 406+ if (U_FAILURE(error))
 407+ return s;
 408+
 409+ std::vector<unsigned char> u8string;
 410+ int i, j;
 411+ for (i = 0, j = 0; i < dest.size(); j++) {
 412+ U8_APPEND_UNSAFE(&u8string[0], i, dest[j]);
 413+ }
 414+ return std::string(u8string.begin(), u8string.begin() + i);
 415+}
 416+
358417 // Ported from MediaWiki core function in PHP.
359418 string codepointToUtf8( int codepoint ) {
360419 string ret;
Index: trunk/extensions/AbuseFilter/parser_native/affunctions.h
@@ -1,3 +1,6 @@
 2+#ifndef AFFUNCTIONS_H
 3+#define AFFUNCTIONS_H
 4+
25 #include "aftypes.h"
36 #include <map>
47 #include <vector>
@@ -18,3 +21,7 @@
1922 AFPData callFunction( string name, AFPData arg );
2023 string rmdoubles( string orig );
2124 string rmspecials( string orig );
 25+std::size_t utf8_strlen(std::string const &s);
 26+std::string utf8_tolower(std::string const &s);
 27+
 28+#endif /* !AFFUNCTIONS_H */
Index: trunk/extensions/AbuseFilter/parser_native/eval.cpp
@@ -1,6 +1,5 @@
22 #include "afeval.h"
33 #include "affunctions.h"
4 -#include <libxml++/libxml++.h>
54 #include <iostream>
65 #include <string>
76 #include <sstream>
Index: trunk/extensions/AbuseFilter/parser_native/makefile
@@ -1,24 +1,41 @@
2 -all: check af_parser syntax_check af_expr
 2+CXX = g++
 3+CXXFLAGS = -O3
 4+BOOST_INCLUDES = /opt/boost/include/boost-1_35
 5+BOOST_LIBS = /opt/boost/lib
 6+BOOST_TAG = -gcc34-mt
 7+ICU_INCLUDES = /opt/icu/include
 8+ICU_LIBS = /opt/icu/lib
39
4 -af_expr: afeval.o affunctions.o afparser.o aftypes.o afutils.o eval.o
5 - g++ -O3 -o af_expr afeval.o affunctions.o afparser.o aftypes.o afutils.o eval.o -lboost_regex -lxml++-2.6 -lxml2 -lglibmm-2.4 -lgobject-2.0 -lsigc-2.0 -lglib-2.0
 10+CPPFLAGS = -I$(BOOST_INCLUDES) -I$(ICU_INCLUDES)
 11+LDFLAGS = -L$(BOOST_LIBS) -L$(ICU_LIBS) -R$(ICU_LIBS) -R$(BOOST_LIBS)
612
7 -af_parser: afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o
8 - g++ -O3 -o af_parser afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o -lboost_regex -lglibmm-2.4
 13+LIBS = -lboost_regex$(BOOST_TAG) -licuuc -licui18n -licudata -licui18n
914
10 -check: afeval.o affunctions.o afparser.o aftypes.o afutils.o check.o
11 - g++ -O3 -o check -lboost_regex afeval.o affunctions.o afparser.o aftypes.o afutils.o check.o -lglibmm-2.4
12 -
13 -syntax_check: afeval.o affunctions.o afparser.o aftypes.o afutils.o syntax_check.o
14 - g++ -O3 -o syntax_check afeval.o affunctions.o afparser.o aftypes.o afutils.o syntax_check.o -lboost_regex -lglibmm-2.4
 15+af_expr_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o eval.o
 16+af_parser_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o main.o
 17+check_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o check.o
 18+syntax_check_objs = afeval.o affunctions.o afparser.o aftypes.o afutils.o syntax_check.o
1519
 20+progs = check af_parser syntax_check af_expr
 21+
 22+all: $(progs)
 23+
 24+af_expr: $(af_expr_objs)
 25+ $(CXX) $(CXXFLAGS) -o $@ $(af_expr_objs) $(LDFLAGS) $(LIBS)
 26+af_parser: $(af_parser_objs)
 27+ $(CXX) $(CXXFLAGS) -o $@ $(af_parser_objs) $(LDFLAGS) $(LIBS)
 28+check: $(check_objs)
 29+ $(CXX) $(CXXFLAGS) -o $@ $(check_objs) $(LDFLAGS) $(LIBS)
 30+syntax_check: $(syntax_check_objs)
 31+ $(CXX) $(CXXFLAGS) -o $@ $(syntax_check_objs) $(LDFLAGS) $(LIBS)
 32+
1633 .cpp.o:
17 - g++ -O3 -c $< -I/usr/include/libxml++-2.6 -I/usr/lib/libxml++-2.6/include -I/usr/include/libxml2 -I/usr/include/glibmm-2.4 -I/usr/lib/glibmm-2.4/include -I/usr/include/sigc++-2.0 -I/usr/lib/sigc++-2.0/include -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include
 34+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $<
1835
1936 clean:
20 - rm -f *.o check af_parser syntax_check
 37+ rm -f *.o $(progs)
2138
2239 clean-final:
23 - rm -f check af_parser syntax_check
 40+ rm -f $(progs)
2441
2542 .SUFFIXES: .cpp .o

Status & tagging log