r39065 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r39064‎ \| r39065 \| r39066 >
Date:	12:53, 10 August 2008
Author:	river
Status:	old
Tags:
Comment:	improved string literal parser. now \xHH works as expected (only matches two digits), and \uHHHH, \UHHHHHHHH are supported (16/32-bit Unicode characters)
Modified paths:	/trunk/extensions/AbuseFilter/parser_native/ast.h (modified) (history) /trunk/extensions/AbuseFilter/parser_native/parser.h (modified) (history)

Diff [purge]

Index: trunk/extensions/AbuseFilter/parser_native/parser.h
—	—	@@ -22,6 +22,7 @@
23	23	#include <boost/spirit/core.hpp>
24	24	#include <boost/spirit/utility/confix.hpp>
25	25	#include <boost/spirit/utility/chset.hpp>
	26	+#include <boost/spirit/utility/loops.hpp>
26	27	#include <boost/spirit/tree/ast.hpp>
27	28	#include <boost/spirit/tree/tree_to_xml.hpp>
28	29	#include <boost/spirit/symbols.hpp>
—	—	@@ -211,13 +212,28 @@
212	213	\| string
213	214	;
214	215
	216	+ hexchar = chset<>("a-fA-F0-9")
	217	+ ;
	218	+
	219	+ octchar = chset<>("0-7")
	220	+ ;
	221	+
	222	+ c_string_char =
	223	+ "\\x" >> hexchar >> hexchar
	224	+ \| "\\u" >> repeat_p(4)[hexchar]
	225	+ \| "\\U" >> repeat_p(8)[hexchar]
	226	+ \| "\\o" >> octchar >> octchar >> octchar
	227	+ \| "\\" >> anychar_p - (ch_p('x') \| 'u' \| 'o')
	228	+ \| anychar_p - (ch_p('"') \| '\\')
	229	+ ;
	230	+
215	231	/*
216	232	* config_p can't be used here, because it will rewrite
217	233	* *(c_escape_ch_p[x]) into (*c_escape_ch_p)[x]
218	234	*/
219	235	string = inner_node_d[
220	236	'"'
221		- >> leaf_node_d[ *(lex_escape_ch_p - '"') ]
	237	+ >> leaf_node_d[ *(c_string_char) ]
222	238	>> '"'
223	239	]
224	240	;
—	—	@@ -341,6 +357,7 @@
342	358	return tern_expr;
343	359	}
344	360
	361	+ rule<ScannerT> c_string_char, hexchar, octchar;
345	362	rule<ScannerT, parser_context<>, parser_tag<id_value> > value;
346	363	rule<ScannerT, parser_context<>, parser_tag<id_variable> > variable;
347	364	rule<ScannerT, parser_context<>, parser_tag<id_basic> > basic;
Index: trunk/extensions/AbuseFilter/parser_native/ast.h
—	—	@@ -12,6 +12,32 @@
13	13	#ifndef AST_H
14	14	#define AST_H
15	15
	16	+namespace {
	17	+
	18	+template<typename charT>
	19	+int
	20	+hex2int(charT const *str, int ndigits)
	21	+{
	22	+ int ret = 0;
	23	+
	24	+ while (ndigits--) {
	25	+ ret *= 0x10;
	26	+ if (*str >= 'a' && *str <= 'f')
	27	+ ret += 10 + int(*str - 'a');
	28	+ else if (*str >= 'A' && *str <= 'F')
	29	+ ret += 10 + int(*str - 'A');
	30	+ else if (*str >= '0' && *str <= '9')
	31	+ ret += int(*str - '0');
	32	+
	33	+ str++;
	34	+ }
	35	+
	36	+ std::cerr << "hex2int: " << ret << '\n';
	37	+ return ret;
	38	+}
	39	+
	40	+}
	41	+
16	42	namespace afp {
17	43
18	44	template<typename T> struct parser_grammar;
—	—	@@ -234,6 +260,27 @@
235	261	case 'v':
236	262	ret.push_back('\v');
237	263	break;
	264	+ case 'x':
	265	+ if (i + 3 >= end)
	266	+ break;
	267	+ ret.push_back(hex2int(s.data() + i + 2, 2));
	268	+ i += 2;
	269	+ break;
	270	+
	271	+ case 'u':
	272	+ if (i + 5 >= end)
	273	+ break;
	274	+ ret.push_back(hex2int(s.data() + i + 2, 4));
	275	+ i += 4;
	276	+ break;
	277	+
	278	+ case 'U':
	279	+ if (i + 9 >= end)
	280	+ break;
	281	+ ret.push_back(hex2int(s.data() + i + 2, 8));
	282	+ i += 8;
	283	+ break;
	284	+
238	285	default:
239	286	ret.push_back(s[i + 1]);
240	287	break;

Status & tagging log

15:30, 12 September 2011 Meno25 (talk | contribs) changed the status of r39065 [removed: ok added: old]