r5237 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r5236‎ \| r5237 \| r5238 >
Date:	18:37, 15 September 2004
Author:	timwi
Status:	old
Tags:
Comment:	- TABLES !!! :-) - lots of fixes.
Modified paths:	/trunk/flexbisonparse/f (modified) (history) /trunk/flexbisonparse/fb_defines.h (modified) (history) /trunk/flexbisonparse/parsetree.c (modified) (history) /trunk/flexbisonparse/parsetree.h (modified) (history) /trunk/flexbisonparse/test.txt (modified) (history) /trunk/flexbisonparse/wikilex.l (modified) (history) /trunk/flexbisonparse/wikiparse.y (modified) (history)

Diff [purge]

Index: trunk/flexbisonparse/parsetree.h
—	—	@@ -12,7 +12,8 @@
13	13	typedef enum NodeType {
14	14	Article, Paragraph, Heading, TextBlock, TextToken, ExtensionToken,
15	15	Newlines, PreBlock, PreLine, Bold, Italics, LinkEtc, LinkTarget,
16		~~- LinkOption,~~
	16	+ LinkOption, Table, TableRow, TableCell, TableHead,
	17	+ Attribute, AttributeGroup, Comment /* 20 */,
17	18
18	19	/* After first parse */
19	20	ListBlock, ListLine, ListBullet, ListNumbered,
—	—	@@ -21,36 +22,58 @@
22	23	List, ListItem
23	24	} NodeType;
24	25
25		~~-typedef struct ExtensionDataStruct~~
	26	+typedef struct NameValueStruct
26	27	{
27	28	char * name;
28		~~- char * text;~~
	29	+ char * value;
29	30	}
30		~~-* ExtensionData;~~
	31	+* NameValue;
31	32
	33	+/* During the parsing of table cells, we don't know in advance whether what we are currently
	34	+ * parsing are attributes for the table cell, or the table cell's textual contents. We parse
	35	+ * them as attributes first, but we use AttributeDataStruct to store enough data to allow us
	36	+ * to turn it back into text should we later find out they weren't attributes after all. */
	37	+typedef struct AttributeDataStruct
	38	+{
	39	+ char * name;
	40	+ int type; /* 0 = just an attribute name; 1 = no quotes; 2 = '; 3 = " */
	41	+ int spacesAfterName;
	42	+ int spacesAfterEquals;
	43	+ int spacesAfterValue;
	44	+}
	45	+* AttributeData;
	46	+
32	47	typedef union DataType {
33		~~- char* str;~~
	48	+ char * str;
34	49	int num;
35		~~- ExtensionData ext;~~
	50	+ NameValue nameval;
	51	+ AttributeData attrdata;
36	52	} DataType;
37	53
38	54	typedef struct NodeStruct
39	55	{
40	56	NodeType type;
41	57	DataType data;
42		~~- struct NodeStruct* nextSibling;~~
43		~~- struct NodeStruct* firstChild;~~
	58	+ struct NodeStruct * nextSibling;
	59	+ struct NodeStruct * firstChild;
44	60	}
45	61	* Node;
46	62
47	63	Node newNode (NodeType newType);
48	64	Node newNodeI (NodeType newType, int data);
49	65	Node newNodeS (NodeType newType, char* data);
50		~~-Node newNodeE (NodeType newType, ExtensionData data);~~
	66	+Node newNodeN (NodeType newType, char* name, char* value, int copyname, int copyvalue); /* see NameValueStruct */
51	67
	68	+/* Used by the lexer to create a preliminary AttributeData object. */
	69	+AttributeData newAttributeDataFromStr (char* str);
	70	+
	71	+/* Completes an AttributeData object created by newAttributeDataFromStr */
	72	+Node newNodeA (int t, AttributeData ad, int sae, int sav);
	73	+
52	74	/* Return value of all of these is the first parameter */
53	75	Node nodeAddChild (Node node, Node child);
54	76	#define nodeAddChild2(a,b,c) nodeAddChild (nodeAddChild (a, b), c)
	77	+#define nodeAddChild3(a,b,c,d) nodeAddChild (nodeAddChild (nodeAddChild (a, b), c), d)
55	78	Node nodePrependChild (Node node, Node child);
56	79	Node nodeAddSibling (Node node, Node sibling);
57	80
—	—	@@ -63,26 +86,60 @@
64	87	/* Returns a TextToken, or null if n < 1 */
65	88	Node processEndHeadingInText (int n);
66	89
	90	+/* If 'node' is a paragraph node with no siblings, frees it and returns its child.
	91	+ * (We do this because if a table cell contains only text, we don't want it to
	92	+ * count as a "paragraph".) Otherwise just returns node. */
	93	+Node processTableCellContents (Node node);
	94	+
67	95	/* If a is a TextBlock, adds b to it; if b is a TextBlock, prepends a;
68	96	* if both are a TextBlock, adds b's children to a and frees b;
69	97	* otherwise creates new TextBlock with a and b in it.
70	98	* If any parameter is 0, returns the other. */
71	99	Node makeTextBlock (Node a, Node b);
72	100	#define makeTextBlock2(a,b,c) makeTextBlock (makeTextBlock (a, b), c)
	101	+#define makeTextBlock3(a,b,c,d) makeTextBlock (makeTextBlock (makeTextBlock (a, b), c), d)
73	102
	103	+/* Parameter must be a LinkOption node, optionally with a string of
	104	+ * siblings attached. These will all be freed, and a TextBlock returned. */
	105	+Node convertPipeSeriesToText (Node node);
	106	+
	107	+/* Parameter must be an AttributeGroup node. It and its children will
	108	+ * all be freed, and a TextBlock returned. */
	109	+Node convertAttributesToText (Node node);
	110	+
	111	+/* Parameter will be freed, and a TextBlock returned.
	112	+ * NOTICE: This will process ONLY the attribute name and the spaces after it. */
	113	+Node convertAttributeDataToText (AttributeData data);
	114	+
	115	+/* These all return a TextToken node. */
	116	+Node convertTableRowToText (int info);
	117	+Node convertTableCellToText (int info);
	118	+Node convertTableHeadToText (int info);
	119	+
74	120	/* Parameter must be a TextBlock. Turns something like
75	121	* <italics>X<italics>Y</italics>Z</italics> into
76	122	* <italics>X</italics>Y<italics>Z</italics>. Returns node. */
77	123	Node processNestedItalics (Node node);
78	124
79		~~-ExtensionData newExtensionData (char* name, char* text);~~
	125	+char* outputXML (Node node, int initialBufferSize);
80	126
81		~~-char* outputXML (Node node);~~
	127	+/* To store the output, outputXML() will use a dynamically-growing character buffer (char*).
	128	+ * The following routines manage such a buffer. */
	129	+void fb_create_new_buffer (int size);
	130	+void fb_write_to_buffer (const char* str);
	131	+void fb_write_to_buffer_len (const char* str, int len);
	132	+void fb_write_to_buffer_escaped (char* s);
	133	+char* fb_get_buffer();
82	134
83		~~-/* To store the output, outputXML() will use a dynamically-growing buffer.~~
84		~~- * Normally it will start at a size of 1 KB, but if your input is already,~~
85		~~- * say, 1 MB big, you might want to call this before outputXML() to save a~~
86		~~- * few buffer enlargements. ONLY call this function just before calling~~
87		~~- * outputXML(). */~~
88		~~-void fb_set_buffer_size (int size);~~
	135	+/* More string helper routines ... */
89	136
	137	+/* e.g. addSpaces ("=", 2) => "=  " */
	138	+char* addSpaces (char* src, int spaces);
	139	+/* trims only trailing whitespace. Returns its parameter; does not create a new string. */
	140	+char* strtrim (char* src);
	141	+/* like strtrim, but returns the number of spaces removed. */
	142	+int strtrimC (char* src);
	143	+/* same as strtrim except takes a TextToken node */
	144	+Node strtrimN (Node src);
	145	+/* like strtrimN, but returns the number of spaces removed. */
	146	+int strtrimNC (Node src);
Index: trunk/flexbisonparse/f
—	—	@@ -1 +1 @@
2		~~-cat ./test.txt \| ./wiki~~
	2	+./wiki < test.txt
Index: trunk/flexbisonparse/wikiparse.y
—	—	@@ -14,48 +14,81 @@
15	15	#include <stdio.h>
16	16	#include "parsetree.h"
17	17	#include "fb_defines.h"
18		~~-int yyerror() { printf ("Syntax error.\n"); }~~
	18	+int yyerror() { printf ("\n\nSYNTAX ERROR.\n\n"); }
19	19
20	20	Node articlenode;
	21	+int i;
	22	+
21	23	%}
22	24
23	25	/* This defines the type of yylval */
24	26	%union {
25	27	Node node;
	28	+ char* str;
26	29	int num;
	30	+ AttributeData ad;
27	31	}
28		~~-%type <node> article block paragraph heading textorempty zeroormorenewlines oneormorenewlines~~
29		~~- preblock preline bulletlistline numberlistline listseries text listblock~~
	32	+%type <node> article blocks block paragraph heading textorempty zeroormorenewlines preblock
	33	+ oneormorenewlines preline bulletlistline numberlistline listseries listblock
30	34	zeroormorenewlinessave oneormorenewlinessave bulletlistblock numberlistblock
31	35	textelement textelementnoboit textelementnobold textelementnoital italicsorbold
32	36	textnoboit textnobold textnoital boldnoitalics italicsnobold linketc pipeseries
33		~~- TEXT EXTENSION PRELINE~~
34		~~-%type <num> HEADING ENDHEADING~~
	37	+ text attribute attributes tablecells tablecell tablecellcontents tablerows
	38	+ tablerow table comment blocksnotbl blocknotbl textnoitaltbl textnoboldtbl
	39	+ textnoboittbl textnotbl textelementnotbl textelementnoboldtbl textelementnoitaltbl
	40	+ textelementnoboittbl paragraphnotbl linketcnotbl italorboldnotbl boldnoitalicstbl
	41	+ italicsnoboldtbl pipeseriesnotbl
	42	+ TEXT EXTENSION
	43	+%type <ad> ATTRIBUTE
	44	+%type <num> HEADING ENDHEADING TABLEBEGIN TABLECELL TABLEHEAD TABLEROW EQUALS ATTRAPO ATTRQ
35	45
36		~~-%token EXTENSION EMPTYCOMMENT BEGINCOMMENT TEXT ENDCOMMENT OPENLINK OPENDBLSQBR CLOSEDBLSQBR PIPE~~
37		~~- NEWLINE PRELINE LISTBULLET LISTNUMBERED HEADING ENDHEADING APO5 APO3 APO2~~
	46	+%token EXTENSION BEGINCOMMENT TEXT ENDCOMMENT OPENLINK OPENDBLSQBR CLOSEDBLSQBR PIPE
	47	+ NEWLINE PRELINE LISTBULLET LISTNUMBERED HEADING ENDHEADING APO5 APO3 APO2 TABLEBEGIN
	48	+ TABLECELL TABLEHEAD TABLEROW TABLEEND ATTRIBUTE EQUALS ATTRAPO ATTRQ
38	49	// Not yet used:
39	50	OPENPENTUPLECURLY CLOSEPENTUPLECURLY OPENTEMPLATEVAR CLOSETEMPLATEVAR OPENTEMPLATE
40		~~- CLOSETEMPLATE TABLEBEGIN TABLECELL TABLEHEAD TABLEROW TABLEEND~~
	51	+ CLOSETEMPLATE
41	52
42	53	%start article
43	54
44	55	%%
45	56	/* rules */
46	57
	58	+ /* TODO:
	59	+ - optimise zeroormorenewlinessave (no need for Newlines nodes)
	60	+ - find all 'memcpy's and add a 'sizeof (char)' wherever necessary
	61	+
	62	+ UNATTENDED-TO CAVEATS:
	63	+ - a row beginning with TABLEBEGIN but not containing valid table mark-up
	64	+ (e.g. "{\| Hah!" + NEWLINE) is turned into a paragraph of its own even
	65	+ if it and the next line are separated by only one newline (so they should
	66	+ all be one paragraph).
	67	+ */
	68	+
47	69	article : /* empty */ { debugf ("article#1 "); $$ = articlenode = newNode (Article); }
48	70	\| oneormorenewlines { debugf ("article#2 "); $$ = articlenode = newNode (Article); }
49		~~- \| block { debugf ("article#3 "); $$ = articlenode = nodeAddChild (newNode (Article), $1); }~~
50		~~- \| article block { debugf ("article#4 "); $$ = articlenode = nodeAddChild ($1, $2); }~~
	71	+ \| blocks { debugf ("article#3 "); $$ = articlenode = nodeAddChild (newNode (Article), $1); }
51	72
52		~~-block : preblock { debugf ("block#1 "); $$ = processPreBlock ($1); }~~
53		~~- \| heading zeroormorenewlines { debugf ("block#2 "); $$ = $1; }~~
54		~~- \| listblock zeroormorenewlines { debugf ("block#3 "); $$ = $1; }~~
55		~~- \| paragraph zeroormorenewlines { debugf ("block#4 "); $$ = $1; }~~
56		~~-/*~~
57		~~- \| table zeroormorenewlines { debugf ("block#5 "); $$ = $1; }~~
58		~~-*/~~
	73	+blocks : block { debugf ("blocks#1 "); $$ = $1; }
	74	+ \| blocks block { debugf ("blocks#2 "); $$ = nodeAddSibling ($1, $2); }
59	75
	76	+blocksnotbl : blocknotbl { debugf ("blocksnotbl#1 "); $$ = $1; }
	77	+ \| blocksnotbl blocknotbl { debugf ("blocksnotbl#2 "); $$ = nodeAddSibling ($1, $2); }
	78	+
	79	+block : preblock { debugf ("block#1 "); $$ = processPreBlock ($1); }
	80	+ \| heading zeroormorenewlines { debugf ("block#2 "); $$ = $1; }
	81	+ \| listblock zeroormorenewlines { debugf ("block#3 "); $$ = $1; }
	82	+ \| paragraph zeroormorenewlines { debugf ("block#4 "); $$ = $1; }
	83	+ \| table zeroormorenewlines { debugf ("block#5 "); $$ = $1; }
	84	+ \| comment zeroormorenewlines { debugf ("block#6 "); $$ = $1; }
	85	+
	86	+blocknotbl : preblock { debugf ("blocknotbl#1 "); $$ = processPreBlock ($1); }
	87	+ \| heading zeroormorenewlines { debugf ("blocknotbl#2 "); $$ = $1; }
	88	+ \| listblock zeroormorenewlines { debugf ("blocknotbl#3 "); $$ = $1; }
	89	+ \| paragraphnotbl zeroormorenewlines { debugf ("blocknotbl#4 "); $$ = $1; }
	90	+ \| table zeroormorenewlines { debugf ("blocknotbl#5 "); $$ = $1; }
	91	+ \| comment zeroormorenewlines { debugf ("blocknotbl#6 "); $$ = $1; }
	92	+
60	93	preblock : preline { debugf ("preblock#1 "); $$ = nodeAddChild (newNode (PreBlock), $1); }
61	94	\| preblock preline { debugf ("preblock#2 "); $$ = nodeAddChild ($1, $2); }
62	95
—	—	@@ -103,66 +136,173 @@
104	137	{ debugf ("linketc#7 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 2), nodeAddChild (newNode (LinkTarget), $2), $3); }
105	138	\| OPENLINK text pipeseries PIPE CLOSEDBLSQBR
106	139	{ debugf ("linketc#8 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 3), nodeAddChild (newNode (LinkTarget), $2), $3); }
	140	+ /* ... and now everything again with the CLOSEDBLSQBR missing,
	141	+ * to take care of invalid mark-up. */
	142	+ \| OPENDBLSQBR text
	143	+ { debugf ("linketc#9 "); $$ = makeTextBlock (newNodeS (TextToken, "[["), $2); }
	144	+ \| OPENDBLSQBR text PIPE
	145	+ { debugf ("linketc#10 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, newNodeS (TextToken, "\|")); }
	146	+ \| OPENDBLSQBR text pipeseries
	147	+ { debugf ("linketc#11 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3)); }
	148	+ \| OPENDBLSQBR text pipeseries PIPE
	149	+ { debugf ("linketc#12 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "\|")); }
	150	+ \| OPENLINK text
	151	+ { debugf ("linketc#13 "); $$ = makeTextBlock (newNodeS (TextToken, "[[:"), $2); }
	152	+ \| OPENLINK text PIPE
	153	+ { debugf ("linketc#14 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, newNodeS (TextToken, "\|")); }
	154	+ \| OPENLINK text pipeseries
	155	+ { debugf ("linketc#15 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3)); }
	156	+ \| OPENLINK text pipeseries PIPE
	157	+ { debugf ("linketc#16 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "\|")); }
107	158
	159	+linketcnotbl : OPENDBLSQBR textnotbl CLOSEDBLSQBR
	160	+ { debugf ("linketcnotbl#1 "); $$ = nodeAddChild (newNodeI (LinkEtc, 0), nodeAddChild (newNode (LinkTarget), $2)); }
	161	+ \| OPENDBLSQBR textnotbl PIPE CLOSEDBLSQBR
	162	+ { debugf ("linketcnotbl#2 "); $$ = nodeAddChild (newNodeI (LinkEtc, 1), nodeAddChild (newNode (LinkTarget), $2)); }
	163	+ \| OPENDBLSQBR textnotbl pipeseriesnotbl CLOSEDBLSQBR
	164	+ { debugf ("linketcnotbl#3 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 0), nodeAddChild (newNode (LinkTarget), $2), $3); }
	165	+ \| OPENDBLSQBR textnotbl pipeseriesnotbl PIPE CLOSEDBLSQBR
	166	+ { debugf ("linketcnotbl#4 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 1), nodeAddChild (newNode (LinkTarget), $2), $3); }
	167	+ \| OPENLINK textnotbl CLOSEDBLSQBR
	168	+ { debugf ("linketcnotbl#5 "); $$ = nodeAddChild (newNodeI (LinkEtc, 2), nodeAddChild (newNode (LinkTarget), $2)); }
	169	+ \| OPENLINK textnotbl PIPE CLOSEDBLSQBR
	170	+ { debugf ("linketcnotbl#6 "); $$ = nodeAddChild (newNodeI (LinkEtc, 3), nodeAddChild (newNode (LinkTarget), $2)); }
	171	+ \| OPENLINK textnotbl pipeseriesnotbl CLOSEDBLSQBR
	172	+ { debugf ("linketcnotbl#7 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 2), nodeAddChild (newNode (LinkTarget), $2), $3); }
	173	+ \| OPENLINK textnotbl pipeseriesnotbl PIPE CLOSEDBLSQBR
	174	+ { debugf ("linketcnotbl#8 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 3), nodeAddChild (newNode (LinkTarget), $2), $3); }
	175	+ /* ... and now everything again with the CLOSEDBLSQBR missing,
	176	+ * to take care of invalid mark-up. */
	177	+ \| OPENDBLSQBR textnotbl
	178	+ { debugf ("linketcnotbl#9 "); $$ = makeTextBlock (newNodeS (TextToken, "[["), $2); }
	179	+ \| OPENDBLSQBR textnotbl PIPE
	180	+ { debugf ("linketcnotbl#10 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, newNodeS (TextToken, "\|")); }
	181	+ \| OPENDBLSQBR textnotbl pipeseriesnotbl
	182	+ { debugf ("linketcnotbl#11 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3)); }
	183	+ \| OPENDBLSQBR textnotbl pipeseriesnotbl PIPE
	184	+ { debugf ("linketcnotbl#12 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "\|")); }
	185	+ \| OPENLINK textnotbl
	186	+ { debugf ("linketcnotbl#13 "); $$ = makeTextBlock (newNodeS (TextToken, "[[:"), $2); }
	187	+ \| OPENLINK textnotbl PIPE
	188	+ { debugf ("linketcnotbl#14 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, newNodeS (TextToken, "\|")); }
	189	+ \| OPENLINK textnotbl pipeseriesnotbl
	190	+ { debugf ("linketcnotbl#15 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3)); }
	191	+ \| OPENLINK textnotbl pipeseriesnotbl PIPE
	192	+ { debugf ("linketcnotbl#16 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "\|")); }
	193	+
108	194	pipeseries : PIPE text { debugf ("pipeseries#1 "); $$ = nodeAddChild (newNode (LinkOption), $2); }
109		~~- \| pipeseries PIPE text { debugf ("pipeseries#2 "); $$ = nodeAddSibling ($1, nodeAddChild (newNode (LinkOption), $3)); }~~
	195	+ \| PIPE text pipeseries { debugf ("pipeseries#2 "); $$ = nodeAddSibling (nodeAddChild (newNode (LinkOption), $2), $3); }
110	196
	197	+pipeseriesnotbl : PIPE textnotbl { debugf ("pipeseriesnotbl#1 "); $$ = nodeAddChild (newNode (LinkOption), $2); }
	198	+ \| PIPE textnotbl pipeseriesnotbl { debugf ("pipeseriesnotbl#2 "); $$ = nodeAddSibling (nodeAddChild (newNode (LinkOption), $2), $3); }
	199	+
111	200	textorempty : /* empty */ { debugf ("textorempty#1 "); $$ = newNodeS (TextToken, ""); }
112	201	\| text { debugf ("textorempty#2 "); $$ = $1; }
113	202
114	203	italicsorbold : APO2 textnoital APO2
115		~~- { debugf ("italicsorbold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }~~
	204	+ { debugf ("italicsorbold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
116	205	\| APO2 textnoital APO3 textnoboit APO5
117	206	{ debugf ("italicsorbold#2 "); $$ = nodeAddChild (newNode (Italics),
118		~~- makeTextBlock ($2, nodeAddChild (newNode (Bold), $4))); }~~
	207	+ makeTextBlock ($2, nodeAddChild (newNode (Bold), $4))); }
119	208	\| APO2 textnoital APO3 textnoboit
120	209	{ debugf ("italicsorbold#3 "); $$ =
121		~~- makeTextBlock2 (nodeAddChild (newNode (Italics), $2), newNodeS (TextToken, "'"), $4); }~~
	210	+ makeTextBlock2 (nodeAddChild (newNode (Italics), $2), newNodeS (TextToken, "'"), $4); }
122	211	\| APO2 textnoital
123		~~- { debugf ("italicsorbold#4 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }~~
	212	+ { debugf ("italicsorbold#4 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
124	213	\| APO3 textnobold APO3
125		~~- { debugf ("italicsorbold#5 "); $$ = nodeAddChild (newNode (Bold), $2); }~~
	214	+ { debugf ("italicsorbold#5 "); $$ = nodeAddChild (newNode (Bold), $2); }
126	215	\| APO3 textnobold APO2 textnoboit APO5
127	216	{ debugf ("italicsorbold#6 "); $$ = nodeAddChild (newNode (Bold),
128		~~- makeTextBlock ($2, nodeAddChild (newNode (Italics), $4))); }~~
	217	+ makeTextBlock ($2, nodeAddChild (newNode (Italics), $4))); }
129	218	/* Peculiar case, especially for French l'''homme'' => l'<italics>homme</italics> */
130	219	/* We have to use textnobold here, even though textnoital would be logical. */
131	220	/* We use processNestedItalics to fix the weirdness produced by this. */
132	221	\| APO3 textnobold APO2 textnoboit
133	222	{ debugf ("italicsorbold#7 "); $$ = processNestedItalics (makeTextBlock2 (newNodeS
134		~~- (TextToken, "'"), nodeAddChild (newNode (Italics), $2), $4)); }~~
	223	+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2), $4)); }
135	224	\| APO3 textnobold APO2
136	225	{ debugf ("italicsorbold#8 "); $$ = processNestedItalics (makeTextBlock (newNodeS
137		~~- (TextToken, "'"), nodeAddChild (newNode (Italics), $2))); }~~
	226	+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2))); }
138	227	\| APO3 textnobold
139		~~- { debugf ("italicsorbold#9 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }~~
	228	+ { debugf ("italicsorbold#9 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
140	229	\| APO5 textnoboit APO3 textnoital APO2
141	230	{ debugf ("italicsorbold#10 "); $$ = nodeAddChild (newNode (Italics),
142		~~- makeTextBlock (nodeAddChild (newNode (Bold), $2), $4)); }~~
	231	+ makeTextBlock (nodeAddChild (newNode (Bold), $2), $4)); }
143	232	\| APO5 textnoboit APO2 textnobold APO3
144	233	{ debugf ("italicsorbold#11 "); $$ = nodeAddChild (newNode (Bold),
145		~~- makeTextBlock (nodeAddChild (newNode (Italics), $2), $4)); }~~
	234	+ makeTextBlock (nodeAddChild (newNode (Italics), $2), $4)); }
146	235	\| APO5 textnoboit APO3 textnoital
147	236	{ debugf ("italicsorbold#12 "); $$ = makeTextBlock2 (newNodeS (TextToken, "''"),
148		~~- nodeAddChild (newNode (Bold), $2), $4); }~~
	237	+ nodeAddChild (newNode (Bold), $2), $4); }
149	238	\| APO5 textnoboit APO2 textnobold
150	239	{ debugf ("italicsorbold#13 "); $$ = makeTextBlock2 (newNodeS (TextToken, "'''"),
151		~~- nodeAddChild (newNode (Italics), $2), $4); }~~
	240	+ nodeAddChild (newNode (Italics), $2), $4); }
152	241	\| APO5 textnoboit
153	242	{ debugf ("italicsorbold#14 ");
154		~~- $$ = makeTextBlock (newNodeS (TextToken, "'''''"), $2); }~~
	243	+ $$ = makeTextBlock (newNodeS (TextToken, "'''''"), $2); }
155	244
	245	+italorboldnotbl : APO2 textnoitaltbl APO2
	246	+ { debugf ("italorboldnotbl#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
	247	+ \| APO2 textnoitaltbl APO3 textnoboittbl APO5
	248	+ { debugf ("italorboldnotbl#2 "); $$ = nodeAddChild (newNode (Italics),
	249	+ makeTextBlock ($2, nodeAddChild (newNode (Bold), $4))); }
	250	+ \| APO2 textnoitaltbl APO3 textnoboittbl
	251	+ { debugf ("italorboldnotbl#3 "); $$ =
	252	+ makeTextBlock2 (nodeAddChild (newNode (Italics), $2), newNodeS (TextToken, "'"), $4); }
	253	+ \| APO2 textnoitaltbl
	254	+ { debugf ("italorboldnotbl#4 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
	255	+ \| APO3 textnoboldtbl APO3
	256	+ { debugf ("italorboldnotbl#5 "); $$ = nodeAddChild (newNode (Bold), $2); }
	257	+ \| APO3 textnoboldtbl APO2 textnoboittbl APO5
	258	+ { debugf ("italorboldnotbl#6 "); $$ = nodeAddChild (newNode (Bold),
	259	+ makeTextBlock ($2, nodeAddChild (newNode (Italics), $4))); }
	260	+ /* Peculiar case, especially for French l'''homme'' => l'<italics>homme</italics> */
	261	+ /* We have to use textnoboldtbl here, even though textnoitaltbl would be logical. */
	262	+ /* We use processNestedItalics to fix the weirdness produced by this. */
	263	+ \| APO3 textnoboldtbl APO2 textnoboittbl
	264	+ { debugf ("italorboldnotbl#7 "); $$ = processNestedItalics (makeTextBlock2 (newNodeS
	265	+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2), $4)); }
	266	+ \| APO3 textnoboldtbl APO2
	267	+ { debugf ("italorboldnotbl#8 "); $$ = processNestedItalics (makeTextBlock (newNodeS
	268	+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2))); }
	269	+ \| APO3 textnoboldtbl
	270	+ { debugf ("italorboldnotbl#9 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
	271	+ \| APO5 textnoboittbl APO3 textnoitaltbl APO2
	272	+ { debugf ("italorboldnotbl#10 "); $$ = nodeAddChild (newNode (Italics),
	273	+ makeTextBlock (nodeAddChild (newNode (Bold), $2), $4)); }
	274	+ \| APO5 textnoboittbl APO2 textnoboldtbl APO3
	275	+ { debugf ("italorboldnotbl#11 "); $$ = nodeAddChild (newNode (Bold),
	276	+ makeTextBlock (nodeAddChild (newNode (Italics), $2), $4)); }
	277	+ \| APO5 textnoboittbl APO3 textnoitaltbl
	278	+ { debugf ("italorboldnotbl#12 "); $$ = makeTextBlock2 (newNodeS (TextToken, "''"),
	279	+ nodeAddChild (newNode (Bold), $2), $4); }
	280	+ \| APO5 textnoboittbl APO2 textnoboldtbl
	281	+ { debugf ("italorboldnotbl#13 "); $$ = makeTextBlock2 (newNodeS (TextToken, "'''"),
	282	+ nodeAddChild (newNode (Italics), $2), $4); }
	283	+ \| APO5 textnoboittbl
	284	+ { debugf ("italorboldnotbl#14 ");
	285	+ $$ = makeTextBlock (newNodeS (TextToken, "'''''"), $2); }
156	286
157	287	italicsnobold : APO2 textnoboit APO2
158		~~- { debugf ("italicsnobold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }~~
	288	+ { debugf ("italicsnobold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
159	289	\| APO2 textnoboit
160		~~- { debugf ("italicsnobold#2 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }~~
	290	+ { debugf ("italicsnobold#2 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
161	291
162	292	boldnoitalics : APO3 textnoboit APO3
163		~~- { debugf ("boldnoitalics#1 "); $$ = nodeAddChild (newNode (Bold), $2); }~~
	293	+ { debugf ("boldnoitalics#1 "); $$ = nodeAddChild (newNode (Bold), $2); }
164	294	\| APO3 textnoboit
165		~~- { debugf ("boldnoitalics#2 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }~~
	295	+ { debugf ("boldnoitalics#2 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
166	296
	297	+italicsnoboldtbl: APO2 textnoboittbl APO2
	298	+ { debugf ("italicsnobold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
	299	+ \| APO2 textnoboittbl
	300	+ { debugf ("italicsnobold#2 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
	301	+
	302	+boldnoitalicstbl: APO3 textnoboittbl APO3
	303	+ { debugf ("boldnoitalics#1 "); $$ = nodeAddChild (newNode (Bold), $2); }
	304	+ \| APO3 textnoboittbl
	305	+ { debugf ("boldnoitalics#2 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
	306	+
167	307	/* In order to resolve a reduce/reduce conflict correctly, heading must come before textelement. */
168	308	heading : HEADING text ENDHEADING NEWLINE
169	309	{ debugf ("heading#1 "); $$ = nodeAddChild (newNodeI (Heading, $1), $2); }
—	—	@@ -177,6 +317,87 @@
178	318	\| HEADING
179	319	{ debugf ("heading#6 "); $$ = nodeAddChild (newNodeI (Heading, $1), newNodeS (TextToken, "?")); }
180	320
	321	+table : TABLEBEGIN attributes tablerows TABLEEND
	322	+ { debugf ("table#1 "); $$ = nodeAddChild2 (newNode (Table), $2, $3); }
	323	+ \| TABLEBEGIN attributes tablerows
	324	+ { debugf ("table#2 "); $$ = nodeAddChild2 (newNode (Table), $2, $3); }
	325	+ \| TABLEBEGIN attributes oneormorenewlines tablerows TABLEEND
	326	+ { debugf ("table#3 "); $$ = nodeAddChild2 (newNode (Table), $2, $4); }
	327	+ \| TABLEBEGIN attributes oneormorenewlines tablerows
	328	+ { debugf ("table#4 "); $$ = nodeAddChild2 (newNode (Table), $2, $4); }
	329	+ \| TABLEBEGIN tablerows TABLEEND
	330	+ { debugf ("table#5 "); $$ = nodeAddChild (newNode (Table), $2); }
	331	+ \| TABLEBEGIN tablerows
	332	+ { debugf ("table#6 "); $$ = nodeAddChild (newNode (Table), $2); }
	333	+ \| TABLEBEGIN oneormorenewlines tablerows TABLEEND
	334	+ { debugf ("table#7 "); $$ = nodeAddChild (newNode (Table), $3); }
	335	+ \| TABLEBEGIN oneormorenewlines tablerows
	336	+ { debugf ("table#8 "); $$ = nodeAddChild (newNode (Table), $3); }
	337	+ /* and now some invalid mark-up catering ... */
	338	+ \| TABLEBEGIN attributes zeroormorenewlines
	339	+ { debugf ("table#9 "); $$ = nodeAddChild (newNode (Paragraph),
	340	+ makeTextBlock (newNodeS (TextToken, addSpaces ("{\|", $1)),
	341	+ convertAttributesToText ($2))); }
	342	+ \| TABLEBEGIN attributes text zeroormorenewlines
	343	+ { debugf ("table#10 "); $$ = nodeAddChild (newNode (Paragraph),
	344	+ makeTextBlock2 (newNodeS (TextToken, addSpaces ("{\|", $1)),
	345	+ convertAttributesToText ($2), $3)); }
	346	+ \| TABLEBEGIN text zeroormorenewlines
	347	+ { debugf ("table#11 "); $$ = nodeAddChild (newNode (Paragraph),
	348	+ makeTextBlock (newNodeS (TextToken, addSpaces ("{\|", $1)), $3)); }
	349	+
	350	+tablerows : tablerow { debugf ("tablerows#1 "); $$ = $1; }
	351	+ \| tablerows tablerow { debugf ("tablerows#2 "); $$ = nodeAddSibling ($1, $2); }
	352	+
	353	+tablerow : TABLEROW attributes tablecells
	354	+ { debugf ("tablerow#1 "); $$ = nodeAddChild2 (newNode (TableRow), $2, $3); }
	355	+ \| TABLEROW tablecells
	356	+ { debugf ("tablerow#2 "); $$ = nodeAddChild (newNode (TableRow), $2); }
	357	+ \| TABLEROW attributes oneormorenewlines tablecells
	358	+ { debugf ("tablerow#3 "); $$ = nodeAddChild2 (newNode (TableRow), $2, $4); }
	359	+ \| TABLEROW oneormorenewlines tablecells
	360	+ { debugf ("tablerow#4 "); $$ = nodeAddChild (newNode (TableRow), $3); }
	361	+ \| TABLEROW text zeroormorenewlines
	362	+ { debugf ("tablerow#5 "); $$ = nodeAddChild (newNode (TableRow), nodeAddChild (newNode (TableCell), $2)); }
	363	+ \| TABLEROW attributes text zeroormorenewlines
	364	+ { debugf ("tablerow#6 "); $$ = nodeAddChild (newNode (TableRow), nodeAddChild2 (newNode (TableCell), convertAttributesToText ($2), $3)); }
	365	+ \| TABLEROW zeroormorenewlines
	366	+ { debugf ("tablerow#7 "); $$ = 0; }
	367	+ /* It is possible for the first table row to have no TABLEROW token */
	368	+ \| tablecells
	369	+ { debugf ("tablerow#8 "); $$ = nodeAddChild (newNode (TableRow), $1); }
	370	+
	371	+tablecells : tablecell { debugf ("tablecells#1 "); $$ = $1; }
	372	+ \| tablecells tablecell { debugf ("tablecells#2 "); $$ = nodeAddSibling ($1, $2); }
	373	+
	374	+tablecell : TABLECELL attributes PIPE tablecellcontents
	375	+ { debugf ("tablecell#1 "); $$ = nodeAddChild2 (newNode (TableCell), $2, processTableCellContents ($4)); }
	376	+ \| TABLECELL tablecellcontents
	377	+ { debugf ("tablecell#2 "); $$ = nodeAddChild (newNode (TableCell), processTableCellContents ($2)); }
	378	+
	379	+tablecellcontents : blocksnotbl
	380	+ { debugf ("tablecellcontents#1 "); $$ = $1; }
	381	+ \| oneormorenewlines blocksnotbl
	382	+ { debugf ("tablecellcontents#2 "); $$ = $2; }
	383	+
	384	+/* In order to reduce the second one (ATTRIBUTE EQUALS TEXT) correctly, this rule must
	385	+ * be further up than textelement. */
	386	+attribute : ATTRIBUTE
	387	+ { debugf ("attribute#1 "); $$ = newNodeA (0, $1, 0, 0); }
	388	+ \| ATTRIBUTE EQUALS TEXT
	389	+ { debugf ("attribute#2 "); $$ = nodeAddChild (newNodeA (1, $1, $2, strtrimNC ($3)), $3); }
	390	+ \| ATTRIBUTE EQUALS ATTRAPO text ATTRAPO
	391	+ { debugf ("attribute#3 "); $$ = nodeAddChild (newNodeA (2, $1, $2, $5), $4); }
	392	+ \| ATTRIBUTE EQUALS ATTRQ text ATTRQ
	393	+ { debugf ("attribute#4 "); $$ = nodeAddChild (newNodeA (3, $1, $2, $5), $4); }
	394	+ \| ATTRIBUTE EQUALS ATTRQ ATTRQ
	395	+ { debugf ("attribute#5 "); $$ = newNodeA (3, $1, $2, $4); }
	396	+ \| ATTRIBUTE EQUALS
	397	+ { debugf ("attribute#6 "); $$ = newNodeA (1, $1, $2, 0); }
	398	+
	399	+attributes : attribute { debugf ("attributes#1 "); $$ = nodeAddChild (newNode (AttributeGroup), $1); }
	400	+ \| attributes attribute { debugf ("attributes#2 "); $$ = nodeAddChild ($1, $2); }
	401	+
181	402	text : textelement { debugf ("text#1 "); $$ = $1; }
182	403	\| text textelement { debugf ("text#2 "); $$ = makeTextBlock ($1, $2); }
183	404	textnoital : textelementnoital { debugf ("textnoital#1 "); $$ = $1; }
—	—	@@ -185,50 +406,106 @@
186	407	\| textnobold textelementnobold { debugf ("textnobold#2 "); $$ = makeTextBlock ($1, $2); }
187	408	textnoboit : textelementnoboit { debugf ("textnoboit#1 "); $$ = $1; }
188	409	\| textnoboit textelementnoboit { debugf ("textnoboit#2 "); $$ = makeTextBlock ($1, $2); }
	410	+textnotbl : textelementnotbl { debugf ("textnotbl#1 "); $$ = $1; }
	411	+ \| textnotbl textelementnotbl { debugf ("textnotbl#2 "); $$ = makeTextBlock ($1, $2); }
	412	+textnoitaltbl : textelementnoitaltbl { debugf ("textnoitaltbl#1 "); $$ = $1; }
	413	+ \| textnoitaltbl textelementnoitaltbl { debugf ("textnoitaltbl#2 "); $$ = makeTextBlock ($1, $2); }
	414	+textnoboldtbl : textelementnoboldtbl { debugf ("textnoboldtbl#1 "); $$ = $1; }
	415	+ \| textnoboldtbl textelementnoboldtbl { debugf ("textnoboldtbl#2 "); $$ = makeTextBlock ($1, $2); }
	416	+textnoboittbl : textelementnoboittbl { debugf ("textnoboittbl#1 "); $$ = $1; }
	417	+ \| textnoboittbl textelementnoboittbl { debugf ("textnoboittbl#2 "); $$ = makeTextBlock ($1, $2); }
189	418
190	419	textelement : TEXT { debugf ("textelement#1 "); $$ = $1; }
191	420	\| EXTENSION { debugf ("textelement#2 "); $$ = $1; }
192	421	\| PIPE { debugf ("textelement#3 "); $$ = newNodeS (TextToken, "\|"); }
193	422	\| ENDHEADING { debugf ("textelement#4 "); $$ = processEndHeadingInText ($1); }
194		~~- \| italicsorbold { debugf ("textelement#5 "); $$ = $1; }~~
195		~~- \| APO2 { debugf ("textelement#6 "); $$ = newNodeS (TextToken, "''"); }~~
196		~~- \| APO3 { debugf ("textelement#7 "); $$ = newNodeS (TextToken, "'''"); }~~
197		~~- \| APO5 { debugf ("textelement#8 "); $$ = newNodeS (TextToken, "'''''"); }~~
198		~~- \| linketc { debugf ("textelement#9 "); $$ = $1; }~~
	423	+ \| APO2 { debugf ("textelement#5 "); $$ = newNodeS (TextToken, "''"); }
	424	+ \| APO3 { debugf ("textelement#6 "); $$ = newNodeS (TextToken, "'''"); }
	425	+ \| APO5 { debugf ("textelement#7 "); $$ = newNodeS (TextToken, "'''''"); }
	426	+ \| EQUALS { debugf ("textelement#8 "); $$ = newNodeS (TextToken, addSpaces ("=", $1)); }
	427	+ \| TABLEBEGIN { debugf ("textelement#9 "); $$ = newNodeS (TextToken, addSpaces ("{\|", $1)); }
	428	+ \| TABLEEND { debugf ("textelement#10 "); $$ = newNodeS (TextToken, "\|}"); }
	429	+ \| TABLEROW { debugf ("textelement#11 "); $$ = convertTableRowToText ($1); }
	430	+ \| TABLECELL { debugf ("textelement#12 "); $$ = convertTableCellToText ($1); }
	431	+ \| TABLEHEAD { debugf ("textelement#13 "); $$ = convertTableHeadToText ($1); }
	432	+ \| ATTRIBUTE { debugf ("textelement#14 "); $$ = convertAttributeDataToText ($1); }
	433	+ \| comment { debugf ("textelement#15 "); $$ = $1; }
	434	+ \| linketc { debugf ("textelement#16 "); $$ = $1; }
	435	+ \| italicsorbold { debugf ("textelement#17 "); $$ = $1; }
199	436
200	437	textelementnoital : TEXT { debugf ("textelementnoital#1 "); $$ = $1; }
201	438	\| EXTENSION { debugf ("textelementnoital#2 "); $$ = $1; }
202	439	\| PIPE { debugf ("textelementnoital#3 "); $$ = newNodeS (TextToken, "\|"); }
203	440	\| ENDHEADING { debugf ("textelementnoital#4 "); $$ = processEndHeadingInText ($1); }
204		~~- \| boldnoitalics { debugf ("textelementnoital#5 "); $$ = $1; }~~
	441	+ \| TABLEBEGIN { debugf ("textelementnoital#5 "); $$ = newNodeS (TextToken, addSpaces ("{\|", $1)); }
	442	+ \| TABLEEND { debugf ("textelementnoital#6 "); $$ = newNodeS (TextToken, "\|}"); }
	443	+ \| TABLEROW { debugf ("textelementnoital#7 "); $$ = convertTableRowToText ($1); }
	444	+ \| TABLECELL { debugf ("textelementnoital#8 "); $$ = convertTableCellToText ($1); }
	445	+ \| TABLEHEAD { debugf ("textelementnoital#9 "); $$ = convertTableHeadToText ($1); }
	446	+ \| comment { debugf ("textelementnoital#10 "); $$ = $1; }
	447	+ \| linketc { debugf ("textelementnoital#11 "); $$ = $1; }
	448	+ \| boldnoitalics { debugf ("textelementnoital#12 "); $$ = $1; }
205	449
206	450	textelementnobold : TEXT { debugf ("textelementnobold#1 "); $$ = $1; }
207	451	\| EXTENSION { debugf ("textelementnobold#2 "); $$ = $1; }
208	452	\| PIPE { debugf ("textelementnobold#3 "); $$ = newNodeS (TextToken, "\|"); }
209	453	\| ENDHEADING { debugf ("textelementnobold#4 "); $$ = processEndHeadingInText ($1); }
210		~~- \| italicsnobold { debugf ("textelementnobold#5 "); $$ = $1; }~~
	454	+ \| TABLEBEGIN { debugf ("textelementnobold#5 "); $$ = newNodeS (TextToken, addSpaces ("{\|", $1)); }
	455	+ \| TABLEEND { debugf ("textelementnobold#6 "); $$ = newNodeS (TextToken, "\|}"); }
	456	+ \| TABLEROW { debugf ("textelementnobold#7 "); $$ = convertTableRowToText ($1); }
	457	+ \| TABLECELL { debugf ("textelementnobold#8 "); $$ = convertTableCellToText ($1); }
	458	+ \| TABLEHEAD { debugf ("textelementnobold#9 "); $$ = convertTableHeadToText ($1); }
	459	+ \| comment { debugf ("textelementnobold#10 "); $$ = $1; }
	460	+ \| linketc { debugf ("textelementnobold#11 "); $$ = $1; }
	461	+ \| italicsnobold { debugf ("textelementnobold#12 "); $$ = $1; }
211	462
212	463	textelementnoboit : TEXT { debugf ("textelementnoboit#1 "); $$ = $1; }
213	464	\| EXTENSION { debugf ("textelementnoboit#2 "); $$ = $1; }
214	465	\| PIPE { debugf ("textelementnoboit#3 "); $$ = newNodeS (TextToken, "\|"); }
215	466	\| ENDHEADING { debugf ("textelementnoboit#4 "); $$ = processEndHeadingInText ($1); }
	467	+ \| TABLEBEGIN { debugf ("textelementnoboit#5 "); $$ = newNodeS (TextToken, addSpaces ("{\|", $1)); }
	468	+ \| TABLEEND { debugf ("textelementnoboit#6 "); $$ = newNodeS (TextToken, "\|}"); }
	469	+ \| TABLEROW { debugf ("textelementnoboit#7 "); $$ = convertTableRowToText ($1); }
	470	+ \| TABLECELL { debugf ("textelementnoboit#8 "); $$ = convertTableCellToText ($1); }
	471	+ \| TABLEHEAD { debugf ("textelementnoboit#9 "); $$ = convertTableHeadToText ($1); }
	472	+ \| comment { debugf ("textelementnoboit#10 "); $$ = $1; }
	473	+ \| linketc { debugf ("textelementnoboit#11 "); $$ = $1; }
216	474
217		~~-paragraph : text NEWLINE~~
218		~~- { debugf ("paragraph#1 "); $$ = nodeAddChild (newNode (Paragraph), $1); }~~
219		~~- \| text NEWLINE paragraph /* needs to be right-recursive due to eof */~~
220		~~- { debugf ("paragraph#2 "); $$ = nodePrependChild (nodePrependChild ($3,~~
221		~~- newNodeS (TextToken, " ")), $1); }~~
222		~~- \| text /* for eof */~~
223		~~- { debugf ("paragraph#3 "); $$ = nodeAddChild (newNode (Paragraph), $1); }~~
	475	+textelementnotbl : TEXT { debugf ("textelementnotbl#1 "); $$ = $1; }
	476	+ \| EXTENSION { debugf ("textelementnotbl#2 "); $$ = $1; }
	477	+ \| PIPE { debugf ("textelementnotbl#3 "); $$ = newNodeS (TextToken, "\|"); }
	478	+ \| ENDHEADING { debugf ("textelementnotbl#4 "); $$ = processEndHeadingInText ($1); }
	479	+ \| APO2 { debugf ("textelementnotbl#5 "); $$ = newNodeS (TextToken, "''"); }
	480	+ \| APO3 { debugf ("textelementnotbl#6 "); $$ = newNodeS (TextToken, "'''"); }
	481	+ \| APO5 { debugf ("textelementnotbl#7 "); $$ = newNodeS (TextToken, "'''''"); }
	482	+ \| EQUALS { debugf ("textelementnotbl#8 "); $$ = newNodeS (TextToken, addSpaces ("=", $1)); }
	483	+ \| comment { debugf ("textelementnotbl#9 "); $$ = $1; }
	484	+ \| linketcnotbl { debugf ("textelementnotbl#10 "); $$ = $1; }
	485	+ \| italorboldnotbl { debugf ("textelementnotbl#11 "); $$ = $1; }
224	486
225		-/*
226		~~-table : TABLEBEGIN tablerows TABLEEND { debugf ("table#1 "); $$ = $2; }~~
227		~~- \| TABLEBEGIN tablerows /* eof * /{ debugf ("table#2 "); $$ = $2; }~~
	487	+textelementnoitaltbl: TEXT { debugf ("textelementnoitaltbl#1 "); $$ = $1; }
	488	+ \| EXTENSION { debugf ("textelementnoitaltbl#2 "); $$ = $1; }
	489	+ \| PIPE { debugf ("textelementnoitaltbl#3 "); $$ = newNodeS (TextToken, "\|"); }
	490	+ \| ENDHEADING { debugf ("textelementnoitaltbl#4 "); $$ = processEndHeadingInText ($1); }
	491	+ \| comment { debugf ("textelementnoitaltbl#5 "); $$ = $1; }
	492	+ \| linketcnotbl { debugf ("textelementnoitaltbl#6 "); $$ = $1; }
	493	+ \| boldnoitalicstbl{ debugf ("textelementnoitaltbl#7 "); $$ = $1; }
228	494
229		~~-tablerows : tablerow { debugf ("tablerows#1 "); $$ = $1; }~~
230		~~- \| tablerows tablerow { debugf ("tablerows#2 ");~~
231		~~-*/~~
	495	+textelementnoboldtbl: TEXT { debugf ("textelementnoboldtbl#1 "); $$ = $1; }
	496	+ \| EXTENSION { debugf ("textelementnoboldtbl#2 "); $$ = $1; }
	497	+ \| PIPE { debugf ("textelementnoboldtbl#3 "); $$ = newNodeS (TextToken, "\|"); }
	498	+ \| ENDHEADING { debugf ("textelementnoboldtbl#4 "); $$ = processEndHeadingInText ($1); }
	499	+ \| comment { debugf ("textelementnoboldtbl#5 "); $$ = $1; }
	500	+ \| linketcnotbl { debugf ("textelementnoboldtbl#6 "); $$ = $1; }
	501	+ \| italicsnoboldtbl{ debugf ("textelementnoboldtbl#7 "); $$ = $1; }
232	502
	503	+textelementnoboittbl: TEXT { debugf ("textelementnoboittbl#1 "); $$ = $1; }
	504	+ \| EXTENSION { debugf ("textelementnoboittbl#2 "); $$ = $1; }
	505	+ \| PIPE { debugf ("textelementnoboittbl#3 "); $$ = newNodeS (TextToken, "\|"); }
	506	+ \| ENDHEADING { debugf ("textelementnoboittbl#4 "); $$ = processEndHeadingInText ($1); }
	507	+ \| comment { debugf ("textelementnoboittbl#5 "); $$ = $1; }
	508	+ \| linketcnotbl { debugf ("textelementnoboittbl#6 "); $$ = $1; }
	509	+
233	510	zeroormorenewlines : /* empty */ { debugf ("zeroormorenewlines#1 "); $$ = 0; }
234	511	\| oneormorenewlines { debugf ("zeroormorenewlines#2 "); $$ = 0; }
235	512	oneormorenewlines : NEWLINE { debugf ("oneormorenewlines#1 "); $$ = 0; }
—	—	@@ -239,7 +516,52 @@
240	517	oneormorenewlinessave : NEWLINE { debugf ("oneormorenewlinessave#1 "); $$ = newNodeI (Newlines, 0); }
241	518	\| oneormorenewlinessave NEWLINE { debugf ("oneormorenewlinessave#2 "); $1->data.num++; $$ = $1; }
242	519
	520	+paragraph : text NEWLINE
	521	+ { debugf ("paragraph#1 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
	522	+ \| paragraph text NEWLINE
	523	+ { debugf ("paragraph#2 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
	524	+ /* for eof ... */
	525	+ \| text
	526	+ { debugf ("paragraph#3 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
	527	+ \| paragraph text
	528	+ { debugf ("paragraph#4 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
243	529
	530	+/* This seemingly pointless inclusion of 'attributes' here that will all be converted to text
	531	+ * by way of convertAttributesToText() is necessary because, as a table cell begins, we simply
	532	+ * don't know whether there are attributes following or not. We parse them as attributes first,
	533	+ * but then convert them back to text if it turns out they're not. */
	534	+paragraphnotbl : textnotbl NEWLINE
	535	+ { debugf ("paragraphnotbl#1 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
	536	+ \| attributes textnotbl NEWLINE
	537	+ { debugf ("paragraphnotbl#2 "); $$ = nodeAddChild2 (newNode (Paragraph), convertAttributesToText ($1), $2); }
	538	+ \| attributes NEWLINE
	539	+ { debugf ("paragraphnotbl#3 "); $$ = nodeAddChild (newNode (Paragraph), convertAttributesToText ($1)); }
	540	+ \| paragraphnotbl textnotbl NEWLINE
	541	+ { debugf ("paragraphnotbl#4 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
	542	+ \| paragraphnotbl attributes textnotbl NEWLINE
	543	+ { debugf ("paragraphnotbl#5 "); $$ = nodeAddChild3 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2), $3); }
	544	+ \| paragraphnotbl attributes NEWLINE
	545	+ { debugf ("paragraphnotbl#6 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2)); }
	546	+ /* for eof ... */
	547	+ \| textnotbl
	548	+ { debugf ("paragraphnotbl#7 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
	549	+ \| attributes textnotbl
	550	+ { debugf ("paragraphnotbl#8 "); $$ = nodeAddChild2 (newNode (Paragraph), convertAttributesToText ($1), $2); }
	551	+ \| attributes
	552	+ { debugf ("paragraphnotbl#9 "); $$ = nodeAddChild (newNode (Paragraph), convertAttributesToText ($1)); }
	553	+ \| paragraphnotbl textnotbl
	554	+ { debugf ("paragraphnotbl#10 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
	555	+ \| paragraphnotbl attributes textnotbl
	556	+ { debugf ("paragraphnotbl#11 "); $$ = nodeAddChild3 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2), $3); }
	557	+ \| paragraphnotbl attributes
	558	+ { debugf ("paragraphnotbl#12 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2)); }
	559	+
	560	+comment : BEGINCOMMENT text ENDCOMMENT
	561	+ { debugf ("comment#1 "); $$ = nodeAddChild (newNode (Comment), $2); }
	562	+ \| BEGINCOMMENT ENDCOMMENT
	563	+ { debugf ("comment#2 "); $$ = newNode (Comment); }
	564	+
	565	+
244	566	%%
245	567
246	568	/* programs */
—	—	@@ -249,7 +571,7 @@
250	572	printf ("Parsing... ");
251	573	result = yyparse();
252	574	if (!result)
253		~~- printf ("\n\nXML output:\n\n%s\n\n", outputXML (articlenode));~~
	575	+ printf ("\n\nXML output:\n\n%s\n\n", outputXML (articlenode, 1024));
254	576	return result;
255	577	}
256	578
—	—	@@ -264,13 +586,13 @@
265	587	* end with two NULs instead of just one. Thus yy_scan_string is the easiest way for now. */
266	588	yy_scan_string (input);
267	589
268		~~- /* Start with an output buffer twice the size of the input, but at least 1 KB. This should~~
269		~~- * normally be plenty. If it isn't, it will grow automatically. */~~
270		~~- i = 2*strlen (input);~~
271		~~- fb_set_buffer_size (i < 1024 ? 1024 : i);~~
272		-
273	590	result = yyparse();
274	591	if (!result)
275		~~- return outputXML (articlenode);~~
	592	+ {
	593	+ /* Start with an output buffer twice the size of the input, but at least 1 KB. This should
	594	+ * normally be plenty. If it isn't, it will grow automatically. */
	595	+ i = 2*strlen (input);
	596	+ return outputXML (articlenode, i < 1024 ? 1024 : i);
	597	+ }
276	598	return "<error />";
277	599	}
Index: trunk/flexbisonparse/wikilex.l
—	—	@@ -13,6 +13,8 @@
14	14
15	15	#include <stdio.h>
16	16	#include <string.h>
	17	+
	18	+/* This file defines debuglex and debuglex2. */
17	19	#include "fb_defines.h"
18	20
19	21	/* Notice: We need to include parsetree.h first because wikiparse.tab.h will
—	—	@@ -20,27 +22,37 @@
21	23	#include "parsetree.h"
22	24	#include "wikiparse.tab.h"
23	25
	26	+/* Tells flex to stop processing input when EOF is reached. */
24	27	int yywrap(void) { return 1; }
25	28
	29	+int encodeTableRowInfo (char* input, int initleng)
	30	+{
	31	+ int i = 1;
	32	+ while (input[i] == '-') i++;
	33	+ return ((i-1)*0x10000 + (initleng-i));
	34	+}
	35	+
	36	+#define COMMONTOKENS \
	37	+ if (YY_START != inattributeapo && YY_START != inattributeq && YY_START != canbeheading) \
	38	+ BEGIN (cannotbelistorheadingorpre);
	39	+
26	40	%}
27	41
	42	+/* inclusive start conditions */
28	43	%s canbelist canbeheading cannotbelistorheadingorpre attributes
	44	+%s inattributeapo inattributeq startattribute
	45	+
	46	+/* exclusive start conditions */
29	47	%x extension comment
30	48
31	49	%%
32	50
33		~~- /* We will use the same buffer every time to accumulate the contents of an~~
34		~~- * extension token. newExtensionData() will copy the data to a new string */~~
35		~~- char extension_buf [ 65536 ];~~
36		~~- char * extension_buf_ptr;~~
37		~~- char * extension_name = 0;~~
38		~~- ExtensionData ed;~~
39		~~- Node node;~~
	51	+ char* extension_name = 0;
40	52	int i;
41	53
42	54
43	55	"<"[[:alnum:]]+">" {
44		~~- extension_buf_ptr = extension_buf;~~
	56	+ fb_create_new_buffer (256); /* for the contents */
45	57	i = strlen (yytext)-1;
46	58	extension_name = (char*) malloc (i * sizeof (char));
47	59	memcpy (extension_name, yytext + 1, --i);
—	—	@@ -50,55 +62,77 @@
51	63	<extension>"</"[[:alnum:]]+">" {
52	64	i = strlen (extension_name);
53	65	if (strncmp (extension_name, yytext+2, i))
54		~~- {~~
55		~~- memcpy (extension_buf_ptr, yytext, yyleng);~~
56		~~- extension_buf_ptr += yyleng;~~
57		~~- }~~
	66	+ fb_write_to_buffer (yytext);
58	67	else
59	68	{
60		~~- *extension_buf_ptr = '\0';~~
61		~~- BEGIN (INITIAL);~~
62		~~- yylval.node = newNodeE (ExtensionToken,~~
63		~~- newExtensionData (extension_name, extension_buf));~~
	69	+ BEGIN (cannotbelistorheadingorpre);
	70	+ yylval.node = newNodeN (ExtensionToken,
	71	+ extension_name, fb_get_buffer(), 0, 0);
64	72	debuglex ("EXTENSION ");
65	73	return EXTENSION;
66	74	}
67	75	}
68		~~-<extension>. { *extension_buf_ptr++ = yytext[0]; }~~
	76	+<extension>.[^<>]* { fb_write_to_buffer (yytext); }
69	77	<extension><<EOF>> {
70		~~- *extension_buf_ptr = '\0';~~
71		~~- yylval.node = newNodeE (ExtensionToken,~~
72		~~- newExtensionData (extension_name, extension_buf));~~
	78	+ BEGIN (cannotbelistorheadingorpre);
	79	+ yylval.node = newNodeN (ExtensionToken,
	80	+ extension_name, fb_get_buffer(), 0, 0);
73	81	debuglex ("EXTENSION ");
74		~~- BEGIN (INITIAL);~~
75	82	return EXTENSION;
76	83	}
77	84
78		~~-"<!---->" { BEGIN (cannotbelistorheadingorpre); debuglex ("EMPTYCOMMENT "); return EMPTYCOMMENT; }~~
79	85	"<!--" { BEGIN (comment); debuglex ("BEGINCOMMENT "); return BEGINCOMMENT; }
80	86	<comment>.[^-]* { debuglex ("TEXT "); yylval.node = newNodeS (TextToken, strdup (yytext)); return TEXT; }
81	87	<comment>"-->" { BEGIN (cannotbelistorheadingorpre); debuglex ("ENDCOMMENT "); return ENDCOMMENT; }
82	88
83		~~-"\[\[:" { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENLINK "); return OPENLINK; }~~
84		~~-"\[\[" { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENDBLSQBR "); return OPENDBLSQBR; }~~
85		~~-"\]\]" { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSEDBLSQBR "); return CLOSEDBLSQBR; }~~
86		~~-\\| { BEGIN (cannotbelistorheadingorpre); debuglex ("PIPE "); return PIPE; }~~
87		~~-\{\{\{\{\{ { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENPENTUPLECURLY "); return OPENPENTUPLECURLY; }~~
88		~~-\}\}\}\}\} { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSEPENTUPLECURLY "); return CLOSEPENTUPLECURLY; }~~
89		~~-\{\{\{ { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENTEMPLATEVAR "); return OPENTEMPLATEVAR; }~~
90		~~-\}\}\} { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSETEMPLATEVAR "); return CLOSETEMPLATEVAR; }~~
91		~~-\{\{ { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENTEMPLATE "); return OPENTEMPLATE; }~~
92		~~-\}\} { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSETEMPLATE "); return CLOSETEMPLATE; }~~
93		~~-'''''/[^'] { BEGIN (cannotbelistorheadingorpre); debuglex ("APO5 "); return APO5; }~~
94		~~-'''/[^'] { BEGIN (cannotbelistorheadingorpre); debuglex ("APO3 "); return APO3; }~~
95		~~-''/[^'] { BEGIN (cannotbelistorheadingorpre); debuglex ("APO2 "); return APO2; }~~
	89	+ /* For the table-related tokens, we need to remember enough information so that we can
	90	+ * reliably turn things back into text. */
	91	+"{\|"" "* { BEGIN(attributes); debuglex ("TABLEBEGIN "); yylval.num = yyleng-2; return TABLEBEGIN; }
	92	+"\|\|"" "* { yylval.num = 2*(yyleng-2); BEGIN(attributes); debuglex2 ("TABLECELL(%u) ", yylval.num); return TABLECELL; }
	93	+^"\|"" "* { yylval.num = 2*(yyleng-1)+1; BEGIN(attributes); debuglex2 ("TABLECELL(%u) ", yylval.num); return TABLECELL; }
	94	+"!!"" "* { BEGIN(attributes); debuglex ("TABLEHEAD "); yylval.num = 2*(yyleng-2); return TABLEHEAD; }
	95	+^"!"" "* { BEGIN(attributes); debuglex ("TABLEHEAD "); yylval.num = 2*(yyleng-1)+1; return TABLEHEAD; }
	96	+"\|""-"+" "* { BEGIN(attributes); debuglex ("TABLEROW "); yylval.num = encodeTableRowInfo (yytext, yyleng); return TABLEROW; }
	97	+"\|}" { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLEEND "); return TABLEEND; }
	98	+
	99	+<attributes>[-a-zA-Z:_]+" "* {
	100	+ debuglex2 ("ATTRIBUTE(%s) ", yytext);
	101	+ yylval.ad = newAttributeDataFromStr (yytext);
	102	+ return ATTRIBUTE;
	103	+ }
	104	+<attributes>"="" "* {
	105	+ debuglex2 ("EQUALS(%d) ", yyleng-1);
	106	+ yylval.num = yyleng-1;
	107	+ BEGIN (startattribute);
	108	+ return EQUALS;
	109	+ }
	110	+
	111	+<startattribute>\' { BEGIN (inattributeapo); yylval.num = 0; debuglex ("ATTRAPO(0) "); return ATTRAPO; }
	112	+<startattribute>\" { BEGIN (inattributeq); yylval.num = 0; debuglex ("ATTRQ(0) "); return ATTRQ; }
	113	+<inattributeapo>\'" "* { BEGIN (attributes); yylval.num = yyleng-1; debuglex2 ("ATTRAPO(%d) ", yyleng-1); return ATTRAPO; }
	114	+<inattributeq>\"" "* { BEGIN (attributes); yylval.num = yyleng-1; debuglex2 ("ATTRQ(%d) ", yyleng-1); return ATTRQ; }
	115	+
	116	+"\[\[:" { COMMONTOKENS; debuglex ("OPENLINK "); return OPENLINK; }
	117	+"\[\[" { COMMONTOKENS; debuglex ("OPENDBLSQBR "); return OPENDBLSQBR; }
	118	+"\]\]" { COMMONTOKENS; debuglex ("CLOSEDBLSQBR "); return CLOSEDBLSQBR; }
	119	+\\| { COMMONTOKENS; debuglex ("PIPE "); return PIPE; }
	120	+\{\{\{\{\{ { COMMONTOKENS; debuglex ("OPENPENTUPLECURLY "); return OPENPENTUPLECURLY; }
	121	+\}\}\}\}\} { COMMONTOKENS; debuglex ("CLOSEPENTUPLECURLY "); return CLOSEPENTUPLECURLY; }
	122	+\{\{\{ { COMMONTOKENS; debuglex ("OPENTEMPLATEVAR "); return OPENTEMPLATEVAR; }
	123	+\}\}\} { COMMONTOKENS; debuglex ("CLOSETEMPLATEVAR "); return CLOSETEMPLATEVAR; }
	124	+\{\{ { COMMONTOKENS; debuglex ("OPENTEMPLATE "); return OPENTEMPLATE; }
	125	+\}\} { COMMONTOKENS; debuglex ("CLOSETEMPLATE "); return CLOSETEMPLATE; }
	126	+'''''/[^'] { COMMONTOKENS; debuglex ("APO5 "); return APO5; }
	127	+'''/[^'] { COMMONTOKENS; debuglex ("APO3 "); return APO3; }
	128	+''/[^'] { COMMONTOKENS; debuglex ("APO2 "); return APO2; }
96	129	\n { BEGIN (INITIAL); debuglex ("NEWLINE\n"); return NEWLINE; }
	130	+\r { /* ignore this one */ debuglex ("<13> "); }
97	131
98	132	^" " { BEGIN(cannotbelistorheadingorpre); debuglex ("PRELINE "); return PRELINE; }
99		~~-^\*[[:space:]]* { BEGIN(canbelist); debuglex ("LISTBULLET "); return LISTBULLET; }~~
100		~~-<canbelist>\*[[:space:]]* { debuglex ("LISTBULLET "); return LISTBULLET; }~~
101		~~-^\#[[:space:]]* { BEGIN(canbelist); debuglex ("LISTNUMBERED "); return LISTNUMBERED; }~~
102		~~-<canbelist>\#[[:space:]]* { debuglex ("LISTNUMBERED "); return LISTNUMBERED; }~~
	133	+^\*[ \t]* { BEGIN(canbelist); debuglex ("LISTBULLET "); return LISTBULLET; }
	134	+<canbelist>\*[ \t]* { debuglex ("LISTBULLET "); return LISTBULLET; }
	135	+^\#[ \t]* { BEGIN(canbelist); debuglex ("LISTNUMBERED "); return LISTNUMBERED; }
	136	+<canbelist>\#[ \t]* { debuglex ("LISTNUMBERED "); return LISTNUMBERED; }
103	137
104	138	^"="+ {
105	139	BEGIN (canbeheading);
—	—	@@ -114,24 +148,28 @@
115	149	return ENDHEADING;
116	150	}
117	151
118		~~-"{\|"" "* { BEGIN(attributes); debuglex ("TABLEBEGIN "); return TABLEBEGIN; }~~
119		~~-"\|\|"" "/[^\\| ][^\\|]"\|"[^\\|] { BEGIN(attributes); debuglex ("TABLECELL "); return TABLECELL; }~~
120		~~-"\|\|"" "* { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLECELL "); return TABLECELL; }~~
121		~~-"!!"" "/[^\! ][^\!]"!"[^\!] { BEGIN(attributes); debuglex ("TABLEHEAD "); return TABLEHEAD; }~~
122		~~-"!!"" "* { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLEHEAD "); return TABLEHEAD; }~~
	152	+<cannotbelistorheadingorpre,canbeheading>[^\\|\r\n][^\<\>\[\]\{\}\r\n\'\\|\=\!]* \|
123	153
124		~~-"\|""-"+" "* { BEGIN(attributes); debuglex ("TABLEROW "); return TABLEROW; }~~
125		~~-"\|}"" "* { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLEEND "); return TABLEEND; }~~
	154	+<inattributeapo>[^\'\\|\r\n][^\<\>\[\]\{\}\r\n\'\\|\=\!]* \|
126	155
	156	+<inattributeq>[^\"\\|\r\n][^\<\>\[\]\{\}\r\n\'\"\\|\=\!]* {
	157	+ yylval.node = newNodeS (TextToken, strdup (yytext));
	158	+ debuglex2 ("TEXT(%s) ", yytext);
	159	+ return TEXT; }
127	160
128		~~-<cannotbelistorheadingorpre,canbeheading>[^\\|\n][^\<\>\[\]\{\}\n\'\\|\=]* {~~
	161	+<canbelist>[^ \\|\*\#\r\n][^\<\>\[\]\{\}\r\n\'\\|\!]* \|
	162	+
	163	+<attributes>[^-a-zA-Z:_\r\n\\|\=][^\<\>\[\]\{\}\r\n\'\\|\!]* \|
	164	+
	165	+<INITIAL>[^ \\|\*\#\r\n\=][^\<\>\[\]\{\}\r\n\'\\|\=\!]* {
	166	+ BEGIN(cannotbelistorheadingorpre);
129	167	yylval.node = newNodeS (TextToken, strdup (yytext));
130		~~- debuglex2 ("TEXT(%s) ", yytext); return TEXT; }~~
131		~~-<canbelist>[^ \\|\*\#\n][^\<\>\[\]\{\}\n\'\\|]* {~~
132		~~- BEGIN(cannotbelistorheadingorpre); yylval.node = newNodeS (TextToken, strdup (yytext));~~
133		~~- debuglex2 ("TEXT(%s) ", yytext); return TEXT; }~~
134		~~-<INITIAL>[^ \\|\*\#\n\=][^\<\>\[\]\{\}\n\'\\|\=]* {~~
135		~~- BEGIN(cannotbelistorheadingorpre); yylval.node = newNodeS (TextToken, strdup (yytext));~~
136		~~- debuglex2 ("TEXT(%s) ", yytext); return TEXT; }~~
	168	+ debuglex2 ("TEXT(%s) ", yytext);
	169	+ return TEXT; }
137	170
	171	+<startattribute>[^ \t\r\n\'\"][^ \t\r\n]*" "* {
	172	+ BEGIN (attributes);
	173	+ yylval.node = newNodeS (TextToken, strdup (yytext));
	174	+ debuglex2 ("TEXT(%s) ", yytext);
	175	+ return TEXT; }
138	176	%%
Index: trunk/flexbisonparse/fb_defines.h
—	—	@@ -12,10 +12,12 @@
13	13	/* Change these to
14	14	#define debuglex printf
15	15	#define debuglex2 printf
	16	+ #define debuglex3 printf
16	17	to have the lexer output all the tokens generated. */
17	18
18	19	#define debuglex(x)
19	20	#define debuglex2(x,y)
	21	+#define debuglex3(x,y,z)
20	22
21	23
22	24	/* Change this one to
Index: trunk/flexbisonparse/parsetree.c
—	—	@@ -33,13 +33,38 @@
34	34	result->data.str = data;
35	35	return result;
36	36	}
37		~~-Node newNodeE (NodeType newType, ExtensionData data)~~
	37	+Node newNodeN (NodeType newType, char* name, char* value, int copyName, int copyValue)
38	38	{
39	39	Node result = newNode (newType);
40		~~- result->data.ext = data;~~
	40	+ result->data.nameval = (NameValue) malloc (sizeof (struct NameValueStruct));
	41	+ result->data.nameval->name = copyName ? strdup (name) : name;
	42	+ result->data.nameval->value = copyValue ? strdup (value) : value;
41	43	return result;
42	44	}
	45	+AttributeData newAttributeDataFromStr (char* str)
	46	+{
	47	+ AttributeData ret = (AttributeData) malloc (sizeof (struct AttributeDataStruct));
	48	+ int len = strlen (str);
	49	+ int i = len-1;
43	50
	51	+ while (str[i] == ' ') i--;
	52	+ i++;
	53	+ ret->name = (char*) malloc ((i+1) * sizeof (char));
	54	+ memcpy (ret->name, str, i * sizeof (char));
	55	+ ret->name[i] = '\0';
	56	+ ret->spacesAfterName = len-i;
	57	+ return ret;
	58	+}
	59	+Node newNodeA (int t, AttributeData ad, int sae, int sav)
	60	+{
	61	+ Node result = newNode (Attribute);
	62	+ result->data.attrdata = ad;
	63	+ result->data.attrdata->type = t;
	64	+ result->data.attrdata->spacesAfterEquals = sae;
	65	+ result->data.attrdata->spacesAfterValue = sav;
	66	+ return result;
	67	+}
	68	+
44	69	/* Return value is the first parameter */
45	70	Node nodeAddChild (Node node, Node child)
46	71	{
—	—	@@ -61,11 +86,12 @@
62	87	/* Return value is the first parameter */
63	88	Node nodeAddSibling (Node node, Node sibling)
64	89	{
	90	+ Node examine = node;
65	91	if (sibling)
66	92	{
67		~~- while (node->nextSibling)~~
68		~~- node = node->nextSibling;~~
69		~~- node->nextSibling = sibling;~~
	93	+ while (examine->nextSibling)
	94	+ examine = examine->nextSibling;
	95	+ examine->nextSibling = sibling;
70	96	}
71	97	return node;
72	98	}
—	—	@@ -73,18 +99,21 @@
74	100	/* Return value is the first parameter */
75	101	Node nodePrependChild (Node node, Node child)
76	102	{
77		~~- child->nextSibling = node->firstChild;~~
	103	+ Node prevChild = node->firstChild;
78	104	node->firstChild = child;
79		~~- return node;~~
	105	+ return nodeAddChild (node, prevChild);
80	106	}
81	107
82		~~-ExtensionData newExtensionData (char* name, char* text)~~
	108	+void freeRecursively (Node node)
83	109	{
84		~~- ExtensionData ed = (ExtensionData) malloc (sizeof (struct ExtensionDataStruct));~~
85		~~- ed->name = name;~~
86		~~- ed->text = (char *) malloc ((strlen (text)+1) * sizeof (char));~~
87		~~- strcpy (ed->text, text);~~
88		~~- return ed;~~
	110	+ Node next, child = node->firstChild;
	111	+
	112	+ while (child)
	113	+ {
	114	+ next = child->nextSibling;
	115	+ freeRecursively (child);
	116	+ child = next;
	117	+ }
89	118	}
90	119
91	120	void removeAndFreeFirstChild (Node node)
—	—	@@ -92,7 +121,7 @@
93	122	Node child = node->firstChild;
94	123	if (!child) return;
95	124	node->firstChild = child->nextSibling;
96		~~- free (child);~~
	125	+ freeRecursively (child);
97	126	}
98	127
99	128	/* Parameter must be a ListLine node. Returns a List node. */
—	—	@@ -255,6 +284,7 @@
256	285	/* Re-attach the next sibling (B) */
257	286	examine->nextSibling = tmpnode;
258	287	}
	288	+ /* Newlines nodes don't have children, no need for freeRecursively */
259	289	free (newlinesnode);
260	290	}
261	291	examine = examine->nextSibling;
—	—	@@ -276,6 +306,20 @@
277	307	return newNodeS (TextToken, ret);
278	308	}
279	309
	310	+Node processTableCellContents (Node node)
	311	+{
	312	+ Node ret;
	313	+
	314	+ if (!node) return 0;
	315	+ if (node->type == Paragraph && !node->nextSibling)
	316	+ {
	317	+ ret = node->firstChild;
	318	+ free (node);
	319	+ return ret;
	320	+ }
	321	+ return node;
	322	+}
	323	+
280	324	Node processNestedItalics (Node node)
281	325	{
282	326	Node examine, saveExamineSibling, childExamine, childSibling, saveChildSibling;
—	—	@@ -353,6 +397,7 @@
354	398	/* Move examine on to the newly created sibling */
355	399	examine = examine->nextSibling;
356	400	/* Free the now-obsolete Italics node */
	401	+ /* We have attached its children elsewhere, so don't use freeRecursively */
357	402	free (childSibling);
358	403	}
359	404	/* Any node that is not an Italics node needs to become attached to one.
—	—	@@ -390,6 +435,7 @@
391	436	if (a->type == TextBlock && b->type == TextBlock)
392	437	{
393	438	nodeAddChild (a, b->firstChild);
	439	+ /* We have attached b's children elsewhere, so don't use freeRecursively */
394	440	free (b);
395	441	return a;
396	442	}
—	—	@@ -398,18 +444,179 @@
399	445	else if (b->type == TextBlock)
400	446	return nodePrependChild (b, a);
401	447	else
402		~~- return nodeAddChild (nodeAddChild (newNode (TextBlock), a), b);~~
	448	+ return nodeAddChild2 (newNode (TextBlock), a, b);
403	449	}
404	450
	451	+Node convertAttributesToText (Node node)
	452	+{
	453	+ char* str;
	454	+ int len, at, i;
	455	+ Node ret = 0, examine = node->firstChild, prevExamine;
	456	+ AttributeData ad;
	457	+
	458	+ if (node->type != AttributeGroup) return 0;
	459	+
	460	+ /* We've stored the first child in examine, so we can already free the parent */
	461	+ free (node);
	462	+
	463	+ while (examine) /* should be an Attribute node */
	464	+ {
	465	+ ad = examine->data.attrdata;
	466	+ /* first turn attribute name, equals sign (if any) and
	467	+ * opening apostrophe or quotes (if any) into one string */
	468	+ len = strlen (ad->name);
	469	+ at = len;
	470	+ len += ad->spacesAfterName;
	471	+ if (ad->type > 0)
	472	+ {
	473	+ len++; /* '=' */
	474	+ len += ad->spacesAfterEquals;
	475	+ if (ad->type > 1) len++; /* ' or " */
	476	+ }
	477	+ len++; /* trailing '\0' */
	478	+
	479	+ str = (char*) malloc (len * sizeof (char));
	480	+ memcpy (str, ad->name, at * sizeof (char));
	481	+ while (ad->spacesAfterName--) str[at++] = ' ';
	482	+ if (ad->type > 0)
	483	+ {
	484	+ str[at++] = '=';
	485	+ while (ad->spacesAfterEquals--) str[at++] = ' ';
	486	+ if (ad->type == 2) str[at++] = '\'';
	487	+ else if (ad->type == 3) str[at++] = '"';
	488	+ }
	489	+ str[at] = '\0';
	490	+
	491	+ ret = makeTextBlock2 (ret, newNodeS (TextToken, str), examine->firstChild);
	492	+
	493	+ if (ad->type > 1 \|\| (ad->type == 1 && ad->spacesAfterValue > 0))
	494	+ {
	495	+ at = ad->type > 1 ? 1 : 0;
	496	+ len = at + ad->spacesAfterValue;
	497	+ str = (char*) malloc (len * sizeof (char));
	498	+ if (ad->type == 2) str[0] = '\'';
	499	+ else if (ad->type == 3) str[0] = '"';
	500	+ while (ad->spacesAfterValue--) str[at++] = ' ';
	501	+ str[at] = '\0';
	502	+ ret = makeTextBlock (ret, newNodeS (TextToken, str));
	503	+ }
	504	+ prevExamine = examine;
	505	+ examine = examine->nextSibling;
	506	+ free (prevExamine);
	507	+ }
	508	+
	509	+ return ret;
	510	+}
	511	+
	512	+Node convertAttributeDataToText (AttributeData data)
	513	+{
	514	+ return makeTextBlock (newNodeS (TextToken, data->name),
	515	+ newNodeS (TextToken, addSpaces ("", data->spacesAfterName)));
	516	+}
	517	+
	518	+Node convertPipeSeriesToText (Node node)
	519	+{
	520	+ Node result = 0;
	521	+ Node nextNode;
	522	+
	523	+ while (node)
	524	+ {
	525	+ result = makeTextBlock2 (result, newNodeS (TextToken, "\|"), node->firstChild);
	526	+ nextNode = node->nextSibling;
	527	+ freeRecursively (node);
	528	+ node = nextNode;
	529	+ }
	530	+
	531	+ return result;
	532	+}
	533	+
	534	+Node convertTableRowToText (int info)
	535	+{
	536	+ int minuses, spaces, i;
	537	+ char* text;
	538	+
	539	+ minuses = info / 0x10000;
	540	+ spaces = info % 0x10000;
	541	+
	542	+ text = (char*) malloc ((minuses + spaces + 2) * sizeof (char));
	543	+ text[0] = '\|';
	544	+ i = 1;
	545	+ while (minuses--) text[i++] = '-';
	546	+ while (spaces--) text[i++] = ' ';
	547	+ text[i] = '\0';
	548	+ return newNodeS (TextToken, text);
	549	+}
	550	+
	551	+Node convertTableCellToText (int info)
	552	+{
	553	+ return newNodeS (TextToken, addSpaces (info % 2 ? "\|" : "\|\|", info/2));
	554	+}
	555	+
	556	+Node convertTableHeadToText (int info)
	557	+{
	558	+ return newNodeS (TextToken, addSpaces (info % 2 ? "!" : "!!", info/2));
	559	+}
	560	+
	561	+char* addSpaces (char* src, int spaces)
	562	+{
	563	+ char* ret;
	564	+ int len = strlen (src);
	565	+
	566	+ ret = (char*) malloc ((len + spaces + 1) * sizeof (char));
	567	+ if (len > 0) memcpy (ret, src, len * sizeof (char));
	568	+ ret[len+spaces] = '\0';
	569	+ while (spaces--) ret[len+spaces] = ' ';
	570	+ return ret;
	571	+}
	572	+
	573	+char* strtrim (char* src)
	574	+{
	575	+ int i = strlen (src);
	576	+ i--;
	577	+ while ((i > 0) && (src[i] == ' ')) i--;
	578	+ src[i+1] = '\0';
	579	+ return src;
	580	+}
	581	+
	582	+int strtrimC (char* src)
	583	+{
	584	+ int i = strlen (src), j = i;
	585	+ i--;
	586	+ while ((i > 0) && (src[i] == ' ')) i--;
	587	+ src[i+1] = '\0';
	588	+ return j - i - 1;
	589	+}
	590	+
	591	+Node strtrimN (Node src)
	592	+{
	593	+ if (src->type == TextToken)
	594	+ strtrim (src->data.str);
	595	+ return src;
	596	+}
	597	+int strtrimNC (Node src)
	598	+{
	599	+ if (src->type == TextToken)
	600	+ return strtrimC (src->data.str);
	601	+ return 0;
	602	+}
	603	+
405	604	char* fb_buffer;
406		~~-int fb_buflen = 1024; /* Start with 1 KB if user doesn't call fb_set_buffer_size() */~~
	605	+int fb_buflen;
407	606	int fb_bufcontentlen;
408	607
409		~~-inline void fb_set_buffer_size (int size)~~
	608	+void fb_create_new_buffer (int size)
410	609	{
	610	+ fb_buffer = (char*) malloc (size * sizeof (char));
	611	+ fb_buffer[0] = '\0';
	612	+ fb_bufcontentlen = 0;
411	613	fb_buflen = size;
412	614	}
413	615
	616	+char* fb_get_buffer()
	617	+{
	618	+ return fb_buffer;
	619	+}
	620	+
414	621	void fb_write_to_buffer_len (const char* str, int len)
415	622	{
416	623	char* newbuffer;
—	—	@@ -429,7 +636,7 @@
430	637	fb_buffer[fb_bufcontentlen] = '\0';
431	638	}
432	639
433		~~-inline void fb_write_to_buffer (const char* str)~~
	640	+void fb_write_to_buffer (const char* str)
434	641	{
435	642	fb_write_to_buffer_len (str, strlen (str));
436	643	}
—	—	@@ -459,9 +666,9 @@
460	667	case '>': FB_WRITE_CURRY ("&gt;");
461	668	case '"': FB_WRITE_CURRY ("&quot;");
462	669	default:
463		~~- if (*s < ' ')~~
	670	+ if (*s < ' ' && *s != '\n')
464	671	{
465		~~- sprintf (tmpstr, "&#%d;", *s);~~
	672	+ sprintf (tmpstr, "&#%u;", (unsigned char)*s);
466	673	FB_WRITE_CURRY (tmpstr);
467	674	}
468	675	else
—	—	@@ -495,10 +702,13 @@
496	703
497	704	rname =
498	705	node->type == TextBlock ? 0 /* don't output tags for this, just the text */ :
499		~~- node->type == Heading ? 0 /* outputXML already does this one; it may have attributes */ :~~
500		~~- node->type == List ? 0 /* outputXML already does this one; it may have attributes */ :~~
501		~~- node->type == LinkEtc ? 0 /* outputXML already does this one; it may have attributes */ :~~
502	706
	707	+ /* For the following, the tag is already output by outputXMLHelper: */
	708	+ node->type == Heading ? 0 :
	709	+ node->type == List ? 0 :
	710	+ node->type == LinkEtc ? 0 :
	711	+ node->type == Attribute ? 0 :
	712	+
503	713	node->type == LinkTarget ? "linktarget" :
504	714	node->type == LinkOption ? "linkoption" :
505	715	node->type == Article ? "article" :
—	—	@@ -508,6 +718,14 @@
509	719	node->type == ListItem ? "listitem" :
510	720	node->type == Bold ? "bold" :
511	721	node->type == Italics ? "italics" :
	722	+ node->type == Comment ? "comment" :
	723	+
	724	+ node->type == Table ? "table" :
	725	+ node->type == TableRow ? "tablerow" :
	726	+ node->type == TableCell ? "tablecell" :
	727	+ node->type == TableHead ? "tablehead" :
	728	+ node->type == AttributeGroup? "attrs" :
	729	+
512	730	/* Fallback value */
513	731	(sprintf (defaultname, "type%dnode", node->type), defaultname);
514	732
—	—	@@ -535,7 +753,7 @@
536	754	void outputXMLHelper (Node node)
537	755	{
538	756	Node child;
539		~~- ExtensionData ed;~~
	757	+ NameValue nv;
540	758	int i;
541	759	char tmpstr[255];
542	760
—	—	@@ -553,13 +771,23 @@
554	772	break;
555	773
556	774	case ExtensionToken:
557		~~- ed = node->data.ext;~~
558		~~- sprintf (tmpstr, "<extension name=\"%s\">", ed->name);~~
	775	+ nv = node->data.nameval;
	776	+ sprintf (tmpstr, "<extension name='%s'>", nv->name);
559	777	fb_write_to_buffer (tmpstr);
560		~~- fb_write_to_buffer_escaped (ed->text);~~
	778	+ fb_write_to_buffer_escaped (nv->value);
561	779	fb_write_to_buffer ("</extension>");
562	780	break;
563	781
	782	+ case Attribute:
	783	+ sprintf (tmpstr, "<attr name='%s'", node->data.attrdata->name);
	784	+ fb_write_to_buffer (tmpstr);
	785	+ if (node->data.attrdata->type == 0)
	786	+ fb_write_to_buffer (" isnull='yes'");
	787	+ fb_write_to_buffer (">");
	788	+ outputNode (node);
	789	+ fb_write_to_buffer ("</attr>");
	790	+ break;
	791	+
564	792	case List:
565	793	fb_write_to_buffer (node->data.num == 1 ? "<list type='bullet'>" :
566	794	node->data.num == 2 ? "<list type='numbered'>" :
—	—	@@ -583,12 +811,9 @@
584	812	}
585	813	}
586	814
587		~~-char* outputXML (Node node)~~
	815	+char* outputXML (Node node, int initialBufferSize)
588	816	{
589		~~- fb_buffer = (char*) malloc (fb_buflen * sizeof (char));~~
590		~~- fb_buffer[0] = '\0';~~
591		~~- fb_bufcontentlen = 0;~~
592		-
	817	+ fb_create_new_buffer (initialBufferSize);
593	818	outputXMLHelper (node);
594		~~- return fb_buffer;~~
	819	+ return fb_get_buffer();
595	820	}
Index: trunk/flexbisonparse/test.txt
—	—	@@ -8,3 +8,7 @@
9	9	** graphics
10	10	** sound
11	11
	12	+{| || Version 1 || not bad
	13	+|- || Version 2 || much better |}
	14	+
	15	+This is a || token in the middle of text.
\ No newline at end of file

Status & tagging log

15:00, 12 September 2011 Meno25 (talk | contribs) changed the status of r5237 [removed: ok added: old]
13:39, 18 June 2009 😂 (talk | contribs) changed the status of r5237 [removed: new added: ok]