r5237 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r5236‎ | r5237 | r5238 >
Date:18:37, 15 September 2004
Author:timwi
Status:old
Tags:
Comment:
- TABLES !!! :-)
- lots of fixes.
Modified paths:
  • /trunk/flexbisonparse/f (modified) (history)
  • /trunk/flexbisonparse/fb_defines.h (modified) (history)
  • /trunk/flexbisonparse/parsetree.c (modified) (history)
  • /trunk/flexbisonparse/parsetree.h (modified) (history)
  • /trunk/flexbisonparse/test.txt (modified) (history)
  • /trunk/flexbisonparse/wikilex.l (modified) (history)
  • /trunk/flexbisonparse/wikiparse.y (modified) (history)

Diff [purge]

Index: trunk/flexbisonparse/parsetree.h
@@ -12,7 +12,8 @@
1313 typedef enum NodeType {
1414 Article, Paragraph, Heading, TextBlock, TextToken, ExtensionToken,
1515 Newlines, PreBlock, PreLine, Bold, Italics, LinkEtc, LinkTarget,
16 - LinkOption,
 16+ LinkOption, Table, TableRow, TableCell, TableHead,
 17+ Attribute, AttributeGroup, Comment /* 20 */,
1718
1819 /* After first parse */
1920 ListBlock, ListLine, ListBullet, ListNumbered,
@@ -21,36 +22,58 @@
2223 List, ListItem
2324 } NodeType;
2425
25 -typedef struct ExtensionDataStruct
 26+typedef struct NameValueStruct
2627 {
2728 char * name;
28 - char * text;
 29+ char * value;
2930 }
30 -* ExtensionData;
 31+* NameValue;
3132
 33+/* During the parsing of table cells, we don't know in advance whether what we are currently
 34+ * parsing are attributes for the table cell, or the table cell's textual contents. We parse
 35+ * them as attributes first, but we use AttributeDataStruct to store enough data to allow us
 36+ * to turn it back into text should we later find out they weren't attributes after all. */
 37+typedef struct AttributeDataStruct
 38+{
 39+ char * name;
 40+ int type; /* 0 = just an attribute name; 1 = no quotes; 2 = '; 3 = " */
 41+ int spacesAfterName;
 42+ int spacesAfterEquals;
 43+ int spacesAfterValue;
 44+}
 45+* AttributeData;
 46+
3247 typedef union DataType {
33 - char* str;
 48+ char * str;
3449 int num;
35 - ExtensionData ext;
 50+ NameValue nameval;
 51+ AttributeData attrdata;
3652 } DataType;
3753
3854 typedef struct NodeStruct
3955 {
4056 NodeType type;
4157 DataType data;
42 - struct NodeStruct* nextSibling;
43 - struct NodeStruct* firstChild;
 58+ struct NodeStruct * nextSibling;
 59+ struct NodeStruct * firstChild;
4460 }
4561 * Node;
4662
4763 Node newNode (NodeType newType);
4864 Node newNodeI (NodeType newType, int data);
4965 Node newNodeS (NodeType newType, char* data);
50 -Node newNodeE (NodeType newType, ExtensionData data);
 66+Node newNodeN (NodeType newType, char* name, char* value, int copyname, int copyvalue); /* see NameValueStruct */
5167
 68+/* Used by the lexer to create a preliminary AttributeData object. */
 69+AttributeData newAttributeDataFromStr (char* str);
 70+
 71+/* Completes an AttributeData object created by newAttributeDataFromStr */
 72+Node newNodeA (int t, AttributeData ad, int sae, int sav);
 73+
5274 /* Return value of all of these is the first parameter */
5375 Node nodeAddChild (Node node, Node child);
5476 #define nodeAddChild2(a,b,c) nodeAddChild (nodeAddChild (a, b), c)
 77+#define nodeAddChild3(a,b,c,d) nodeAddChild (nodeAddChild (nodeAddChild (a, b), c), d)
5578 Node nodePrependChild (Node node, Node child);
5679 Node nodeAddSibling (Node node, Node sibling);
5780
@@ -63,26 +86,60 @@
6487 /* Returns a TextToken, or null if n < 1 */
6588 Node processEndHeadingInText (int n);
6689
 90+/* If 'node' is a paragraph node with no siblings, frees it and returns its child.
 91+ * (We do this because if a table cell contains only text, we don't want it to
 92+ * count as a "paragraph".) Otherwise just returns node. */
 93+Node processTableCellContents (Node node);
 94+
6795 /* If a is a TextBlock, adds b to it; if b is a TextBlock, prepends a;
6896 * if both are a TextBlock, adds b's children to a and frees b;
6997 * otherwise creates new TextBlock with a and b in it.
7098 * If any parameter is 0, returns the other. */
7199 Node makeTextBlock (Node a, Node b);
72100 #define makeTextBlock2(a,b,c) makeTextBlock (makeTextBlock (a, b), c)
 101+#define makeTextBlock3(a,b,c,d) makeTextBlock (makeTextBlock (makeTextBlock (a, b), c), d)
73102
 103+/* Parameter must be a LinkOption node, optionally with a string of
 104+ * siblings attached. These will all be freed, and a TextBlock returned. */
 105+Node convertPipeSeriesToText (Node node);
 106+
 107+/* Parameter must be an AttributeGroup node. It and its children will
 108+ * all be freed, and a TextBlock returned. */
 109+Node convertAttributesToText (Node node);
 110+
 111+/* Parameter will be freed, and a TextBlock returned.
 112+ * NOTICE: This will process ONLY the attribute name and the spaces after it. */
 113+Node convertAttributeDataToText (AttributeData data);
 114+
 115+/* These all return a TextToken node. */
 116+Node convertTableRowToText (int info);
 117+Node convertTableCellToText (int info);
 118+Node convertTableHeadToText (int info);
 119+
74120 /* Parameter must be a TextBlock. Turns something like
75121 * <italics>X<italics>Y</italics>Z</italics> into
76122 * <italics>X</italics>Y<italics>Z</italics>. Returns node. */
77123 Node processNestedItalics (Node node);
78124
79 -ExtensionData newExtensionData (char* name, char* text);
 125+char* outputXML (Node node, int initialBufferSize);
80126
81 -char* outputXML (Node node);
 127+/* To store the output, outputXML() will use a dynamically-growing character buffer (char*).
 128+ * The following routines manage such a buffer. */
 129+void fb_create_new_buffer (int size);
 130+void fb_write_to_buffer (const char* str);
 131+void fb_write_to_buffer_len (const char* str, int len);
 132+void fb_write_to_buffer_escaped (char* s);
 133+char* fb_get_buffer();
82134
83 -/* To store the output, outputXML() will use a dynamically-growing buffer.
84 - * Normally it will start at a size of 1 KB, but if your input is already,
85 - * say, 1 MB big, you might want to call this before outputXML() to save a
86 - * few buffer enlargements. ONLY call this function just before calling
87 - * outputXML(). */
88 -void fb_set_buffer_size (int size);
 135+/* More string helper routines ... */
89136
 137+/* e.g. addSpaces ("=", 2) => "=  " */
 138+char* addSpaces (char* src, int spaces);
 139+/* trims only *trailing* whitespace. Returns its parameter; does not create a new string. */
 140+char* strtrim (char* src);
 141+/* like strtrim, but returns the number of spaces removed. */
 142+int strtrimC (char* src);
 143+/* same as strtrim except takes a TextToken node */
 144+Node strtrimN (Node src);
 145+/* like strtrimN, but returns the number of spaces removed. */
 146+int strtrimNC (Node src);
Index: trunk/flexbisonparse/f
@@ -1 +1 @@
2 -cat ./test.txt | ./wiki
 2+./wiki < test.txt
Index: trunk/flexbisonparse/wikiparse.y
@@ -14,48 +14,81 @@
1515 #include <stdio.h>
1616 #include "parsetree.h"
1717 #include "fb_defines.h"
18 -int yyerror() { printf ("Syntax error.\n"); }
 18+int yyerror() { printf ("\n\nSYNTAX ERROR.\n\n"); }
1919
2020 Node articlenode;
 21+int i;
 22+
2123 %}
2224
2325 /* This defines the type of yylval */
2426 %union {
2527 Node node;
 28+ char* str;
2629 int num;
 30+ AttributeData ad;
2731 }
28 -%type <node> article block paragraph heading textorempty zeroormorenewlines oneormorenewlines
29 - preblock preline bulletlistline numberlistline listseries text listblock
 32+%type <node> article blocks block paragraph heading textorempty zeroormorenewlines preblock
 33+ oneormorenewlines preline bulletlistline numberlistline listseries listblock
3034 zeroormorenewlinessave oneormorenewlinessave bulletlistblock numberlistblock
3135 textelement textelementnoboit textelementnobold textelementnoital italicsorbold
3236 textnoboit textnobold textnoital boldnoitalics italicsnobold linketc pipeseries
33 - TEXT EXTENSION PRELINE
34 -%type <num> HEADING ENDHEADING
 37+ text attribute attributes tablecells tablecell tablecellcontents tablerows
 38+ tablerow table comment blocksnotbl blocknotbl textnoitaltbl textnoboldtbl
 39+ textnoboittbl textnotbl textelementnotbl textelementnoboldtbl textelementnoitaltbl
 40+ textelementnoboittbl paragraphnotbl linketcnotbl italorboldnotbl boldnoitalicstbl
 41+ italicsnoboldtbl pipeseriesnotbl
 42+ TEXT EXTENSION
 43+%type <ad> ATTRIBUTE
 44+%type <num> HEADING ENDHEADING TABLEBEGIN TABLECELL TABLEHEAD TABLEROW EQUALS ATTRAPO ATTRQ
3545
36 -%token EXTENSION EMPTYCOMMENT BEGINCOMMENT TEXT ENDCOMMENT OPENLINK OPENDBLSQBR CLOSEDBLSQBR PIPE
37 - NEWLINE PRELINE LISTBULLET LISTNUMBERED HEADING ENDHEADING APO5 APO3 APO2
 46+%token EXTENSION BEGINCOMMENT TEXT ENDCOMMENT OPENLINK OPENDBLSQBR CLOSEDBLSQBR PIPE
 47+ NEWLINE PRELINE LISTBULLET LISTNUMBERED HEADING ENDHEADING APO5 APO3 APO2 TABLEBEGIN
 48+ TABLECELL TABLEHEAD TABLEROW TABLEEND ATTRIBUTE EQUALS ATTRAPO ATTRQ
3849 // Not yet used:
3950 OPENPENTUPLECURLY CLOSEPENTUPLECURLY OPENTEMPLATEVAR CLOSETEMPLATEVAR OPENTEMPLATE
40 - CLOSETEMPLATE TABLEBEGIN TABLECELL TABLEHEAD TABLEROW TABLEEND
 51+ CLOSETEMPLATE
4152
4253 %start article
4354
4455 %%
4556 /* rules */
4657
 58+ /* TODO:
 59+ - optimise zeroormorenewlinessave (no need for Newlines nodes)
 60+ - find all 'memcpy's and add a 'sizeof (char)' wherever necessary
 61+
 62+ UNATTENDED-TO CAVEATS:
 63+ - a row beginning with TABLEBEGIN but not containing valid table mark-up
 64+ (e.g. "{| Hah!" + NEWLINE) is turned into a paragraph of its own even
 65+ if it and the next line are separated by only one newline (so they should
 66+ all be one paragraph).
 67+ */
 68+
4769 article : /* empty */ { debugf ("article#1 "); $$ = articlenode = newNode (Article); }
4870 | oneormorenewlines { debugf ("article#2 "); $$ = articlenode = newNode (Article); }
49 - | block { debugf ("article#3 "); $$ = articlenode = nodeAddChild (newNode (Article), $1); }
50 - | article block { debugf ("article#4 "); $$ = articlenode = nodeAddChild ($1, $2); }
 71+ | blocks { debugf ("article#3 "); $$ = articlenode = nodeAddChild (newNode (Article), $1); }
5172
52 -block : preblock { debugf ("block#1 "); $$ = processPreBlock ($1); }
53 - | heading zeroormorenewlines { debugf ("block#2 "); $$ = $1; }
54 - | listblock zeroormorenewlines { debugf ("block#3 "); $$ = $1; }
55 - | paragraph zeroormorenewlines { debugf ("block#4 "); $$ = $1; }
56 -/*
57 - | table zeroormorenewlines { debugf ("block#5 "); $$ = $1; }
58 -*/
 73+blocks : block { debugf ("blocks#1 "); $$ = $1; }
 74+ | blocks block { debugf ("blocks#2 "); $$ = nodeAddSibling ($1, $2); }
5975
 76+blocksnotbl : blocknotbl { debugf ("blocksnotbl#1 "); $$ = $1; }
 77+ | blocksnotbl blocknotbl { debugf ("blocksnotbl#2 "); $$ = nodeAddSibling ($1, $2); }
 78+
 79+block : preblock { debugf ("block#1 "); $$ = processPreBlock ($1); }
 80+ | heading zeroormorenewlines { debugf ("block#2 "); $$ = $1; }
 81+ | listblock zeroormorenewlines { debugf ("block#3 "); $$ = $1; }
 82+ | paragraph zeroormorenewlines { debugf ("block#4 "); $$ = $1; }
 83+ | table zeroormorenewlines { debugf ("block#5 "); $$ = $1; }
 84+ | comment zeroormorenewlines { debugf ("block#6 "); $$ = $1; }
 85+
 86+blocknotbl : preblock { debugf ("blocknotbl#1 "); $$ = processPreBlock ($1); }
 87+ | heading zeroormorenewlines { debugf ("blocknotbl#2 "); $$ = $1; }
 88+ | listblock zeroormorenewlines { debugf ("blocknotbl#3 "); $$ = $1; }
 89+ | paragraphnotbl zeroormorenewlines { debugf ("blocknotbl#4 "); $$ = $1; }
 90+ | table zeroormorenewlines { debugf ("blocknotbl#5 "); $$ = $1; }
 91+ | comment zeroormorenewlines { debugf ("blocknotbl#6 "); $$ = $1; }
 92+
6093 preblock : preline { debugf ("preblock#1 "); $$ = nodeAddChild (newNode (PreBlock), $1); }
6194 | preblock preline { debugf ("preblock#2 "); $$ = nodeAddChild ($1, $2); }
6295
@@ -103,66 +136,173 @@
104137 { debugf ("linketc#7 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 2), nodeAddChild (newNode (LinkTarget), $2), $3); }
105138 | OPENLINK text pipeseries PIPE CLOSEDBLSQBR
106139 { debugf ("linketc#8 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 3), nodeAddChild (newNode (LinkTarget), $2), $3); }
 140+ /* ... and now everything again with the CLOSEDBLSQBR missing,
 141+ * to take care of invalid mark-up. */
 142+ | OPENDBLSQBR text
 143+ { debugf ("linketc#9 "); $$ = makeTextBlock (newNodeS (TextToken, "[["), $2); }
 144+ | OPENDBLSQBR text PIPE
 145+ { debugf ("linketc#10 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, newNodeS (TextToken, "|")); }
 146+ | OPENDBLSQBR text pipeseries
 147+ { debugf ("linketc#11 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3)); }
 148+ | OPENDBLSQBR text pipeseries PIPE
 149+ { debugf ("linketc#12 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "|")); }
 150+ | OPENLINK text
 151+ { debugf ("linketc#13 "); $$ = makeTextBlock (newNodeS (TextToken, "[[:"), $2); }
 152+ | OPENLINK text PIPE
 153+ { debugf ("linketc#14 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, newNodeS (TextToken, "|")); }
 154+ | OPENLINK text pipeseries
 155+ { debugf ("linketc#15 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3)); }
 156+ | OPENLINK text pipeseries PIPE
 157+ { debugf ("linketc#16 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "|")); }
107158
 159+linketcnotbl : OPENDBLSQBR textnotbl CLOSEDBLSQBR
 160+ { debugf ("linketcnotbl#1 "); $$ = nodeAddChild (newNodeI (LinkEtc, 0), nodeAddChild (newNode (LinkTarget), $2)); }
 161+ | OPENDBLSQBR textnotbl PIPE CLOSEDBLSQBR
 162+ { debugf ("linketcnotbl#2 "); $$ = nodeAddChild (newNodeI (LinkEtc, 1), nodeAddChild (newNode (LinkTarget), $2)); }
 163+ | OPENDBLSQBR textnotbl pipeseriesnotbl CLOSEDBLSQBR
 164+ { debugf ("linketcnotbl#3 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 0), nodeAddChild (newNode (LinkTarget), $2), $3); }
 165+ | OPENDBLSQBR textnotbl pipeseriesnotbl PIPE CLOSEDBLSQBR
 166+ { debugf ("linketcnotbl#4 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 1), nodeAddChild (newNode (LinkTarget), $2), $3); }
 167+ | OPENLINK textnotbl CLOSEDBLSQBR
 168+ { debugf ("linketcnotbl#5 "); $$ = nodeAddChild (newNodeI (LinkEtc, 2), nodeAddChild (newNode (LinkTarget), $2)); }
 169+ | OPENLINK textnotbl PIPE CLOSEDBLSQBR
 170+ { debugf ("linketcnotbl#6 "); $$ = nodeAddChild (newNodeI (LinkEtc, 3), nodeAddChild (newNode (LinkTarget), $2)); }
 171+ | OPENLINK textnotbl pipeseriesnotbl CLOSEDBLSQBR
 172+ { debugf ("linketcnotbl#7 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 2), nodeAddChild (newNode (LinkTarget), $2), $3); }
 173+ | OPENLINK textnotbl pipeseriesnotbl PIPE CLOSEDBLSQBR
 174+ { debugf ("linketcnotbl#8 "); $$ = nodeAddChild2 (newNodeI (LinkEtc, 3), nodeAddChild (newNode (LinkTarget), $2), $3); }
 175+ /* ... and now everything again with the CLOSEDBLSQBR missing,
 176+ * to take care of invalid mark-up. */
 177+ | OPENDBLSQBR textnotbl
 178+ { debugf ("linketcnotbl#9 "); $$ = makeTextBlock (newNodeS (TextToken, "[["), $2); }
 179+ | OPENDBLSQBR textnotbl PIPE
 180+ { debugf ("linketcnotbl#10 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, newNodeS (TextToken, "|")); }
 181+ | OPENDBLSQBR textnotbl pipeseriesnotbl
 182+ { debugf ("linketcnotbl#11 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3)); }
 183+ | OPENDBLSQBR textnotbl pipeseriesnotbl PIPE
 184+ { debugf ("linketcnotbl#12 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[["), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "|")); }
 185+ | OPENLINK textnotbl
 186+ { debugf ("linketcnotbl#13 "); $$ = makeTextBlock (newNodeS (TextToken, "[[:"), $2); }
 187+ | OPENLINK textnotbl PIPE
 188+ { debugf ("linketcnotbl#14 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, newNodeS (TextToken, "|")); }
 189+ | OPENLINK textnotbl pipeseriesnotbl
 190+ { debugf ("linketcnotbl#15 "); $$ = makeTextBlock2 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3)); }
 191+ | OPENLINK textnotbl pipeseriesnotbl PIPE
 192+ { debugf ("linketcnotbl#16 "); $$ = makeTextBlock3 (newNodeS (TextToken, "[[:"), $2, convertPipeSeriesToText ($3), newNodeS (TextToken, "|")); }
 193+
108194 pipeseries : PIPE text { debugf ("pipeseries#1 "); $$ = nodeAddChild (newNode (LinkOption), $2); }
109 - | pipeseries PIPE text { debugf ("pipeseries#2 "); $$ = nodeAddSibling ($1, nodeAddChild (newNode (LinkOption), $3)); }
 195+ | PIPE text pipeseries { debugf ("pipeseries#2 "); $$ = nodeAddSibling (nodeAddChild (newNode (LinkOption), $2), $3); }
110196
 197+pipeseriesnotbl : PIPE textnotbl { debugf ("pipeseriesnotbl#1 "); $$ = nodeAddChild (newNode (LinkOption), $2); }
 198+ | PIPE textnotbl pipeseriesnotbl { debugf ("pipeseriesnotbl#2 "); $$ = nodeAddSibling (nodeAddChild (newNode (LinkOption), $2), $3); }
 199+
111200 textorempty : /* empty */ { debugf ("textorempty#1 "); $$ = newNodeS (TextToken, ""); }
112201 | text { debugf ("textorempty#2 "); $$ = $1; }
113202
114203 italicsorbold : APO2 textnoital APO2
115 - { debugf ("italicsorbold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
 204+ { debugf ("italicsorbold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
116205 | APO2 textnoital APO3 textnoboit APO5
117206 { debugf ("italicsorbold#2 "); $$ = nodeAddChild (newNode (Italics),
118 - makeTextBlock ($2, nodeAddChild (newNode (Bold), $4))); }
 207+ makeTextBlock ($2, nodeAddChild (newNode (Bold), $4))); }
119208 | APO2 textnoital APO3 textnoboit
120209 { debugf ("italicsorbold#3 "); $$ =
121 - makeTextBlock2 (nodeAddChild (newNode (Italics), $2), newNodeS (TextToken, "'"), $4); }
 210+ makeTextBlock2 (nodeAddChild (newNode (Italics), $2), newNodeS (TextToken, "'"), $4); }
122211 | APO2 textnoital
123 - { debugf ("italicsorbold#4 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
 212+ { debugf ("italicsorbold#4 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
124213 | APO3 textnobold APO3
125 - { debugf ("italicsorbold#5 "); $$ = nodeAddChild (newNode (Bold), $2); }
 214+ { debugf ("italicsorbold#5 "); $$ = nodeAddChild (newNode (Bold), $2); }
126215 | APO3 textnobold APO2 textnoboit APO5
127216 { debugf ("italicsorbold#6 "); $$ = nodeAddChild (newNode (Bold),
128 - makeTextBlock ($2, nodeAddChild (newNode (Italics), $4))); }
 217+ makeTextBlock ($2, nodeAddChild (newNode (Italics), $4))); }
129218 /* Peculiar case, especially for French l'''homme'' => l'<italics>homme</italics> */
130219 /* We have to use textnobold here, even though textnoital would be logical. */
131220 /* We use processNestedItalics to fix the weirdness produced by this. */
132221 | APO3 textnobold APO2 textnoboit
133222 { debugf ("italicsorbold#7 "); $$ = processNestedItalics (makeTextBlock2 (newNodeS
134 - (TextToken, "'"), nodeAddChild (newNode (Italics), $2), $4)); }
 223+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2), $4)); }
135224 | APO3 textnobold APO2
136225 { debugf ("italicsorbold#8 "); $$ = processNestedItalics (makeTextBlock (newNodeS
137 - (TextToken, "'"), nodeAddChild (newNode (Italics), $2))); }
 226+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2))); }
138227 | APO3 textnobold
139 - { debugf ("italicsorbold#9 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
 228+ { debugf ("italicsorbold#9 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
140229 | APO5 textnoboit APO3 textnoital APO2
141230 { debugf ("italicsorbold#10 "); $$ = nodeAddChild (newNode (Italics),
142 - makeTextBlock (nodeAddChild (newNode (Bold), $2), $4)); }
 231+ makeTextBlock (nodeAddChild (newNode (Bold), $2), $4)); }
143232 | APO5 textnoboit APO2 textnobold APO3
144233 { debugf ("italicsorbold#11 "); $$ = nodeAddChild (newNode (Bold),
145 - makeTextBlock (nodeAddChild (newNode (Italics), $2), $4)); }
 234+ makeTextBlock (nodeAddChild (newNode (Italics), $2), $4)); }
146235 | APO5 textnoboit APO3 textnoital
147236 { debugf ("italicsorbold#12 "); $$ = makeTextBlock2 (newNodeS (TextToken, "''"),
148 - nodeAddChild (newNode (Bold), $2), $4); }
 237+ nodeAddChild (newNode (Bold), $2), $4); }
149238 | APO5 textnoboit APO2 textnobold
150239 { debugf ("italicsorbold#13 "); $$ = makeTextBlock2 (newNodeS (TextToken, "'''"),
151 - nodeAddChild (newNode (Italics), $2), $4); }
 240+ nodeAddChild (newNode (Italics), $2), $4); }
152241 | APO5 textnoboit
153242 { debugf ("italicsorbold#14 ");
154 - $$ = makeTextBlock (newNodeS (TextToken, "'''''"), $2); }
 243+ $$ = makeTextBlock (newNodeS (TextToken, "'''''"), $2); }
155244
 245+italorboldnotbl : APO2 textnoitaltbl APO2
 246+ { debugf ("italorboldnotbl#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
 247+ | APO2 textnoitaltbl APO3 textnoboittbl APO5
 248+ { debugf ("italorboldnotbl#2 "); $$ = nodeAddChild (newNode (Italics),
 249+ makeTextBlock ($2, nodeAddChild (newNode (Bold), $4))); }
 250+ | APO2 textnoitaltbl APO3 textnoboittbl
 251+ { debugf ("italorboldnotbl#3 "); $$ =
 252+ makeTextBlock2 (nodeAddChild (newNode (Italics), $2), newNodeS (TextToken, "'"), $4); }
 253+ | APO2 textnoitaltbl
 254+ { debugf ("italorboldnotbl#4 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
 255+ | APO3 textnoboldtbl APO3
 256+ { debugf ("italorboldnotbl#5 "); $$ = nodeAddChild (newNode (Bold), $2); }
 257+ | APO3 textnoboldtbl APO2 textnoboittbl APO5
 258+ { debugf ("italorboldnotbl#6 "); $$ = nodeAddChild (newNode (Bold),
 259+ makeTextBlock ($2, nodeAddChild (newNode (Italics), $4))); }
 260+ /* Peculiar case, especially for French l'''homme'' => l'<italics>homme</italics> */
 261+ /* We have to use textnoboldtbl here, even though textnoitaltbl would be logical. */
 262+ /* We use processNestedItalics to fix the weirdness produced by this. */
 263+ | APO3 textnoboldtbl APO2 textnoboittbl
 264+ { debugf ("italorboldnotbl#7 "); $$ = processNestedItalics (makeTextBlock2 (newNodeS
 265+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2), $4)); }
 266+ | APO3 textnoboldtbl APO2
 267+ { debugf ("italorboldnotbl#8 "); $$ = processNestedItalics (makeTextBlock (newNodeS
 268+ (TextToken, "'"), nodeAddChild (newNode (Italics), $2))); }
 269+ | APO3 textnoboldtbl
 270+ { debugf ("italorboldnotbl#9 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
 271+ | APO5 textnoboittbl APO3 textnoitaltbl APO2
 272+ { debugf ("italorboldnotbl#10 "); $$ = nodeAddChild (newNode (Italics),
 273+ makeTextBlock (nodeAddChild (newNode (Bold), $2), $4)); }
 274+ | APO5 textnoboittbl APO2 textnoboldtbl APO3
 275+ { debugf ("italorboldnotbl#11 "); $$ = nodeAddChild (newNode (Bold),
 276+ makeTextBlock (nodeAddChild (newNode (Italics), $2), $4)); }
 277+ | APO5 textnoboittbl APO3 textnoitaltbl
 278+ { debugf ("italorboldnotbl#12 "); $$ = makeTextBlock2 (newNodeS (TextToken, "''"),
 279+ nodeAddChild (newNode (Bold), $2), $4); }
 280+ | APO5 textnoboittbl APO2 textnoboldtbl
 281+ { debugf ("italorboldnotbl#13 "); $$ = makeTextBlock2 (newNodeS (TextToken, "'''"),
 282+ nodeAddChild (newNode (Italics), $2), $4); }
 283+ | APO5 textnoboittbl
 284+ { debugf ("italorboldnotbl#14 ");
 285+ $$ = makeTextBlock (newNodeS (TextToken, "'''''"), $2); }
156286
157287 italicsnobold : APO2 textnoboit APO2
158 - { debugf ("italicsnobold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
 288+ { debugf ("italicsnobold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
159289 | APO2 textnoboit
160 - { debugf ("italicsnobold#2 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
 290+ { debugf ("italicsnobold#2 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
161291
162292 boldnoitalics : APO3 textnoboit APO3
163 - { debugf ("boldnoitalics#1 "); $$ = nodeAddChild (newNode (Bold), $2); }
 293+ { debugf ("boldnoitalics#1 "); $$ = nodeAddChild (newNode (Bold), $2); }
164294 | APO3 textnoboit
165 - { debugf ("boldnoitalics#2 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
 295+ { debugf ("boldnoitalics#2 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
166296
 297+italicsnoboldtbl: APO2 textnoboittbl APO2
 298+ { debugf ("italicsnobold#1 "); $$ = nodeAddChild (newNode (Italics), $2); }
 299+ | APO2 textnoboittbl
 300+ { debugf ("italicsnobold#2 "); $$ = makeTextBlock (newNodeS (TextToken, "''"), $2); }
 301+
 302+boldnoitalicstbl: APO3 textnoboittbl APO3
 303+ { debugf ("boldnoitalics#1 "); $$ = nodeAddChild (newNode (Bold), $2); }
 304+ | APO3 textnoboittbl
 305+ { debugf ("boldnoitalics#2 "); $$ = makeTextBlock (newNodeS (TextToken, "'''"), $2); }
 306+
167307 /* In order to resolve a reduce/reduce conflict correctly, heading must come before textelement. */
168308 heading : HEADING text ENDHEADING NEWLINE
169309 { debugf ("heading#1 "); $$ = nodeAddChild (newNodeI (Heading, $1), $2); }
@@ -177,6 +317,87 @@
178318 | HEADING
179319 { debugf ("heading#6 "); $$ = nodeAddChild (newNodeI (Heading, $1), newNodeS (TextToken, "?")); }
180320
 321+table : TABLEBEGIN attributes tablerows TABLEEND
 322+ { debugf ("table#1 "); $$ = nodeAddChild2 (newNode (Table), $2, $3); }
 323+ | TABLEBEGIN attributes tablerows
 324+ { debugf ("table#2 "); $$ = nodeAddChild2 (newNode (Table), $2, $3); }
 325+ | TABLEBEGIN attributes oneormorenewlines tablerows TABLEEND
 326+ { debugf ("table#3 "); $$ = nodeAddChild2 (newNode (Table), $2, $4); }
 327+ | TABLEBEGIN attributes oneormorenewlines tablerows
 328+ { debugf ("table#4 "); $$ = nodeAddChild2 (newNode (Table), $2, $4); }
 329+ | TABLEBEGIN tablerows TABLEEND
 330+ { debugf ("table#5 "); $$ = nodeAddChild (newNode (Table), $2); }
 331+ | TABLEBEGIN tablerows
 332+ { debugf ("table#6 "); $$ = nodeAddChild (newNode (Table), $2); }
 333+ | TABLEBEGIN oneormorenewlines tablerows TABLEEND
 334+ { debugf ("table#7 "); $$ = nodeAddChild (newNode (Table), $3); }
 335+ | TABLEBEGIN oneormorenewlines tablerows
 336+ { debugf ("table#8 "); $$ = nodeAddChild (newNode (Table), $3); }
 337+ /* and now some invalid mark-up catering ... */
 338+ | TABLEBEGIN attributes zeroormorenewlines
 339+ { debugf ("table#9 "); $$ = nodeAddChild (newNode (Paragraph),
 340+ makeTextBlock (newNodeS (TextToken, addSpaces ("{|", $1)),
 341+ convertAttributesToText ($2))); }
 342+ | TABLEBEGIN attributes text zeroormorenewlines
 343+ { debugf ("table#10 "); $$ = nodeAddChild (newNode (Paragraph),
 344+ makeTextBlock2 (newNodeS (TextToken, addSpaces ("{|", $1)),
 345+ convertAttributesToText ($2), $3)); }
 346+ | TABLEBEGIN text zeroormorenewlines
 347+ { debugf ("table#11 "); $$ = nodeAddChild (newNode (Paragraph),
 348+ makeTextBlock (newNodeS (TextToken, addSpaces ("{|", $1)), $3)); }
 349+
 350+tablerows : tablerow { debugf ("tablerows#1 "); $$ = $1; }
 351+ | tablerows tablerow { debugf ("tablerows#2 "); $$ = nodeAddSibling ($1, $2); }
 352+
 353+tablerow : TABLEROW attributes tablecells
 354+ { debugf ("tablerow#1 "); $$ = nodeAddChild2 (newNode (TableRow), $2, $3); }
 355+ | TABLEROW tablecells
 356+ { debugf ("tablerow#2 "); $$ = nodeAddChild (newNode (TableRow), $2); }
 357+ | TABLEROW attributes oneormorenewlines tablecells
 358+ { debugf ("tablerow#3 "); $$ = nodeAddChild2 (newNode (TableRow), $2, $4); }
 359+ | TABLEROW oneormorenewlines tablecells
 360+ { debugf ("tablerow#4 "); $$ = nodeAddChild (newNode (TableRow), $3); }
 361+ | TABLEROW text zeroormorenewlines
 362+ { debugf ("tablerow#5 "); $$ = nodeAddChild (newNode (TableRow), nodeAddChild (newNode (TableCell), $2)); }
 363+ | TABLEROW attributes text zeroormorenewlines
 364+ { debugf ("tablerow#6 "); $$ = nodeAddChild (newNode (TableRow), nodeAddChild2 (newNode (TableCell), convertAttributesToText ($2), $3)); }
 365+ | TABLEROW zeroormorenewlines
 366+ { debugf ("tablerow#7 "); $$ = 0; }
 367+ /* It is possible for the first table row to have no TABLEROW token */
 368+ | tablecells
 369+ { debugf ("tablerow#8 "); $$ = nodeAddChild (newNode (TableRow), $1); }
 370+
 371+tablecells : tablecell { debugf ("tablecells#1 "); $$ = $1; }
 372+ | tablecells tablecell { debugf ("tablecells#2 "); $$ = nodeAddSibling ($1, $2); }
 373+
 374+tablecell : TABLECELL attributes PIPE tablecellcontents
 375+ { debugf ("tablecell#1 "); $$ = nodeAddChild2 (newNode (TableCell), $2, processTableCellContents ($4)); }
 376+ | TABLECELL tablecellcontents
 377+ { debugf ("tablecell#2 "); $$ = nodeAddChild (newNode (TableCell), processTableCellContents ($2)); }
 378+
 379+tablecellcontents : blocksnotbl
 380+ { debugf ("tablecellcontents#1 "); $$ = $1; }
 381+ | oneormorenewlines blocksnotbl
 382+ { debugf ("tablecellcontents#2 "); $$ = $2; }
 383+
 384+/* In order to reduce the second one (ATTRIBUTE EQUALS TEXT) correctly, this rule must
 385+ * be further up than textelement. */
 386+attribute : ATTRIBUTE
 387+ { debugf ("attribute#1 "); $$ = newNodeA (0, $1, 0, 0); }
 388+ | ATTRIBUTE EQUALS TEXT
 389+ { debugf ("attribute#2 "); $$ = nodeAddChild (newNodeA (1, $1, $2, strtrimNC ($3)), $3); }
 390+ | ATTRIBUTE EQUALS ATTRAPO text ATTRAPO
 391+ { debugf ("attribute#3 "); $$ = nodeAddChild (newNodeA (2, $1, $2, $5), $4); }
 392+ | ATTRIBUTE EQUALS ATTRQ text ATTRQ
 393+ { debugf ("attribute#4 "); $$ = nodeAddChild (newNodeA (3, $1, $2, $5), $4); }
 394+ | ATTRIBUTE EQUALS ATTRQ ATTRQ
 395+ { debugf ("attribute#5 "); $$ = newNodeA (3, $1, $2, $4); }
 396+ | ATTRIBUTE EQUALS
 397+ { debugf ("attribute#6 "); $$ = newNodeA (1, $1, $2, 0); }
 398+
 399+attributes : attribute { debugf ("attributes#1 "); $$ = nodeAddChild (newNode (AttributeGroup), $1); }
 400+ | attributes attribute { debugf ("attributes#2 "); $$ = nodeAddChild ($1, $2); }
 401+
181402 text : textelement { debugf ("text#1 "); $$ = $1; }
182403 | text textelement { debugf ("text#2 "); $$ = makeTextBlock ($1, $2); }
183404 textnoital : textelementnoital { debugf ("textnoital#1 "); $$ = $1; }
@@ -185,50 +406,106 @@
186407 | textnobold textelementnobold { debugf ("textnobold#2 "); $$ = makeTextBlock ($1, $2); }
/* Same shape as 'text', but built from textelementnoboit elements. */
187408 textnoboit : textelementnoboit { debugf ("textnoboit#1 "); $$ = $1; }
188409 | textnoboit textelementnoboit { debugf ("textnoboit#2 "); $$ = makeTextBlock ($1, $2); }
 /* Same shape as 'text', but built from textelementnotbl elements. */
 410+textnotbl : textelementnotbl { debugf ("textnotbl#1 "); $$ = $1; }
 411+ | textnotbl textelementnotbl { debugf ("textnotbl#2 "); $$ = makeTextBlock ($1, $2); }
 /* Same shape as 'text', but built from textelementnoitaltbl elements. */
 412+textnoitaltbl : textelementnoitaltbl { debugf ("textnoitaltbl#1 "); $$ = $1; }
 413+ | textnoitaltbl textelementnoitaltbl { debugf ("textnoitaltbl#2 "); $$ = makeTextBlock ($1, $2); }
 /* Same shape as 'text', but built from textelementnoboldtbl elements. */
 414+textnoboldtbl : textelementnoboldtbl { debugf ("textnoboldtbl#1 "); $$ = $1; }
 415+ | textnoboldtbl textelementnoboldtbl { debugf ("textnoboldtbl#2 "); $$ = makeTextBlock ($1, $2); }
 /* Same shape as 'text', but built from textelementnoboittbl elements. */
 416+textnoboittbl : textelementnoboittbl { debugf ("textnoboittbl#1 "); $$ = $1; }
 417+ | textnoboittbl textelementnoboittbl { debugf ("textnoboittbl#2 "); $$ = makeTextBlock ($1, $2); }
189418
190419 textelement : TEXT { debugf ("textelement#1 "); $$ = $1; }
191420 | EXTENSION { debugf ("textelement#2 "); $$ = $1; }
192421 | PIPE { debugf ("textelement#3 "); $$ = newNodeS (TextToken, "|"); }
193422 | ENDHEADING { debugf ("textelement#4 "); $$ = processEndHeadingInText ($1); }
194 - | italicsorbold { debugf ("textelement#5 "); $$ = $1; }
195 - | APO2 { debugf ("textelement#6 "); $$ = newNodeS (TextToken, "''"); }
196 - | APO3 { debugf ("textelement#7 "); $$ = newNodeS (TextToken, "'''"); }
197 - | APO5 { debugf ("textelement#8 "); $$ = newNodeS (TextToken, "'''''"); }
198 - | linketc { debugf ("textelement#9 "); $$ = $1; }
 423+ | APO2 { debugf ("textelement#5 "); $$ = newNodeS (TextToken, "''"); }
 424+ | APO3 { debugf ("textelement#6 "); $$ = newNodeS (TextToken, "'''"); }
 425+ | APO5 { debugf ("textelement#7 "); $$ = newNodeS (TextToken, "'''''"); }
 426+ | EQUALS { debugf ("textelement#8 "); $$ = newNodeS (TextToken, addSpaces ("=", $1)); }
 427+ | TABLEBEGIN { debugf ("textelement#9 "); $$ = newNodeS (TextToken, addSpaces ("{|", $1)); }
 428+ | TABLEEND { debugf ("textelement#10 "); $$ = newNodeS (TextToken, "|}"); }
 429+ | TABLEROW { debugf ("textelement#11 "); $$ = convertTableRowToText ($1); }
 430+ | TABLECELL { debugf ("textelement#12 "); $$ = convertTableCellToText ($1); }
 431+ | TABLEHEAD { debugf ("textelement#13 "); $$ = convertTableHeadToText ($1); }
 432+ | ATTRIBUTE { debugf ("textelement#14 "); $$ = convertAttributeDataToText ($1); }
 433+ | comment { debugf ("textelement#15 "); $$ = $1; }
 434+ | linketc { debugf ("textelement#16 "); $$ = $1; }
 435+ | italicsorbold { debugf ("textelement#17 "); $$ = $1; }
199436
200437 textelementnoital : TEXT { debugf ("textelementnoital#1 "); $$ = $1; }
201438 | EXTENSION { debugf ("textelementnoital#2 "); $$ = $1; }
202439 | PIPE { debugf ("textelementnoital#3 "); $$ = newNodeS (TextToken, "|"); }
203440 | ENDHEADING { debugf ("textelementnoital#4 "); $$ = processEndHeadingInText ($1); }
204 - | boldnoitalics { debugf ("textelementnoital#5 "); $$ = $1; }
 441+ | TABLEBEGIN { debugf ("textelementnoital#5 "); $$ = newNodeS (TextToken, addSpaces ("{|", $1)); }
 442+ | TABLEEND { debugf ("textelementnoital#6 "); $$ = newNodeS (TextToken, "|}"); }
 443+ | TABLEROW { debugf ("textelementnoital#7 "); $$ = convertTableRowToText ($1); }
 444+ | TABLECELL { debugf ("textelementnoital#8 "); $$ = convertTableCellToText ($1); }
 445+ | TABLEHEAD { debugf ("textelementnoital#9 "); $$ = convertTableHeadToText ($1); }
 446+ | comment { debugf ("textelementnoital#10 "); $$ = $1; }
 447+ | linketc { debugf ("textelementnoital#11 "); $$ = $1; }
 448+ | boldnoitalics { debugf ("textelementnoital#12 "); $$ = $1; }
205449
206450 textelementnobold : TEXT { debugf ("textelementnobold#1 "); $$ = $1; }
207451 | EXTENSION { debugf ("textelementnobold#2 "); $$ = $1; }
208452 | PIPE { debugf ("textelementnobold#3 "); $$ = newNodeS (TextToken, "|"); }
209453 | ENDHEADING { debugf ("textelementnobold#4 "); $$ = processEndHeadingInText ($1); }
210 - | italicsnobold { debugf ("textelementnobold#5 "); $$ = $1; }
 454+ | TABLEBEGIN { debugf ("textelementnobold#5 "); $$ = newNodeS (TextToken, addSpaces ("{|", $1)); }
 455+ | TABLEEND { debugf ("textelementnobold#6 "); $$ = newNodeS (TextToken, "|}"); }
 456+ | TABLEROW { debugf ("textelementnobold#7 "); $$ = convertTableRowToText ($1); }
 457+ | TABLECELL { debugf ("textelementnobold#8 "); $$ = convertTableCellToText ($1); }
 458+ | TABLEHEAD { debugf ("textelementnobold#9 "); $$ = convertTableHeadToText ($1); }
 459+ | comment { debugf ("textelementnobold#10 "); $$ = $1; }
 460+ | linketc { debugf ("textelementnobold#11 "); $$ = $1; }
 461+ | italicsnobold { debugf ("textelementnobold#12 "); $$ = $1; }
211462
/* Text element variant with no bold or italics alternative at all. PIPE and the
 * table tokens (TABLEBEGIN, TABLEEND, TABLEROW, TABLECELL, TABLEHEAD) are turned
 * back into literal TextToken nodes; comments and links are passed through. */
212463 textelementnoboit : TEXT { debugf ("textelementnoboit#1 "); $$ = $1; }
213464 | EXTENSION { debugf ("textelementnoboit#2 "); $$ = $1; }
214465 | PIPE { debugf ("textelementnoboit#3 "); $$ = newNodeS (TextToken, "|"); }
215466 | ENDHEADING { debugf ("textelementnoboit#4 "); $$ = processEndHeadingInText ($1); }
 467+ | TABLEBEGIN { debugf ("textelementnoboit#5 "); $$ = newNodeS (TextToken, addSpaces ("{|", $1)); }
 468+ | TABLEEND { debugf ("textelementnoboit#6 "); $$ = newNodeS (TextToken, "|}"); }
 469+ | TABLEROW { debugf ("textelementnoboit#7 "); $$ = convertTableRowToText ($1); }
 470+ | TABLECELL { debugf ("textelementnoboit#8 "); $$ = convertTableCellToText ($1); }
 471+ | TABLEHEAD { debugf ("textelementnoboit#9 "); $$ = convertTableHeadToText ($1); }
 472+ | comment { debugf ("textelementnoboit#10 "); $$ = $1; }
 473+ | linketc { debugf ("textelementnoboit#11 "); $$ = $1; }
216474
217 -paragraph : text NEWLINE
218 - { debugf ("paragraph#1 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
219 - | text NEWLINE paragraph /* needs to be right-recursive due to eof */
220 - { debugf ("paragraph#2 "); $$ = nodePrependChild (nodePrependChild ($3,
221 - newNodeS (TextToken, " ")), $1); }
222 - | text /* for eof */
223 - { debugf ("paragraph#3 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
 /* Text element variant that has no alternatives for the table tokens
  * (TABLEBEGIN, TABLEEND, TABLEROW, TABLECELL, TABLEHEAD) or for ATTRIBUTE.
  * Stray apostrophe runs and EQUALS become literal TextToken nodes (EQUALS gets
  * its trailing spaces back via addSpaces); links and bold/italics come from the
  * linketcnotbl and italorboldnotbl rules. */
 475+textelementnotbl : TEXT { debugf ("textelementnotbl#1 "); $$ = $1; }
 476+ | EXTENSION { debugf ("textelementnotbl#2 "); $$ = $1; }
 477+ | PIPE { debugf ("textelementnotbl#3 "); $$ = newNodeS (TextToken, "|"); }
 478+ | ENDHEADING { debugf ("textelementnotbl#4 "); $$ = processEndHeadingInText ($1); }
 479+ | APO2 { debugf ("textelementnotbl#5 "); $$ = newNodeS (TextToken, "''"); }
 480+ | APO3 { debugf ("textelementnotbl#6 "); $$ = newNodeS (TextToken, "'''"); }
 481+ | APO5 { debugf ("textelementnotbl#7 "); $$ = newNodeS (TextToken, "'''''"); }
 482+ | EQUALS { debugf ("textelementnotbl#8 "); $$ = newNodeS (TextToken, addSpaces ("=", $1)); }
 483+ | comment { debugf ("textelementnotbl#9 "); $$ = $1; }
 484+ | linketcnotbl { debugf ("textelementnotbl#10 "); $$ = $1; }
 485+ | italorboldnotbl { debugf ("textelementnotbl#11 "); $$ = $1; }
224486
225 -/*
226 -table : TABLEBEGIN tablerows TABLEEND { debugf ("table#1 "); $$ = $2; }
227 - | TABLEBEGIN tablerows /* eof * /{ debugf ("table#2 "); $$ = $2; }
 /* Table-free text element variant whose only emphasis alternative is
  * boldnoitalicstbl; it has no apostrophe-run, EQUALS or table-token
  * alternatives. Presumably used for text that is already inside italics --
  * TODO confirm against the italics rules. */
 487+textelementnoitaltbl: TEXT { debugf ("textelementnoitaltbl#1 "); $$ = $1; }
 488+ | EXTENSION { debugf ("textelementnoitaltbl#2 "); $$ = $1; }
 489+ | PIPE { debugf ("textelementnoitaltbl#3 "); $$ = newNodeS (TextToken, "|"); }
 490+ | ENDHEADING { debugf ("textelementnoitaltbl#4 "); $$ = processEndHeadingInText ($1); }
 491+ | comment { debugf ("textelementnoitaltbl#5 "); $$ = $1; }
 492+ | linketcnotbl { debugf ("textelementnoitaltbl#6 "); $$ = $1; }
 493+ | boldnoitalicstbl{ debugf ("textelementnoitaltbl#7 "); $$ = $1; }
228494
229 -tablerows : tablerow { debugf ("tablerows#1 "); $$ = $1; }
230 - | tablerows tablerow { debugf ("tablerows#2 ");
231 -*/
 /* Table-free text element variant whose only emphasis alternative is
  * italicsnoboldtbl; it has no apostrophe-run, EQUALS or table-token
  * alternatives. Presumably used for text that is already inside bold --
  * TODO confirm against the bold rules. */
 495+textelementnoboldtbl: TEXT { debugf ("textelementnoboldtbl#1 "); $$ = $1; }
 496+ | EXTENSION { debugf ("textelementnoboldtbl#2 "); $$ = $1; }
 497+ | PIPE { debugf ("textelementnoboldtbl#3 "); $$ = newNodeS (TextToken, "|"); }
 498+ | ENDHEADING { debugf ("textelementnoboldtbl#4 "); $$ = processEndHeadingInText ($1); }
 499+ | comment { debugf ("textelementnoboldtbl#5 "); $$ = $1; }
 500+ | linketcnotbl { debugf ("textelementnoboldtbl#6 "); $$ = $1; }
 501+ | italicsnoboldtbl{ debugf ("textelementnoboldtbl#7 "); $$ = $1; }
232502
 /* Table-free text element variant with no emphasis alternative at all: only
  * plain tokens, comments and linketcnotbl are accepted. */
 503+textelementnoboittbl: TEXT { debugf ("textelementnoboittbl#1 "); $$ = $1; }
 504+ | EXTENSION { debugf ("textelementnoboittbl#2 "); $$ = $1; }
 505+ | PIPE { debugf ("textelementnoboittbl#3 "); $$ = newNodeS (TextToken, "|"); }
 506+ | ENDHEADING { debugf ("textelementnoboittbl#4 "); $$ = processEndHeadingInText ($1); }
 507+ | comment { debugf ("textelementnoboittbl#5 "); $$ = $1; }
 508+ | linketcnotbl { debugf ("textelementnoboittbl#6 "); $$ = $1; }
 509+
/* An optional run of newlines. The semantic value is always 0: the newlines are
 * recognised but nothing is kept in the parse tree. */
233510 zeroormorenewlines : /* empty */ { debugf ("zeroormorenewlines#1 "); $$ = 0; }
234511 | oneormorenewlines { debugf ("zeroormorenewlines#2 "); $$ = 0; }
235512 oneormorenewlines : NEWLINE { debugf ("oneormorenewlines#1 "); $$ = 0; }
@@ -239,7 +516,52 @@
/* A run of newlines that is kept as a Newlines node. The node starts with a
 * count of 0 for the first NEWLINE and data.num is incremented for each further
 * one, so the stored number is the count of newlines minus one. */
240517 oneormorenewlinessave : NEWLINE { debugf ("oneormorenewlinessave#1 "); $$ = newNodeI (Newlines, 0); }
241518 | oneormorenewlinessave NEWLINE { debugf ("oneormorenewlinessave#2 "); $1->data.num++; $$ = $1; }
242519
 /* A paragraph: one or more lines of text, left-recursive. The first line creates
  * the Paragraph node; every further line is appended to it, preceded by a
  * TextToken holding a single space that stands in for the line break.
  * #3 and #4 mirror #1 and #2 without the trailing NEWLINE so that a last line
  * ending at end of input is still accepted. */
 520+paragraph : text NEWLINE
 521+ { debugf ("paragraph#1 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
 522+ | paragraph text NEWLINE
 523+ { debugf ("paragraph#2 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
 524+ /* for eof ... */
 525+ | text
 526+ { debugf ("paragraph#3 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
 527+ | paragraph text
 528+ { debugf ("paragraph#4 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
243529
 /* Paragraph variant built from textnotbl. Each line may start with an
  * 'attributes' group, which is flattened back into text with
  * convertAttributesToText() before being added to the Paragraph node.
  *   #1-#3   first line: text only / attributes + text / attributes only
  *   #4-#6   the same three forms for a continuation line, joined to the
  *           existing paragraph by a single-space TextToken
  *   #7-#12  the same six forms without the trailing NEWLINE (end of input) */
 530+/* This seemingly pointless inclusion of 'attributes' here that will all be converted to text
 531+ * by way of convertAttributesToText() is necessary because, as a table cell begins, we simply
 532+ * don't know whether there are attributes following or not. We parse them as attributes first,
 533+ * but then convert them back to text if it turns out they're not. */
 534+paragraphnotbl : textnotbl NEWLINE
 535+ { debugf ("paragraphnotbl#1 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
 536+ | attributes textnotbl NEWLINE
 537+ { debugf ("paragraphnotbl#2 "); $$ = nodeAddChild2 (newNode (Paragraph), convertAttributesToText ($1), $2); }
 538+ | attributes NEWLINE
 539+ { debugf ("paragraphnotbl#3 "); $$ = nodeAddChild (newNode (Paragraph), convertAttributesToText ($1)); }
 540+ | paragraphnotbl textnotbl NEWLINE
 541+ { debugf ("paragraphnotbl#4 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
 542+ | paragraphnotbl attributes textnotbl NEWLINE
 543+ { debugf ("paragraphnotbl#5 "); $$ = nodeAddChild3 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2), $3); }
 544+ | paragraphnotbl attributes NEWLINE
 545+ { debugf ("paragraphnotbl#6 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2)); }
 546+ /* for eof ... */
 547+ | textnotbl
 548+ { debugf ("paragraphnotbl#7 "); $$ = nodeAddChild (newNode (Paragraph), $1); }
 549+ | attributes textnotbl
 550+ { debugf ("paragraphnotbl#8 "); $$ = nodeAddChild2 (newNode (Paragraph), convertAttributesToText ($1), $2); }
 551+ | attributes
 552+ { debugf ("paragraphnotbl#9 "); $$ = nodeAddChild (newNode (Paragraph), convertAttributesToText ($1)); }
 553+ | paragraphnotbl textnotbl
 554+ { debugf ("paragraphnotbl#10 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), $2); }
 555+ | paragraphnotbl attributes textnotbl
 556+ { debugf ("paragraphnotbl#11 "); $$ = nodeAddChild3 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2), $3); }
 557+ | paragraphnotbl attributes
 558+ { debugf ("paragraphnotbl#12 "); $$ = nodeAddChild2 ($1, newNodeS (TextToken, " "), convertAttributesToText ($2)); }
 559+
 /* A comment delimited by BEGINCOMMENT and ENDCOMMENT. The enclosed text, if
  * any, becomes the child of a Comment node; an empty comment yields a Comment
  * node without children. */
 560+comment : BEGINCOMMENT text ENDCOMMENT
 561+ { debugf ("comment#1 "); $$ = nodeAddChild (newNode (Comment), $2); }
 562+ | BEGINCOMMENT ENDCOMMENT
 563+ { debugf ("comment#2 "); $$ = newNode (Comment); }
 564+
 565+
244566 %%
245567
246568 /* programs */
@@ -249,7 +571,7 @@
250572 printf ("Parsing... ");
251573 result = yyparse();
252574 if (!result)
253 - printf ("\n\nXML output:\n\n%s\n\n", outputXML (articlenode));
 575+ printf ("\n\nXML output:\n\n%s\n\n", outputXML (articlenode, 1024));
254576 return result;
255577 }
256578
@@ -264,13 +586,13 @@
265587 * end with *two* NULs instead of just one. Thus yy_scan_string is the easiest way for now. */
266588 yy_scan_string (input);
267589
268 - /* Start with an output buffer twice the size of the input, but at least 1 KB. This should
269 - * normally be plenty. If it isn't, it will grow automatically. */
270 - i = 2*strlen (input);
271 - fb_set_buffer_size (i < 1024 ? 1024 : i);
272 -
273590 result = yyparse();
274591 if (!result)
275 - return outputXML (articlenode);
 592+ {
 593+ /* Start with an output buffer twice the size of the input, but at least 1 KB. This should
 594+ * normally be plenty. If it isn't, it will grow automatically. */
 595+ i = 2*strlen (input);
 596+ return outputXML (articlenode, i < 1024 ? 1024 : i);
 597+ }
276598 return "<error />";
277599 }
Index: trunk/flexbisonparse/wikilex.l
@@ -13,6 +13,8 @@
1414
1515 #include <stdio.h>
1616 #include <string.h>
 17+
 18+/* This file defines debuglex and debuglex2. */
1719 #include "fb_defines.h"
1820
1921 /* Notice: We need to include parsetree.h first because wikiparse.tab.h will
@@ -20,27 +22,37 @@
2123 #include "parsetree.h"
2224 #include "wikiparse.tab.h"
2325
 26+/* Tells flex to stop processing input when EOF is reached. */
2427 int yywrap(void) { return 1; }
2528
 29+int encodeTableRowInfo (char* input, int initleng)
 30+{
 31+ int i = 1;
 32+ while (input[i] == '-') i++;
 33+ return ((i-1)*0x10000 + (initleng-i));
 34+}
 35+
 36+#define COMMONTOKENS \
 37+ if (YY_START != inattributeapo && YY_START != inattributeq && YY_START != canbeheading) \
 38+ BEGIN (cannotbelistorheadingorpre);
 39+
2640 %}
2741
 42+/* inclusive start conditions */
2843 %s canbelist canbeheading cannotbelistorheadingorpre attributes
 44+%s inattributeapo inattributeq startattribute
 45+
 46+/* exclusive start conditions */
2947 %x extension comment
3048
3149 %%
3250
33 - /* We will use the same buffer every time to accumulate the contents of an
34 - * extension token. newExtensionData() will copy the data to a new string */
35 - char extension_buf [ 65536 ];
36 - char * extension_buf_ptr;
37 - char * extension_name = 0;
38 - ExtensionData ed;
39 - Node node;
 51+ char* extension_name = 0;
4052 int i;
4153
4254
4355 "<"[[:alnum:]]+">" {
44 - extension_buf_ptr = extension_buf;
 56+ fb_create_new_buffer (256); /* for the contents */
4557 i = strlen (yytext)-1;
4658 extension_name = (char*) malloc (i * sizeof (char));
4759 memcpy (extension_name, yytext + 1, --i);
@@ -50,55 +62,77 @@
5163 <extension>"</"[[:alnum:]]+">" {
5264 i = strlen (extension_name);
5365 if (strncmp (extension_name, yytext+2, i))
54 - {
55 - memcpy (extension_buf_ptr, yytext, yyleng);
56 - extension_buf_ptr += yyleng;
57 - }
 66+ fb_write_to_buffer (yytext);
5867 else
5968 {
60 - *extension_buf_ptr = '\0';
61 - BEGIN (INITIAL);
62 - yylval.node = newNodeE (ExtensionToken,
63 - newExtensionData (extension_name, extension_buf));
 69+ BEGIN (cannotbelistorheadingorpre);
 70+ yylval.node = newNodeN (ExtensionToken,
 71+ extension_name, fb_get_buffer(), 0, 0);
6472 debuglex ("EXTENSION ");
6573 return EXTENSION;
6674 }
6775 }
68 -<extension>. { *extension_buf_ptr++ = yytext[0]; }
 76+<extension>.[^<>]* { fb_write_to_buffer (yytext); }
6977 <extension><<EOF>> {
70 - *extension_buf_ptr = '\0';
71 - yylval.node = newNodeE (ExtensionToken,
72 - newExtensionData (extension_name, extension_buf));
 78+ BEGIN (cannotbelistorheadingorpre);
 79+ yylval.node = newNodeN (ExtensionToken,
 80+ extension_name, fb_get_buffer(), 0, 0);
7381 debuglex ("EXTENSION ");
74 - BEGIN (INITIAL);
7582 return EXTENSION;
7683 }
7784
78 -"<!---->" { BEGIN (cannotbelistorheadingorpre); debuglex ("EMPTYCOMMENT "); return EMPTYCOMMENT; }
7985 "<!--" { BEGIN (comment); debuglex ("BEGINCOMMENT "); return BEGINCOMMENT; }
8086 <comment>.[^-]* { debuglex ("TEXT "); yylval.node = newNodeS (TextToken, strdup (yytext)); return TEXT; }
8187 <comment>"-->" { BEGIN (cannotbelistorheadingorpre); debuglex ("ENDCOMMENT "); return ENDCOMMENT; }
8288
83 -"\[\[:" { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENLINK "); return OPENLINK; }
84 -"\[\[" { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENDBLSQBR "); return OPENDBLSQBR; }
85 -"\]\]" { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSEDBLSQBR "); return CLOSEDBLSQBR; }
86 -\| { BEGIN (cannotbelistorheadingorpre); debuglex ("PIPE "); return PIPE; }
87 -\{\{\{\{\{ { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENPENTUPLECURLY "); return OPENPENTUPLECURLY; }
88 -\}\}\}\}\} { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSEPENTUPLECURLY "); return CLOSEPENTUPLECURLY; }
89 -\{\{\{ { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENTEMPLATEVAR "); return OPENTEMPLATEVAR; }
90 -\}\}\} { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSETEMPLATEVAR "); return CLOSETEMPLATEVAR; }
91 -\{\{ { BEGIN (cannotbelistorheadingorpre); debuglex ("OPENTEMPLATE "); return OPENTEMPLATE; }
92 -\}\} { BEGIN (cannotbelistorheadingorpre); debuglex ("CLOSETEMPLATE "); return CLOSETEMPLATE; }
93 -'''''/[^'] { BEGIN (cannotbelistorheadingorpre); debuglex ("APO5 "); return APO5; }
94 -'''/[^'] { BEGIN (cannotbelistorheadingorpre); debuglex ("APO3 "); return APO3; }
95 -''/[^'] { BEGIN (cannotbelistorheadingorpre); debuglex ("APO2 "); return APO2; }
 /* Values carried in yylval.num by the table tokens below:
  *   TABLEBEGIN          number of spaces after "{|"
  *   TABLECELL/TABLEHEAD 2 * (spaces after the marker); even for the doubled
  *                       marker ("||", "!!"), plus 1 (odd) for the single
  *                       marker at the start of a line
  *   TABLEROW            the result of encodeTableRowInfo()
  * Every token except TABLEEND switches to the 'attributes' start condition;
  * TABLEEND carries no value and switches to cannotbelistorheadingorpre. */
 89+ /* For the table-related tokens, we need to remember enough information so that we can
 90+ * reliably turn things back into text. */
 91+"{|"" "* { BEGIN(attributes); debuglex ("TABLEBEGIN "); yylval.num = yyleng-2; return TABLEBEGIN; }
 92+"||"" "* { yylval.num = 2*(yyleng-2); BEGIN(attributes); debuglex2 ("TABLECELL(%u) ", yylval.num); return TABLECELL; }
 93+^"|"" "* { yylval.num = 2*(yyleng-1)+1; BEGIN(attributes); debuglex2 ("TABLECELL(%u) ", yylval.num); return TABLECELL; }
 94+"!!"" "* { BEGIN(attributes); debuglex ("TABLEHEAD "); yylval.num = 2*(yyleng-2); return TABLEHEAD; }
 95+^"!"" "* { BEGIN(attributes); debuglex ("TABLEHEAD "); yylval.num = 2*(yyleng-1)+1; return TABLEHEAD; }
 96+"|""-"+" "* { BEGIN(attributes); debuglex ("TABLEROW "); yylval.num = encodeTableRowInfo (yytext, yyleng); return TABLEROW; }
 97+"|}" { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLEEND "); return TABLEEND; }
 98+
 /* Attribute lexing.
  *   ATTRIBUTE  a name plus any trailing spaces; yylval.ad is built from the
  *              matched text by newAttributeDataFromStr()
  *   EQUALS     '=' plus trailing spaces; yylval.num is the number of spaces and
  *              the scanner moves to 'startattribute'
  *   ATTRAPO / ATTRQ
  *              in 'startattribute' an opening quote carries 0 and enters
  *              inattributeapo / inattributeq; inside those conditions the
  *              closing quote carries the number of spaces after it and returns
  *              to 'attributes' */
 99+<attributes>[-a-zA-Z:_]+" "* {
 100+ debuglex2 ("ATTRIBUTE(%s) ", yytext);
 101+ yylval.ad = newAttributeDataFromStr (yytext);
 102+ return ATTRIBUTE;
 103+ }
 104+<attributes>"="" "* {
 105+ debuglex2 ("EQUALS(%d) ", yyleng-1);
 106+ yylval.num = yyleng-1;
 107+ BEGIN (startattribute);
 108+ return EQUALS;
 109+ }
 110+
 111+<startattribute>\' { BEGIN (inattributeapo); yylval.num = 0; debuglex ("ATTRAPO(0) "); return ATTRAPO; }
 112+<startattribute>\" { BEGIN (inattributeq); yylval.num = 0; debuglex ("ATTRQ(0) "); return ATTRQ; }
 113+<inattributeapo>\'" "* { BEGIN (attributes); yylval.num = yyleng-1; debuglex2 ("ATTRAPO(%d) ", yyleng-1); return ATTRAPO; }
 114+<inattributeq>\"" "* { BEGIN (attributes); yylval.num = yyleng-1; debuglex2 ("ATTRQ(%d) ", yyleng-1); return ATTRQ; }
 115+
 116+"\[\[:" { COMMONTOKENS; debuglex ("OPENLINK "); return OPENLINK; }
 117+"\[\[" { COMMONTOKENS; debuglex ("OPENDBLSQBR "); return OPENDBLSQBR; }
 118+"\]\]" { COMMONTOKENS; debuglex ("CLOSEDBLSQBR "); return CLOSEDBLSQBR; }
 119+\| { COMMONTOKENS; debuglex ("PIPE "); return PIPE; }
 120+\{\{\{\{\{ { COMMONTOKENS; debuglex ("OPENPENTUPLECURLY "); return OPENPENTUPLECURLY; }
 121+\}\}\}\}\} { COMMONTOKENS; debuglex ("CLOSEPENTUPLECURLY "); return CLOSEPENTUPLECURLY; }
 122+\{\{\{ { COMMONTOKENS; debuglex ("OPENTEMPLATEVAR "); return OPENTEMPLATEVAR; }
 123+\}\}\} { COMMONTOKENS; debuglex ("CLOSETEMPLATEVAR "); return CLOSETEMPLATEVAR; }
 124+\{\{ { COMMONTOKENS; debuglex ("OPENTEMPLATE "); return OPENTEMPLATE; }
 125+\}\} { COMMONTOKENS; debuglex ("CLOSETEMPLATE "); return CLOSETEMPLATE; }
 126+'''''/[^'] { COMMONTOKENS; debuglex ("APO5 "); return APO5; }
 127+'''/[^'] { COMMONTOKENS; debuglex ("APO3 "); return APO3; }
 128+''/[^'] { COMMONTOKENS; debuglex ("APO2 "); return APO2; }
96129 \n { BEGIN (INITIAL); debuglex ("NEWLINE\n"); return NEWLINE; }
 130+\r { /* ignore this one */ debuglex ("<13> "); }
97131
98132 ^" " { BEGIN(cannotbelistorheadingorpre); debuglex ("PRELINE "); return PRELINE; }
99 -^\*[[:space:]]* { BEGIN(canbelist); debuglex ("LISTBULLET "); return LISTBULLET; }
100 -<canbelist>\*[[:space:]]* { debuglex ("LISTBULLET "); return LISTBULLET; }
101 -^\#[[:space:]]* { BEGIN(canbelist); debuglex ("LISTNUMBERED "); return LISTNUMBERED; }
102 -<canbelist>\#[[:space:]]* { debuglex ("LISTNUMBERED "); return LISTNUMBERED; }
 133+^\*[ \t]* { BEGIN(canbelist); debuglex ("LISTBULLET "); return LISTBULLET; }
 134+<canbelist>\*[ \t]* { debuglex ("LISTBULLET "); return LISTBULLET; }
 135+^\#[ \t]* { BEGIN(canbelist); debuglex ("LISTNUMBERED "); return LISTNUMBERED; }
 136+<canbelist>\#[ \t]* { debuglex ("LISTNUMBERED "); return LISTNUMBERED; }
103137
104138 ^"="+ {
105139 BEGIN (canbeheading);
@@ -114,24 +148,28 @@
115149 return ENDHEADING;
116150 }
117151
118 -"{|"" "* { BEGIN(attributes); debuglex ("TABLEBEGIN "); return TABLEBEGIN; }
119 -"||"" "*/[^\| ][^\|]*"|"[^\|] { BEGIN(attributes); debuglex ("TABLECELL "); return TABLECELL; }
120 -"||"" "* { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLECELL "); return TABLECELL; }
121 -"!!"" "*/[^\! ][^\!]*"!"[^\!] { BEGIN(attributes); debuglex ("TABLEHEAD "); return TABLEHEAD; }
122 -"!!"" "* { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLEHEAD "); return TABLEHEAD; }
 152+<cannotbelistorheadingorpre,canbeheading>[^\|\r\n][^\<\>\[\]\{\}\r\n\'\|\=\!]* |
123153
124 -"|""-"+" "* { BEGIN(attributes); debuglex ("TABLEROW "); return TABLEROW; }
125 -"|}"" "* { BEGIN(cannotbelistorheadingorpre); debuglex ("TABLEEND "); return TABLEEND; }
 154+<inattributeapo>[^\'\|\r\n][^\<\>\[\]\{\}\r\n\'\|\=\!]* |
126155
 156+<inattributeq>[^\"\|\r\n][^\<\>\[\]\{\}\r\n\'\"\|\=\!]* {
 157+ yylval.node = newNodeS (TextToken, strdup (yytext));
 158+ debuglex2 ("TEXT(%s) ", yytext);
 159+ return TEXT; }
127160
128 -<cannotbelistorheadingorpre,canbeheading>[^\|\n][^\<\>\[\]\{\}\n\'\|\=]* {
 161+<canbelist>[^ \|\*\#\r\n][^\<\>\[\]\{\}\r\n\'\|\!]* |
 162+
 163+<attributes>[^-a-zA-Z:_\r\n\|\=][^\<\>\[\]\{\}\r\n\'\|\!]* |
 164+
 165+<INITIAL>[^ \|\*\#\r\n\=][^\<\>\[\]\{\}\r\n\'\|\=\!]* {
 166+ BEGIN(cannotbelistorheadingorpre);
129167 yylval.node = newNodeS (TextToken, strdup (yytext));
130 - debuglex2 ("TEXT(%s) ", yytext); return TEXT; }
131 -<canbelist>[^ \|\*\#\n][^\<\>\[\]\{\}\n\'\|]* {
132 - BEGIN(cannotbelistorheadingorpre); yylval.node = newNodeS (TextToken, strdup (yytext));
133 - debuglex2 ("TEXT(%s) ", yytext); return TEXT; }
134 -<INITIAL>[^ \|\*\#\n\=][^\<\>\[\]\{\}\n\'\|\=]* {
135 - BEGIN(cannotbelistorheadingorpre); yylval.node = newNodeS (TextToken, strdup (yytext));
136 - debuglex2 ("TEXT(%s) ", yytext); return TEXT; }
 168+ debuglex2 ("TEXT(%s) ", yytext);
 169+ return TEXT; }
137170
 171+<startattribute>[^ \t\r\n\'\"][^ \t\r\n]*" "* {
 172+ BEGIN (attributes);
 173+ yylval.node = newNodeS (TextToken, strdup (yytext));
 174+ debuglex2 ("TEXT(%s) ", yytext);
 175+ return TEXT; }
138176 %%
Index: trunk/flexbisonparse/fb_defines.h
@@ -12,10 +12,12 @@
1313 /* Change these to
1414 #define debuglex printf
1515 #define debuglex2 printf
 16+ #define debuglex3 printf
1617 to have the lexer output all the tokens generated. */
1718
1819 #define debuglex(x)
1920 #define debuglex2(x,y)
 21+#define debuglex3(x,y,z)
2022
2123
2224 /* Change this one to
Index: trunk/flexbisonparse/parsetree.c
@@ -33,13 +33,38 @@
3434 result->data.str = data;
3535 return result;
3636 }
37 -Node newNodeE (NodeType newType, ExtensionData data)
 37+Node newNodeN (NodeType newType, char* name, char* value, int copyName, int copyValue)
3838 {
3939 Node result = newNode (newType);
40 - result->data.ext = data;
 40+ result->data.nameval = (NameValue) malloc (sizeof (struct NameValueStruct));
 41+ result->data.nameval->name = copyName ? strdup (name) : name;
 42+ result->data.nameval->value = copyValue ? strdup (value) : value;
4143 return result;
4244 }
 45+AttributeData newAttributeDataFromStr (char* str)
 46+{
 47+ AttributeData ret = (AttributeData) malloc (sizeof (struct AttributeDataStruct));
 48+ int len = strlen (str);
 49+ int i = len-1;
4350
 51+ while (str[i] == ' ') i--;
 52+ i++;
 53+ ret->name = (char*) malloc ((i+1) * sizeof (char));
 54+ memcpy (ret->name, str, i * sizeof (char));
 55+ ret->name[i] = '\0';
 56+ ret->spacesAfterName = len-i;
 57+ return ret;
 58+}
 59+Node newNodeA (int t, AttributeData ad, int sae, int sav)
 60+{
 61+ Node result = newNode (Attribute);
 62+ result->data.attrdata = ad;
 63+ result->data.attrdata->type = t;
 64+ result->data.attrdata->spacesAfterEquals = sae;
 65+ result->data.attrdata->spacesAfterValue = sav;
 66+ return result;
 67+}
 68+
4469 /* Return value is the first parameter */
4570 Node nodeAddChild (Node node, Node child)
4671 {
@@ -61,11 +86,12 @@
6287 /* Return value is the first parameter */
6388 Node nodeAddSibling (Node node, Node sibling)
6489 {
 90+ Node examine = node;
6591 if (sibling)
6692 {
67 - while (node->nextSibling)
68 - node = node->nextSibling;
69 - node->nextSibling = sibling;
 93+ while (examine->nextSibling)
 94+ examine = examine->nextSibling;
 95+ examine->nextSibling = sibling;
7096 }
7197 return node;
7298 }
@@ -73,18 +99,21 @@
74100 /* Return value is the first parameter */
75101 Node nodePrependChild (Node node, Node child)
76102 {
77 - child->nextSibling = node->firstChild;
 103+ Node prevChild = node->firstChild;
78104 node->firstChild = child;
79 - return node;
 105+ return nodeAddChild (node, prevChild);
80106 }
81107
82 -ExtensionData newExtensionData (char *name, char *text)
 108+void freeRecursively (Node node)
83109 {
84 - ExtensionData ed = (ExtensionData) malloc (sizeof (struct ExtensionDataStruct));
85 - ed->name = name;
86 - ed->text = (char *) malloc ((strlen (text)+1) * sizeof (char));
87 - strcpy (ed->text, text);
88 - return ed;
 110+ Node next, child = node->firstChild;
 111+
 112+ while (child)
 113+ {
 114+ next = child->nextSibling;
 115+ freeRecursively (child);
 116+ child = next;
 117+ }
89118 }
90119
91120 void removeAndFreeFirstChild (Node node)
@@ -92,7 +121,7 @@
93122 Node child = node->firstChild;
94123 if (!child) return;
95124 node->firstChild = child->nextSibling;
96 - free (child);
 125+ freeRecursively (child);
97126 }
98127
99128 /* Parameter must be a ListLine node. Returns a List node. */
@@ -255,6 +284,7 @@
256285 /* Re-attach the next sibling (B) */
257286 examine->nextSibling = tmpnode;
258287 }
 288+ /* Newlines nodes don't have children, no need for freeRecursively */
259289 free (newlinesnode);
260290 }
261291 examine = examine->nextSibling;
@@ -276,6 +306,20 @@
277307 return newNodeS (TextToken, ret);
278308 }
279309
 310+Node processTableCellContents (Node node)
 311+{
 312+ Node ret;
 313+
 314+ if (!node) return 0;
 315+ if (node->type == Paragraph && !node->nextSibling)
 316+ {
 317+ ret = node->firstChild;
 318+ free (node);
 319+ return ret;
 320+ }
 321+ return node;
 322+}
 323+
280324 Node processNestedItalics (Node node)
281325 {
282326 Node examine, saveExamineSibling, childExamine, childSibling, saveChildSibling;
@@ -353,6 +397,7 @@
354398 /* Move examine on to the newly created sibling */
355399 examine = examine->nextSibling;
356400 /* Free the now-obsolete Italics node */
 401+ /* We have attached its children elsewhere, so don't use freeRecursively */
357402 free (childSibling);
358403 }
359404 /* Any node that is not an Italics node needs to become attached to one.
@@ -390,6 +435,7 @@
391436 if (a->type == TextBlock && b->type == TextBlock)
392437 {
393438 nodeAddChild (a, b->firstChild);
 439+ /* We have attached b's children elsewhere, so don't use freeRecursively */
394440 free (b);
395441 return a;
396442 }
@@ -398,18 +444,179 @@
399445 else if (b->type == TextBlock)
400446 return nodePrependChild (b, a);
401447 else
402 - return nodeAddChild (nodeAddChild (newNode (TextBlock), a), b);
 448+ return nodeAddChild2 (newNode (TextBlock), a, b);
403449 }
404450
 451+Node convertAttributesToText (Node node)
 452+{
 453+ char* str;
 454+ int len, at, i;
 455+ Node ret = 0, examine = node->firstChild, prevExamine;
 456+ AttributeData ad;
 457+
 458+ if (node->type != AttributeGroup) return 0;
 459+
 460+ /* We've stored the first child in examine, so we can already free the parent */
 461+ free (node);
 462+
 463+ while (examine) /* should be an Attribute node */
 464+ {
 465+ ad = examine->data.attrdata;
 466+ /* first turn attribute name, equals sign (if any) and
 467+ * opening apostrophe or quotes (if any) into one string */
 468+ len = strlen (ad->name);
 469+ at = len;
 470+ len += ad->spacesAfterName;
 471+ if (ad->type > 0)
 472+ {
 473+ len++; /* '=' */
 474+ len += ad->spacesAfterEquals;
 475+ if (ad->type > 1) len++; /* ' or " */
 476+ }
 477+ len++; /* trailing '\0' */
 478+
 479+ str = (char*) malloc (len * sizeof (char));
 480+ memcpy (str, ad->name, at * sizeof (char));
 481+ while (ad->spacesAfterName--) str[at++] = ' ';
 482+ if (ad->type > 0)
 483+ {
 484+ str[at++] = '=';
 485+ while (ad->spacesAfterEquals--) str[at++] = ' ';
 486+ if (ad->type == 2) str[at++] = '\'';
 487+ else if (ad->type == 3) str[at++] = '"';
 488+ }
 489+ str[at] = '\0';
 490+
 491+ ret = makeTextBlock2 (ret, newNodeS (TextToken, str), examine->firstChild);
 492+
 493+ if (ad->type > 1 || (ad->type == 1 && ad->spacesAfterValue > 0))
 494+ {
 495+ at = ad->type > 1 ? 1 : 0;
 496+ len = at + ad->spacesAfterValue;
 497+ str = (char*) malloc (len * sizeof (char));
 498+ if (ad->type == 2) str[0] = '\'';
 499+ else if (ad->type == 3) str[0] = '"';
 500+ while (ad->spacesAfterValue--) str[at++] = ' ';
 501+ str[at] = '\0';
 502+ ret = makeTextBlock (ret, newNodeS (TextToken, str));
 503+ }
 504+ prevExamine = examine;
 505+ examine = examine->nextSibling;
 506+ free (prevExamine);
 507+ }
 508+
 509+ return ret;
 510+}
 511+
 512+Node convertAttributeDataToText (AttributeData data)
 513+{
 514+ return makeTextBlock (newNodeS (TextToken, data->name),
 515+ newNodeS (TextToken, addSpaces ("", data->spacesAfterName)));
 516+}
 517+
 518+Node convertPipeSeriesToText (Node node)
 519+{
 520+ Node result = 0;
 521+ Node nextNode;
 522+
 523+ while (node)
 524+ {
 525+ result = makeTextBlock2 (result, newNodeS (TextToken, "|"), node->firstChild);
 526+ nextNode = node->nextSibling;
 527+ freeRecursively (node);
 528+ node = nextNode;
 529+ }
 530+
 531+ return result;
 532+}
 533+
 534+Node convertTableRowToText (int info)
 535+{
 536+ int minuses, spaces, i;
 537+ char* text;
 538+
 539+ minuses = info / 0x10000;
 540+ spaces = info % 0x10000;
 541+
 542+ text = (char*) malloc ((minuses + spaces + 2) * sizeof (char));
 543+ text[0] = '|';
 544+ i = 1;
 545+ while (minuses--) text[i++] = '-';
 546+ while (spaces--) text[i++] = ' ';
 547+ text[i] = '\0';
 548+ return newNodeS (TextToken, text);
 549+}
 550+
 551+Node convertTableCellToText (int info)
 552+{
 553+ return newNodeS (TextToken, addSpaces (info % 2 ? "|" : "||", info/2));
 554+}
 555+
 556+Node convertTableHeadToText (int info)
 557+{
 558+ return newNodeS (TextToken, addSpaces (info % 2 ? "!" : "!!", info/2));
 559+}
 560+
 561+char* addSpaces (char* src, int spaces)
 562+{
 563+ char* ret;
 564+ int len = strlen (src);
 565+
 566+ ret = (char*) malloc ((len + spaces + 1) * sizeof (char));
 567+ if (len > 0) memcpy (ret, src, len * sizeof (char));
 568+ ret[len+spaces] = '\0';
 569+ while (spaces--) ret[len+spaces] = ' ';
 570+ return ret;
 571+}
 572+
 573+char* strtrim (char* src)
 574+{
 575+ int i = strlen (src);
 576+ i--;
 577+ while ((i > 0) && (src[i] == ' ')) i--;
 578+ src[i+1] = '\0';
 579+ return src;
 580+}
 581+
 582+int strtrimC (char* src)
 583+{
 584+ int i = strlen (src), j = i;
 585+ i--;
 586+ while ((i > 0) && (src[i] == ' ')) i--;
 587+ src[i+1] = '\0';
 588+ return j - i - 1;
 589+}
 590+
 591+Node strtrimN (Node src)
 592+{
 593+ if (src->type == TextToken)
 594+ strtrim (src->data.str);
 595+ return src;
 596+}
 597+int strtrimNC (Node src)
 598+{
 599+ if (src->type == TextToken)
 600+ return strtrimC (src->data.str);
 601+ return 0;
 602+}
 603+
405604 char* fb_buffer;
406 -int fb_buflen = 1024; /* Start with 1 KB if user doesn't call fb_set_buffer_size() */
 605+int fb_buflen;
407606 int fb_bufcontentlen;
408607
409 -inline void fb_set_buffer_size (int size)
 608+void fb_create_new_buffer (int size)
410609 {
 610+ fb_buffer = (char*) malloc (size * sizeof (char));
 611+ fb_buffer[0] = '\0';
 612+ fb_bufcontentlen = 0;
411613 fb_buflen = size;
412614 }
413615
 616+char* fb_get_buffer()
 617+{
 618+ return fb_buffer;
 619+}
 620+
414621 void fb_write_to_buffer_len (const char* str, int len)
415622 {
416623 char* newbuffer;
@@ -429,7 +636,7 @@
430637 fb_buffer[fb_bufcontentlen] = '\0';
431638 }
432639
433 -inline void fb_write_to_buffer (const char* str)
/*
 * Convenience form of fb_write_to_buffer_len() for NUL-terminated strings:
 * measures `str` with strlen() and forwards the string and that length.
 */
void fb_write_to_buffer (const char* str)
{
    int length = strlen (str);

    fb_write_to_buffer_len (str, length);
}
@@ -459,9 +666,9 @@
460667 case '>': FB_WRITE_CURRY ("&gt;");
461668 case '"': FB_WRITE_CURRY ("&quot;");
462669 default:
463 - if (*s < ' ')
 670+ if (*s < ' ' && *s != '\n')
464671 {
465 - sprintf (tmpstr, "&#%d;", *s);
 672+ sprintf (tmpstr, "&#%u;", (unsigned char)*s);
466673 FB_WRITE_CURRY (tmpstr);
467674 }
468675 else
@@ -495,10 +702,13 @@
496703
497704 rname =
498705 node->type == TextBlock ? 0 /* don't output tags for this, just the text */ :
499 - node->type == Heading ? 0 /* outputXML already does this one; it may have attributes */ :
500 - node->type == List ? 0 /* outputXML already does this one; it may have attributes */ :
501 - node->type == LinkEtc ? 0 /* outputXML already does this one; it may have attributes */ :
502706
 707+ /* For the following, the tag is already output by outputXMLHelper: */
 708+ node->type == Heading ? 0 :
 709+ node->type == List ? 0 :
 710+ node->type == LinkEtc ? 0 :
 711+ node->type == Attribute ? 0 :
 712+
503713 node->type == LinkTarget ? "linktarget" :
504714 node->type == LinkOption ? "linkoption" :
505715 node->type == Article ? "article" :
@@ -508,6 +718,14 @@
509719 node->type == ListItem ? "listitem" :
510720 node->type == Bold ? "bold" :
511721 node->type == Italics ? "italics" :
 722+ node->type == Comment ? "comment" :
 723+
 724+ node->type == Table ? "table" :
 725+ node->type == TableRow ? "tablerow" :
 726+ node->type == TableCell ? "tablecell" :
 727+ node->type == TableHead ? "tablehead" :
 728+ node->type == AttributeGroup? "attrs" :
 729+
512730 /* Fallback value */
513731 (sprintf (defaultname, "type%dnode", node->type), defaultname);
514732
@@ -535,7 +753,7 @@
536754 void outputXMLHelper (Node node)
537755 {
538756 Node child;
539 - ExtensionData ed;
 757+ NameValue nv;
540758 int i;
541759 char tmpstr[255];
542760
@@ -553,13 +771,23 @@
554772 break;
555773
556774 case ExtensionToken:
557 - ed = node->data.ext;
558 - sprintf (tmpstr, "<extension name=\"%s\">", ed->name);
 775+ nv = node->data.nameval;
 776+ sprintf (tmpstr, "<extension name='%s'>", nv->name);
559777 fb_write_to_buffer (tmpstr);
560 - fb_write_to_buffer_escaped (ed->text);
 778+ fb_write_to_buffer_escaped (nv->value);
561779 fb_write_to_buffer ("</extension>");
562780 break;
563781
 782+ case Attribute:
 783+ sprintf (tmpstr, "<attr name='%s'", node->data.attrdata->name);
 784+ fb_write_to_buffer (tmpstr);
 785+ if (node->data.attrdata->type == 0)
 786+ fb_write_to_buffer (" isnull='yes'");
 787+ fb_write_to_buffer (">");
 788+ outputNode (node);
 789+ fb_write_to_buffer ("</attr>");
 790+ break;
 791+
564792 case List:
565793 fb_write_to_buffer (node->data.num == 1 ? "<list type='bullet'>" :
566794 node->data.num == 2 ? "<list type='numbered'>" :
@@ -583,12 +811,9 @@
584812 }
585813 }
586814
587 -char* outputXML (Node node)
 815+char* outputXML (Node node, int initialBufferSize)
588816 {
589 - fb_buffer = (char*) malloc (fb_buflen * sizeof (char));
590 - fb_buffer[0] = '\0';
591 - fb_bufcontentlen = 0;
592 -
 817+ fb_create_new_buffer (initialBufferSize);
593818 outputXMLHelper (node);
594 - return fb_buffer;
 819+ return fb_get_buffer();
595820 }
Index: trunk/flexbisonparse/test.txt
@@ -8,3 +8,7 @@
99 ** graphics
1010 ** sound
1111
 12+{| || Version 1 || not bad
 13+|- || Version 2 || much better |}
 14+
 15+This is a || token in the middle of text.
\ No newline at end of file

Status & tagging log