Index: branches/parser-work/phase3/includes/parser/ParseTree.php |
— | — | @@ -6,7 +6,7 @@ |
7 | 7 | */ |
8 | 8 | interface ParseObject { |
9 | 9 | // Does the parse task specific to each parse object |
10 | | - function parse(&$text, &$rules); |
| 10 | + function parse(&$text, &$rules, $endTag = NULL); /* $endTag: optional end-tag regular expression passed down from the enclosing rule */ |
11 | 11 | } |
12 | 12 | |
13 | 13 | /** |
— | — | @@ -15,7 +15,7 @@ |
16 | 16 | * mName - The name to give the resultant ParseTree object |
17 | 17 | * mBeginTag - the regular expression used to determine if this is the rule that should be used |
18 | 18 | * mEndTag - If ParseTrees of this type are to have children, mEndTag specifies when all of the children are collected |
19 | | - * mChildRule - an extra rule to consider when collecting children, it is only used for situations covered by the HHP21 parser test |
| 19 | + * mChildRule - What Parse rule to use to gather children for this element |
20 | 20 | * @ingroup Parser |
21 | 21 | */ |
22 | 22 | class ParseRule implements ParseObject { |
— | — | @@ -28,26 +28,23 @@ |
29 | 29 | $this->mChildRule = $childRule; |
30 | 30 | } |
31 | 31 | |
32 | | - function parse(&$text, &$rules) { |
| 32 | + function parse(&$text, &$rules, $endTag = NULL) { |
33 | 33 | if (! preg_match($this->mBeginTag, $text, $matches)) { |
34 | 34 | return NULL; |
35 | 35 | } |
36 | 36 | $newText = substr($text, strlen($matches[0])); |
37 | | - $children = array(); |
38 | | - if ($this->mChildRule != NULL && $this->mEndTag != NULL) { |
| 37 | + $children = NULL; |
| 38 | + if ($this->mChildRule != NULL) { |
39 | 39 | $endTag = $this->mEndTag; |
40 | | - foreach ($matches as $i => $crrnt) { |
41 | | - $endTag = str_replace('~' . $i, $crrnt, $endTag); |
42 | | - } |
43 | | - while (! preg_match($endTag, $newText, $endMatches)) { |
44 | | - $child = $rules[$this->mChildRule]->parse($newText, $rules); |
45 | | - if ($child == NULL) { |
46 | | - return NULL; |
| 40 | + if ($endTag != NULL) { |
| 41 | + foreach ($matches as $i => $crrnt) { |
| 42 | + $endTag = str_replace('~' . $i, $crrnt, $endTag); |
47 | 43 | } |
48 | | - $children[] = $child; |
49 | 44 | } |
50 | | - $newText = substr($newText, strlen($endMatches[0])); |
51 | | - $matches = array_merge($matches, $endMatches); |
| 45 | + $children = $rules[$this->mChildRule]->parse($newText, $rules, $endTag); |
| 46 | + if ($children == NULL) { |
| 47 | + return NULL; |
| 48 | + } |
52 | 49 | } |
53 | 50 | $text = $newText; |
54 | 51 | return new ParseTree($this->mName, $matches, $children); |
— | — | @@ -55,6 +52,47 @@ |
56 | 53 | } |
57 | 54 | |
58 | 55 | /** |
| 56 | + * A quantifier rule: applies mChildRule repeatedly and gathers the results as children of one ParseTree. |
| 57 | + * Collecting stops when an end tag matches or mMaxChildren is reached; parse() returns NULL if a child fails to parse. |
| 58 | + * mName - The name to give the resultant ParseTree object |
| 59 | + * mChildRule - What Parse rule to use to gather children for this element |
| 60 | + * mEndTag - Optional regular expression; once it matches the remaining text no more children are collected |
| 61 | + * mMinChildren - Minimum amount of children for this rule (collected before any end tag is tested) |
| 62 | + * mMaxChildren - Maximum amount of children for this rule, 0 means unlimited |
| 63 | + * @ingroup Parser |
| 64 | + */ |
| 65 | +class ParseQuant implements ParseObject { |
| 66 | + private $mName, $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren; |
| 67 | + |
| 68 | + function __construct($name, $childRule, $endTag = NULL, $minChildren = 0, $maxChildren = 0) { |
| 69 | + $this->mName = $name; |
| 70 | + $this->mChildRule = $childRule; |
| 71 | + $this->mEndTag = $endTag; |
| 72 | + $this->mMinChildren = $minChildren; |
| 73 | + $this->mMaxChildren = $maxChildren; |
| 74 | + } |
| 75 | + |
| 76 | + function parse(&$text, &$rules, $endTag = NULL) { |
| 77 | + $children = array(); |
| 78 | + for ($i = 0; $i < $this->mMinChildren || (($this->mEndTag == NULL || ! preg_match($this->mEndTag, $text, $matches)) && |
| 79 | + ($endTag == NULL || ! preg_match($endTag, $text, $matches)) && ($this->mMaxChildren <= 0 || $i < $this->mMaxChildren)); $i ++) { |
| 80 | + $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag); |
| 81 | + if ($child == NULL) { // a child rule failed, so the whole quantifier fails |
| 82 | + return NULL; |
| 83 | + } |
| 84 | + $children[] = $child; |
| 85 | + } |
| 86 | + if ($endTag != NULL) { // caller supplied a terminator: consume the match that stopped the loop |
| 87 | + if (!isset($matches[0])) { // loop stopped without any end tag matching |
| 88 | + return NULL; |
| 89 | + } |
| 90 | + $text = substr($text, strlen($matches[0])); |
| 91 | + } |
| 92 | + return new ParseTree($this->mName, isset($matches) ? $matches : NULL, $children); |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +/** |
59 | 97 | * Contains a list of rules to cycle through when creating a parse tree |
60 | 98 | * mList - The list of rules |
61 | 99 | * @ingroup Parser |
— | — | @@ -66,9 +104,9 @@ |
67 | 105 | $this->mList = $list; |
68 | 106 | } |
69 | 107 | |
70 | | - function parse(&$text, &$rules) { |
| 108 | + function parse(&$text, &$rules, $endTag = NULL) { |
71 | 109 | foreach ($this->mList as $crrnt) { |
72 | | - $child = $rules[$crrnt]->parse($text, $rules); |
| 110 | + $child = $rules[$crrnt]->parse($text, $rules, $endTag); |
73 | 111 | if ($child != NULL) { |
74 | 112 | return $child; |
75 | 113 | } |
— | — | @@ -131,64 +169,52 @@ |
132 | 170 | $retString .= "<close>" . htmlspecialchars($this->mMatches[4]) . "</close>"; |
133 | 171 | } |
134 | 172 | $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
135 | | - } elseif (($this->mName == "template" || $this->mName == "tplarg") && isset($this->mMatches[1])) { |
| 173 | + } elseif ($this->mName == "link") { |
| 174 | + $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren->printTree() . "]]"; |
| 175 | + } elseif ($this->mName == "h") { |
| 176 | + $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren->printTree() . |
| 177 | + htmlspecialchars($this->mMatches[2]) . "</h>"; |
| 178 | + if ($this->mMatches[1] == "\n") { |
| 179 | + $retString = "\n" . $retString; |
| 180 | + } |
| 181 | + } elseif ($this->mName == "template" || $this->mName == "tplarg") { |
| 182 | + $retString = "<" . $this->mName . ">" . $this->mChildren->printTree() . "</" . $this->mName . ">"; |
| 183 | + } elseif ($this->mName == "templatequant") { |
136 | 184 | $inTitle = true; |
137 | 185 | $foundEquals = false; |
138 | 186 | $currentItem = ""; |
139 | 187 | $this->mChildren[] = new ParseTree("pipe", NULL, NULL); |
140 | 188 | foreach ($this->mChildren as $crrnt) { |
141 | | - if ($crrnt instanceof ParseTree) { |
142 | | - if ($crrnt->getName() == "pipe") { |
143 | | - if ($inTitle) { |
144 | | - $retString .= "<title>" . $currentItem . "</title>"; |
145 | | - $inTitle = false; |
146 | | - } else { |
147 | | - if (! $foundEquals) { |
148 | | - $retString .= "<part>"; |
149 | | - } |
150 | | - $retString .= "<value>" . $currentItem . "</value></part>"; |
151 | | - $foundEquals = false; |
| 189 | + if ($crrnt->getName() == "pipe") { |
| 190 | + if ($inTitle) { |
| 191 | + $retString .= "<title>" . $currentItem . "</title>"; |
| 192 | + $inTitle = false; |
| 193 | + } else { |
| 194 | + if (! $foundEquals) { |
| 195 | + $retString .= "<part>"; |
152 | 196 | } |
| 197 | + $retString .= "<value>" . $currentItem . "</value></part>"; |
| 198 | + $foundEquals = false; |
| 199 | + } |
| 200 | + $currentItem = ""; |
| 201 | + } elseif ($crrnt->getName() == "equals") { |
| 202 | + if (! $inTitle && ! $foundEquals) { |
| 203 | + $retString .= "<part><name>" . $currentItem . "</name>"; |
| 204 | + $foundEquals = true; |
153 | 205 | $currentItem = ""; |
154 | | - } elseif ($crrnt->getName() == "equals") { |
155 | | - if (! $inTitle && ! $foundEquals) { |
156 | | - $retString .= "<part><name>" . $currentItem . "</name>"; |
157 | | - $foundEquals = true; |
158 | | - $currentItem = ""; |
159 | | - } else { |
160 | | - $currentItem .= "="; |
161 | | - } |
162 | 206 | } else { |
163 | | - $currentItem .= $crrnt->printTree(); |
| 207 | + $currentItem .= "="; |
164 | 208 | } |
165 | 209 | } else { |
166 | | - $currentItem .= htmlspecialchars($crrnt); |
| 210 | + $currentItem .= $crrnt->printTree(); |
167 | 211 | } |
168 | 212 | } |
169 | | - $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
170 | 213 | } else { |
171 | 214 | foreach ($this->mChildren as $crrnt) { |
172 | | - if ($crrnt instanceof ParseTree) { |
173 | | - $retString .= $crrnt->printTree(); |
174 | | - } else { |
175 | | - $retString .= htmlspecialchars($crrnt); |
176 | | - } |
| 215 | + $retString .= $crrnt->printTree(); |
177 | 216 | } |
178 | 217 | if ($this->mName == "root") { |
179 | 218 | $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
180 | | - } elseif ($this->mName == "link") { |
181 | | - $retString = htmlspecialchars($this->mMatches[0]) . $retString; |
182 | | - if (isset($this->mMatches[1])) { |
183 | | - $retString .= htmlspecialchars($this->mMatches[1]); |
184 | | - } |
185 | | - } elseif ($this->mName == "h") { |
186 | | - $retString = htmlspecialchars($this->mMatches[2]) . $retString; |
187 | | - if (isset($this->mMatches[3])) { |
188 | | - $retString = "<h>" . $retString . htmlspecialchars($this->mMatches[3]) . "</h>"; |
189 | | - } |
190 | | - if ($this->mMatches[1] == "\n") { |
191 | | - $retString = "\n" . $retString; |
192 | | - } |
193 | 219 | } |
194 | 220 | } |
195 | 221 | |
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php |
— | — | @@ -70,11 +70,10 @@ |
71 | 71 | // To XML |
72 | 72 | $xmlishRegex = implode('|', $this->parser->getStripList()); |
73 | 73 | $rules = array( |
74 | | - "Root" => new ParseRule("root", '/^/', '/^$/', "MainList"), |
75 | | - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateList"), |
76 | | - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateList"), |
77 | | - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainList"), |
78 | | - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?-->)*(?=\n|$)/s', "MainList"), |
| 74 | + "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateQuant"), |
| 75 | + "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateQuant"), |
| 76 | + "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainQuant"), |
| 77 | + "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?=(?: *<!--.*?-->)*(?:\n|$))/s', "MainQuant"), |
79 | 78 | "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'), |
80 | 79 | "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'), |
81 | 80 | "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'), |
— | — | @@ -86,6 +85,9 @@ |
87 | 86 | "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'), |
88 | 87 | "TplPipe" => new ParseRule("pipe", '/^\|/s'), |
89 | 88 | "TplEquals" => new ParseRule("equals", '/^=/s'), |
| 89 | + "Root" => new ParseQuant("root", "MainList", '/^$/'), |
| 90 | + "MainQuant" => new ParseQuant("mainquant", "MainList"), |
| 91 | + "TemplateQuant" => new ParseQuant("templatequant", "TemplateList"), |
90 | 92 | "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")), |
91 | 93 | "TemplateList" => new ParseList(array("TplPipe", "TplEquals", "MainList"))); |
92 | 94 | if ($flags & Parser::PTD_FOR_INCLUSION) { |