Index: branches/parser-work/phase3/includes/parser/ParseTree.php |
— | — | @@ -4,9 +4,15 @@ |
5 | 5 | * Base type for parse objects, each of which performs a specialized task while parsing |
6 | 6 | * @ingroup Parser |
7 | 7 | */ |
8 | | -interface ParseObject { |
| 8 | +abstract class ParseObject { |
| 9 | + protected $mName; |
| 10 | + |
| 11 | + function __construct($name) { |
| 12 | + $this->mName = $name; |
| 13 | + } |
| 14 | + |
9 | 15 | // Does the parse task specific to each parse object |
10 | | - function parse(&$text, &$rules, $endTag = NULL); |
| 16 | + abstract function parse(&$text, &$rules, $endTag = NULL); |
11 | 17 | } |
12 | 18 | |
13 | 19 | /** |
— | — | @@ -18,36 +24,35 @@ |
19 | 25 | * mChildRule - What Parse rule to use to gather children for this element |
20 | 26 | * @ingroup Parser |
21 | 27 | */ |
22 | | -class ParseRule implements ParseObject { |
23 | | - private $mName, $mBeginTag, $mEndTag, $mChildRule; |
| 28 | +class ParseRule extends ParseObject { |
| 29 | + private $mBeginTag, $mChildRule, $mEndTag; |
24 | 30 | |
25 | | - function __construct($name, $beginTag, $endTag = NULL, $childRule = NULL) { |
26 | | - $this->mName = $name; |
| 31 | + function __construct($name, $beginTag, $childRule = NULL, $endTag = NULL) { |
| 32 | + parent::__construct($name); |
27 | 33 | $this->mBeginTag = $beginTag; |
| 34 | + $this->mChildRule = $childRule; |
28 | 35 | $this->mEndTag = $endTag; |
29 | | - $this->mChildRule = $childRule; |
30 | 36 | } |
31 | 37 | |
32 | 38 | function parse(&$text, &$rules, $endTag = NULL) { |
33 | 39 | if (! preg_match($this->mBeginTag, $text, $matches)) { |
34 | 40 | return NULL; |
35 | 41 | } |
36 | | - $newText = substr($text, strlen($matches[0])); |
37 | | - $children = NULL; |
| 42 | + $text = substr($text, strlen($matches[0])); |
| 43 | + $child = NULL; |
38 | 44 | if ($this->mChildRule != NULL) { |
39 | | - $endTag = $this->mEndTag; |
40 | | - if ($endTag != NULL) { |
| 45 | + if ($this->mEndTag != NULL) { |
| 46 | + $endTag = $this->mEndTag; |
41 | 47 | foreach ($matches as $i => $crrnt) { |
42 | 48 | $endTag = str_replace('~' . $i, $crrnt, $endTag); |
43 | 49 | } |
44 | 50 | } |
45 | | - $children = $rules[$this->mChildRule]->parse($newText, $rules, $endTag); |
46 | | - if ($children == NULL) { |
| 51 | + $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag); |
| 52 | + if ($child == NULL) { |
47 | 53 | return NULL; |
48 | 54 | } |
49 | 55 | } |
50 | | - $text = $newText; |
51 | | - return new ParseTree($this->mName, $matches, $children); |
| 56 | + return new ParseTree($this->mName, $matches, array($child)); |
52 | 57 | } |
53 | 58 | } |
54 | 59 | |
— | — | @@ -61,11 +66,11 @@ |
62 | 67 | * mMaxChildren - Maximum amount of children for this rule, 0 means unlimited |
63 | 68 | * @ingroup Parser |
64 | 69 | */ |
65 | | -class ParseQuant implements ParseObject { |
66 | | - private $mName, $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren; |
| 70 | +class ParseQuant extends ParseObject { |
| 71 | + private $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren; |
67 | 72 | |
68 | 73 | function __construct($name, $childRule, $endTag = NULL, $minChildren = 0, $maxChildren = 0) { |
69 | | - $this->mName = $name; |
| 74 | + parent::__construct($name); |
70 | 75 | $this->mChildRule = $childRule; |
71 | 76 | $this->mEndTag = $endTag; |
72 | 77 | $this->mMinChildren = $minChildren; |
— | — | @@ -73,17 +78,21 @@ |
74 | 79 | } |
75 | 80 | |
76 | 81 | function parse(&$text, &$rules, $endTag = NULL) { |
| 82 | + $endRegEx = $this->mEndTag; |
| 83 | + if ($this->mEndTag != NULL && $endTag != NULL) { |
| 84 | + $endRegEx = str_replace('~r', $endTag, $this->mEndTag); |
| 85 | + } |
77 | 86 | $children = array(); |
78 | | - for ($i = 0; $i < $minChildren || (($this->mEndTag == NULL || ! preg_match($this->mEndTag, $text, $matches)) && |
79 | | - ($endTag == NULL || ! preg_match($endTag, $text, $matches)) && ($maxChildren <= 0 || $i < $maxChildren)); $i ++) { |
| 87 | + for ($i = 0; $i < $this->mMinChildren || (($endRegEx == NULL || ! preg_match($endRegEx, $text, $matches)) && |
| 88 | + ($this->mMaxChildren <= 0 || $i < $this->mMaxChildren)); $i ++) { |
80 | 89 | $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag); |
81 | 90 | if ($child == NULL) { |
82 | 91 | return NULL; |
83 | 92 | } |
84 | 93 | $children[] = $child; |
85 | 94 | } |
86 | | - if ($endTag != NULL) { |
87 | | - if (!isset($matches[0])) { |
| 95 | + if ($this->mEndTag != NULL) { |
| 96 | + if (! isset($matches[0])) { |
88 | 97 | return NULL; |
89 | 98 | } |
90 | 99 | $text = substr($text, strlen($matches[0])); |
— | — | @@ -97,18 +106,21 @@ |
98 | 107 | * mList - The list of rules |
99 | 108 | * @ingroup Parser |
100 | 109 | */ |
101 | | -class ParseList implements ParseObject { |
| 110 | +class ParseList extends ParseObject { |
102 | 111 | private $mList; |
103 | 112 | |
104 | | - function __construct($list) { |
| 113 | + function __construct($name, $list) { |
| 114 | + parent::__construct($name); |
105 | 115 | $this->mList = $list; |
106 | 116 | } |
107 | 117 | |
108 | 118 | function parse(&$text, &$rules, $endTag = NULL) { |
109 | 119 | foreach ($this->mList as $crrnt) { |
110 | | - $child = $rules[$crrnt]->parse($text, $rules, $endTag); |
| 120 | + $newText = $text; |
| 121 | + $child = $rules[$crrnt]->parse($newText, $rules, $endTag); |
111 | 122 | if ($child != NULL) { |
112 | | - return $child; |
| 123 | + $text = $newText; |
| 124 | + return new ParseTree($this->mName, NULL, array($child)); |
113 | 125 | } |
114 | 126 | } |
115 | 127 | return NULL; |
— | — | @@ -116,6 +128,33 @@ |
117 | 129 | } |
118 | 130 | |
119 | 131 | /** |
| 132 | + * Contains a sequence of rules all of which must pass |
| 133 | + * mName - The name to give the resultant ParseTree object |
| 134 | + * mList - The sequence of rules |
| 135 | + * @ingroup Parser |
| 136 | + */ |
| 137 | +class ParseSeq extends ParseObject { |
| 138 | + private $mList; |
| 139 | + |
| 140 | + function __construct($name, $list) { |
| 141 | + parent::__construct($name); |
| 142 | + $this->mList = $list; |
| 143 | + } |
| 144 | + |
| 145 | + function parse(&$text, &$rules, $endTag = NULL) { |
| 146 | + $children = array(); |
| 147 | + foreach ($this->mList as $crrnt) { |
| 148 | + $child = $rules[$crrnt]->parse($text, $rules, $endTag); |
| 149 | + if ($child == NULL) { |
| 150 | + return NULL; |
| 151 | + } |
| 152 | + $children[] = $child; |
| 153 | + } |
| 154 | + return new ParseTree($this->mName, NULL, $children); |
| 155 | + } |
| 156 | +} |
| 157 | + |
| 158 | +/** |
120 | 159 | * The parse tree of the data. |
121 | 160 | * printTree translates the parse tree to XML; eventually this should be separated into a data layer and an engine layer. |
122 | 161 | * mName - Indicates what ParseRule was used to create this node |
— | — | @@ -170,50 +209,18 @@ |
171 | 210 | } |
172 | 211 | $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
173 | 212 | } elseif ($this->mName == "link") { |
174 | | - $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren->printTree() . "]]"; |
| 213 | + $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren[0]->printTree() . "]]"; |
175 | 214 | } elseif ($this->mName == "h") { |
176 | | - $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren->printTree() . |
| 215 | + $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren[0]->printTree() . |
177 | 216 | htmlspecialchars($this->mMatches[2]) . "</h>"; |
178 | 217 | if ($this->mMatches[1] == "\n") { |
179 | 218 | $retString = "\n" . $retString; |
180 | 219 | } |
181 | | - } elseif ($this->mName == "template" || $this->mName == "tplarg") { |
182 | | - $retString = "<" . $this->mName . ">" . $this->mChildren->printTree() . "</" . $this->mName . ">"; |
183 | | - } elseif ($this->mName == "templatequant") { |
184 | | - $inTitle = true; |
185 | | - $foundEquals = false; |
186 | | - $currentItem = ""; |
187 | | - $this->mChildren[] = new ParseTree("pipe", NULL, NULL); |
188 | | - foreach ($this->mChildren as $crrnt) { |
189 | | - if ($crrnt->getName() == "pipe") { |
190 | | - if ($inTitle) { |
191 | | - $retString .= "<title>" . $currentItem . "</title>"; |
192 | | - $inTitle = false; |
193 | | - } else { |
194 | | - if (! $foundEquals) { |
195 | | - $retString .= "<part>"; |
196 | | - } |
197 | | - $retString .= "<value>" . $currentItem . "</value></part>"; |
198 | | - $foundEquals = false; |
199 | | - } |
200 | | - $currentItem = ""; |
201 | | - } elseif ($crrnt->getName() == "equals") { |
202 | | - if (! $inTitle && ! $foundEquals) { |
203 | | - $retString .= "<part><name>" . $currentItem . "</name>"; |
204 | | - $foundEquals = true; |
205 | | - $currentItem = ""; |
206 | | - } else { |
207 | | - $currentItem .= "="; |
208 | | - } |
209 | | - } else { |
210 | | - $currentItem .= $crrnt->printTree(); |
211 | | - } |
212 | | - } |
213 | 220 | } else { |
214 | 221 | foreach ($this->mChildren as $crrnt) { |
215 | 222 | $retString .= $crrnt->printTree(); |
216 | 223 | } |
217 | | - if ($this->mName == "root") { |
| 224 | + if ($this->mName != "unnamed") { |
218 | 225 | $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
219 | 226 | } |
220 | 227 | } |
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php |
— | — | @@ -70,10 +70,11 @@ |
71 | 71 | // To XML |
72 | 72 | $xmlishRegex = implode('|', $this->parser->getStripList()); |
73 | 73 | $rules = array( |
74 | | - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateQuant"), |
75 | | - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateQuant"), |
76 | | - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainQuant"), |
77 | | - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?=(?: *<!--.*?-->)*(?:\n|$))/s', "MainQuant"), |
| 74 | + "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', "TemplateSeq", '}}'), |
| 75 | + "TplArg" => new ParseRule("tplarg", '/^{{{/s', "TemplateSeq", '}}}'), |
| 76 | + "TplPart" => new ParseRule("part", '/^\|/s', "TplPartList"), |
| 77 | + "Link" => new ParseRule("link", '/^\[\[/s', "MainQuant", ']]'), |
| 78 | + "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'), |
78 | 79 | "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'), |
79 | 80 | "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'), |
80 | 81 | "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'), |
— | — | @@ -83,13 +84,18 @@ |
84 | 85 | "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>)/si'), |
85 | 86 | "BeginFile" => new ParseRule("bof", '/^~BOF/s'), |
86 | 87 | "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'), |
87 | | - "TplPipe" => new ParseRule("pipe", '/^\|/s'), |
88 | | - "TplEquals" => new ParseRule("equals", '/^=/s'), |
89 | 88 | "Root" => new ParseQuant("root", "MainList", '/^$/'), |
90 | | - "MainQuant" => new ParseQuant("mainquant", "MainList"), |
91 | | - "TemplateQuant" => new ParseQuant("templatequant", "TemplateList"), |
92 | | - "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")), |
93 | | - "TemplateList" => new ParseList(array("TplPipe", "TplEquals", "MainList"))); |
| 89 | + "MainQuant" => new ParseQuant("unnamed", "MainList", '/^~r/s'), |
| 90 | + "HeadingQuant" => new ParseQuant("unnamed", "MainList", '/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'), |
| 91 | + "TplTitle" => new ParseQuant("title", "MainList", '/^(?=~r|\|)/s'), |
| 92 | + "TplPartQuant" => new ParseQuant("unnamed", "TplPart", '/^~r/s'), |
| 93 | + "TplTest" => new ParseQuant("unnamed", "MainList", '/^(?=~r|\||=(?!~r|\|))/s'), |
| 94 | + "TplName" => new ParseQuant("name", "TplTest", '/^=/s', 0, 1), |
| 95 | + "TplValue" => new ParseQuant("value", "MainList", '/^(?=~r|\|)/s'), |
| 96 | + "MainList" => new ParseList("unnamed", array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")), |
| 97 | + "TplPartList" => new ParseList("unnamed", array("TplPartSeq", "TplValue")), |
| 98 | + "TemplateSeq" => new ParseSeq("unnamed", array("TplTitle", "TplPartQuant")), |
| 99 | + "TplPartSeq" => new ParseSeq("unnamed", array("TplName", "TplValue"))); |
94 | 100 | if ($flags & Parser::PTD_FOR_INCLUSION) { |
95 | 101 | $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s'); |
96 | 102 | $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?<\/noinclude>/s'); |