Index: branches/parser-work/phase3/includes/parser/ParseTree.php |
— | — | @@ -12,7 +12,7 @@ |
13 | 13 | } |
14 | 14 | |
15 | 15 | // Does the parse task specific to each parse object |
16 | | - abstract function parse(&$text, &$rules, $endTag = NULL); |
| 16 | + abstract function parse(&$text, &$rules, $replaceStr = NULL); |
17 | 17 | } |
18 | 18 | |
19 | 19 | /** |
— | — | @@ -20,39 +20,44 @@ |
21 | 21 | * If the text matches mBeginTag then a ParseTree object is created with the appropriate info. |
22 | 22 | * mName - The name to give the resultant ParseTree object |
23 | 23 | * mBeginTag - the regular expression used to determine if this is the rule that should be used |
24 | | - * mEndTag - If ParseTrees of this type are to have children, mEndTag specifies when all of the children are collected |
| 24 | + * mReplaceStr - Collected patterns that should be passed to children |
25 | 25 | * mChildRule - What Parse rule to use to gather children for this element |
26 | 26 | * @ingroup Parser |
27 | 27 | */ |
28 | | -class ParseRule extends ParseObject { |
29 | | - private $mBeginTag, $mChildRule, $mEndTag; |
| 28 | +class ParsePattern extends ParseObject { |
| 29 | + private $mBeginTag, $mChildRule, $mReplaceStr; |
30 | 30 | |
31 | | - function __construct($name, $beginTag, $childRule = NULL, $endTag = NULL) { |
| 31 | + function __construct($name, $beginTag, $childRule = NULL, $replaceStr = NULL) { |
32 | 32 | parent::__construct($name); |
33 | 33 | $this->mBeginTag = $beginTag; |
34 | 34 | $this->mChildRule = $childRule; |
35 | | - $this->mEndTag = $endTag; |
| 35 | + $this->mReplaceStr = $replaceStr; |
36 | 36 | } |
37 | 37 | |
38 | | - function parse(&$text, &$rules, $endTag = NULL) { |
39 | | - if (! preg_match($this->mBeginTag, $text, $matches)) { |
| 38 | + function parse(&$text, &$rules, $replaceStr = NULL) { |
| 39 | + $beginTag = $this->mBeginTag; |
| 40 | + if ($replaceStr != NULL) { |
| 41 | + $beginTag = str_replace('~r', $replaceStr, $beginTag); |
| 42 | + } |
| 43 | + if (! preg_match($beginTag, $text, $matches)) { |
40 | 44 | return NULL; |
41 | 45 | } |
42 | 46 | $text = substr($text, strlen($matches[0])); |
43 | | - $child = NULL; |
| 47 | + $children = NULL; |
44 | 48 | if ($this->mChildRule != NULL) { |
45 | | - if ($this->mEndTag != NULL) { |
46 | | - $endTag = $this->mEndTag; |
| 49 | + if ($this->mReplaceStr != NULL) { |
| 50 | + $replaceStr = $this->mReplaceStr; |
47 | 51 | foreach ($matches as $i => $crrnt) { |
48 | | - $endTag = str_replace('~' . $i, $crrnt, $endTag); |
| 52 | + $replaceStr = str_replace('~' . $i, $crrnt, $replaceStr); |
49 | 53 | } |
50 | 54 | } |
51 | | - $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag); |
| 55 | + $child = $rules[$this->mChildRule]->parse($text, $rules, $replaceStr); |
52 | 56 | if ($child == NULL) { |
53 | 57 | return NULL; |
54 | 58 | } |
| 59 | + $children = array($child); |
55 | 60 | } |
56 | | - return new ParseTree($this->mName, $matches, array($child)); |
| 61 | + return new ParseTree($this->mName, $matches, $children); |
57 | 62 | } |
58 | 63 | } |
59 | 64 | |
— | — | @@ -77,27 +82,30 @@ |
78 | 83 | $this->mMaxChildren = $maxChildren; |
79 | 84 | } |
80 | 85 | |
81 | | - function parse(&$text, &$rules, $endTag = NULL) { |
82 | | - $endRegEx = $this->mEndTag; |
83 | | - if ($this->mEndTag != NULL && $endTag != NULL) { |
84 | | - $endRegEx = str_replace('~r', $endTag, $this->mEndTag); |
| 86 | + function parse(&$text, &$rules, $replaceStr = NULL) { |
| 87 | + $endTag = $this->mEndTag; |
| 88 | + if ($endTag != NULL && $replaceStr != NULL) { |
| 89 | + $endTag = str_replace('~r', $replaceStr, $endTag); |
85 | 90 | } |
86 | 91 | $children = array(); |
87 | | - for ($i = 0; $i < $this->mMinChildren || (($endRegEx == NULL || ! preg_match($endRegEx, $text, $matches)) && |
| 92 | + for ($i = 0; $i < $this->mMinChildren || (($endTag == NULL || ! preg_match($endTag, $text, $matches)) && |
88 | 93 | ($this->mMaxChildren <= 0 || $i < $this->mMaxChildren)); $i ++) { |
89 | | - $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag); |
| 94 | + $child = $rules[$this->mChildRule]->parse($text, $rules, $replaceStr); |
90 | 95 | if ($child == NULL) { |
91 | | - return NULL; |
| 96 | + if ($endTag != NULL || $i < $this->mMinChildren) { |
| 97 | + return NULL; |
| 98 | + } |
| 99 | + break; |
92 | 100 | } |
93 | 101 | $children[] = $child; |
94 | 102 | } |
95 | | - if ($this->mEndTag != NULL) { |
| 103 | + if ($endTag != NULL) { |
96 | 104 | if (! isset($matches[0])) { |
97 | 105 | return NULL; |
98 | 106 | } |
99 | 107 | $text = substr($text, strlen($matches[0])); |
100 | 108 | } |
101 | | - return new ParseTree($this->mName, $matches, $children); |
| 109 | + return new ParseTree($this->mName, NULL, $children); |
102 | 110 | } |
103 | 111 | } |
104 | 112 | |
— | — | @@ -106,18 +114,22 @@ |
107 | 115 | * mList - The list of rules |
108 | 116 | * @ingroup Parser |
109 | 117 | */ |
110 | | -class ParseList extends ParseObject { |
111 | | - private $mList; |
| 118 | +class ParseChoice extends ParseObject { |
| 119 | + private $mList, $matchChar; |
112 | 120 | |
113 | | - function __construct($name, $list) { |
| 121 | + function __construct($name, $list, $matchChar = null) { |
114 | 122 | parent::__construct($name); |
115 | 123 | $this->mList = $list; |
| 124 | + $this->mMatchChar = $matchChar; |
116 | 125 | } |
117 | 126 | |
118 | | - function parse(&$text, &$rules, $endTag = NULL) { |
| 127 | + function parse(&$text, &$rules, $replaceStr = NULL) { |
| 128 | + if ($this->mMatchChar != NULL && $text[0] != $this->mMatchChar) { |
| 129 | + return NULL; |
| 130 | + } |
119 | 131 | foreach ($this->mList as $crrnt) { |
120 | 132 | $newText = $text; |
121 | | - $child = $rules[$crrnt]->parse($newText, $rules, $endTag); |
| 133 | + $child = $rules[$crrnt]->parse($newText, $rules, $replaceStr); |
122 | 134 | if ($child != NULL) { |
123 | 135 | $text = $newText; |
124 | 136 | return new ParseTree($this->mName, NULL, array($child)); |
— | — | @@ -141,10 +153,10 @@ |
142 | 154 | $this->mList = $list; |
143 | 155 | } |
144 | 156 | |
145 | | - function parse(&$text, &$rules, $endTag = NULL) { |
| 157 | + function parse(&$text, &$rules, $replaceStr = NULL) { |
146 | 158 | $children = array(); |
147 | 159 | foreach ($this->mList as $crrnt) { |
148 | | - $child = $rules[$crrnt]->parse($text, $rules, $endTag); |
| 160 | + $child = $rules[$crrnt]->parse($text, $rules, $replaceStr); |
149 | 161 | if ($child == NULL) { |
150 | 162 | return NULL; |
151 | 163 | } |
— | — | @@ -157,7 +169,7 @@ |
158 | 170 | /** |
159 | 171 | * The parse tree of the data. |
160 | 172 | * printTree translates the parse tree to xml; eventually this should be separated into a data and engine layer. |
161 | | - * mName - Indicates what ParseRule was used to create this node |
| 173 | + * mName - Indicates what ParseObject was used to create this node |
162 | 174 | * mMatches - The text groups that were collected by the regular expressions used when creating this rule |
163 | 175 | * mChildren - The child ParseTree nodes in this tree |
164 | 176 | * @ingroup Parser |
— | — | @@ -197,17 +209,6 @@ |
198 | 210 | if (isset($this->mMatches[1])) { |
199 | 211 | $retString = "<ignore>" . htmlspecialchars($this->mMatches[1]) . "</ignore>"; |
200 | 212 | } |
201 | | - } elseif ($this->mName == "comment" || $this->mName == "ignore") { |
202 | | - $retString = "<" . $this->mName . ">" . htmlspecialchars($this->mMatches[0]) . "</" . $this->mName . ">"; |
203 | | - } elseif ($this->mName == "ext") { |
204 | | - $retString = "<name>" . htmlspecialchars($this->mMatches[1]) . "</name><attr>" . htmlspecialchars($this->mMatches[2]) . "</attr>"; |
205 | | - if (isset($this->mMatches[3])) { |
206 | | - $retString .= "<inner>" . htmlspecialchars($this->mMatches[3]) . "</inner>"; |
207 | | - } |
208 | | - if (isset($this->mMatches[4])) { |
209 | | - $retString .= "<close>" . htmlspecialchars($this->mMatches[4]) . "</close>"; |
210 | | - } |
211 | | - $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
212 | 213 | } elseif ($this->mName == "link") { |
213 | 214 | $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren[0]->printTree() . "]]"; |
214 | 215 | } elseif ($this->mName == "h") { |
— | — | @@ -216,9 +217,13 @@ |
217 | 218 | if ($this->mMatches[1] == "\n") { |
218 | 219 | $retString = "\n" . $retString; |
219 | 220 | } |
220 | | - } else { |
221 | | - foreach ($this->mChildren as $crrnt) { |
222 | | - $retString .= $crrnt->printTree(); |
| 221 | + } elseif ($this->mName != "unUsed") { |
| 222 | + if ($this->mChildren != NULL) { |
| 223 | + foreach ($this->mChildren as $crrnt) { |
| 224 | + $retString .= $crrnt->printTree(); |
| 225 | + } |
| 226 | + } else { |
| 227 | + $retString = htmlspecialchars($this->mMatches[0]); |
223 | 228 | } |
224 | 229 | if ($this->mName != "unnamed") { |
225 | 230 | $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php |
— | — | @@ -70,37 +70,49 @@ |
71 | 71 | // To XML |
72 | 72 | $xmlishRegex = implode('|', $this->parser->getStripList()); |
73 | 73 | $rules = array( |
74 | | - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', "TemplateSeq", '}}'), |
75 | | - "TplArg" => new ParseRule("tplarg", '/^{{{/s', "TemplateSeq", '}}}'), |
76 | | - "TplPart" => new ParseRule("part", '/^\|/s', "TplPartList"), |
77 | | - "Link" => new ParseRule("link", '/^\[\[/s', "MainQuant", ']]'), |
78 | | - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'), |
79 | | - "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'), |
80 | | - "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'), |
81 | | - "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'), |
82 | | - "NoInclude" => new ParseRule("ignore", '/^<\/?noinclude>/s'), |
83 | | - "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?<\/includeonly>/s'), |
84 | | - "XmlClosed" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')([^>]*)\/>/si'), |
85 | | - "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>)/si'), |
86 | | - "BeginFile" => new ParseRule("bof", '/^~BOF/s'), |
87 | | - "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'), |
88 | | - "Root" => new ParseQuant("root", "MainList", '/^$/'), |
89 | | - "MainQuant" => new ParseQuant("unnamed", "MainList", '/^~r/s'), |
90 | | - "HeadingQuant" => new ParseQuant("unnamed", "MainList", '/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'), |
91 | | - "TplTitle" => new ParseQuant("title", "MainList", '/^(?=~r|\|)/s'), |
| 74 | + "Template" => new ParsePattern("template", '/^{{(?!{[^{])/s', "TemplateSeq", '}}'), |
| 75 | + "TplArg" => new ParsePattern("tplarg", '/^{{{/s', "TemplateSeq", '}}}'), |
| 76 | + "TplPart" => new ParsePattern("part", '/^\|/s', "TplPartList"), |
| 77 | + "Link" => new ParsePattern("link", '/^\[\[/s', "MainQuant", ']]'), |
| 78 | + "Heading" => new ParsePattern("h", '/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'), |
| 79 | + "XmlExt" => new ParsePattern("ext", '/^<(?=(' . $xmlishRegex . '))/si', "XmlExtSeq", '~1'), |
| 80 | + "CommentLine" => new ParsePattern("commentline", '/^\n((?:<!--.*?-->\n)+)/s'), |
| 81 | + "Comment" => new ParsePattern("comment", '/^<!--.*?(?:-->|$)/s'), |
| 82 | + "OnlyInclude" => new ParsePattern("ignore", '/^<\/?onlyinclude>/s'), |
| 83 | + "NoInclude" => new ParsePattern("ignore", '/^<\/?noinclude>/s'), |
| 84 | + "IncludeOnly" => new ParsePattern("ignore", '/^<includeonly>.*?<\/includeonly>/s'), |
| 85 | + "BeginFile" => new ParsePattern("bof", '/^~BOF/s'), |
| 86 | + "MainText" => new ParsePattern("text", '/^.[^{}\[\]<\n|=]*/s'), |
| 87 | + "XmlName" => new ParsePattern("name", '/^.*?(?= |\/>|>)/s'), |
| 88 | + "XmlAttr" => new ParsePattern("attr", '/^.*?(?=\/>|>)/s'), |
| 89 | + "XmlClosed" => new ParsePattern("unUsed", '/^\/>/si'), |
| 90 | + "XmlOpened" => new ParsePattern("unUsed", '/^>/si'), |
| 91 | + "XmlInner" => new ParsePattern("inner", '/^.*?(?=<\/~r>|$)/si'), |
| 92 | + "XmlCloseTag" => new ParsePattern("close", '/^<\/~r>/si'), |
| 93 | + "Root" => new ParseQuant("root", "MainChoice", '/^$/'), |
| 94 | + "MainQuant" => new ParseQuant("unnamed", "MainChoice", '/^~r/s'), |
| 95 | + "HeadingQuant" => new ParseQuant("unnamed", "MainChoice", '/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'), |
| 96 | + "TplTitle" => new ParseQuant("title", "MainChoice", '/^(?=~r|\|)/s'), |
92 | 97 | "TplPartQuant" => new ParseQuant("unnamed", "TplPart", '/^~r/s'), |
93 | | - "TplTest" => new ParseQuant("unnamed", "MainList", '/^(?=~r|\||=(?!~r|\|))/s'), |
| 98 | + "TplTest" => new ParseQuant("unnamed", "MainChoice", '/^(?=~r|\||=(?!~r|\|))/s'), |
94 | 99 | "TplName" => new ParseQuant("name", "TplTest", '/^=/s', 0, 1), |
95 | | - "TplValue" => new ParseQuant("value", "MainList", '/^(?=~r|\|)/s'), |
96 | | - "MainList" => new ParseList("unnamed", array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")), |
97 | | - "TplPartList" => new ParseList("unnamed", array("TplPartSeq", "TplValue")), |
| 100 | + "TplValue" => new ParseQuant("value", "MainChoice", '/^(?=~r|\|)/s'), |
| 101 | + "XmlCloseQuant" => new ParseQuant("unnamed", "XmlCloseTag", NULL, 0, 1), |
| 102 | + "MainChoice" => new ParseChoice("unnamed", array("CurlyChoice", "XmlChoice", |
| 103 | + "Heading", "CommentLine", "Link", "BeginFile", "MainText")), |
| 104 | + "CurlyChoice" => new ParseChoice("unnamed", array("Template", "TplArg"), "{"), |
| 105 | + "XmlChoice" => new ParseChoice("unnamed", array("Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlExt"), "<"), |
| 106 | + "TplPartList" => new ParseChoice("unnamed", array("TplPartSeq", "TplValue")), |
| 107 | + "XmlClose" => new ParseChoice("unnamed", array("XmlClosed", "XmlOpenedSeq")), |
98 | 108 | "TemplateSeq" => new ParseSeq("unnamed", array("TplTitle", "TplPartQuant")), |
99 | | - "TplPartSeq" => new ParseSeq("unnamed", array("TplName", "TplValue"))); |
| 109 | + "TplPartSeq" => new ParseSeq("unnamed", array("TplName", "TplValue")), |
| 110 | + "XmlExtSeq" => new ParseSeq("unnamed", array("XmlName", "XmlAttr", "XmlClose")), |
| 111 | + "XmlOpenedSeq" => new ParseSeq("unnamed", array("XmlOpened", "XmlInner", "XmlCloseQuant"))); |
100 | 112 | if ($flags & Parser::PTD_FOR_INCLUSION) { |
101 | | - $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s'); |
102 | | - $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?<\/noinclude>/s'); |
103 | | - $rules["IncludeOnly"] = new ParseRule("ignore", '/^<\/?includeonly>/s'); |
104 | | - $rules["BeginFile"] = new ParseRule("bof", '/^~BOF(.*?<onlyinclude>)?/s'); |
| 113 | + $rules["OnlyInclude"] = new ParsePattern("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s'); |
| 114 | + $rules["NoInclude"] = new ParsePattern("ignore", '/^<noinclude>.*?<\/noinclude>/s'); |
| 115 | + $rules["IncludeOnly"] = new ParsePattern("ignore", '/^<\/?includeonly>/s'); |
| 116 | + $rules["BeginFile"] = new ParsePattern("bof", '/^~BOF(.*?<onlyinclude>)?/s'); |
105 | 117 | } |
106 | 118 | |
107 | 119 | $parseTree = ParseTree::createParseTree($text, $rules); |