r62754 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r62753‎ | r62754 | r62755 >
Date:01:00, 21 February 2010
Author:than4213
Status:deferred
Tags:
Comment:
Created parse sequences and split up the parsing of templates into their different parts
Modified paths:
  • /branches/parser-work/phase3/includes/parser/ParseTree.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php (modified) (history)

Diff [purge]

Index: branches/parser-work/phase3/includes/parser/ParseTree.php
@@ -4,9 +4,15 @@
55 * Interface for Parse Object each with a specialized task while parsing
66 * @ingroup Parser
77 */
8 -interface ParseObject {
 8+abstract class ParseObject {
 9+ protected $mName;
 10+
 11+ function __construct($name) {
 12+ $this->mName = $name;
 13+ }
 14+
915 // Does the parse task specific to each parse object
10 - function parse(&$text, &$rules, $endTag = NULL);
 16+ abstract function parse(&$text, &$rules, $endTag = NULL);
1117 }
1218
1319 /**
@@ -18,36 +24,35 @@
1925 * mChildRule - What Parse rule to use to gather children for this element
2026 * @ingroup Parser
2127 */
22 -class ParseRule implements ParseObject {
23 - private $mName, $mBeginTag, $mEndTag, $mChildRule;
 28+class ParseRule extends ParseObject {
 29+ private $mBeginTag, $mChildRule, $mEndTag;
2430
25 - function __construct($name, $beginTag, $endTag = NULL, $childRule = NULL) {
26 - $this->mName = $name;
 31+ function __construct($name, $beginTag, $childRule = NULL, $endTag = NULL) {
 32+ parent::__construct($name);
2733 $this->mBeginTag = $beginTag;
 34+ $this->mChildRule = $childRule;
2835 $this->mEndTag = $endTag;
29 - $this->mChildRule = $childRule;
3036 }
3137
3238 function parse(&$text, &$rules, $endTag = NULL) {
3339 if (! preg_match($this->mBeginTag, $text, $matches)) {
3440 return NULL;
3541 }
36 - $newText = substr($text, strlen($matches[0]));
37 - $children = NULL;
 42+ $text = substr($text, strlen($matches[0]));
 43+ $child = NULL;
3844 if ($this->mChildRule != NULL) {
39 - $endTag = $this->mEndTag;
40 - if ($endTag != NULL) {
 45+ if ($this->mEndTag != NULL) {
 46+ $endTag = $this->mEndTag;
4147 foreach ($matches as $i => $crrnt) {
4248 $endTag = str_replace('~' . $i, $crrnt, $endTag);
4349 }
4450 }
45 - $children = $rules[$this->mChildRule]->parse($newText, $rules, $endTag);
46 - if ($children == NULL) {
 51+ $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag);
 52+ if ($child == NULL) {
4753 return NULL;
4854 }
4955 }
50 - $text = $newText;
51 - return new ParseTree($this->mName, $matches, $children);
 56+ return new ParseTree($this->mName, $matches, array($child));
5257 }
5358 }
5459
@@ -61,11 +66,11 @@
6267 * mMaxChildren - Maximum amount of children for this rule, 0 means unlimited
6368 * @ingroup Parser
6469 */
65 -class ParseQuant implements ParseObject {
66 - private $mName, $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren;
 70+class ParseQuant extends ParseObject {
 71+ private $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren;
6772
6873 function __construct($name, $childRule, $endTag = NULL, $minChildren = 0, $maxChildren = 0) {
69 - $this->mName = $name;
 74+ parent::__construct($name);
7075 $this->mChildRule = $childRule;
7176 $this->mEndTag = $endTag;
7277 $this->mMinChildren = $minChildren;
@@ -73,17 +78,21 @@
7479 }
7580
7681 function parse(&$text, &$rules, $endTag = NULL) {
 82+ $endRegEx = $this->mEndTag;
 83+ if ($this->mEndTag != NULL && $endTag != NULL) {
 84+ $endRegEx = str_replace('~r', $endTag, $this->mEndTag);
 85+ }
7786 $children = array();
78 - for ($i = 0; $i < $minChildren || (($this->mEndTag == NULL || ! preg_match($this->mEndTag, $text, $matches)) &&
79 - ($endTag == NULL || ! preg_match($endTag, $text, $matches)) && ($maxChildren <= 0 || $i < $maxChildren)); $i ++) {
 87+ for ($i = 0; $i < $this->mMinChildren || (($endRegEx == NULL || ! preg_match($endRegEx, $text, $matches)) &&
 88+ ($this->mMaxChildren <= 0 || $i < $this->mMaxChildren)); $i ++) {
8089 $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag);
8190 if ($child == NULL) {
8291 return NULL;
8392 }
8493 $children[] = $child;
8594 }
86 - if ($endTag != NULL) {
87 - if (!isset($matches[0])) {
 95+ if ($this->mEndTag != NULL) {
 96+ if (! isset($matches[0])) {
8897 return NULL;
8998 }
9099 $text = substr($text, strlen($matches[0]));
@@ -97,18 +106,21 @@
98107 * mList - The list of rules
99108 * @ingroup Parser
100109 */
101 -class ParseList implements ParseObject {
 110+class ParseList extends ParseObject {
102111 private $mList;
103112
104 - function __construct($list) {
 113+ function __construct($name, $list) {
 114+ parent::__construct($name);
105115 $this->mList = $list;
106116 }
107117
108118 function parse(&$text, &$rules, $endTag = NULL) {
109119 foreach ($this->mList as $crrnt) {
110 - $child = $rules[$crrnt]->parse($text, $rules, $endTag);
 120+ $newText = $text;
 121+ $child = $rules[$crrnt]->parse($newText, $rules, $endTag);
111122 if ($child != NULL) {
112 - return $child;
 123+ $text = $newText;
 124+ return new ParseTree($this->mName, NULL, array($child));
113125 }
114126 }
115127 return NULL;
@@ -116,6 +128,33 @@
117129 }
118130
119131 /**
 132+ * Contains a sequence of rules all of which must pass
 133+ * mName - The name to give the resultant ParseTree object
 134+ * mList - The sequence of rules
 135+ * @ingroup Parser
 136+ */
 137+class ParseSeq extends ParseObject {
 138+ private $mList;
 139+
 140+ function __construct($name, $list) {
 141+ parent::__construct($name);
 142+ $this->mList = $list;
 143+ }
 144+
 145+ function parse(&$text, &$rules, $endTag = NULL) {
 146+ $children = array();
 147+ foreach ($this->mList as $crrnt) {
 148+ $child = $rules[$crrnt]->parse($text, $rules, $endTag);
 149+ if ($child == NULL) {
 150+ return NULL;
 151+ }
 152+ $children[] = $child;
 153+ }
 154+ return new ParseTree($this->mName, NULL, $children);
 155+ }
 156+}
 157+
 158+/**
120159 * The parse tree of the data.
121160 * printTree translates the parse tree to xml, eventually this should be separated into a data and engine layer.
122161 * mName - Indicates what ParseRule was used to create this node
@@ -170,50 +209,18 @@
171210 }
172211 $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
173212 } elseif ($this->mName == "link") {
174 - $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren->printTree() . "]]";
 213+ $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren[0]->printTree() . "]]";
175214 } elseif ($this->mName == "h") {
176 - $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren->printTree() .
 215+ $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren[0]->printTree() .
177216 htmlspecialchars($this->mMatches[2]) . "</h>";
178217 if ($this->mMatches[1] == "\n") {
179218 $retString = "\n" . $retString;
180219 }
181 - } elseif ($this->mName == "template" || $this->mName == "tplarg") {
182 - $retString = "<" . $this->mName . ">" . $this->mChildren->printTree() . "</" . $this->mName . ">";
183 - } elseif ($this->mName == "templatequant") {
184 - $inTitle = true;
185 - $foundEquals = false;
186 - $currentItem = "";
187 - $this->mChildren[] = new ParseTree("pipe", NULL, NULL);
188 - foreach ($this->mChildren as $crrnt) {
189 - if ($crrnt->getName() == "pipe") {
190 - if ($inTitle) {
191 - $retString .= "<title>" . $currentItem . "</title>";
192 - $inTitle = false;
193 - } else {
194 - if (! $foundEquals) {
195 - $retString .= "<part>";
196 - }
197 - $retString .= "<value>" . $currentItem . "</value></part>";
198 - $foundEquals = false;
199 - }
200 - $currentItem = "";
201 - } elseif ($crrnt->getName() == "equals") {
202 - if (! $inTitle && ! $foundEquals) {
203 - $retString .= "<part><name>" . $currentItem . "</name>";
204 - $foundEquals = true;
205 - $currentItem = "";
206 - } else {
207 - $currentItem .= "=";
208 - }
209 - } else {
210 - $currentItem .= $crrnt->printTree();
211 - }
212 - }
213220 } else {
214221 foreach ($this->mChildren as $crrnt) {
215222 $retString .= $crrnt->printTree();
216223 }
217 - if ($this->mName == "root") {
 224+ if ($this->mName != "unnamed") {
218225 $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
219226 }
220227 }
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
@@ -70,10 +70,11 @@
7171 // To XML
7272 $xmlishRegex = implode('|', $this->parser->getStripList());
7373 $rules = array(
74 - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateQuant"),
75 - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateQuant"),
76 - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainQuant"),
77 - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?=(?: *<!--.*?-->)*(?:\n|$))/s', "MainQuant"),
 74+ "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', "TemplateSeq", '}}'),
 75+ "TplArg" => new ParseRule("tplarg", '/^{{{/s', "TemplateSeq", '}}}'),
 76+ "TplPart" => new ParseRule("part", '/^\|/s', "TplPartList"),
 77+ "Link" => new ParseRule("link", '/^\[\[/s', "MainQuant", ']]'),
 78+ "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'),
7879 "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'),
7980 "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'),
8081 "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'),
@@ -83,13 +84,18 @@
8485 "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>)/si'),
8586 "BeginFile" => new ParseRule("bof", '/^~BOF/s'),
8687 "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'),
87 - "TplPipe" => new ParseRule("pipe", '/^\|/s'),
88 - "TplEquals" => new ParseRule("equals", '/^=/s'),
8988 "Root" => new ParseQuant("root", "MainList", '/^$/'),
90 - "MainQuant" => new ParseQuant("mainquant", "MainList"),
91 - "TemplateQuant" => new ParseQuant("templatequant", "TemplateList"),
92 - "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")),
93 - "TemplateList" => new ParseList(array("TplPipe", "TplEquals", "MainList")));
 89+ "MainQuant" => new ParseQuant("unnamed", "MainList", '/^~r/s'),
 90+ "HeadingQuant" => new ParseQuant("unnamed", "MainList", '/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'),
 91+ "TplTitle" => new ParseQuant("title", "MainList", '/^(?=~r|\|)/s'),
 92+ "TplPartQuant" => new ParseQuant("unnamed", "TplPart", '/^~r/s'),
 93+ "TplTest" => new ParseQuant("unnamed", "MainList", '/^(?=~r|\||=(?!~r|\|))/s'),
 94+ "TplName" => new ParseQuant("name", "TplTest", '/^=/s', 0, 1),
 95+ "TplValue" => new ParseQuant("value", "MainList", '/^(?=~r|\|)/s'),
 96+ "MainList" => new ParseList("unnamed", array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")),
 97+ "TplPartList" => new ParseList("unnamed", array("TplPartSeq", "TplValue")),
 98+ "TemplateSeq" => new ParseSeq("unnamed", array("TplTitle", "TplPartQuant")),
 99+ "TplPartSeq" => new ParseSeq("unnamed", array("TplName", "TplValue")));
94100 if ($flags & Parser::PTD_FOR_INCLUSION) {
95101 $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s');
96102 $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?<\/noinclude>/s');

Status & tagging log