r62687 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r62686‎ | r62687 | r62688 >
Date:03:07, 19 February 2010
Author:than4213
Status:deferred
Tags:
Comment:
Created Parser quantifiers (ParseQuant). These will be used more in the future.
Modified paths:
  • /branches/parser-work/phase3/includes/parser/ParseTree.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php (modified) (history)

Diff [purge]

Index: branches/parser-work/phase3/includes/parser/ParseTree.php
@@ -6,7 +6,7 @@
77 */
88 interface ParseObject {
99 // Does the parse task specific to each parse object
10 - function parse(&$text, &$rules);
 10+ function parse(&$text, &$rules, $endTag = NULL);
1111 }
1212
1313 /**
@@ -15,7 +15,7 @@
1616 * mName - The name to give the resultant ParseTree object
1717 * mBeginTag - the regular expression used to determine if this is the rule that should be used
1818 * mEndTag - If ParseTrees of this type are to have children, mEndTag specifies when all of the children are collected
19 - * mChildRule - an extra rule to consider when collecting children, it is only used for situations covered by the HHP21 parser test
 19+ * mChildRule - What Parse rule to use to gather children for this element
2020 * @ingroup Parser
2121 */
2222 class ParseRule implements ParseObject {
@@ -28,26 +28,23 @@
2929 $this->mChildRule = $childRule;
3030 }
3131
32 - function parse(&$text, &$rules) {
 32+ function parse(&$text, &$rules, $endTag = NULL) {
3333 if (! preg_match($this->mBeginTag, $text, $matches)) {
3434 return NULL;
3535 }
3636 $newText = substr($text, strlen($matches[0]));
37 - $children = array();
38 - if ($this->mChildRule != NULL && $this->mEndTag != NULL) {
 37+ $children = NULL;
 38+ if ($this->mChildRule != NULL) {
3939 $endTag = $this->mEndTag;
40 - foreach ($matches as $i => $crrnt) {
41 - $endTag = str_replace('~' . $i, $crrnt, $endTag);
42 - }
43 - while (! preg_match($endTag, $newText, $endMatches)) {
44 - $child = $rules[$this->mChildRule]->parse($newText, $rules);
45 - if ($child == NULL) {
46 - return NULL;
 40+ if ($endTag != NULL) {
 41+ foreach ($matches as $i => $crrnt) {
 42+ $endTag = str_replace('~' . $i, $crrnt, $endTag);
4743 }
48 - $children[] = $child;
4944 }
50 - $newText = substr($newText, strlen($endMatches[0]));
51 - $matches = array_merge($matches, $endMatches);
 45+ $children = $rules[$this->mChildRule]->parse($newText, $rules, $endTag);
 46+ if ($children == NULL) {
 47+ return NULL;
 48+ }
5249 }
5350 $text = $newText;
5451 return new ParseTree($this->mName, $matches, $children);
@@ -55,6 +52,47 @@
5653 }
5754
5855 /**
 56+ * A rule specifying how to parse the text.
 57+ * If the text matches mBeginTag then a ParseTree object is created with the appropriate info.
 58+ * mName - The name to give the resultant ParseTree object
 59+ * mChildRule - What Parse rule to use to gather children for this element
 60+ * mEndTag - If ParseTrees of this type are to have children, mEndTag specifies when all of the children are collected
 61+ * mMinChildren - Minimum amount of children for this rule
 62+ * mMaxChildren - Maximum amount of children for this rule, 0 means unlimited
 63+ * @ingroup Parser
 64+ */
 65+class ParseQuant implements ParseObject {
 66+ private $mName, $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren;
 67+
 68+ function __construct($name, $childRule, $endTag = NULL, $minChildren = 0, $maxChildren = 0) {
 69+ $this->mName = $name;
 70+ $this->mChildRule = $childRule;
 71+ $this->mEndTag = $endTag;
 72+ $this->mMinChildren = $minChildren;
 73+ $this->mMaxChildren = $maxChildren;
 74+ }
 75+
 76+ function parse(&$text, &$rules, $endTag = NULL) {
 77+ $children = array();
 78+ for ($i = 0; $i < $minChildren || (($this->mEndTag == NULL || ! preg_match($this->mEndTag, $text, $matches)) &&
 79+ ($endTag == NULL || ! preg_match($endTag, $text, $matches)) && ($maxChildren <= 0 || $i < $maxChildren)); $i ++) {
 80+ $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag);
 81+ if ($child == NULL) {
 82+ return NULL;
 83+ }
 84+ $children[] = $child;
 85+ }
 86+ if ($endTag != NULL) {
 87+ if (!isset($matches[0])) {
 88+ return NULL;
 89+ }
 90+ $text = substr($text, strlen($matches[0]));
 91+ }
 92+ return new ParseTree($this->mName, $matches, $children);
 93+ }
 94+}
 95+
 96+/**
5997 * Contains a list of rules to cycle through when creating a parse tree
6098 * mList - The list of rules
6199 * @ingroup Parser
@@ -66,9 +104,9 @@
67105 $this->mList = $list;
68106 }
69107
70 - function parse(&$text, &$rules) {
 108+ function parse(&$text, &$rules, $endTag = NULL) {
71109 foreach ($this->mList as $crrnt) {
72 - $child = $rules[$crrnt]->parse($text, $rules);
 110+ $child = $rules[$crrnt]->parse($text, $rules, $endTag);
73111 if ($child != NULL) {
74112 return $child;
75113 }
@@ -131,64 +169,52 @@
132170 $retString .= "<close>" . htmlspecialchars($this->mMatches[4]) . "</close>";
133171 }
134172 $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
135 - } elseif (($this->mName == "template" || $this->mName == "tplarg") && isset($this->mMatches[1])) {
 173+ } elseif ($this->mName == "link") {
 174+ $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren->printTree() . "]]";
 175+ } elseif ($this->mName == "h") {
 176+ $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren->printTree() .
 177+ htmlspecialchars($this->mMatches[2]) . "</h>";
 178+ if ($this->mMatches[1] == "\n") {
 179+ $retString = "\n" . $retString;
 180+ }
 181+ } elseif ($this->mName == "template" || $this->mName == "tplarg") {
 182+ $retString = "<" . $this->mName . ">" . $this->mChildren->printTree() . "</" . $this->mName . ">";
 183+ } elseif ($this->mName == "templatequant") {
136184 $inTitle = true;
137185 $foundEquals = false;
138186 $currentItem = "";
139187 $this->mChildren[] = new ParseTree("pipe", NULL, NULL);
140188 foreach ($this->mChildren as $crrnt) {
141 - if ($crrnt instanceof ParseTree) {
142 - if ($crrnt->getName() == "pipe") {
143 - if ($inTitle) {
144 - $retString .= "<title>" . $currentItem . "</title>";
145 - $inTitle = false;
146 - } else {
147 - if (! $foundEquals) {
148 - $retString .= "<part>";
149 - }
150 - $retString .= "<value>" . $currentItem . "</value></part>";
151 - $foundEquals = false;
 189+ if ($crrnt->getName() == "pipe") {
 190+ if ($inTitle) {
 191+ $retString .= "<title>" . $currentItem . "</title>";
 192+ $inTitle = false;
 193+ } else {
 194+ if (! $foundEquals) {
 195+ $retString .= "<part>";
152196 }
 197+ $retString .= "<value>" . $currentItem . "</value></part>";
 198+ $foundEquals = false;
 199+ }
 200+ $currentItem = "";
 201+ } elseif ($crrnt->getName() == "equals") {
 202+ if (! $inTitle && ! $foundEquals) {
 203+ $retString .= "<part><name>" . $currentItem . "</name>";
 204+ $foundEquals = true;
153205 $currentItem = "";
154 - } elseif ($crrnt->getName() == "equals") {
155 - if (! $inTitle && ! $foundEquals) {
156 - $retString .= "<part><name>" . $currentItem . "</name>";
157 - $foundEquals = true;
158 - $currentItem = "";
159 - } else {
160 - $currentItem .= "=";
161 - }
162206 } else {
163 - $currentItem .= $crrnt->printTree();
 207+ $currentItem .= "=";
164208 }
165209 } else {
166 - $currentItem .= htmlspecialchars($crrnt);
 210+ $currentItem .= $crrnt->printTree();
167211 }
168212 }
169 - $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
170213 } else {
171214 foreach ($this->mChildren as $crrnt) {
172 - if ($crrnt instanceof ParseTree) {
173 - $retString .= $crrnt->printTree();
174 - } else {
175 - $retString .= htmlspecialchars($crrnt);
176 - }
 215+ $retString .= $crrnt->printTree();
177216 }
178217 if ($this->mName == "root") {
179218 $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
180 - } elseif ($this->mName == "link") {
181 - $retString = htmlspecialchars($this->mMatches[0]) . $retString;
182 - if (isset($this->mMatches[1])) {
183 - $retString .= htmlspecialchars($this->mMatches[1]);
184 - }
185 - } elseif ($this->mName == "h") {
186 - $retString = htmlspecialchars($this->mMatches[2]) . $retString;
187 - if (isset($this->mMatches[3])) {
188 - $retString = "<h>" . $retString . htmlspecialchars($this->mMatches[3]) . "</h>";
189 - }
190 - if ($this->mMatches[1] == "\n") {
191 - $retString = "\n" . $retString;
192 - }
193219 }
194220 }
195221
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
@@ -70,11 +70,10 @@
7171 // To XML
7272 $xmlishRegex = implode('|', $this->parser->getStripList());
7373 $rules = array(
74 - "Root" => new ParseRule("root", '/^/', '/^$/', "MainList"),
75 - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateList"),
76 - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateList"),
77 - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainList"),
78 - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?-->)*(?=\n|$)/s', "MainList"),
 74+ "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateQuant"),
 75+ "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateQuant"),
 76+ "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainQuant"),
 77+ "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?=(?: *<!--.*?-->)*(?:\n|$))/s', "MainQuant"),
7978 "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'),
8079 "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'),
8180 "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'),
@@ -86,6 +85,9 @@
8786 "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'),
8887 "TplPipe" => new ParseRule("pipe", '/^\|/s'),
8988 "TplEquals" => new ParseRule("equals", '/^=/s'),
 89+ "Root" => new ParseQuant("root", "MainList", '/^$/'),
 90+ "MainQuant" => new ParseQuant("mainquant", "MainList"),
 91+ "TemplateQuant" => new ParseQuant("templatequant", "TemplateList"),
9092 "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")),
9193 "TemplateList" => new ParseList(array("TplPipe", "TplEquals", "MainList")));
9294 if ($flags & Parser::PTD_FOR_INCLUSION) {

Status & tagging log