r62796 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r62795‎ | r62796 | r62797 >
Date:00:08, 22 February 2010
Author:than4213
Status:deferred
Tags:
Comment:
Split up extension tags and add character-specific ParseChoices.
Modified paths:
  • /branches/parser-work/phase3/includes/parser/ParseTree.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php (modified) (history)

Diff [purge]

Index: branches/parser-work/phase3/includes/parser/ParseTree.php
@@ -12,7 +12,7 @@
1313 }
1414
1515 // Does the parse task specific to each parse object
16 - abstract function parse(&$text, &$rules, $endTag = NULL);
 16+ abstract function parse(&$text, &$rules, $replaceStr = NULL);
1717 }
1818
1919 /**
@@ -20,39 +20,44 @@
2121 * If the text matches mBeginTag then a ParseTree object is created with the appropriate info.
2222 * mName - The name to give the resultant ParseTree object
2323 * mBeginTag - the regular expression used to determine if this is the rule that should be used
24 - * mEndTag - If ParseTrees of this type are to have children, mEndTag specifies when all of the children are collected
 24+ * mReplaceStr - Collected patterns that should be passed to children
2525 * mChildRule - What Parse rule to use to gather children for this element
2626 * @ingroup Parser
2727 */
28 -class ParseRule extends ParseObject {
29 - private $mBeginTag, $mChildRule, $mEndTag;
 28+class ParsePattern extends ParseObject {
 29+ private $mBeginTag, $mChildRule, $mReplaceStr;
3030
31 - function __construct($name, $beginTag, $childRule = NULL, $endTag = NULL) {
 31+ function __construct($name, $beginTag, $childRule = NULL, $replaceStr = NULL) {
3232 parent::__construct($name);
3333 $this->mBeginTag = $beginTag;
3434 $this->mChildRule = $childRule;
35 - $this->mEndTag = $endTag;
 35+ $this->mReplaceStr = $replaceStr;
3636 }
3737
38 - function parse(&$text, &$rules, $endTag = NULL) {
39 - if (! preg_match($this->mBeginTag, $text, $matches)) {
 38+ function parse(&$text, &$rules, $replaceStr = NULL) {
 39+ $beginTag = $this->mBeginTag;
 40+ if ($replaceStr != NULL) {
 41+ $beginTag = str_replace('~r', $replaceStr, $beginTag);
 42+ }
 43+ if (! preg_match($beginTag, $text, $matches)) {
4044 return NULL;
4145 }
4246 $text = substr($text, strlen($matches[0]));
43 - $child = NULL;
 47+ $children = NULL;
4448 if ($this->mChildRule != NULL) {
45 - if ($this->mEndTag != NULL) {
46 - $endTag = $this->mEndTag;
 49+ if ($this->mReplaceStr != NULL) {
 50+ $replaceStr = $this->mReplaceStr;
4751 foreach ($matches as $i => $crrnt) {
48 - $endTag = str_replace('~' . $i, $crrnt, $endTag);
 52+ $replaceStr = str_replace('~' . $i, $crrnt, $replaceStr);
4953 }
5054 }
51 - $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag);
 55+ $child = $rules[$this->mChildRule]->parse($text, $rules, $replaceStr);
5256 if ($child == NULL) {
5357 return NULL;
5458 }
 59+ $children = array($child);
5560 }
56 - return new ParseTree($this->mName, $matches, array($child));
 61+ return new ParseTree($this->mName, $matches, $children);
5762 }
5863 }
5964
@@ -77,27 +82,30 @@
7883 $this->mMaxChildren = $maxChildren;
7984 }
8085
81 - function parse(&$text, &$rules, $endTag = NULL) {
82 - $endRegEx = $this->mEndTag;
83 - if ($this->mEndTag != NULL && $endTag != NULL) {
84 - $endRegEx = str_replace('~r', $endTag, $this->mEndTag);
 86+ function parse(&$text, &$rules, $replaceStr = NULL) {
 87+ $endTag = $this->mEndTag;
 88+ if ($endTag != NULL && $replaceStr != NULL) {
 89+ $endTag = str_replace('~r', $replaceStr, $endTag);
8590 }
8691 $children = array();
87 - for ($i = 0; $i < $this->mMinChildren || (($endRegEx == NULL || ! preg_match($endRegEx, $text, $matches)) &&
 92+ for ($i = 0; $i < $this->mMinChildren || (($endTag == NULL || ! preg_match($endTag, $text, $matches)) &&
8893 ($this->mMaxChildren <= 0 || $i < $this->mMaxChildren)); $i ++) {
89 - $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag);
 94+ $child = $rules[$this->mChildRule]->parse($text, $rules, $replaceStr);
9095 if ($child == NULL) {
91 - return NULL;
 96+ if ($endTag != NULL || $i < $this->mMinChildren) {
 97+ return NULL;
 98+ }
 99+ break;
92100 }
93101 $children[] = $child;
94102 }
95 - if ($this->mEndTag != NULL) {
 103+ if ($endTag != NULL) {
96104 if (! isset($matches[0])) {
97105 return NULL;
98106 }
99107 $text = substr($text, strlen($matches[0]));
100108 }
101 - return new ParseTree($this->mName, $matches, $children);
 109+ return new ParseTree($this->mName, NULL, $children);
102110 }
103111 }
104112
@@ -106,18 +114,22 @@
107115 * mList - The list of rules
108116 * @ingroup Parser
109117 */
110 -class ParseList extends ParseObject {
111 - private $mList;
 118+class ParseChoice extends ParseObject {
 119+ private $mList, $matchChar;
112120
113 - function __construct($name, $list) {
 121+ function __construct($name, $list, $matchChar = null) {
114122 parent::__construct($name);
115123 $this->mList = $list;
 124+ $this->mMatchChar = $matchChar;
116125 }
117126
118 - function parse(&$text, &$rules, $endTag = NULL) {
 127+ function parse(&$text, &$rules, $replaceStr = NULL) {
 128+ if ($this->mMatchChar != NULL && $text[0] != $this->mMatchChar) {
 129+ return NULL;
 130+ }
119131 foreach ($this->mList as $crrnt) {
120132 $newText = $text;
121 - $child = $rules[$crrnt]->parse($newText, $rules, $endTag);
 133+ $child = $rules[$crrnt]->parse($newText, $rules, $replaceStr);
122134 if ($child != NULL) {
123135 $text = $newText;
124136 return new ParseTree($this->mName, NULL, array($child));
@@ -141,10 +153,10 @@
142154 $this->mList = $list;
143155 }
144156
145 - function parse(&$text, &$rules, $endTag = NULL) {
 157+ function parse(&$text, &$rules, $replaceStr = NULL) {
146158 $children = array();
147159 foreach ($this->mList as $crrnt) {
148 - $child = $rules[$crrnt]->parse($text, $rules, $endTag);
 160+ $child = $rules[$crrnt]->parse($text, $rules, $replaceStr);
149161 if ($child == NULL) {
150162 return NULL;
151163 }
@@ -157,7 +169,7 @@
158170 /**
159171 * The parse tree of the data.
160172 * printTree translates the parse tree to XML; eventually this should be separated into a data and engine layer.
161 - * mName - Indicates what ParseRule was used to create this node
 173+ * mName - Indicates what ParseObject was used to create this node
162174 * mMatches - The text groups that were collected by the regular expressions used when creating this rule
163175 * mChildren - The child ParseTree nodes in this tree
164176 * @ingroup Parser
@@ -197,17 +209,6 @@
198210 if (isset($this->mMatches[1])) {
199211 $retString = "<ignore>" . htmlspecialchars($this->mMatches[1]) . "</ignore>";
200212 }
201 - } elseif ($this->mName == "comment" || $this->mName == "ignore") {
202 - $retString = "<" . $this->mName . ">" . htmlspecialchars($this->mMatches[0]) . "</" . $this->mName . ">";
203 - } elseif ($this->mName == "ext") {
204 - $retString = "<name>" . htmlspecialchars($this->mMatches[1]) . "</name><attr>" . htmlspecialchars($this->mMatches[2]) . "</attr>";
205 - if (isset($this->mMatches[3])) {
206 - $retString .= "<inner>" . htmlspecialchars($this->mMatches[3]) . "</inner>";
207 - }
208 - if (isset($this->mMatches[4])) {
209 - $retString .= "<close>" . htmlspecialchars($this->mMatches[4]) . "</close>";
210 - }
211 - $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
212213 } elseif ($this->mName == "link") {
213214 $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren[0]->printTree() . "]]";
214215 } elseif ($this->mName == "h") {
@@ -216,9 +217,13 @@
217218 if ($this->mMatches[1] == "\n") {
218219 $retString = "\n" . $retString;
219220 }
220 - } else {
221 - foreach ($this->mChildren as $crrnt) {
222 - $retString .= $crrnt->printTree();
 221+ } elseif ($this->mName != "unUsed") {
 222+ if ($this->mChildren != NULL) {
 223+ foreach ($this->mChildren as $crrnt) {
 224+ $retString .= $crrnt->printTree();
 225+ }
 226+ } else {
 227+ $retString = htmlspecialchars($this->mMatches[0]);
223228 }
224229 if ($this->mName != "unnamed") {
225230 $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
@@ -70,37 +70,49 @@
7171 // To XML
7272 $xmlishRegex = implode('|', $this->parser->getStripList());
7373 $rules = array(
74 - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', "TemplateSeq", '}}'),
75 - "TplArg" => new ParseRule("tplarg", '/^{{{/s', "TemplateSeq", '}}}'),
76 - "TplPart" => new ParseRule("part", '/^\|/s', "TplPartList"),
77 - "Link" => new ParseRule("link", '/^\[\[/s', "MainQuant", ']]'),
78 - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'),
79 - "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'),
80 - "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'),
81 - "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'),
82 - "NoInclude" => new ParseRule("ignore", '/^<\/?noinclude>/s'),
83 - "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?<\/includeonly>/s'),
84 - "XmlClosed" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')([^>]*)\/>/si'),
85 - "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>)/si'),
86 - "BeginFile" => new ParseRule("bof", '/^~BOF/s'),
87 - "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'),
88 - "Root" => new ParseQuant("root", "MainList", '/^$/'),
89 - "MainQuant" => new ParseQuant("unnamed", "MainList", '/^~r/s'),
90 - "HeadingQuant" => new ParseQuant("unnamed", "MainList", '/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'),
91 - "TplTitle" => new ParseQuant("title", "MainList", '/^(?=~r|\|)/s'),
 74+ "Template" => new ParsePattern("template", '/^{{(?!{[^{])/s', "TemplateSeq", '}}'),
 75+ "TplArg" => new ParsePattern("tplarg", '/^{{{/s', "TemplateSeq", '}}}'),
 76+ "TplPart" => new ParsePattern("part", '/^\|/s', "TplPartList"),
 77+ "Link" => new ParsePattern("link", '/^\[\[/s', "MainQuant", ']]'),
 78+ "Heading" => new ParsePattern("h", '/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'),
 79+ "XmlExt" => new ParsePattern("ext", '/^<(?=(' . $xmlishRegex . '))/si', "XmlExtSeq", '~1'),
 80+ "CommentLine" => new ParsePattern("commentline", '/^\n((?:<!--.*?-->\n)+)/s'),
 81+ "Comment" => new ParsePattern("comment", '/^<!--.*?(?:-->|$)/s'),
 82+ "OnlyInclude" => new ParsePattern("ignore", '/^<\/?onlyinclude>/s'),
 83+ "NoInclude" => new ParsePattern("ignore", '/^<\/?noinclude>/s'),
 84+ "IncludeOnly" => new ParsePattern("ignore", '/^<includeonly>.*?<\/includeonly>/s'),
 85+ "BeginFile" => new ParsePattern("bof", '/^~BOF/s'),
 86+ "MainText" => new ParsePattern("text", '/^.[^{}\[\]<\n|=]*/s'),
 87+ "XmlName" => new ParsePattern("name", '/^.*?(?= |\/>|>)/s'),
 88+ "XmlAttr" => new ParsePattern("attr", '/^.*?(?=\/>|>)/s'),
 89+ "XmlClosed" => new ParsePattern("unUsed", '/^\/>/si'),
 90+ "XmlOpened" => new ParsePattern("unUsed", '/^>/si'),
 91+ "XmlInner" => new ParsePattern("inner", '/^.*?(?=<\/~r>|$)/si'),
 92+ "XmlCloseTag" => new ParsePattern("close", '/^<\/~r>/si'),
 93+ "Root" => new ParseQuant("root", "MainChoice", '/^$/'),
 94+ "MainQuant" => new ParseQuant("unnamed", "MainChoice", '/^~r/s'),
 95+ "HeadingQuant" => new ParseQuant("unnamed", "MainChoice", '/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'),
 96+ "TplTitle" => new ParseQuant("title", "MainChoice", '/^(?=~r|\|)/s'),
9297 "TplPartQuant" => new ParseQuant("unnamed", "TplPart", '/^~r/s'),
93 - "TplTest" => new ParseQuant("unnamed", "MainList", '/^(?=~r|\||=(?!~r|\|))/s'),
 98+ "TplTest" => new ParseQuant("unnamed", "MainChoice", '/^(?=~r|\||=(?!~r|\|))/s'),
9499 "TplName" => new ParseQuant("name", "TplTest", '/^=/s', 0, 1),
95 - "TplValue" => new ParseQuant("value", "MainList", '/^(?=~r|\|)/s'),
96 - "MainList" => new ParseList("unnamed", array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")),
97 - "TplPartList" => new ParseList("unnamed", array("TplPartSeq", "TplValue")),
 100+ "TplValue" => new ParseQuant("value", "MainChoice", '/^(?=~r|\|)/s'),
 101+ "XmlCloseQuant" => new ParseQuant("unnamed", "XmlCloseTag", NULL, 0, 1),
 102+ "MainChoice" => new ParseChoice("unnamed", array("CurlyChoice", "XmlChoice",
 103+ "Heading", "CommentLine", "Link", "BeginFile", "MainText")),
 104+ "CurlyChoice" => new ParseChoice("unnamed", array("Template", "TplArg"), "{"),
 105+ "XmlChoice" => new ParseChoice("unnamed", array("Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlExt"), "<"),
 106+ "TplPartList" => new ParseChoice("unnamed", array("TplPartSeq", "TplValue")),
 107+ "XmlClose" => new ParseChoice("unnamed", array("XmlClosed", "XmlOpenedSeq")),
98108 "TemplateSeq" => new ParseSeq("unnamed", array("TplTitle", "TplPartQuant")),
99 - "TplPartSeq" => new ParseSeq("unnamed", array("TplName", "TplValue")));
 109+ "TplPartSeq" => new ParseSeq("unnamed", array("TplName", "TplValue")),
 110+ "XmlExtSeq" => new ParseSeq("unnamed", array("XmlName", "XmlAttr", "XmlClose")),
 111+ "XmlOpenedSeq" => new ParseSeq("unnamed", array("XmlOpened", "XmlInner", "XmlCloseQuant")));
100112 if ($flags & Parser::PTD_FOR_INCLUSION) {
101 - $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s');
102 - $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?<\/noinclude>/s');
103 - $rules["IncludeOnly"] = new ParseRule("ignore", '/^<\/?includeonly>/s');
104 - $rules["BeginFile"] = new ParseRule("bof", '/^~BOF(.*?<onlyinclude>)?/s');
 113+ $rules["OnlyInclude"] = new ParsePattern("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s');
 114+ $rules["NoInclude"] = new ParsePattern("ignore", '/^<noinclude>.*?<\/noinclude>/s');
 115+ $rules["IncludeOnly"] = new ParsePattern("ignore", '/^<\/?includeonly>/s');
 116+ $rules["BeginFile"] = new ParsePattern("bof", '/^~BOF(.*?<onlyinclude>)?/s');
105117 }
106118
107119 $parseTree = ParseTree::createParseTree($text, $rules);

Status & tagging log