r62672 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r62671 | r62672 | r62673 >
Date: 05:27, 18 February 2010
Author: than4213
Status: deferred
Tags:
Comment:
Got rid of the ParseList StopChar functionality because it didn't work well with error handling
Modified paths:
  • /branches/parser-work/phase3/includes/parser/ParseTree.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php (modified) (history)
  • /branches/parser-work/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: branches/parser-work/phase3/maintenance/parserTests.txt
@@ -7265,30 +7265,18 @@
72667266 !! end
72677267
72687268 !! test
7269 -HHP2.1: Heuristics for headings in preprocessor parenthetical structures
 7269+HHP3.1: Heuristics for headings in preprocessor parenthetical structures
72707270 !! input
72717271 {{foo|
72727272 =heading=
72737273 !! result
72747274 <p>{{foo|
72757275 </p>
7276 -<h1> <span class="mw-headline" id="heading">heading</span></h1>
 7276+<h1><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h1>
72777277
72787278 !! end
72797279
72807280 !! test
7281 -HHP2.2: Heuristics for headings in preprocessor parenthetical structures
7282 -!! input
7283 -{{foo|
7284 -==heading==
7285 -!! result
7286 -<p>{{foo|
7287 -</p>
7288 -<h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h2>
7289 -
7290 -!! end
7291 -
7292 -!! test
72937281 Tildes in comments
72947282 !! options
72957283 pst
Index: branches/parser-work/phase3/includes/parser/ParseTree.php
@@ -6,7 +6,7 @@
77 */
88 interface ParseObject {
99 // Does the parse task specific to each parse object
10 - function parse(&$text, &$rules, $stopChars = '');
 10+ function parse(&$text, &$rules);
1111 }
1212
1313 /**
@@ -15,46 +15,41 @@
1616 * mName - The name to give the resultant ParseTree object
1717 * mBeginTag - the regular expression used to determine if this is the rule that should be used
1818 * mEndTag - If ParseTrees of this type are to have children, mEndTag specifies when all of the children are collected
19 - * mStopChars - extra characters that indicate markup
2019 * mChildRule - an extra rule to consider when collecting children, it is only used for situations covered by the HHP21 parser test
2120 * @ingroup Parser
2221 */
2322 class ParseRule implements ParseObject {
24 - private $mName, $mBeginTag, $mEndTag, $mStopChars, $mChildRule;
 23+ private $mName, $mBeginTag, $mEndTag, $mChildRule;
2524
26 - function __construct($name, $beginTag, $endTag = NULL, $stopChars = '', $childRule = NULL) {
 25+ function __construct($name, $beginTag, $endTag = NULL, $childRule = NULL) {
2726 $this->mName = $name;
2827 $this->mBeginTag = $beginTag;
2928 $this->mEndTag = $endTag;
30 - $this->mStopChars = $stopChars;
3129 $this->mChildRule = $childRule;
3230 }
3331
34 - function parse(&$text, &$rules, $stopChars = '') {
 32+ function parse(&$text, &$rules) {
3533 if (! preg_match($this->mBeginTag, $text, $matches)) {
3634 return NULL;
3735 }
38 - $text = substr($text, strlen($matches[0]));
 36+ $newText = substr($text, strlen($matches[0]));
3937 $children = array();
40 - if ($this->mChildRule != NULL) {
 38+ if ($this->mChildRule != NULL && $this->mEndTag != NULL) {
4139 $endTag = $this->mEndTag;
42 - if ($endTag != NULL) {
43 - foreach ($matches as $i => $crrnt) {
44 - $endTag = str_replace('~' . $i, $crrnt, $endTag);
45 - }
 40+ foreach ($matches as $i => $crrnt) {
 41+ $endTag = str_replace('~' . $i, $crrnt, $endTag);
4642 }
47 - while ($text != "" && ($endTag == NULL || ! preg_match($endTag, $text, $endMatches))) {
48 - $child = $rules[$this->mChildRule]->parse($text, $rules, $this->mStopChars);
 43+ while (! preg_match($endTag, $newText, $endMatches)) {
 44+ $child = $rules[$this->mChildRule]->parse($newText, $rules);
4945 if ($child == NULL) {
50 - break;
 46+ return NULL;
5147 }
5248 $children[] = $child;
5349 }
54 - if ($text != "") {
55 - $text = substr($text, strlen($endMatches[0]));
56 - $matches = array_merge($matches, $endMatches);
57 - }
 50+ $newText = substr($newText, strlen($endMatches[0]));
 51+ $matches = array_merge($matches, $endMatches);
5852 }
 53+ $text = $newText;
5954 return new ParseTree($this->mName, $matches, $children);
6055 }
6156 }
@@ -62,28 +57,23 @@
6358 /**
6459 * Contains a list of rules to cycle through when creating a parse tree
6560 * mList - The list of rules
66 - * mStopChars - the characters used to find markup
6761 * @ingroup Parser
6862 */
6963 class ParseList implements ParseObject {
70 - private $mList, $mStopChars;
 64+ private $mList;
7165
72 - function __construct($list, $stopChars = '') {
 66+ function __construct($list) {
7367 $this->mList = $list;
74 - $this->mStopChars = $stopChars;
7568 }
7669
77 - function parse(&$text, &$rules, $stopChars = '') {
 70+ function parse(&$text, &$rules) {
7871 foreach ($this->mList as $crrnt) {
79 - $child = $rules[$crrnt]->parse($text, $rules, $stopChars);
 72+ $child = $rules[$crrnt]->parse($text, $rules);
8073 if ($child != NULL) {
8174 return $child;
8275 }
8376 }
84 - $stopChars .= $this->mStopChars;
85 - preg_match('/^[' . $stopChars . ']|[^' . $stopChars . ']*/s', $text, $matches);
86 - $text = substr($text, strlen($matches[0]));
87 - return $matches[0];
 77+ return NULL;
8878 }
8979 }
9080
@@ -104,6 +94,10 @@
10595 $this->mChildren = $children;
10696 }
10797
 98+ function getName() {
 99+ return $this->mName;
 100+ }
 101+
108102 static function createParseTree($text, $rules) {
109103 wfProfileIn( __METHOD__ );
110104
@@ -118,10 +112,10 @@
119113 function printTree() {
120114 $retString = "";
121115
122 - if ($this->mName == "hhp21") {
 116+ if ($this->mName == "text") {
123117 $retString = htmlspecialchars($this->mMatches[0]);
124118 } elseif ($this->mName == "commentline") {
125 - $retString = htmlspecialchars($this->mMatches[1]) . "<comment>" . htmlspecialchars($this->mMatches[2]) . "</comment>";
 119+ $retString = "\n<comment>" . htmlspecialchars($this->mMatches[1]) . "</comment>";
126120 } elseif ($this->mName == "bof") {
127121 if (isset($this->mMatches[1])) {
128122 $retString = "<ignore>" . htmlspecialchars($this->mMatches[1]) . "</ignore>";
@@ -141,26 +135,32 @@
142136 $inTitle = true;
143137 $foundEquals = false;
144138 $currentItem = "";
145 - $this->mChildren[] = '|';
 139+ $this->mChildren[] = new ParseTree("pipe", NULL, NULL);
146140 foreach ($this->mChildren as $crrnt) {
147141 if ($crrnt instanceof ParseTree) {
148 - $currentItem .= $crrnt->printTree();
149 - } elseif ($crrnt == '|') {
150 - if ($inTitle) {
151 - $retString .= "<title>" . $currentItem . "</title>";
152 - $inTitle = false;
153 - } else {
154 - if (! $foundEquals) {
155 - $retString .= "<part>";
 142+ if ($crrnt->getName() == "pipe") {
 143+ if ($inTitle) {
 144+ $retString .= "<title>" . $currentItem . "</title>";
 145+ $inTitle = false;
 146+ } else {
 147+ if (! $foundEquals) {
 148+ $retString .= "<part>";
 149+ }
 150+ $retString .= "<value>" . $currentItem . "</value></part>";
 151+ $foundEquals = false;
156152 }
157 - $retString .= "<value>" . $currentItem . "</value></part>";
158 - $foundEquals = false;
 153+ $currentItem = "";
 154+ } elseif ($crrnt->getName() == "equals") {
 155+ if (! $inTitle && ! $foundEquals) {
 156+ $retString .= "<part><name>" . $currentItem . "</name>";
 157+ $foundEquals = true;
 158+ $currentItem = "";
 159+ } else {
 160+ $currentItem .= "=";
 161+ }
 162+ } else {
 163+ $currentItem .= $crrnt->printTree();
159164 }
160 - $currentItem = "";
161 - } elseif ($crrnt == '=' && ! $inTitle && ! $foundEquals) {
162 - $retString .= "<part><name>" . $currentItem . "</name>";
163 - $foundEquals = true;
164 - $currentItem = "";
165165 } else {
166166 $currentItem .= htmlspecialchars($crrnt);
167167 }
@@ -176,8 +176,6 @@
177177 }
178178 if ($this->mName == "root") {
179179 $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">";
180 - } elseif ($this->mName == "tplarg" || $this->mName == "template") {
181 - $retString = htmlspecialchars($this->mMatches[0]) . $retString;
182180 } elseif ($this->mName == "link") {
183181 $retString = htmlspecialchars($this->mMatches[0]) . $retString;
184182 if (isset($this->mMatches[1])) {
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
@@ -70,25 +70,27 @@
7171 // To XML
7272 $xmlishRegex = implode('|', $this->parser->getStripList());
7373 $rules = array(
74 - "Root" => new ParseRule("root", '/^/', '/^\Z/', '', "MainList"),
75 - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', '}|=', "HHP21List"),
76 - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', '}|=', "HHP21List"),
77 - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', '\]', "MainList"),
78 - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?(?:-->|\Z))*(?=\n|$)/s', '=', "MainList"),
79 - "CommentLine" => new ParseRule("commentline", '/^(\n *)((?:<!--.*?(?:-->|$)(?: *\n)?)+)/s'),
 74+ "Root" => new ParseRule("root", '/^/', '/^$/', "MainList"),
 75+ "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateList"),
 76+ "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateList"),
 77+ "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainList"),
 78+ "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?-->)*(?=\n|$)/s', "MainList"),
 79+ "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'),
8080 "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'),
8181 "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'),
8282 "NoInclude" => new ParseRule("ignore", '/^<\/?noinclude>/s'),
83 - "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?(?:<\/includeonly>|$)/s'),
 83+ "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?<\/includeonly>/s'),
8484 "XmlClosed" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')([^>]*)\/>/si'),
85 - "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>|$)/si'),
 85+ "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>)/si'),
8686 "BeginFile" => new ParseRule("bof", '/^~BOF/s'),
87 - "BugHHP21" => new ParseRule("hhp21", '/^\n(?==[^=])/s'),
88 - "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile"), '{\[<\n'),
89 - "HHP21List" => new ParseList(array("BugHHP21", "MainList")));
 87+ "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'),
 88+ "TplPipe" => new ParseRule("pipe", '/^\|/s'),
 89+ "TplEquals" => new ParseRule("equals", '/^=/s'),
 90+ "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")),
 91+ "TemplateList" => new ParseList(array("TplPipe", "TplEquals", "MainList")));
9092 if ($flags & Parser::PTD_FOR_INCLUSION) {
9193 $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s');
92 - $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?(?:<\/noinclude>|$)/s');
 94+ $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?<\/noinclude>/s');
9395 $rules["IncludeOnly"] = new ParseRule("ignore", '/^<\/?includeonly>/s');
9496 $rules["BeginFile"] = new ParseRule("bof", '/^~BOF(.*?<onlyinclude>)?/s');
9597 }

Status & tagging log