r62640 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r62639 | r62640 | r62641 >
Date:06:11, 17 February 2010
Author:than4213
Status:deferred
Tags:
Comment:
Make ParseList and ParseRule more modular by giving them a common interface. Will add more ParseObjects in the future.
Modified paths:
  • /branches/parser-work/phase3/includes/parser/ParseTree.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php (modified) (history)

Diff [purge]

Index: branches/parser-work/phase3/includes/parser/ParseTree.php
@@ -1,6 +1,15 @@
22 <?php
33
44 /**
 5+ * Interface for parse objects, each of which handles a specialized task while parsing
 6+ * @ingroup Parser
 7+ */
 8+interface ParseObject {
 9+ // Does the parse task specific to each parse object
 10+ function parse(&$text, &$rules, $stopChars = '');
 11+}
 12+
 13+/**
514 * A rule specifying how to parse the text.
615 * If the text matches mBeginTag then a ParseTree object is created with the appropriate info.
716 * mName - The name to give the resultant ParseTree object
@@ -10,7 +19,7 @@
1120 * mChildRule - an extra rule to consider when collecting children, it is only used for situations covered by the HHP21 parser test
1221 * @ingroup Parser
1322 */
14 -class ParseRule {
 23+class ParseRule implements ParseObject {
1524 private $mName, $mBeginTag, $mEndTag, $mStopChars, $mChildRule;
1625
1726 function __construct($name, $beginTag, $endTag = NULL, $stopChars = '', $childRule = NULL) {
@@ -21,36 +30,32 @@
2231 $this->mChildRule = $childRule;
2332 }
2433
25 - function parse(&$text, $parseList) {
26 - $retTree = NULL;
27 -
28 - if (preg_match($this->mBeginTag, $text, $matches)) {
29 - $text = substr($text, strlen($matches[0]));
30 - $children = array();
31 - if ($this->mEndTag != NULL) {
32 - $endTag = $this->mEndTag;
 34+ function parse(&$text, &$rules, $stopChars = '') {
 35+ if (! preg_match($this->mBeginTag, $text, $matches)) {
 36+ return NULL;
 37+ }
 38+ $text = substr($text, strlen($matches[0]));
 39+ $children = array();
 40+ if ($this->mChildRule != NULL) {
 41+ $endTag = $this->mEndTag;
 42+ if ($endTag != NULL) {
3343 foreach ($matches as $i => $crrnt) {
3444 $endTag = str_replace('~' . $i, $crrnt, $endTag);
3545 }
36 - while ($text != "" && ($endTag == NULL || ! preg_match($endTag, $text, $endMatches))) {
37 - if ($this->mChildRule != NULL) {
38 - $child = $this->mChildRule->parse($text, $parseList);
39 - if ($child != NULL) {
40 - $children[] = $child;
41 - }
42 - }
43 - $moreChildren = $parseList->parse($text, $this->mStopChars);
44 - $children = array_merge($children, $moreChildren);
 46+ }
 47+ while ($text != "" && ($endTag == NULL || ! preg_match($endTag, $text, $endMatches))) {
 48+ $child = $rules[$this->mChildRule]->parse($text, $rules, $this->mStopChars);
 49+ if ($child == NULL) {
 50+ break;
4551 }
46 - if ($text != "") {
47 - $text = substr($text, strlen($endMatches[0]));
48 - $matches = array_merge($matches, $endMatches);
49 - }
 52+ $children[] = $child;
5053 }
51 - $retTree = new ParseTree($this->mName, $matches, $children);
 54+ if ($text != "") {
 55+ $text = substr($text, strlen($endMatches[0]));
 56+ $matches = array_merge($matches, $endMatches);
 57+ }
5258 }
53 -
54 - return $retTree;
 59+ return new ParseTree($this->mName, $matches, $children);
5560 }
5661 }
5762
@@ -60,34 +65,25 @@
6166 * mStopChars - the characters used to find markup
6267 * @ingroup Parser
6368 */
64 -class ParseList {
 69+class ParseList implements ParseObject {
6570 private $mList, $mStopChars;
6671
67 - function __construct($list, $stopChars) {
 72+ function __construct($list, $stopChars = '') {
6873 $this->mList = $list;
6974 $this->mStopChars = $stopChars;
7075 }
7176
72 - function parse(&$text, $stopChars) {
73 - $children = array();
74 -
 77+ function parse(&$text, &$rules, $stopChars = '') {
7578 foreach ($this->mList as $crrnt) {
76 - $child = $crrnt->parse($text, $this);
 79+ $child = $rules[$crrnt]->parse($text, $rules, $stopChars);
7780 if ($child != NULL) {
78 - $children[] = $child;
79 - break;
 81+ return $child;
8082 }
8183 }
82 - if ($child == NULL) {
83 - $children[] = $text[0];
84 - $text = substr($text, 1);
85 - }
86 - if (preg_match('/^[^' . $this->mStopChars . $stopChars . ']+/s', $text, $matches)) {
87 - $children[] = $matches[0];
88 - $text = substr($text, strlen($matches[0]));
89 - }
90 -
91 - return $children;
 84+ $stopChars .= $this->mStopChars;
 85+ preg_match('/^[' . $stopChars . ']|[^' . $stopChars . ']*/s', $text, $matches);
 86+ $text = substr($text, strlen($matches[0]));
 87+ return $matches[0];
9288 }
9389 }
9490
@@ -108,12 +104,11 @@
109105 $this->mChildren = $children;
110106 }
111107
112 - static function createParseTree($text, $parseList) {
 108+ static function createParseTree($text, $rules) {
113109 wfProfileIn( __METHOD__ );
114110
115111 $text = "~BOF" . $text;
116 - $root = new ParseRule("root", '/^/', '/^\Z/');
117 - $retTree = $root->parse($text, $parseList);
 112+ $retTree = $rules["Root"]->parse($text, $rules);
118113
119114 wfProfileOut( __METHOD__ );
120115 return $retTree;
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
@@ -69,12 +69,12 @@
7070
7171 // To XML
7272 $xmlishRegex = implode('|', $this->parser->getStripList());
73 - $bugHHP21 = new ParseRule("hhp21", '/^\n(?==[^=])/s');
7473 $rules = array(
75 - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', '}|=', $bugHHP21),
76 - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', '}|=', $bugHHP21),
77 - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', '\]'),
78 - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?(?:-->|\Z))*(?=\n|$)/s', '='),
 74+ "Root" => new ParseRule("root", '/^/', '/^\Z/', '', "MainList"),
 75+ "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', '}|=', "HHP21List"),
 76+ "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', '}|=', "HHP21List"),
 77+ "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', '\]', "MainList"),
 78+ "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?(?:-->|\Z))*(?=\n|$)/s', '=', "MainList"),
7979 "CommentLine" => new ParseRule("commentline", '/^(\n *)((?:<!--.*?(?:-->|$)(?: *\n)?)+)/s'),
8080 "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'),
8181 "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'),
@@ -82,7 +82,10 @@
8383 "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?(?:<\/includeonly>|$)/s'),
8484 "XmlClosed" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')([^>]*)\/>/si'),
8585 "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>|$)/si'),
86 - "BeginFile" => new ParseRule("bof", '/^~BOF/s'));
 86+ "BeginFile" => new ParseRule("bof", '/^~BOF/s'),
 87+ "BugHHP21" => new ParseRule("hhp21", '/^\n(?==[^=])/s'),
 88+ "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile"), '{\[<\n'),
 89+ "HHP21List" => new ParseList(array("BugHHP21", "MainList")));
8790 if ($flags & Parser::PTD_FOR_INCLUSION) {
8891 $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s');
8992 $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?(?:<\/noinclude>|$)/s');
@@ -90,8 +93,7 @@
9194 $rules["BeginFile"] = new ParseRule("bof", '/^~BOF(.*?<onlyinclude>)?/s');
9295 }
9396
94 - $parseList = new ParseList($rules, '{\[<\n');
95 - $parseTree = ParseTree::createParseTree($text, $parseList);
 97+ $parseTree = ParseTree::createParseTree($text, $rules);
9698 $xml = $parseTree->printTree();
9799
98100 // To DOM

Status & tagging log