Index: branches/parser-work/phase3/includes/parser/ParseTree.php |
— | — | @@ -1,6 +1,15 @@ |
2 | 2 | <?php |
3 | 3 | |
4 | 4 | /** |
| 5 | + * Interface for parse objects, each of which performs a specialized task while parsing |
| 6 | + * @ingroup Parser |
| 7 | + */ |
| 8 | +interface ParseObject { |
| 9 | + // Performs the parse task specific to this parse object |
| 10 | + function parse(&$text, &$rules, $stopChars = ''); |
| 11 | +} |
| 12 | + |
| 13 | +/** |
5 | 14 | * A rule specifying how to parse the text. |
6 | 15 | * If the text matches mBeginTag then a ParseTree object is created with the appropriate info. |
7 | 16 | * mName - The name to give the resultant ParseTree object |
— | — | @@ -10,7 +19,7 @@ |
11 | 20 | * mChildRule - an extra rule to consider when collecting children; it is only used for situations covered by the HHP21 parser test |
12 | 21 | * @ingroup Parser |
13 | 22 | */ |
14 | | -class ParseRule { |
| 23 | +class ParseRule implements ParseObject { |
15 | 24 | private $mName, $mBeginTag, $mEndTag, $mStopChars, $mChildRule; |
16 | 25 | |
17 | 26 | function __construct($name, $beginTag, $endTag = NULL, $stopChars = '', $childRule = NULL) { |
— | — | @@ -21,36 +30,32 @@ |
22 | 31 | $this->mChildRule = $childRule; |
23 | 32 | } |
24 | 33 | |
25 | | - function parse(&$text, $parseList) { |
26 | | - $retTree = NULL; |
27 | | - |
28 | | - if (preg_match($this->mBeginTag, $text, $matches)) { |
29 | | - $text = substr($text, strlen($matches[0])); |
30 | | - $children = array(); |
31 | | - if ($this->mEndTag != NULL) { |
32 | | - $endTag = $this->mEndTag; |
| 34 | + function parse(&$text, &$rules, $stopChars = '') { |
| 35 | + if (! preg_match($this->mBeginTag, $text, $matches)) { |
| 36 | + return NULL; |
| 37 | + } |
| 38 | + $text = substr($text, strlen($matches[0])); |
| 39 | + $children = array(); |
| 40 | + if ($this->mChildRule != NULL) { |
| 41 | + $endTag = $this->mEndTag; |
| 42 | + if ($endTag != NULL) { |
33 | 43 | foreach ($matches as $i => $crrnt) { |
34 | 44 | $endTag = str_replace('~' . $i, $crrnt, $endTag); |
35 | 45 | } |
36 | | - while ($text != "" && ($endTag == NULL || ! preg_match($endTag, $text, $endMatches))) { |
37 | | - if ($this->mChildRule != NULL) { |
38 | | - $child = $this->mChildRule->parse($text, $parseList); |
39 | | - if ($child != NULL) { |
40 | | - $children[] = $child; |
41 | | - } |
42 | | - } |
43 | | - $moreChildren = $parseList->parse($text, $this->mStopChars); |
44 | | - $children = array_merge($children, $moreChildren); |
| 46 | + } |
| 47 | + while ($text != "" && ($endTag == NULL || ! preg_match($endTag, $text, $endMatches))) { |
| 48 | + $child = $rules[$this->mChildRule]->parse($text, $rules, $this->mStopChars); |
| 49 | + if ($child == NULL) { |
| 50 | + break; |
45 | 51 | } |
46 | | - if ($text != "") { |
47 | | - $text = substr($text, strlen($endMatches[0])); |
48 | | - $matches = array_merge($matches, $endMatches); |
49 | | - } |
| 52 | + $children[] = $child; |
50 | 53 | } |
51 | | - $retTree = new ParseTree($this->mName, $matches, $children); |
| 54 | + if ($text != "") { |
| 55 | + $text = substr($text, strlen($endMatches[0])); |
| 56 | + $matches = array_merge($matches, $endMatches); |
| 57 | + } |
52 | 58 | } |
53 | | - |
54 | | - return $retTree; |
| 59 | + return new ParseTree($this->mName, $matches, $children); |
55 | 60 | } |
56 | 61 | } |
57 | 62 | |
— | — | @@ -60,34 +65,25 @@ |
61 | 66 | * mStopChars - the characters used to find markup |
62 | 67 | * @ingroup Parser |
63 | 68 | */ |
64 | | -class ParseList { |
| 69 | +class ParseList implements ParseObject { |
65 | 70 | private $mList, $mStopChars; |
66 | 71 | |
67 | | - function __construct($list, $stopChars) { |
| 72 | + function __construct($list, $stopChars = '') { |
68 | 73 | $this->mList = $list; |
69 | 74 | $this->mStopChars = $stopChars; |
70 | 75 | } |
71 | 76 | |
72 | | - function parse(&$text, $stopChars) { |
73 | | - $children = array(); |
74 | | - |
| 77 | + function parse(&$text, &$rules, $stopChars = '') { |
75 | 78 | foreach ($this->mList as $crrnt) { |
76 | | - $child = $crrnt->parse($text, $this); |
| 79 | + $child = $rules[$crrnt]->parse($text, $rules, $stopChars); |
77 | 80 | if ($child != NULL) { |
78 | | - $children[] = $child; |
79 | | - break; |
| 81 | + return $child; |
80 | 82 | } |
81 | 83 | } |
82 | | - if ($child == NULL) { |
83 | | - $children[] = $text[0]; |
84 | | - $text = substr($text, 1); |
85 | | - } |
86 | | - if (preg_match('/^[^' . $this->mStopChars . $stopChars . ']+/s', $text, $matches)) { |
87 | | - $children[] = $matches[0]; |
88 | | - $text = substr($text, strlen($matches[0])); |
89 | | - } |
90 | | - |
91 | | - return $children; |
| 84 | + $stopChars .= $this->mStopChars; |
| 85 | + preg_match('/^[' . $stopChars . ']|[^' . $stopChars . ']*/s', $text, $matches); |
| 86 | + $text = substr($text, strlen($matches[0])); |
| 87 | + return $matches[0]; |
92 | 88 | } |
93 | 89 | } |
94 | 90 | |
— | — | @@ -108,12 +104,11 @@ |
109 | 105 | $this->mChildren = $children; |
110 | 106 | } |
111 | 107 | |
112 | | - static function createParseTree($text, $parseList) { |
| 108 | + static function createParseTree($text, $rules) { |
113 | 109 | wfProfileIn( __METHOD__ ); |
114 | 110 | |
115 | 111 | $text = "~BOF" . $text; |
116 | | - $root = new ParseRule("root", '/^/', '/^\Z/'); |
117 | | - $retTree = $root->parse($text, $parseList); |
| 112 | + $retTree = $rules["Root"]->parse($text, $rules); |
118 | 113 | |
119 | 114 | wfProfileOut( __METHOD__ ); |
120 | 115 | return $retTree; |
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php |
— | — | @@ -69,12 +69,12 @@ |
70 | 70 | |
71 | 71 | // To XML |
72 | 72 | $xmlishRegex = implode('|', $this->parser->getStripList()); |
73 | | - $bugHHP21 = new ParseRule("hhp21", '/^\n(?==[^=])/s'); |
74 | 73 | $rules = array( |
75 | | - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', '}|=', $bugHHP21), |
76 | | - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', '}|=', $bugHHP21), |
77 | | - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', '\]'), |
78 | | - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?(?:-->|\Z))*(?=\n|$)/s', '='), |
| 74 | + "Root" => new ParseRule("root", '/^/', '/^\Z/', '', "MainList"), |
| 75 | + "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', '}|=', "HHP21List"), |
| 76 | + "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', '}|=', "HHP21List"), |
| 77 | + "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', '\]', "MainList"), |
| 78 | + "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?(?:-->|\Z))*(?=\n|$)/s', '=', "MainList"), |
79 | 79 | "CommentLine" => new ParseRule("commentline", '/^(\n *)((?:<!--.*?(?:-->|$)(?: *\n)?)+)/s'), |
80 | 80 | "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'), |
81 | 81 | "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'), |
— | — | @@ -82,7 +82,10 @@ |
83 | 83 | "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?(?:<\/includeonly>|$)/s'), |
84 | 84 | "XmlClosed" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')([^>]*)\/>/si'), |
85 | 85 | "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>|$)/si'), |
86 | | - "BeginFile" => new ParseRule("bof", '/^~BOF/s')); |
| 86 | + "BeginFile" => new ParseRule("bof", '/^~BOF/s'), |
| 87 | + "BugHHP21" => new ParseRule("hhp21", '/^\n(?==[^=])/s'), |
| 88 | + "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile"), '{\[<\n'), |
| 89 | + "HHP21List" => new ParseList(array("BugHHP21", "MainList"))); |
87 | 90 | if ($flags & Parser::PTD_FOR_INCLUSION) { |
88 | 91 | $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s'); |
89 | 92 | $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?(?:<\/noinclude>|$)/s'); |
— | — | @@ -90,8 +93,7 @@ |
91 | 94 | $rules["BeginFile"] = new ParseRule("bof", '/^~BOF(.*?<onlyinclude>)?/s'); |
92 | 95 | } |
93 | 96 | |
94 | | - $parseList = new ParseList($rules, '{\[<\n'); |
95 | | - $parseTree = ParseTree::createParseTree($text, $parseList); |
| 97 | + $parseTree = ParseTree::createParseTree($text, $rules); |
96 | 98 | $xml = $parseTree->printTree(); |
97 | 99 | |
98 | 100 | // To DOM |