Index: branches/parser-work/phase3/maintenance/parserTests.txt |
— | — | @@ -7265,30 +7265,18 @@ |
7266 | 7266 | !! end |
7267 | 7267 | |
7268 | 7268 | !! test |
7269 | | -HHP2.1: Heuristics for headings in preprocessor parenthetical structures |
| 7269 | +HHP3.1: Heuristics for headings in preprocessor parenthetical structures |
7270 | 7270 | !! input |
7271 | 7271 | {{foo| |
7272 | 7272 | =heading= |
7273 | 7273 | !! result |
7274 | 7274 | <p>{{foo| |
7275 | 7275 | </p> |
7276 | | -<h1> <span class="mw-headline" id="heading">heading</span></h1> |
| 7276 | +<h1><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h1> |
7277 | 7277 | |
7278 | 7278 | !! end |
7279 | 7279 | |
7280 | 7280 | !! test |
7281 | | -HHP2.2: Heuristics for headings in preprocessor parenthetical structures |
7282 | | -!! input |
7283 | | -{{foo| |
7284 | | -==heading== |
7285 | | -!! result |
7286 | | -<p>{{foo| |
7287 | | -</p> |
7288 | | -<h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h2> |
7289 | | - |
7290 | | -!! end |
7291 | | - |
7292 | | -!! test |
7293 | 7281 | Tildes in comments |
7294 | 7282 | !! options |
7295 | 7283 | pst |
Index: branches/parser-work/phase3/includes/parser/ParseTree.php |
— | — | @@ -6,7 +6,7 @@ |
7 | 7 | */ |
8 | 8 | interface ParseObject { |
9 | 9 | // Tries to parse the start of $text using $rules; returns the parse result, or NULL when nothing matches |
10 | | - function parse(&$text, &$rules, $stopChars = ''); |
| 10 | + function parse(&$text, &$rules); |
11 | 11 | } |
12 | 12 | |
13 | 13 | /** |
— | — | @@ -15,46 +15,41 @@ |
16 | 16 | * mName - The name to give the resultant ParseTree object |
17 | 17 | * mBeginTag - the regular expression used to determine if this is the rule that should be used |
18 | 18 | * mEndTag - If ParseTrees of this type are to have children, mEndTag specifies when all of the children are collected |
19 | | - * mStopChars - extra characters that indicate markup |
20 | 20 | * mChildRule - the key (in the rules array) of the rule used to parse each child; children are collected until mEndTag matches |
21 | 20 | * @ingroup Parser |
22 | 21 | */ |
23 | 22 | class ParseRule implements ParseObject { |
24 | | - private $mName, $mBeginTag, $mEndTag, $mStopChars, $mChildRule; |
| 23 | + private $mName, $mBeginTag, $mEndTag, $mChildRule; |
25 | 24 | |
26 | | - function __construct($name, $beginTag, $endTag = NULL, $stopChars = '', $childRule = NULL) { |
| 25 | + function __construct($name, $beginTag, $endTag = NULL, $childRule = NULL) { |
27 | 26 | $this->mName = $name; |
28 | 27 | $this->mBeginTag = $beginTag; |
29 | 28 | $this->mEndTag = $endTag; |
30 | | - $this->mStopChars = $stopChars; |
31 | 29 | $this->mChildRule = $childRule; |
32 | 30 | } |
33 | 31 | |
34 | | - function parse(&$text, &$rules, $stopChars = '') { |
| 32 | + function parse(&$text, &$rules) { |
35 | 33 | if (! preg_match($this->mBeginTag, $text, $matches)) { |
36 | 34 | return NULL; |
37 | 35 | } |
38 | | - $text = substr($text, strlen($matches[0])); |
| 36 | + $newText = substr($text, strlen($matches[0])); |
39 | 37 | $children = array(); |
40 | | - if ($this->mChildRule != NULL) { |
| 38 | + if ($this->mChildRule != NULL && $this->mEndTag != NULL) { |
41 | 39 | $endTag = $this->mEndTag; |
42 | | - if ($endTag != NULL) { |
43 | | - foreach ($matches as $i => $crrnt) { |
44 | | - $endTag = str_replace('~' . $i, $crrnt, $endTag); |
45 | | - } |
| 40 | + foreach ($matches as $i => $crrnt) { |
| 41 | + $endTag = str_replace('~' . $i, $crrnt, $endTag); |
46 | 42 | } |
47 | | - while ($text != "" && ($endTag == NULL || ! preg_match($endTag, $text, $endMatches))) { |
48 | | - $child = $rules[$this->mChildRule]->parse($text, $rules, $this->mStopChars); |
| 43 | + while (! preg_match($endTag, $newText, $endMatches)) { |
| 44 | + $child = $rules[$this->mChildRule]->parse($newText, $rules); |
49 | 45 | if ($child == NULL) { |
50 | | - break; |
| 46 | + return NULL; |
51 | 47 | } |
52 | 48 | $children[] = $child; |
53 | 49 | } |
54 | | - if ($text != "") { |
55 | | - $text = substr($text, strlen($endMatches[0])); |
56 | | - $matches = array_merge($matches, $endMatches); |
57 | | - } |
| 50 | + $newText = substr($newText, strlen($endMatches[0])); |
| 51 | + $matches = array_merge($matches, $endMatches); |
58 | 52 | } |
| 53 | + $text = $newText; |
59 | 54 | return new ParseTree($this->mName, $matches, $children); |
60 | 55 | } |
61 | 56 | } |
— | — | @@ -62,28 +57,23 @@ |
63 | 58 | /** |
64 | 59 | * Contains a list of rules to cycle through when creating a parse tree |
65 | 60 | * mList - The list of rules |
66 | | - * mStopChars - the characters used to find markup |
67 | 61 | * @ingroup Parser |
68 | 62 | */ |
69 | 63 | class ParseList implements ParseObject { |
70 | | - private $mList, $mStopChars; |
| 64 | + private $mList; |
71 | 65 | |
72 | | - function __construct($list, $stopChars = '') { |
| 66 | + function __construct($list) { |
73 | 67 | $this->mList = $list; |
74 | | - $this->mStopChars = $stopChars; |
75 | 68 | } |
76 | 69 | |
77 | | - function parse(&$text, &$rules, $stopChars = '') { |
| 70 | + function parse(&$text, &$rules) { |
78 | 71 | foreach ($this->mList as $crrnt) { |
79 | | - $child = $rules[$crrnt]->parse($text, $rules, $stopChars); |
| 72 | + $child = $rules[$crrnt]->parse($text, $rules); |
80 | 73 | if ($child != NULL) { |
81 | 74 | return $child; |
82 | 75 | } |
83 | 76 | } |
84 | | - $stopChars .= $this->mStopChars; |
85 | | - preg_match('/^[' . $stopChars . ']|[^' . $stopChars . ']*/s', $text, $matches); |
86 | | - $text = substr($text, strlen($matches[0])); |
87 | | - return $matches[0]; |
| 77 | + return NULL; |
88 | 78 | } |
89 | 79 | } |
90 | 80 | |
— | — | @@ -104,6 +94,10 @@ |
105 | 95 | $this->mChildren = $children; |
106 | 96 | } |
107 | 97 | |
| 98 | + function getName() { |
| 99 | + return $this->mName; |
| 100 | + } |
| 101 | + |
108 | 102 | static function createParseTree($text, $rules) { |
109 | 103 | wfProfileIn( __METHOD__ ); |
110 | 104 | |
— | — | @@ -118,10 +112,10 @@ |
119 | 113 | function printTree() { |
120 | 114 | $retString = ""; |
121 | 115 | |
122 | | - if ($this->mName == "hhp21") { |
| 116 | + if ($this->mName == "text") { |
123 | 117 | $retString = htmlspecialchars($this->mMatches[0]); |
124 | 118 | } elseif ($this->mName == "commentline") { |
125 | | - $retString = htmlspecialchars($this->mMatches[1]) . "<comment>" . htmlspecialchars($this->mMatches[2]) . "</comment>"; |
| 119 | + $retString = "\n<comment>" . htmlspecialchars($this->mMatches[1]) . "</comment>"; |
126 | 120 | } elseif ($this->mName == "bof") { |
127 | 121 | if (isset($this->mMatches[1])) { |
128 | 122 | $retString = "<ignore>" . htmlspecialchars($this->mMatches[1]) . "</ignore>"; |
— | — | @@ -141,26 +135,32 @@ |
142 | 136 | $inTitle = true; |
143 | 137 | $foundEquals = false; |
144 | 138 | $currentItem = ""; |
145 | | - $this->mChildren[] = '|'; |
| 139 | + $this->mChildren[] = new ParseTree("pipe", NULL, NULL); |
146 | 140 | foreach ($this->mChildren as $crrnt) { |
147 | 141 | if ($crrnt instanceof ParseTree) { |
148 | | - $currentItem .= $crrnt->printTree(); |
149 | | - } elseif ($crrnt == '|') { |
150 | | - if ($inTitle) { |
151 | | - $retString .= "<title>" . $currentItem . "</title>"; |
152 | | - $inTitle = false; |
153 | | - } else { |
154 | | - if (! $foundEquals) { |
155 | | - $retString .= "<part>"; |
| 142 | + if ($crrnt->getName() == "pipe") { |
| 143 | + if ($inTitle) { |
| 144 | + $retString .= "<title>" . $currentItem . "</title>"; |
| 145 | + $inTitle = false; |
| 146 | + } else { |
| 147 | + if (! $foundEquals) { |
| 148 | + $retString .= "<part>"; |
| 149 | + } |
| 150 | + $retString .= "<value>" . $currentItem . "</value></part>"; |
| 151 | + $foundEquals = false; |
156 | 152 | } |
157 | | - $retString .= "<value>" . $currentItem . "</value></part>"; |
158 | | - $foundEquals = false; |
| 153 | + $currentItem = ""; |
| 154 | + } elseif ($crrnt->getName() == "equals") { |
| 155 | + if (! $inTitle && ! $foundEquals) { |
| 156 | + $retString .= "<part><name>" . $currentItem . "</name>"; |
| 157 | + $foundEquals = true; |
| 158 | + $currentItem = ""; |
| 159 | + } else { |
| 160 | + $currentItem .= "="; |
| 161 | + } |
| 162 | + } else { |
| 163 | + $currentItem .= $crrnt->printTree(); |
159 | 164 | } |
160 | | - $currentItem = ""; |
161 | | - } elseif ($crrnt == '=' && ! $inTitle && ! $foundEquals) { |
162 | | - $retString .= "<part><name>" . $currentItem . "</name>"; |
163 | | - $foundEquals = true; |
164 | | - $currentItem = ""; |
165 | 165 | } else { |
166 | 166 | $currentItem .= htmlspecialchars($crrnt); |
167 | 167 | } |
— | — | @@ -176,8 +176,6 @@ |
177 | 177 | } |
178 | 178 | if ($this->mName == "root") { |
179 | 179 | $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . ">"; |
180 | | - } elseif ($this->mName == "tplarg" || $this->mName == "template") { |
181 | | - $retString = htmlspecialchars($this->mMatches[0]) . $retString; |
182 | 180 | } elseif ($this->mName == "link") { |
183 | 181 | $retString = htmlspecialchars($this->mMatches[0]) . $retString; |
184 | 182 | if (isset($this->mMatches[1])) { |
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php |
— | — | @@ -70,25 +70,27 @@ |
71 | 71 | // To XML |
72 | 72 | $xmlishRegex = implode('|', $this->parser->getStripList()); |
73 | 73 | $rules = array( |
74 | | - "Root" => new ParseRule("root", '/^/', '/^\Z/', '', "MainList"), |
75 | | - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', '}|=', "HHP21List"), |
76 | | - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', '}|=', "HHP21List"), |
77 | | - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', '\]', "MainList"), |
78 | | - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?(?:-->|\Z))*(?=\n|$)/s', '=', "MainList"), |
79 | | - "CommentLine" => new ParseRule("commentline", '/^(\n *)((?:<!--.*?(?:-->|$)(?: *\n)?)+)/s'), |
| 74 | + "Root" => new ParseRule("root", '/^/', '/^$/', "MainList"), |
| 75 | + "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateList"), |
| 76 | + "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateList"), |
| 77 | + "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainList"), |
| 78 | + "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?: *<!--.*?-->)*(?=\n|$)/s', "MainList"), |
| 79 | + "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'), |
80 | 80 | "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'), |
81 | 81 | "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'), |
82 | 82 | "NoInclude" => new ParseRule("ignore", '/^<\/?noinclude>/s'), |
83 | | - "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?(?:<\/includeonly>|$)/s'), |
| 83 | + "IncludeOnly" => new ParseRule("ignore", '/^<includeonly>.*?<\/includeonly>/s'), |
84 | 84 | "XmlClosed" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')([^>]*)\/>/si'), |
85 | | - "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>|$)/si'), |
| 85 | + "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . ')(.*?)>(.*?)(<\/\1>)/si'), |
86 | 86 | "BeginFile" => new ParseRule("bof", '/^~BOF/s'), |
87 | | - "BugHHP21" => new ParseRule("hhp21", '/^\n(?==[^=])/s'), |
88 | | - "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile"), '{\[<\n'), |
89 | | - "HHP21List" => new ParseList(array("BugHHP21", "MainList"))); |
| 87 | + "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'), |
| 88 | + "TplPipe" => new ParseRule("pipe", '/^\|/s'), |
| 89 | + "TplEquals" => new ParseRule("equals", '/^=/s'), |
| 90 | + "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")), |
| 91 | + "TemplateList" => new ParseList(array("TplPipe", "TplEquals", "MainList"))); |
90 | 92 | if ($flags & Parser::PTD_FOR_INCLUSION) { |
91 | 93 | $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s'); |
92 | | - $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?(?:<\/noinclude>|$)/s'); |
| 94 | + $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?<\/noinclude>/s'); |
93 | 95 | $rules["IncludeOnly"] = new ParseRule("ignore", '/^<\/?includeonly>/s'); |
94 | 96 | $rules["BeginFile"] = new ParseRule("bof", '/^~BOF(.*?<onlyinclude>)?/s'); |
95 | 97 | } |