r67702 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r67701 | r67702 | r67703 >
Date:11:25, 9 June 2010
Author:reedy
Status:deferred
Tags:
Comment:
Revert r65412 to do the move properly
Modified paths:
  • /trunk/extensions/DataTransclusion/WebDataTransclusionSource.php (modified) (history)
  • /trunk/extensions/ParseEngine (added) (history)
  • /trunk/extensions/ParserWiki (deleted) (history)

Diff [purge]

Index: trunk/extensions/ParseEngine/ParseEngine.body.php
@@ -0,0 +1,237 @@
 2+<?php
 3+/**
 4+ * Acts as the primary interface between the world and the parser.
 5+ * mStartRule - the first rule to use while parsing
 6+ * mRules - The list of rules to use while parsing
 7+ * mDom - Used to create Dom objects and get's returned at the end of parsing
 8+ * mIter - Keeps track of how many times the parser recurses to stop endless loops
 9+ */
 10+class ParseEngine {
 11+ const maxIter = 2048;
 12+ private $mGrammars;
 13+
 14+ function __construct() {
 15+ $this->mGrammars = array();
 16+ }
 17+
 18+ function parse($grammarName, &$text) {
 19+ global $IP;
 20+ wfDebugLog("ParseEngine", "==========Start Parse Engine==========\n");
 21+ $grammar = isset($this->mGrammars[$grammarName]) ? $this->mGrammars[$grammarName] : NULL;
 22+ if ($grammar == NULL) {
 23+ $revision = Revision::newFromTitle(Title::newFromText($grammarName, NS_GRAMMAR));
 24+ $grammar = new DOMDocument();
 25+ if ($revision == NULL || ! $grammar->loadXML($revision->getText(), LIBXML_NOBLANKS)) {
 26+ return TRUE;
 27+ }
 28+ $this->pushTags($grammar->documentElement, NULL);
 29+ $this->mGrammars[$grammarName] = $grammar;
 30+ }
 31+ $doc = new DOMDocument();
 32+ $rootTag = $doc->createElement($grammar->documentElement->getAttribute("rootTag"));
 33+ $startRule = $grammar->documentElement->getAttribute("startRule");
 34+ $xpath = new DOMXPath($grammar);
 35+ $startRule = $xpath->query("/Grammar/*[@name='$startRule']")->item(0);
 36+ $refText = $text;
 37+ if (! $this->parseRec($startRule, "", "", $iter, $refText, $rootTag)) {
 38+ return TRUE;
 39+ }
 40+ $doc->appendChild($rootTag);
 41+ $text = $doc->saveXML();
 42+ wfDebugLog("ParseEngine", "Parsed text - $text\n");
 43+ return TRUE;
 44+ }
 45+
 46+ static function unparse($inNodes) {
 47+ $retStr = "";
 48+ foreach ($inNodes as $child) {
 49+ if ($child instanceof DOMText) {
 50+ $retStr .= $child->data;
 51+ } else {
 52+ $retStr .= $child->getAttribute("tag") . self::unparse($child->childNodes);
 53+ }
 54+ }
 55+ return $retStr;
 56+ }
 57+
 58+ private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) {
 59+ wfDebugLog("ParseEngine", "Entering {$rule->nodeName}, {$rule->getAttribute("name")}\n");
 60+ $iter ++;
 61+ if ($iter > ParseEngine::maxIter) {
 62+ throw new MWException("Parser iterated too many times. Probable loop in grammar.");
 63+ }
 64+ if ($rule->nodeName == "Assignment" || $rule->nodeName == "Reference" || $rule->nodeName == "Text") {
 65+ $saveTags = str_replace("~r", preg_quote($replaceStr, "/"), $saveTags);
 66+ $newTags = $rule->getAttribute("saveTags");
 67+ if ($saveTags == "") {
 68+ $saveTags = $newTags;
 69+ } elseif ($newTags != "") {
 70+ $saveTags .= "|" . $newTags;
 71+ }
 72+ }
 73+ $dom = $outNode->ownerDocument;
 74+ $retCode = FALSE;
 75+ if ($rule->nodeName == "Assignment") {
 76+ $tag = $rule->getAttribute("tag");
 77+ $foundTag = $tag == NULL;
 78+ if (! $foundTag) {
 79+ if ($rule->getAttribute("regex") != NULL) {
 80+ $tag = str_replace("~r", preg_quote($replaceStr, "/"), $tag);
 81+ $foundTag = preg_match("/^$tag/s", $text, $matches);
 82+ if ($foundTag) {
 83+ $tag = $matches[0];
 84+ if (isset($matches[1])) {
 85+ $replaceStr = $matches[1];
 86+ }
 87+ }
 88+ } else {
 89+ $tag = str_replace("~r", $replaceStr, $tag);
 90+ $foundTag = strncmp($tag, $text, strlen($tag)) == 0;
 91+ }
 92+ }
 93+ if ($foundTag) {
 94+ $newText = $text;
 95+ $newElement = $dom->createElement($rule->getAttribute("tagName"));
 96+ if ($tag != NULL) {
 97+ $newText = substr($newText, strlen($tag));
 98+ $newElement->setAttribute("tag", $tag);
 99+ }
 100+ $retCode = $rule->firstChild == NULL || $this->parseRec($rule->firstChild, $replaceStr, $saveTags, $iter, $newText, $newElement);
 101+ if ($retCode) {
 102+ $outNode->appendChild($newElement);
 103+ $text = $newText;
 104+ }
 105+ }
 106+ } elseif ($rule->nodeName == "Sequence") {
 107+ $saveText = $text;
 108+ $saveNode = $outNode->cloneNode(TRUE);
 109+ $pushInd = $rule->getAttribute("pushInd");
 110+ foreach ($rule->childNodes as $i => $crrnt) {
 111+ $pushTags = $i >= $pushInd ? $saveTags : "";
 112+ $retCode = $this->parseRec($crrnt, $replaceStr, $pushTags, $iter, $text, $outNode);
 113+ if (! $retCode) {
 114+ $text = $saveText;
 115+ $outNode = $saveNode;
 116+ break;
 117+ }
 118+ }
 119+ } elseif ($rule->nodeName == "Choice") {
 120+ foreach ($rule->childNodes as $crrnt) {
 121+ $retCode = $this->parseRec($crrnt, $replaceStr, $saveTags, $iter, $text, $outNode);
 122+ if ($retCode) {
 123+ break;
 124+ }
 125+ }
 126+ $retCode |= $rule->getAttribute("failSafe") != NULL;
 127+ } elseif ($rule->nodeName == "Reference") {
 128+ $newVar = $rule->hasAttribute("var") ? str_replace("~r", $replaceStr, $rule->getAttribute("var")) : $replaceStr;
 129+ $xpath = new DOMXPath($rule->ownerDocument);
 130+ $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0);
 131+ $retCode = $this->parseRec($refRule, $newVar, $saveTags, $iter, $text, $outNode);
 132+ } elseif ($rule->nodeName == "Text") {
 133+ $tagSearch = $rule->getAttribute("childTags");
 134+ if ($tagSearch == "") {
 135+ $tagSearch = $saveTags;
 136+ } elseif ($saveTags != "") {
 137+ $tagSearch .= "|" . $saveTags;
 138+ }
 139+ while ($text != "" && ($saveTags == "" || ! preg_match("/^($saveTags)/s", $text))) {
 140+ $offset = $rule->firstChild != NULL && $this->parseRec($rule->firstChild, $replaceStr, "", $iter, $text, $outNode) ? 0 : 1;
 141+ if (preg_match("/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset)) {
 142+ if ($matches[0][1] > 0) {
 143+ $outNode->appendChild($dom->createTextNode(substr($text, 0, $matches[0][1])));
 144+ $text = substr($text, $matches[0][1]);
 145+ }
 146+ } else {
 147+ $outNode->appendChild($dom->createTextNode($text));
 148+ $text = "";
 149+ }
 150+ }
 151+ $retCode = true;
 152+ }
 153+ wfDebugLog("ParseEngine", "Exiting {$rule->nodeName}, Return Code - $retCode\n");
 154+ wfDebugLog("ParseEngine", "Text - $text\n");
 155+ return $retCode;
 156+ }
 157+
 158+ private function pushTags($rule, $tagStr) {
 159+ if ($rule->nodeName == "Sequence") {
 160+ $pushInd = $rule->childNodes->length - 1;
 161+ $shouldPush = true;
 162+ for ($child = $rule->lastChild; $child != NULL; $child = $child->previousSibling) {
 163+ $this->pushTags($child, $tagStr);
 164+ if ($child->previousSibling != NULL) {
 165+ if ($this->pullTags($child, $iter, $childTag)) {
 166+ if ($shouldPush) {
 167+ $pushInd --;
 168+ }
 169+ if ($tagStr == "") {
 170+ $tagStr = $childTag;
 171+ } elseif ($childTag != "") {
 172+ $tagStr .= "|" . $childTag;
 173+ }
 174+ } else {
 175+ $shouldPush = false;
 176+ $tagStr = $childTag;
 177+ }
 178+ }
 179+ }
 180+ $rule->setAttribute("pushInd", $pushInd);
 181+ } else {
 182+ if ($rule->nodeName != "Choice") {
 183+ $rule->setAttribute("saveTags", $tagStr);
 184+ $tagStr = NULL;
 185+ if ($rule->nodeName == "Text") {
 186+ $childTags = "";
 187+ foreach ($rule->childNodes as $crrnt) {
 188+ if ($childTags != "") {
 189+ $childTags .= "|";
 190+ }
 191+ $this->pullTags($crrnt, $iter, $childTag);
 192+ $childTags .= $childTag;
 193+ }
 194+ $rule->setAttribute("childTags", $childTags);
 195+ }
 196+ }
 197+ foreach ($rule->childNodes as $crrnt) {
 198+ $this->pushTags($crrnt, $tagStr);
 199+ }
 200+ }
 201+ }
 202+
 203+ private function pullTags($rule, &$iter, &$childTags) {
 204+ $iter ++;
 205+ if ($iter > ParseEngine::maxIter) {
 206+ throw new MWException("Collecter iterated too many times. Probable loop in grammar.");
 207+ }
 208+ $childTags = "";
 209+ $failSafe = TRUE;
 210+ if ($rule->nodeName == "Assignment") {
 211+ $childTags = $rule->getAttribute("tag");
 212+ if ($rule->getAttribute("regex") == NULL) {
 213+ $childTags = preg_quote($childTags, "/");
 214+ }
 215+ $failSafe = FALSE;
 216+ } elseif ($rule->nodeName == "Choice" || $rule->nodeName == "Sequence") {
 217+ $failSafe = $rule->nodeName == "Sequence";
 218+ foreach ($rule->childNodes as $child) {
 219+ $failSafe = $this->pullTags($child, $iter, $newTags);
 220+ if ($childTags == "") {
 221+ $childTags = $newTags;
 222+ } elseif ($newTags != "") {
 223+ $childTags .= "|" . $newTags;
 224+ }
 225+ if (($failSafe && $rule->nodeName == "Choice") || (! $failSafe && $rule->nodeName == "Sequence")) {
 226+ break;
 227+ }
 228+ }
 229+ $failSafe |= $rule->nodeName == "Choice" && $rule->getAttribute("failSafe") != NULL;
 230+ } elseif ($rule->nodeName == "Reference") {
 231+ $xpath = new DOMXPath($rule->ownerDocument);
 232+ $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0);
 233+ $failSafe = $this->pullTags($refRule, $iter, $childTags);
 234+ }
 235+ return $failSafe;
 236+ }
 237+}
 238+
Property changes on: trunk/extensions/ParseEngine/ParseEngine.body.php
___________________________________________________________________
Name: svn:eol-style
1239 + native
Index: trunk/extensions/ParseEngine/ParseEngine.php
@@ -0,0 +1,38 @@
 2+<?php
 3+/**
 4+ * Allows people to define a grammar in a wiki then use that grammar to input information to the same wiki
 5+ * @file
 6+ * @ingroup Extensions
 7+ * @author Nathanael Thompson <than4213@gmail.com>
 8+ * @copyright Copyright © 2010 Nathanael Thompson
 9+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
 10+ */
 11+if ( !defined( "MEDIAWIKI" ) ) {
 12+ die( "This is not a valid entry point.\n" );
 13+}
 14+
 15+$wgExtensionCredits["other"][] = array(
 16+ "path" => __FILE__,
 17+ "name" => "ParseEngine",
 18+ "author" => "Nathanael Thompson",
 19+ "url" => "http://www.mediawiki.org/wiki/Extension:ParseEngine",
 20+ "version" => "1.0",
 21+ "descriptionmsg" => "parseengine-desc",
 22+);
 23+
 24+$dir = dirname( __FILE__ );
 25+$wgAutoloadClasses["ParseEngine"] = "$dir/ParseEngine.body.php";
 26+
 27+$wgTheParseEngine = new ParseEngine();
 28+$wgHooks["BeforePreSaveTransform"][] = array($wgTheParseEngine, "parse", $wgParseEngineGrammar);
 29+$wgHooks["ParserBeforeStrip"][] = "wfParseEngineCallFromParse";
 30+
 31+define ( "NS_GRAMMAR" , 91628);
 32+define ( "NS_GRAMMAR_TALK" , 91629);
 33+$wgExtraNamespaces[NS_GRAMMAR] = "Grammar";
 34+$wgExtraNamespaces[NS_GRAMMAR_TALK] = "Grammar_talk";
 35+
 36+function wfParseEngineCallFromParse($unUsed, $text) {
 37+ global $wgTheParseEngine, $wgParseEngineGrammar;
 38+ return $wgTheParseEngine->parse($wgParseEngineGrammar, $text);
 39+}
Property changes on: trunk/extensions/ParseEngine/ParseEngine.php
___________________________________________________________________
Name: svn:eol-style
140 + native
Index: trunk/extensions/ParseEngine/WikiTextGrammar.xml
@@ -0,0 +1,145 @@
 2+<?xml version="1.0"?>
 3+<Grammar rootTag="root" startRule="start" version="1.0">
 4+ <Sequence name="start" >
 5+ <Reference name="postNewLine" />
 6+ <Reference name="main" />
 7+ </Sequence>
 8+ <Text name="main">
 9+ <Choice>
 10+ <Sequence>
 11+ <Reference name="newLine" />
 12+ <Reference name="postNewLine" />
 13+ </Sequence>
 14+ <Assignment tagName="link" tag="[[">
 15+ <Reference name="endText" var="]]" />
 16+ </Assignment>
 17+ <Assignment tagName="tplArg" tag="{{{(?!{)" regex="true">
 18+ <Sequence>
 19+ <Reference name="name" />
 20+ <Choice failSafe="true">
 21+ <Assignment tagName="default" tag="|">
 22+ <Reference name="main" />
 23+ </Assignment>
 24+ </Choice>
 25+ <Assignment tagName="endTag" tag="}}}" />
 26+ </Sequence>
 27+ </Assignment>
 28+ <Assignment tagName="template" tag="{{">
 29+ <Sequence>
 30+ <Reference name="name" />
 31+ <Choice failSafe="true">
 32+ <Assignment tagName="name2" tag=":">
 33+ <Reference name="main" />
 34+ </Assignment>
 35+ </Choice>
 36+ <Reference name="partList" />
 37+ <Assignment tagName="endTag" tag="}}" />
 38+ </Sequence>
 39+ </Assignment>
 40+ <Reference name="comment" />
 41+ <Assignment tagName="noWiki" tag="&lt;nowiki>">
 42+ <Sequence>
 43+ <Text />
 44+ <Assignment tagName="endTag" tag="&lt;\/nowiki>" />
 45+ </Sequence>
 46+ </Assignment>
 47+ <Assignment tagName="xmlTag" tag="&lt;(?=(\w+)[\s\/>])" regex="true">
 48+ <Sequence>
 49+ <Assignment tagName="name" tag="~r" />
 50+ <Reference name="attrList" />
 51+ <Choice>
 52+ <Assignment tagName="endTag" tag="\s*\/>" regex="true" />
 53+ <Sequence>
 54+ <Assignment tagName="inner" tag="\s*>" regex="true">
 55+ <Reference name="main" />
 56+ </Assignment>
 57+ <Assignment tagName="endTag" tag="&lt;/~r>" />
 58+ </Sequence>
 59+ </Choice>
 60+ </Sequence>
 61+ </Assignment>
 62+ </Choice>
 63+ </Text>
 64+ <Sequence name="endText">
 65+ <Reference name="main" />
 66+ <Assignment tagName="endTag" tag="~r" />
 67+ </Sequence>
 68+ <Assignment name="newLine" tagName="newLine" tag="\r?\n" regex="true" />
 69+ <Assignment name="eol" tagName="eol" tag="(?=\n|$)" regex="true" />
 70+ <Choice name="ignoreList" failSafe="true">
 71+ <Sequence>
 72+ <Choice>
 73+ <Assignment tag="[ \t]+" regex="true" />
 74+ <Reference name="comment" />
 75+ </Choice>
 76+ <Reference name="ignoreList" />
 77+ </Sequence>
 78+ </Choice>
 79+ <Choice name="postNewLine" failSafe="true">
 80+ <Sequence>
 81+ <Assignment tagName="h" tag="(={1,6})" regex="true">
 82+ <Reference name="endText" />
 83+ </Assignment>
 84+ <Reference name="ignoreList" />
 85+ <Reference name="eol" />
 86+ </Sequence>
 87+ <Reference name="listChoice" var="" />
 88+ </Choice>
 89+ <Choice name="listChoice">
 90+ <Assignment tagName="orderedList" tag="(?=(~r#))" regex="true">
 91+ <Reference name="itemList" />
 92+ </Assignment>
 93+ <Assignment tagName="unorderedList" tag="(?=(~r\*))" regex="true">
 94+ <Reference name="itemList" />
 95+ </Assignment>
 96+ </Choice>
 97+ <Sequence name="itemList">
 98+ <Choice>
 99+ <Reference name="listChoice" />
 100+ <Assignment tagName="listItem" tag="~r">
 101+ <Sequence>
 102+ <Reference name="main" />
 103+ <Reference name="eol" />
 104+ </Sequence>
 105+ </Assignment>
 106+ </Choice>
 107+ <Choice failSafe="true">
 108+ <Sequence>
 109+ <Reference name="newLine" />
 110+ <Reference name="itemList" />
 111+ </Sequence>
 112+ </Choice>
 113+ </Sequence>
 114+ <Assignment name="comment" tagName="comment" tag="&lt;!--.*?(?:-->|$)" regex="true" />
 115+ <Assignment name="name" tagName="name">
 116+ <Reference name="main" />
 117+ </Assignment>
 118+ <Choice name="partList" failSafe="true">
 119+ <Sequence>
 120+ <Assignment tagName="part" tag="|">
 121+ <Sequence>
 122+ <Reference name="name" />
 123+ <Choice failSafe="true">
 124+ <Assignment tagName="value" tag="=">
 125+ <Reference name="main" />
 126+ </Assignment>
 127+ </Choice>
 128+ </Sequence>
 129+ </Assignment>
 130+ <Reference name="partList" />
 131+ </Sequence>
 132+ </Choice>
 133+ <Choice name="attrList" failSafe="true">
 134+ <Sequence>
 135+ <Assignment tagName="attribute" tag="\s+(?!\/?>)" regex="true">
 136+ <Sequence>
 137+ <Reference name="name" />
 138+ <Assignment tagName="value" tag="\s*=\s*(&quot;|')" regex="true">
 139+ <Reference name="endText" />
 140+ </Assignment>
 141+ </Sequence>
 142+ </Assignment>
 143+ <Reference name="attrList" />
 144+ </Sequence>
 145+ </Choice>
 146+</Grammar>
Property changes on: trunk/extensions/ParseEngine/WikiTextGrammar.xml
___________________________________________________________________
Name: svn:eol-style
1147 + native
Property changes on: trunk/extensions/DataTransclusion/WebDataTransclusionSource.php
___________________________________________________________________
Name: svn:mergeinfo
2148 -

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r65412 | Rename extension to ParserWiki. Added test case to see if my parser language... | than4213 | 19:40, 21 April 2010

Status & tagging log