r64970 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r64969‎ | r64970 | r64971 >
Date:19:48, 12 April 2010
Author:than4213
Status:deferred
Tags:
Comment:
This extension lets you define a grammar in a wiki format and then use that grammar when entering future wiki content
Modified paths:
  • /trunk/extensions/ParseEngine (added) (history)
  • /trunk/extensions/ParseEngine/ParseEngine.body.php (added) (history)
  • /trunk/extensions/ParseEngine/ParseEngine.php (added) (history)
  • /trunk/extensions/ParseEngine/WikiTextGrammar.xml (added) (history)

Diff [purge]

Index: trunk/extensions/ParseEngine/ParseEngine.body.php
@@ -0,0 +1,236 @@
 2+<?php
 3+/**
 4+ * Acts as the primary interface between the world and the parser.
 5+ * mStartRule - the first rule to use while parsing
 6+ * mRules - The list of rules to use while parsing
 7+ * mDom - Used to create Dom objects and get's returned at the end of parsing
 8+ * mIter - Keeps track of how many times the parser recurses to stop endless loops
 9+ */
 10+class ParseEngine {
 11+ const maxIter = 2048;
 12+ private $mGrammars;
 13+
 14+ function __construct() {
 15+ $this->mGrammars = array();
 16+ }
 17+
 18+ function parse($grammarName, &$text) {
 19+ global $IP;
 20+ wfDebugLog("ParseEngine", "==========Start Parse Engine==========\n");
 21+ $grammar = isset($this->mGrammars[$grammarName]) ? $this->mGrammars[$grammarName] : NULL;
 22+ if ($grammar == NULL) {
 23+ $grammar = new DOMDocument();
 24+ if (! $grammar->load("$IP/extensions/ParseEngine/$grammarName.xml", LIBXML_NOBLANKS)) {
 25+ return TRUE;
 26+ }
 27+ $this->pushTags($grammar->documentElement, NULL);
 28+ $this->mGrammars[$grammarName] = $grammar;
 29+ }
 30+ $doc = new DOMDocument();
 31+ $rootTag = $doc->createElement($grammar->documentElement->getAttribute("rootTag"));
 32+ $startRule = $grammar->documentElement->getAttribute("startRule");
 33+ $xpath = new DOMXPath($grammar);
 34+ $startRule = $xpath->query("/Grammar/*[@name='$startRule']")->item(0);
 35+ $refText = $text;
 36+ if (! $this->parseRec($startRule, "", "", $iter, $refText, $rootTag)) {
 37+ return TRUE;
 38+ }
 39+ $doc->appendChild($rootTag);
 40+ $text = $doc->saveXML();
 41+ wfDebugLog("ParseEngine", "Parsed text - $text\n");
 42+ return TRUE;
 43+ }
 44+
 45+ static function unparse($inNodes) {
 46+ $retStr = "";
 47+ foreach ($inNodes as $child) {
 48+ if ($child instanceof DOMText) {
 49+ $retStr .= $child->data;
 50+ } else {
 51+ $retStr .= $child->getAttribute("tag") . self::unparse($child->childNodes);
 52+ }
 53+ }
 54+ return $retStr;
 55+ }
 56+
 57+ private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) {
 58+ wfDebugLog("ParseEngine", "Entering {$rule->nodeName}, {$rule->getAttribute("name")}\n");
 59+ $iter ++;
 60+ if ($iter > ParseEngine::maxIter) {
 61+ throw new MWException("Parser iterated too many times. Probable loop in grammar.");
 62+ }
 63+ if ($rule->nodeName == "Assignment" || $rule->nodeName == "Reference" || $rule->nodeName == "Text") {
 64+ $saveTags = str_replace("~r", preg_quote($replaceStr, "/"), $saveTags);
 65+ $newTags = $rule->getAttribute("saveTags");
 66+ if ($saveTags == "") {
 67+ $saveTags = $newTags;
 68+ } elseif ($newTags != "") {
 69+ $saveTags .= "|" . $newTags;
 70+ }
 71+ }
 72+ $dom = $outNode->ownerDocument;
 73+ $retCode = FALSE;
 74+ if ($rule->nodeName == "Assignment") {
 75+ $tag = $rule->getAttribute("tag");
 76+ $foundTag = $tag == NULL;
 77+ if (! $foundTag) {
 78+ if ($rule->getAttribute("regex") != NULL) {
 79+ $tag = str_replace("~r", preg_quote($replaceStr, "/"), $tag);
 80+ $foundTag = preg_match("/^$tag/s", $text, $matches);
 81+ if ($foundTag) {
 82+ $tag = $matches[0];
 83+ if (isset($matches[1])) {
 84+ $replaceStr = $matches[1];
 85+ }
 86+ }
 87+ } else {
 88+ $tag = str_replace("~r", $replaceStr, $tag);
 89+ $foundTag = strncmp($tag, $text, strlen($tag)) == 0;
 90+ }
 91+ }
 92+ if ($foundTag) {
 93+ $newText = $text;
 94+ $newElement = $dom->createElement($rule->getAttribute("tagName"));
 95+ if ($tag != NULL) {
 96+ $newText = substr($newText, strlen($tag));
 97+ $newElement->setAttribute("tag", $tag);
 98+ }
 99+ $retCode = $rule->firstChild == NULL || $this->parseRec($rule->firstChild, $replaceStr, $saveTags, $iter, $newText, $newElement);
 100+ if ($retCode) {
 101+ $outNode->appendChild($newElement);
 102+ $text = $newText;
 103+ }
 104+ }
 105+ } elseif ($rule->nodeName == "Sequence") {
 106+ $saveText = $text;
 107+ $saveNode = $outNode->cloneNode(TRUE);
 108+ $pushInd = $rule->getAttribute("pushInd");
 109+ foreach ($rule->childNodes as $i => $crrnt) {
 110+ $pushTags = $i >= $pushInd ? $saveTags : "";
 111+ $retCode = $this->parseRec($crrnt, $replaceStr, $pushTags, $iter, $text, $outNode);
 112+ if (! $retCode) {
 113+ $text = $saveText;
 114+ $outNode = $saveNode;
 115+ break;
 116+ }
 117+ }
 118+ } elseif ($rule->nodeName == "Choice") {
 119+ foreach ($rule->childNodes as $crrnt) {
 120+ $retCode = $this->parseRec($crrnt, $replaceStr, $saveTags, $iter, $text, $outNode);
 121+ if ($retCode) {
 122+ break;
 123+ }
 124+ }
 125+ $retCode |= $rule->getAttribute("failSafe") != NULL;
 126+ } elseif ($rule->nodeName == "Reference") {
 127+ $newVar = $rule->hasAttribute("var") ? str_replace("~r", $replaceStr, $rule->getAttribute("var")) : $replaceStr;
 128+ $xpath = new DOMXPath($rule->ownerDocument);
 129+ $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0);
 130+ $retCode = $this->parseRec($refRule, $newVar, $saveTags, $iter, $text, $outNode);
 131+ } elseif ($rule->nodeName == "Text") {
 132+ $tagSearch = $rule->getAttribute("childTags");
 133+ if ($tagSearch == "") {
 134+ $tagSearch = $saveTags;
 135+ } elseif ($saveTags != "") {
 136+ $tagSearch .= "|" . $saveTags;
 137+ }
 138+ while ($text != "" && ($saveTags == "" || ! preg_match("/^($saveTags)/s", $text))) {
 139+ $offset = $rule->firstChild != NULL && $this->parseRec($rule->firstChild, $replaceStr, "", $iter, $text, $outNode) ? 0 : 1;
 140+ if (preg_match("/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset)) {
 141+ if ($matches[0][1] > 0) {
 142+ $outNode->appendChild($dom->createTextNode(substr($text, 0, $matches[0][1])));
 143+ $text = substr($text, $matches[0][1]);
 144+ }
 145+ } else {
 146+ $outNode->appendChild($dom->createTextNode($text));
 147+ $text = "";
 148+ }
 149+ }
 150+ $retCode = true;
 151+ }
 152+ wfDebugLog("ParseEngine", "Exiting {$rule->nodeName}, Return Code - $retCode\n");
 153+ wfDebugLog("ParseEngine", "Text - $text\n");
 154+ return $retCode;
 155+ }
 156+
 157+ private function pushTags($rule, $tagStr) {
 158+ if ($rule->nodeName == "Sequence") {
 159+ $pushInd = $rule->childNodes->length - 1;
 160+ $shouldPush = true;
 161+ for ($child = $rule->lastChild; $child != NULL; $child = $child->previousSibling) {
 162+ $this->pushTags($child, $tagStr);
 163+ if ($child->previousSibling != NULL) {
 164+ if ($this->pullTags($child, $iter, $childTag)) {
 165+ if ($shouldPush) {
 166+ $pushInd --;
 167+ }
 168+ if ($tagStr == "") {
 169+ $tagStr = $childTag;
 170+ } elseif ($childTag != "") {
 171+ $tagStr .= "|" . $childTag;
 172+ }
 173+ } else {
 174+ $shouldPush = false;
 175+ $tagStr = $childTag;
 176+ }
 177+ }
 178+ }
 179+ $rule->setAttribute("pushInd", $pushInd);
 180+ } else {
 181+ if ($rule->nodeName != "Choice") {
 182+ $rule->setAttribute("saveTags", $tagStr);
 183+ $tagStr = NULL;
 184+ if ($rule->nodeName == "Text") {
 185+ $childTags = "";
 186+ foreach ($rule->childNodes as $crrnt) {
 187+ if ($childTags != "") {
 188+ $childTags .= "|";
 189+ }
 190+ $this->pullTags($crrnt, $iter, $childTag);
 191+ $childTags .= $childTag;
 192+ }
 193+ $rule->setAttribute("childTags", $childTags);
 194+ }
 195+ }
 196+ foreach ($rule->childNodes as $crrnt) {
 197+ $this->pushTags($crrnt, $tagStr);
 198+ }
 199+ }
 200+ }
 201+
 202+ private function pullTags($rule, &$iter, &$childTags) {
 203+ $iter ++;
 204+ if ($iter > ParseEngine::maxIter) {
 205+ throw new MWException("Collecter iterated too many times. Probable loop in grammar.");
 206+ }
 207+ $childTags = "";
 208+ $failSafe = TRUE;
 209+ if ($rule->nodeName == "Assignment") {
 210+ $childTags = $rule->getAttribute("tag");
 211+ if ($rule->getAttribute("regex") == NULL) {
 212+ $childTags = preg_quote($childTags, "/");
 213+ }
 214+ $failSafe = FALSE;
 215+ } elseif ($rule->nodeName == "Choice" || $rule->nodeName == "Sequence") {
 216+ $failSafe = $rule->nodeName == "Sequence";
 217+ foreach ($rule->childNodes as $child) {
 218+ $failSafe = $this->pullTags($child, $iter, $newTags);
 219+ if ($childTags == "") {
 220+ $childTags = $newTags;
 221+ } elseif ($newTags != "") {
 222+ $childTags .= "|" . $newTags;
 223+ }
 224+ if (($failSafe && $rule->nodeName == "Choice") || (! $failSafe && $rule->nodeName == "Sequence")) {
 225+ break;
 226+ }
 227+ }
 228+ $failSafe |= $rule->nodeName == "Choice" && $rule->getAttribute("failSafe") != NULL;
 229+ } elseif ($rule->nodeName == "Reference") {
 230+ $xpath = new DOMXPath($rule->ownerDocument);
 231+ $refRule = $xpath->query("/Grammar/*[@name='{$rule->getAttribute("name")}']")->item(0);
 232+ $failSafe = $this->pullTags($refRule, $iter, $childTags);
 233+ }
 234+ return $failSafe;
 235+ }
 236+}
 237+
Property changes on: trunk/extensions/ParseEngine/ParseEngine.body.php
___________________________________________________________________
Name: svn:eol-style
1238 + native
Index: trunk/extensions/ParseEngine/ParseEngine.php
@@ -0,0 +1,26 @@
 2+<?php
 3+/**
 4+ * Allows people to define a grammar in a wiki format then use that grammar to input information to the wiki
 5+ * @file
 6+ * @ingroup Extensions
 7+ * @author Nathanael Thompson <than4213@gmail.com>
 8+ * @copyright Copyright © 2009 Nathanael Thompson
 9+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
 10+ */
 11+if ( !defined( "MEDIAWIKI" ) ) {
 12+ die( "This is not a valid entry point.\n" );
 13+}
 14+
 15+$wgExtensionCredits["other"][] = array(
 16+ "path" => __FILE__,
 17+ "name" => "ParseEngine",
 18+ "author" => "Nathanael Thompson",
 19+ "url" => "http://www.mediawiki.org/wiki/Extension:ParseEngine",
 20+ "version" => "1.0",
 21+ "descriptionmsg" => "parseengine-desc",
 22+);
 23+
 24+$dir = dirname( __FILE__ );
 25+$wgAutoloadClasses["ParseEngine"] = "$dir/ParseEngine.body.php";
 26+$wgHooks["BeforePreSaveTransform"][] = array(new ParseEngine(), "parse", $wgParseEngineGrammar);
 27+
Property changes on: trunk/extensions/ParseEngine/ParseEngine.php
___________________________________________________________________
Name: svn:eol-style
128 + native
Index: trunk/extensions/ParseEngine/WikiTextGrammar.xml
@@ -0,0 +1,145 @@
 2+<?xml version="1.0"?>
 3+<Grammar rootTag="root" startRule="start" version="1.0">
 4+ <Sequence name="start" >
 5+ <Reference name="postNewLine" />
 6+ <Reference name="main" />
 7+ </Sequence>
 8+ <Text name="main">
 9+ <Choice>
 10+ <Sequence>
 11+ <Reference name="newLine" />
 12+ <Reference name="postNewLine" />
 13+ </Sequence>
 14+ <Assignment tagName="link" tag="[[">
 15+ <Reference name="endText" var="]]" />
 16+ </Assignment>
 17+ <Assignment tagName="tplArg" tag="{{{(?!{)" regex="true">
 18+ <Sequence>
 19+ <Reference name="name" />
 20+ <Choice failSafe="true">
 21+ <Assignment tagName="default" tag="|">
 22+ <Reference name="main" />
 23+ </Assignment>
 24+ </Choice>
 25+ <Assignment tagName="endTag" tag="}}}" />
 26+ </Sequence>
 27+ </Assignment>
 28+ <Assignment tagName="template" tag="{{">
 29+ <Sequence>
 30+ <Reference name="name" />
 31+ <Choice failSafe="true">
 32+ <Assignment tagName="name2" tag=":">
 33+ <Reference name="main" />
 34+ </Assignment>
 35+ </Choice>
 36+ <Reference name="partList" />
 37+ <Assignment tagName="endTag" tag="}}" />
 38+ </Sequence>
 39+ </Assignment>
 40+ <Reference name="comment" />
 41+ <Assignment tagName="noWiki" tag="&lt;nowiki>">
 42+ <Sequence>
 43+ <Text />
 44+ <Assignment tagName="endTag" tag="&lt;\/nowiki>" />
 45+ </Sequence>
 46+ </Assignment>
 47+ <Assignment tagName="xmlTag" tag="&lt;(?=(\w+)[\s\/>])" regex="true">
 48+ <Sequence>
 49+ <Assignment tagName="name" tag="~r" />
 50+ <Reference name="attrList" />
 51+ <Choice>
 52+ <Assignment tagName="endTag" tag="\s*\/>" regex="true" />
 53+ <Sequence>
 54+ <Assignment tagName="inner" tag="\s*>" regex="true">
 55+ <Reference name="main" />
 56+ </Assignment>
 57+ <Assignment tagName="endTag" tag="&lt;/~r>" />
 58+ </Sequence>
 59+ </Choice>
 60+ </Sequence>
 61+ </Assignment>
 62+ </Choice>
 63+ </Text>
 64+ <Sequence name="endText">
 65+ <Reference name="main" />
 66+ <Assignment tagName="endTag" tag="~r" />
 67+ </Sequence>
 68+ <Assignment name="newLine" tagName="newLine" tag="\r?\n" regex="true" />
 69+ <Assignment name="eol" tagName="eol" tag="(?=\n|$)" regex="true" />
 70+ <Choice name="ignoreList" failSafe="true">
 71+ <Sequence>
 72+ <Choice>
 73+ <Assignment tag="[ \t]+" regex="true" />
 74+ <Reference name="comment" />
 75+ </Choice>
 76+ <Reference name="ignoreList" />
 77+ </Sequence>
 78+ </Choice>
 79+ <Choice name="postNewLine" failSafe="true">
 80+ <Sequence>
 81+ <Assignment tagName="h" tag="(={1,6})" regex="true">
 82+ <Reference name="endText" />
 83+ </Assignment>
 84+ <Reference name="ignoreList" />
 85+ <Reference name="eol" />
 86+ </Sequence>
 87+ <Reference name="listChoice" var="" />
 88+ </Choice>
 89+ <Choice name="listChoice">
 90+ <Assignment tagName="orderedList" tag="(?=(~r#))" regex="true">
 91+ <Reference name="itemList" />
 92+ </Assignment>
 93+ <Assignment tagName="unorderedList" tag="(?=(~r\*))" regex="true">
 94+ <Reference name="itemList" />
 95+ </Assignment>
 96+ </Choice>
 97+ <Sequence name="itemList">
 98+ <Choice>
 99+ <Reference name="listChoice" />
 100+ <Assignment tagName="listItem" tag="~r">
 101+ <Sequence>
 102+ <Reference name="main" />
 103+ <Reference name="eol" />
 104+ </Sequence>
 105+ </Assignment>
 106+ </Choice>
 107+ <Choice failSafe="true">
 108+ <Sequence>
 109+ <Reference name="newLine" />
 110+ <Reference name="itemList" />
 111+ </Sequence>
 112+ </Choice>
 113+ </Sequence>
 114+ <Assignment name="comment" tagName="comment" tag="&lt;!--.*?(?:-->|$)" regex="true" />
 115+ <Assignment name="name" tagName="name">
 116+ <Reference name="main" />
 117+ </Assignment>
 118+ <Choice name="partList" failSafe="true">
 119+ <Sequence>
 120+ <Assignment tagName="part" tag="|">
 121+ <Sequence>
 122+ <Reference name="name" />
 123+ <Choice failSafe="true">
 124+ <Assignment tagName="value" tag="=">
 125+ <Reference name="main" />
 126+ </Assignment>
 127+ </Choice>
 128+ </Sequence>
 129+ </Assignment>
 130+ <Reference name="partList" />
 131+ </Sequence>
 132+ </Choice>
 133+ <Choice name="attrList" failSafe="true">
 134+ <Sequence>
 135+ <Assignment tagName="attribute" tag="\s+(?!\/?>)" regex="true">
 136+ <Sequence>
 137+ <Reference name="name" />
 138+ <Assignment tagName="value" tag="\s*=\s*(&quot;|')" regex="true">
 139+ <Reference name="endText" />
 140+ </Assignment>
 141+ </Sequence>
 142+ </Assignment>
 143+ <Reference name="attrList" />
 144+ </Sequence>
 145+ </Choice>
 146+</Grammar>

Status & tagging log