Index: trunk/extensions/ParserWiki/ParserWiki.i18n.php |
— | — | @@ -0,0 +1,206 @@ |
| 2 | +<?php
|
| 3 | +/**
|
| 4 | + * Internationalisation file for extension ParserWiki.
|
| 5 | + *
|
| 6 | + * @file
|
| 7 | + * @ingroup Extensions
|
| 8 | + */
|
| 9 | +
|
| 10 | +$messages = array();
|
| 11 | +
|
| 12 | +$messages['en'] = array(
|
| 13 | + 'parserwiki-desc' => 'Allows defining a grammar in a wiki and then using that grammar to input information into the same wiki',
|
| 14 | +);
|
| 15 | +
|
| 16 | +/** Message documentation (Message documentation)
|
| 17 | + * @author Umherirrender
|
| 18 | + */
|
| 19 | +$messages['qqq'] = array(
|
| 20 | + 'parserwiki-desc' => '{{desc}}',
|
| 21 | +);
|
| 22 | +
|
| 23 | +/** Gheg Albanian (Gegë)
|
| 24 | + * @author Mdupont
|
| 25 | + */
|
| 26 | +$messages['aln'] = array(
|
| 27 | + 'parserwiki-desc' => 'Lejon për të përcaktuar një gramatikë në një wiki atëherë përdorim që e ciklit të lartë të informatave të dhëna në të njëjtën wiki',
|
| 28 | +);
|
| 29 | +
|
| 30 | +/** Belarusian (Taraškievica orthography) (Беларуская (тарашкевіца))
|
| 31 | + * @author Jim-by
|
| 32 | + */
|
| 33 | +$messages['be-tarask'] = array(
|
| 34 | + 'parserwiki-desc' => 'Дазваляе вызначыць граматыку ў вікі, потым выкарыстоўваць гэту граматыку для ўводу інфармацыі ў такую самую вікі',
|
| 35 | +);
|
| 36 | +
|
| 37 | +/** Breton (Brezhoneg)
|
| 38 | + * @author Fulup
|
| 39 | + */
|
| 40 | +$messages['br'] = array(
|
| 41 | + 'parserwiki-desc' => "Talvezout a ra da dermeniñ ur yezhadur en ur wiki ha d'e evit enporzhiañ titouroù er wiki-se",
|
| 42 | +);
|
| 43 | +
|
| 44 | +/** German (Deutsch)
|
| 45 | + * @author Kghbln
|
| 46 | + */
|
| 47 | +$messages['de'] = array(
|
| 48 | + 'parserwiki-desc' => 'Ermöglicht es in einem Wiki Grammatiken zu erstellen und diese für die Eingabe von Informationen zu verwenden',
|
| 49 | +);
|
| 50 | +
|
| 51 | +/** Lower Sorbian (Dolnoserbski)
|
| 52 | + * @author Michawiki
|
| 53 | + */
|
| 54 | +$messages['dsb'] = array(
|
| 55 | + 'parserwiki-desc' => 'Zmóžnja gramatiku we wikiju definěrowaś, aby se pótom ta gramatika wužywała, aby se zapódali informacije do togo samego wikija',
|
| 56 | +);
|
| 57 | +
|
| 58 | +/** Spanish (Español)
|
| 59 | + * @author Locos epraix
|
| 60 | + */
|
| 61 | +$messages['es'] = array(
|
| 62 | + 'parserwiki-desc' => 'Permite definir una gramática en un wiki luego usar esa gramática para ingresar información a dicha wiki',
|
| 63 | +);
|
| 64 | +
|
| 65 | +/** Finnish (Suomi)
|
| 66 | + * @author Crt
|
| 67 | + * @author Str4nd
|
| 68 | + */
|
| 69 | +$messages['fi'] = array(
|
| 70 | + 'parserwiki-desc' => 'Mahdollistaa kieliopin määrittelyn wikissä ja käyttää määritettyä kielioppia saman wikin tiedonsyöttöön.',
|
| 71 | +);
|
| 72 | +
|
| 73 | +/** French (Français)
|
| 74 | + * @author Peter17
|
| 75 | + */
|
| 76 | +$messages['fr'] = array(
|
| 77 | + 'parserwiki-desc' => "Permet de définir une grammaire dans un wiki et d'utiliser cette grammaire pour importer de l'information sur ce wiki",
|
| 78 | +);
|
| 79 | +
|
| 80 | +/** Galician (Galego)
|
| 81 | + * @author Gallaecio
|
| 82 | + * @author Toliño
|
| 83 | + */
|
| 84 | +$messages['gl'] = array(
|
| 85 | + 'parserwiki-desc' => 'Permite establecer unha gramática nun wiki e, a continuación, utilizar esa gramática para introducir información nese mesmo wiki',
|
| 86 | +);
|
| 87 | +
|
| 88 | +/** Swiss German (Alemannisch)
|
| 89 | + * @author Als-Holder
|
| 90 | + */
|
| 91 | +$messages['gsw'] = array(
|
| 92 | + 'parserwiki-desc' => 'Erlaubt e Grammatik in eme Wiki z definiere un die Grammatik im glyyche Wiki z bruche go Informatione yyfiege',
|
| 93 | +);
|
| 94 | +
|
| 95 | +/** Upper Sorbian (Hornjoserbsce)
|
| 96 | + * @author Michawiki
|
| 97 | + */
|
| 98 | +$messages['hsb'] = array(
|
| 99 | + 'parserwiki-desc' => 'Zmóžnja gramatiku we wikiju definować, zo by so ta gramatika wužiwało, zo bychu so informacije do samsneho wikija a zapodali',
|
| 100 | +);
|
| 101 | +
|
| 102 | +/** Hungarian (Magyar)
|
| 103 | + * @author Glanthor Reviol
|
| 104 | + */
|
| 105 | +$messages['hu'] = array(
|
| 106 | + 'parserwiki-desc' => 'Lehetővé teszi nyelvtan definiálását egy wikin, majd ezen nyelvtan használatával információk bevitelét ugyanazon a wikin',
|
| 107 | +);
|
| 108 | +
|
| 109 | +/** Interlingua (Interlingua)
|
| 110 | + * @author McDutchie
|
| 111 | + */
|
| 112 | +$messages['ia'] = array(
|
| 113 | + 'parserwiki-desc' => 'Permitte definir un grammatica in un wiki, postea usar iste grammatica pro inserer information in le mesme wiki',
|
| 114 | +);
|
| 115 | +
|
| 116 | +/** Indonesian (Bahasa Indonesia)
|
| 117 | + * @author Bennylin
|
| 118 | + */
|
| 119 | +$messages['id'] = array(
|
| 120 | + 'parserwiki-desc' => 'Memungkinkan untuk mendefinisikan tata bahasa di wiki lalu menggunakan informasi tersebut untuk memasukkan informasi ke wiki yang sama',
|
| 121 | +);
|
| 122 | +
|
| 123 | +/** Italian (Italiano)
|
| 124 | + * @author Civvì
|
| 125 | + */
|
| 126 | +$messages['it'] = array(
|
| 127 | + 'parserwiki-desc' => 'Consente di definire una grammatica in un wiki e quindi si utilizzare quella grammatica per inserire informazioni nel wiki stesso',
|
| 128 | +);
|
| 129 | +
|
| 130 | +/** Japanese (日本語)
|
| 131 | + * @author Fryed-peach
|
| 132 | + */
|
| 133 | +$messages['ja'] = array(
|
| 134 | + 'parserwiki-desc' => 'ウィキ内で文法を定義し、定義した文法をそのウィキへ情報を入力するのに使えるようにする',
|
| 135 | +);
|
| 136 | +
|
| 137 | +/** Luxembourgish (Lëtzebuergesch)
|
| 138 | + * @author Robby
|
| 139 | + */
|
| 140 | +$messages['lb'] = array(
|
| 141 | + 'parserwiki-desc' => 'Erlaabt et eng Grammaire an enger Wiki ze definéieren an déi Grammaire benotze fir Informatiounen an déi Wiki derbäizesetzen',
|
| 142 | +);
|
| 143 | +
|
| 144 | +/** Macedonian (Македонски)
|
| 145 | + * @author Bjankuloski06
|
| 146 | + */
|
| 147 | +$messages['mk'] = array(
|
| 148 | + 'parserwiki-desc' => 'Овозможува утврдување на граматика во едно вики и употреба на таа граматика за внос на информации во тоа вики',
|
| 149 | +);
|
| 150 | +
|
| 151 | +/** Dutch (Nederlands)
|
| 152 | + * @author Siebrand
|
| 153 | + */
|
| 154 | +$messages['nl'] = array(
|
| 155 | + 'parserwiki-desc' => 'Maakt het mogelijk grammatica te definiëren en deze grammatica dan te gebruiken om informatie in te voeren',
|
| 156 | +);
|
| 157 | +
|
| 158 | +/** Norwegian (bokmål) (Norsk (bokmål))
|
| 159 | + * @author Nghtwlkr
|
| 160 | + */
|
| 161 | +$messages['no'] = array(
|
| 162 | + 'parserwiki-desc' => 'Gjør det mulig å definere en grammatikk i en wiki og deretter bruke denne grammatikken til å legge inn informasjon til den samme wikien',
|
| 163 | +);
|
| 164 | +
|
| 165 | +/** Occitan (Occitan)
|
| 166 | + * @author Cedric31
|
| 167 | + */
|
| 168 | +$messages['oc'] = array(
|
| 169 | + 'parserwiki-desc' => "Permet de definir una gramatica dins un wiki e d'utilizar aquela gramatica per importar d'informacion sus aqueste wiki",
|
| 170 | +);
|
| 171 | +
|
| 172 | +/** Polish (Polski)
|
| 173 | + * @author Marcin Łukasz Kiejzik
|
| 174 | + */
|
| 175 | +$messages['pl'] = array(
|
| 176 | + 'parserwiki-desc' => 'Pozwala na zdefiniowanie gramatyki w wiki, następnie użyj tej formy gramatycznej do wprowadzania gramatyki na tą samą wiki',
|
| 177 | +);
|
| 178 | +
|
| 179 | +/** Piedmontese (Piemontèis)
|
| 180 | + * @author Borichèt
|
| 181 | + * @author Dragonòt
|
| 182 | + */
|
| 183 | +$messages['pms'] = array(
|
| 184 | + 'parserwiki-desc' => "A përmët ëd definì na gramàtica ant na wiki peui ëd dovré cola gramàtica për anserì dl'anformassion an sla wiki midema",
|
| 185 | +);
|
| 186 | +
|
| 187 | +/** Portuguese (Português)
|
| 188 | + * @author Hamilton Abreu
|
| 189 | + */
|
| 190 | +$messages['pt'] = array(
|
| 191 | + 'parserwiki-desc' => 'Permite definir uma gramática numa wiki e depois usá-la para inserir informação nessa mesma wiki',
|
| 192 | +);
|
| 193 | +
|
| 194 | +/** Russian (Русский)
|
| 195 | + * @author Александр Сигачёв
|
| 196 | + */
|
| 197 | +$messages['ru'] = array(
|
| 198 | + 'parserwiki-desc' => 'Позволяет определить грамматику в вики, и затем использовать эту грамматику для ввода информации в эту же вики',
|
| 199 | +);
|
| 200 | +
|
| 201 | +/** Tagalog (Tagalog)
|
| 202 | + * @author AnakngAraw
|
| 203 | + */
|
| 204 | +$messages['tl'] = array(
|
| 205 | + 'parserwiki-desc' => 'Nagpapahintulot na bigyang kahulugang ang isang balarila sa loob ng isang wiki at pagkaraan ay gamitin ang balarilang iyon upang magpasok ng kabatiran sa wiking iyon din',
|
| 206 | +);
|
| 207 | +
|
Index: trunk/extensions/ParserWiki/ParserWiki.php |
— | — | @@ -0,0 +1,59 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Allows people to define a grammar in a wiki then use that grammar to input information to the same wiki |
| 5 | + * @ingroup Extensions |
| 6 | + * @author Nathanael Thompson <than4213@gmail.com> |
| 7 | + * @copyright Copyright © 2010 Nathanael Thompson |
| 8 | + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License |
| 9 | + */ |
// Refuse to run outside of MediaWiki.
if ( !defined( "MEDIAWIKI" ) ) {
	die( "This is not a valid entry point.\n" );
}

// Register the extension's credits entry.
$wgExtensionCredits["other"][] = array(
	"path" => __FILE__,
	"name" => "ParserWiki",
	"author" => "Nathanael Thompson",
	"url" => "http://www.mediawiki.org/wiki/Extension:ParserWiki",
	"version" => "1.0",
	"descriptionmsg" => "parserwiki-desc",
);

$dir = dirname( __FILE__ ) . '/';
// The ParseEngine class is shipped in ParserEngine.php; the file name differs
// from the class name, so the autoload entry must spell the file name exactly.
$wgAutoloadClasses['ParseEngine'] = $dir . "ParserEngine.php";
$wgExtensionMessagesFiles['ParserWiki'] = $dir . 'ParserWiki.i18n.php';

// One shared handler instance is hooked into every parse.
$wgTheParserWiki = new ParserWiki();
$wgHooks["ParserBeforeStrip"][] = array( $wgTheParserWiki, "callFromParse" );

// Namespace holding the grammar definition pages. The guards keep a second
// inclusion (or a site-level definition made earlier) from raising
// "constant already defined" notices.
if ( !defined( "NS_GRAMMAR" ) ) {
	define( "NS_GRAMMAR", 91628 );
}
if ( !defined( "NS_GRAMMAR_TALK" ) ) {
	define( "NS_GRAMMAR_TALK", 91629 );
}
$wgExtraNamespaces[NS_GRAMMAR] = "Grammar";
$wgExtraNamespaces[NS_GRAMMAR_TALK] = "Grammar_talk";
| 33 | + |
/**
 * Hook glue between MediaWiki's parser and ParseEngine.
 *
 * ParseEngine loads and caches grammars itself (keyed by grammar name), so
 * this class only has to keep a single engine around and hand it the name of
 * the configured grammar together with the text to parse.
 */
class ParserWiki {
	/** ParseEngine instance shared by all parses; created on first use */
	private $mEngine;

	function __construct() {
		$this->mEngine = NULL;
	}

	/**
	 * Handler for the ParserBeforeStrip hook.
	 *
	 * @param $unUsed mixed First hook argument; not used here
	 * @param $text string Text being parsed; replaced in place by the
	 *   engine's XML output when the grammar matched
	 * @return bool TRUE when $text was left untouched (no grammar configured,
	 *   grammar not loadable, or the text did not match), FALSE when $text
	 *   was replaced
	 */
	function callFromParse( $unUsed, &$text ) {
		global $wgParserWikiGrammar;
		// Nothing to do until a grammar has been configured.
		if ( !isset( $wgParserWikiGrammar ) || $wgParserWikiGrammar === "" ) {
			return TRUE;
		}
		if ( $this->mEngine === NULL ) {
			$this->mEngine = new ParseEngine();
		}
		// ParseEngine::parse() reports every outcome with the same return
		// value and signals success only by rewriting $text, so success is
		// detected by comparing the text before and after the call.
		$before = $text;
		$this->mEngine->parse( $wgParserWikiGrammar, $text );
		return $text === $before;
	}
}
Property changes on: trunk/extensions/ParserWiki/ParserWiki.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 61 | + native |
Index: trunk/extensions/ParserWiki/ParserEngine.php |
— | — | @@ -0,0 +1,237 @@ |
| 2 | +<?php |
/**
 * Acts as the primary interface between the world and the parser.
 *
 * A grammar is an XML document stored on a page in the Grammar namespace. Its
 * root element is expected to be <Grammar> carrying the attributes "rootTag"
 * (name of the root element of the produced parse tree) and "startRule" (name
 * of the rule parsing starts with). The rules are its children, built from
 * Assignment, Sequence, Choice, Reference and Text elements.
 *
 * maxIter - Upper bound on rule visits per parse (and per tag collection run),
 *           used to stop endless loops in a grammar
 * mGrammars - Cache of loaded and pre-processed grammar documents, keyed by
 *             grammar name
 */
class ParseEngine {
	const maxIter = 2048;
	private $mGrammars;

	function __construct() {
		$this->mGrammars = array();
	}

	/**
	 * Parses $text with the named grammar and, on success, replaces $text with
	 * the serialised XML parse tree.
	 *
	 * @param $grammarName string Title (without namespace) of the grammar page
	 * @param $text string Text to parse; only overwritten when parsing succeeds
	 * @return bool Always TRUE; a failed load or failed parse leaves $text as is
	 * @throws MWException When the grammar recurses more than maxIter times or
	 *   references a rule that does not exist
	 */
	function parse($grammarName, &$text) {
		wfDebugLog("ParseEngine", "==========Start Parse Engine==========\n");
		$grammar = isset($this->mGrammars[$grammarName]) ? $this->mGrammars[$grammarName] : NULL;
		if ($grammar === NULL) {
			// An invalid grammar name yields no Title object at all.
			$title = Title::newFromText($grammarName, NS_GRAMMAR);
			$revision = $title == NULL ? NULL : Revision::newFromTitle($title);
			$grammar = new DOMDocument();
			if ($revision == NULL || ! $grammar->loadXML($revision->getText(), LIBXML_NOBLANKS)) {
				return TRUE;
			}
			// Annotate the grammar once with the tag information parseRec relies on.
			$this->pushTags($grammar->documentElement, NULL);
			$this->mGrammars[$grammarName] = $grammar;
		}
		$rootName = $grammar->documentElement->getAttribute("rootTag");
		$startRule = self::findRule($grammar, $grammar->documentElement->getAttribute("startRule"));
		if ($rootName == "" || $startRule == NULL) {
			// Without a root tag name or a start rule there is nothing to parse with.
			wfDebugLog("ParseEngine", "Grammar is missing its rootTag or its startRule\n");
			return TRUE;
		}
		$doc = new DOMDocument();
		$rootTag = $doc->createElement($rootName);
		// Work on a copy so that a failed parse cannot damage the caller's text.
		$refText = $text;
		$iter = 0;
		if (! $this->parseRec($startRule, "", "", $iter, $refText, $rootTag)) {
			return TRUE;
		}
		$doc->appendChild($rootTag);
		$text = $doc->saveXML();
		wfDebugLog("ParseEngine", "Parsed text - $text\n");
		return TRUE;
	}

	/**
	 * Turns parse tree nodes back into text: text nodes contribute their data,
	 * elements contribute their "tag" attribute followed by their unparsed
	 * children. Other node types (e.g. comments) contribute nothing.
	 *
	 * @param $inNodes DOMNodeList|array Nodes to convert
	 * @return string
	 */
	static function unparse($inNodes) {
		$retStr = "";
		foreach ($inNodes as $child) {
			if ($child instanceof DOMText) {
				$retStr .= $child->data;
			} elseif ($child instanceof DOMElement) {
				$retStr .= $child->getAttribute("tag") . self::unparse($child->childNodes);
			}
		}
		return $retStr;
	}

	/**
	 * Looks up the top level rule with the given name.
	 *
	 * @param $grammar DOMDocument Grammar document
	 * @param $name string Value of the rule's "name" attribute
	 * @return DOMElement|null The rule, or NULL when there is none
	 */
	private static function findRule($grammar, $name) {
		$xpath = new DOMXPath($grammar);
		$found = $xpath->query("/Grammar/*[@name=" . self::xpathLiteral($name) . "]");
		return $found === FALSE ? NULL : $found->item(0);
	}

	/**
	 * Quotes a string for use as an XPath 1.0 literal. XPath has no escape
	 * character, so a value containing both quote kinds is built with concat().
	 *
	 * @param $value string
	 * @return string
	 */
	private static function xpathLiteral($value) {
		if (strpos($value, "'") === FALSE) {
			return "'" . $value . "'";
		}
		if (strpos($value, "\"") === FALSE) {
			return "\"" . $value . "\"";
		}
		return "concat('" . str_replace("'", "',\"'\",'", $value) . "')";
	}

	/**
	 * Applies one grammar rule to the start of $text.
	 *
	 * @param $rule DOMElement Rule to apply
	 * @param $replaceStr string Value substituted for "~r" in tags and variables
	 * @param $saveTags string Regex alternation of tags that end an enclosing
	 *   Text rule, as inherited from the caller
	 * @param $iter int Number of rules visited so far, shared by the whole parse
	 * @param $text string Remaining input; consumed text is cut off on success
	 * @param $outNode DOMNode Parse tree node the results are appended to
	 * @return bool|int Truthy when the rule matched
	 * @throws MWException When maxIter is exceeded or a Reference names an
	 *   undefined rule
	 */
	private function parseRec($rule, $replaceStr, $saveTags, &$iter, &$text, &$outNode) {
		wfDebugLog("ParseEngine", "Entering {$rule->nodeName}, {$rule->getAttribute("name")}\n");
		$iter ++;
		if ($iter > ParseEngine::maxIter) {
			throw new MWException("Parser iterated too many times. Probable loop in grammar.");
		}
		if ($rule->nodeName == "Assignment" || $rule->nodeName == "Reference" || $rule->nodeName == "Text") {
			// Merge the inherited end tags with the ones pushTags stored on this rule.
			$saveTags = str_replace("~r", preg_quote($replaceStr, "/"), $saveTags);
			$newTags = $rule->getAttribute("saveTags");
			if ($saveTags == "") {
				$saveTags = $newTags;
			} elseif ($newTags != "") {
				$saveTags .= "|" . $newTags;
			}
		}
		$dom = $outNode->ownerDocument;
		$retCode = FALSE;
		if ($rule->nodeName == "Assignment") {
			$tag = $rule->getAttribute("tag");
			// An Assignment without a tag matches unconditionally.
			$foundTag = $tag == NULL;
			if (! $foundTag) {
				if ($rule->getAttribute("regex") != NULL) {
					$tag = str_replace("~r", preg_quote($replaceStr, "/"), $tag);
					$foundTag = preg_match("/^$tag/s", $text, $matches);
					if ($foundTag) {
						$tag = $matches[0];
						// The first capture group becomes the new "~r" value.
						if (isset($matches[1])) {
							$replaceStr = $matches[1];
						}
					}
				} else {
					$tag = str_replace("~r", $replaceStr, $tag);
					$foundTag = strncmp($tag, $text, strlen($tag)) == 0;
				}
			}
			if ($foundTag) {
				$newText = $text;
				$newElement = $dom->createElement($rule->getAttribute("tagName"));
				if ($tag != NULL) {
					$newText = substr($newText, strlen($tag));
					$newElement->setAttribute("tag", $tag);
				}
				$retCode = $rule->firstChild == NULL || $this->parseRec($rule->firstChild, $replaceStr, $saveTags, $iter, $newText, $newElement);
				// Only commit the element and the consumed text when the child matched too.
				if ($retCode) {
					$outNode->appendChild($newElement);
					$text = $newText;
				}
			}
		} elseif ($rule->nodeName == "Sequence") {
			// Snapshot text and output so a partial match can be rolled back.
			$saveText = $text;
			$saveNode = $outNode->cloneNode(TRUE);
			$pushInd = $rule->getAttribute("pushInd");
			foreach ($rule->childNodes as $i => $crrnt) {
				// Only children from index pushInd onwards inherit the end tags.
				$pushTags = $i >= $pushInd ? $saveTags : "";
				$retCode = $this->parseRec($crrnt, $replaceStr, $pushTags, $iter, $text, $outNode);
				if (! $retCode) {
					$text = $saveText;
					$outNode = $saveNode;
					break;
				}
			}
		} elseif ($rule->nodeName == "Choice") {
			// First matching alternative wins.
			foreach ($rule->childNodes as $crrnt) {
				$retCode = $this->parseRec($crrnt, $replaceStr, $saveTags, $iter, $text, $outNode);
				if ($retCode) {
					break;
				}
			}
			// A failSafe Choice succeeds even when no alternative matched.
			$retCode |= $rule->getAttribute("failSafe") != NULL;
		} elseif ($rule->nodeName == "Reference") {
			$newVar = $rule->hasAttribute("var") ? str_replace("~r", $replaceStr, $rule->getAttribute("var")) : $replaceStr;
			$refName = $rule->getAttribute("name");
			$refRule = self::findRule($rule->ownerDocument, $refName);
			if ($refRule == NULL) {
				throw new MWException("Grammar references undefined rule \"$refName\".");
			}
			$retCode = $this->parseRec($refRule, $newVar, $saveTags, $iter, $text, $outNode);
		} elseif ($rule->nodeName == "Text") {
			// Tags worth stopping at: those starting a child plus those ending this Text.
			$tagSearch = $rule->getAttribute("childTags");
			if ($tagSearch == "") {
				$tagSearch = $saveTags;
			} elseif ($saveTags != "") {
				$tagSearch .= "|" . $saveTags;
			}
			while ($text != "" && ($saveTags == "" || ! preg_match("/^($saveTags)/s", $text))) {
				// When the child did not match here, skip one byte before searching
				// so that the tag at the current position is treated as plain text.
				$offset = $rule->firstChild != NULL && $this->parseRec($rule->firstChild, $replaceStr, "", $iter, $text, $outNode) ? 0 : 1;
				if (preg_match("/$tagSearch/s", $text, $matches, PREG_OFFSET_CAPTURE, $offset)) {
					if ($matches[0][1] > 0) {
						$outNode->appendChild($dom->createTextNode(substr($text, 0, $matches[0][1])));
						$text = substr($text, $matches[0][1]);
					}
				} else {
					// No further tag: the rest of the input is plain text.
					$outNode->appendChild($dom->createTextNode($text));
					$text = "";
				}
			}
			$retCode = true;
		}
		wfDebugLog("ParseEngine", "Exiting {$rule->nodeName}, Return Code - $retCode\n");
		wfDebugLog("ParseEngine", "Text - $text\n");
		return $retCode;
	}

	/**
	 * Pre-processes a grammar by storing on its rules the information parseRec
	 * reads back: "pushInd" on Sequence rules, "saveTags" on every other rule
	 * except Choice, and "childTags" on Text rules.
	 *
	 * @param $rule DOMElement Rule to annotate (recurses into its children)
	 * @param $tagStr string|null Regex alternation of tags handed down by the caller
	 * @throws MWException See pullTags()
	 */
	private function pushTags($rule, $tagStr) {
		// Visit counter shared by all pullTags calls made from this invocation.
		$iter = 0;
		if ($rule->nodeName == "Sequence") {
			$pushInd = $rule->childNodes->length - 1;
			$shouldPush = true;
			// Walk backwards: a child receives the tags of the siblings following it.
			for ($child = $rule->lastChild; $child != NULL; $child = $child->previousSibling) {
				$this->pushTags($child, $tagStr);
				if ($child->previousSibling != NULL) {
					if ($this->pullTags($child, $iter, $childTag)) {
						// pullTags flagged this child as failSafe: keep the tags
						// collected so far and add the child's own.
						if ($shouldPush) {
							$pushInd --;
						}
						if ($tagStr == "") {
							$tagStr = $childTag;
						} elseif ($childTag != "") {
							$tagStr .= "|" . $childTag;
						}
					} else {
						// Not failSafe: earlier siblings only see this child's tags.
						$shouldPush = false;
						$tagStr = $childTag;
					}
				}
			}
			$rule->setAttribute("pushInd", $pushInd);
		} else {
			if ($rule->nodeName != "Choice") {
				$rule->setAttribute("saveTags", $tagStr);
				$tagStr = NULL;
				if ($rule->nodeName == "Text") {
					$childTags = "";
					foreach ($rule->childNodes as $crrnt) {
						if ($childTags != "") {
							$childTags .= "|";
						}
						$this->pullTags($crrnt, $iter, $childTag);
						$childTags .= $childTag;
					}
					$rule->setAttribute("childTags", $childTags);
				}
			}
			foreach ($rule->childNodes as $crrnt) {
				$this->pushTags($crrnt, $tagStr);
			}
		}
	}

	/**
	 * Collects the tags a rule can start with.
	 *
	 * @param $rule DOMElement Rule to inspect
	 * @param $iter int Visit counter used to detect loops in the grammar
	 * @param $childTags string Receives the tags as a regex alternation
	 * @return bool|int The rule's failSafe flag: FALSE for an Assignment, the
	 *   combined flag of the inspected children for Choice and Sequence, the
	 *   referenced rule's flag for a Reference, TRUE otherwise
	 * @throws MWException When maxIter is exceeded or a Reference names an
	 *   undefined rule
	 */
	private function pullTags($rule, &$iter, &$childTags) {
		$iter ++;
		if ($iter > ParseEngine::maxIter) {
			throw new MWException("Collecter iterated too many times. Probable loop in grammar.");
		}
		$childTags = "";
		$failSafe = TRUE;
		if ($rule->nodeName == "Assignment") {
			$childTags = $rule->getAttribute("tag");
			// Literal tags are escaped so they can be used inside a regex.
			if ($rule->getAttribute("regex") == NULL) {
				$childTags = preg_quote($childTags, "/");
			}
			$failSafe = FALSE;
		} elseif ($rule->nodeName == "Choice" || $rule->nodeName == "Sequence") {
			$failSafe = $rule->nodeName == "Sequence";
			foreach ($rule->childNodes as $child) {
				$failSafe = $this->pullTags($child, $iter, $newTags);
				if ($childTags == "") {
					$childTags = $newTags;
				} elseif ($newTags != "") {
					$childTags .= "|" . $newTags;
				}
				// Stop at the first failSafe alternative of a Choice and at the
				// first non-failSafe member of a Sequence.
				if (($failSafe && $rule->nodeName == "Choice") || (! $failSafe && $rule->nodeName == "Sequence")) {
					break;
				}
			}
			$failSafe |= $rule->nodeName == "Choice" && $rule->getAttribute("failSafe") != NULL;
		} elseif ($rule->nodeName == "Reference") {
			$refName = $rule->getAttribute("name");
			$refRule = self::findRule($rule->ownerDocument, $refName);
			if ($refRule == NULL) {
				throw new MWException("Grammar references undefined rule \"$refName\".");
			}
			$failSafe = $this->pullTags($refRule, $iter, $childTags);
		}
		return $failSafe;
	}
}
| 238 | + |
Property changes on: trunk/extensions/ParserWiki/ParserEngine.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 239 | + native |
Index: trunk/extensions/ParserWiki/parserTests.txt |
— | — | @@ -0,0 +1,37 @@ |
| 2 | +# Tests for the ParseEngine using maintenance/parserTests.php
|
| 3 | +# Please make sure that the input parser XML is an exact duplicate of the test result
|
| 4 | +
|
| 5 | +!!article
|
| 6 | +Grammar:WikiGrammar1_0
|
| 7 | +!!text
|
| 8 | +<grammar><tag tag=";; grammar for WikiGrammar v1.0 ;; only one parseObject at base level for the start rule / root node <"/><parseObject><assignment tag="grammar"><tag tag=" ["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="parseObject"/></parseObject><tag tag=" ->"/><parseObject><reference tag="ruleList"/></parseObject><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/></assignment></parseObject><tag tag=">"/><tag tag=" ;; ignore whitespace, comments lines starting in # and tags "/><rule tag="tag"><tag tag=" ="/><tag tag=" <"/><parseObject><assignment tag="tag"><tag tag=" /"/><pattern tag="(?:\s+|;;[^\n]*)*~r"/><tag tag="/"/></assignment></parseObject><tag tag=">"/></rule><tag tag=" "/><rule tag="pattern"><tag tag=" ="/><tag tag=" ["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\/"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="pattern"><tag tag=" /"/><pattern tag="(?:[^\\\/]|\\.)*"/><tag tag="/"/></assignment></parseObject><tag tag=">"/><tag tag=" <"/><parseObject><assignment tag="tag"><tag tag=" /"/><pattern tag="\/"/><tag tag="/"/></assignment></parseObject><tag tag=">"/></sequence></parseObject><tag tag="]"/></rule><tag tag=" ;; This works a lot like polymorphism -> new feature "/><rule tag="parseObject"><tag tag=" ="/><tag tag=" ("/><parseObject><choice><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="->"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="reference"><tag tag=" /"/><pattern tag="\w+"/><tag tag="/"/><tag tag=" ("/><parseObject><choice tag="?"><tag tag="["/><parseObject><sequence><tag tag="->"/><parseObject><reference 
tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="="/><tag tag="/"/></reference></parseObject><tag tag=" ->"/><parseObject><reference tag="pattern"/></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=")"/></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\("/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="choice"><tag tag=" /"/><pattern tag="\??"/><tag tag="/"/><tag tag=" ->"/><parseObject><reference tag="objectList"/></parseObject></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\)"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\["/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="sequence"><tag tag=" ->"/><parseObject><reference tag="objectList"/></parseObject></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\]"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="$"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="text"><tag tag=" ("/><parseObject><choice tag="?"><tag 
tag="->"/><parseObject><reference tag="parseObject"/></parseObject></choice></parseObject><tag tag=")"/></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="$"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="<"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="assignment"><tag tag=" /"/><pattern tag="\w+"/><tag tag="/"/><tag tag=" ["/><parseObject><sequence><tag tag=" 		("/><parseObject><choice tag="?"><tag tag="->"/><parseObject><reference tag="pattern"/></parseObject></choice></parseObject><tag tag=")"/><tag tag=" 		("/><parseObject><choice tag="?"><tag tag="->"/><parseObject><reference tag="parseObject"/></parseObject></choice></parseObject><tag tag=")"/></sequence></parseObject><tag tag=" 	]"/></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag=">"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=" )"/></rule><tag tag=" ;; Lists work a lot like regEx +'s -> syntactic sugar "/><rule tag="objectList"><tag tag=" ="/><tag tag=" ("/><parseObject><choice tag="?"><tag tag="["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="parseObject"/></parseObject><tag tag=" ->"/><parseObject><reference tag="objectList"/></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=")"/></rule><tag tag=" "/><rule tag="ruleList"><tag tag=" ="/><tag tag=" ("/><parseObject><choice tag="?"><tag tag="["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag 
tag="/"/><pattern/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="rule"><tag tag=" /"/><pattern tag="\w+"/><tag tag="/"/><tag tag=" ["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="="/><tag tag="/"/></reference></parseObject><tag tag=" ->"/><parseObject><reference tag="parseObject"/></parseObject></sequence></parseObject><tag tag="]"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="ruleList"/></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=")"/></rule><tag/></grammar>
|
| 9 | +!!endarticle
|
| 10 | +
|
| 11 | +!! test
|
| 12 | +Generic Test
|
| 13 | +!! input
|
| 14 | +;; grammar for WikiGrammar v1.0
|
| 15 | +;; only one parseObject at base level for the start rule / root node
|
| 16 | +<grammar [->parseObject ->ruleList ->tag=//]>
|
| 17 | +;; ignore whitespace, comments lines starting in # and tags
|
| 18 | +tag = <tag /(?:\s+|;;[^\n]*)*~r/>
|
| 19 | +pattern = [->tag=/\// <pattern /(?:[^\\\/]|\\.)*/> <tag /\//>]
|
| 20 | +;; This works a lot like polymorphism -> new feature
|
| 21 | +parseObject = (
|
| 22 | + [->tag=/->/ <parseObject <reference /\w+/ (?[->tag=/=/ ->pattern])>>]
|
| 23 | + [->tag=/\(/ <parseObject <choice /\??/ ->objectList>> ->tag=/\)/]
|
| 24 | + [->tag=/\[/ <parseObject <sequence ->objectList>> ->tag=/\]/]
|
| 25 | + [->tag=/$/ <parseObject <text (?->parseObject)>> ->tag=/$/]
|
| 26 | + [->tag=/</ <parseObject <assignment /\w+/ [
|
| 27 | + (?->pattern)
|
| 28 | + (?->parseObject)
|
| 29 | + ]>> ->tag=/>/]
|
| 30 | +)
|
| 31 | +;; Lists work a lot like regEx +'s -> syntactic sugar
|
| 32 | +objectList = (?[->parseObject ->objectList])
|
| 33 | +ruleList = (?[->tag=// <rule /\w+/ [->tag=/=/ ->parseObject]> ->ruleList])
|
| 34 | +!! result
|
| 35 | +<grammar><tag tag=";; grammar for WikiGrammar v1.0 ;; only one parseObject at base level for the start rule / root node <"/><parseObject><assignment tag="grammar"><tag tag=" ["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="parseObject"/></parseObject><tag tag=" ->"/><parseObject><reference tag="ruleList"/></parseObject><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/></assignment></parseObject><tag tag=">"/><tag tag=" ;; ignore whitespace, comments lines starting in # and tags "/><rule tag="tag"><tag tag=" ="/><tag tag=" <"/><parseObject><assignment tag="tag"><tag tag=" /"/><pattern tag="(?:\s+|;;[^\n]*)*~r"/><tag tag="/"/></assignment></parseObject><tag tag=">"/></rule><tag tag=" "/><rule tag="pattern"><tag tag=" ="/><tag tag=" ["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\/"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="pattern"><tag tag=" /"/><pattern tag="(?:[^\\\/]|\\.)*"/><tag tag="/"/></assignment></parseObject><tag tag=">"/><tag tag=" <"/><parseObject><assignment tag="tag"><tag tag=" /"/><pattern tag="\/"/><tag tag="/"/></assignment></parseObject><tag tag=">"/></sequence></parseObject><tag tag="]"/></rule><tag tag=" ;; This works a lot like polymorphism -> new feature "/><rule tag="parseObject"><tag tag=" ="/><tag tag=" ("/><parseObject><choice><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="->"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="reference"><tag tag=" /"/><pattern tag="\w+"/><tag tag="/"/><tag tag=" ("/><parseObject><choice tag="?"><tag tag="["/><parseObject><sequence><tag tag="->"/><parseObject><reference 
tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="="/><tag tag="/"/></reference></parseObject><tag tag=" ->"/><parseObject><reference tag="pattern"/></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=")"/></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\("/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="choice"><tag tag=" /"/><pattern tag="\??"/><tag tag="/"/><tag tag=" ->"/><parseObject><reference tag="objectList"/></parseObject></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\)"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\["/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="sequence"><tag tag=" ->"/><parseObject><reference tag="objectList"/></parseObject></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="\]"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="$"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="text"><tag tag=" ("/><parseObject><choice tag="?"><tag 
tag="->"/><parseObject><reference tag="parseObject"/></parseObject></choice></parseObject><tag tag=")"/></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="$"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/><tag tag=" 	["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="<"/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="parseObject"><tag tag=" <"/><parseObject><assignment tag="assignment"><tag tag=" /"/><pattern tag="\w+"/><tag tag="/"/><tag tag=" ["/><parseObject><sequence><tag tag=" 		("/><parseObject><choice tag="?"><tag tag="->"/><parseObject><reference tag="pattern"/></parseObject></choice></parseObject><tag tag=")"/><tag tag=" 		("/><parseObject><choice tag="?"><tag tag="->"/><parseObject><reference tag="parseObject"/></parseObject></choice></parseObject><tag tag=")"/></sequence></parseObject><tag tag=" 	]"/></assignment></parseObject><tag tag=">"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag=">"/><tag tag="/"/></reference></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=" )"/></rule><tag tag=" ;; Lists work a lot like regEx +'s -> syntactic sugar "/><rule tag="objectList"><tag tag=" ="/><tag tag=" ("/><parseObject><choice tag="?"><tag tag="["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="parseObject"/></parseObject><tag tag=" ->"/><parseObject><reference tag="objectList"/></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=")"/></rule><tag tag=" "/><rule tag="ruleList"><tag tag=" ="/><tag tag=" ("/><parseObject><choice tag="?"><tag tag="["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag 
tag="/"/><pattern/><tag tag="/"/></reference></parseObject><tag tag=" <"/><parseObject><assignment tag="rule"><tag tag=" /"/><pattern tag="\w+"/><tag tag="/"/><tag tag=" ["/><parseObject><sequence><tag tag="->"/><parseObject><reference tag="tag"><tag tag="="/><tag tag="/"/><pattern tag="="/><tag tag="/"/></reference></parseObject><tag tag=" ->"/><parseObject><reference tag="parseObject"/></parseObject></sequence></parseObject><tag tag="]"/></assignment></parseObject><tag tag=">"/><tag tag=" ->"/><parseObject><reference tag="ruleList"/></parseObject></sequence></parseObject><tag tag="]"/></choice></parseObject><tag tag=")"/></rule><tag/></grammar>
|
| 36 | +
|
| 37 | +!! end
|
| 38 | +
|