r23125 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r23124‎ | r23125 | r23126 >
Date:12:03, 20 June 2007
Author:mkroetzsch
Status:old
Tags:
Comment:
Half-done implementation of query parser ...
Modified paths:
  • /trunk/extensions/SemanticMediaWiki/includes/SMW_QueryProcessor.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SemanticMediaWiki/includes/SMW_QueryProcessor.php
@@ -33,33 +33,18 @@
3434 * as a string. Otherwise an object of type SMWQuery is returned.
3535 */
3636 static public function createQuery($querystring, $params, $inline = true) {
37 - /// TODO implement
38 - // DEBUG:
39 - $o_desc = new SMWNominalDescription(Title::newFromText("Africa"));
40 - $value = SMWDataValue::newAttributeValue('Population','5853000');
41 - $t_desc = new SMWThingDescription();
42 - $v_desc = new SMWValueDescription($value, SMW_CMP_GEQ);
43 - $a_desc = new SMWSomeAttribute(Title::newFromText('Attribute:Population'), $v_desc);
44 - $r_desc = new SMWSomeRelation(Title::newFromText("Relation:located in"), $o_desc);
45 - $r_desc2 = new SMWSomeRelation(Title::newFromText("Relation:borders"), $r_desc);
46 - $r_desc3 = new SMWSomeRelation(Title::newFromText("Relation:located in"), $t_desc);
47 - $c_desc = new SMWClassDescription(Title::newFromText("Category:Country"));
48 - $desc = new SMWConjunction(array($c_desc, $r_desc));
49 - $desc2 = new SMWConjunction(array($c_desc, $a_desc, $r_desc2, $r_desc));
50 - $pr1 = new SMWPrintrequest(SMW_PRINT_THIS, 'Country');
51 - $desc->addPrintRequest($pr1);
52 - $desc2->addPrintRequest($pr1);
53 - $pr2 = new SMWPrintrequest(SMW_PRINT_RELS, 'Borders', Title::newFromText('Relation:Borders'));
54 - $desc->addPrintRequest($pr2);
55 - $desc2->addPrintRequest($pr2);
56 - $pr3 = new SMWPrintrequest(SMW_PRINT_ATTS, 'Population', Title::newFromText('Attribute:Population'));
57 - $desc->addPrintRequest($pr3);
58 - $desc2->addPrintRequest($pr3);
59 - $pr4 = new SMWPrintrequest(SMW_PRINT_CATS, 'Categories');
60 - $desc->addPrintRequest($pr4);
61 - //$query = new SMWQuery($desc);
62 - $query = new SMWQuery($desc2);
 37+ // parse query:
 38+ $qp = new SMWQueryParser();
 39+ $desc = $qp->getQueryDescription($querystring);
 40+ /// TODO check for errors
6341
 42+ ///TODO do this only when wanted, use given label:
 43+ $desc->addPrintRequest(new SMWPrintrequest(SMW_PRINT_THIS, 'Mainlabel'));
 44+
 45+ $query = new SMWQuery($desc);
 46+
 47+ print '### Query:' . $desc->getQueryString() . ' ###';
 48+
6449 // set query parameters:
6550 global $smwgIQMaxLimit, $smwgIQMaxInlineLimit;
6651 if ($inline)
@@ -144,5 +129,231 @@
145130 }
146131
147132 }
 133+
 134+
/**
 * Objects of this class are in charge of parsing a query string in order
 * to create an SMWDescription. The class and methods are not static in order
 * to more cleanly store the intermediate state and progress of the parser.
 *
 * Typical use: create a parser, call getQueryDescription() with the query
 * string, and consult getError() if the result is false.
 */
class SMWQueryParser {

	protected $m_sepstack = array(); // list of open blocks ("parentheses") that need closing at current step
	protected $m_curstring = ''; // remaining string to be parsed (parsing eats query string from the front)
	protected $m_error = false; // false if all went right, string otherwise

	protected $m_categoryprefix; // cache label of category namespace . ':'

	/**
	 * Cache the localised category namespace label (with trailing ':'),
	 * which is one of the delimiters recognised by readChunk().
	 */
	public function __construct() {
		global $wgContLang;
		$this->m_categoryprefix = $wgContLang->getNsText(NS_CATEGORY) . ':';
	}

	/**
	 * Compute an SMWDescription from a query string. Return this description or
	 * false if there were errors. NULL is returned if the query string did not
	 * contain any chunk at all. Any error recorded by a previous run is reset.
	 */
	public function getQueryDescription($querystring) {
		$this->m_curstring = $querystring;
		$this->m_sepstack = array();
		$this->m_error = false;
		return $this->getSubqueryDescription();
	}

	/**
	 * Return the error message recorded during the last parsing run, or
	 * false if no error was recorded.
	 */
	public function getError() {
		return $this->m_error;
	}

	/**
	 * Compute an SMWDescription for current part of a query, which should
	 * be a standalone query (the main query or a subquery enclosed within
	 * "<q>...</q>". Recursively calls similar methods and returns false upon error.
	 */
	protected function getSubqueryDescription() {
		$result = NULL;
		while (($chunk = $this->readChunk()) !== '') {
			switch ($chunk) {
				case '[[': // start new link block
					$this->pushDelimiter(']]'); // expected termination symbol
					$linkdesc = $this->getLinkDescription();
					if ($linkdesc === false) { // error message was already recorded
						return false;
					}
					if ($linkdesc !== NULL) { // links that yield no description are skipped
						$result = $this->addDescription($result, $linkdesc);
					}
				break;
				case '</q>': // exit current subquery
					if ($this->popDelimiter('</q>')) {
						return $result;
					}
					$this->m_error = 'There appear to be too many occurences of \'' . $chunk . '\' in the query.';
					return false;
				default: // error: unexpected $chunk
					$this->m_error = 'The part \'' . $chunk . '\' in the query was not understood. Results might not be as expected.'; // TODO: internationalise
					return false;
			}
		}
		return $result;
	}

	/**
	 * Compute an SMWDescription for current part of a query, which should
	 * be the content of "[[ ... ]]". Recursively calls similar methods and
	 * returns false upon error. NULL is returned for a well-formed link that
	 * does not contribute a description (yet).
	 */
	protected function getLinkDescription() {
		$result = NULL;
		// This method is called when we encountered an opening '[['. The following
		// block could be a Category-statement, fixed object, relation or attribute
		// statements, or according print statements.
		$chunk = $this->readChunk();

		if ($chunk === $this->m_categoryprefix) { // category statement
			// note: no subqueries allowed here, inline disjunction allowed, wildcards allowed
			do {
				$chunk = $this->readChunk();
				if ( ($chunk === '') || ($chunk === ']]') || ($chunk === '|') ) {
					// No category name here: leave the chunk to the termination
					// code below instead of reading past it.
					break;
				}
				// '+' (wildcard) and '*' (print statement) are recognised but not implemented yet (TODO)
				if ( ($chunk !== '+') && ($chunk !== '*') ) { // assume category title
					$cat = Title::newFromText($chunk, NS_CATEGORY);
					if ($cat !== NULL) { // skipped if no Title could be built
						$result = $this->addDescription($result, new SMWClassDescription($cat), false);
					}
				}
				$chunk = $this->readChunk();
			} while ($chunk === '||'); // '||' announces another alternative
		}
		// else: fixed subject, property query, or subquery -- TODO, not implemented yet

		// terminate link (assuming that next chunk was read already)
		if ($chunk === '|') { // label, TODO: the label text is consumed but not used yet
			///TODO: rather have a mode for readChunk that stops only on ']]'
			/// (otherwise we kill spaces in the label)
			do {
				$chunk = $this->readChunk();
			} while ( ($chunk !== ']]') && ($chunk !== '') );
		}
		if ($chunk === ']]') { // expected termination
			$this->popDelimiter(']]');
			return $result;
		}
		// What happened? We found some chunk that could not be processed as
		// link content (as in [[Category:Test<q>]]) and there was no label to
		// eat it. Or the closing ]] are just missing entirely.
		if ($chunk !== '') { //TODO: internationalise errors
			$this->m_error = 'The symbol \'' . $chunk . '\' was used in a place where it is not useful.';
		} else {
			$this->m_error = 'Some use of \'[[\' in your query was not closed by a matching \']]\'.';
		}
		return false;
	}

	/**
	 * Get the next unstructured string chunk from the query string.
	 * Chunks are delimited by any of the special strings used in inline queries
	 * (such as [[, ]], <q>, ...). If the string starts with such a delimiter,
	 * this delimiter is returned. Otherwise the first string in front of such a
	 * delimiter is returned.
	 * Trailing and initial spaces are always ignored and chunks
	 * consisting only of spaces are not returned.
	 * If there is no more query string left to process, the empty string is
	 * returned (and in no other case, except when the splitting itself fails).
	 * The return value is always a string.
	 */
	protected function readChunk() {
		// The category prefix is localised text, so it must be escaped before
		// it becomes part of the pattern.
		$pattern = '/[\s]*(\[\[|\]\]|::|:=|<q>|<\/q>|' . preg_quote($this->m_categoryprefix, '/') . '|\|\||\|)[\s]*/';
		$chunks = preg_split($pattern, $this->m_curstring, 2, PREG_SPLIT_DELIM_CAPTURE);
		if (!is_array($chunks)) { // splitting failed: stop parsing instead of looping on the same input
			$this->m_curstring = '';
			return '';
		}
		$count = count($chunks);
		if ($count == 1) { // no matches anymore, strip spaces and finish
			$this->m_curstring = '';
			return trim($chunks[0]);
		}
		if ($count == 3) { // this should generally happen if count is not 1
			if ($chunks[0] === '') { // string started with delimiter
				$this->m_curstring = $chunks[2];
				return $chunks[1]; // the pattern already swallowed surrounding spaces
			}
			// Put the delimiter back so that the next call returns it.
			$this->m_curstring = $chunks[1] . $chunks[2];
			return trim($chunks[0]); // initial spaces are not removed by the pattern
		}
		// should never happen
		$this->m_curstring = '';
		return '';
	}

	/**
	 * Enter a new subblock in the query, which must at some time be terminated by the
	 * given $endstring delimiter calling popDelimiter();
	 */
	protected function pushDelimiter($endstring) {
		array_push($this->m_sepstack, $endstring);
	}

	/**
	 * Exit a subblock in the query ending with the given delimiter.
	 * If the delimiter does not match the top-most open block, false
	 * will be returned. Otherwise return true. The top-most block is
	 * removed from the stack in either case; false is also returned if
	 * no block was open at all.
	 */
	protected function popDelimiter($endstring) {
		$topdelim = array_pop($this->m_sepstack);
		return ($topdelim === $endstring);
	}

	/**
	 * Extend a given description by a new one, either by adding the new description
	 * (if the old one is a container description) or by creating a new container.
	 * The parameter $conjunction determines whether the combination of both descriptions
	 * should be a disjunction or conjunction.
	 *
	 * In the special case that the current description is NULL, the new one will just
	 * replace the current one.
	 *
	 * The return value is the expected combined description. The object $curdesc will
	 * also be changed if it is a container of the requested kind.
	 */
	protected function addDescription($curdesc, $newdesc, $conjunction = true) {
		if ($curdesc === NULL) {
			return $newdesc;
		}
		// we already found descriptions
		$reusable = $conjunction ? ($curdesc instanceof SMWConjunction) : ($curdesc instanceof SMWDisjunction);
		if ($reusable) { // use existing container, and hand it back to the caller
			$curdesc->addDescription($newdesc);
			return $curdesc;
		}
		if ($conjunction) { // make new conjunction
			return new SMWConjunction(array($curdesc, $newdesc));
		}
		// make new disjunction
		return new SMWDisjunction(array($curdesc, $newdesc));
	}
}
148359
149360 ?>
\ No newline at end of file

Status & tagging log