r23207 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r23206‎ | r23207 | r23208 >
Date:11:21, 22 June 2007
Author:mkroetzsch
Status:old
Tags:
Comment:
extended query parser: most query functions now supported
Modified paths:
  • /trunk/extensions/SemanticMediaWiki/includes/SMW_QueryProcessor.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SemanticMediaWiki/includes/SMW_QueryProcessor.php
@@ -11,6 +11,29 @@
1212 require_once($smwgIP . '/includes/SMW_QueryPrinters.php');
1313
1414 /**
 15+ * This hook registers a parser-hook to the current parser.
 16+ * Note that parser hooks are something different than MW hooks
 17+ * in general, which explains the two-level registration.
 18+ */
 19+function smwfRegisterInlineQueries( $semantic, $mediawiki, $rules ) {
 20+ global $wgParser;
 21+ $wgParser->setHook( 'ask', 'smwfProcessInlineQuery' );
 22+ return true; // always return true, in order not to stop MW's hook processing!
 23+}
 24+
 25+/**
 26+ * The <ask> parser hook processing part.
 27+ */
 28+function smwfProcessInlineQuery($text, $param) {
 29+ global $smwgIQEnabled;
 30+ if ($smwgIQEnabled) {
 31+ return SMWQueryProcessor::getResultHTML($text,$param);
 32+ } else {
 33+ return wfMsgForContent('smw_iq_disabled');
 34+ }
 35+}
 36+
 37+/**
1538 * Static class for accessing functions to generate and execute semantic queries
1639 * and to serialise their results.
1740 */
@@ -36,14 +59,21 @@
3760 // parse query:
3861 $qp = new SMWQueryParser();
3962 $desc = $qp->getQueryDescription($querystring);
40 - /// TODO check for errors
 63+ if ($desc === NULL) { //abort with failure
 64+ return $qp->getError();
 65+ }
4166
42 - ///TODO do this only when wanted, use given label:
43 - $desc->addPrintRequest(new SMWPrintrequest(SMW_PRINT_THIS, 'Mainlabel'));
 67+ if (array_key_exists('mainlabel', $params)) {
 68+ $mainlabel = $params['mainlabel'] . $qp->getLabel();
 69+ } else {
 70+ $mainlabel = $qp->getLabel();
 71+ }
 72+ ///TODO do this only when wanted:
 73+ $desc->prependPrintRequest(new SMWPrintRequest(SMW_PRINT_THIS, $mainlabel));
4474
4575 $query = new SMWQuery($desc);
4676
47 - print '### Query:' . $desc->getQueryString() . ' ###';
 77+ //print '### Query:' . htmlspecialchars($desc->getQueryString()) . ' ###'; // DEBUG
4878
4979 // set query parameters:
5080 global $smwgIQMaxLimit, $smwgIQMaxInlineLimit;
@@ -87,7 +117,7 @@
88118 $printer = SMWQueryProcessor::getResultPrinter($format, $inline, $res);
89119 return $printer->getResultHTML($res, $params);
90120 } else { // error string: return escaped version
91 - return htmlspecialchars($query);
 121+ return htmlspecialchars($query); ///TODO: improve error reporting format ...
92122 }
93123 }
94124
@@ -95,6 +125,7 @@
96126 * Determine format label from parameters.
97127 */
98128 static protected function getResultFormat($params) {
 129+ $format = 'auto';
99130 if (array_key_exists('format', $params)) {
100131 $format = strtolower($params['format']);
101132 if ( !in_array($format,SMWQueryProcessor::$formats) ) {
@@ -119,11 +150,11 @@
120151 case 'ul': case 'ol': case 'list':
121152 return new SMWListResultPrinter($format,$inline);
122153 case 'timeline': case 'eventline':
123 - return new SMWListResultPrinter($format,$inline); //TODO
 154+ return new SMWTimelineResultPrinter($format,$inline);
124155 case 'embedded':
125 - return new SMWListResultPrinter($format,$inline); //TODO
 156+ return new SMWEmbeddedResultPrinter($format,$inline);
126157 case 'template':
127 - return new SMWListResultPrinter($format,$inline); //TODO
 158+ return new SMWTemplateResultPrinter($format,$inline);
128159 default: return new SMWListResultPrinter($format,$inline);
129160 }
130161 }
@@ -141,6 +172,7 @@
142173 protected $m_sepstack; // list of open blocks ("parentheses") that need closing at current step
143174 protected $m_curstring; // remaining string to be parsed (parsing eats query string from the front)
144175 protected $m_error; // false if all went right, string otherwise
 176+ protected $m_label; //label of the main query result
145177
146178 protected $m_categoryprefix; // cache label of category namespace . ':'
147179
@@ -154,71 +186,78 @@
155187 * NULL if there were errors.
156188 */
157189 public function getQueryDescription($querystring) {
 190+ $this->m_error = false;
 191+ $this->m_label = '';
158192 $this->m_curstring = $querystring;
159193 $this->m_sepstack = array();
160194 return $this->getSubqueryDescription();
161195 }
162196
163197 /**
 198+ * Return error message or false if no error occurred.
 199+ */
 200+ public function getError() {
 201+ return $this->m_error;
 202+ }
 203+
 204+ /**
 205+ * Return label for the results of this query (which
 206+ * might be empty if no such information was passed).
 207+ */
 208+ public function getLabel() {
 209+ return $this->m_label;
 210+ }
 211+
 212+
 213+ /**
164214 * Compute an SMWDescription for current part of a query, which should
165215 * be a standalone query (the main query or a subquery enclosed within
166 - * "<q>...</q>". Recursively calls similar methods and returns false upon error.
 216+ * "<q>...</q>". Recursively calls similar methods and returns NULL upon error.
167217 */
168218 protected function getSubqueryDescription() {
169219 $result = NULL;
170 - while (($chunk = $this->readChunk()) != '') {
 220+ $printrequests = array();
 221+ $continue = ($chunk = $this->readChunk()) != '';
 222+ while ($continue) {
171223 switch ($chunk) {
172224 case '[[': // start new link block
173 - $this->pushDelimiter(']]'); // expected termination symbol
174 - $result = $this->addDescription($result,$this->getLinkDescription());
 225+ $ld = $this->getLinkDescription($printrequests);
 226+ if ($ld === NULL) {
 227+ return NULL;
 228+ } elseif ($ld instanceof SMWPrintRequest) {
 229+ $printrequests[] = $ld;
 230+ } else {
 231+ $result = $this->addDescription($result,$ld);
 232+ }
175233 break;
176234 case '</q>': // exit current subquery
177235 if ($this->popDelimiter('</q>')) {
178 - //TODO: return computed description
 236+ $continue = false; // leave the loop
179237 } else {
180238 $this->m_error = 'There appear to be too many occurences of \'' . $chunk . '\' in the query.';
181 - return false;
 239+ return NULL;
182240 }
183241 break;
184242 default: // error: unexpected $chunk
185243 $this->m_error = 'The part \'' . $chunk . '\' in the query was not understood. Results might not be as expected.'; // TODO: internationalise
186 - return false;
 244+ return NULL;
187245 }
 246+ $continue = ($continue) && ( ($chunk = $this->readChunk()) != '' );
188247 }
 248+
 249+ if ($result !== NULL) {
 250+ foreach ($printrequests as $pr) {
 251+ $result->addPrintRequest($pr);
 252+ }
 253+ }
189254 return $result;
 255+ }
190256
191 - /// TODO implement
192 - // DEBUG:
193 -// $o_desc = new SMWNominalDescription(Title::newFromText("Africa"));
194 -// $value = SMWDataValueFactory::newAttributeValue('Population','5853000');
195 -// $t_desc = new SMWThingDescription();
196 -// $v_desc = new SMWValueDescription($value, SMW_CMP_GEQ);
197 -// $a_desc = new SMWSomeAttribute(Title::newFromText('Attribute:Population'), $v_desc);
198 -// $r_desc = new SMWSomeRelation(Title::newFromText("Relation:located in"), $o_desc);
199 -// $r_desc2 = new SMWSomeRelation(Title::newFromText("Relation:borders"), $r_desc);
200 -// $r_desc3 = new SMWSomeRelation(Title::newFromText("Relation:located in"), $t_desc);
201 -// $c_desc = new SMWClassDescription(Title::newFromText("Category:Country"));
202 -// $desc = new SMWConjunction(array($c_desc, $r_desc));
203 -// $desc2 = new SMWConjunction(array($c_desc, $a_desc, $r_desc2, $r_desc));
204 -// $pr1 = new SMWPrintrequest(SMW_PRINT_THIS, 'Country');
205 -// $desc->addPrintRequest($pr1);
206 -// $desc2->addPrintRequest($pr1);
207 -// $pr2 = new SMWPrintrequest(SMW_PRINT_RELS, 'Borders', Title::newFromText('Relation:Borders'));
208 -// $desc->addPrintRequest($pr2);
209 -// $desc2->addPrintRequest($pr2);
210 -// $pr3 = new SMWPrintrequest(SMW_PRINT_ATTS, 'Population', Title::newFromText('Attribute:Population'));
211 -// $desc->addPrintRequest($pr3);
212 -// $desc2->addPrintRequest($pr3);
213 -// $pr4 = new SMWPrintrequest(SMW_PRINT_CATS, 'Categories');
214 -// $desc->addPrintRequest($pr4);
215 -//
216 -// return $desc2;
217 - }
218 -
219257 /**
220258 * Compute an SMWDescription for current part of a query, which should
221 - * be the content of "[[ ... ]]". Recursively calls similar methods and
222 - * returns false upon error.
 259+ * be the content of "[[ ... ]]". Alternatively, if the current syntax
 260+ * specifies a print request, return the print request object.
 261+ * Returns NULL upon error.
223262 */
224263 protected function getLinkDescription() {
225264 $result = NULL;
@@ -233,10 +272,29 @@
234273 while ($continue) {
235274 $chunk = $this->readChunk();
236275 switch ($chunk) {
237 - case '+': //wildcard
238 - break;
239276 case '*': //print statement
 277+ $chunk = $this->readChunk('\]\]|\|');
 278+ if ($chunk == '|') {
 279+ $label = $this->readChunk('\]\]');
 280+ if ($label != ']]') {
 281+ $chunk = $this->readChunk('\]\]');
 282+ } else {
 283+ $label = '';
 284+ $chunk = ']]';
 285+ }
 286+ } else {
 287+ global $wgContLang;
 288+ $label = $wgContLang->getNSText(NS_CATEGORY);
 289+ }
 290+ if ($chunk == ']]') {
 291+ return new SMWPrintRequest(SMW_PRINT_CATS, $label);
 292+ } else {
 293+ $this->m_error = 'Misshaped print statement.'; //TODO: internationalise
 294+ return NULL;
 295+ }
240296 break;
 297+ case '+': //wildcard, ignore for categories (semantically meaningless)
 298+ break;
241299 default: //assume category title
242300 $cat = Title::newFromText($chunk, NS_CATEGORY);
243301 if ($cat !== NULL) {
@@ -250,23 +308,186 @@
251309 $continue = false;
252310 }
253311 }
254 - } else { // fixed subject, property query, or subquery
255 -
 312+ } else { // fixed subject, namespace restriction, property query, or subquery
 313+ $sep = $this->readChunk();
 314+ if ($sep == '::') { // relation statement
 315+ $rel = Title::newFromText($chunk, SMW_NS_RELATION);
 316+ $continue = true;
 317+ $innerdesc = NULL;
 318+ while ($continue) {
 319+ $chunk = $this->readChunk();
 320+ switch ($chunk) {
 321+ case '*': // print statement, abort processing
 322+ $chunk = $this->readChunk('\]\]|\|');
 323+ if ($chunk == '|') {
 324+ $label = $this->readChunk('\]\]');
 325+ if ($label != ']]') {
 326+ $chunk = $this->readChunk('\]\]');
 327+ } else {
 328+ $label = '';
 329+ $chunk = ']]';
 330+ }
 331+ } else {
 332+ $label = $rel->getText();
 333+ }
 334+ if ($chunk == ']]') {
 335+ return new SMWPrintRequest(SMW_PRINT_RELS, $label, $rel);
 336+ } else {
 337+ $this->m_error = 'Misshaped print statement.'; //TODO: internationalise
 338+ return NULL;
 339+ }
 340+ break;
 341+ case '+': // wildcard
 342+ $innerdesc = $this->addDescription($innerdesc, new SMWThingDescription(), false);
 343+ break;
 344+ case '<q>': // subquery
 345+ $this->pushDelimiter('</q>');
 346+ $innerdesc = $this->addDescription($innerdesc, $this->getSubqueryDescription(), false);
 347+ break;
 348+ default: //normal object value
 349+ $obj = Title::newFromText($chunk);
 350+ if ($obj !== NULL) {
 351+ $innerdesc = $this->addDescription($innerdesc, new SMWNominalDescription($obj), false);
 352+ }
 353+ }
 354+ $chunk = $this->readChunk();
 355+ $continue = ($chunk == '||');
 356+ }
 357+ if ($innerdesc !== NULL) {
 358+ $result = new SMWSomeRelation($rel,$innerdesc);
 359+ }
 360+ } elseif ($sep == ':=') { // attribute statement
 361+ $att = Title::newFromText($chunk, SMW_NS_ATTRIBUTE);
 362+ ///TODO: currently no support for disjunctions in data values (needs extension of query processor)
 363+
 364+ // get values, including values with internal [[...]]
 365+ $open = 1;
 366+ $value = '';
 367+ while ( ($open > 0) && ($chunk != '') ) {
 368+ $chunk = $this->readChunk('\[\[|\]\]|\|');
 369+ switch ($chunk) {
 370+ case '[[': // open new [[ ]]
 371+ $open++;
 372+ break;
 373+ case ']]': // close [[ ]]
 374+ $open--;
 375+ break;
 376+ case '|': // terminates only outermost [[ ]]
 377+ if ($open == 1) {
 378+ $open = 0;
 379+ }
 380+ break;
 381+ }
 382+ if ($open != 0) {
 383+ $value .= $chunk;
 384+ }
 385+ }
 386+ // note that at this point, we already read one more chunk behind the value
 387+ switch ($value) {
 388+ case '*': // print statement
 389+ /// TODO: no support for selecting output unit yet
 390+ if ($chunk == '|') {
 391+ $label = $this->readChunk('\]\]');
 392+ if ($label != ']]') {
 393+ $chunk = $this->readChunk('\]\]');
 394+ } else {
 395+ $label = '';
 396+ $chunk = ']]';
 397+ }
 398+ } else {
 399+ $label = $att->getText();
 400+ }
 401+ if ($chunk == ']]') {
 402+ $dv = SMWDataValueFactory::newAttributeValue($att->getText());
 403+ return new SMWPrintRequest(SMW_PRINT_ATTS, $label, $att, $dv);
 404+ } else {
 405+ $this->m_error = 'Misshaped print statement.'; //TODO: internationalise
 406+ return NULL;
 407+ }
 408+ break;
 409+ case '+': // wildcard
 410+ $vd = new SMWValueDescription(NULL, SMW_CMP_ANY);
 411+ break;
 412+ default: // fixed value, possibly with comparator addons
 413+ // for now, treat comparators only if placed before whole value:
 414+ $list = preg_split('/^(<|>)/',$value, 2, PREG_SPLIT_DELIM_CAPTURE);
 415+ $comparator = SMW_CMP_EQ;
 416+ if (count($list) == 3) { // initial comparator found ($list[1] should be empty)
 417+ switch ($list[1]) {
 418+ case '<':
 419+ $comparator = SMW_CMP_LEQ;
 420+ $value = $list[2];
 421+ break;
 422+ case '>':
 423+ $comparator = SMW_CMP_GEQ;
 424+ $value = $list[2];
 425+ break;
 426+ //default: not possible
 427+ }
 428+ }
 429+ // TODO: needs extension for n-ary values
 430+ $dv = SMWDataValueFactory::newAttributeValue($att->getText(), $value);
 431+ if (!$dv->isValid()) {
 432+ $this->m_error = $dv->getError();
 433+ $vd = new SMWValueDescription(NULL, SMW_CMP_ANY);
 434+ } else {
 435+ $vd = new SMWValueDescription($dv, $comparator);
 436+ }
 437+ }
 438+ $result = new SMWSomeAttribute($att, $vd);
 439+ } else { // Fixed article/namespace restriction. $sep should be ]] or ||
 440+ $continue = true;
 441+ //$innerdesc = NULL;
 442+ while ($continue) {
 443+ switch ($chunk) {
 444+ case '<q>': // subquery
 445+ $this->pushDelimiter('</q>');
 446+ $result = $this->addDescription($result, $this->getSubqueryDescription(), false);
 447+ break;
 448+ default:
 449+ $list = preg_split('/:/', $chunk, 3);
 450+ if ($list[0] == '') {
 451+ $list = array_slice($list, 2);
 452+ }
 453+ if ( (count($list) == 3) && ($list[2] == '+') ) { // namespace restriction
 454+ // TODO
 455+ } else {
 456+ $result = $this->addDescription($result, new SMWNominalDescription(Title::newFromText($chunk)), false);
 457+ }
 458+ }
 459+
 460+ if ($sep !== false) { // resuse prefetched sep
 461+ $chunk = $sep;
 462+ $sep = false;
 463+ } else {
 464+ $chunk = $this->readChunk();
 465+ }
 466+ if ($chunk == '||') {
 467+ $chunk = $this->readChunk();
 468+ $continue = true;
 469+ } else {
 470+ $continue = false;
 471+ }
 472+ }
 473+ }
256474 }
257475
 476+ if ($result === NULL) { // no useful information or concrete error found
 477+ $this->m_error = 'Syntax error in query.'; //TODO internationalise
 478+ return NULL;
 479+ }
 480+
258481 // terminate link (assuming that next chunk was read already)
259482 if ($chunk == '|') { // label, TODO
260 - $chunk = $this->readChunk();
261 - $label = '';
262 - ///TODO: rather have a mode for readChunk that stops only on ']]'
263 - /// (otherwise we kill spaces in the label)
264 - while ( ($chunk != ']]') && ($chunk !== '') ) {
265 - $label .= $chunk;
266 - $chunk = $this->readChunk();
 483+ $label = $this->readChunk('\]\]');
 484+ if ($label != ']]') {
 485+ $chunk = $this->readChunk('\]\]');
 486+ } else {
 487+ $label = '';
 488+ $chunk = ']]';
267489 }
268490 }
269491 if ($chunk == ']]') { // expected termination
270 - $this->popDelimiter(']]');
271492 return $result;
272493 } else {
273494 // What happened? We found some chunk that could not be processed as
@@ -277,7 +498,7 @@
278499 } else {
279500 $this->m_error = 'Some use of \'[[\' in your query was not closed by a matching \']]\'.';
280501 }
281 - return false;
 502+ return NULL;
282503 }
283504
284505 return $result;
@@ -293,10 +514,16 @@
294515 * consisting only of spaces are not returned.
295516 * If there is no more query string left to process, the empty string is
296517 * returned (and in no other case).
 518+ *
 519+ * The stoppattern can be used to customise the matching, especially in order to
 520+ * overread certain special symbols.
297521 */
298 - protected function readChunk() {
299 - $chunks = preg_split('/[\s]*(\[\[|\]\]|::|:=|<q>|<\/q>|' . $this->m_categoryprefix . '|\|\||\|)[\s]*/', $this->m_curstring, 2, PREG_SPLIT_DELIM_CAPTURE);
300 - if (count($chunks) == 1) { // no mathces anymore, strip spaces and finish
 522+ protected function readChunk($stoppattern = '') {
 523+ if ($stoppattern == '') {
 524+ $stoppattern = '\[\[|\]\]|::|:=|<q>|<\/q>|' . $this->m_categoryprefix . '|\|\||\|';
 525+ }
 526+ $chunks = preg_split('/[\s]*(' . $stoppattern . ')[\s]*/', $this->m_curstring, 2, PREG_SPLIT_DELIM_CAPTURE);
 527+ if (count($chunks) == 1) { // no matches anymore, strip spaces and finish
301528 $this->m_curstring = '';
302529 return trim($chunks[0]);
303530 } elseif (count($chunks) == 3) { // this should generally happen if count is not 1
@@ -347,6 +574,7 @@
348575 if ( (($conjunction) && ($curdesc instanceof SMWConjunction)) ||
349576 ((!$conjunction) && ($curdesc instanceof SMWDisjunction)) ) { // use existing container
350577 $curdesc->addDescription($newdesc);
 578+ return $curdesc;
351579 } elseif ($conjunction) { // make new conjunction
352580 return new SMWConjunction(array($curdesc,$newdesc));
353581 } else { // make new disjunction

Status & tagging log