r23316 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r23315‎ | r23316 | r23317 >
Date:10:01, 24 June 2007
Author:mkroetzsch
Status:old
Tags:
Comment:
Enable top-level disjunction and default namespace restrictions
Modified paths:
  • /trunk/extensions/SemanticMediaWiki/includes/SMW_QueryProcessor.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/storage/SMW_Description.php (modified) (history)
  • /trunk/extensions/SemanticMediaWiki/includes/storage/SMW_SQLStore.php (modified) (history)

Diff [purge]

Index: trunk/extensions/SemanticMediaWiki/includes/SMW_QueryProcessor.php
@@ -62,7 +62,7 @@
6363 */
6464 static public function createQuery($querystring, $params, $inline = true, $format = '') {
6565 // This should be the proper way of substituting templates in a safe and comprehensive way:
66 - global $wgTitle;
 66+ global $wgTitle, $smwgIQSearchNamespaces;
6767 $parser = new Parser();
6868 $parserOptions = new ParserOptions();
6969 $parser->startExternalParse( $wgTitle, $parserOptions, OT_HTML );
@@ -70,6 +70,7 @@
7171
7272 // parse query:
7373 $qp = new SMWQueryParser();
 74+ $qp->setDefaultNamespaces($smwgIQSearchNamespaces);
7475 $desc = $qp->getQueryDescription($querystring);
7576 if ($desc === NULL) { //abort with failure
7677 return $qp->getError();
@@ -207,15 +208,30 @@
208209 protected $m_curstring; // remaining string to be parsed (parsing eats query string from the front)
209210 protected $m_error; // false if all went right, string otherwise
210211 protected $m_label; //label of the main query result
 212+ protected $m_defaultns; //description of the default namespace restriction, or NULL if not used
211213
212214 protected $m_categoryprefix; // cache label of category namespace . ':'
213215
214216 public function SMWQueryParser() {
215217 global $wgContLang;
216218 $this->m_categoryprefix = $wgContLang->getNsText(NS_CATEGORY) . ':';
 219+ $this->m_defaultns = NULL;
217220 }
218221
219222 /**
 223+ * Provide an array of namespace constants that are used as default restrictions.
 224+ * If NULL is given, no such default restrictions will be added (faster).
 225+ */
 226+ public function setDefaultNamespaces($nsarray) {
 227+ $this->m_defaultns = NULL;
 228+ if ($nsarray !== NULL) {
 229+ foreach ($nsarray as $ns) {
 230+ $this->m_defaultns = $this->addDescription($this->m_defaultns, new SMWNamespaceDescription($ns), false);
 231+ }
 232+ }
 233+ }
 234+
 235+ /**
220236 * Compute an SMWDescription from a query string. Return this description or
221237 * false if there were errors.
222238 */
@@ -224,7 +240,8 @@
225241 $this->m_label = '';
226242 $this->m_curstring = $querystring;
227243 $this->m_sepstack = array();
228 - return $this->getSubqueryDescription();
 244+ $setNS = true;
 245+ return $this->getSubqueryDescription($setNS);
229246 }
230247
231248 /**
@@ -247,47 +264,112 @@
248265 * Compute an SMWDescription for current part of a query, which should
249266 * be a standalone query (the main query or a subquery enclosed within
250267 * "<q>...</q>". Recursively calls similar methods and returns NULL upon error.
 268+ *
 269+ * The call-by-ref parameter $setNS is a boolean. Its input specifies whether
 270+ * the query should set the current default namespace if no namespace restrictions
 271+ * were given. If false, the super-query is happy to set the required NS-restrictions
 272+ * by itself if needed. Otherwise the subquery itself has to impose the defaults.
 273+ * This is so, since outermost queries and subqueries of disjunctions will have to set
 274+ * their own default restrictions.
 275+ *
 276+ * The return value of $setNS specifies whether or not the subquery has a namespace
 277+ * specification in place. This might happen automatically if the query string imposes
 278+ * such restrictions. The return value is important for those callers that otherwise
 279+ * set up their own restrictions.
 280+ *
 281+ * Note that $setNS is not a means to switch default namespaces on or off in general,
 282+ * but just controls query generation. For general effect, the default namespaces
 283+ * should be set to NULL.
251284 */
252 - protected function getSubqueryDescription() {
253 - $result = NULL;
254 - $printrequests = array();
255 - $continue = ($chunk = $this->readChunk()) != '';
 285+ protected function getSubqueryDescription(&$setNS) {
 286+ $conjunction = NULL; // used for the current inner conjunction
 287+ $disjuncts = array(); // (disjunctive) array of subquery conjunctions
 288+ $printrequests = array(); // the printrequests found for this query level
 289+ $hasNamespaces = false; // does the current $conjunction have its own namespace restrictions?
 290+ $mustSetNS = $setNS; // must ns restrictions be set? (may become true even if $setNS is false)
 291+
 292+ $continue = ($chunk = $this->readChunk()) != ''; // skip empty subquery completely, throwing an error
256293 while ($continue) {
 294+ $setsubNS = false;
257295 switch ($chunk) {
258296 case '[[': // start new link block
259 - $ld = $this->getLinkDescription();
 297+ $ld = $this->getLinkDescription($setsubNS);
260298 if ($ld === NULL) {
261299 return NULL;
262300 } elseif ($ld instanceof SMWPrintRequest) {
263301 $printrequests[] = $ld;
264302 } else {
265 - $result = $this->addDescription($result,$ld);
 303+ $conjunction = $this->addDescription($conjunction,$ld);
266304 }
267305 break;
268306 case '<q>': // enter new subquery, currently irrelevant but possible
269307 $this->pushDelimiter('</q>');
270 - $result = $this->addDescription($result, $this->getSubqueryDescription());
 308+ $conjunction = $this->addDescription($conjunction, $this->getSubqueryDescription($setsubNS));
271309 break;
272 - case '</q>': // exit current subquery
273 - if ($this->popDelimiter('</q>')) {
274 - $continue = false; // leave the loop
275 - } else {
276 - $this->m_error = 'There appear to be too many occurences of \'' . $chunk . '\' in the query.';
277 - return NULL;
 310+ case '||': case '': case '</q>': // finish disjunction and maybe subquery
 311+ if ($this->m_defaultns !== NULL) { // possibly add namespace restrictions
 312+ if ( $hasNamespaces && !$mustSetNS) {
 313+ // add ns restrictions to all earlier conjunctions (all of which did not have them yet)
 314+ $mustSetNS = true; // enforce NS restrictions from now on
 315+ $newdisjuncts = array();
 316+ foreach ($disjuncts as $conj) {
 317+ $newdisjuncts[] = $this->addDescription($conj, $this->m_defaultns);
 318+ }
 319+ $disjuncts = $newdisjuncts;
 320+ } elseif ( !$hasNamespaces && $mustSetNS) {
 321+ // add ns restriction to current result
 322+ $conjunction = $this->addDescription($conjunction, $this->m_defaultns);
 323+ }
278324 }
 325+ $disjuncts[] = $conjunction;
 326+ // start anew
 327+ $conjunction = NULL;
 328+ $hasNamespaces = false;
 329+ // finish subquery?
 330+ if ($chunk == '</q>') {
 331+ if ($this->popDelimiter('</q>')) {
 332+ $continue = false; // leave the loop
 333+ } else {
 334+ $this->m_error = 'There appear to be too many occurences of \'' . $chunk . '\' in the query.';
 335+ return NULL;
 336+ }
 337+ } elseif ($chunk == '') {
 338+ $continue = false;
 339+ }
279340 break;
280341 default: // error: unexpected $chunk
281342 $this->m_error = 'The part \'' . $chunk . '\' in the query was not understood. Results might not be as expected.'; // TODO: internationalise
282343 return NULL;
283344 }
284 - $continue = ($continue) && ( ($chunk = $this->readChunk()) != '' );
 345+ if ($setsubNS) { // namespace restrictions encountered in current conjunct
 346+ $hasNamespaces = true;
 347+ }
 348+ if ($continue) { // read on only if $continue remained true
 349+ $chunk = $this->readChunk();
 350+ }
285351 }
286352
287 - if ($result !== NULL) {
288 - foreach ($printrequests as $pr) {
289 - $result->addPrintRequest($pr);
 353+ if (count($disjuncts) > 0) { // make disjunctive result
 354+ $result = NULL;
 355+ foreach ($disjuncts as $d) {
 356+ if ($d === NULL) {
 357+ $this->m_error = 'No condition in subquery.';
 358+ $setNS = false;
 359+ return NULL;
 360+ } else {
 361+ $result = $this->addDescription($result, $d, false);
 362+ }
290363 }
 364+ } else {
 365+ $this->m_error = 'No condition in subquery.';
 366+ $setNS = false;
 367+ return NULL;
291368 }
 369+ $setNS = $mustSetNS; // NOTE: also false if namespaces were given but no default NS descs are available
 370+
 371+ foreach ($printrequests as $pr) { // add printrequests
 372+ $result->addPrintRequest($pr);
 373+ }
292374 return $result;
293375 }
294376
@@ -296,13 +378,18 @@
297379 * be the content of "[[ ... ]]". Alternatively, if the current syntax
298380 * specifies a print request, return the print request object.
299381 * Returns NULL upon error.
 382+ *
 383+ * The call-by-ref parameter $setNS is a boolean used to state whether
 384+ * the namespace defaults must be added, and returns whether any have been
 385+ * added. Similar usage as in getSubqueryDescription().
300386 */
301 - protected function getLinkDescription() {
 387+ protected function getLinkDescription(&$setNS) {
302388 $result = NULL;
303389 // This method is called when we encountered an opening '[['. The following
304390 // block could be a Category-statement, fixed object, relation or attribute
305391 // statements, or according print statements.
306392 $chunk = $this->readChunk();
 393+ $hasNamespaces = false;
307394
308395 if ($chunk == $this->m_categoryprefix) { // category statement
309396 // note: no subqueries allowed here, inline disjunction allowed, wildcards allowed
@@ -347,8 +434,9 @@
348435 }
349436 }
350437 } else { // fixed subject, namespace restriction, property query, or subquery
351 - $sep = $this->readChunk();
 438+ $sep = $this->readChunk('',false); //do not consume hit, "look ahead"
352439 if ($sep == '::') { // relation statement
 440+ $this->readChunk(); // consume $sep
353441 $rel = Title::newFromText($chunk, SMW_NS_RELATION);
354442 $continue = true;
355443 $innerdesc = NULL;
@@ -376,13 +464,15 @@
377465 }
378466 break;
379467 case '+': // wildcard
 468+ /// TODO enforce default namespace!
380469 $innerdesc = $this->addDescription($innerdesc, new SMWThingDescription(), false);
381470 break;
382 - case '<q>': // subquery
 471+ case '<q>': // subquery, set default namespaces
383472 $this->pushDelimiter('</q>');
384 - $innerdesc = $this->addDescription($innerdesc, $this->getSubqueryDescription(), false);
 473+ $setsubNS = true;
 474+ $innerdesc = $this->addDescription($innerdesc, $this->getSubqueryDescription($setsubNS), false);
385475 break;
386 - default: //normal object value
 476+ default: //normal object value, brings its own namespace
387477 $obj = Title::newFromText($chunk);
388478 if ($obj !== NULL) {
389479 $innerdesc = $this->addDescription($innerdesc, new SMWNominalDescription($obj), false);
@@ -395,6 +485,7 @@
396486 $result = new SMWSomeRelation($rel,$innerdesc);
397487 }
398488 } elseif ($sep == ':=') { // attribute statement
 489+ $this->readChunk(); // consume $sep
399490 $att = Title::newFromText($chunk, SMW_NS_ATTRIBUTE);
400491 ///TODO: currently no support for disjunctions in data values (needs extension of query processor)
401492
@@ -482,10 +573,19 @@
483574 $continue = true;
484575 //$innerdesc = NULL;
485576 while ($continue) {
 577+ $hasNamespaces = true; // enforced for all cases
 578+ /// NOTE: this general enforcing is suboptimal for things like
 579+ /// "<ask>[[<q>[[Category:A]]</q>]] [[Category:B]]</ask>"
 580+ /// where one should have a single outer restriction and not enforce NS in the subquery.
 581+ /// But this only works if all "fixed subjects" have no NS already. It would be a problem
 582+ /// in cases like "<ask>[[<q>[[Category:A]]</q>||User:C]] [[Category:B]]</ask>". Since we
 583+ /// cannot go back to get the NS-restriction into the subquery here, we ignore the first
 584+ /// case even though it is quite possible (think of many disjuncted subqueries).
486585 switch ($chunk) {
487586 case '<q>': // subquery
488587 $this->pushDelimiter('</q>');
489 - $result = $this->addDescription($result, $this->getSubqueryDescription(), false);
 588+ $setsubNS = true;
 589+ $result = $this->addDescription($result, $this->getSubqueryDescription($setsubNS), false);
490590 break;
491591 default:
492592 $list = preg_split('/:/', $chunk, 3); // ":Category:Foo" "User:bar" ":baz" ":+"
@@ -506,12 +606,7 @@
507607 }
508608 }
509609
510 - if ($sep !== false) { // resuse prefetched sep
511 - $chunk = $sep;
512 - $sep = false;
513 - } else {
514 - $chunk = $this->readChunk();
515 - }
 610+ $chunk = $this->readChunk();
516611 if ($chunk == '||') {
517612 $chunk = $this->readChunk();
518613 $continue = true;
@@ -523,10 +618,16 @@
524619 }
525620
526621 if ($result === NULL) { // no useful information or concrete error found
527 - $this->m_error = 'Syntax error in query.'; //TODO internationalise
 622+ $this->m_error = 'Syntax error in part of query.'; //TODO internationalise
528623 return NULL;
529624 }
530625
 626+ if (!$hasNamespaces && $setNS && ($this->m_defaultns !== NULL) ) {
 627+ $result = $this->addDescription($result, $this->m_defaultns);
 628+ $hasNamespaces = true;
 629+ }
 630+ $setNS = $hasNamespaces;
 631+
531632 // terminate link (assuming that next chunk was read already)
532633 if ($chunk == '|') { // label, TODO
533634 $label = $this->readChunk('\]\]');
@@ -567,21 +668,30 @@
568669 *
569670 * The stoppattern can be used to customise the matching, especially in order to
570671 * overread certain special symbols.
 672+ *
 673+ * $consume specifies whether the returned chunk should be removed from the
 674+ * query string.
571675 */
572 - protected function readChunk($stoppattern = '') {
 676+ protected function readChunk($stoppattern = '', $consume=true) {
573677 if ($stoppattern == '') {
574678 $stoppattern = '\[\[|\]\]|::|:=|<q>|<\/q>|^' . $this->m_categoryprefix . '|\|\||\|';
575679 }
576680 $chunks = preg_split('/[\s]*(' . $stoppattern . ')[\s]*/', $this->m_curstring, 2, PREG_SPLIT_DELIM_CAPTURE);
577681 if (count($chunks) == 1) { // no matches anymore, strip spaces and finish
578 - $this->m_curstring = '';
 682+ if ($consume) {
 683+ $this->m_curstring = '';
 684+ }
579685 return trim($chunks[0]);
580686 } elseif (count($chunks) == 3) { // this should generally happen if count is not 1
581687 if ($chunks[0] == '') { // string started with delimiter
582 - $this->m_curstring = $chunks[2];
 688+ if ($consume) {
 689+ $this->m_curstring = $chunks[2];
 690+ }
583691 return $chunks[1]; // spaces stripped already
584692 } else {
585 - $this->m_curstring = $chunks[1] . $chunks[2];
 693+ if ($consume) {
 694+ $this->m_curstring = $chunks[1] . $chunks[2];
 695+ }
586696 return $chunks[0]; // spaces stripped already
587697 }
588698 } else { return false; } //should never happen
Index: trunk/extensions/SemanticMediaWiki/includes/storage/SMW_Description.php
@@ -222,12 +222,8 @@
223223 }
224224
225225 public function getQueryString() {
226 - global $wgContlang;
227 - if ($this->m_title !== NULL) {
228 - return '[[' . $wgContLang->getNSText($this->m_namespace) . ']]';
229 - } else {
230 - return '';
231 - }
 226+ global $wgContLang;
 227+ return '[[' . $wgContLang->getNSText($this->m_namespace) . ':+]]';
232228 }
233229
234230 public function isSingleton() {
@@ -345,11 +341,11 @@
346342 }
347343
348344 public function getQueryString() {
349 - $result = '<q>';
 345+ $result = '';
350346 foreach ($this->m_descriptions as $desc) {
351347 $result .= $desc->getQueryString() . ' ';
352348 }
353 - return $result . '</q>';
 349+ return $result;
354350 }
355351
356352 public function isSingleton() {
@@ -385,13 +381,13 @@
386382
387383 public function getQueryString() {
388384 $result = '';
389 - // TODO: this is not correct ... (many disjunctions have || abbreviations, OR does not work yet)
 385+ // TODO: many disjunctions have more suitable || abbreviations
390386 $first = true;
391387 foreach ($this->m_descriptions as $desc) {
392388 if ($first) {
393389 $first = false;
394390 } else {
395 - $result .= ' OR ';
 391+ $result .= ' || ';
396392 }
397393 $result .= $desc->getQueryString();
398394 }
Index: trunk/extensions/SemanticMediaWiki/includes/storage/SMW_SQLStore.php
@@ -581,6 +581,8 @@
582582 } elseif ($query->querymode == SMWQuery::MODE_DEBUG) {
583583 list( $startOpts, $useIndex, $tailOpts ) = $db->makeSelectOptions( $sql_options );
584584 $result = '<div style="border: 1px dotted black; background: #A1FB00; padding: 20px; ">' .
 585+ '<b>Generated Wiki Query</b><br />' .
 586+ htmlspecialchars($query->getDescription()->getQueryString()) . '<br />' .
585587 '<b>SQL-Query</b><br />' .
586588 "SELECT DISTINCT $pagetable.page_title as title, $pagetable.page_namespace as namespace" .
587589 ' FROM ' . $from . ' WHERE ' . $where . $tailOpts . '<br />' .

Status & tagging log