Index: trunk/extensions/SemanticMediaWiki/includes/SMW_QueryProcessor.php |
— | — | @@ -33,33 +33,18 @@ |
34 | 34 | * as a string. Otherwise an object of type SMWQuery is returned. |
35 | 35 | */ |
36 | 36 | static public function createQuery($querystring, $params, $inline = true) { |
37 | | - /// TODO implement |
38 | | - // DEBUG: |
39 | | - $o_desc = new SMWNominalDescription(Title::newFromText("Africa")); |
40 | | - $value = SMWDataValue::newAttributeValue('Population','5853000'); |
41 | | - $t_desc = new SMWThingDescription(); |
42 | | - $v_desc = new SMWValueDescription($value, SMW_CMP_GEQ); |
43 | | - $a_desc = new SMWSomeAttribute(Title::newFromText('Attribute:Population'), $v_desc); |
44 | | - $r_desc = new SMWSomeRelation(Title::newFromText("Relation:located in"), $o_desc); |
45 | | - $r_desc2 = new SMWSomeRelation(Title::newFromText("Relation:borders"), $r_desc); |
46 | | - $r_desc3 = new SMWSomeRelation(Title::newFromText("Relation:located in"), $t_desc); |
47 | | - $c_desc = new SMWClassDescription(Title::newFromText("Category:Country")); |
48 | | - $desc = new SMWConjunction(array($c_desc, $r_desc)); |
49 | | - $desc2 = new SMWConjunction(array($c_desc, $a_desc, $r_desc2, $r_desc)); |
50 | | - $pr1 = new SMWPrintrequest(SMW_PRINT_THIS, 'Country'); |
51 | | - $desc->addPrintRequest($pr1); |
52 | | - $desc2->addPrintRequest($pr1); |
53 | | - $pr2 = new SMWPrintrequest(SMW_PRINT_RELS, 'Borders', Title::newFromText('Relation:Borders')); |
54 | | - $desc->addPrintRequest($pr2); |
55 | | - $desc2->addPrintRequest($pr2); |
56 | | - $pr3 = new SMWPrintrequest(SMW_PRINT_ATTS, 'Population', Title::newFromText('Attribute:Population')); |
57 | | - $desc->addPrintRequest($pr3); |
58 | | - $desc2->addPrintRequest($pr3); |
59 | | - $pr4 = new SMWPrintrequest(SMW_PRINT_CATS, 'Categories'); |
60 | | - $desc->addPrintRequest($pr4); |
61 | | - //$query = new SMWQuery($desc); |
62 | | - $query = new SMWQuery($desc2); |
| 37 | + // parse query: |
| 38 | + $qp = new SMWQueryParser(); |
| 39 | + $desc = $qp->getQueryDescription($querystring); |
| 40 | + /// TODO check for errors |
63 | 41 | |
| 42 | + ///TODO do this only when wanted, use given label: |
| 43 | + $desc->addPrintRequest(new SMWPrintrequest(SMW_PRINT_THIS, 'Mainlabel')); |
| 44 | + |
| 45 | + $query = new SMWQuery($desc); |
| 46 | + |
| 47 | + print '### Query:' . $desc->getQueryString() . ' ###'; |
| 48 | + |
64 | 49 | // set query parameters: |
65 | 50 | global $smwgIQMaxLimit, $smwgIQMaxInlineLimit; |
66 | 51 | if ($inline) |
— | — | @@ -144,5 +129,231 @@ |
145 | 130 | } |
146 | 131 | |
147 | 132 | } |
| 133 | + |
| 134 | + |
| 135 | +/** |
| 136 | + * Objects of this class are in charge of parsing a query string in order |
| 137 | + * to create an SMWDescription. The class and methods are not static in order |
| 138 | + * to more cleanly store the intermediate state and progress of the parser. |
| 139 | + */ |
| 140 | +class SMWQueryParser { |
| 141 | + |
| 142 | +    protected $m_sepstack = array(); // stack of closing delimiters of all blocks ("parentheses") that are currently open |
| 143 | +    protected $m_curstring = ''; // remaining string to be parsed (parsing eats query string from the front) |
| 144 | +    protected $m_error = false; // false if all went right, error message string otherwise |
| 145 | + |
| 146 | +    protected $m_categoryprefix; // cached label of the category namespace followed by ':' |
| 147 | + |
| 148 | +    /** |
| 149 | +     * Cache the category prefix of the content language (namespace label |
| 150 | +     * followed by ':'), which readChunk() treats as a delimiter. |
| 151 | +     * |
| 152 | +     * Declared as __construct() because a method that merely shares the class |
| 153 | +     * name is no longer run as constructor by PHP 8, which would leave the |
| 154 | +     * prefix unset. |
| 155 | +     */ |
| 156 | +    public function __construct() { |
| 157 | +        global $wgContLang; |
| 158 | +        $this->m_categoryprefix = $wgContLang->getNsText(NS_CATEGORY) . ':'; |
| 159 | +    } |
| 160 | + |
| 161 | +    /** |
| 162 | +     * Compute an SMWDescription from a query string. Return this description or |
| 163 | +     * false if there were errors; the message is then available via getError(). |
| 164 | +     * The result is NULL if the query string did not contain any statement that |
| 165 | +     * yields a description. |
| 166 | +     * |
| 167 | +     * @param $querystring string containing the query that is to be parsed |
| 168 | +     * @return the computed description, NULL, or false (see above) |
| 169 | +     */ |
| 170 | +    public function getQueryDescription($querystring) { |
| 171 | +        $this->m_curstring = $querystring; |
| 172 | +        $this->m_sepstack = array(); |
| 173 | +        $this->m_error = false; // forget errors of earlier runs of this parser object |
| 174 | +        return $this->getSubqueryDescription(); |
| 175 | +    } |
| 176 | + |
| 177 | +    /** |
| 178 | +     * Return the error message recorded during the last call of |
| 179 | +     * getQueryDescription(), or false if no error was recorded. |
| 180 | +     */ |
| 181 | +    public function getError() { |
| 182 | +        return $this->m_error; |
| 183 | +    } |
| 184 | + |
| 185 | +    /** |
| 186 | +     * Compute an SMWDescription for current part of a query, which should |
| 187 | +     * be a standalone query (the main query or a subquery enclosed within |
| 188 | +     * "<q>...</q>"). Recursively calls similar methods and returns false upon |
| 189 | +     * error; the error message is then stored in $m_error. |
| 190 | +     */ |
| 191 | +    protected function getSubqueryDescription() { |
| 192 | +        $result = NULL; |
| 193 | +        while (($chunk = $this->readChunk()) !== '') { |
| 194 | +            switch ($chunk) { |
| 195 | +                case '[[': // start new link block |
| 196 | +                    $this->pushDelimiter(']]'); // expected termination symbol |
| 197 | +                    $linkdesc = $this->getLinkDescription(); |
| 198 | +                    if ($linkdesc === false) { // error message was set by getLinkDescription() |
| 199 | +                        return false; |
| 200 | +                    } |
| 201 | +                    if ($linkdesc !== NULL) { // links without description (e.g. a lone wildcard) add nothing |
| 202 | +                        $result = $this->addDescription($result, $linkdesc); |
| 203 | +                    } |
| 204 | +                    break; |
| 205 | +                case '</q>': // exit current subquery |
| 206 | +                    if ($this->popDelimiter('</q>')) { |
| 207 | +                        //TODO: return computed description |
| 208 | +                    } else { |
| 209 | +                        $this->m_error = 'There appear to be too many occurences of \'' . $chunk . '\' in the query.'; |
| 210 | +                        return false; |
| 211 | +                    } |
| 212 | +                    break; |
| 213 | +                default: // error: unexpected $chunk |
| 214 | +                    $this->m_error = 'The part \'' . $chunk . '\' in the query was not understood. Results might not be as expected.'; // TODO: internationalise |
| 215 | +                    return false; |
| 216 | +            } |
| 217 | +        } |
| 218 | +        return $result; |
| 219 | +    } |
| 220 | + |
| 221 | +    /** |
| 222 | +     * Compute an SMWDescription for current part of a query, which should |
| 223 | +     * be the content of "[[ ... ]]". Recursively calls similar methods and |
| 224 | +     * returns false upon error. NULL is returned if the link was well-formed |
| 225 | +     * but did not contribute any description. |
| 226 | +     */ |
| 227 | +    protected function getLinkDescription() { |
| 228 | +        $result = NULL; |
| 229 | +        // This method is called when we encountered an opening '[['. The following |
| 230 | +        // block could be a Category-statement, fixed object, relation or attribute |
| 231 | +        // statements, or according print statements. |
| 232 | +        $chunk = $this->readChunk(); |
| 233 | + |
| 234 | +        if ($chunk == $this->m_categoryprefix) { // category statement |
| 235 | +            // note: no subqueries allowed here, inline disjunction allowed, wildcards allowed |
| 236 | +            do { |
| 237 | +                $chunk = $this->readChunk(); |
| 238 | +                switch ($chunk) { |
| 239 | +                    case '+': //wildcard |
| 240 | +                        break; |
| 241 | +                    case '*': //print statement |
| 242 | +                        break; |
| 243 | +                    default: //assume category title |
| 244 | +                        $cat = Title::newFromText($chunk, NS_CATEGORY); |
| 245 | +                        if ($cat !== NULL) { |
| 246 | +                            $result = $this->addDescription($result, new SMWClassDescription($cat), false); |
| 247 | +                        } |
| 248 | +                } |
| 249 | +                $chunk = $this->readChunk(); |
| 250 | +            } while ($chunk == '||'); // '||' announces a further alternative |
| 251 | +        } else { // fixed subject, property query, or subquery |
| 252 | +            //TODO: not implemented yet |
| 253 | +        } |
| 254 | + |
| 255 | +        // terminate link (assuming that next chunk was read already) |
| 256 | +        if ($chunk == '|') { // label, TODO |
| 257 | +            $chunk = $this->readChunk(); |
| 258 | +            $label = ''; |
| 259 | +            ///TODO: rather have a mode for readChunk that stops only on ']]' |
| 260 | +            /// (otherwise we kill spaces in the label) |
| 261 | +            while ( ($chunk != ']]') && ($chunk !== '') ) { |
| 262 | +                $label .= $chunk; |
| 263 | +                $chunk = $this->readChunk(); |
| 264 | +            } |
| 265 | +        } |
| 266 | +        if ($chunk == ']]') { // expected termination |
| 267 | +            $this->popDelimiter(']]'); |
| 268 | +            return $result; |
| 269 | +        } |
| 270 | +        // What happened? We found some chunk that could not be processed as |
| 271 | +        // link content (as in [[Category:Test<q>]]) and there was no label to |
| 272 | +        // eat it. Or the closing ]] are just missing entirely. |
| 273 | +        if ($chunk != '') { //TODO: internationalise errors |
| 274 | +            $this->m_error = 'The symbol \'' . $chunk . '\' was used in a place where it is not useful.'; |
| 275 | +        } else { |
| 276 | +            $this->m_error = 'Some use of \'[[\' in your query was not closed by a matching \']]\'.'; |
| 277 | +        } |
| 278 | +        return false; |
| 279 | +    } |
| 280 | + |
| 281 | +    /** |
| 282 | +     * Get the next unstructured string chunk from the query string. |
| 283 | +     * Chunks are delimited by any of the special strings used in inline queries |
| 284 | +     * (such as [[, ]], <q>, ...). If the string starts with such a delimiter, |
| 285 | +     * this delimiter is returned. Otherwise the first string in front of such a |
| 286 | +     * delimiter is returned. |
| 287 | +     * Trailing and initial spaces are always ignored and chunks |
| 288 | +     * consisting only of spaces are not returned. |
| 289 | +     * If there is no more query string left to process, the empty string is |
| 290 | +     * returned (and in no other case). |
| 291 | +     */ |
| 292 | +    protected function readChunk() { |
| 293 | +        // The category prefix depends on the content language, so it must be |
| 294 | +        // escaped before it becomes part of the pattern (an unescaped '/' in it |
| 295 | +        // would end the pattern early, other special characters would change it). |
| 296 | +        $prefix = preg_quote($this->m_categoryprefix, '/'); |
| 297 | +        $chunks = preg_split('/[\s]*(\[\[|\]\]|::|:=|<q>|<\/q>|' . $prefix . '|\|\||\|)[\s]*/', $this->m_curstring, 2, PREG_SPLIT_DELIM_CAPTURE); |
| 298 | +        if ( ($chunks === false) || (count($chunks) == 1) ) { // no matches anymore (or pattern failure): strip spaces and finish |
| 299 | +            $this->m_curstring = ''; |
| 300 | +            return ($chunks === false) ? '' : trim($chunks[0]); |
| 301 | +        } |
| 302 | +        if ($chunks[0] == '') { // string started with delimiter |
| 303 | +            $this->m_curstring = $chunks[2]; |
| 304 | +            return $chunks[1]; // spaces stripped already |
| 305 | +        } |
| 306 | +        // text in front of a delimiter: keep the delimiter for the next call |
| 307 | +        $this->m_curstring = $chunks[1] . $chunks[2]; |
| 308 | +        return ltrim($chunks[0]); // the pattern only ate the spaces at the end of the text |
| 309 | +    } |
| 310 | + |
| 311 | +    /** |
| 312 | +     * Enter a new subblock in the query, which must at some time be terminated by the |
| 313 | +     * given $endstring delimiter calling popDelimiter(); |
| 314 | +     */ |
| 315 | +    protected function pushDelimiter($endstring) { |
| 316 | +        array_push($this->m_sepstack, $endstring); |
| 317 | +    } |
| 318 | + |
| 319 | +    /** |
| 320 | +     * Exit a subblock in the query ending with the given delimiter. |
| 321 | +     * If the delimiter does not match the top-most open block, false |
| 322 | +     * will be returned. Otherwise return true. The top-most block is |
| 323 | +     * removed from the stack in either case. |
| 324 | +     */ |
| 325 | +    protected function popDelimiter($endstring) { |
| 326 | +        $topdelim = array_pop($this->m_sepstack); |
| 327 | +        return ($topdelim === $endstring); // NULL (stack was empty) never matches |
| 328 | +    } |
| 329 | + |
| 330 | +    /** |
| 331 | +     * Extend a given description by a new one, either by adding the new description |
| 332 | +     * (if the old one is a container description) or by creating a new container. |
| 333 | +     * The parameter $conjunction determines whether the combination of both descriptions |
| 334 | +     * should be a disjunction or conjunction. |
| 335 | +     * |
| 336 | +     * In the special case that the current description is NULL, the new one will just |
| 337 | +     * replace the current one. |
| 338 | +     * |
| 339 | +     * The return value is the expected combined description. The object $curdesc will |
| 340 | +     * also be changed if it already was a container of the requested kind. |
| 341 | +     */ |
| 342 | +    protected function addDescription($curdesc, $newdesc, $conjunction = true) { |
| 343 | +        if ($curdesc === NULL) { |
| 344 | +            return $newdesc; |
| 345 | +        } |
| 346 | +        // we already found descriptions |
| 347 | +        if ( (($conjunction) && ($curdesc instanceof SMWConjunction)) || |
| 348 | +             ((!$conjunction) && ($curdesc instanceof SMWDisjunction)) ) { // use existing container |
| 349 | +            $curdesc->addDescription($newdesc); |
| 350 | +            return $curdesc; // callers assign the return value, so the container must be handed back |
| 351 | +        } |
| 352 | +        if ($conjunction) { // make new conjunction |
| 353 | +            return new SMWConjunction(array($curdesc, $newdesc)); |
| 354 | +        } |
| 355 | +        // make new disjunction |
| 356 | +        return new SMWDisjunction(array($curdesc, $newdesc)); |
| 357 | +    } |
| 358 | +} |
148 | 359 | |
149 | 360 | ?> |
\ No newline at end of file |