Index: trunk/phase3/includes/SearchEngine.php |
— | — | @@ -0,0 +1,1154 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * @defgroup Search Search |
| 5 | + * |
| 6 | + * @file |
| 7 | + * @ingroup Search |
| 8 | + */ |
| 9 | + |
/**
 * Generic search engine front end.
 *
 * Provides the near-match ("go") title lookup, query prefix parsing,
 * namespace helpers and a factory that instantiates the back end for the
 * active database. The search and index-update methods defined here are
 * do-nothing defaults that a back-end subclass is expected to override.
 *
 * @ingroup Search
 */
class SearchEngine {
	/** Maximum number of results to return */
	public $limit = 10;
	/** Number of results to skip before the first returned one */
	public $offset = 0;
	public $searchTerms = array();
	/** Namespace indexes to search; replacePrefixes() sets this to null for "all" */
	public $namespaces = array( NS_MAIN );
	public $showRedirects = false;

	/**
	 * Perform a full text search query and return a result set.
	 * If text searches are not supported or disabled, return null.
	 *
	 * This default implementation always returns null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @abstract
	 */
	public function searchText( $term ) {
		return null;
	}

	/**
	 * Perform a title-only search query and return a result set.
	 * If title searches are not supported or disabled, return null.
	 *
	 * This default implementation always returns null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @abstract
	 */
	public function searchTitle( $term ) {
		return null;
	}

	/**
	 * If an exact title match can be found, or a very slightly close match,
	 * return the title. If no match, returns null.
	 *
	 * The term itself and, when the content language has variants, each of
	 * its variant conversions are tried in turn. For every candidate the
	 * exact title is checked first, then several re-capitalised forms, and
	 * finally the 'SearchGetNearMatch' hook is consulted. If none of that
	 * yields a result, a few special cases based on the namespace of the
	 * original term are handled (IP addresses, user pages, files, MediaWiki
	 * messages, quoted terms).
	 *
	 * @param string $searchterm Raw text entered by the user
	 * @return Title
	 */
	public static function getNearMatch( $searchterm ) {
		global $wgContLang, $wgCapitalLinks;

		$allSearchTerms = array( $searchterm );

		if ( $wgContLang->hasVariants() ) {
			$allSearchTerms = array_merge( $allSearchTerms,
				$wgContLang->convertLinkToAllVariants( $searchterm ) );
		}

		foreach ( $allSearchTerms as $term ) {

			# Exact match? No need to look further.
			$title = Title::newFromText( $term );
			if ( is_null( $title ) ) {
				return null;
			}

			if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal()
				|| $title->exists() ) {
				return $title;
			}

			# Language methods used to re-case the term, in the order tried:
			#  lc           - all lower case (i.e. first letter capitalized)
			#  ucwords      - capitalized string
			#  uc           - all upper case
			#  ucwordbreaks - Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
			$caseMethods = array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' );
			if ( !$wgCapitalLinks ) {
				// Catch differs-by-first-letter-case-only
				$caseMethods[] = 'ucfirst';
				$caseMethods[] = 'lcfirst';
			}

			foreach ( $caseMethods as $method ) {
				$title = Title::newFromText( $wgContLang->$method( $term ) );
				if ( $title && $title->exists() ) {
					return $title;
				}
			}

			// Give hooks a chance at better match variants.
			// A hook returning false ends the lookup with whatever it put in $title.
			$title = null;
			if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
				return $title;
			}
		}

		$title = Title::newFromText( $searchterm );
		if ( is_null( $title ) ) {
			// Defensive only: an invalid $searchterm already returned inside the loop.
			return null;
		}

		# Entering an IP address goes to the contributions page
		if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
			|| User::isIP( trim( $searchterm ) ) ) {
			return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
		}

		# Entering a user goes to the user page whether it's there or not
		if ( $title->getNamespace() == NS_USER ) {
			return $title;
		}

		# Go to images that exist even if there's no local page.
		# There may have been a funny upload, or it may be on a shared
		# file repository such as Wikimedia Commons.
		if ( $title->getNamespace() == NS_IMAGE ) {
			$image = wfFindFile( $title );
			if ( $image ) {
				return $title;
			}
		}

		# MediaWiki namespace? Page may be "implied" if not customized.
		# Just return it, with caps forced as the message system likes it.
		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
		}

		# Quoted term? Try without the quotes...
		$matches = array();
		if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
			return SearchEngine::getNearMatch( $matches[1] );
		}

		return null;
	}

	/**
	 * Character-class body (for use inside a regex [...]) listing the
	 * characters that filter() keeps in a search string.
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return "A-Za-z_'0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return
	 * and how many to skip before returning the first.
	 * Both values are converted with intval().
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	public function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 * Give an array of namespace index numbers.
	 *
	 * @param array $namespaces
	 */
	public function setNamespaces( $namespaces ) {
		$this->namespaces = $namespaces;
	}

	/**
	 * Parse some common prefixes: all (search everything)
	 * or namespace names.
	 *
	 * When the query starts with the content-language 'searchall' message
	 * followed by a colon, $this->namespaces is set to null. Otherwise, when
	 * the text before the first colon is a namespace name,
	 * $this->namespaces is set to just that namespace. Note that
	 * $this->namespaces is updated even when the prefix turns out to be the
	 * whole query and the original string is returned.
	 *
	 * @param string $query
	 * @return string The query with a recognised prefix removed, or the
	 *   original query if there was no such prefix or nothing follows it
	 */
	public function replacePrefixes( $query ) {
		global $wgContLang;

		$colon = strpos( $query, ':' );
		if ( $colon === false ) {
			return $query; // nothing to do
		}

		$parsed = $query;
		$allkeyword = wfMsgForContent( 'searchall' ) . ":";
		if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
			$this->namespaces = null;
			$parsed = substr( $query, strlen( $allkeyword ) );
		} else {
			$prefix = substr( $query, 0, $colon );
			$index = $wgContLang->getNsIndex( $prefix );
			if ( $index !== false ) {
				$this->namespaces = array( $index );
				$parsed = substr( $query, $colon + 1 );
			}
		}

		if ( trim( $parsed ) == '' ) {
			return $query; // prefix was the whole query
		}

		return $parsed;
	}

	/**
	 * Make a list of searchable namespaces and their canonical names.
	 * Every content-language namespace with an index >= NS_MAIN is included.
	 *
	 * @return array Namespace index => name
	 */
	public static function searchableNamespaces() {
		global $wgContLang;
		$arr = array();
		foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
			if ( $ns >= NS_MAIN ) {
				$arr[$ns] = $name;
			}
		}
		return $arr;
	}

	/**
	 * Extract default namespaces to search from the given user's
	 * settings, returning a list of index numbers.
	 *
	 * A namespace is included when the user's 'searchNs<index>' option
	 * is truthy.
	 *
	 * @param User $user
	 * @return array
	 */
	public static function userNamespaces( &$user ) {
		$arr = array();
		foreach ( SearchEngine::searchableNamespaces() as $ns => $name ) {
			if ( $user->getOption( 'searchNs' . $ns ) ) {
				$arr[] = $ns;
			}
		}
		return $arr;
	}

	/**
	 * Find snippet highlight settings for a given user.
	 *
	 * The user's own preferences are currently ignored and fixed
	 * values are returned.
	 *
	 * @param User $user
	 * @return array contextlines, contextchars
	 */
	public static function userHighlightPrefs( &$user ) {
		//$contextlines = $user->getOption( 'contextlines',  5 );
		//$contextchars = $user->getOption( 'contextchars', 50 );
		$contextlines = 2; // Hardcode this. Old defaults sucked. :)
		$contextchars = 75; // same as above.... :P
		return array( $contextlines, $contextchars );
	}

	/**
	 * An array of namespaces indexes to be searched by default:
	 * the keys of $wgNamespacesToBeSearchedDefault whose value
	 * compares equal to true.
	 *
	 * @return array
	 */
	public static function defaultNamespaces() {
		global $wgNamespacesToBeSearchedDefault;

		return array_keys( $wgNamespacesToBeSearchedDefault, true );
	}

	/**
	 * Return a 'cleaned up' search string: every character outside
	 * legalSearchChars() becomes a space and the result is trimmed.
	 *
	 * @param string $text
	 * @return string
	 */
	public function filter( $text ) {
		// Called through $this so that a subclass override of legalSearchChars() is used.
		$lc = $this->legalSearchChars();
		return trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
	}

	/**
	 * Load up the appropriate search engine class for the currently
	 * active database backend, and return a configured instance.
	 *
	 * $wgSearchType, when set, names the class to use; otherwise the class
	 * is chosen from $wgDBtype, falling back to SearchEngineDummy. The
	 * instance is constructed with a DB_SLAVE connection and has its limit
	 * and offset reset to 0.
	 *
	 * @return SearchEngine
	 */
	public static function create() {
		global $wgDBtype, $wgSearchType;
		if ( $wgSearchType ) {
			$class = $wgSearchType;
		} elseif ( $wgDBtype == 'mysql' ) {
			$class = 'SearchMySQL';
		} elseif ( $wgDBtype == 'postgres' ) {
			$class = 'SearchPostgres';
		} elseif ( $wgDBtype == 'oracle' ) {
			$class = 'SearchOracle';
		} else {
			$class = 'SearchEngineDummy';
		}
		$search = new $class( wfGetDB( DB_SLAVE ) );
		$search->setLimitOffset( 0, 0 );
		return $search;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 * @abstract
	 */
	public function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 * @abstract
	 */
	public function updateTitle( $id, $title ) {
		// no-op
	}

	/**
	 * Get OpenSearch suggestion template.
	 *
	 * Returns $wgOpenSearchTemplate when set; otherwise builds an api.php
	 * URL whose namespace list is defaultNamespaces(), or "0" if that
	 * list is empty.
	 *
	 * @return string
	 */
	public static function getOpenSearchTemplate() {
		global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;
		if ( $wgOpenSearchTemplate ) {
			return $wgOpenSearchTemplate;
		}

		$ns = implode( ',', SearchEngine::defaultNamespaces() );
		if ( !$ns ) {
			$ns = "0";
		}
		return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace='.$ns;
	}

	/**
	 * Get internal MediaWiki Suggest template.
	 *
	 * Returns $wgMWSuggestTemplate when set; otherwise an api.php URL that
	 * keeps the {namespaces} placeholder.
	 *
	 * @return string
	 */
	public static function getMWSuggestTemplate() {
		global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;
		if ( $wgMWSuggestTemplate ) {
			return $wgMWSuggestTemplate;
		}

		return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}';
	}
}
| 370 | + |
/**
 * Base result set. Every accessor here returns an "empty" default
 * (no rows, no suggestion, no interwiki hits); concrete result sets
 * are expected to override the ones they support.
 *
 * @ingroup Search
 */
class SearchResultSet {
	/**
	 * Regular expression fragments that match the search terms, as
	 * parsed by the engine, inside a text extract.
	 *
	 * @return array Empty in this base class
	 * @abstract
	 */
	function termMatches() {
		return array();
	}

	/**
	 * Number of rows in this set.
	 *
	 * @return int Always 0 in this base class
	 */
	function numRows() {
		return 0;
	}

	/**
	 * Whether this result set contains any results.
	 *
	 * @return bool Always false in this base class
	 * @abstract
	 */
	function hasResults() {
		return false;
	}

	/**
	 * Total hit count for the query over the entire article database,
	 * for search modes that report one. This may include pages in
	 * namespaces that would not be matched on the given settings.
	 *
	 * @return int Null if no total hits number is supported
	 */
	function getTotalHits() {
		return null;
	}

	/**
	 * Whether a suggested alternate term is available. Some search
	 * modes offer one when there are no exact hits.
	 *
	 * @return bool
	 */
	function hasSuggestion() {
		return false;
	}

	/**
	 * @return string Suggested query, null if none
	 */
	function getSuggestionQuery(){
		return null;
	}

	/**
	 * @return string Highlighted suggested query, '' if none
	 */
	function getSuggestionSnippet(){
		return '';
	}

	/**
	 * Information about how and from where the results were fetched;
	 * intended for diagnostics and debugging.
	 *
	 * @return string Null in this base class
	 */
	function getInfo() {
		return null;
	}

	/**
	 * Result set of hits on other (multiple) wikis associated with this one.
	 *
	 * @return SearchResultSet Null in this base class
	 */
	function getInterwikiResults() {
		return null;
	}

	/**
	 * Check if there are results on other wikis, i.e. whether
	 * getInterwikiResults() gives something that is not loosely
	 * equal to null.
	 *
	 * @return boolean
	 */
	function hasInterwikiResults() {
		return $this->getInterwikiResults() != null;
	}


	/**
	 * Fetch the next search result.
	 *
	 * @return SearchResult False when there is nothing (more) to return
	 * @abstract
	 */
	function next() {
		return false;
	}

	/**
	 * Free the result set, if applicable. Nothing to do here.
	 */
	function free() {
		// ...
	}
}
| 487 | + |
| 488 | + |
/**
 * Marker class with no members: some search engines may bail out
 * if too many matches are found.
 *
 * @ingroup Search
 */
class SearchResultTooMany {
}
| 495 | + |
| 496 | + |
/**
 * A single search hit, built from a database row carrying
 * page_namespace and page_title.
 *
 * The revision is looked up in the constructor and may be missing
 * (see isMissingRevision()); the text-based accessors treat a missing
 * revision as empty text instead of failing.
 *
 * @ingroup Search
 */
class SearchResult {
	/** Revision obtained from Revision::newFromTitle(), or null */
	public $mRevision = null;
	/** Title built from the row's namespace and title */
	public $mTitle = null;
	/** Page text, loaded on demand by initText() */
	public $mText = null;

	/**
	 * @param object $row Row with page_namespace and page_title fields
	 */
	function SearchResult( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		if ( !is_null( $this->mTitle ) ) {
			$this->mRevision = Revision::newFromTitle( $this->mTitle );
		}
	}

	/**
	 * Check if this result points to an invalid title
	 *
	 * @return boolean
	 */
	function isBrokenTitle(){
		return is_null( $this->mTitle );
	}

	/**
	 * Check if target page is missing, happens when index is out of date
	 *
	 * @return boolean
	 */
	function isMissingRevision(){
		return !$this->mRevision;
	}

	/**
	 * @return Title
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * @return double or null if not supported
	 */
	function getScore() {
		return null;
	}

	/**
	 * Lazy initialization of article text from DB.
	 * With no revision available the text is set to the empty string.
	 */
	protected function initText(){
		if ( !isset( $this->mText ) ) {
			// mRevision is null when the index is out of date; don't call a method on it
			$this->mText = $this->mRevision ? $this->mRevision->getText() : '';
		}
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted text snippet, null (and not '') if not supported
	 */
	function getTextSnippet($terms){
		global $wgUser, $wgAdvancedSearchHighlighting;
		$this->initText();
		list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
		$h = new SearchHighlighter();
		if ( $wgAdvancedSearchHighlighting ) {
			return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
		}
		return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted title, '' if not supported
	 */
	function getTitleSnippet($terms){
		return '';
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted redirect name (redirect to this page), '' if none or not supported
	 */
	function getRedirectSnippet($terms){
		return '';
	}

	/**
	 * @return Title object for the redirect to this page, null if none or not supported
	 */
	function getRedirectTitle(){
		return null;
	}

	/**
	 * @return string highlighted relevant section name, '' if none or not supported
	 */
	function getSectionSnippet(){
		return '';
	}

	/**
	 * @return Title object (pagename+fragment) for the section, null if none or not supported
	 */
	function getSectionTitle(){
		return null;
	}

	/**
	 * @return string timestamp of the revision, null if the revision is missing
	 */
	function getTimestamp(){
		return $this->mRevision ? $this->mRevision->getTimestamp() : null;
	}

	/**
	 * @return int number of words, as counted by str_word_count()
	 */
	function getWordCount(){
		$this->initText();
		return str_word_count( $this->mText );
	}

	/**
	 * @return int size in bytes
	 */
	function getByteSize(){
		$this->initText();
		return strlen( $this->mText );
	}

	/**
	 * @return boolean if hit has related articles
	 */
	function hasRelated(){
		return false;
	}

	/**
	 * @return interwiki prefix of the title (return iw even if title is broken)
	 */
	function getInterwikiPrefix(){
		return '';
	}
}
| 646 | + |
| 647 | +/** |
| 648 | + * Highlight bits of wikitext |
| 649 | + * |
| 650 | + * @ingroup Search |
| 651 | + */ |
| 652 | +class SearchHighlighter { |
| 653 | + var $mCleanWikitext = true; |
| 654 | + |
| 655 | + function SearchHighlighter($cleanupWikitext = true){ |
| 656 | + $this->mCleanWikitext = $cleanupWikitext; |
| 657 | + } |
| 658 | + |
| 659 | + /** |
| 660 | + * Default implementation of wikitext highlighting |
| 661 | + * |
| 662 | + * @param string $text |
| 663 | + * @param array $terms Terms to highlight (unescaped) |
| 664 | + * @param int $contextlines |
| 665 | + * @param int $contextchars |
| 666 | + * @return string |
| 667 | + */ |
| 668 | + public function highlightText( $text, $terms, $contextlines, $contextchars ) { |
| 669 | + global $wgLang, $wgContLang; |
| 670 | + global $wgSearchHighlightBoundaries; |
| 671 | + $fname = __METHOD__; |
| 672 | + |
| 673 | + if($text == '') |
| 674 | + return ''; |
| 675 | + |
| 676 | + // spli text into text + templates/links/tables |
| 677 | + $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)"; |
| 678 | + // first capture group is for detecting nested templates/links/tables/references |
| 679 | + $endPatterns = array( |
| 680 | + 1 => '/(\{\{)|(\}\})/', // template |
| 681 | + 2 => '/(\[\[)|(\]\])/', // image |
| 682 | + 3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table |
| 683 | + |
| 684 | + // FIXME: this should prolly be a hook or something |
| 685 | + if(function_exists('wfCite')){ |
| 686 | + $spat .= '|(<ref>)'; // references via cite extension |
| 687 | + $endPatterns[4] = '/(<ref>)|(<\/ref>)/'; |
| 688 | + } |
| 689 | + $spat .= '/'; |
| 690 | + $textExt = array(); // text extracts |
| 691 | + $otherExt = array(); // other extracts |
| 692 | + wfProfileIn( "$fname-split" ); |
| 693 | + $start = 0; |
| 694 | + $textLen = strlen($text); |
| 695 | + $count = 0; // sequence number to maintain ordering |
| 696 | + while( $start < $textLen ){ |
| 697 | + // find start of template/image/table |
| 698 | + if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){ |
| 699 | + $epat = ''; |
| 700 | + foreach($matches as $key => $val){ |
| 701 | + if($key > 0 && $val[1] != -1){ |
| 702 | + if($key == 2){ |
| 703 | + // see if this is an image link |
| 704 | + $ns = substr($val[0],2,-1); |
| 705 | + if( $wgContLang->getNsIndex($ns) != NS_IMAGE ) |
| 706 | + break; |
| 707 | + |
| 708 | + } |
| 709 | + $epat = $endPatterns[$key]; |
| 710 | + $this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) ); |
| 711 | + $start = $val[1]; |
| 712 | + break; |
| 713 | + } |
| 714 | + } |
| 715 | + if( $epat ){ |
| 716 | + // find end (and detect any nested elements) |
| 717 | + $level = 0; |
| 718 | + $offset = $start + 1; |
| 719 | + $found = false; |
| 720 | + while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){ |
| 721 | + if( array_key_exists(2,$endMatches) ){ |
| 722 | + // found end |
| 723 | + if($level == 0){ |
| 724 | + $len = strlen($endMatches[2][0]); |
| 725 | + $off = $endMatches[2][1]; |
| 726 | + $this->splitAndAdd( $otherExt, $count, |
| 727 | + substr( $text, $start, $off + $len - $start ) ); |
| 728 | + $start = $off + $len; |
| 729 | + $found = true; |
| 730 | + break; |
| 731 | + } else{ |
| 732 | + // end of nested element |
| 733 | + $level -= 1; |
| 734 | + } |
| 735 | + } else{ |
| 736 | + // nested |
| 737 | + $level += 1; |
| 738 | + } |
| 739 | + $offset = $endMatches[0][1] + strlen($endMatches[0][0]); |
| 740 | + } |
| 741 | + if( ! $found ){ |
| 742 | + // couldn't find appropriate closing tag, skip |
| 743 | + $this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) ); |
| 744 | + $start += strlen($matches[0][0]); |
| 745 | + } |
| 746 | + continue; |
| 747 | + } |
| 748 | + } |
| 749 | + // else: add as text extract |
| 750 | + $this->splitAndAdd( $textExt, $count, substr($text,$start) ); |
| 751 | + break; |
| 752 | + } |
| 753 | + |
| 754 | + $all = $textExt + $otherExt; // these have disjunct key sets |
| 755 | + |
| 756 | + wfProfileOut( "$fname-split" ); |
| 757 | + |
| 758 | + // prepare regexps |
| 759 | + foreach( $terms as $index => $term ) { |
| 760 | + $terms[$index] = preg_quote( $term, '/' ); |
| 761 | + // manually do upper/lowercase stuff for utf-8 since PHP won't do it |
| 762 | + if(preg_match('/[\x80-\xff]/', $term) ){ |
| 763 | + $terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]); |
| 764 | + } |
| 765 | + |
| 766 | + |
| 767 | + } |
| 768 | + $anyterm = implode( '|', $terms ); |
| 769 | + $phrase = implode("$wgSearchHighlightBoundaries+", $terms ); |
| 770 | + |
| 771 | + // FIXME: a hack to scale contextchars, a correct solution |
| 772 | + // would be to have contextchars actually be char and not byte |
| 773 | + // length, and do proper utf-8 substrings and lengths everywhere, |
| 774 | + // but PHP is making that very hard and unclean to implement :( |
| 775 | + $scale = strlen($anyterm) / mb_strlen($anyterm); |
| 776 | + $contextchars = intval( $contextchars * $scale ); |
| 777 | + |
| 778 | + $patPre = "(^|$wgSearchHighlightBoundaries)"; |
| 779 | + $patPost = "($wgSearchHighlightBoundaries|$)"; |
| 780 | + |
| 781 | + $pat1 = "/(".$phrase.")/ui"; |
| 782 | + $pat2 = "/$patPre(".$anyterm.")$patPost/ui"; |
| 783 | + |
| 784 | + wfProfileIn( "$fname-extract" ); |
| 785 | + |
| 786 | + $left = $contextlines; |
| 787 | + |
| 788 | + $snippets = array(); |
| 789 | + $offsets = array(); |
| 790 | + |
| 791 | + // show beginning only if it contains all words |
| 792 | + $first = 0; |
| 793 | + $firstText = ''; |
| 794 | + foreach($textExt as $index => $line){ |
| 795 | + if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){ |
| 796 | + $firstText = $this->extract( $line, 0, $contextchars * $contextlines ); |
| 797 | + $first = $index; |
| 798 | + break; |
| 799 | + } |
| 800 | + } |
| 801 | + if( $firstText ){ |
| 802 | + $succ = true; |
| 803 | + // check if first text contains all terms |
| 804 | + foreach($terms as $term){ |
| 805 | + if( ! preg_match("/$patPre".$term."$patPost/ui", $firstText) ){ |
| 806 | + $succ = false; |
| 807 | + break; |
| 808 | + } |
| 809 | + } |
| 810 | + if( $succ ){ |
| 811 | + $snippets[$first] = $firstText; |
| 812 | + $offsets[$first] = 0; |
| 813 | + } |
| 814 | + } |
| 815 | + if( ! $snippets ) { |
| 816 | + // match whole query on text |
| 817 | + $this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets); |
| 818 | + // match whole query on templates/tables/images |
| 819 | + $this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets); |
| 820 | + // match any words on text |
| 821 | + $this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets); |
| 822 | + // match any words on templates/tables/images |
| 823 | + $this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets); |
| 824 | + |
| 825 | + ksort($snippets); |
| 826 | + } |
| 827 | + |
| 828 | + // add extra chars to each snippet to make snippets constant size |
| 829 | + $extended = array(); |
| 830 | + if( count( $snippets ) == 0){ |
| 831 | + // couldn't find the target words, just show beginning of article |
| 832 | + $targetchars = $contextchars * $contextlines; |
| 833 | + $snippets[$first] = ''; |
| 834 | + $offsets[$first] = 0; |
| 835 | + } else{ |
| 836 | + // if begin of the article contains the whole phrase, show only that !! |
| 837 | + if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first]) |
| 838 | + && $offsets[$first] < $contextchars * 2 ){ |
| 839 | + $snippets = array ($first => $snippets[$first]); |
| 840 | + } |
| 841 | + |
| 842 | + // calc by how much to extend existing snippets |
| 843 | + $targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) ); |
| 844 | + } |
| 845 | + |
| 846 | + foreach($snippets as $index => $line){ |
| 847 | + $extended[$index] = $line; |
| 848 | + $len = strlen($line); |
| 849 | + if( $len < $targetchars - 20 ){ |
| 850 | + // complete this line |
| 851 | + if($len < strlen( $all[$index] )){ |
| 852 | + $extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]); |
| 853 | + $len = strlen( $extended[$index] ); |
| 854 | + } |
| 855 | + |
| 856 | + // add more lines |
| 857 | + $add = $index + 1; |
| 858 | + while( $len < $targetchars - 20 |
| 859 | + && array_key_exists($add,$all) |
| 860 | + && !array_key_exists($add,$snippets) ){ |
| 861 | + $offsets[$add] = 0; |
| 862 | + $tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] ); |
| 863 | + $extended[$add] = $tt; |
| 864 | + $len += strlen( $tt ); |
| 865 | + $add++; |
| 866 | + } |
| 867 | + } |
| 868 | + } |
| 869 | + |
| 870 | + //$snippets = array_map('htmlspecialchars', $extended); |
| 871 | + $snippets = $extended; |
| 872 | + $last = -1; |
| 873 | + $extract = ''; |
| 874 | + foreach($snippets as $index => $line){ |
| 875 | + if($last == -1) |
| 876 | + $extract .= $line; // first line |
| 877 | + elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last])) |
| 878 | + $extract .= " ".$line; // continous lines |
| 879 | + else |
| 880 | + $extract .= '<b> ... </b>' . $line; |
| 881 | + |
| 882 | + $last = $index; |
| 883 | + } |
| 884 | + if( $extract ) |
| 885 | + $extract .= '<b> ... </b>'; |
| 886 | + |
| 887 | + $processed = array(); |
| 888 | + foreach($terms as $term){ |
| 889 | + if( ! isset($processed[$term]) ){ |
| 890 | + $pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word |
| 891 | + $extract = preg_replace( $pat3, |
| 892 | + "\\1<span class='searchmatch'>\\2</span>\\3", $extract ); |
| 893 | + $processed[$term] = true; |
| 894 | + } |
| 895 | + } |
| 896 | + |
| 897 | + wfProfileOut( "$fname-extract" ); |
| 898 | + |
| 899 | + return $extract; |
| 900 | + } |
| 901 | + |
| 902 | + /** |
| 903 | + * Split text into lines and add it to extracts array |
| 904 | + * |
| 905 | + * @param array $extracts index -> $line |
| 906 | + * @param int $count |
| 907 | + * @param string $text |
| 908 | + */ |
| 909 | + function splitAndAdd(&$extracts, &$count, $text){ |
| 910 | + $split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text ); |
| 911 | + foreach($split as $line){ |
| 912 | + $tt = trim($line); |
| 913 | + if( $tt ) |
| 914 | + $extracts[$count++] = $tt; |
| 915 | + } |
| 916 | + } |
| 917 | + |
| 918 | + /** |
| 919 | + * Do manual case conversion for non-ascii chars |
| 920 | + * |
| 921 | + * @param unknown_type $matches |
| 922 | + */ |
| 923 | + function caseCallback($matches){ |
| 924 | + global $wgContLang; |
| 925 | + if( strlen($matches[0]) > 1 ){ |
| 926 | + return '['.$wgContLang->lc($matches[0]).$wgContLang->uc($matches[0]).']'; |
| 927 | + } else |
| 928 | + return $matches[0]; |
| 929 | + } |
| 930 | + |
| 931 | + /**
| 932 | + * Extract the part of the text between byte offsets start and end,
| 933 | + * moving both ends with position() so that words are not chopped up
| 934 | + * @param string $text
| 935 | + * @param int $start requested start offset; 0 is used as-is
| 936 | + * @param int $end requested end offset; clamped to strlen($text)
| 937 | + * @param int $posStart (out) actual start position; only written if non-null on entry
| 938 | + * @param int $posEnd (out) actual end position; only written if non-null on entry
| 939 | + * @return string the extracted text, or '' if the adjusted range is empty
| 940 | + */
| 941 | + function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){
| 942 | + global $wgContLang; // NOTE(review): not used anywhere in this method
| 943 | + 
| 944 | + if( $start != 0)
| 945 | + $start = $this->position( $text, $start, 1 ); // offset 1: one past the nonletter, if one is found
| 946 | + if( $end >= strlen($text) )
| 947 | + $end = strlen($text);
| 948 | + else
| 949 | + $end = $this->position( $text, $end );
| 950 | + 
| 951 | + if(!is_null($posStart)) // a caller must pass a non-null variable to receive the value
| 952 | + $posStart = $start;
| 953 | + if(!is_null($posEnd))
| 954 | + $posEnd = $end;
| 955 | + 
| 956 | + if($end > $start)
| 957 | + return substr($text, $start, $end-$start);
| 958 | + else
| 959 | + return '';
| 960 | + }
| 961 | + |
| 962 | + /**
| 963 | + * Find a nonletter within 10 bytes either side of a point (index) in the text
| 964 | + *
| 965 | + * @param string $text
| 966 | + * @param int $point byte index into $text
| 967 | + * @param int $offset added to the index of the nonletter, if one is found
| 968 | + * @return int index of the first nonletter in the window (plus $offset), or the next utf8 char start at/after $point if none
| 969 | + */
| 970 | + function position($text, $point, $offset=0 ){
| 971 | + $tolerance = 10;
| 972 | + $s = max( 0, $point - $tolerance ); // window start, clamped to the beginning of the text
| 973 | + $l = min( strlen($text), $point + $tolerance ) - $s; // window length, clamped to the end of the text
| 974 | + $m = array();
| 975 | + if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){
| 976 | + return $m[0][1] + $s + $offset; // match offset is relative to the window, so add its start back
| 977 | + } else{
| 978 | + // check if point is on a valid first UTF8 char
| 979 | + $char = ord( $text[$point] ); // NOTE(review): assumes $point < strlen($text) - confirm against callers
| 980 | + while( $char >= 0x80 && $char < 0xc0 ) {
| 981 | + // skip trailing bytes
| 982 | + $point++;
| 983 | + if($point >= strlen($text))
| 984 | + return strlen($text);
| 985 | + $char = ord( $text[$point] );
| 986 | + }
| 987 | + return $point;
| 988 | + 
| 989 | + }
| 990 | + }
| 991 | + |
| 992 | + /**
| 993 | + * Search extracts for a pattern, and store a snippet for each matching line
| 994 | + *
| 995 | + * @param string $pattern regexp for matching lines
| 996 | + * @param array $extracts extracts to search (index -> line)
| 997 | + * @param int $linesleft (in/out) number of snippets still to make; decremented per snippet
| 998 | + * @param int $contextchars length of snippet
| 999 | + * @param array $out (in/out) index -> snippet; indexes already present are skipped
| 1000 | + * @param array $offsets (in/out) index -> starting point of the snippet in its line
| 1001 | + * @protected
| 1002 | + */
| 1003 | + function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){
| 1004 | + if($linesleft == 0)
| 1005 | + return; // nothing to do
| 1006 | + foreach($extracts as $index => $line){
| 1007 | + if( array_key_exists($index,$out) )
| 1008 | + continue; // this line already highlighted
| 1009 | + 
| 1010 | + $m = array();
| 1011 | + if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
| 1012 | + continue;
| 1013 | + 
| 1014 | + $offset = $m[0][1];
| 1015 | + $len = strlen($m[0][0]);
| 1016 | + if($offset + $len < $contextchars)
| 1017 | + $begin = 0; // the match ends inside the first $contextchars bytes: start at the line start
| 1018 | + elseif( $len > $contextchars)
| 1019 | + $begin = $offset; // the match is longer than the snippet: start at the match
| 1020 | + else
| 1021 | + $begin = $offset + intval( ($len - $contextchars) / 2 ); // centre the match in the snippet
| 1022 | + 
| 1023 | + $end = $begin + $contextchars;
| 1024 | + 
| 1025 | + $posBegin = $begin; // non-null on entry, so extract() writes the adjusted start into it
| 1026 | + // basic snippet from this line
| 1027 | + $out[$index] = $this->extract($line,$begin,$end,$posBegin);
| 1028 | + $offsets[$index] = $posBegin;
| 1029 | + $linesleft--;
| 1030 | + if($linesleft == 0)
| 1031 | + return;
| 1032 | + }
| 1033 | + }
| 1034 | + |
| 1035 | + /**
| 1036 | + * Basic wikitext removal: strips templates, link brackets, tags and quote markup by regexp
| 1037 | + * @protected
| 1038 | + */
| 1039 | + function removeWiki($text) {
| 1040 | + $fname = __METHOD__;
| 1041 | + wfProfileIn( $fname );
| 1042 | + 
| 1043 | + //$text = preg_replace("/'{2,5}/", "", $text);
| 1044 | + //$text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text);
| 1045 | + //$text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text);
| 1046 | + //$text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text);
| 1047 | + //$text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text);
| 1048 | + //$text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text);
| 1049 | + $text = preg_replace("/\\{\\{([^|]+?)\\}\\}/", "", $text); // {{...}} without a pipe is dropped
| 1050 | + $text = preg_replace("/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text); // {{name|rest}} becomes rest
| 1051 | + $text = preg_replace("/\\[\\[([^|]+?)\\]\\]/", "\\1", $text); // [[target]] becomes target
| 1052 | + $text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text); // [[target|caption]] is decided by linkReplace()
| 1053 | + //$text = preg_replace("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", "\\2", $text);
| 1054 | + $text = preg_replace("/<\/?[^>]+>/", "", $text); // anything shaped like a tag is dropped
| 1055 | + $text = preg_replace("/'''''/", "", $text); // quote markup is removed longest run first
| 1056 | + $text = preg_replace("/('''|<\/?[iIuUbB]>)/", "", $text);
| 1057 | + $text = preg_replace("/''/", "", $text);
| 1058 | + 
| 1059 | + wfProfileOut( $fname );
| 1060 | + return $text;
| 1061 | + }
| 1062 | + |
| 1063 | + /**
| 1064 | + * Callback for [[target|caption]] links: image and category links are
| 1065 | + * returned whole, every other link is reduced to its caption
| 1066 | + *
| 1067 | + * @param array $matches 0 => whole link, 1 => target part, 2 => caption
| 1068 | + */
| 1069 | + function linkReplace($matches){
| 1070 | + global $wgContLang;
| 1071 | + list( $whole, $target, $caption ) = $matches;
| 1072 | + $colonPos = strpos( $target, ':' );
| 1073 | + if( $colonPos === false )
| 1074 | + return $caption; // no namespace prefix: replace with caption
| 1075 | + 
| 1076 | + $nsIndex = $wgContLang->getNsIndex( substr( $target, 0, $colonPos ) );
| 1077 | + $keepWhole = $nsIndex !== false && ($nsIndex == NS_IMAGE || $nsIndex == NS_CATEGORY);
| 1078 | + // image and category links stay intact, anything else becomes its caption
| 1079 | + return $keepWhole ? $whole : $caption;
| 1080 | + 
| 1081 | + }
| 1082 | + |
| 1083 | + /**
| 1084 | + * Simple & fast snippet extraction, but gives completely unrelevant
| 1085 | + * snippets: the first $contextlines lines containing a term are used
| 1086 | + *
| 1087 | + * @param string $text
| 1088 | + * @param array $terms regexp fragments, joined with | into one pattern
| 1089 | + * @param int $contextlines maximum number of lines to include
| 1090 | + * @param int $contextchars context kept on each side of the match
| 1091 | + * @return string HTML-escaped lines with matches wrapped in a searchmatch span
| 1092 | + */
| 1093 | + public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
| 1094 | + global $wgContLang;
| 1095 | + $fname = __METHOD__;
| 1096 | + 
| 1097 | + $lines = explode( "\n", $text );
| 1098 | + 
| 1099 | + $terms = implode( '|', $terms );
| 1100 | + $terms = str_replace( '/', "\\/", $terms);
| 1101 | + $max = intval( $contextchars ) + 1;
| 1102 | + $pat1 = "/(.*)($terms)(.{0,$max})/i"; // splits a line into text before, match, text after
| 1103 | + 
| 1104 | + $pat2 = '/(' . $terms . ")/i"; // highlight pattern, built once instead of once per line
| 1105 | + 
| 1106 | + $extract = "";
| 1107 | + wfProfileIn( "$fname-extract" );
| 1108 | + foreach ( $lines as $line ) {
| 1109 | + if ( 0 == $contextlines ) {
| 1110 | + break;
| 1111 | + }
| 1112 | + $m = array();
| 1113 | + // lines that contain none of the terms contribute nothing
| 1114 | + if ( ! preg_match( $pat1, $line, $m ) ) {
| 1115 | + continue;
| 1116 | + }
| 1117 | + --$contextlines;
| 1118 | + $pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' );
| 1119 | + 
| 1120 | + if ( count( $m ) < 3 ) {
| 1121 | + $post = '';
| 1122 | + } else {
| 1123 | + $post = $wgContLang->truncate( $m[3], $contextchars, ' ... ' );
| 1124 | + }
| 1125 | + 
| 1126 | + $found = $m[2];
| 1127 | + 
| 1128 | + $line = htmlspecialchars( $pre . $found . $post );
| 1129 | + // NOTE(review): the terms are matched against the escaped text, so a term can also hit inside an entity such as &amp;
| 1130 | + $line = preg_replace( $pat2,
| 1131 | + "<span class='searchmatch'>\\1</span>", $line );
| 1132 | + 
| 1133 | + $extract .= $line . "\n";
| 1134 | + }
| 1135 | + wfProfileOut( "$fname-extract" );
| 1136 | + 
| 1137 | + return $extract;
| 1138 | + }
| 1139 | + |
| 1140 | +} |
| 1141 | + |
| 1142 | +/** Stand-in search engine: every method is a stub that yields null
| 1143 | + * @ingroup Search
| 1144 | + */
| 1145 | +class SearchEngineDummy {
| 1146 | + // Searching: none of the three entry points produces a result set
| 1147 | + function search( $term ) { return null; }
| 1148 | + function searchtitle() { return null; }
| 1149 | + function searchtext() { return null; }
| 1150 | + // The remaining stubs accept their arguments, do nothing and yield null
| 1151 | + function setLimitOffset($l, $o) { return null; }
| 1152 | + function setnamespaces() { return null; }
| 1153 | + function legalSearchChars() { return null; }
| 1154 | + function update() { return null; }
| 1155 | +}
Property changes on: trunk/phase3/includes/SearchEngine.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 1156 | + native |
Added: svn:keywords |
2 | 1157 | + Author Date Id Revision |
Index: trunk/phase3/includes/SearchPostgres.php |
— | — | @@ -0,0 +1,255 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2006-2007 Greg Sabino Mullane <greg@turnstep.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook base class for Postgres |
| 28 | + * @ingroup Search |
| 29 | + */ |
| 30 | +class SearchPostgres extends SearchEngine { |
| 31 | + |
| 32 | + function SearchPostgres( $db ) { // PHP 4 style constructor (named after the class)
| 33 | + $this->db = $db; // database handle used by the query methods of this class
| 34 | + }
| 35 | + |
| 36 | + /**
| 37 | + * Perform a title search query via tsearch2 and return a result set.
| 38 | + * Matches the search term against the 'titlevector' column; the
| 39 | + * SQL itself is built by searchQuery().
| 40 | + *
| 41 | + * @param string $term - Raw search term
| 42 | + * @return PostgresSearchResultSet, or SearchResultTooMany if the query gave no result
| 43 | + * @access public
| 44 | + */
| 45 | + function searchTitle( $term ) {
| 46 | + $sql = $this->searchQuery( $term , 'titlevector', 'page_title' );
| 47 | + $previousLevel = error_reporting(E_ERROR); // only fatal errors are reported while the query runs
| 48 | + $rows = $this->db->resultObject( $this->db->query( $sql, 'SearchPostgres', true ) );
| 49 | + error_reporting($previousLevel);
| 50 | + if ($rows) {
| 51 | + return new PostgresSearchResultSet( $rows, $this->searchTerms );
| 52 | + }
| 53 | + // Needed for "Query requires full scan, GIN doesn't support it"
| 54 | + return new SearchResultTooMany();
| 55 | + }
| 56 | + function searchText( $term ) { // full text search against the 'textvector' column
| 57 | + $sql = $this->searchQuery( $term, 'textvector', 'old_text' );
| 58 | + $previousLevel = error_reporting(E_ERROR); // only fatal errors are reported while the query runs
| 59 | + $rows = $this->db->resultObject( $this->db->query( $sql, 'SearchPostgres', true ) );
| 60 | + error_reporting($previousLevel);
| 61 | + if ($rows) {
| 62 | + return new PostgresSearchResultSet( $rows, $this->searchTerms );
| 63 | + }
| 64 | + return new SearchResultTooMany(); // the query gave no usable result
| 65 | + }
| 66 | + |
| 67 | + |
| 68 | + /*
| 69 | + * Transform the user's search string into a quoted tsearch2 query string (words joined with & | !)
| 70 | + */
| 71 | + function parseQuery( $term ) {
| 72 | + 
| 73 | + wfDebug( "parseQuery received: $term" );
| 74 | + 
| 75 | + ## No backslashes allowed
| 76 | + $term = preg_replace('/\\\/', '', $term);
| 77 | + 
| 78 | + ## Collapse parens into nearby words:
| 79 | + $term = preg_replace('/\s*\(\s*/', ' (', $term);
| 80 | + $term = preg_replace('/\s*\)\s*/', ') ', $term);
| 81 | + 
| 82 | + ## Treat colons as word separators:
| 83 | + $term = preg_replace('/:/', ' ', $term);
| 84 | + 
| 85 | + $searchstring = '';
| 86 | + $m = array();
| 87 | + if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
| 88 | + foreach( $m as $terms ) {
| 89 | + if (strlen($terms[1])) { ## a leading - or ! negates the word
| 90 | + $searchstring .= ' & !';
| 91 | + }
| 92 | + if (strtolower($terms[2]) === 'and') {
| 93 | + $searchstring .= ' & ';
| 94 | + }
| 95 | + else if (strtolower($terms[2]) === 'or' or $terms[2] === '|') {
| 96 | + $searchstring .= ' | ';
| 97 | + }
| 98 | + else if (strtolower($terms[2]) === 'not') {
| 99 | + $searchstring .= ' & !';
| 100 | + }
| 101 | + else { ## any other word is ANDed onto the query; surplus operators are cleaned up below
| 102 | + $searchstring .= " & $terms[2]";
| 103 | + }
| 104 | + }
| 105 | + }
| 106 | + 
| 107 | + ## Strip out leading junk
| 108 | + $searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring);
| 109 | + 
| 110 | + ## Remove any doubled-up operators
| 111 | + $searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring);
| 112 | + 
| 113 | + ## Remove any non-spaced operators (e.g. "Zounds!")
| 114 | + $searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring);
| 115 | + 
| 116 | + ## Remove any trailing whitespace or operators
| 117 | + $searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring);
| 118 | + 
| 119 | + ## Remove unnecessary quotes around everything
| 120 | + $searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring);
| 121 | + 
| 122 | + ## Quote the whole thing
| 123 | + $searchstring = $this->db->addQuotes($searchstring);
| 124 | + 
| 125 | + wfDebug( "parseQuery returned: $searchstring" );
| 126 | + 
| 127 | + return $searchstring;
| 128 | + 
| 129 | + }
| 130 | + |
| 131 | + /**
| 132 | + * Construct the full SQL query to do the search.
| 133 | + * @param string $term raw search term, turned into a tsquery string by parseQuery()
| 134 | + * @param string $fulltext name of the vector column to match and rank on ($colname is not used)
| 135 | + * @private
| 136 | + */
| 137 | + function searchQuery( $term, $fulltext, $colname ) {
| 138 | + global $wgDBversion;
| 139 | + 
| 140 | + if ( !isset( $wgDBversion ) ) {
| 141 | + $this->db->getServerVersion();
| 142 | + $wgDBversion = $this->db->numeric_version;
| 143 | + }
| 144 | + $prefix = $wgDBversion < 8.3 ? "'default'," : '';
| 145 | + 
| 146 | + $searchstring = $this->parseQuery( $term );
| 147 | + 
| 148 | + ## We need a separate query here so gin does not complain about empty searches
| 149 | + $SQL = "SELECT to_tsquery($prefix $searchstring)";
| 150 | + $res = $this->db->doQuery($SQL);
| 151 | + if (!$res) {
| 152 | + ## TODO: Better output (example to catch: one 'two)
| 153 | + die ("Sorry, that was not a valid search string. Please go back and try again");
| 154 | + }
| 155 | + $top = pg_fetch_result($res,0,0);
| 156 | + 
| 157 | + if ($top === "") { ## e.g. if only stopwords are used XXX return something better
| 158 | + $query = "SELECT page_id, page_namespace, page_title, 0 AS score ".
| 159 | + "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
| 160 | + "AND r.rev_text_id = c.old_id AND 1=0";
| 161 | + }
| 162 | + else {
| 163 | + $m = array();
| 164 | + if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
| 165 | + foreach( $m as $terms ) {
| 166 | + $this->searchTerms[$terms[1]] = $terms[1];
| 167 | + }
| 168 | + }
| 169 | + 
| 170 | + $rankscore = $wgDBversion > 8.2 ? 5 : 1;
| 171 | + $rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank';
| 172 | + $query = "SELECT page_id, page_namespace, page_title, ".
| 173 | + "$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ".
| 174 | + "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
| 175 | + "AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)";
| 176 | + }
| 177 | + 
| 178 | + ## Redirects
| 179 | + if (! $this->showRedirects)
| 180 | + $query .= ' AND page_is_redirect = 0';
| 181 | + 
| 182 | + ## Namespaces - defaults to 0
| 183 | + if( !is_null($this->namespaces) ){ // null -> search all
| 184 | + if ( count($this->namespaces) < 1)
| 185 | + $query .= ' AND page_namespace = 0';
| 186 | + else {
| 187 | + $namespaces = implode( ',', array_map( 'intval', $this->namespaces ) ); // ids forced to integers before entering the SQL
| 188 | + $query .= " AND page_namespace IN ($namespaces)";
| 189 | + }
| 190 | + }
| 191 | + 
| 192 | + $query .= " ORDER BY score DESC, page_id DESC";
| 193 | + 
| 194 | + $query .= $this->db->limitResult( '', $this->limit, $this->offset );
| 195 | + 
| 196 | + wfDebug( "searchQuery returned: $query" );
| 197 | + 
| 198 | + return $query;
| 199 | + }
| 200 | + |
| 201 | + ## Most of the work of these two functions is done automatically via triggers
| 202 | + |
| 203 | + function update( $pageid, $title, $text ) { // $title and $text are not used here
| 204 | + ## We don't want to index older revisions
| 205 | + $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ".
| 206 | + "(SELECT rev_text_id FROM revision WHERE rev_page = " . intval( $pageid ) . " ". ## page id forced to an integer
| 207 | + "ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)";
| 208 | + $this->db->doQuery($SQL);
| 209 | + return true;
| 210 | + }
| 211 | + |
| 212 | + function updateTitle( $id, $title ) { // both arguments are ignored
| 213 | + return true; // nothing is written here; success is always reported
| 214 | + }
| 215 | + |
| 216 | +} ## end of the SearchPostgres class |
| 217 | + |
| 218 | +/** One search hit built from a result row: a Title plus the row's score
| 219 | + * @ingroup Search
| 220 | + */
| 221 | +class PostgresSearchResult extends SearchResult {
| 222 | + function PostgresSearchResult( $row ) { // PHP 4 style constructor; reads page_namespace, page_title and score from $row
| 223 | + $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
| 224 | + $this->score = $row->score;
| 225 | + }
| 226 | + function getScore() { // score column of the row given to the constructor
| 227 | + return $this->score;
| 228 | + }
| 229 | +}
| 230 | + |
| 231 | +/** Result set wrapper that hands out PostgresSearchResult objects
| 232 | + * @ingroup Search
| 233 | + */
| 234 | +class PostgresSearchResultSet extends SearchResultSet {
| 235 | + function PostgresSearchResultSet( $resultSet, $terms ) {
| 236 | + $this->mTerms = $terms;
| 237 | + $this->mResultSet = $resultSet;
| 238 | + }
| 239 | + 
| 240 | + function termMatches() { // the terms given to the constructor
| 241 | + return $this->mTerms;
| 242 | + }
| 243 | + 
| 244 | + function numRows() { // delegated to the wrapped result object
| 245 | + return $this->mResultSet->numRows();
| 246 | + }
| 247 | + 
| 248 | + function next() {
| 249 | + $row = $this->mResultSet->fetchObject();
| 250 | + if( $row !== false ) {
| 251 | + // wrap the database row in a result object
| 252 | + return new PostgresSearchResult( $row );
| 253 | + }
| 254 | + return false; // fetchObject() gave false: passed on as-is
| 255 | + }
| 256 | +}
Property changes on: trunk/phase3/includes/SearchPostgres.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 257 | + native |
Index: trunk/phase3/includes/SearchUpdate.php |
— | — | @@ -0,0 +1,113 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * See deferred.txt |
| 5 | + * @ingroup Search |
| 6 | + */ |
| 7 | +class SearchUpdate { |
| 8 | + |
| 9 | + /* private */ var $mId = 0, $mNamespace, $mTitle, $mText; |
| 10 | + /* private */ var $mTitleWords; |
| 11 | + |
| 12 | + function SearchUpdate( $id, $title, $text = false ) {
| 13 | + $titleObj = Title::newFromText( $title );
| 14 | + if( !$titleObj ) {
| 15 | + // mId keeps its default of 0, so doUpdate() returns early
| 16 | + wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
| 17 | + return;
| 18 | + }
| 19 | + 
| 20 | + $this->mId = $id;
| 21 | + $this->mText = $text;
| 22 | + $this->mNamespace = $titleObj->getNamespace();
| 23 | + $this->mTitle = $titleObj->getText(); # Discard namespace
| 24 | + $this->mTitleWords = $this->mTextWords = array();
| 25 | + }
| 26 | + |
| 27 | + function doUpdate() { # Normalise the page text with a series of regexps and pass it to the search engine
| 28 | + global $wgContLang, $wgDisableSearchUpdate;
| 29 | + 
| 30 | + if( $wgDisableSearchUpdate || !$this->mId ) {
| 31 | + return false; # updates disabled, or no page id was set by the constructor
| 32 | + }
| 33 | + $fname = 'SearchUpdate::doUpdate';
| 34 | + wfProfileIn( $fname );
| 35 | + 
| 36 | + $search = SearchEngine::create();
| 37 | + $lc = SearchEngine::legalSearchChars() . '&#;';
| 38 | + 
| 39 | + if( $this->mText === false ) { # no text given: only the title is updated
| 40 | + $search->updateTitle($this->mId,
| 41 | + Title::indexTitle( $this->mNamespace, $this->mTitle ));
| 42 | + wfProfileOut( $fname );
| 43 | + return; # NOTE(review): returns null here but false above
| 44 | + }
| 45 | + 
| 46 | + # Language-specific strip/conversion
| 47 | + $text = $wgContLang->stripForSearch( $this->mText );
| 48 | + 
| 49 | + wfProfileIn( $fname.'-regexps' );
| 50 | + $text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/",
| 51 | + ' ', strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup
| 52 | + $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
| 53 | + "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
| 54 | + 
| 55 | + # Strip external URLs
| 56 | + $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
| 57 | + $protos = "http|https|ftp|mailto|news|gopher";
| 58 | + $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
| 59 | + $text = preg_replace( $pat, "\\1 \\3", $text );
| 60 | + 
| 61 | + $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/";
| 62 | + $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
| 63 | + $text = preg_replace( $p1, "\\1 ", $text );
| 64 | + $text = preg_replace( $p2, "\\1 \\3 ", $text );
| 65 | + 
| 66 | + # Internal image links
| 67 | + $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
| 68 | + $text = preg_replace( $pat2, " \\1 \\3", $text );
| 69 | + 
| 70 | + $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
| 71 | + "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
| 72 | + 
| 73 | + # Strip all remaining non-search characters
| 74 | + $text = preg_replace( "/[^{$lc}]+/", " ", $text );
| 75 | + 
| 76 | + # Handle 's, s'
| 77 | + #
| 78 | + # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
| 79 | + # $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
| 80 | + #
| 81 | + # These tail-anchored regexps are insanely slow. The worst case comes
| 82 | + # when Japanese or Chinese text (ie, no word spacing) is written on
| 83 | + # a wiki configured for Western UTF-8 mode. The Unicode characters are
| 84 | + # expanded to hex codes and the "words" are very long paragraph-length
| 85 | + # monstrosities. On a large page the above regexps may take over 20
| 86 | + # seconds *each* on a 1GHz-level processor.
| 87 | + #
| 88 | + # Following are reversed versions which are consistently fast
| 89 | + # (about 3 milliseconds on 1GHz-level processor).
| 90 | + #
| 91 | + $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
| 92 | + $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );
| 93 | + 
| 94 | + # Strip wiki '' and '''
| 95 | + $text = preg_replace( "/''[']*/", " ", $text );
| 96 | + wfProfileOut( "$fname-regexps" );
| 97 | + 
| 98 | + wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) ); # $text is passed by reference
| 99 | + 
| 100 | + # Perform the actual update
| 101 | + $search->update($this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ),
| 102 | + $text);
| 103 | + 
| 104 | + wfProfileOut( $fname );
| 105 | + }
| 106 | +} |
| 107 | + |
| 108 | +/**
| 109 | + * Placeholder class: adds nothing of its own to SearchUpdate
| 110 | + * @ingroup Search
| 111 | + */
| 112 | +class SearchUpdateMyISAM extends SearchUpdate {
| 113 | + # Inherits everything
| 114 | +}
Property changes on: trunk/phase3/includes/SearchUpdate.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 115 | + native |
Added: svn:keywords |
2 | 116 | + Author Date Id Revision |
Index: trunk/phase3/includes/SearchOracle.php |
— | — | @@ -0,0 +1,240 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook base class for Oracle (ConText). |
| 28 | + * @ingroup Search |
| 29 | + */ |
| 30 | +class SearchOracle extends SearchEngine { |
| 31 | + function __construct($db) { // keeps the database handle used to build and run the search queries
| 32 | + $this->db = $db;
| 33 | + }
| 34 | + |
| 35 | + /**
| 36 | + * Run the term through filter(), build the full text query and wrap its rows.
| 37 | + *
| 38 | + * @param string $term - Raw search term
| 39 | + * @return OracleSearchResultSet
| 40 | + * @access public
| 41 | + */
| 42 | + function searchText( $term ) {
| 43 | + $sql = $this->getQuery($this->filter($term), true); // true selects the full text index field
| 44 | + return new OracleSearchResultSet($this->db->resultObject($this->db->query($sql)), $this->searchTerms);
| 45 | + }
| 46 | + |
| 47 | + /**
| 48 | + * Perform a title-only search query and return a result set.
| 49 | + *
| 50 | + * @param string $term - Raw search term
| 51 | + * @return OracleSearchResultSet
| 52 | + * @access public
| 53 | + */
| 54 | + function searchTitle($term) {
| 55 | + $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false)));
| 56 | + return new OracleSearchResultSet($resultSet, $this->searchTerms); // same result class as searchText(), not the MySQL one
| 57 | + }
| 58 | + |
| 59 | + |
| 60 | + /**
| 61 | + * Partial WHERE clause that excludes redirects unless showRedirects is set
| 62 | + * @return string '' or 'AND page_is_redirect=0'
| 63 | + * @private
| 64 | + */
| 65 | + function queryRedirect() {
| 66 | + // redirects are wanted: no extra condition
| 67 | + if ($this->showRedirects)
| 68 | + return '';
| 69 | + 
| 70 | + return 'AND page_is_redirect=0';
| 71 | + }
| 72 | + |
| 73 | + /**
| 74 | + * Return a partial WHERE clause to limit the search to the given namespaces
| 75 | + * @return string '' when namespaces is null (search all); an empty list falls back to namespace 0
| 76 | + * @private
| 77 | + */
| 78 | + function queryNamespaces() {
| 79 | + if( is_null($this->namespaces) )
| 80 | + return '';
| 81 | + $namespaces = implode(',', array_map('intval', $this->namespaces)); // ids forced to integers before entering the SQL
| 82 | + if ($namespaces == '') {
| 83 | + $namespaces = '0';
| 84 | + }
| 85 | + return 'AND page_namespace IN (' . $namespaces . ')';
| 86 | + }
| 87 | + |
| 88 | + /**
| 89 | + * Pass the query through the database's limitResult() with this engine's limit and offset.
| 90 | + * @return string whatever limitResult() returns for $sql
| 91 | + * @private
| 92 | + */
| 93 | + function queryLimit($sql) {
| 94 | + return $this->db->limitResult($sql, $this->limit, $this->offset);
| 95 | + }
| 96 | + |
| 97 | + /**
| 98 | + * Return the ORDER BY clause appended to the search query; both
| 99 | + * parameters are ignored and the clause is always the same
| 100 | + * @return string
| 101 | + * @private
| 102 | + */
| 103 | + function queryRanking($filteredTerm, $fulltext) {
| 104 | + return ' ORDER BY score(1)';
| 105 | + }
| 106 | + |
| 107 | + /**
| 108 | + * Construct the full SQL query to do the search: main query, redirect,
| 109 | + * namespace and ranking clauses joined by spaces, then passed to queryLimit()
| 110 | + * @param string $filteredTerm
| 111 | + * @param bool $fulltext
| 112 | + * @private
| 113 | + */
| 114 | + function getQuery( $filteredTerm, $fulltext ) {
| 115 | + $clauses = array( $this->queryMain($filteredTerm, $fulltext),
| 116 | + $this->queryRedirect(),
| 117 | + $this->queryNamespaces(), $this->queryRanking( $filteredTerm, $fulltext ) );
| 118 | + return $this->queryLimit( implode( ' ', $clauses ) . ' ' );
| 119 | + }
| 120 | + |
| 121 | + |
| 122 | + /**
| 123 | + * Picks which field to search on, depending on what type of query.
| 124 | + * @param bool $fulltext
| 125 | + * @return string 'si_text' for a full text query, 'si_title' otherwise
| 126 | + */
| 127 | + function getIndexField($fulltext) {
| 128 | + return $fulltext ? 'si_text' : 'si_title';
| 129 | + }
| 130 | + |
| 131 | + /**
| 132 | + * Build the SELECT ... FROM ... WHERE part of the search query.
| 133 | + *
| 134 | + * @param string $filteredTerm
| 135 | + * @param bool $fulltext
| 136 | + * @return string query joining page and searchindex, ending in the condition from parseQuery()
| 137 | + * @private
| 138 | + */
| 139 | + function queryMain( $filteredTerm, $fulltext ) {
| 140 | + $condition = $this->parseQuery($filteredTerm, $fulltext);
| 141 | + $pageTable = $this->db->tableName('page');
| 142 | + $indexTable = $this->db->tableName('searchindex');
| 143 | + $select = 'SELECT page_id, page_namespace, page_title ';
| 144 | + $from = 'FROM ' . $pageTable . ',' . $indexTable . ' ';
| 145 | + return $select . $from . 'WHERE page_id=si_page AND ' . $condition;
| 146 | + }
| 147 | + |
| 148 | + /** Build the CONTAINS() condition for the filtered text and fill $this->searchTerms with one regexp fragment per term */
| 149 | + function parseQuery($filteredText, $fulltext) {
| 150 | + global $wgContLang;
| 151 | + $lc = SearchEngine::legalSearchChars();
| 152 | + $this->searchTerms = array();
| 153 | + 
| 154 | + # FIXME: This doesn't handle parenthetical expressions.
| 155 | + $m = array();
| 156 | + $q = array();
| 157 | + 
| 158 | + if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
| 159 | + $filteredText, $m, PREG_SET_ORDER)) {
| 160 | + foreach($m as $terms) { # 1 => modifier, 2 => word or quoted phrase, 3 => bare word, 4 => trailing *
| 161 | + $q[] = $terms[1] . $wgContLang->stripForSearch($terms[2]);
| 162 | + 
| 163 | + if (!empty($terms[3])) {
| 164 | + $regexp = preg_quote( $terms[3], '/' );
| 165 | + if ($terms[4]) # trailing *: allow any word characters after the prefix
| 166 | + $regexp .= "[0-9A-Za-z_]+";
| 167 | + } else { # quoted phrase: quotes removed, rest escaped literally
| 168 | + $regexp = preg_quote(str_replace('"', '', $terms[2]), '/');
| 169 | + }
| 170 | + $this->searchTerms[] = $regexp;
| 171 | + }
| 172 | + }
| 173 | + 
| 174 | + $searchon = $this->db->strencode(join(',', $q));
| 175 | + $field = $this->getIndexField($fulltext);
| 176 | + return " CONTAINS($field, '$searchon', 1) > 0 ";
| 177 | + }
| 178 | + |
| 179 | + /**
| 180 | + * Create or update the search index record for the given page,
| 181 | + * then sync the text and title indexes. Title and text should be pre-processed.
| 182 | + *
| 183 | + * @param int $id
| 184 | + * @param string $title
| 185 | + * @param string $text
| 186 | + */
| 187 | + function update($id, $title, $text) {
| 188 | + $dbw = wfGetDB(DB_MASTER);
| 189 | + $dbw->replace('searchindex',
| 190 | + array('si_page'), // the row is identified by its page id
| 191 | + array(
| 192 | + 'si_page' => $id,
| 193 | + 'si_title' => $title,
| 194 | + 'si_text' => $text
| 195 | + ), 'SearchOracle::update' );
| 196 | + $dbw->query("CALL ctx_ddl.sync_index('si_text_idx')"); // run on every update, after the row is written
| 197 | + $dbw->query("CALL ctx_ddl.sync_index('si_title_idx')");
| 198 | + }
| 199 | + |
| 200 | + /**
| 201 | + * Update a search index record's title only.
| 202 | + * Title should be pre-processed. Unlike update(), no index sync is issued here.
| 203 | + *
| 204 | + * @param int $id
| 205 | + * @param string $title
| 206 | + */
| 207 | + function updateTitle($id, $title) {
| 208 | + $dbw = wfGetDB(DB_MASTER);
| 209 | + 
| 210 | + $dbw->update('searchindex',
| 211 | + array('si_title' => $title), // new value
| 212 | + array('si_page' => $id), // row selection
| 213 | + 'SearchOracle::updateTitle',
| 214 | + array());
| 215 | + }
| 216 | +} |
| 217 | + |
| 218 | +/** Result set wrapper that hands out SearchResult objects
| 219 | + * @ingroup Search
| 220 | + */
| 221 | +class OracleSearchResultSet extends SearchResultSet {
| 222 | + function __construct($resultSet, $terms) {
| 223 | + $this->mTerms = $terms;
| 224 | + $this->mResultSet = $resultSet;
| 225 | + }
| 226 | + 
| 227 | + function termMatches() { // the terms given to the constructor
| 228 | + return $this->mTerms;
| 229 | + }
| 230 | + 
| 231 | + function numRows() { // delegated to the wrapped result object
| 232 | + return $this->mResultSet->numRows();
| 233 | + }
| 234 | + 
| 235 | + function next() {
| 236 | + $row = $this->mResultSet->fetchObject();
| 237 | + // false from fetchObject() is passed on; any other row is wrapped
| 238 | + return ($row === false)
| 239 | + ? false : new SearchResult($row);
| 240 | + }
| 241 | +}
Property changes on: trunk/phase3/includes/SearchOracle.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 242 | + native |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -126,8 +126,10 @@ |
127 | 127 | 'MimeMagic' => 'includes/MimeMagic.php', |
128 | 128 | 'MWException' => 'includes/Exception.php', |
129 | 129 | 'MWNamespace' => 'includes/Namespace.php', |
| 130 | + 'MySQLSearchResultSet' => 'includes/SearchMySQL.php', |
130 | 131 | 'Namespace' => 'includes/NamespaceCompat.php', // Compat |
131 | 132 | 'OldChangesList' => 'includes/ChangesList.php', |
| 133 | + 'OracleSearchResultSet' => 'includes/SearchOracle.php', |
132 | 134 | 'OutputPage' => 'includes/OutputPage.php', |
133 | 135 | 'PageHistory' => 'includes/PageHistory.php', |
134 | 136 | 'PageHistoryPager' => 'includes/PageHistory.php', |
— | — | @@ -135,6 +137,8 @@ |
136 | 138 | 'Pager' => 'includes/Pager.php', |
137 | 139 | 'PasswordError' => 'includes/User.php', |
138 | 140 | 'PatrolLog' => 'includes/PatrolLog.php', |
| 141 | + 'PostgresSearchResult' => 'includes/SearchPostgres.php', |
| 142 | + 'PostgresSearchResultSet' => 'includes/SearchPostgres.php', |
139 | 143 | 'PrefixSearch' => 'includes/PrefixSearch.php', |
140 | 144 | 'Profiler' => 'includes/Profiler.php', |
141 | 145 | 'ProfilerSimple' => 'includes/ProfilerSimple.php', |
— | — | @@ -154,6 +158,18 @@ |
155 | 159 | 'Revision' => 'includes/Revision.php', |
156 | 160 | 'RSSFeed' => 'includes/Feed.php', |
157 | 161 | 'Sanitizer' => 'includes/Sanitizer.php', |
| 162 | + 'SearchEngineDummy' => 'includes/SearchEngine.php', |
| 163 | + 'SearchEngine' => 'includes/SearchEngine.php', |
| 164 | + 'SearchHighlighter' => 'includes/SearchEngine.php', |
| 165 | + 'SearchMySQL4' => 'includes/SearchMySQL4.php', |
| 166 | + 'SearchMySQL' => 'includes/SearchMySQL.php', |
| 167 | + 'SearchOracle' => 'includes/SearchOracle.php', |
| 168 | + 'SearchPostgres' => 'includes/SearchPostgres.php', |
| 169 | + 'SearchResult' => 'includes/SearchEngine.php', |
| 170 | + 'SearchResultSet' => 'includes/SearchEngine.php', |
| 171 | + 'SearchResultTooMany' => 'includes/SearchEngine.php', |
| 172 | + 'SearchUpdate' => 'includes/SearchUpdate.php', |
| 173 | + 'SearchUpdateMyISAM' => 'includes/SearchUpdate.php', |
158 | 174 | 'SiteConfiguration' => 'includes/SiteConfiguration.php', |
159 | 175 | 'SiteStats' => 'includes/SiteStats.php', |
160 | 176 | 'SiteStatsUpdate' => 'includes/SiteStats.php', |
— | — | @@ -360,24 +376,6 @@ |
361 | 377 | 'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php', |
362 | 378 | 'StripState' => 'includes/parser/Parser.php', |
363 | 379 | |
364 | | - # includes/search |
365 | | - 'OracleSearchResultSet' => 'includes/search/Oracle.php', |
366 | | - 'PostgresSearchResult' => 'includes/search/Postgres.php', |
367 | | - 'PostgresSearchResultSet' => 'includes/search/Postgres.php', |
368 | | - 'MySQLSearchResultSet' => 'includes/Search/MySQL.php', |
369 | | - 'SearchEngineDummy' => 'includes/search/Engine.php', |
370 | | - 'SearchEngine' => 'includes/search/Engine.php', |
371 | | - 'SearchHighlighter' => 'includes/search/Engine.php', |
372 | | - 'SearchMySQL4' => 'includes/search/MySQL4.php', |
373 | | - 'SearchMySQL' => 'includes/search/MySQL.php', |
374 | | - 'SearchOracle' => 'includes/search/Oracle.php', |
375 | | - 'SearchPostgres' => 'includes/search/Postgres.php', |
376 | | - 'SearchResult' => 'includes/search/Engine.php', |
377 | | - 'SearchResultSet' => 'includes/search/Engine.php', |
378 | | - 'SearchResultTooMany' => 'includes/search/Engine.php', |
379 | | - 'SearchUpdate' => 'includes/search/Update.php', |
380 | | - 'SearchUpdateMyISAM' => 'includes/search/Update.php', |
381 | | - |
382 | 380 | # includes/specials |
383 | 381 | 'AncientPagesPage' => 'includes/specials/Ancientpages.php', |
384 | 382 | 'BrokenRedirectsPage' => 'includes/specials/BrokenRedirects.php', |
Index: trunk/phase3/includes/SearchTsearch2.php |
— | — | @@ -0,0 +1,120 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com>, Domas Mituzas <domas.mituzas@gmail.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * Search engine hook for PostgreSQL / Tsearch2 |
| 23 | + * @file |
| 24 | + * @ingroup Search |
| 25 | + */ |
| 26 | + |
| 27 | +/** |
| 28 | + * @todo document |
| 29 | + * @ingroup Search |
| 30 | + */ |
| 31 | +class SearchTsearch2 extends SearchEngine { |
| 32 | + var $strictMatching = false; |
| 33 | + |
| 34 | + function __construct( $db ) { |
| 35 | + $this->db = $db; |
| 36 | + $this->mRanking = true; |
| 37 | + } |
| 38 | + |
| 39 | + function getIndexField( $fulltext ) { |
| 40 | + return $fulltext ? 'si_text' : 'si_title'; |
| 41 | + } |
| 42 | + |
| 43 | + function parseQuery( $filteredText, $fulltext ) { |
| 44 | + global $wgContLang; |
| 45 | + $lc = SearchEngine::legalSearchChars(); |
| 46 | + $searchon = ''; |
| 47 | + $this->searchTerms = array(); |
| 48 | + |
| 49 | + # FIXME: This doesn't handle parenthetical expressions. |
| 50 | + $m = array(); |
| 51 | + if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', |
| 52 | + $filteredText, $m, PREG_SET_ORDER ) ) { |
| 53 | + foreach( $m as $terms ) { |
| 54 | + if( $searchon !== '' ) $searchon .= ' '; |
| 55 | + if( $this->strictMatching && ($terms[1] == '') ) { |
| 56 | + $terms[1] = '+'; |
| 57 | + } |
| 58 | + $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); |
| 59 | + if( !empty( $terms[3] ) ) { |
| 60 | + $regexp = preg_quote( $terms[3], '/' ); |
| 61 | + if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; |
| 62 | + } else { |
| 63 | + $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); |
| 64 | + } |
| 65 | + $this->searchTerms[] = $regexp; |
| 66 | + } |
| 67 | + wfDebug( "Would search with '$searchon'\n" ); |
| 68 | + wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" ); |
| 69 | + } else { |
| 70 | + wfDebug( "Can't understand search query '{$this->filteredText}'\n" ); |
| 71 | + } |
| 72 | + |
| 73 | + $searchon = preg_replace( '/(\s+)/', '&', $searchon ); |
| 74 | + $searchon = $this->db->strencode( $searchon ); |
| 75 | + return $searchon; |
| 76 | + } |
| 77 | + |
| 78 | + function queryRanking( $filteredTerm, $fulltext ) { |
| 79 | + $field = $this->getIndexField( $fulltext ); |
| 80 | + $searchon = $this->parseQuery( $filteredTerm, $fulltext ); |
| 81 | + if ($this->mRanking) |
| 82 | + return " ORDER BY rank($field,to_tsquery('$searchon')) DESC"; |
| 83 | + else |
| 84 | + return ""; |
| 85 | + } |
| 86 | + |
| 87 | + |
| 88 | + function queryMain( $filteredTerm, $fulltext ) { |
| 89 | + $match = $this->parseQuery( $filteredTerm, $fulltext ); |
| 90 | + $field = $this->getIndexField( $fulltext ); |
| 91 | + $cur = $this->db->tableName( 'cur' ); |
| 92 | + $searchindex = $this->db->tableName( 'searchindex' ); |
| 93 | + return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' . |
| 94 | + "FROM $cur,$searchindex " . |
| 95 | + 'WHERE cur_id=si_page AND ' . |
| 96 | + " $field @@ to_tsquery ('$match') " ; |
| 97 | + } |
| 98 | + |
| 99 | + function update( $id, $title, $text ) { |
| 100 | + $dbw = wfGetDB( DB_MASTER ); |
| 101 | + $searchindex = $dbw->tableName( 'searchindex' ); |
| 102 | + $sql = "DELETE FROM $searchindex WHERE si_page={$id}"; |
| 103 | + $dbw->query( $sql, __METHOD__ ); |
| 104 | + $sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ". |
| 105 | + " VALUES ( $id, to_tsvector('". |
| 106 | + $dbw->strencode($title). |
| 107 | + "'),to_tsvector('". |
| 108 | + $dbw->strencode( $text)."')) "; |
| 109 | + $dbw->query($sql, __METHOD__ ); |
| 110 | + } |
| 111 | + |
| 112 | + function updateTitle($id,$title) { |
| 113 | + $dbw = wfGetDB(DB_MASTER); |
| 114 | + $searchindex = $dbw->tableName( 'searchindex' ); |
| 115 | + $sql = "UPDATE $searchindex SET si_title=to_tsvector('" . |
| 116 | + $dbw->strencode( $title ) . |
| 117 | + "') WHERE si_page={$id}"; |
| 118 | + |
| 119 | + $dbw->query( $sql, __METHOD__ ); |
| 120 | + } |
| 121 | +} |
Property changes on: trunk/phase3/includes/SearchTsearch2.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 122 | + native |
Added: svn:keywords |
2 | 123 | + Author Date Id Revision |
Index: trunk/phase3/includes/SearchMySQL4.php |
— | — | @@ -0,0 +1,34 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook for MySQL 4+ |
| 28 | + * This class retained for backwards compatibility... |
| 29 | + * The meat's been moved to SearchMySQL, since the 3.x variety is gone. |
| 30 | + * @ingroup Search |
| 31 | + * @deprecated |
| 32 | + */ |
| 33 | +class SearchMySQL4 extends SearchMySQL { |
| 34 | + /* whee */ |
| 35 | +} |
Property changes on: trunk/phase3/includes/SearchMySQL4.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 36 | + native |
Added: svn:keywords |
2 | 37 | + Author Date Id Revision |
Index: trunk/phase3/includes/SearchMySQL.php |
— | — | @@ -0,0 +1,262 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook for MySQL 4+ |
| 28 | + * @ingroup Search |
| 29 | + */ |
| 30 | +class SearchMySQL extends SearchEngine { |
| 31 | + var $strictMatching = true; |
| 32 | + |
| 33 | + /** @todo document */ |
| 34 | + function __construct( $db ) { |
| 35 | + $this->db = $db; |
| 36 | + } |
| 37 | + |
| 38 | + /** @todo document */ |
| 39 | + function parseQuery( $filteredText, $fulltext ) { |
| 40 | + global $wgContLang; |
| 41 | + $lc = SearchEngine::legalSearchChars(); // Minus format chars |
| 42 | + $searchon = ''; |
| 43 | + $this->searchTerms = array(); |
| 44 | + |
| 45 | + # FIXME: This doesn't handle parenthetical expressions. |
| 46 | + $m = array(); |
| 47 | + if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', |
| 48 | + $filteredText, $m, PREG_SET_ORDER ) ) { |
| 49 | + foreach( $m as $terms ) { |
| 50 | + if( $searchon !== '' ) $searchon .= ' '; |
| 51 | + if( $this->strictMatching && ($terms[1] == '') ) { |
| 52 | + $terms[1] = '+'; |
| 53 | + } |
| 54 | + $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); |
| 55 | + if( !empty( $terms[3] ) ) { |
| 56 | + // Match individual terms in result highlighting... |
| 57 | + $regexp = preg_quote( $terms[3], '/' ); |
| 58 | + if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; |
| 59 | + } else { |
| 60 | + // Match the quoted term in result highlighting... |
| 61 | + $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); |
| 62 | + } |
| 63 | + $this->searchTerms[] = $regexp; |
| 64 | + } |
| 65 | + wfDebug( "Would search with '$searchon'\n" ); |
| 66 | + wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" ); |
| 67 | + } else { |
| 68 | + wfDebug( "Can't understand search query '{$filteredText}'\n" ); |
| 69 | + } |
| 70 | + |
| 71 | + $searchon = $this->db->strencode( $searchon ); |
| 72 | + $field = $this->getIndexField( $fulltext ); |
| 73 | + return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) "; |
| 74 | + } |
| 75 | + |
| 76 | + public static function legalSearchChars() { |
| 77 | + return "\"*" . parent::legalSearchChars(); |
| 78 | + } |
| 79 | + |
| 80 | + /** |
| 81 | + * Perform a full text search query and return a result set. |
| 82 | + * |
| 83 | + * @param string $term - Raw search term |
| 84 | + * @return MySQLSearchResultSet |
| 85 | + * @access public |
| 86 | + */ |
| 87 | + function searchText( $term ) { |
| 88 | + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); |
| 89 | + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); |
| 90 | + } |
| 91 | + |
| 92 | + /** |
| 93 | + * Perform a title-only search query and return a result set. |
| 94 | + * |
| 95 | + * @param string $term - Raw search term |
| 96 | + * @return MySQLSearchResultSet |
| 97 | + * @access public |
| 98 | + */ |
| 99 | + function searchTitle( $term ) { |
| 100 | + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); |
| 101 | + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); |
| 102 | + } |
| 103 | + |
| 104 | + |
| 105 | + /** |
| 106 | + * Return a partial WHERE clause to exclude redirects, if so set |
| 107 | + * @return string |
| 108 | + * @private |
| 109 | + */ |
| 110 | + function queryRedirect() { |
| 111 | + if( $this->showRedirects ) { |
| 112 | + return ''; |
| 113 | + } else { |
| 114 | + return 'AND page_is_redirect=0'; |
| 115 | + } |
| 116 | + } |
| 117 | + |
| 118 | + /** |
| 119 | + * Return a partial WHERE clause to limit the search to the given namespaces |
| 120 | + * @return string |
| 121 | + * @private |
| 122 | + */ |
| 123 | + function queryNamespaces() { |
| 124 | + if( is_null($this->namespaces) ) |
| 125 | + return ''; # search all |
| 126 | + $namespaces = implode( ',', $this->namespaces ); |
| 127 | + if ($namespaces == '') { |
| 128 | + $namespaces = '0'; |
| 129 | + } |
| 130 | + return 'AND page_namespace IN (' . $namespaces . ')'; |
| 131 | + } |
| 132 | + |
| 133 | + /** |
| 134 | + * Return a LIMIT clause to limit results on the query. |
| 135 | + * @return string |
| 136 | + * @private |
| 137 | + */ |
| 138 | + function queryLimit() { |
| 139 | + return $this->db->limitResult( '', $this->limit, $this->offset ); |
| 140 | + } |
| 141 | + |
| 142 | + /** |
| 143 | + * Does not do anything for generic search engine |
| 144 | + * subclasses may define this though |
| 145 | + * @return string |
| 146 | + * @private |
| 147 | + */ |
| 148 | + function queryRanking( $filteredTerm, $fulltext ) { |
| 149 | + return ''; |
| 150 | + } |
| 151 | + |
| 152 | + /** |
| 153 | + * Construct the full SQL query to do the search. |
| 154 | + * The guts shoulds be constructed in queryMain() |
| 155 | + * @param string $filteredTerm |
| 156 | + * @param bool $fulltext |
| 157 | + * @private |
| 158 | + */ |
| 159 | + function getQuery( $filteredTerm, $fulltext ) { |
| 160 | + return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . |
| 161 | + $this->queryRedirect() . ' ' . |
| 162 | + $this->queryNamespaces() . ' ' . |
| 163 | + $this->queryRanking( $filteredTerm, $fulltext ) . ' ' . |
| 164 | + $this->queryLimit(); |
| 165 | + } |
| 166 | + |
| 167 | + |
| 168 | + /** |
| 169 | + * Picks which field to index on, depending on what type of query. |
| 170 | + * @param bool $fulltext |
| 171 | + * @return string |
| 172 | + */ |
| 173 | + function getIndexField( $fulltext ) { |
| 174 | + return $fulltext ? 'si_text' : 'si_title'; |
| 175 | + } |
| 176 | + |
| 177 | + /** |
| 178 | + * Get the base part of the search query. |
| 179 | + * The actual match syntax will depend on the server |
| 180 | + * version; MySQL 3 and MySQL 4 have different capabilities |
| 181 | + * in their fulltext search indexes. |
| 182 | + * |
| 183 | + * @param string $filteredTerm |
| 184 | + * @param bool $fulltext |
| 185 | + * @return string |
| 186 | + * @private |
| 187 | + */ |
| 188 | + function queryMain( $filteredTerm, $fulltext ) { |
| 189 | + $match = $this->parseQuery( $filteredTerm, $fulltext ); |
| 190 | + $page = $this->db->tableName( 'page' ); |
| 191 | + $searchindex = $this->db->tableName( 'searchindex' ); |
| 192 | + return 'SELECT page_id, page_namespace, page_title ' . |
| 193 | + "FROM $page,$searchindex " . |
| 194 | + 'WHERE page_id=si_page AND ' . $match; |
| 195 | + } |
| 196 | + |
| 197 | + /** |
| 198 | + * Create or update the search index record for the given page. |
| 199 | + * Title and text should be pre-processed. |
| 200 | + * |
| 201 | + * @param int $id |
| 202 | + * @param string $title |
| 203 | + * @param string $text |
| 204 | + */ |
| 205 | + function update( $id, $title, $text ) { |
| 206 | + $dbw = wfGetDB( DB_MASTER ); |
| 207 | + $dbw->replace( 'searchindex', |
| 208 | + array( 'si_page' ), |
| 209 | + array( |
| 210 | + 'si_page' => $id, |
| 211 | + 'si_title' => $title, |
| 212 | + 'si_text' => $text |
| 213 | + ), __METHOD__ ); |
| 214 | + } |
| 215 | + |
| 216 | + /** |
| 217 | + * Update a search index record's title only. |
| 218 | + * Title should be pre-processed. |
| 219 | + * |
| 220 | + * @param int $id |
| 221 | + * @param string $title |
| 222 | + */ |
| 223 | + function updateTitle( $id, $title ) { |
| 224 | + $dbw = wfGetDB( DB_MASTER ); |
| 225 | + |
| 226 | + $dbw->update( 'searchindex', |
| 227 | + array( 'si_title' => $title ), |
| 228 | + array( 'si_page' => $id ), |
| 229 | + __METHOD__, |
| 230 | + array( $dbw->lowPriorityOption() ) ); |
| 231 | + } |
| 232 | +} |
| 233 | + |
| 234 | +/** |
| 235 | + * @ingroup Search |
| 236 | + */ |
| 237 | +class MySQLSearchResultSet extends SearchResultSet { |
| 238 | + function MySQLSearchResultSet( $resultSet, $terms ) { |
| 239 | + $this->mResultSet = $resultSet; |
| 240 | + $this->mTerms = $terms; |
| 241 | + } |
| 242 | + |
| 243 | + function termMatches() { |
| 244 | + return $this->mTerms; |
| 245 | + } |
| 246 | + |
| 247 | + function numRows() { |
| 248 | + return $this->mResultSet->numRows(); |
| 249 | + } |
| 250 | + |
| 251 | + function next() { |
| 252 | + $row = $this->mResultSet->fetchObject(); |
| 253 | + if( $row === false ) { |
| 254 | + return false; |
| 255 | + } else { |
| 256 | + return new SearchResult( $row ); |
| 257 | + } |
| 258 | + } |
| 259 | + |
| 260 | + function free() { |
| 261 | + $this->mResultSet->free(); |
| 262 | + } |
| 263 | +} |
Property changes on: trunk/phase3/includes/SearchMySQL.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 264 | + native |
Added: svn:keywords |
2 | 265 | + Author Date Id Revision |