r36412 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r36411‎ | r36412 | r36413 >
Date:08:41, 18 June 2008
Author:btongminh
Status:old
Tags:
Comment:
Revert r36403: Breaking SVN version history
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/SearchEngine.php (added) (history)
  • /trunk/phase3/includes/SearchEngine.php (added) (history)
  • /trunk/phase3/includes/SearchMySQL.php (added) (history)
  • /trunk/phase3/includes/SearchMySQL.php (added) (history)
  • /trunk/phase3/includes/SearchMySQL4.php (added) (history)
  • /trunk/phase3/includes/SearchMySQL4.php (added) (history)
  • /trunk/phase3/includes/SearchOracle.php (added) (history)
  • /trunk/phase3/includes/SearchOracle.php (added) (history)
  • /trunk/phase3/includes/SearchPostgres.php (added) (history)
  • /trunk/phase3/includes/SearchPostgres.php (added) (history)
  • /trunk/phase3/includes/SearchTsearch2.php (added) (history)
  • /trunk/phase3/includes/SearchTsearch2.php (added) (history)
  • /trunk/phase3/includes/SearchUpdate.php (added) (history)
  • /trunk/phase3/includes/SearchUpdate.php (added) (history)
  • /trunk/phase3/includes/search (deleted) (history)

Diff [purge]

Index: trunk/phase3/includes/SearchEngine.php
@@ -0,0 +1,1154 @@
 2+<?php
 3+/**
 4+ * @defgroup Search Search
 5+ *
 6+ * @file
 7+ * @ingroup Search
 8+ */
 9+
/**
 * Base class for search backends. Concrete engines extend it and override
 * the search/update methods; the static helpers are shared by all engines.
 * @ingroup Search
 */
class SearchEngine {
	public $limit = 10;
	public $offset = 0;
	public $searchTerms = array();
	public $namespaces = array( NS_MAIN );
	public $showRedirects = false;

	/**
	 * Run a full-text query and hand back a result set.
	 * This base implementation does nothing and returns null, which is what
	 * callers get when text search is unsupported or disabled.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet or null
	 * @access public
	 * @abstract
	 */
	function searchText( $term ) {
		return null;
	}

	/**
	 * Run a title-only query and hand back a result set.
	 * This base implementation does nothing and returns null, which is what
	 * callers get when title search is unsupported or disabled.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet or null
	 * @access public
	 * @abstract
	 */
	function searchTitle( $term ) {
		return null;
	}

	/**
	 * Find a title that matches the search term exactly, or differs from it
	 * only by letter case / language variant. Returns NULL when nothing fits.
	 *
	 * @param string $searchterm
	 * @return Title or NULL
	 */
	public static function getNearMatch( $searchterm ) {
		global $wgContLang, $wgCapitalLinks;

		$candidates = array( $searchterm );
		if ( $wgContLang->hasVariants() ) {
			$candidates = array_merge( $candidates,
				$wgContLang->convertLinkToAllVariants( $searchterm ) );
		}

		foreach ( $candidates as $term ) {
			# Exact match? No need to look further.
			$title = Title::newFromText( $term );
			if ( $title === null ) {
				return NULL;
			}
			if ( $title->getNamespace() == NS_SPECIAL
				|| $title->isExternal()
				|| $title->exists() )
			{
				return $title;
			}

			# Case variants, tried in order: all lower case (first letter
			# gets capitalized by Title), capitalized words, all upper
			# case, then Word-Caps-Breaking-At-Word-Breaks for hyphenated
			# names etc.
			foreach ( array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' ) as $transform ) {
				$title = Title::newFromText( $wgContLang->$transform( $term ) );
				if ( $title && $title->exists() ) {
					return $title;
				}
			}

			if ( !$wgCapitalLinks ) {
				// Catch differs-by-first-letter-case-only
				foreach ( array( 'ucfirst', 'lcfirst' ) as $transform ) {
					$title = Title::newFromText( $wgContLang->$transform( $term ) );
					if ( $title && $title->exists() ) {
						return $title;
					}
				}
			}

			// Give hooks a chance at better match variants
			$title = null;
			if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
				return $title;
			}
		}

		$title = Title::newFromText( $searchterm );
		$ns = $title->getNamespace();

		# Entering an IP address goes to the contributions page
		$isUserIP = ( $ns == NS_USER && User::isIP( $title->getText() ) );
		if ( $isUserIP || User::isIP( trim( $searchterm ) ) ) {
			return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
		}

		# Entering a user goes to the user page whether it's there or not
		if ( $ns == NS_USER ) {
			return $title;
		}

		# Go to images that exist even if there's no local page.
		# There may have been a funny upload, or it may be on a shared
		# file repository such as Wikimedia Commons.
		if ( $ns == NS_IMAGE && wfFindFile( $title ) ) {
			return $title;
		}

		# MediaWiki namespace? Page may be "implied" if not customized.
		# Just return it, with caps forced as the message system likes it.
		if ( $ns == NS_MEDIAWIKI ) {
			return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
		}

		# Quoted term? Try without the quotes...
		$quoted = array();
		if ( preg_match( '/^"([^"]+)"$/', $searchterm, $quoted ) ) {
			return self::getNearMatch( $quoted[1] );
		}

		return NULL;
	}

	/**
	 * Character-class body (for use inside a regex [...]) listing the
	 * characters that filter() keeps in a search string.
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return "A-Za-z_'0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set how many results to return at most and how many to skip
	 * before the first one. Both values are coerced to integers.
	 *
	 * @param int $limit
	 * @param int $offset
	 * @access public
	 */
	function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = (int)$limit;
		$this->offset = (int)$offset;
	}

	/**
	 * Choose the namespaces to search, as an array of namespace index numbers.
	 *
	 * @param array $namespaces
	 * @access public
	 */
	function setNamespaces( $namespaces ) {
		$this->namespaces = $namespaces;
	}

	/**
	 * Strip a leading "all:" keyword or namespace name from the query and
	 * adjust $this->namespaces accordingly. If the prefix turns out to be
	 * the whole query, the original query is returned.
	 *
	 * @param string $query
	 * @return string query without the recognised prefix
	 */
	function replacePrefixes( $query ) {
		global $wgContLang;

		$colonPos = strpos( $query, ':' );
		if ( $colonPos === false ) {
			return $query; // nothing to do
		}

		$allKeyword = wfMsgForContent( 'searchall' ) . ":";
		if ( strncmp( $query, $allKeyword, strlen( $allKeyword ) ) == 0 ) {
			// "all:" prefix means search everything
			$this->namespaces = null;
			$remainder = substr( $query, strlen( $allKeyword ) );
		} else {
			$remainder = $query;
			$prefix = substr( $query, 0, $colonPos );
			$nsIndex = $wgContLang->getNsIndex( $prefix );
			if ( $nsIndex !== false ) {
				$this->namespaces = array( $nsIndex );
				$remainder = substr( $query, strlen( $prefix ) + 1 );
			}
		}

		// prefix was the whole query: keep the query as typed
		return ( trim( $remainder ) == '' ) ? $query : $remainder;
	}

	/**
	 * List the namespaces that can be searched (index >= NS_MAIN),
	 * keyed by index with the name reported by the content language.
	 *
	 * @return array
	 */
	public static function searchableNamespaces() {
		global $wgContLang;

		$searchable = array();
		foreach ( $wgContLang->getNamespaces() as $index => $label ) {
			if ( $index < NS_MAIN ) {
				continue;
			}
			$searchable[$index] = $label;
		}
		return $searchable;
	}

	/**
	 * Collect the namespaces a user has ticked for searching, read from
	 * the user's 'searchNs<index>' options.
	 *
	 * @param User $user
	 * @return array list of namespace index numbers
	 * @static
	 */
	public static function userNamespaces( &$user ) {
		$selected = array();
		foreach ( array_keys( self::searchableNamespaces() ) as $index ) {
			if ( $user->getOption( 'searchNs' . $index ) ) {
				$selected[] = $index;
			}
		}
		return $selected;
	}

	/**
	 * Snippet highlight settings for a user. The values are currently
	 * hard-coded (2 lines, 75 chars) and the user object is not consulted.
	 *
	 * @param User $user
	 * @return array contextlines, contextchars
	 * @static
	 */
	public static function userHighlightPrefs( &$user ) {
		return array( 2, 75 );
	}

	/**
	 * Namespace indexes that are enabled in $wgNamespacesToBeSearchedDefault.
	 *
	 * @return array
	 * @static
	 */
	public static function defaultNamespaces() {
		global $wgNamespacesToBeSearchedDefault;

		return array_keys( $wgNamespacesToBeSearchedDefault, true );
	}

	/**
	 * Replace every character outside legalSearchChars() with a space
	 * and trim the result.
	 *
	 * @param string $text
	 * @return string
	 * @access public
	 */
	function filter( $text ) {
		$legal = $this->legalSearchChars();
		$cleaned = preg_replace( "/[^{$legal}]/", " ", $text );
		return trim( $cleaned );
	}

	/**
	 * Instantiate the search engine class: $wgSearchType if set, otherwise
	 * the class matching $wgDBtype, otherwise the dummy engine.
	 *
	 * @return SearchEngine
	 */
	public static function create() {
		global $wgDBtype, $wgSearchType;

		if ( $wgSearchType ) {
			$class = $wgSearchType;
		} else {
			switch ( $wgDBtype ) {
				case 'mysql':
					$class = 'SearchMySQL';
					break;
				case 'postgres':
					$class = 'SearchPostgres';
					break;
				case 'oracle':
					$class = 'SearchOracle';
					break;
				default:
					$class = 'SearchEngineDummy';
			}
		}

		$engine = new $class( wfGetDB( DB_SLAVE ) );
		$engine->setLimitOffset( 0, 0 );
		return $engine;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed. No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 * @abstract
	 */
	function update( $id, $title, $text ) {
		// intentionally empty
	}

	/**
	 * Update only the title of a search index record.
	 * Title should be pre-processed. No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 * @abstract
	 */
	function updateTitle( $id, $title ) {
		// intentionally empty
	}

	/**
	 * URL template for OpenSearch suggestions: $wgOpenSearchTemplate if
	 * configured, otherwise an api.php URL limited to the default namespaces.
	 *
	 * @return string
	 * @static
	 */
	public static function getOpenSearchTemplate() {
		global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;

		if ( $wgOpenSearchTemplate ) {
			return $wgOpenSearchTemplate;
		}

		$ns = implode( ',', self::defaultNamespaces() );
		if ( !$ns ) {
			$ns = "0";
		}
		return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace=' . $ns;
	}

	/**
	 * URL template for the internal MediaWiki suggest feature:
	 * $wgMWSuggestTemplate if configured, otherwise an api.php URL.
	 *
	 * @return string
	 * @static
	 */
	public static function getMWSuggestTemplate() {
		global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;

		if ( $wgMWSuggestTemplate ) {
			return $wgMWSuggestTemplate;
		}
		return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}';
	}
}
 370+
/**
 * Default (empty) result set. Every accessor returns the "nothing found /
 * not supported" value; engine-specific result sets override what they need.
 * @ingroup Search
 */
class SearchResultSet {
	/**
	 * Regular expression fragments that match the search terms, as parsed
	 * by the engine, inside a text extract.
	 *
	 * @return array
	 * @access public
	 * @abstract
	 */
	function termMatches() {
		return array();
	}

	/**
	 * Number of rows in this set; always 0 here.
	 *
	 * @return int
	 */
	function numRows() {
		return 0;
	}

	/**
	 * Whether this result set includes any results.
	 *
	 * @return bool
	 * @abstract
	 */
	function hasResults() {
		return false;
	}

	/**
	 * Total hit count for the query in the entire article database, for
	 * search modes that report one. The count may include pages in
	 * namespaces that the current settings would not match.
	 *
	 * @return int, or null when no total is supported
	 * @access public
	 */
	function getTotalHits() {
		return null;
	}

	/**
	 * Whether the engine offers a suggested alternate term, as some
	 * search modes do when there are no exact hits.
	 *
	 * @return bool
	 * @access public
	 */
	function hasSuggestion() {
		return false;
	}

	/**
	 * @return string suggested query, null if none
	 */
	function getSuggestionQuery() {
		return null;
	}

	/**
	 * @return string highlighted suggested query, '' if none
	 */
	function getSuggestionSnippet() {
		return '';
	}

	/**
	 * Diagnostic information about how and from where the results were
	 * fetched. The default set has none and returns null.
	 *
	 * @return string
	 */
	function getInfo() {
		return null;
	}

	/**
	 * Result set of hits on other (multiple) wikis associated with this one.
	 *
	 * @return SearchResultSet, null if there is none
	 */
	function getInterwikiResults() {
		return null;
	}

	/**
	 * Whether getInterwikiResults() has anything to offer.
	 *
	 * @return boolean
	 */
	function hasInterwikiResults() {
		$interwiki = $this->getInterwikiResults();
		return $interwiki != null;
	}

	/**
	 * Fetch the next search result, or false when the set is exhausted.
	 *
	 * @return SearchResult
	 * @access public
	 * @abstract
	 */
	function next() {
		return false;
	}

	/**
	 * Release the result set, if applicable. Nothing to release here.
	 *
	 * @access public
	 */
	function free() {
		// nothing to do
	}
}
 487+
 488+
/**
 * Marker object an engine can return in place of a result set when it
 * bails out because the query matched too many pages.
 * @ingroup Search
 */
class SearchResultTooMany {
	// deliberately empty: the type itself is the signal
}
 495+
 496+
/**
 * A single search hit: the page title plus lazily loaded revision text.
 * @ingroup Search
 */
class SearchResult {
	var $mRevision = null;
	// Declared explicitly; they used to be created dynamically on first write.
	var $mTitle = null;
	var $mText = null;

	/**
	 * Build the result from a database row.
	 *
	 * @param object $row row with page_namespace and page_title fields
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		if( !is_null($this->mTitle) )
			$this->mRevision = Revision::newFromTitle( $this->mTitle );
	}

	/**
	 * PHP4-style constructor name, kept as a plain method so that code
	 * calling SearchResult() explicitly (e.g. from a subclass) still works.
	 * PHP 8 no longer treats it as a constructor, hence __construct() above.
	 *
	 * @param object $row
	 */
	function SearchResult( $row ) {
		$this->__construct( $row );
	}

	/**
	 * Check if this is result points to an invalid title
	 *
	 * @return boolean
	 * @access public
	 */
	function isBrokenTitle(){
		return is_null($this->mTitle);
	}

	/**
	 * Check if target page is missing, happens when index is out of date
	 *
	 * @return boolean
	 * @access public
	 */
	function isMissingRevision(){
		return !$this->mRevision;
	}

	/**
	 * @return Title
	 * @access public
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * @return double or null if not supported
	 */
	function getScore() {
		return null;
	}

	/**
	 * Lazy initialization of article text from DB.
	 * A missing revision (see isMissingRevision()) yields empty text
	 * instead of a fatal call on null.
	 */
	protected function initText(){
		if( !isset($this->mText) ){
			$this->mText = $this->mRevision ? $this->mRevision->getText() : '';
		}
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted text snippet, null (and not '') if not supported
	 */
	function getTextSnippet($terms){
		global $wgUser, $wgAdvancedSearchHighlighting;
		$this->initText();
		list($contextlines,$contextchars) = SearchEngine::userHighlightPrefs($wgUser);
		$h = new SearchHighlighter();
		if( $wgAdvancedSearchHighlighting )
			return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
		else
			return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted title, '' if not supported
	 */
	function getTitleSnippet($terms){
		return '';
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted redirect name (redirect to this page), '' if none or not supported
	 */
	function getRedirectSnippet($terms){
		return '';
	}

	/**
	 * @return Title object for the redirect to this page, null if none or not supported
	 */
	function getRedirectTitle(){
		return null;
	}

	/**
	 * @return string highlighted relevant section name, '' if none or not supported
	 */
	function getSectionSnippet(){
		return '';
	}

	/**
	 * @return Title object (pagename+fragment) for the section, null if none or not supported
	 */
	function getSectionTitle(){
		return null;
	}

	/**
	 * @return string timestamp, '' when the revision is missing
	 */
	function getTimestamp(){
		return $this->mRevision ? $this->mRevision->getTimestamp() : '';
	}

	/**
	 * @return int number of words
	 */
	function getWordCount(){
		$this->initText();
		return str_word_count( $this->mText );
	}

	/**
	 * @return int size in bytes
	 */
	function getByteSize(){
		$this->initText();
		return strlen( $this->mText );
	}

	/**
	 * @return boolean if hit has related articles
	 */
	function hasRelated(){
		return false;
	}

	/**
	 * @return interwiki prefix of the title (return iw even if title is broken)
	 */
	function getInterwikiPrefix(){
		return '';
	}
}
 646+
/**
 * Highlight bits of wikitext
 *
 * @ingroup Search
 */
class SearchHighlighter {
	var $mCleanWikitext = true;

	/**
	 * @param bool $cleanupWikitext whether splitAndAdd() runs removeWiki() on the text
	 */
	function __construct( $cleanupWikitext = true ) {
		$this->mCleanWikitext = $cleanupWikitext;
	}

	/**
	 * PHP4-style constructor name, kept as a plain method for callers that
	 * invoke it explicitly. PHP 8 no longer treats it as a constructor.
	 *
	 * @param bool $cleanupWikitext
	 */
	function SearchHighlighter( $cleanupWikitext = true ) {
		$this->__construct( $cleanupWikitext );
	}

	/**
	 * Default implementation of wikitext highlighting.
	 *
	 * The text is split into plain-text lines and "other" extracts
	 * (templates, image links, tables, <ref> when wfCite exists). Snippets
	 * are then picked around matches, padded to a roughly constant size,
	 * joined, and the terms wrapped in <span class='searchmatch'>.
	 *
	 * NOTE(review): the returned snippet is not passed through
	 * htmlspecialchars here; confirm callers treat it as ready-made HTML.
	 *
	 * @param string $text
	 * @param array $terms Terms to highlight (unescaped)
	 * @param int $contextlines
	 * @param int $contextchars
	 * @return string
	 */
	public function highlightText( $text, $terms, $contextlines, $contextchars ) {
		global $wgContLang;
		global $wgSearchHighlightBoundaries;
		$fname = __METHOD__;

		if($text == '')
			return '';

		// split text into text + templates/links/tables
		$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
		// first capture group is for detecting nested templates/links/tables/references
		$endPatterns = array(
			1 => '/(\{\{)|(\}\})/', // template
			2 => '/(\[\[)|(\]\])/', // image
			3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table

		// FIXME: this should prolly be a hook or something
		if(function_exists('wfCite')){
			$spat .= '|(<ref>)'; // references via cite extension
			$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
		}
		$spat .= '/';
		$textExt = array(); // text extracts
		$otherExt = array(); // other extracts
		wfProfileIn( "$fname-split" );
		$start = 0;
		$textLen = strlen($text);
		$count = 0; // sequence number to maintain ordering
		while( $start < $textLen ){
			// find start of template/image/table
			if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){
				$epat = '';
				foreach($matches as $key => $val){
					if($key > 0 && $val[1] != -1){
						if($key == 2){
							// see if this is an image link
							$ns = substr($val[0],2,-1);
							if( $wgContLang->getNsIndex($ns) != NS_IMAGE )
								break;

						}
						$epat = $endPatterns[$key];
						$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
						$start = $val[1];
						break;
					}
				}
				if( $epat ){
					// find end (and detect any nested elements)
					$level = 0;
					$offset = $start + 1;
					$found = false;
					while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){
						if( array_key_exists(2,$endMatches) ){
							// found end
							if($level == 0){
								$len = strlen($endMatches[2][0]);
								$off = $endMatches[2][1];
								$this->splitAndAdd( $otherExt, $count,
									substr( $text, $start, $off + $len - $start ) );
								$start = $off + $len;
								$found = true;
								break;
							} else{
								// end of nested element
								$level -= 1;
							}
						} else{
							// nested
							$level += 1;
						}
						$offset = $endMatches[0][1] + strlen($endMatches[0][0]);
					}
					if( ! $found ){
						// couldn't find appropriate closing tag, skip
						$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) );
						$start += strlen($matches[0][0]);
					}
					continue;
				}
			}
			// else: add as text extract
			$this->splitAndAdd( $textExt, $count, substr($text,$start) );
			break;
		}

		$all = $textExt + $otherExt; // these have disjunct key sets

		wfProfileOut( "$fname-split" );

		// prepare regexps
		foreach( $terms as $index => $term ) {
			$terms[$index] = preg_quote( $term, '/' );
			// manually do upper/lowercase stuff for utf-8 since PHP won't do it
			if(preg_match('/[\x80-\xff]/', $term) ){
				$terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]);
			}
		}
		$anyterm = implode( '|', $terms );
		$phrase = implode("$wgSearchHighlightBoundaries+", $terms );

		// FIXME: a hack to scale contextchars, a correct solution
		// would be to have contextchars actually be char and not byte
		// length, and do proper utf-8 substrings and lengths everywhere,
		// but PHP is making that very hard and unclean to implement :(
		if( $anyterm === '' ){
			// no terms at all: nothing to scale by, and dividing by
			// mb_strlen('') would be a division by zero
			$scale = 1;
		} else{
			$scale = strlen($anyterm) / mb_strlen($anyterm);
		}
		$contextchars = intval( $contextchars * $scale );

		$patPre = "(^|$wgSearchHighlightBoundaries)";
		$patPost = "($wgSearchHighlightBoundaries|$)";

		$pat1 = "/(".$phrase.")/ui";
		$pat2 = "/$patPre(".$anyterm.")$patPost/ui";

		wfProfileIn( "$fname-extract" );

		$left = $contextlines;

		$snippets = array();
		$offsets = array();

		// show beginning only if it contains all words
		$first = 0;
		$firstText = '';
		foreach($textExt as $index => $line){
			if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){
				$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
				$first = $index;
				break;
			}
		}
		if( $firstText ){
			$succ = true;
			// check if first text contains all terms
			foreach($terms as $term){
				if( ! preg_match("/$patPre".$term."$patPost/ui", $firstText) ){
					$succ = false;
					break;
				}
			}
			if( $succ ){
				$snippets[$first] = $firstText;
				$offsets[$first] = 0;
			}
		}
		if( ! $snippets ) {
			// match whole query on text
			$this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets);
			// match whole query on templates/tables/images
			$this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets);
			// match any words on text
			$this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets);
			// match any words on templates/tables/images
			$this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets);

			ksort($snippets);
		}

		// add extra chars to each snippet to make snippets constant size
		$extended = array();
		if( count( $snippets ) == 0){
			// couldn't find the target words, just show beginning of article
			$targetchars = $contextchars * $contextlines;
			$snippets[$first] = '';
			$offsets[$first] = 0;
		} else{
			// if begin of the article contains the whole phrase, show only that !!
			if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first])
				&& $offsets[$first] < $contextchars * 2 ){
				$snippets = array ($first => $snippets[$first]);
			}

			// calc by how much to extend existing snippets
			$targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) );
		}

		foreach($snippets as $index => $line){
			$extended[$index] = $line;
			$len = strlen($line);
			if( $len < $targetchars - 20 ){
				// complete this line; $all may lack this index when the
				// text produced no extracts at all
				if( isset( $all[$index] ) && $len < strlen( $all[$index] ) ){
					$extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]);
					$len = strlen( $extended[$index] );
				}

				// add more lines
				$add = $index + 1;
				while( $len < $targetchars - 20
					   && array_key_exists($add,$all)
					   && !array_key_exists($add,$snippets) ){
					$offsets[$add] = 0;
					$tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
					$extended[$add] = $tt;
					$len += strlen( $tt );
					$add++;
				}
			}
		}

		$snippets = $extended;
		$last = -1;
		$extract = '';
		foreach($snippets as $index => $line){
			if($last == -1)
				$extract .= $line; // first line
			elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last]))
				$extract .= " ".$line; // continuous lines
			else
				$extract .= '<b> ... </b>' . $line;

			$last = $index;
		}
		if( $extract )
			$extract .= '<b> ... </b>';

		$processed = array();
		foreach($terms as $term){
			if( ! isset($processed[$term]) ){
				$pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word
				$extract = preg_replace( $pat3,
					"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
				$processed[$term] = true;
			}
		}

		wfProfileOut( "$fname-extract" );

		return $extract;
	}

	/**
	 * Split text into lines and add the non-empty (after trim) ones to the
	 * extracts array, numbering them with the shared running counter.
	 *
	 * @param array $extracts index -> $line
	 * @param int $count running sequence number, advanced per added line
	 * @param string $text
	 */
	function splitAndAdd(&$extracts, &$count, $text){
		$split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text );
		foreach($split as $line){
			$tt = trim($line);
			// compare against '' so that a line consisting of "0" is kept
			if( $tt !== '' )
				$extracts[$count++] = $tt;
		}
	}

	/**
	 * Do manual case conversion for non-ascii chars: a multi-byte match is
	 * replaced by a character class holding its lower and upper case forms.
	 *
	 * @param array $matches regex match, element 0 is one character
	 * @return string
	 */
	function caseCallback($matches){
		global $wgContLang;
		if( strlen($matches[0]) > 1 ){
			return '['.$wgContLang->lc($matches[0]).$wgContLang->uc($matches[0]).']';
		} else
			return $matches[0];
	}

	/**
	 * Extract part of the text from start to end, but by
	 * not chopping up words
	 *
	 * @param string $text
	 * @param int $start
	 * @param int $end
	 * @param int $posStart (out) actual start position; only written when
	 *   the caller passes a non-null variable
	 * @param int $posEnd (out) actual end position; same condition
	 * @return string
	 */
	function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){
		if( $start != 0)
			$start = $this->position( $text, $start, 1 );
		if( $end >= strlen($text) )
			$end = strlen($text);
		else
			$end = $this->position( $text, $end );

		if(!is_null($posStart))
			$posStart = $start;
		if(!is_null($posEnd))
			$posEnd = $end;

		if($end > $start)
			return substr($text, $start, $end-$start);
		else
			return '';
	}

	/**
	 * Find a nonletter near a point (index) in the text. The window
	 * [point-10, point+10) is scanned and the FIRST nonletter in it wins.
	 *
	 * @param string $text
	 * @param int $point
	 * @param int $offset added to the index of a found nonletter
	 * @return int nonletter index (+offset), or the beginning of the next
	 *   utf8 char at/after $point if the window has none
	 */
	function position($text, $point, $offset=0 ){
		$textLen = strlen($text);
		$tolerance = 10;
		$s = max( 0, $point - $tolerance );
		$l = min( $textLen, $point + $tolerance ) - $s;
		$m = array();
		if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){
			return $m[0][1] + $s + $offset;
		} else{
			// a point at or past the end has no byte to inspect
			if( $point >= $textLen )
				return $textLen;
			// check if point is on a valid first UTF8 char
			$char = ord( $text[$point] );
			while( $char >= 0x80 && $char < 0xc0 ) {
				// skip trailing bytes
				$point++;
				if($point >= $textLen)
					return $textLen;
				$char = ord( $text[$point] );
			}
			return $point;
		}
	}

	/**
	 * Search extracts for a pattern, and return snippets
	 *
	 * @param string $pattern regexp for matching lines
	 * @param array $extracts extracts to search
	 * @param int $linesleft number of extracts to make
	 * @param int $contextchars length of snippet
	 * @param array $out map for highlighted snippets
	 * @param array $offsets map of starting points of snippets
	 * @protected
	 */
	function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){
		if($linesleft == 0)
			return; // nothing to do
		foreach($extracts as $index => $line){
			if( array_key_exists($index,$out) )
				continue; // this line already highlighted

			$m = array();
			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
				continue;

			$offset = $m[0][1];
			$len = strlen($m[0][0]);
			if($offset + $len < $contextchars)
				$begin = 0; // match fits into a snippet taken from line start
			elseif( $len > $contextchars)
				$begin = $offset; // match alone is longer than a snippet
			else
				$begin = $offset + intval( ($len - $contextchars) / 2 ); // center the match

			$end = $begin + $contextchars;

			$posBegin = $begin;
			// basic snippet from this line
			$out[$index] = $this->extract($line,$begin,$end,$posBegin);
			$offsets[$index] = $posBegin;
			$linesleft--;
			if($linesleft == 0)
				return;
		}
	}

	/**
	 * Basic wikitext removal: templates, link brackets, HTML-like tags
	 * and quote-based bold/italic markup.
	 * @protected
	 */
	function removeWiki($text) {
		$fname = __METHOD__;
		wfProfileIn( $fname );

		$text = preg_replace("/\\{\\{([^|]+?)\\}\\}/", "", $text);
		$text = preg_replace("/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text);
		$text = preg_replace("/\\[\\[([^|]+?)\\]\\]/", "\\1", $text);
		$text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text);
		$text = preg_replace("/<\/?[^>]+>/", "", $text);
		$text = preg_replace("/'''''/", "", $text);
		$text = preg_replace("/('''|<\/?[iIuUbB]>)/", "", $text);
		$text = preg_replace("/''/", "", $text);

		wfProfileOut( $fname );
		return $text;
	}

	/**
	 * callback to replace [[target|caption]] kind of links, if
	 * the target is category or image, leave it
	 *
	 * @param array $matches 0 = whole link, 1 = "target|", 2 = caption
	 * @return string
	 */
	function linkReplace($matches){
		$colon = strpos( $matches[1], ':' );
		if( $colon === false )
			return $matches[2]; // replace with caption
		global $wgContLang;
		$ns = substr( $matches[1], 0, $colon );
		$index = $wgContLang->getNsIndex($ns);
		if( $index !== false && ($index == NS_IMAGE || $index == NS_CATEGORY) )
			return $matches[0]; // return the whole thing
		else
			return $matches[2];
	}

	/**
	 * Simple & fast snippet extraction, but gives completely irrelevant
	 * snippets
	 *
	 * The terms are joined into a regex as given (only '/' is escaped).
	 *
	 * @param string $text
	 * @param array $terms
	 * @param int $contextlines
	 * @param int $contextchars
	 * @return string
	 */
	public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
		global $wgContLang;
		$fname = __METHOD__;

		$lines = explode( "\n", $text );

		$terms = implode( '|', $terms );
		$terms = str_replace( '/', "\\/", $terms);
		$max = intval( $contextchars ) + 1;
		$pat1 = "/(.*)($terms)(.{0,$max})/i";

		$lineno = 0;

		$extract = "";
		wfProfileIn( "$fname-extract" );
		foreach ( $lines as $line ) {
			if ( 0 == $contextlines ) {
				break;
			}
			++$lineno;
			$m = array();
			if ( ! preg_match( $pat1, $line, $m ) ) {
				continue;
			}
			--$contextlines;
			$pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' );

			if ( count( $m ) < 3 ) {
				$post = '';
			} else {
				$post = $wgContLang->truncate( $m[3], $contextchars, ' ... ' );
			}

			$found = $m[2];

			$line = htmlspecialchars( $pre . $found . $post );
			$pat2 = '/(' . $terms . ")/i";
			$line = preg_replace( $pat2,
			  "<span class='searchmatch'>\\1</span>", $line );

			$extract .= "{$line}\n";
		}
		wfProfileOut( "$fname-extract" );

		return $extract;
	}

}
 1141+
/**
 * Do-nothing search engine, used by SearchEngine::create() when no
 * backend matches the configured database type. Every method is a stub.
 * @ingroup Search
 */
class SearchEngineDummy {
	/**
	 * @param string $term
	 * @return null always
	 */
	function search( $term ) {
		return null;
	}

	/** Accepts and ignores the limit and offset. */
	function setLimitOffset($l, $o) {
	}

	/** Stub; returns nothing. */
	function legalSearchChars() {
	}

	/** Stub; nothing is indexed. */
	function update() {
	}

	/** Stub; namespaces are ignored. */
	function setnamespaces() {
	}

	/** Stub; returns nothing. */
	function searchtitle() {
	}

	/** Stub; returns nothing. */
	function searchtext() {
	}
}
Property changes on: trunk/phase3/includes/SearchEngine.php
___________________________________________________________________
Added: svn:eol-style
11156 + native
Added: svn:keywords
21157 + Author Date Id Revision
Index: trunk/phase3/includes/SearchPostgres.php
@@ -0,0 +1,255 @@
 2+<?php
 3+# Copyright (C) 2006-2007 Greg Sabino Mullane <greg@turnstep.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
/**
 * Search engine hook base class for Postgres
 * @ingroup Search
 */
class SearchPostgres extends SearchEngine {
	/** Database object handed to the constructor; every query goes through it */
	var $db;

	/**
	 * @param $db Database object used to run the search queries
	 */
	function __construct( $db ) {
		$this->db = $db;
	}

	/**
	 * Perform a title search via tsearch2 (the 'titlevector' column)
	 * and return a result set.
	 *
	 * @param string $term - Raw search term
	 * @return PostgresSearchResultSet, or SearchResultTooMany if the query failed
	 * @access public
	 */
	function searchTitle( $term ) {
		return $this->doPostgresSearch( $term, 'titlevector', 'page_title' );
	}

	/**
	 * Perform a full text search via tsearch2 (the 'textvector' column)
	 * and return a result set.
	 *
	 * @param string $term - Raw search term
	 * @return PostgresSearchResultSet, or SearchResultTooMany if the query failed
	 * @access public
	 */
	function searchText( $term ) {
		return $this->doPostgresSearch( $term, 'textvector', 'old_text' );
	}

	/**
	 * Shared implementation of searchTitle() and searchText().
	 *
	 * @param string $term Raw search term
	 * @param string $fulltext Name of the tsvector column to match against
	 * @param string $colname Passed through to searchQuery()
	 * @return PostgresSearchResultSet or SearchResultTooMany
	 */
	private function doPostgresSearch( $term, $fulltext, $colname ) {
		$q = $this->searchQuery( $term, $fulltext, $colname );
		// Only E_ERROR is reported while the query runs; the previous level
		// is restored straight afterwards
		$olderror = error_reporting( E_ERROR );
		$resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
		error_reporting( $olderror );
		if ( !$resultSet ) {
			// Needed for "Query requires full scan, GIN doesn't support it"
			return new SearchResultTooMany();
		}
		return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Transform the user's search string into a better form for tsearch2
	 *
	 * @param string $term Raw search term
	 * @return string Query text, already quoted with the database's addQuotes()
	 */
	function parseQuery( $term ) {
		wfDebug( "parseQuery received: $term\n" );

		## No backslashes allowed
		$term = preg_replace('/\\\/', '', $term);

		## Collapse parens into nearby words:
		$term = preg_replace('/\s*\(\s*/', ' (', $term);
		$term = preg_replace('/\s*\)\s*/', ') ', $term);

		## Treat colons as word separators:
		$term = preg_replace('/:/', ' ', $term);

		$searchstring = '';
		$m = array();
		if ( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $terms ) {
				## A leading - or ! negates the word that follows
				if ( strlen( $terms[1] ) ) {
					$searchstring .= ' & !';
				}
				$word = strtolower( $terms[2] );
				if ( $word === 'and' ) {
					$searchstring .= ' & ';
				} elseif ( $word === 'or' || $terms[2] === '|' ) {
					$searchstring .= ' | ';
				} elseif ( $word === 'not' ) {
					$searchstring .= ' & !';
				} else {
					$searchstring .= " & $terms[2]";
				}
			}
		}

		## Strip out leading junk
		$searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring);

		## Remove any doubled-up operators
		$searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', '$1 ', $searchstring);

		## Remove any non-spaced operators (e.g. "Zounds!")
		$searchstring = preg_replace('/([^ ])[\!\&\|]/', '$1', $searchstring);

		## Remove any trailing whitespace or operators
		$searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring);

		## Remove unnecessary quotes around everything
		$searchstring = preg_replace('/^[\'"](.*)[\'"]$/', '$1', $searchstring);

		## Quote the whole thing
		$searchstring = $this->db->addQuotes( $searchstring );

		wfDebug( "parseQuery returned: $searchstring\n" );

		return $searchstring;
	}

	/**
	 * Construct the full SQL query to do the search.
	 * As a side effect, fills $this->searchTerms from the lexemes that
	 * to_tsquery() returns for the term.
	 *
	 * @param string $term Raw search term
	 * @param string $fulltext Name of the tsvector column to match and rank on
	 * @param string $colname Not used by this method
	 * @return string SQL
	 * @private
	 */
	function searchQuery( $term, $fulltext, $colname ) {
		global $wgDBversion;

		if ( !isset( $wgDBversion ) ) {
			$this->db->getServerVersion();
			$wgDBversion = $this->db->numeric_version;
		}
		$prefix = $wgDBversion < 8.3 ? "'default'," : '';

		$searchstring = $this->parseQuery( $term );

		## We need a separate query here so gin does not complain about empty searches
		$SQL = "SELECT to_tsquery($prefix $searchstring)";
		$res = $this->db->doQuery( $SQL );
		if ( !$res ) {
			## TODO: Better output (example to catch: one 'two)
			die ("Sorry, that was not a valid search string. Please go back and try again");
		}
		$top = pg_fetch_result( $res, 0, 0 );

		if ( $top === "" ) { ## e.g. if only stopwords are used XXX return something better
			$query = "SELECT page_id, page_namespace, page_title, 0 AS score ".
				"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
				"AND r.rev_text_id = c.old_id AND 1=0";
		} else {
			$m = array();
			if ( preg_match_all( "/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
				foreach ( $m as $terms ) {
					$this->searchTerms[$terms[1]] = $terms[1];
				}
			}

			$rankscore = $wgDBversion > 8.2 ? 5 : 1;
			$rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank';
			$query = "SELECT page_id, page_namespace, page_title, ".
				"$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ".
				"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
				"AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)";
		}

		## Redirects
		if ( !$this->showRedirects ) {
			$query .= ' AND page_is_redirect = 0';
		}

		## Namespaces - defaults to 0
		if ( !is_null( $this->namespaces ) ) { // null -> search all
			if ( count( $this->namespaces ) < 1 ) {
				$query .= ' AND page_namespace = 0';
			} else {
				// Force integers so nothing else can reach the SQL text
				$namespaces = implode( ',', array_map( 'intval', $this->namespaces ) );
				$query .= " AND page_namespace IN ($namespaces)";
			}
		}

		$query .= " ORDER BY score DESC, page_id DESC";

		$query .= $this->db->limitResult( '', $this->limit, $this->offset );

		wfDebug( "searchQuery returned: $query\n" );

		return $query;
	}

	## Most of the work of these two functions is done automatically via triggers

	/**
	 * Clear the text vector of the page's previous revision.
	 *
	 * @param int $pageid
	 * @param string $title Not used by this method
	 * @param string $text Not used by this method
	 * @return bool Always true
	 */
	function update( $pageid, $title, $text ) {
		// The id is interpolated into SQL below, so force it to an integer
		$pageid = intval( $pageid );

		## We don't want to index older revisions
		$SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ".
			"(SELECT rev_text_id FROM revision WHERE rev_page = $pageid ".
			"ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)";
		$this->db->doQuery( $SQL );
		return true;
	}

	/**
	 * No-op.
	 *
	 * @param int $id
	 * @param string $title
	 * @return bool Always true
	 */
	function updateTitle( $id, $title ) {
		return true;
	}

} ## end of the SearchPostgres class
 217+
/**
 * One row of a Postgres search: the page title plus the score column
 * selected by the search query.
 * @ingroup Search
 */
class PostgresSearchResult extends SearchResult {
	/** Value of the row's "score" column */
	var $score;

	/**
	 * @param object $row Result row with page_namespace, page_title and score fields
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$this->score = $row->score;
	}

	/**
	 * @return mixed The score taken from the result row
	 */
	function getScore() {
		return $this->score;
	}
}
 230+
/**
 * Result set wrapper that hands out PostgresSearchResult objects.
 * @ingroup Search
 */
class PostgresSearchResultSet extends SearchResultSet {
	/**
	 * @param $resultSet Database result object to iterate over
	 * @param array $terms Matched terms, returned unchanged by termMatches()
	 */
	function __construct( $resultSet, $terms ) {
		$this->mResultSet = $resultSet;
		$this->mTerms = $terms;
	}

	/**
	 * @return array The terms given to the constructor
	 */
	function termMatches() {
		return $this->mTerms;
	}

	/**
	 * @return int Row count reported by the underlying result object
	 */
	function numRows() {
		return $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next row.
	 *
	 * @return PostgresSearchResult, or false once the rows are exhausted
	 */
	function next() {
		$row = $this->mResultSet->fetchObject();
		if ( $row === false ) {
			return false;
		}
		return new PostgresSearchResult( $row );
	}
}
Property changes on: trunk/phase3/includes/SearchPostgres.php
___________________________________________________________________
Added: svn:eol-style
1257 + native
Index: trunk/phase3/includes/SearchUpdate.php
@@ -0,0 +1,113 @@
 2+<?php
/**
 * See deferred.txt
 *
 * Holds one page's id, title and (optionally) text, and pushes a
 * normalised form of them to the search engine in doUpdate().
 * @ingroup Search
 */
class SearchUpdate {

	/* private */ var $mId = 0, $mNamespace, $mTitle, $mText;
	/* private */ var $mTitleWords, $mTextWords;

	/**
	 * @param int $id Page id
	 * @param string $title Title text; an unparsable title leaves the
	 *   object with id 0, which makes doUpdate() a no-op
	 * @param string|false $text Page text, or false to update the title only
	 */
	function __construct( $id, $title, $text = false ) {
		$nt = Title::newFromText( $title );
		if ( $nt ) {
			$this->mId = $id;
			$this->mText = $text;

			$this->mNamespace = $nt->getNamespace();
			$this->mTitle = $nt->getText(); # Discard namespace

			$this->mTitleWords = $this->mTextWords = array();
		} else {
			wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
		}
	}

	/**
	 * Send the update to the search engine.
	 *
	 * @return false when updates are disabled or the object has no page id;
	 *   nothing otherwise
	 */
	function doUpdate() {
		global $wgContLang, $wgDisableSearchUpdate;

		if ( $wgDisableSearchUpdate || !$this->mId ) {
			return false;
		}
		$fname = 'SearchUpdate::doUpdate';
		wfProfileIn( $fname );

		$search = SearchEngine::create();
		$lc = SearchEngine::legalSearchChars() . '&#;';

		if ( $this->mText === false ) {
			# Title-only update: no text processing needed
			$search->updateTitle( $this->mId,
				Title::indexTitle( $this->mNamespace, $this->mTitle ) );
			wfProfileOut( $fname );
			return;
		}

		# Language-specific strip/conversion
		$text = $wgContLang->stripForSearch( $this->mText );

		wfProfileIn( $fname.'-regexps' );
		$text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/",
			' ', strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup
		$text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
			"\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings

		# Strip external URLs
		$uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
		$protos = "http|https|ftp|mailto|news|gopher";
		$pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
		$text = preg_replace( $pat, "\\1 \\3", $text );

		$p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/";
		$p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
		$text = preg_replace( $p1, "\\1 ", $text );
		$text = preg_replace( $p2, "\\1 \\3 ", $text );

		# Internal image links
		$pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
		$text = preg_replace( $pat2, " \\1 \\3", $text );

		$text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
			"\\1\\2 \\2\\3", $text ); # Handle [[game]]s

		# Strip all remaining non-search characters
		$text = preg_replace( "/[^{$lc}]+/", " ", $text );

		# Handle 's, s'
		#
		#   $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
		#   $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
		#
		# These tail-anchored regexps are insanely slow. The worst case comes
		# when Japanese or Chinese text (ie, no word spacing) is written on
		# a wiki configured for Western UTF-8 mode. The Unicode characters are
		# expanded to hex codes and the "words" are very long paragraph-length
		# monstrosities. On a large page the above regexps may take over 20
		# seconds *each* on a 1GHz-level processor.
		#
		# Following are reversed versions which are consistently fast
		# (about 3 milliseconds on 1GHz-level processor).
		#
		$text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
		$text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );

		# Strip wiki '' and '''
		$text = preg_replace( "/''[']*/", " ", $text );
		wfProfileOut( "$fname-regexps" );

		wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) );

		# Perform the actual update
		$search->update( $this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ),
			$text );

		wfProfileOut( $fname );
	}
}
 107+
/**
 * Placeholder subclass; it adds nothing to SearchUpdate.
 * @ingroup Search
 */
class SearchUpdateMyISAM extends SearchUpdate {
	// Intentionally empty: everything is inherited from SearchUpdate.
}
Property changes on: trunk/phase3/includes/SearchUpdate.php
___________________________________________________________________
Added: svn:eol-style
1115 + native
Added: svn:keywords
2116 + Author Date Id Revision
Index: trunk/phase3/includes/SearchOracle.php
@@ -0,0 +1,240 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
/**
 * Search engine hook base class for Oracle (ConText).
 * @ingroup Search
 */
class SearchOracle extends SearchEngine {
	/** Database object handed to the constructor; used to build and run search queries */
	var $db;

	/**
	 * @param $db Database object
	 */
	function __construct( $db ) {
		$this->db = $db;
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * @param string $term - Raw search term
	 * @return OracleSearchResultSet
	 * @access public
	 */
	function searchText( $term ) {
		$sql = $this->getQuery( $this->filter( $term ), true );
		$resultSet = $this->db->resultObject( $this->db->query( $sql ) );
		return new OracleSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * @param string $term - Raw search term
	 * @return OracleSearchResultSet
	 * @access public
	 */
	function searchTitle( $term ) {
		$sql = $this->getQuery( $this->filter( $term ), false );
		$resultSet = $this->db->resultObject( $this->db->query( $sql ) );
		// Use this backend's own result set class, as searchText() does
		return new OracleSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Return a partial WHERE clause to exclude redirects, if so set
	 * @return string
	 * @private
	 */
	function queryRedirect() {
		if ( $this->showRedirects ) {
			return '';
		}
		return 'AND page_is_redirect=0';
	}

	/**
	 * Return a partial WHERE clause to limit the search to the given namespaces
	 * @return string Empty when $this->namespaces is null (search everything)
	 * @private
	 */
	function queryNamespaces() {
		if ( is_null( $this->namespaces ) ) {
			return '';
		}
		// Force integers so nothing else can reach the SQL text
		$namespaces = implode( ',', array_map( 'intval', $this->namespaces ) );
		if ( $namespaces == '' ) {
			$namespaces = '0';
		}
		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Apply the configured limit and offset to a query.
	 * @param string $sql
	 * @return string
	 * @private
	 */
	function queryLimit( $sql ) {
		return $this->db->limitResult( $sql, $this->limit, $this->offset );
	}

	/**
	 * Return the ORDER BY clause; results are ordered by score(1),
	 * the label given to the CONTAINS() call built in parseQuery().
	 * Both parameters are unused here.
	 * @return string
	 * @private
	 */
	function queryRanking( $filteredTerm, $fulltext ) {
		return ' ORDER BY score(1)';
	}

	/**
	 * Construct the full SQL query to do the search.
	 * The guts should be constructed in queryMain()
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 * @private
	 */
	function getQuery( $filteredTerm, $fulltext ) {
		return $this->queryLimit( $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
			$this->queryRedirect() . ' ' .
			$this->queryNamespaces() . ' ' .
			$this->queryRanking( $filteredTerm, $fulltext ) . ' ' );
	}

	/**
	 * Picks which field to index on, depending on what type of query.
	 * @param bool $fulltext
	 * @return string
	 */
	function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Get the base part of the search query.
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 * @private
	 */
	function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return 'SELECT page_id, page_namespace, page_title ' .
			"FROM $page,$searchindex " .
			'WHERE page_id=si_page AND ' . $match;
	}

	/**
	 * Turn the filtered search text into a CONTAINS() condition and
	 * rebuild $this->searchTerms with one regex fragment per term.
	 *
	 * @param string $filteredText
	 * @param bool $fulltext Selects the indexed column, see getIndexField()
	 * @return string Partial WHERE clause
	 */
	function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;
		$lc = SearchEngine::legalSearchChars();
		$this->searchTerms = array();

		# FIXME: This doesn't handle parenthetical expressions.
		$m = array();
		$q = array();

		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			  $filteredText, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $terms ) {
				$q[] = $terms[1] . $wgContLang->stripForSearch( $terms[2] );

				if ( !empty( $terms[3] ) ) {
					# Bare word, optionally followed by a * wildcard
					$regexp = preg_quote( $terms[3], '/' );
					if ( $terms[4] ) {
						$regexp .= "[0-9A-Za-z_]+";
					}
				} else {
					# Quoted phrase: drop the quotes
					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
				}
				$this->searchTerms[] = $regexp;
			}
		}

		$searchon = $this->db->strencode( join( ',', $q ) );
		$field = $this->getIndexField( $fulltext );
		return " CONTAINS($field, '$searchon', 1) > 0 ";
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	function update( $id, $title, $text ) {
		$dbw = wfGetDB( DB_MASTER );
		$dbw->replace( 'searchindex',
			array( 'si_page' ),
			array(
				'si_page' => $id,
				'si_title' => $title,
				'si_text' => $text
			), 'SearchOracle::update' );
		$dbw->query( "CALL ctx_ddl.sync_index('si_text_idx')" );
		$dbw->query( "CALL ctx_ddl.sync_index('si_title_idx')" );
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 */
	function updateTitle( $id, $title ) {
		$dbw = wfGetDB( DB_MASTER );

		$dbw->update( 'searchindex',
			array( 'si_title' => $title ),
			array( 'si_page' => $id ),
			'SearchOracle::updateTitle',
			array() );
	}
}
 217+
/**
 * Result set wrapper for the Oracle backend; rows are handed out as
 * plain SearchResult objects.
 * @ingroup Search
 */
class OracleSearchResultSet extends SearchResultSet {
	/**
	 * @param $resultSet Database result object to iterate over
	 * @param array $terms Matched terms, returned unchanged by termMatches()
	 */
	function __construct( $resultSet, $terms ) {
		$this->mTerms = $terms;
		$this->mResultSet = $resultSet;
	}

	/**
	 * @return array The terms given to the constructor
	 */
	function termMatches() {
		return $this->mTerms;
	}

	/**
	 * @return int Row count reported by the underlying result object
	 */
	function numRows() {
		return $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next row.
	 *
	 * @return SearchResult, or false once the rows are exhausted
	 */
	function next() {
		$record = $this->mResultSet->fetchObject();
		if ( $record !== false ) {
			return new SearchResult( $record );
		} else {
			return false;
		}
	}
}
Property changes on: trunk/phase3/includes/SearchOracle.php
___________________________________________________________________
Added: svn:eol-style
1242 + native
Index: trunk/phase3/includes/AutoLoader.php
@@ -126,8 +126,10 @@
127127 'MimeMagic' => 'includes/MimeMagic.php',
128128 'MWException' => 'includes/Exception.php',
129129 'MWNamespace' => 'includes/Namespace.php',
 130+ 'MySQLSearchResultSet' => 'includes/SearchMySQL.php',
130131 'Namespace' => 'includes/NamespaceCompat.php', // Compat
131132 'OldChangesList' => 'includes/ChangesList.php',
 133+ 'OracleSearchResultSet' => 'includes/SearchOracle.php',
132134 'OutputPage' => 'includes/OutputPage.php',
133135 'PageHistory' => 'includes/PageHistory.php',
134136 'PageHistoryPager' => 'includes/PageHistory.php',
@@ -135,6 +137,8 @@
136138 'Pager' => 'includes/Pager.php',
137139 'PasswordError' => 'includes/User.php',
138140 'PatrolLog' => 'includes/PatrolLog.php',
 141+ 'PostgresSearchResult' => 'includes/SearchPostgres.php',
 142+ 'PostgresSearchResultSet' => 'includes/SearchPostgres.php',
139143 'PrefixSearch' => 'includes/PrefixSearch.php',
140144 'Profiler' => 'includes/Profiler.php',
141145 'ProfilerSimple' => 'includes/ProfilerSimple.php',
@@ -154,6 +158,18 @@
155159 'Revision' => 'includes/Revision.php',
156160 'RSSFeed' => 'includes/Feed.php',
157161 'Sanitizer' => 'includes/Sanitizer.php',
 162+ 'SearchEngineDummy' => 'includes/SearchEngine.php',
 163+ 'SearchEngine' => 'includes/SearchEngine.php',
 164+ 'SearchHighlighter' => 'includes/SearchEngine.php',
 165+ 'SearchMySQL4' => 'includes/SearchMySQL4.php',
 166+ 'SearchMySQL' => 'includes/SearchMySQL.php',
 167+ 'SearchOracle' => 'includes/SearchOracle.php',
 168+ 'SearchPostgres' => 'includes/SearchPostgres.php',
 169+ 'SearchResult' => 'includes/SearchEngine.php',
 170+ 'SearchResultSet' => 'includes/SearchEngine.php',
 171+ 'SearchResultTooMany' => 'includes/SearchEngine.php',
 172+ 'SearchUpdate' => 'includes/SearchUpdate.php',
 173+ 'SearchUpdateMyISAM' => 'includes/SearchUpdate.php',
158174 'SiteConfiguration' => 'includes/SiteConfiguration.php',
159175 'SiteStats' => 'includes/SiteStats.php',
160176 'SiteStatsUpdate' => 'includes/SiteStats.php',
@@ -360,24 +376,6 @@
361377 'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php',
362378 'StripState' => 'includes/parser/Parser.php',
363379
364 - # includes/search
365 - 'OracleSearchResultSet' => 'includes/search/Oracle.php',
366 - 'PostgresSearchResult' => 'includes/search/Postgres.php',
367 - 'PostgresSearchResultSet' => 'includes/search/Postgres.php',
368 - 'MySQLSearchResultSet' => 'includes/Search/MySQL.php',
369 - 'SearchEngineDummy' => 'includes/search/Engine.php',
370 - 'SearchEngine' => 'includes/search/Engine.php',
371 - 'SearchHighlighter' => 'includes/search/Engine.php',
372 - 'SearchMySQL4' => 'includes/search/MySQL4.php',
373 - 'SearchMySQL' => 'includes/search/MySQL.php',
374 - 'SearchOracle' => 'includes/search/Oracle.php',
375 - 'SearchPostgres' => 'includes/search/Postgres.php',
376 - 'SearchResult' => 'includes/search/Engine.php',
377 - 'SearchResultSet' => 'includes/search/Engine.php',
378 - 'SearchResultTooMany' => 'includes/search/Engine.php',
379 - 'SearchUpdate' => 'includes/search/Update.php',
380 - 'SearchUpdateMyISAM' => 'includes/search/Update.php',
381 -
382380 # includes/specials
383381 'AncientPagesPage' => 'includes/specials/Ancientpages.php',
384382 'BrokenRedirectsPage' => 'includes/specials/BrokenRedirects.php',
Index: trunk/phase3/includes/SearchTsearch2.php
@@ -0,0 +1,120 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>, Domas Mituzas <domas.mituzas@gmail.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * Search engine hook for PostgreSQL / Tsearch2
 23+ * @file
 24+ * @ingroup Search
 25+ */
 26+
/**
 * @todo document
 * @ingroup Search
 */
class SearchTsearch2 extends SearchEngine {
	var $strictMatching = false;

	/** Database object handed to the constructor */
	var $db;

	/** Whether queryRanking() emits an ORDER BY rank(...) clause */
	var $mRanking;

	/**
	 * @param $db Database object
	 */
	function __construct( $db ) {
		$this->db = $db;
		$this->mRanking = true;
	}

	/**
	 * Picks which field to search, depending on what type of query.
	 * @param bool $fulltext
	 * @return string
	 */
	function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Turn the filtered search text into a to_tsquery() argument and
	 * rebuild $this->searchTerms with one regex fragment per term.
	 *
	 * @param string $filteredText
	 * @param bool $fulltext Not used by this method
	 * @return string Terms joined with '&', passed through strencode()
	 */
	function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;
		$lc = SearchEngine::legalSearchChars();
		$searchon = '';
		$this->searchTerms = array();

		# FIXME: This doesn't handle parenthetical expressions.
		$m = array();
		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			  $filteredText, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $terms ) {
				if ( $searchon !== '' ) {
					$searchon .= ' ';
				}
				if ( $this->strictMatching && ( $terms[1] == '' ) ) {
					$terms[1] = '+';
				}
				$searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
				if ( !empty( $terms[3] ) ) {
					# Bare word, optionally followed by a * wildcard
					$regexp = preg_quote( $terms[3], '/' );
					if ( $terms[4] ) {
						$regexp .= "[0-9A-Za-z_]+";
					}
				} else {
					# Quoted phrase: drop the quotes
					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
				}
				$this->searchTerms[] = $regexp;
			}
			wfDebug( "Would search with '$searchon'\n" );
			wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" );
		} else {
			# Report the text we were actually given (the parameter);
			# this class has no filteredText property
			wfDebug( "Can't understand search query '{$filteredText}'\n" );
		}

		$searchon = preg_replace( '/(\s+)/', '&', $searchon );
		$searchon = $this->db->strencode( $searchon );
		return $searchon;
	}

	/**
	 * Return the ORDER BY clause, or an empty string when ranking is off.
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	function queryRanking( $filteredTerm, $fulltext ) {
		$field = $this->getIndexField( $fulltext );
		$searchon = $this->parseQuery( $filteredTerm, $fulltext );
		if ( $this->mRanking ) {
			return " ORDER BY rank($field,to_tsquery('$searchon')) DESC";
		}
		return "";
	}

	/**
	 * Get the base part of the search query.
	 * NOTE(review): this selects from the "cur" table, while the other
	 * backends in this revision query "page" - confirm it is still valid.
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 */
	function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$field = $this->getIndexField( $fulltext );
		$cur = $this->db->tableName( 'cur' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' .
			"FROM $cur,$searchindex " .
			'WHERE cur_id=si_page AND ' .
			" $field @@ to_tsquery ('$match') " ;
	}

	/**
	 * Replace the search index record for the given page.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	function update( $id, $title, $text ) {
		// The id is interpolated into SQL below, so force it to an integer
		$id = intval( $id );

		$dbw = wfGetDB( DB_MASTER );
		$searchindex = $dbw->tableName( 'searchindex' );
		$sql = "DELETE FROM $searchindex WHERE si_page={$id}";
		$dbw->query( $sql, __METHOD__ );
		$sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ".
			" VALUES ( $id, to_tsvector('".
			$dbw->strencode( $title ).
			"'),to_tsvector('".
			$dbw->strencode( $text )."')) ";
		$dbw->query( $sql, __METHOD__ );
	}

	/**
	 * Update a search index record's title only.
	 *
	 * @param int $id
	 * @param string $title
	 */
	function updateTitle( $id, $title ) {
		// The id is interpolated into SQL below, so force it to an integer
		$id = intval( $id );

		$dbw = wfGetDB( DB_MASTER );
		$searchindex = $dbw->tableName( 'searchindex' );
		$sql = "UPDATE $searchindex SET si_title=to_tsvector('" .
			$dbw->strencode( $title ) .
			"') WHERE si_page={$id}";

		$dbw->query( $sql, __METHOD__ );
	}
}
Property changes on: trunk/phase3/includes/SearchTsearch2.php
___________________________________________________________________
Added: svn:eol-style
1122 + native
Added: svn:keywords
2123 + Author Date Id Revision
Index: trunk/phase3/includes/SearchMySQL4.php
@@ -0,0 +1,34 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
/**
 * Search engine hook for MySQL 4+
 * Kept only for backwards compatibility: the implementation lives in
 * SearchMySQL now that the 3.x variety is gone.
 * @ingroup Search
 * @deprecated
 */
class SearchMySQL4 extends SearchMySQL {
	// Intentionally empty: everything is inherited from SearchMySQL.
}
Property changes on: trunk/phase3/includes/SearchMySQL4.php
___________________________________________________________________
Added: svn:eol-style
136 + native
Added: svn:keywords
237 + Author Date Id Revision
Index: trunk/phase3/includes/SearchMySQL.php
@@ -0,0 +1,262 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
/**
 * Search engine hook for MySQL 4+
 *
 * Builds boolean-mode MATCH ... AGAINST queries joining the page and
 * searchindex tables, and writes the searchindex rows.
 *
 * @ingroup Search
 */
class SearchMySQL extends SearchEngine {
	/**
	 * When true, parseQuery() prefixes every term that has no explicit
	 * modifier with '+'.
	 */
	var $strictMatching = true;

	/** Database object handed to the constructor; used to build and run the search queries. */
	var $db;

	/**
	 * @param $db Database object used for the search queries
	 */
	function __construct( $db ) {
		$this->db = $db;
	}

	/**
	 * Turn a filtered search string into a MATCH ... AGAINST SQL fragment.
	 *
	 * As a side effect, $this->searchTerms is reset and refilled with one
	 * regular expression fragment per recognised term, for use in result
	 * highlighting.
	 *
	 * @param string $filteredText Search string, already passed through filter()
	 * @param bool $fulltext True to match the text field, false for the title field
	 * @return string SQL fragment (with surrounding spaces)
	 */
	function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;
		$lc = SearchEngine::legalSearchChars(); // Minus format chars
		$searchon = '';
		$this->searchTerms = array();

		# FIXME: This doesn't handle parenthetical expressions.
		# Groups: 1 = modifier, 2 = whole term, 3 = bare word, 4 = trailing '*'.
		# For a "quoted phrase" only groups 1 and 2 are present.
		$m = array();
		if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			$filteredText, $m, PREG_SET_ORDER ) ) {
			foreach( $m as $terms ) {
				if( $searchon !== '' ) {
					$searchon .= ' ';
				}
				if( $this->strictMatching && ( $terms[1] == '' ) ) {
					$terms[1] = '+';
				}
				$searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );

				// Explicit test rather than empty(): empty( '0' ) is true, which
				// used to push the bare word "0" down the quoted-phrase branch.
				if( isset( $terms[3] ) && $terms[3] !== '' ) {
					// Match individual terms in result highlighting...
					$regexp = preg_quote( $terms[3], '/' );
					if( !empty( $terms[4] ) ) {
						// Trailing '*' wildcard
						$regexp .= "[0-9A-Za-z_]+";
					}
				} else {
					// Match the quoted term in result highlighting...
					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
				}
				$this->searchTerms[] = $regexp;
			}
			wfDebug( "Would search with '$searchon'\n" );
			wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" );
		} else {
			wfDebug( "Can't understand search query '{$filteredText}'\n" );
		}

		$searchon = $this->db->strencode( $searchon );
		$field = $this->getIndexField( $fulltext );
		return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) ";
	}

	/**
	 * Characters accepted in a search term: the parent's set plus the
	 * double quote and asterisk.
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return "\"*" . parent::legalSearchChars();
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * @param string $term - Raw search term
	 * @return MySQLSearchResultSet
	 * @access public
	 */
	function searchText( $term ) {
		$sql = $this->getQuery( $this->filter( $term ), true );
		// Pass the caller name so the query is attributed in logs/profiling
		$resultSet = $this->db->resultObject( $this->db->query( $sql, __METHOD__ ) );
		return new MySQLSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * @param string $term - Raw search term
	 * @return MySQLSearchResultSet
	 * @access public
	 */
	function searchTitle( $term ) {
		$sql = $this->getQuery( $this->filter( $term ), false );
		// Pass the caller name so the query is attributed in logs/profiling
		$resultSet = $this->db->resultObject( $this->db->query( $sql, __METHOD__ ) );
		return new MySQLSearchResultSet( $resultSet, $this->searchTerms );
	}


	/**
	 * Return a partial WHERE clause to exclude redirects, if so set
	 * @return string
	 * @private
	 */
	function queryRedirect() {
		if( $this->showRedirects ) {
			return '';
		} else {
			return 'AND page_is_redirect=0';
		}
	}

	/**
	 * Return a partial WHERE clause to limit the search to the given namespaces
	 *
	 * A null namespace list means "search all"; an empty list falls back
	 * to namespace 0.
	 *
	 * @return string
	 * @private
	 */
	function queryNamespaces() {
		if( is_null( $this->namespaces ) ) {
			return ''; # search all
		}
		// The list is interpolated straight into the SQL, so force every
		// entry to an integer rather than trusting whoever set it.
		$namespaces = implode( ',', array_map( 'intval', $this->namespaces ) );
		if( $namespaces == '' ) {
			$namespaces = '0';
		}
		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Return a LIMIT clause to limit results on the query.
	 *
	 * Built by the database object's limitResult() from $this->limit
	 * and $this->offset.
	 *
	 * @return string
	 * @private
	 */
	function queryLimit() {
		return $this->db->limitResult( '', $this->limit, $this->offset );
	}

	/**
	 * Does not do anything for generic search engine
	 * subclasses may define this though
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string Always the empty string here
	 * @private
	 */
	function queryRanking( $filteredTerm, $fulltext ) {
		return '';
	}

	/**
	 * Construct the full SQL query to do the search.
	 * The guts should be constructed in queryMain()
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 * @private
	 */
	function getQuery( $filteredTerm, $fulltext ) {
		return $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
			$this->queryRedirect() . ' ' .
			$this->queryNamespaces() . ' ' .
			$this->queryRanking( $filteredTerm, $fulltext ) . ' ' .
			$this->queryLimit();
	}


	/**
	 * Picks which field to index on, depending on what type of query.
	 * @param bool $fulltext
	 * @return string
	 */
	function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Get the base part of the search query.
	 * The actual match syntax will depend on the server
	 * version; MySQL 3 and MySQL 4 have different capabilities
	 * in their fulltext search indexes.
	 *
	 * @param string $filteredTerm
	 * @param bool $fulltext
	 * @return string
	 * @private
	 */
	function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );
		return 'SELECT page_id, page_namespace, page_title ' .
			"FROM $page,$searchindex " .
			'WHERE page_id=si_page AND ' . $match;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	function update( $id, $title, $text ) {
		$dbw = wfGetDB( DB_MASTER );
		$dbw->replace( 'searchindex',
			array( 'si_page' ),
			array(
				'si_page' => $id,
				'si_title' => $title,
				'si_text' => $text
			), __METHOD__ );
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 */
	function updateTitle( $id, $title ) {
		$dbw = wfGetDB( DB_MASTER );

		$dbw->update( 'searchindex',
			array( 'si_title' => $title ),
			array( 'si_page' => $id ),
			__METHOD__,
			array( $dbw->lowPriorityOption() ) );
	}
}
 233+
/**
 * Result set returned by SearchMySQL::searchText() and searchTitle().
 *
 * Wraps a database result object together with the highlighting terms
 * produced while parsing the query.
 *
 * @ingroup Search
 */
class MySQLSearchResultSet extends SearchResultSet {
	/** Database result object the rows are read from. */
	var $mResultSet;

	/** Array of regular expression fragments, one per search term. */
	var $mTerms;

	/**
	 * PHP 4 style constructor name, kept only so existing explicit calls
	 * keep working; it forwards to __construct(). Declared ahead of
	 * __construct() on purpose.
	 */
	function MySQLSearchResultSet( $resultSet, $terms ) {
		$this->__construct( $resultSet, $terms );
	}

	/**
	 * A method named after the class is no longer treated as a
	 * constructor by PHP 8, so the real constructor lives here.
	 *
	 * @param $resultSet Database result object
	 * @param array $terms Highlighting terms for the executed query
	 */
	function __construct( $resultSet, $terms ) {
		$this->mResultSet = $resultSet;
		$this->mTerms = $terms;
	}

	/**
	 * @return array The terms passed to the constructor
	 */
	function termMatches() {
		return $this->mTerms;
	}

	/**
	 * @return Row count as reported by the wrapped result object
	 */
	function numRows() {
		return $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next row of the result.
	 *
	 * @return SearchResult, or false when fetchObject() returns false
	 */
	function next() {
		$row = $this->mResultSet->fetchObject();
		if( $row === false ) {
			return false;
		}
		return new SearchResult( $row );
	}

	/**
	 * Free the wrapped result object.
	 */
	function free() {
		$this->mResultSet->free();
	}
}
Property changes on: trunk/phase3/includes/SearchMySQL.php
___________________________________________________________________
Added: svn:eol-style
1264 + native
Added: svn:keywords
2265 + Author Date Id Revision

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r36403 | More ~/includes cleanup. Moving all the Search*.php files to ~/includes/search. | demon | 20:58, 17 June 2008

Status & tagging log