r36413 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r36412‎ | r36413 | r36414 >
Date:09:01, 18 June 2008
Author:btongminh
Status:old
Tags:
Comment:
Recommit r36403 with working version history hopefully: Move Search*.php to search/*.php
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/SearchEngine.php (deleted) (history)
  • /trunk/phase3/includes/SearchMySQL.php (deleted) (history)
  • /trunk/phase3/includes/SearchMySQL4.php (deleted) (history)
  • /trunk/phase3/includes/SearchOracle.php (deleted) (history)
  • /trunk/phase3/includes/SearchPostgres.php (deleted) (history)
  • /trunk/phase3/includes/SearchTsearch2.php (deleted) (history)
  • /trunk/phase3/includes/SearchUpdate.php (deleted) (history)
  • /trunk/phase3/includes/search (added) (history)
  • /trunk/phase3/includes/search/Engine.php (added) (history)
  • /trunk/phase3/includes/search/Engine.php (added) (history)
  • /trunk/phase3/includes/search/MySQL.php (added) (history)
  • /trunk/phase3/includes/search/MySQL.php (added) (history)
  • /trunk/phase3/includes/search/MySQL4.php (added) (history)
  • /trunk/phase3/includes/search/MySQL4.php (added) (history)
  • /trunk/phase3/includes/search/Oracle.php (added) (history)
  • /trunk/phase3/includes/search/Oracle.php (added) (history)
  • /trunk/phase3/includes/search/Postgres.php (added) (history)
  • /trunk/phase3/includes/search/Postgres.php (added) (history)
  • /trunk/phase3/includes/search/Tsearch2.php (added) (history)
  • /trunk/phase3/includes/search/Tsearch2.php (added) (history)
  • /trunk/phase3/includes/search/Update.php (added) (history)
  • /trunk/phase3/includes/search/Update.php (added) (history)

Diff [purge]

Index: trunk/phase3/includes/SearchEngine.php
@@ -1,1154 +0,0 @@
2 -<?php
3 -/**
4 - * @defgroup Search Search
5 - *
6 - * @file
7 - * @ingroup Search
8 - */
9 -
10 -/**
11 - * Base class for search engines; database-specific engines extend it
12 - * @ingroup Search
13 - */
14 -class SearchEngine {
15 - var $limit = 10;
16 - var $offset = 0;
17 - var $searchTerms = array();
18 - var $namespaces = array( NS_MAIN );
19 - var $showRedirects = false;
20 -
21 - /**
22 - * Perform a full text search query and return a result set.
23 - * If text searches are not supported or disabled, return null.
24 - *
25 - * @param string $term - Raw search term
26 - * @return SearchResultSet
27 - * @access public
28 - * @abstract
29 - */
30 - function searchText( $term ) {
31 - return null;
32 - }
33 -
34 - /**
35 - * Perform a title-only search query and return a result set.
36 - * If title searches are not supported or disabled, return null.
37 - *
38 - * @param string $term - Raw search term
39 - * @return SearchResultSet
40 - * @access public
41 - * @abstract
42 - */
43 - function searchTitle( $term ) {
44 - return null;
45 - }
46 -
47 - /**
48 - * If an exact title match can be found, or a very slightly close match,
49 - * return the title. If no match, returns NULL.
50 - *
51 - * @param string $searchterm
52 - * @return Title
53 - */
54 - public static function getNearMatch( $searchterm ) {
55 - global $wgContLang;
56 -
57 - $allSearchTerms = array($searchterm);
58 -
59 - if($wgContLang->hasVariants()){
60 - $allSearchTerms = array_merge($allSearchTerms,$wgContLang->convertLinkToAllVariants($searchterm));
61 - }
62 -
63 - foreach($allSearchTerms as $term){
64 -
65 - # Exact match? No need to look further.
66 - $title = Title::newFromText( $term );
67 - if (is_null($title))
68 - return NULL;
69 -
70 - if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal()
71 - || $title->exists() ) {
72 - return $title;
73 - }
74 -
75 - # Now try all lower case (i.e. first letter capitalized)
76 - #
77 - $title = Title::newFromText( $wgContLang->lc( $term ) );
78 - if ( $title && $title->exists() ) {
79 - return $title;
80 - }
81 -
82 - # Now try capitalized string
83 - #
84 - $title = Title::newFromText( $wgContLang->ucwords( $term ) );
85 - if ( $title && $title->exists() ) {
86 - return $title;
87 - }
88 -
89 - # Now try all upper case
90 - #
91 - $title = Title::newFromText( $wgContLang->uc( $term ) );
92 - if ( $title && $title->exists() ) {
93 - return $title;
94 - }
95 -
96 - # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
97 - $title = Title::newFromText( $wgContLang->ucwordbreaks($term) );
98 - if ( $title && $title->exists() ) {
99 - return $title;
100 - }
101 -
102 - global $wgCapitalLinks, $wgContLang;
103 - if( !$wgCapitalLinks ) {
104 - // Catch differs-by-first-letter-case-only
105 - $title = Title::newFromText( $wgContLang->ucfirst( $term ) );
106 - if ( $title && $title->exists() ) {
107 - return $title;
108 - }
109 - $title = Title::newFromText( $wgContLang->lcfirst( $term ) );
110 - if ( $title && $title->exists() ) {
111 - return $title;
112 - }
113 - }
114 -
115 - // Give hooks a chance at better match variants
116 - $title = null;
117 - if( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
118 - return $title;
119 - }
120 - }
121 -
122 - $title = Title::newFromText( $searchterm );
123 -
124 - # Entering an IP address goes to the contributions page
125 - if ( ( $title->getNamespace() == NS_USER && User::isIP($title->getText() ) )
126 - || User::isIP( trim( $searchterm ) ) ) {
127 - return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
128 - }
129 -
130 -
131 - # Entering a user goes to the user page whether it's there or not
132 - if ( $title->getNamespace() == NS_USER ) {
133 - return $title;
134 - }
135 -
136 - # Go to images that exist even if there's no local page.
137 - # There may have been a funny upload, or it may be on a shared
138 - # file repository such as Wikimedia Commons.
139 - if( $title->getNamespace() == NS_IMAGE ) {
140 - $image = wfFindFile( $title );
141 - if( $image ) {
142 - return $title;
143 - }
144 - }
145 -
146 - # MediaWiki namespace? Page may be "implied" if not customized.
147 - # Just return it, with caps forced as the message system likes it.
148 - if( $title->getNamespace() == NS_MEDIAWIKI ) {
149 - return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
150 - }
151 -
152 - # Quoted term? Try without the quotes...
153 - $matches = array();
154 - if( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
155 - return SearchEngine::getNearMatch( $matches[1] );
156 - }
157 -
158 - return NULL;
159 - }
160 -
161 - public static function legalSearchChars() {
162 - return "A-Za-z_'0-9\\x80-\\xFF\\-";
163 - }
164 -
165 - /**
166 - * Set the maximum number of results to return
167 - * and how many to skip before returning the first.
168 - *
169 - * @param int $limit
170 - * @param int $offset
171 - * @access public
172 - */
173 - function setLimitOffset( $limit, $offset = 0 ) {
174 - $this->limit = intval( $limit );
175 - $this->offset = intval( $offset );
176 - }
177 -
178 - /**
179 - * Set which namespaces the search should include.
180 - * Give an array of namespace index numbers.
181 - *
182 - * @param array $namespaces
183 - * @access public
184 - */
185 - function setNamespaces( $namespaces ) {
186 - $this->namespaces = $namespaces;
187 - }
188 -
189 - /**
190 - * Parse some common prefixes: all (search everything)
191 - * or namespace names
192 - *
193 - * @param string $query
194 - */
195 - function replacePrefixes( $query ){
196 - global $wgContLang;
197 -
198 - if( strpos($query,':') === false )
199 - return $query; // nothing to do
200 -
201 - $parsed = $query;
202 - $allkeyword = wfMsgForContent('searchall').":";
203 - if( strncmp($query, $allkeyword, strlen($allkeyword)) == 0 ){
204 - $this->namespaces = null;
205 - $parsed = substr($query,strlen($allkeyword));
206 - } else if( strpos($query,':') !== false ) {
207 - $prefix = substr($query,0,strpos($query,':'));
208 - $index = $wgContLang->getNsIndex($prefix);
209 - if($index !== false){
210 - $this->namespaces = array($index);
211 - $parsed = substr($query,strlen($prefix)+1);
212 - }
213 - }
214 - if(trim($parsed) == '')
215 - return $query; // prefix was the whole query
216 -
217 - return $parsed;
218 - }
219 -
220 - /**
221 - * Make a list of searchable namespaces and their canonical names.
222 - * @return array
223 - */
224 - public static function searchableNamespaces() {
225 - global $wgContLang;
226 - $arr = array();
227 - foreach( $wgContLang->getNamespaces() as $ns => $name ) {
228 - if( $ns >= NS_MAIN ) {
229 - $arr[$ns] = $name;
230 - }
231 - }
232 - return $arr;
233 - }
234 -
235 - /**
236 - * Extract default namespaces to search from the given user's
237 - * settings, returning a list of index numbers.
238 - *
239 - * @param User $user
240 - * @return array
241 - * @static
242 - */
243 - public static function userNamespaces( &$user ) {
244 - $arr = array();
245 - foreach( SearchEngine::searchableNamespaces() as $ns => $name ) {
246 - if( $user->getOption( 'searchNs' . $ns ) ) {
247 - $arr[] = $ns;
248 - }
249 - }
250 - return $arr;
251 - }
252 -
253 - /**
254 - * Find snippet highlight settings for a given user
255 - *
256 - * @param User $user
257 - * @return array contextlines, contextchars
258 - * @static
259 - */
260 - public static function userHighlightPrefs( &$user ){
261 - //$contextlines = $user->getOption( 'contextlines', 5 );
262 - //$contextchars = $user->getOption( 'contextchars', 50 );
263 - $contextlines = 2; // Hardcode this. Old defaults sucked. :)
264 - $contextchars = 75; // same as above.... :P
265 - return array($contextlines, $contextchars);
266 - }
267 -
268 - /**
269 - * An array of namespaces indexes to be searched by default
270 - *
271 - * @return array
272 - * @static
273 - */
274 - public static function defaultNamespaces(){
275 - global $wgNamespacesToBeSearchedDefault;
276 -
277 - return array_keys($wgNamespacesToBeSearchedDefault, true);
278 - }
279 -
280 - /**
281 - * Return a 'cleaned up' search string
282 - *
283 - * @return string
284 - * @access public
285 - */
286 - function filter( $text ) {
287 - $lc = $this->legalSearchChars();
288 - return trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
289 - }
290 - /**
291 - * Load up the appropriate search engine class for the currently
292 - * active database backend, and return a configured instance.
293 - *
294 - * @return SearchEngine
295 - */
296 - public static function create() {
297 - global $wgDBtype, $wgSearchType;
298 - if( $wgSearchType ) {
299 - $class = $wgSearchType;
300 - } elseif( $wgDBtype == 'mysql' ) {
301 - $class = 'SearchMySQL';
302 - } else if ( $wgDBtype == 'postgres' ) {
303 - $class = 'SearchPostgres';
304 - } else if ( $wgDBtype == 'oracle' ) {
305 - $class = 'SearchOracle';
306 - } else {
307 - $class = 'SearchEngineDummy';
308 - }
309 - $search = new $class( wfGetDB( DB_SLAVE ) );
310 - $search->setLimitOffset(0,0);
311 - return $search;
312 - }
313 -
314 - /**
315 - * Create or update the search index record for the given page.
316 - * Title and text should be pre-processed.
317 - *
318 - * @param int $id
319 - * @param string $title
320 - * @param string $text
321 - * @abstract
322 - */
323 - function update( $id, $title, $text ) {
324 - // no-op
325 - }
326 -
327 - /**
328 - * Update a search index record's title only.
329 - * Title should be pre-processed.
330 - *
331 - * @param int $id
332 - * @param string $title
333 - * @abstract
334 - */
335 - function updateTitle( $id, $title ) {
336 - // no-op
337 - }
338 -
339 - /**
340 - * Get OpenSearch suggestion template
341 - *
342 - * @return string
343 - * @static
344 - */
345 - public static function getOpenSearchTemplate() {
346 - global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;
347 - if($wgOpenSearchTemplate)
348 - return $wgOpenSearchTemplate;
349 - else{
350 - $ns = implode(',',SearchEngine::defaultNamespaces());
351 - if(!$ns) $ns = "0";
352 - return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace='.$ns;
353 - }
354 - }
355 -
356 - /**
357 - * Get internal MediaWiki Suggest template
358 - *
359 - * @return string
360 - * @static
361 - */
362 - public static function getMWSuggestTemplate() {
363 - global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;
364 - if($wgMWSuggestTemplate)
365 - return $wgMWSuggestTemplate;
366 - else
367 - return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}';
368 - }
369 -}
370 -
371 -/**
372 - * @ingroup Search
373 - */
374 -class SearchResultSet {
375 - /**
376 - * Fetch an array of regular expression fragments for matching
377 - * the search terms as parsed by this engine in a text extract.
378 - *
379 - * @return array
380 - * @access public
381 - * @abstract
382 - */
383 - function termMatches() {
384 - return array();
385 - }
386 -
387 - function numRows() {
388 - return 0;
389 - }
390 -
391 - /**
392 - * Return true if results are included in this result set.
393 - * @return bool
394 - * @abstract
395 - */
396 - function hasResults() {
397 - return false;
398 - }
399 -
400 - /**
401 - * Some search modes return a total hit count for the query
402 - * in the entire article database. This may include pages
403 - * in namespaces that would not be matched on the given
404 - * settings.
405 - *
406 - * Return null if no total hits number is supported.
407 - *
408 - * @return int
409 - * @access public
410 - */
411 - function getTotalHits() {
412 - return null;
413 - }
414 -
415 - /**
416 - * Some search modes return a suggested alternate term if there are
417 - * no exact hits. Returns true if there is one on this set.
418 - *
419 - * @return bool
420 - * @access public
421 - */
422 - function hasSuggestion() {
423 - return false;
424 - }
425 -
426 - /**
427 - * @return string suggested query, null if none
428 - */
429 - function getSuggestionQuery(){
430 - return null;
431 - }
432 -
433 - /**
434 - * @return string highlighted suggested query, '' if none
435 - */
436 - function getSuggestionSnippet(){
437 - return '';
438 - }
439 -
440 - /**
441 - * Return information about how and from where the results were fetched,
442 - * should be useful for diagnostics and debugging
443 - *
444 - * @return string
445 - */
446 - function getInfo() {
447 - return null;
448 - }
449 -
450 - /**
451 - * Return a result set of hits on other (multiple) wikis associated with this one
452 - *
453 - * @return SearchResultSet
454 - */
455 - function getInterwikiResults() {
456 - return null;
457 - }
458 -
459 - /**
460 - * Check if there are results on other wikis
461 - *
462 - * @return boolean
463 - */
464 - function hasInterwikiResults() {
465 - return $this->getInterwikiResults() != null;
466 - }
467 -
468 -
469 - /**
470 - * Fetches next search result, or false.
471 - * @return SearchResult
472 - * @access public
473 - * @abstract
474 - */
475 - function next() {
476 - return false;
477 - }
478 -
479 - /**
480 - * Frees the result set, if applicable.
481 - * @access public
482 - */
483 - function free() {
484 - // ...
485 - }
486 -}
487 -
488 -
489 -/**
490 - * @ingroup Search
491 - */
492 -class SearchResultTooMany {
493 - ## Some search engines may bail out if too many matches are found
494 -}
495 -
496 -
497 -/**
498 - * @ingroup Search
499 - */
500 -class SearchResult {
501 - var $mRevision = null;
502 -
503 - function SearchResult( $row ) {
504 - $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
505 - if( !is_null($this->mTitle) )
506 - $this->mRevision = Revision::newFromTitle( $this->mTitle );
507 - }
508 -
509 - /**
510 - * Check if this result points to an invalid title
511 - *
512 - * @return boolean
513 - * @access public
514 - */
515 - function isBrokenTitle(){
516 - if( is_null($this->mTitle) )
517 - return true;
518 - return false;
519 - }
520 -
521 - /**
522 - * Check if target page is missing, happens when index is out of date
523 - *
524 - * @return boolean
525 - * @access public
526 - */
527 - function isMissingRevision(){
528 - if( !$this->mRevision )
529 - return true;
530 - return false;
531 - }
532 -
533 - /**
534 - * @return Title
535 - * @access public
536 - */
537 - function getTitle() {
538 - return $this->mTitle;
539 - }
540 -
541 - /**
542 - * @return double or null if not supported
543 - */
544 - function getScore() {
545 - return null;
546 - }
547 -
548 - /**
549 - * Lazy initialization of article text from DB
550 - */
551 - protected function initText(){
552 - if( !isset($this->mText) ){
553 - $this->mText = $this->mRevision->getText();
554 - }
555 - }
556 -
557 - /**
558 - * @param array $terms terms to highlight
559 - * @return string highlighted text snippet, null (and not '') if not supported
560 - */
561 - function getTextSnippet($terms){
562 - global $wgUser, $wgAdvancedSearchHighlighting;
563 - $this->initText();
564 - list($contextlines,$contextchars) = SearchEngine::userHighlightPrefs($wgUser);
565 - $h = new SearchHighlighter();
566 - if( $wgAdvancedSearchHighlighting )
567 - return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
568 - else
569 - return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
570 - }
571 -
572 - /**
573 - * @param array $terms terms to highlight
574 - * @return string highlighted title, '' if not supported
575 - */
576 - function getTitleSnippet($terms){
577 - return '';
578 - }
579 -
580 - /**
581 - * @param array $terms terms to highlight
582 - * @return string highlighted redirect name (redirect to this page), '' if none or not supported
583 - */
584 - function getRedirectSnippet($terms){
585 - return '';
586 - }
587 -
588 - /**
589 - * @return Title object for the redirect to this page, null if none or not supported
590 - */
591 - function getRedirectTitle(){
592 - return null;
593 - }
594 -
595 - /**
596 - * @return string highlighted relevant section name, '' if none or not supported
597 - */
598 - function getSectionSnippet(){
599 - return '';
600 - }
601 -
602 - /**
603 - * @return Title object (pagename+fragment) for the section, null if none or not supported
604 - */
605 - function getSectionTitle(){
606 - return null;
607 - }
608 -
609 - /**
610 - * @return string timestamp
611 - */
612 - function getTimestamp(){
613 - return $this->mRevision->getTimestamp();
614 - }
615 -
616 - /**
617 - * @return int number of words
618 - */
619 - function getWordCount(){
620 - $this->initText();
621 - return str_word_count( $this->mText );
622 - }
623 -
624 - /**
625 - * @return int size in bytes
626 - */
627 - function getByteSize(){
628 - $this->initText();
629 - return strlen( $this->mText );
630 - }
631 -
632 - /**
633 - * @return boolean if hit has related articles
634 - */
635 - function hasRelated(){
636 - return false;
637 - }
638 -
639 - /**
640 - * @return string interwiki prefix of the title (return it even if title is broken)
641 - */
642 - function getInterwikiPrefix(){
643 - return '';
644 - }
645 -}
646 -
647 -/**
648 - * Highlight bits of wikitext
649 - *
650 - * @ingroup Search
651 - */
652 -class SearchHighlighter {
653 - var $mCleanWikitext = true;
654 -
655 - function SearchHighlighter($cleanupWikitext = true){
656 - $this->mCleanWikitext = $cleanupWikitext;
657 - }
658 -
659 - /**
660 - * Default implementation of wikitext highlighting
661 - *
662 - * @param string $text
663 - * @param array $terms Terms to highlight (unescaped)
664 - * @param int $contextlines
665 - * @param int $contextchars
666 - * @return string
667 - */
668 - public function highlightText( $text, $terms, $contextlines, $contextchars ) {
669 - global $wgLang, $wgContLang;
670 - global $wgSearchHighlightBoundaries;
671 - $fname = __METHOD__;
672 -
673 - if($text == '')
674 - return '';
675 -
676 - // split text into text + templates/links/tables
677 - $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
678 - // first capture group is for detecting nested templates/links/tables/references
679 - $endPatterns = array(
680 - 1 => '/(\{\{)|(\}\})/', // template
681 - 2 => '/(\[\[)|(\]\])/', // image
682 - 3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table
683 -
684 - // FIXME: this should probably be a hook or something
685 - if(function_exists('wfCite')){
686 - $spat .= '|(<ref>)'; // references via cite extension
687 - $endPatterns[4] = '/(<ref>)|(<\/ref>)/';
688 - }
689 - $spat .= '/';
690 - $textExt = array(); // text extracts
691 - $otherExt = array(); // other extracts
692 - wfProfileIn( "$fname-split" );
693 - $start = 0;
694 - $textLen = strlen($text);
695 - $count = 0; // sequence number to maintain ordering
696 - while( $start < $textLen ){
697 - // find start of template/image/table
698 - if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){
699 - $epat = '';
700 - foreach($matches as $key => $val){
701 - if($key > 0 && $val[1] != -1){
702 - if($key == 2){
703 - // see if this is an image link
704 - $ns = substr($val[0],2,-1);
705 - if( $wgContLang->getNsIndex($ns) != NS_IMAGE )
706 - break;
707 -
708 - }
709 - $epat = $endPatterns[$key];
710 - $this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
711 - $start = $val[1];
712 - break;
713 - }
714 - }
715 - if( $epat ){
716 - // find end (and detect any nested elements)
717 - $level = 0;
718 - $offset = $start + 1;
719 - $found = false;
720 - while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){
721 - if( array_key_exists(2,$endMatches) ){
722 - // found end
723 - if($level == 0){
724 - $len = strlen($endMatches[2][0]);
725 - $off = $endMatches[2][1];
726 - $this->splitAndAdd( $otherExt, $count,
727 - substr( $text, $start, $off + $len - $start ) );
728 - $start = $off + $len;
729 - $found = true;
730 - break;
731 - } else{
732 - // end of nested element
733 - $level -= 1;
734 - }
735 - } else{
736 - // nested
737 - $level += 1;
738 - }
739 - $offset = $endMatches[0][1] + strlen($endMatches[0][0]);
740 - }
741 - if( ! $found ){
742 - // couldn't find appropriate closing tag, skip
743 - $this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) );
744 - $start += strlen($matches[0][0]);
745 - }
746 - continue;
747 - }
748 - }
749 - // else: add as text extract
750 - $this->splitAndAdd( $textExt, $count, substr($text,$start) );
751 - break;
752 - }
753 -
754 - $all = $textExt + $otherExt; // these have disjunct key sets
755 -
756 - wfProfileOut( "$fname-split" );
757 -
758 - // prepare regexps
759 - foreach( $terms as $index => $term ) {
760 - $terms[$index] = preg_quote( $term, '/' );
761 - // manually do upper/lowercase stuff for utf-8 since PHP won't do it
762 - if(preg_match('/[\x80-\xff]/', $term) ){
763 - $terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]);
764 - }
765 -
766 -
767 - }
768 - $anyterm = implode( '|', $terms );
769 - $phrase = implode("$wgSearchHighlightBoundaries+", $terms );
770 -
771 - // FIXME: a hack to scale contextchars, a correct solution
772 - // would be to have contextchars actually be char and not byte
773 - // length, and do proper utf-8 substrings and lengths everywhere,
774 - // but PHP is making that very hard and unclean to implement :(
775 - $scale = strlen($anyterm) / mb_strlen($anyterm);
776 - $contextchars = intval( $contextchars * $scale );
777 -
778 - $patPre = "(^|$wgSearchHighlightBoundaries)";
779 - $patPost = "($wgSearchHighlightBoundaries|$)";
780 -
781 - $pat1 = "/(".$phrase.")/ui";
782 - $pat2 = "/$patPre(".$anyterm.")$patPost/ui";
783 -
784 - wfProfileIn( "$fname-extract" );
785 -
786 - $left = $contextlines;
787 -
788 - $snippets = array();
789 - $offsets = array();
790 -
791 - // show beginning only if it contains all words
792 - $first = 0;
793 - $firstText = '';
794 - foreach($textExt as $index => $line){
795 - if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){
796 - $firstText = $this->extract( $line, 0, $contextchars * $contextlines );
797 - $first = $index;
798 - break;
799 - }
800 - }
801 - if( $firstText ){
802 - $succ = true;
803 - // check if first text contains all terms
804 - foreach($terms as $term){
805 - if( ! preg_match("/$patPre".$term."$patPost/ui", $firstText) ){
806 - $succ = false;
807 - break;
808 - }
809 - }
810 - if( $succ ){
811 - $snippets[$first] = $firstText;
812 - $offsets[$first] = 0;
813 - }
814 - }
815 - if( ! $snippets ) {
816 - // match whole query on text
817 - $this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets);
818 - // match whole query on templates/tables/images
819 - $this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets);
820 - // match any words on text
821 - $this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets);
822 - // match any words on templates/tables/images
823 - $this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets);
824 -
825 - ksort($snippets);
826 - }
827 -
828 - // add extra chars to each snippet to make snippets constant size
829 - $extended = array();
830 - if( count( $snippets ) == 0){
831 - // couldn't find the target words, just show beginning of article
832 - $targetchars = $contextchars * $contextlines;
833 - $snippets[$first] = '';
834 - $offsets[$first] = 0;
835 - } else{
836 - // if begin of the article contains the whole phrase, show only that !!
837 - if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first])
838 - && $offsets[$first] < $contextchars * 2 ){
839 - $snippets = array ($first => $snippets[$first]);
840 - }
841 -
842 - // calc by how much to extend existing snippets
843 - $targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) );
844 - }
845 -
846 - foreach($snippets as $index => $line){
847 - $extended[$index] = $line;
848 - $len = strlen($line);
849 - if( $len < $targetchars - 20 ){
850 - // complete this line
851 - if($len < strlen( $all[$index] )){
852 - $extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]);
853 - $len = strlen( $extended[$index] );
854 - }
855 -
856 - // add more lines
857 - $add = $index + 1;
858 - while( $len < $targetchars - 20
859 - && array_key_exists($add,$all)
860 - && !array_key_exists($add,$snippets) ){
861 - $offsets[$add] = 0;
862 - $tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
863 - $extended[$add] = $tt;
864 - $len += strlen( $tt );
865 - $add++;
866 - }
867 - }
868 - }
869 -
870 - //$snippets = array_map('htmlspecialchars', $extended);
871 - $snippets = $extended;
872 - $last = -1;
873 - $extract = '';
874 - foreach($snippets as $index => $line){
875 - if($last == -1)
876 - $extract .= $line; // first line
877 - elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last]))
878 - $extract .= " ".$line; // continous lines
879 - else
880 - $extract .= '<b> ... </b>' . $line;
881 -
882 - $last = $index;
883 - }
884 - if( $extract )
885 - $extract .= '<b> ... </b>';
886 -
887 - $processed = array();
888 - foreach($terms as $term){
889 - if( ! isset($processed[$term]) ){
890 - $pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word
891 - $extract = preg_replace( $pat3,
892 - "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
893 - $processed[$term] = true;
894 - }
895 - }
896 -
897 - wfProfileOut( "$fname-extract" );
898 -
899 - return $extract;
900 - }
901 -
902 - /**
903 - * Split text into lines and add it to extracts array
904 - *
905 - * @param array $extracts index -> $line
906 - * @param int $count
907 - * @param string $text
908 - */
909 - function splitAndAdd(&$extracts, &$count, $text){
910 - $split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text );
911 - foreach($split as $line){
912 - $tt = trim($line);
913 - if( $tt )
914 - $extracts[$count++] = $tt;
915 - }
916 - }
917 -
918 - /**
919 - * Do manual case conversion for non-ascii chars
920 - *
921 - * @param array $matches regex match array; element 0 is the matched character
922 - */
923 - function caseCallback($matches){
924 - global $wgContLang;
925 - if( strlen($matches[0]) > 1 ){
926 - return '['.$wgContLang->lc($matches[0]).$wgContLang->uc($matches[0]).']';
927 - } else
928 - return $matches[0];
929 - }
930 -
931 - /**
932 - * Extract part of the text from start to end, but
933 - * without chopping up words
934 - * @param string $text
935 - * @param int $start
936 - * @param int $end
937 - * @param int $posStart (out) actual start position
938 - * @param int $posEnd (out) actual end position
939 - * @return string
940 - */
941 - function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){
942 - global $wgContLang;
943 -
944 - if( $start != 0)
945 - $start = $this->position( $text, $start, 1 );
946 - if( $end >= strlen($text) )
947 - $end = strlen($text);
948 - else
949 - $end = $this->position( $text, $end );
950 -
951 - if(!is_null($posStart))
952 - $posStart = $start;
953 - if(!is_null($posEnd))
954 - $posEnd = $end;
955 -
956 - if($end > $start)
957 - return substr($text, $start, $end-$start);
958 - else
959 - return '';
960 - }
961 -
962 - /**
963 - * Find a nonletter near a point (index) in the text
964 - *
965 - * @param string $text
966 - * @param int $point
967 - * @param int $offset offset added to the found index
968 - * @return int nearest nonletter index, or beginning of utf8 char if none
969 - */
970 - function position($text, $point, $offset=0 ){
971 - $tolerance = 10;
972 - $s = max( 0, $point - $tolerance );
973 - $l = min( strlen($text), $point + $tolerance ) - $s;
974 - $m = array();
975 - if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){
976 - return $m[0][1] + $s + $offset;
977 - } else{
978 - // check if point is on a valid first UTF8 char
979 - $char = ord( $text[$point] );
980 - while( $char >= 0x80 && $char < 0xc0 ) {
981 - // skip trailing bytes
982 - $point++;
983 - if($point >= strlen($text))
984 - return strlen($text);
985 - $char = ord( $text[$point] );
986 - }
987 - return $point;
988 -
989 - }
990 - }
991 -
992 - /**
993 - * Search extracts for a pattern, and return snippets
994 - *
995 - * @param string $pattern regexp for matching lines
996 - * @param array $extracts extracts to search
997 - * @param int $linesleft number of extracts to make
998 - * @param int $contextchars length of snippet
999 - * @param array $out map for highlighted snippets
1000 - * @param array $offsets map of starting points of snippets
1001 - * @protected
1002 - */
1003 - function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){
1004 - if($linesleft == 0)
1005 - return; // nothing to do
1006 - foreach($extracts as $index => $line){
1007 - if( array_key_exists($index,$out) )
1008 - continue; // this line already highlighted
1009 -
1010 - $m = array();
1011 - if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
1012 - continue;
1013 -
1014 - $offset = $m[0][1];
1015 - $len = strlen($m[0][0]);
1016 - if($offset + $len < $contextchars)
1017 - $begin = 0;
1018 - elseif( $len > $contextchars)
1019 - $begin = $offset;
1020 - else
1021 - $begin = $offset + intval( ($len - $contextchars) / 2 );
1022 -
1023 - $end = $begin + $contextchars;
1024 -
1025 - $posBegin = $begin;
1026 - // basic snippet from this line
1027 - $out[$index] = $this->extract($line,$begin,$end,$posBegin);
1028 - $offsets[$index] = $posBegin;
1029 - $linesleft--;
1030 - if($linesleft == 0)
1031 - return;
1032 - }
1033 - }
1034 -
1035 - /**
1036 - * Basic wikitext removal
1037 - * @protected
1038 - */
1039 - function removeWiki($text) {
1040 - $fname = __METHOD__;
1041 - wfProfileIn( $fname );
1042 -
1043 - //$text = preg_replace("/'{2,5}/", "", $text);
1044 - //$text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text);
1045 - //$text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text);
1046 - //$text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text);
1047 - //$text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text);
1048 - //$text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text);
1049 - $text = preg_replace("/\\{\\{([^|]+?)\\}\\}/", "", $text);
1050 - $text = preg_replace("/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text);
1051 - $text = preg_replace("/\\[\\[([^|]+?)\\]\\]/", "\\1", $text);
1052 - $text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text);
1053 - //$text = preg_replace("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", "\\2", $text);
1054 - $text = preg_replace("/<\/?[^>]+>/", "", $text);
1055 - $text = preg_replace("/'''''/", "", $text);
1056 - $text = preg_replace("/('''|<\/?[iIuUbB]>)/", "", $text);
1057 - $text = preg_replace("/''/", "", $text);
1058 -
1059 - wfProfileOut( $fname );
1060 - return $text;
1061 - }
1062 -
1063 - /**
1064 - * callback to replace [[target|caption]] kind of links, if
1065 - * the target is category or image, leave it
1066 - *
1067 - * @param array $matches
1068 - */
1069 - function linkReplace($matches){
1070 - $colon = strpos( $matches[1], ':' );
1071 - if( $colon === false )
1072 - return $matches[2]; // replace with caption
1073 - global $wgContLang;
1074 - $ns = substr( $matches[1], 0, $colon );
1075 - $index = $wgContLang->getNsIndex($ns);
1076 - if( $index !== false && ($index == NS_IMAGE || $index == NS_CATEGORY) )
1077 - return $matches[0]; // return the whole thing
1078 - else
1079 - return $matches[2];
1080 -
1081 - }
1082 -
1083 - /**
1084 - * Simple & fast snippet extraction, but gives completely irrelevant
1085 - * snippets
1086 - *
1087 - * @param string $text
1088 - * @param array $terms
1089 - * @param int $contextlines
1090 - * @param int $contextchars
1091 - * @return string
1092 - */
1093 - public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
1094 - global $wgLang, $wgContLang;
1095 - $fname = __METHOD__;
1096 -
1097 - $lines = explode( "\n", $text );
1098 -
1099 - $terms = implode( '|', $terms );
1100 - $terms = str_replace( '/', "\\/", $terms);
1101 - $max = intval( $contextchars ) + 1;
1102 - $pat1 = "/(.*)($terms)(.{0,$max})/i";
1103 -
1104 - $lineno = 0;
1105 -
1106 - $extract = "";
1107 - wfProfileIn( "$fname-extract" );
1108 - foreach ( $lines as $line ) {
1109 - if ( 0 == $contextlines ) {
1110 - break;
1111 - }
1112 - ++$lineno;
1113 - $m = array();
1114 - if ( ! preg_match( $pat1, $line, $m ) ) {
1115 - continue;
1116 - }
1117 - --$contextlines;
1118 - $pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' );
1119 -
1120 - if ( count( $m ) < 3 ) {
1121 - $post = '';
1122 - } else {
1123 - $post = $wgContLang->truncate( $m[3], $contextchars, ' ... ' );
1124 - }
1125 -
1126 - $found = $m[2];
1127 -
1128 - $line = htmlspecialchars( $pre . $found . $post );
1129 - $pat2 = '/(' . $terms . ")/i";
1130 - $line = preg_replace( $pat2,
1131 - "<span class='searchmatch'>\\1</span>", $line );
1132 -
1133 - $extract .= "${line}\n";
1134 - }
1135 - wfProfileOut( "$fname-extract" );
1136 -
1137 - return $extract;
1138 - }
1139 -
1140 -}
1141 -
1142 -/**
1143 - * @ingroup Search
1144 - */
1145 -class SearchEngineDummy {
1146 - function search( $term ) {
1147 - return null;
1148 - }
1149 - function setLimitOffset($l, $o) {}
1150 - function legalSearchChars() {}
1151 - function update() {}
1152 - function setnamespaces() {}
1153 - function searchtitle() {}
1154 - function searchtext() {}
1155 -}
Index: trunk/phase3/includes/SearchPostgres.php
@@ -1,255 +0,0 @@
2 -<?php
3 -# Copyright (C) 2006-2007 Greg Sabino Mullane <greg@turnstep.com>
4 -# http://www.mediawiki.org/
5 -#
6 -# This program is free software; you can redistribute it and/or modify
7 -# it under the terms of the GNU General Public License as published by
8 -# the Free Software Foundation; either version 2 of the License, or
9 -# (at your option) any later version.
10 -#
11 -# This program is distributed in the hope that it will be useful,
12 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -# GNU General Public License for more details.
15 -#
16 -# You should have received a copy of the GNU General Public License along
17 -# with this program; if not, write to the Free Software Foundation, Inc.,
18 -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 -# http://www.gnu.org/copyleft/gpl.html
20 -
21 -/**
22 - * @file
23 - * @ingroup Search
24 - */
25 -
26 -/**
27 - * Search engine hook base class for Postgres
28 - * @ingroup Search
29 - */
30 -class SearchPostgres extends SearchEngine {
31 -
32 - function SearchPostgres( $db ) {
33 - $this->db = $db;
34 - }
35 -
36 - /**
37 - * Perform a full text search query via tsearch2 and return a result set.
38 - * Currently searches a page's current title (page.page_title) and
39 - * latest revision article text (pagecontent.old_text)
40 - *
41 - * @param string $term - Raw search term
42 - * @return PostgresSearchResultSet
43 - * @access public
44 - */
45 - function searchTitle( $term ) {
46 - $q = $this->searchQuery( $term , 'titlevector', 'page_title' );
47 - $olderror = error_reporting(E_ERROR);
48 - $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
49 - error_reporting($olderror);
50 - if (!$resultSet) {
51 - // Needed for "Query requires full scan, GIN doesn't support it"
52 - return new SearchResultTooMany();
53 - }
54 - return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
55 - }
56 - function searchText( $term ) {
57 - $q = $this->searchQuery( $term, 'textvector', 'old_text' );
58 - $olderror = error_reporting(E_ERROR);
59 - $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
60 - error_reporting($olderror);
61 - if (!$resultSet) {
62 - return new SearchResultTooMany();
63 - }
64 - return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
65 - }
66 -
67 -
68 - /*
69 - * Transform the user's search string into a better form for tsearch2
70 - */
71 - function parseQuery( $term ) {
72 -
73 - wfDebug( "parseQuery received: $term" );
74 -
75 - ## No backslashes allowed
76 - $term = preg_replace('/\\\/', '', $term);
77 -
78 - ## Collapse parens into nearby words:
79 - $term = preg_replace('/\s*\(\s*/', ' (', $term);
80 - $term = preg_replace('/\s*\)\s*/', ') ', $term);
81 -
82 - ## Treat colons as word separators:
83 - $term = preg_replace('/:/', ' ', $term);
84 -
85 - $searchstring = '';
86 - $m = array();
87 - if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
88 - foreach( $m as $terms ) {
89 - if (strlen($terms[1])) {
90 - $searchstring .= ' & !';
91 - }
92 - if (strtolower($terms[2]) === 'and') {
93 - $searchstring .= ' & ';
94 - }
95 - else if (strtolower($terms[2]) === 'or' or $terms[2] === '|') {
96 - $searchstring .= ' | ';
97 - }
98 - else if (strtolower($terms[2]) === 'not') {
99 - $searchstring .= ' & !';
100 - }
101 - else {
102 - $searchstring .= " & $terms[2]";
103 - }
104 - }
105 - }
106 -
107 - ## Strip out leading junk
108 - $searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring);
109 -
110 - ## Remove any doubled-up operators
111 - $searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring);
112 -
113 - ## Remove any non-spaced operators (e.g. "Zounds!")
114 - $searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring);
115 -
116 - ## Remove any trailing whitespace or operators
117 - $searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring);
118 -
119 - ## Remove unnecessary quotes around everything
120 - $searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring);
121 -
122 - ## Quote the whole thing
123 - $searchstring = $this->db->addQuotes($searchstring);
124 -
125 - wfDebug( "parseQuery returned: $searchstring" );
126 -
127 - return $searchstring;
128 -
129 - }
130 -
131 - /**
132 - * Construct the full SQL query to do the search.
133 - * @param string $term
134 - * @param string $fulltext
135 - * @private
136 - */
137 - function searchQuery( $term, $fulltext, $colname ) {
138 - global $wgDBversion;
139 -
140 - if ( !isset( $wgDBversion ) ) {
141 - $this->db->getServerVersion();
142 - $wgDBversion = $this->db->numeric_version;
143 - }
144 - $prefix = $wgDBversion < 8.3 ? "'default'," : '';
145 -
146 - $searchstring = $this->parseQuery( $term );
147 -
148 - ## We need a separate query here so gin does not complain about empty searches
149 - $SQL = "SELECT to_tsquery($prefix $searchstring)";
150 - $res = $this->db->doQuery($SQL);
151 - if (!$res) {
152 - ## TODO: Better output (example to catch: one 'two)
153 - die ("Sorry, that was not a valid search string. Please go back and try again");
154 - }
155 - $top = pg_fetch_result($res,0,0);
156 -
157 - if ($top === "") { ## e.g. if only stopwords are used XXX return something better
158 - $query = "SELECT page_id, page_namespace, page_title, 0 AS score ".
159 - "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
160 - "AND r.rev_text_id = c.old_id AND 1=0";
161 - }
162 - else {
163 - $m = array();
164 - if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
165 - foreach( $m as $terms ) {
166 - $this->searchTerms[$terms[1]] = $terms[1];
167 - }
168 - }
169 -
170 - $rankscore = $wgDBversion > 8.2 ? 5 : 1;
171 - $rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank';
172 - $query = "SELECT page_id, page_namespace, page_title, ".
173 - "$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ".
174 - "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
175 - "AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)";
176 - }
177 -
178 - ## Redirects
179 - if (! $this->showRedirects)
180 - $query .= ' AND page_is_redirect = 0';
181 -
182 - ## Namespaces - defaults to 0
183 - if( !is_null($this->namespaces) ){ // null -> search all
184 - if ( count($this->namespaces) < 1)
185 - $query .= ' AND page_namespace = 0';
186 - else {
187 - $namespaces = implode( ',', $this->namespaces );
188 - $query .= " AND page_namespace IN ($namespaces)";
189 - }
190 - }
191 -
192 - $query .= " ORDER BY score DESC, page_id DESC";
193 -
194 - $query .= $this->db->limitResult( '', $this->limit, $this->offset );
195 -
196 - wfDebug( "searchQuery returned: $query" );
197 -
198 - return $query;
199 - }
200 -
201 - ## Most of the work of these two functions is done automatically via triggers
202 -
203 - function update( $pageid, $title, $text ) {
204 - ## We don't want to index older revisions
205 - $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ".
206 - "(SELECT rev_text_id FROM revision WHERE rev_page = $pageid ".
207 - "ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)";
208 - $this->db->doQuery($SQL);
209 - return true;
210 - }
211 -
212 - function updateTitle( $id, $title ) {
213 - return true;
214 - }
215 -
216 -} ## end of the SearchPostgres class
217 -
218 -/**
219 - * @ingroup Search
220 - */
221 -class PostgresSearchResult extends SearchResult {
222 - function PostgresSearchResult( $row ) {
223 - $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
224 - $this->score = $row->score;
225 - }
226 - function getScore() {
227 - return $this->score;
228 - }
229 -}
230 -
231 -/**
232 - * @ingroup Search
233 - */
234 -class PostgresSearchResultSet extends SearchResultSet {
235 - function PostgresSearchResultSet( $resultSet, $terms ) {
236 - $this->mResultSet = $resultSet;
237 - $this->mTerms = $terms;
238 - }
239 -
240 - function termMatches() {
241 - return $this->mTerms;
242 - }
243 -
244 - function numRows() {
245 - return $this->mResultSet->numRows();
246 - }
247 -
248 - function next() {
249 - $row = $this->mResultSet->fetchObject();
250 - if( $row === false ) {
251 - return false;
252 - } else {
253 - return new PostgresSearchResult( $row );
254 - }
255 - }
256 -}
Index: trunk/phase3/includes/SearchUpdate.php
@@ -1,113 +0,0 @@
2 -<?php
3 -/**
4 - * See deferred.txt
5 - * @ingroup Search
6 - */
7 -class SearchUpdate {
8 -
9 - /* private */ var $mId = 0, $mNamespace, $mTitle, $mText;
10 - /* private */ var $mTitleWords;
11 -
12 - function SearchUpdate( $id, $title, $text = false ) {
13 - $nt = Title::newFromText( $title );
14 - if( $nt ) {
15 - $this->mId = $id;
16 - $this->mText = $text;
17 -
18 - $this->mNamespace = $nt->getNamespace();
19 - $this->mTitle = $nt->getText(); # Discard namespace
20 -
21 - $this->mTitleWords = $this->mTextWords = array();
22 - } else {
23 - wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
24 - }
25 - }
26 -
27 - function doUpdate() {
28 - global $wgContLang, $wgDisableSearchUpdate;
29 -
30 - if( $wgDisableSearchUpdate || !$this->mId ) {
31 - return false;
32 - }
33 - $fname = 'SearchUpdate::doUpdate';
34 - wfProfileIn( $fname );
35 -
36 - $search = SearchEngine::create();
37 - $lc = SearchEngine::legalSearchChars() . '&#;';
38 -
39 - if( $this->mText === false ) {
40 - $search->updateTitle($this->mId,
41 - Title::indexTitle( $this->mNamespace, $this->mTitle ));
42 - wfProfileOut( $fname );
43 - return;
44 - }
45 -
46 - # Language-specific strip/conversion
47 - $text = $wgContLang->stripForSearch( $this->mText );
48 -
49 - wfProfileIn( $fname.'-regexps' );
50 - $text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/",
51 - ' ', strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup
52 - $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
53 - "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
54 -
55 - # Strip external URLs
56 - $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
57 - $protos = "http|https|ftp|mailto|news|gopher";
58 - $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
59 - $text = preg_replace( $pat, "\\1 \\3", $text );
60 -
61 - $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/";
62 - $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
63 - $text = preg_replace( $p1, "\\1 ", $text );
64 - $text = preg_replace( $p2, "\\1 \\3 ", $text );
65 -
66 - # Internal image links
67 - $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
68 - $text = preg_replace( $pat2, " \\1 \\3", $text );
69 -
70 - $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
71 - "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
72 -
73 - # Strip all remaining non-search characters
74 - $text = preg_replace( "/[^{$lc}]+/", " ", $text );
75 -
76 - # Handle 's, s'
77 - #
78 - # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
79 - # $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
80 - #
81 - # These tail-anchored regexps are insanely slow. The worst case comes
82 - # when Japanese or Chinese text (ie, no word spacing) is written on
83 - # a wiki configured for Western UTF-8 mode. The Unicode characters are
84 - # expanded to hex codes and the "words" are very long paragraph-length
85 - # monstrosities. On a large page the above regexps may take over 20
86 - # seconds *each* on a 1GHz-level processor.
87 - #
88 - # Following are reversed versions which are consistently fast
89 - # (about 3 milliseconds on 1GHz-level processor).
90 - #
91 - $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
92 - $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );
93 -
94 - # Strip wiki '' and '''
95 - $text = preg_replace( "/''[']*/", " ", $text );
96 - wfProfileOut( "$fname-regexps" );
97 -
98 - wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) );
99 -
100 - # Perform the actual update
101 - $search->update($this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ),
102 - $text);
103 -
104 - wfProfileOut( $fname );
105 - }
106 -}
107 -
108 -/**
109 - * Placeholder class
110 - * @ingroup Search
111 - */
112 -class SearchUpdateMyISAM extends SearchUpdate {
113 - # Inherits everything
114 -}
Index: trunk/phase3/includes/SearchOracle.php
@@ -1,240 +0,0 @@
2 -<?php
3 -# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
4 -# http://www.mediawiki.org/
5 -#
6 -# This program is free software; you can redistribute it and/or modify
7 -# it under the terms of the GNU General Public License as published by
8 -# the Free Software Foundation; either version 2 of the License, or
9 -# (at your option) any later version.
10 -#
11 -# This program is distributed in the hope that it will be useful,
12 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -# GNU General Public License for more details.
15 -#
16 -# You should have received a copy of the GNU General Public License along
17 -# with this program; if not, write to the Free Software Foundation, Inc.,
18 -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 -# http://www.gnu.org/copyleft/gpl.html
20 -
21 -/**
22 - * @file
23 - * @ingroup Search
24 - */
25 -
26 -/**
27 - * Search engine hook base class for Oracle (ConText).
28 - * @ingroup Search
29 - */
30 -class SearchOracle extends SearchEngine {
31 - function __construct($db) {
32 - $this->db = $db;
33 - }
34 -
35 - /**
36 - * Perform a full text search query and return a result set.
37 - *
38 - * @param string $term - Raw search term
39 - * @return OracleSearchResultSet
40 - * @access public
41 - */
42 - function searchText( $term ) {
43 - $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), true)));
44 - return new OracleSearchResultSet($resultSet, $this->searchTerms);
45 - }
46 -
47 - /**
48 - * Perform a title-only search query and return a result set.
49 - *
50 - * @param string $term - Raw search term
51 - * @return OracleSearchResultSet
52 - * @access public
53 - */
54 - function searchTitle($term) {
55 - $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false)));
56 - return new OracleSearchResultSet($resultSet, $this->searchTerms); // same result-set class as searchText()
57 - }
58 -
59 -
60 - /**
61 - * Return a partial WHERE clause to exclude redirects, if so set
62 - * @return string
63 - * @private
64 - */
65 - function queryRedirect() {
66 - if ($this->showRedirects) {
67 - return '';
68 - } else {
69 - return 'AND page_is_redirect=0';
70 - }
71 - }
72 -
73 - /**
74 - * Return a partial WHERE clause to limit the search to the given namespaces
75 - * @return string
76 - * @private
77 - */
78 - function queryNamespaces() {
79 - if( is_null($this->namespaces) )
80 - return '';
81 - $namespaces = implode(',', $this->namespaces);
82 - if ($namespaces == '') {
83 - $namespaces = '0';
84 - }
85 - return 'AND page_namespace IN (' . $namespaces . ')';
86 - }
87 -
88 - /**
89 - * Return a LIMIT clause to limit results on the query.
90 - * @return string
91 - * @private
92 - */
93 - function queryLimit($sql) {
94 - return $this->db->limitResult($sql, $this->limit, $this->offset);
95 - }
96 -
97 - /**
98 - * Return the ORDER BY clause that sorts results by score(1),
99 - * the score label used by the CONTAINS() match in parseQuery()
100 - * @return string
101 - * @private
102 - */
103 - function queryRanking($filteredTerm, $fulltext) {
104 - return ' ORDER BY score(1)';
105 - }
106 -
107 - /**
108 - * Construct the full SQL query to do the search.
109 - * The guts should be constructed in queryMain()
110 - * @param string $filteredTerm
111 - * @param bool $fulltext
112 - * @private
113 - */
114 - function getQuery( $filteredTerm, $fulltext ) {
115 - return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' .
116 - $this->queryRedirect() . ' ' .
117 - $this->queryNamespaces() . ' ' .
118 - $this->queryRanking( $filteredTerm, $fulltext ) . ' ');
119 - }
120 -
121 -
122 - /**
123 - * Picks which field to index on, depending on what type of query.
124 - * @param bool $fulltext
125 - * @return string
126 - */
127 - function getIndexField($fulltext) {
128 - return $fulltext ? 'si_text' : 'si_title';
129 - }
130 -
131 - /**
132 - * Get the base part of the search query.
133 - *
134 - * @param string $filteredTerm
135 - * @param bool $fulltext
136 - * @return string
137 - * @private
138 - */
139 - function queryMain( $filteredTerm, $fulltext ) {
140 - $match = $this->parseQuery($filteredTerm, $fulltext);
141 - $page = $this->db->tableName('page');
142 - $searchindex = $this->db->tableName('searchindex');
143 - return 'SELECT page_id, page_namespace, page_title ' .
144 - "FROM $page,$searchindex " .
145 - 'WHERE page_id=si_page AND ' . $match;
146 - }
147 -
148 - /** @todo document */
149 - function parseQuery($filteredText, $fulltext) {
150 - global $wgContLang;
151 - $lc = SearchEngine::legalSearchChars();
152 - $this->searchTerms = array();
153 -
154 - # FIXME: This doesn't handle parenthetical expressions.
155 - $m = array();
156 - $q = array();
157 -
158 - if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
159 - $filteredText, $m, PREG_SET_ORDER)) {
160 - foreach($m as $terms) {
161 - $q[] = $terms[1] . $wgContLang->stripForSearch($terms[2]);
162 -
163 - if (!empty($terms[3])) {
164 - $regexp = preg_quote( $terms[3], '/' );
165 - if ($terms[4])
166 - $regexp .= "[0-9A-Za-z_]+";
167 - } else {
168 - $regexp = preg_quote(str_replace('"', '', $terms[2]), '/');
169 - }
170 - $this->searchTerms[] = $regexp;
171 - }
172 - }
173 -
174 - $searchon = $this->db->strencode(join(',', $q));
175 - $field = $this->getIndexField($fulltext);
176 - return " CONTAINS($field, '$searchon', 1) > 0 ";
177 - }
178 -
179 - /**
180 - * Create or update the search index record for the given page.
181 - * Title and text should be pre-processed.
182 - *
183 - * @param int $id
184 - * @param string $title
185 - * @param string $text
186 - */
187 - function update($id, $title, $text) {
188 - $dbw = wfGetDB(DB_MASTER);
189 - $dbw->replace('searchindex',
190 - array('si_page'),
191 - array(
192 - 'si_page' => $id,
193 - 'si_title' => $title,
194 - 'si_text' => $text
195 - ), 'SearchOracle::update' );
196 - $dbw->query("CALL ctx_ddl.sync_index('si_text_idx')");
197 - $dbw->query("CALL ctx_ddl.sync_index('si_title_idx')");
198 - }
199 -
200 - /**
201 - * Update a search index record's title only.
202 - * Title should be pre-processed.
203 - *
204 - * @param int $id
205 - * @param string $title
206 - */
207 - function updateTitle($id, $title) {
208 - $dbw = wfGetDB(DB_MASTER);
209 -
210 - $dbw->update('searchindex',
211 - array('si_title' => $title),
212 - array('si_page' => $id),
213 - 'SearchOracle::updateTitle',
214 - array());
215 - }
216 -}
217 -
218 -/**
219 - * @ingroup Search
220 - */
221 -class OracleSearchResultSet extends SearchResultSet {
222 - function __construct($resultSet, $terms) {
223 - $this->mResultSet = $resultSet;
224 - $this->mTerms = $terms;
225 - }
226 -
227 - function termMatches() {
228 - return $this->mTerms;
229 - }
230 -
231 - function numRows() {
232 - return $this->mResultSet->numRows();
233 - }
234 -
235 - function next() {
236 - $row = $this->mResultSet->fetchObject();
237 - if ($row === false)
238 - return false;
239 - return new SearchResult($row);
240 - }
241 -}
Index: trunk/phase3/includes/SearchTsearch2.php
@@ -1,120 +0,0 @@
2 -<?php
3 -# Copyright (C) 2004 Brion Vibber <brion@pobox.com>, Domas Mituzas <domas.mituzas@gmail.com>
4 -# http://www.mediawiki.org/
5 -#
6 -# This program is free software; you can redistribute it and/or modify
7 -# it under the terms of the GNU General Public License as published by
8 -# the Free Software Foundation; either version 2 of the License, or
9 -# (at your option) any later version.
10 -#
11 -# This program is distributed in the hope that it will be useful,
12 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -# GNU General Public License for more details.
15 -#
16 -# You should have received a copy of the GNU General Public License along
17 -# with this program; if not, write to the Free Software Foundation, Inc.,
18 -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 -# http://www.gnu.org/copyleft/gpl.html
20 -
21 -/**
22 - * Search engine hook for PostgreSQL / Tsearch2
23 - * @file
24 - * @ingroup Search
25 - */
26 -
27 -/**
28 - * @todo document
29 - * @ingroup Search
30 - */
31 -class SearchTsearch2 extends SearchEngine {
32 - var $strictMatching = false;
33 -
34 - function __construct( $db ) {
35 - $this->db = $db;
36 - $this->mRanking = true;
37 - }
38 -
39 - function getIndexField( $fulltext ) {
40 - return $fulltext ? 'si_text' : 'si_title';
41 - }
42 -
43 - function parseQuery( $filteredText, $fulltext ) {
44 - global $wgContLang;
45 - $lc = SearchEngine::legalSearchChars();
46 - $searchon = '';
47 - $this->searchTerms = array();
48 -
49 - # FIXME: This doesn't handle parenthetical expressions.
50 - $m = array();
51 - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
52 - $filteredText, $m, PREG_SET_ORDER ) ) {
53 - foreach( $m as $terms ) {
54 - if( $searchon !== '' ) $searchon .= ' ';
55 - if( $this->strictMatching && ($terms[1] == '') ) {
56 - $terms[1] = '+';
57 - }
58 - $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
59 - if( !empty( $terms[3] ) ) {
60 - $regexp = preg_quote( $terms[3], '/' );
61 - if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
62 - } else {
63 - $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
64 - }
65 - $this->searchTerms[] = $regexp;
66 - }
67 - wfDebug( "Would search with '$searchon'\n" );
68 - wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" );
69 - } else {
70 - wfDebug( "Can't understand search query '{$filteredText}'\n" ); # log the argument that failed to parse
71 - }
72 -
73 - $searchon = preg_replace( '/(\s+)/', '&', $searchon ); # join the terms with '&' for to_tsquery()
74 - $searchon = $this->db->strencode( $searchon );
75 - return $searchon;
76 - }
77 -
78 - function queryRanking( $filteredTerm, $fulltext ) {
79 - $field = $this->getIndexField( $fulltext );
80 - $searchon = $this->parseQuery( $filteredTerm, $fulltext );
81 - if ($this->mRanking)
82 - return " ORDER BY rank($field,to_tsquery('$searchon')) DESC";
83 - else
84 - return "";
85 - }
86 -
87 -
88 - function queryMain( $filteredTerm, $fulltext ) {
89 - $match = $this->parseQuery( $filteredTerm, $fulltext );
90 - $field = $this->getIndexField( $fulltext );
91 - $cur = $this->db->tableName( 'cur' );
92 - $searchindex = $this->db->tableName( 'searchindex' );
93 - return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' .
94 - "FROM $cur,$searchindex " .
95 - 'WHERE cur_id=si_page AND ' .
96 - " $field @@ to_tsquery ('$match') " ;
97 - }
98 -
99 - function update( $id, $title, $text ) {
100 - $dbw = wfGetDB( DB_MASTER );
101 - $searchindex = $dbw->tableName( 'searchindex' );
102 - $sql = "DELETE FROM $searchindex WHERE si_page={$id}";
103 - $dbw->query( $sql, __METHOD__ );
104 - $sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ".
105 - " VALUES ( $id, to_tsvector('".
106 - $dbw->strencode($title).
107 - "'),to_tsvector('".
108 - $dbw->strencode( $text)."')) ";
109 - $dbw->query($sql, __METHOD__ );
110 - }
111 -
112 - function updateTitle($id,$title) {
113 - $dbw = wfGetDB(DB_MASTER);
114 - $searchindex = $dbw->tableName( 'searchindex' );
115 - $sql = "UPDATE $searchindex SET si_title=to_tsvector('" .
116 - $dbw->strencode( $title ) .
117 - "') WHERE si_page={$id}";
118 -
119 - $dbw->query( $sql, __METHOD__ );
120 - }
121 -}
Index: trunk/phase3/includes/SearchMySQL4.php
@@ -1,34 +0,0 @@
2 -<?php
3 -# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
4 -# http://www.mediawiki.org/
5 -#
6 -# This program is free software; you can redistribute it and/or modify
7 -# it under the terms of the GNU General Public License as published by
8 -# the Free Software Foundation; either version 2 of the License, or
9 -# (at your option) any later version.
10 -#
11 -# This program is distributed in the hope that it will be useful,
12 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -# GNU General Public License for more details.
15 -#
16 -# You should have received a copy of the GNU General Public License along
17 -# with this program; if not, write to the Free Software Foundation, Inc.,
18 -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 -# http://www.gnu.org/copyleft/gpl.html
20 -
21 -/**
22 - * @file
23 - * @ingroup Search
24 - */
25 -
26 -/**
27 - * Search engine hook for MySQL 4+
28 - * This class retained for backwards compatibility...
29 - * The meat's been moved to SearchMySQL, since the 3.x variety is gone.
30 - * @ingroup Search
31 - * @deprecated
32 - */
33 -class SearchMySQL4 extends SearchMySQL {
34 - /* whee */
35 -}
Index: trunk/phase3/includes/SearchMySQL.php
@@ -1,262 +0,0 @@
2 -<?php
3 -# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
4 -# http://www.mediawiki.org/
5 -#
6 -# This program is free software; you can redistribute it and/or modify
7 -# it under the terms of the GNU General Public License as published by
8 -# the Free Software Foundation; either version 2 of the License, or
9 -# (at your option) any later version.
10 -#
11 -# This program is distributed in the hope that it will be useful,
12 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
13 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 -# GNU General Public License for more details.
15 -#
16 -# You should have received a copy of the GNU General Public License along
17 -# with this program; if not, write to the Free Software Foundation, Inc.,
18 -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 -# http://www.gnu.org/copyleft/gpl.html
20 -
21 -/**
22 - * @file
23 - * @ingroup Search
24 - */
25 -
26 -/**
27 - * Search engine hook for MySQL 4+
28 - * @ingroup Search
29 - */
30 -class SearchMySQL extends SearchEngine {
31 - var $strictMatching = true;
32 -
33 - /** @todo document */
34 - function __construct( $db ) {
35 - $this->db = $db;
36 - }
37 -
38 - /** @todo document */
39 - function parseQuery( $filteredText, $fulltext ) {
40 - global $wgContLang;
41 - $lc = SearchEngine::legalSearchChars(); // Minus format chars
42 - $searchon = '';
43 - $this->searchTerms = array();
44 -
45 - # FIXME: This doesn't handle parenthetical expressions.
46 - $m = array();
47 - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
48 - $filteredText, $m, PREG_SET_ORDER ) ) {
49 - foreach( $m as $terms ) {
50 - if( $searchon !== '' ) $searchon .= ' ';
51 - if( $this->strictMatching && ($terms[1] == '') ) {
52 - $terms[1] = '+';
53 - }
54 - $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
55 - if( !empty( $terms[3] ) ) {
56 - // Match individual terms in result highlighting...
57 - $regexp = preg_quote( $terms[3], '/' );
58 - if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
59 - } else {
60 - // Match the quoted term in result highlighting...
61 - $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
62 - }
63 - $this->searchTerms[] = $regexp;
64 - }
65 - wfDebug( "Would search with '$searchon'\n" );
66 - wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" );
67 - } else {
68 - wfDebug( "Can't understand search query '{$filteredText}'\n" );
69 - }
70 -
71 - $searchon = $this->db->strencode( $searchon );
72 - $field = $this->getIndexField( $fulltext );
73 - return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) ";
74 - }
75 -
76 - public static function legalSearchChars() {
77 - return "\"*" . parent::legalSearchChars();
78 - }
79 -
80 - /**
81 - * Perform a full text search query and return a result set.
82 - *
83 - * @param string $term - Raw search term
84 - * @return MySQLSearchResultSet
85 - * @access public
86 - */
87 - function searchText( $term ) {
88 - $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) );
89 - return new MySQLSearchResultSet( $resultSet, $this->searchTerms );
90 - }
91 -
92 - /**
93 - * Perform a title-only search query and return a result set.
94 - *
95 - * @param string $term - Raw search term
96 - * @return MySQLSearchResultSet
97 - * @access public
98 - */
99 - function searchTitle( $term ) {
100 - $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) );
101 - return new MySQLSearchResultSet( $resultSet, $this->searchTerms );
102 - }
103 -
104 -
105 - /**
106 - * Return a partial WHERE clause to exclude redirects, if so set
107 - * @return string
108 - * @private
109 - */
110 - function queryRedirect() {
111 - if( $this->showRedirects ) {
112 - return '';
113 - } else {
114 - return 'AND page_is_redirect=0';
115 - }
116 - }
117 -
118 - /**
119 - * Return a partial WHERE clause to limit the search to the given namespaces
120 - * @return string
121 - * @private
122 - */
123 - function queryNamespaces() {
124 - if( is_null($this->namespaces) )
125 - return ''; # search all
126 - $namespaces = implode( ',', $this->namespaces );
127 - if ($namespaces == '') {
128 - $namespaces = '0';
129 - }
130 - return 'AND page_namespace IN (' . $namespaces . ')';
131 - }
132 -
133 - /**
134 - * Return a LIMIT clause to limit results on the query.
135 - * @return string
136 - * @private
137 - */
138 - function queryLimit() {
139 - return $this->db->limitResult( '', $this->limit, $this->offset );
140 - }
141 -
142 - /**
143 - * Does not do anything for generic search engine
144 - * subclasses may define this though
145 - * @return string
146 - * @private
147 - */
148 - function queryRanking( $filteredTerm, $fulltext ) {
149 - return '';
150 - }
151 -
152 - /**
153 - * Construct the full SQL query to do the search.
154 - * The guts should be constructed in queryMain()
155 - * @param string $filteredTerm
156 - * @param bool $fulltext
157 - * @private
158 - */
159 - function getQuery( $filteredTerm, $fulltext ) {
160 - return $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
161 - $this->queryRedirect() . ' ' .
162 - $this->queryNamespaces() . ' ' .
163 - $this->queryRanking( $filteredTerm, $fulltext ) . ' ' .
164 - $this->queryLimit();
165 - }
166 -
167 -
168 - /**
169 - * Picks which field to index on, depending on what type of query.
170 - * @param bool $fulltext
171 - * @return string
172 - */
173 - function getIndexField( $fulltext ) {
174 - return $fulltext ? 'si_text' : 'si_title';
175 - }
176 -
177 - /**
178 - * Get the base part of the search query.
179 - * The actual match syntax will depend on the server
180 - * version; MySQL 3 and MySQL 4 have different capabilities
181 - * in their fulltext search indexes.
182 - *
183 - * @param string $filteredTerm
184 - * @param bool $fulltext
185 - * @return string
186 - * @private
187 - */
188 - function queryMain( $filteredTerm, $fulltext ) {
189 - $match = $this->parseQuery( $filteredTerm, $fulltext );
190 - $page = $this->db->tableName( 'page' );
191 - $searchindex = $this->db->tableName( 'searchindex' );
192 - return 'SELECT page_id, page_namespace, page_title ' .
193 - "FROM $page,$searchindex " .
194 - 'WHERE page_id=si_page AND ' . $match;
195 - }
196 -
197 - /**
198 - * Create or update the search index record for the given page.
199 - * Title and text should be pre-processed.
200 - *
201 - * @param int $id
202 - * @param string $title
203 - * @param string $text
204 - */
205 - function update( $id, $title, $text ) {
206 - $dbw = wfGetDB( DB_MASTER );
207 - $dbw->replace( 'searchindex',
208 - array( 'si_page' ),
209 - array(
210 - 'si_page' => $id,
211 - 'si_title' => $title,
212 - 'si_text' => $text
213 - ), __METHOD__ );
214 - }
215 -
216 - /**
217 - * Update a search index record's title only.
218 - * Title should be pre-processed.
219 - *
220 - * @param int $id
221 - * @param string $title
222 - */
223 - function updateTitle( $id, $title ) {
224 - $dbw = wfGetDB( DB_MASTER );
225 -
226 - $dbw->update( 'searchindex',
227 - array( 'si_title' => $title ),
228 - array( 'si_page' => $id ),
229 - __METHOD__,
230 - array( $dbw->lowPriorityOption() ) );
231 - }
232 -}
233 -
234 -/**
235 - * @ingroup Search
236 - */
237 -class MySQLSearchResultSet extends SearchResultSet {
238 - function MySQLSearchResultSet( $resultSet, $terms ) {
239 - $this->mResultSet = $resultSet;
240 - $this->mTerms = $terms;
241 - }
242 -
243 - function termMatches() {
244 - return $this->mTerms;
245 - }
246 -
247 - function numRows() {
248 - return $this->mResultSet->numRows();
249 - }
250 -
251 - function next() {
252 - $row = $this->mResultSet->fetchObject();
253 - if( $row === false ) {
254 - return false;
255 - } else {
256 - return new SearchResult( $row );
257 - }
258 - }
259 -
260 - function free() {
261 - $this->mResultSet->free();
262 - }
263 -}
Index: trunk/phase3/includes/search/MySQL4.php
@@ -0,0 +1,34 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
 26+/**
 27+ * Search engine hook for MySQL 4+
 28+ * This class retained for backwards compatibility...
 29+ * The meat's been moved to SearchMySQL, since the 3.x variety is gone.
 30+ * @ingroup Search
 31+ * @deprecated
 32+ */
 33+class SearchMySQL4 extends SearchMySQL {
 34+ /* whee */
 35+}
Property changes on: trunk/phase3/includes/search/MySQL4.php
___________________________________________________________________
Added: svn:keywords
136 + Author Date Id Revision
Added: svn:eol-style
237 + native
Index: trunk/phase3/includes/search/Tsearch2.php
@@ -0,0 +1,120 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>, Domas Mituzas <domas.mituzas@gmail.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * Search engine hook for PostgreSQL / Tsearch2
 23+ * @file
 24+ * @ingroup Search
 25+ */
 26+
 27+/**
 28+ * @todo document
 29+ * @ingroup Search
 30+ */
 31+class SearchTsearch2 extends SearchEngine {
 32+ var $strictMatching = false;
 33+
 34+ function __construct( $db ) {
 35+ $this->db = $db;
 36+ $this->mRanking = true;
 37+ }
 38+
 39+ function getIndexField( $fulltext ) {
 40+ return $fulltext ? 'si_text' : 'si_title';
 41+ }
 42+
 43+ function parseQuery( $filteredText, $fulltext ) {
 44+ global $wgContLang;
 45+ $lc = SearchEngine::legalSearchChars();
 46+ $searchon = '';
 47+ $this->searchTerms = array();
 48+
 49+ # FIXME: This doesn't handle parenthetical expressions.
 50+ $m = array();
 51+ if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
 52+ $filteredText, $m, PREG_SET_ORDER ) ) {
 53+ foreach( $m as $terms ) {
 54+ if( $searchon !== '' ) $searchon .= ' ';
 55+ if( $this->strictMatching && ($terms[1] == '') ) {
 56+ $terms[1] = '+';
 57+ }
 58+ $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
 59+ if( !empty( $terms[3] ) ) {
 60+ $regexp = preg_quote( $terms[3], '/' );
 61+ if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
 62+ } else {
 63+ $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
 64+ }
 65+ $this->searchTerms[] = $regexp;
 66+ }
 67+ wfDebug( "Would search with '$searchon'\n" );
 68+ wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" );
 69+ } else {
 70+ wfDebug( "Can't understand search query '{$this->filteredText}'\n" );
 71+ }
 72+
 73+ $searchon = preg_replace( '/(\s+)/', '&', $searchon );
 74+ $searchon = $this->db->strencode( $searchon );
 75+ return $searchon;
 76+ }
 77+
 78+ function queryRanking( $filteredTerm, $fulltext ) {
 79+ $field = $this->getIndexField( $fulltext );
 80+ $searchon = $this->parseQuery( $filteredTerm, $fulltext );
 81+ if ($this->mRanking)
 82+ return " ORDER BY rank($field,to_tsquery('$searchon')) DESC";
 83+ else
 84+ return "";
 85+ }
 86+
 87+
 88+ function queryMain( $filteredTerm, $fulltext ) {
 89+ $match = $this->parseQuery( $filteredTerm, $fulltext );
 90+ $field = $this->getIndexField( $fulltext );
 91+ $cur = $this->db->tableName( 'cur' );
 92+ $searchindex = $this->db->tableName( 'searchindex' );
 93+ return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' .
 94+ "FROM $cur,$searchindex " .
 95+ 'WHERE cur_id=si_page AND ' .
 96+ " $field @@ to_tsquery ('$match') " ;
 97+ }
 98+
 99+ function update( $id, $title, $text ) {
 100+ $dbw = wfGetDB( DB_MASTER );
 101+ $searchindex = $dbw->tableName( 'searchindex' );
 102+ $sql = "DELETE FROM $searchindex WHERE si_page={$id}";
 103+ $dbw->query( $sql, __METHOD__ );
 104+ $sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ".
 105+ " VALUES ( $id, to_tsvector('".
 106+ $dbw->strencode($title).
 107+ "'),to_tsvector('".
 108+ $dbw->strencode( $text)."')) ";
 109+ $dbw->query($sql, __METHOD__ );
 110+ }
 111+
 112+ function updateTitle($id,$title) {
 113+ $dbw = wfGetDB(DB_MASTER);
 114+ $searchindex = $dbw->tableName( 'searchindex' );
 115+ $sql = "UPDATE $searchindex SET si_title=to_tsvector('" .
 116+ $dbw->strencode( $title ) .
 117+ "') WHERE si_page={$id}";
 118+
 119+ $dbw->query( $sql, __METHOD__ );
 120+ }
 121+}
Property changes on: trunk/phase3/includes/search/Tsearch2.php
___________________________________________________________________
Added: svn:keywords
1122 + Author Date Id Revision
Added: svn:eol-style
2123 + native
Index: trunk/phase3/includes/search/Update.php
@@ -0,0 +1,113 @@
 2+<?php
 3+/**
 4+ * See deferred.txt
 5+ * @ingroup Search
 6+ */
 7+class SearchUpdate {
 8+
 9+ /* private */ var $mId = 0, $mNamespace, $mTitle, $mText;
 10+ /* private */ var $mTitleWords;
 11+
 12+ function SearchUpdate( $id, $title, $text = false ) {
 13+ $nt = Title::newFromText( $title );
 14+ if( $nt ) {
 15+ $this->mId = $id;
 16+ $this->mText = $text;
 17+
 18+ $this->mNamespace = $nt->getNamespace();
 19+ $this->mTitle = $nt->getText(); # Discard namespace
 20+
 21+ $this->mTitleWords = $this->mTextWords = array();
 22+ } else {
 23+ wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
 24+ }
 25+ }
 26+
 27+ function doUpdate() {
 28+ global $wgContLang, $wgDisableSearchUpdate;
 29+
 30+ if( $wgDisableSearchUpdate || !$this->mId ) {
 31+ return false;
 32+ }
 33+ $fname = 'SearchUpdate::doUpdate';
 34+ wfProfileIn( $fname );
 35+
 36+ $search = SearchEngine::create();
 37+ $lc = SearchEngine::legalSearchChars() . '&#;';
 38+
 39+ if( $this->mText === false ) {
 40+ $search->updateTitle($this->mId,
 41+ Title::indexTitle( $this->mNamespace, $this->mTitle ));
 42+ wfProfileOut( $fname );
 43+ return;
 44+ }
 45+
 46+ # Language-specific strip/conversion
 47+ $text = $wgContLang->stripForSearch( $this->mText );
 48+
 49+ wfProfileIn( $fname.'-regexps' );
 50+ $text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/",
 51+ ' ', strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup
 52+ $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
 53+ "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
 54+
 55+ # Strip external URLs
 56+ $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
 57+ $protos = "http|https|ftp|mailto|news|gopher";
 58+ $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
 59+ $text = preg_replace( $pat, "\\1 \\3", $text );
 60+
 61+ $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/";
 62+ $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
 63+ $text = preg_replace( $p1, "\\1 ", $text );
 64+ $text = preg_replace( $p2, "\\1 \\3 ", $text );
 65+
 66+ # Internal image links
 67+ $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
 68+ $text = preg_replace( $pat2, " \\1 \\3", $text );
 69+
 70+ $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
 71+ "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
 72+
 73+ # Strip all remaining non-search characters
 74+ $text = preg_replace( "/[^{$lc}]+/", " ", $text );
 75+
 76+ # Handle 's, s'
 77+ #
 78+ # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
 79+ # $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
 80+ #
 81+ # These tail-anchored regexps are insanely slow. The worst case comes
 82+ # when Japanese or Chinese text (ie, no word spacing) is written on
 83+ # a wiki configured for Western UTF-8 mode. The Unicode characters are
 84+ # expanded to hex codes and the "words" are very long paragraph-length
 85+ # monstrosities. On a large page the above regexps may take over 20
 86+ # seconds *each* on a 1GHz-level processor.
 87+ #
 88+ # Following are reversed versions which are consistently fast
 89+ # (about 3 milliseconds on 1GHz-level processor).
 90+ #
 91+ $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
 92+ $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );
 93+
 94+ # Strip wiki '' and '''
 95+ $text = preg_replace( "/''[']*/", " ", $text );
 96+ wfProfileOut( "$fname-regexps" );
 97+
 98+ wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) );
 99+
 100+ # Perform the actual update
 101+ $search->update($this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ),
 102+ $text);
 103+
 104+ wfProfileOut( $fname );
 105+ }
 106+}
 107+
 108+/**
 109+ * Placeholder class
 110+ * @ingroup Search
 111+ */
 112+class SearchUpdateMyISAM extends SearchUpdate {
 113+ # Inherits everything
 114+}
Property changes on: trunk/phase3/includes/search/Update.php
___________________________________________________________________
Added: svn:keywords
1115 + Author Date Id Revision
Added: svn:eol-style
2116 + native
Index: trunk/phase3/includes/search/Engine.php
@@ -0,0 +1,1154 @@
 2+<?php
 3+/**
 4+ * @defgroup Search Search
 5+ *
 6+ * @file
 7+ * @ingroup Search
 8+ */
 9+
 10+/**
 11+ * Base class for search engines; backend-specific engines extend it
 12+ * @ingroup Search
 13+ */
 14+class SearchEngine {
 15+ var $limit = 10;
 16+ var $offset = 0;
 17+ var $searchTerms = array();
 18+ var $namespaces = array( NS_MAIN );
 19+ var $showRedirects = false;
 20+
 21+ /**
 22+ * Perform a full text search query and return a result set.
 23+ * If full text searches are not supported or disabled, return null.
 24+ *
 25+ * @param string $term - Raw search term
 26+ * @return SearchResultSet
 27+ * @access public
 28+ * @abstract
 29+ */
 30+ function searchText( $term ) {
 31+ return null;
 32+ }
 33+
 34+ /**
 35+ * Perform a title-only search query and return a result set.
 36+ * If title searches are not supported or disabled, return null.
 37+ *
 38+ * @param string $term - Raw search term
 39+ * @return SearchResultSet
 40+ * @access public
 41+ * @abstract
 42+ */
 43+ function searchTitle( $term ) {
 44+ return null;
 45+ }
 46+
 47+ /**
 48+ * If an exact title match can be found, or a very slightly close match,
 49+ * return the title. If no match, returns NULL.
 50+ *
 51+ * @param string $searchterm
 52+ * @return Title
 53+ */
 54+ public static function getNearMatch( $searchterm ) {
 55+ global $wgContLang;
 56+
 57+ $allSearchTerms = array($searchterm);
 58+
 59+ if($wgContLang->hasVariants()){
 60+ $allSearchTerms = array_merge($allSearchTerms,$wgContLang->convertLinkToAllVariants($searchterm));
 61+ }
 62+
 63+ foreach($allSearchTerms as $term){
 64+
 65+ # Exact match? No need to look further.
 66+ $title = Title::newFromText( $term );
 67+ if (is_null($title))
 68+ return NULL;
 69+
 70+ if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal()
 71+ || $title->exists() ) {
 72+ return $title;
 73+ }
 74+
 75+ # Now try all lower case (i.e. first letter capitalized)
 76+ #
 77+ $title = Title::newFromText( $wgContLang->lc( $term ) );
 78+ if ( $title && $title->exists() ) {
 79+ return $title;
 80+ }
 81+
 82+ # Now try capitalized string
 83+ #
 84+ $title = Title::newFromText( $wgContLang->ucwords( $term ) );
 85+ if ( $title && $title->exists() ) {
 86+ return $title;
 87+ }
 88+
 89+ # Now try all upper case
 90+ #
 91+ $title = Title::newFromText( $wgContLang->uc( $term ) );
 92+ if ( $title && $title->exists() ) {
 93+ return $title;
 94+ }
 95+
 96+ # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
 97+ $title = Title::newFromText( $wgContLang->ucwordbreaks($term) );
 98+ if ( $title && $title->exists() ) {
 99+ return $title;
 100+ }
 101+
 102+ global $wgCapitalLinks, $wgContLang;
 103+ if( !$wgCapitalLinks ) {
 104+ // Catch differs-by-first-letter-case-only
 105+ $title = Title::newFromText( $wgContLang->ucfirst( $term ) );
 106+ if ( $title && $title->exists() ) {
 107+ return $title;
 108+ }
 109+ $title = Title::newFromText( $wgContLang->lcfirst( $term ) );
 110+ if ( $title && $title->exists() ) {
 111+ return $title;
 112+ }
 113+ }
 114+
 115+ // Give hooks a chance at better match variants
 116+ $title = null;
 117+ if( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
 118+ return $title;
 119+ }
 120+ }
 121+
 122+ $title = Title::newFromText( $searchterm );
 123+
 124+ # Entering an IP address goes to the contributions page
 125+ if ( ( $title->getNamespace() == NS_USER && User::isIP($title->getText() ) )
 126+ || User::isIP( trim( $searchterm ) ) ) {
 127+ return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
 128+ }
 129+
 130+
 131+ # Entering a user goes to the user page whether it's there or not
 132+ if ( $title->getNamespace() == NS_USER ) {
 133+ return $title;
 134+ }
 135+
 136+ # Go to images that exist even if there's no local page.
 137+ # There may have been a funny upload, or it may be on a shared
 138+ # file repository such as Wikimedia Commons.
 139+ if( $title->getNamespace() == NS_IMAGE ) {
 140+ $image = wfFindFile( $title );
 141+ if( $image ) {
 142+ return $title;
 143+ }
 144+ }
 145+
 146+ # MediaWiki namespace? Page may be "implied" if not customized.
 147+ # Just return it, with caps forced as the message system likes it.
 148+ if( $title->getNamespace() == NS_MEDIAWIKI ) {
 149+ return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
 150+ }
 151+
 152+ # Quoted term? Try without the quotes...
 153+ $matches = array();
 154+ if( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
 155+ return SearchEngine::getNearMatch( $matches[1] );
 156+ }
 157+
 158+ return NULL;
 159+ }
 160+
 161+ public static function legalSearchChars() {
 162+ return "A-Za-z_'0-9\\x80-\\xFF\\-";
 163+ }
 164+
 165+ /**
 166+ * Set the maximum number of results to return
 167+ * and how many to skip before returning the first.
 168+ *
 169+ * @param int $limit
 170+ * @param int $offset
 171+ * @access public
 172+ */
 173+ function setLimitOffset( $limit, $offset = 0 ) {
 174+ $this->limit = intval( $limit );
 175+ $this->offset = intval( $offset );
 176+ }
 177+
 178+ /**
 179+ * Set which namespaces the search should include.
 180+ * Give an array of namespace index numbers.
 181+ *
 182+ * @param array $namespaces
 183+ * @access public
 184+ */
 185+ function setNamespaces( $namespaces ) {
 186+ $this->namespaces = $namespaces;
 187+ }
 188+
 189+ /**
 190+ * Parse some common prefixes: all (search everything)
 191+ * or namespace names
 192+ *
 193+ * @param string $query
 194+ */
 195+ function replacePrefixes( $query ){
 196+ global $wgContLang;
 197+
 198+ if( strpos($query,':') === false )
 199+ return $query; // nothing to do
 200+
 201+ $parsed = $query;
 202+ $allkeyword = wfMsgForContent('searchall').":";
 203+ if( strncmp($query, $allkeyword, strlen($allkeyword)) == 0 ){
 204+ $this->namespaces = null;
 205+ $parsed = substr($query,strlen($allkeyword));
 206+ } else if( strpos($query,':') !== false ) {
 207+ $prefix = substr($query,0,strpos($query,':'));
 208+ $index = $wgContLang->getNsIndex($prefix);
 209+ if($index !== false){
 210+ $this->namespaces = array($index);
 211+ $parsed = substr($query,strlen($prefix)+1);
 212+ }
 213+ }
 214+ if(trim($parsed) == '')
 215+ return $query; // prefix was the whole query
 216+
 217+ return $parsed;
 218+ }
 219+
 220+ /**
 221+ * Make a list of searchable namespaces and their canonical names.
 222+ * @return array
 223+ */
 224+ public static function searchableNamespaces() {
 225+ global $wgContLang;
 226+ $arr = array();
 227+ foreach( $wgContLang->getNamespaces() as $ns => $name ) {
 228+ if( $ns >= NS_MAIN ) {
 229+ $arr[$ns] = $name;
 230+ }
 231+ }
 232+ return $arr;
 233+ }
 234+
 235+ /**
 236+ * Extract default namespaces to search from the given user's
 237+ * settings, returning a list of index numbers.
 238+ *
 239+ * @param User $user
 240+ * @return array
 241+ * @static
 242+ */
 243+ public static function userNamespaces( &$user ) {
 244+ $arr = array();
 245+ foreach( SearchEngine::searchableNamespaces() as $ns => $name ) {
 246+ if( $user->getOption( 'searchNs' . $ns ) ) {
 247+ $arr[] = $ns;
 248+ }
 249+ }
 250+ return $arr;
 251+ }
 252+
 253+ /**
 254+ * Find snippet highlight settings for a given user
 255+ *
 256+ * @param User $user
 257+ * @return array contextlines, contextchars
 258+ * @static
 259+ */
 260+ public static function userHighlightPrefs( &$user ){
 261+ //$contextlines = $user->getOption( 'contextlines', 5 );
 262+ //$contextchars = $user->getOption( 'contextchars', 50 );
 263+ $contextlines = 2; // Hardcode this. Old defaults sucked. :)
 264+ $contextchars = 75; // same as above.... :P
 265+ return array($contextlines, $contextchars);
 266+ }
 267+
 268+ /**
 269+ * An array of namespace indexes to be searched by default
 270+ *
 271+ * @return array
 272+ * @static
 273+ */
 274+ public static function defaultNamespaces(){
 275+ global $wgNamespacesToBeSearchedDefault;
 276+
 277+ return array_keys($wgNamespacesToBeSearchedDefault, true);
 278+ }
 279+
 280+ /**
 281+ * Return a 'cleaned up' search string
 282+ *
 283+ * @return string
 284+ * @access public
 285+ */
 286+ function filter( $text ) {
 287+ $lc = $this->legalSearchChars();
 288+ return trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
 289+ }
 290+ /**
 291+ * Load up the appropriate search engine class for the currently
 292+ * active database backend, and return a configured instance.
 293+ *
 294+ * @return SearchEngine
 295+ */
 296+ public static function create() {
 297+ global $wgDBtype, $wgSearchType;
 298+ if( $wgSearchType ) {
 299+ $class = $wgSearchType;
 300+ } elseif( $wgDBtype == 'mysql' ) {
 301+ $class = 'SearchMySQL';
 302+ } else if ( $wgDBtype == 'postgres' ) {
 303+ $class = 'SearchPostgres';
 304+ } else if ( $wgDBtype == 'oracle' ) {
 305+ $class = 'SearchOracle';
 306+ } else {
 307+ $class = 'SearchEngineDummy';
 308+ }
 309+ $search = new $class( wfGetDB( DB_SLAVE ) );
 310+ $search->setLimitOffset(0,0);
 311+ return $search;
 312+ }
 313+
 314+ /**
 315+ * Create or update the search index record for the given page.
 316+ * Title and text should be pre-processed.
 317+ *
 318+ * @param int $id
 319+ * @param string $title
 320+ * @param string $text
 321+ * @abstract
 322+ */
 323+ function update( $id, $title, $text ) {
 324+ // no-op
 325+ }
 326+
 327+ /**
 328+ * Update a search index record's title only.
 329+ * Title should be pre-processed.
 330+ *
 331+ * @param int $id
 332+ * @param string $title
 333+ * @abstract
 334+ */
 335+ function updateTitle( $id, $title ) {
 336+ // no-op
 337+ }
 338+
 339+ /**
 340+ * Get OpenSearch suggestion template
 341+ *
 342+ * @return string
 343+ * @static
 344+ */
 345+ public static function getOpenSearchTemplate() {
 346+ global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;
 347+ if($wgOpenSearchTemplate)
 348+ return $wgOpenSearchTemplate;
 349+ else{
 350+ $ns = implode(',',SearchEngine::defaultNamespaces());
 351+ if(!$ns) $ns = "0";
 352+ return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace='.$ns;
 353+ }
 354+ }
 355+
 356+ /**
 357+ * Get internal MediaWiki Suggest template
 358+ *
 359+ * @return string
 360+ * @static
 361+ */
 362+ public static function getMWSuggestTemplate() {
 363+ global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;
 364+ if($wgMWSuggestTemplate)
 365+ return $wgMWSuggestTemplate;
 366+ else
 367+ return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}';
 368+ }
 369+}
 370+
 371+/**
 372+ * @ingroup Search
 373+ */
 374+class SearchResultSet {
 375+ /**
 376+ * Fetch an array of regular expression fragments for matching
 377+ * the search terms as parsed by this engine in a text extract.
 378+ *
 379+ * @return array
 380+ * @access public
 381+ * @abstract
 382+ */
 383+ function termMatches() {
 384+ return array();
 385+ }
 386+
 387+ function numRows() {
 388+ return 0;
 389+ }
 390+
 391+ /**
 392+ * Return true if results are included in this result set.
 393+ * @return bool
 394+ * @abstract
 395+ */
 396+ function hasResults() {
 397+ return false;
 398+ }
 399+
 400+ /**
 401+ * Some search modes return a total hit count for the query
 402+ * in the entire article database. This may include pages
 403+ * in namespaces that would not be matched on the given
 404+ * settings.
 405+ *
 406+ * Return null if no total hits number is supported.
 407+ *
 408+ * @return int
 409+ * @access public
 410+ */
 411+ function getTotalHits() {
 412+ return null;
 413+ }
 414+
 415+ /**
 416+ * Some search modes return a suggested alternate term if there are
 417+ * no exact hits. Returns true if there is one on this set.
 418+ *
 419+ * @return bool
 420+ * @access public
 421+ */
 422+ function hasSuggestion() {
 423+ return false;
 424+ }
 425+
 426+ /**
 427+ * @return string suggested query, null if none
 428+ */
 429+ function getSuggestionQuery(){
 430+ return null;
 431+ }
 432+
 433+ /**
 434+ * @return string highlighted suggested query, '' if none
 435+ */
 436+ function getSuggestionSnippet(){
 437+ return '';
 438+ }
 439+
 440+ /**
 441+ * Return information about how and from where the results were fetched,
 442+ * should be useful for diagnostics and debugging
 443+ *
 444+ * @return string
 445+ */
 446+ function getInfo() {
 447+ return null;
 448+ }
 449+
 450+ /**
 451+ * Return a result set of hits on other (multiple) wikis associated with this one
 452+ *
 453+ * @return SearchResultSet
 454+ */
 455+ function getInterwikiResults() {
 456+ return null;
 457+ }
 458+
 459+ /**
 460+ * Check if there are results on other wikis
 461+ *
 462+ * @return boolean
 463+ */
 464+ function hasInterwikiResults() {
 465+ return $this->getInterwikiResults() != null;
 466+ }
 467+
 468+
 469+ /**
 470+ * Fetches next search result, or false.
 471+ * @return SearchResult
 472+ * @access public
 473+ * @abstract
 474+ */
 475+ function next() {
 476+ return false;
 477+ }
 478+
 479+ /**
 480+ * Frees the result set, if applicable.
 481+ * @access public
 482+ */
 483+ function free() {
 484+ // ...
 485+ }
 486+}
 487+
 488+
 489+/**
 490+ * @ingroup Search
 491+ */
 492+class SearchResultTooMany {
 493+ ## Some search engines may bail out if too many matches are found
 494+}
 495+
 496+
 497+/**
 498+ * @ingroup Search
 499+ */
 500+class SearchResult {
 501+ var $mRevision = null;
 502+
 503+ function SearchResult( $row ) {
 504+ $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
 505+ if( !is_null($this->mTitle) )
 506+ $this->mRevision = Revision::newFromTitle( $this->mTitle );
 507+ }
 508+
 509+ /**
 510+ * Check if this result points to an invalid title
 511+ *
 512+ * @return boolean
 513+ * @access public
 514+ */
 515+ function isBrokenTitle(){
 516+ if( is_null($this->mTitle) )
 517+ return true;
 518+ return false;
 519+ }
 520+
 521+ /**
 522+ * Check if target page is missing, happens when index is out of date
 523+ *
 524+ * @return boolean
 525+ * @access public
 526+ */
 527+ function isMissingRevision(){
 528+ if( !$this->mRevision )
 529+ return true;
 530+ return false;
 531+ }
 532+
 533+ /**
 534+ * @return Title
 535+ * @access public
 536+ */
 537+ function getTitle() {
 538+ return $this->mTitle;
 539+ }
 540+
 541+ /**
 542+ * @return double or null if not supported
 543+ */
 544+ function getScore() {
 545+ return null;
 546+ }
 547+
 548+ /**
 549+ * Lazy initialization of article text from DB
 550+ */
 551+ protected function initText(){
 552+ if( !isset($this->mText) ){
 553+ $this->mText = $this->mRevision->getText();
 554+ }
 555+ }
 556+
 557+ /**
 558+ * @param array $terms terms to highlight
 559+ * @return string highlighted text snippet, null (and not '') if not supported
 560+ */
 561+ function getTextSnippet($terms){
 562+ global $wgUser, $wgAdvancedSearchHighlighting;
 563+ $this->initText();
 564+ list($contextlines,$contextchars) = SearchEngine::userHighlightPrefs($wgUser);
 565+ $h = new SearchHighlighter();
 566+ if( $wgAdvancedSearchHighlighting )
 567+ return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
 568+ else
 569+ return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
 570+ }
 571+
 572+ /**
 573+ * @param array $terms terms to highlight
 574+ * @return string highlighted title, '' if not supported
 575+ */
 576+ function getTitleSnippet($terms){
 577+ return '';
 578+ }
 579+
 580+ /**
 581+ * @param array $terms terms to highlight
 582+ * @return string highlighted redirect name (redirect to this page), '' if none or not supported
 583+ */
 584+ function getRedirectSnippet($terms){
 585+ return '';
 586+ }
 587+
 588+ /**
 589+ * @return Title object for the redirect to this page, null if none or not supported
 590+ */
 591+ function getRedirectTitle(){
 592+ return null;
 593+ }
 594+
 595+ /**
 596+ * @return string highlighted relevant section name, '' if none or not supported
 597+ */
 598+ function getSectionSnippet(){
 599+ return '';
 600+ }
 601+
 602+ /**
 603+ * @return Title object (pagename+fragment) for the section, null if none or not supported
 604+ */
 605+ function getSectionTitle(){
 606+ return null;
 607+ }
 608+
 609+ /**
 610+ * @return string timestamp
 611+ */
 612+ function getTimestamp(){
 613+ return $this->mRevision->getTimestamp();
 614+ }
 615+
 616+ /**
 617+ * @return int number of words
 618+ */
 619+ function getWordCount(){
 620+ $this->initText();
 621+ return str_word_count( $this->mText );
 622+ }
 623+
 624+ /**
 625+ * @return int size in bytes
 626+ */
 627+ function getByteSize(){
 628+ $this->initText();
 629+ return strlen( $this->mText );
 630+ }
 631+
 632+ /**
 633+ * @return boolean if hit has related articles
 634+ */
 635+ function hasRelated(){
 636+ return false;
 637+ }
 638+
 639+ /**
 640+ * @return string interwiki prefix of the title (return iw even if title is broken)
 641+ */
 642+ function getInterwikiPrefix(){
 643+ return '';
 644+ }
 645+}
 646+
 647+/**
 648+ * Highlight bits of wikitext
 649+ *
 650+ * @ingroup Search
 651+ */
 652+class SearchHighlighter {
 653+ var $mCleanWikitext = true;
 654+
 655+ function SearchHighlighter($cleanupWikitext = true){
 656+ $this->mCleanWikitext = $cleanupWikitext;
 657+ }
 658+
 659+ /**
 660+ * Default implementation of wikitext highlighting
 661+ *
 662+ * @param string $text
 663+ * @param array $terms Terms to highlight (unescaped)
 664+ * @param int $contextlines
 665+ * @param int $contextchars
 666+ * @return string
 667+ */
 668+ public function highlightText( $text, $terms, $contextlines, $contextchars ) {
 669+ global $wgLang, $wgContLang;
 670+ global $wgSearchHighlightBoundaries;
 671+ $fname = __METHOD__;
 672+
 673+ if($text == '')
 674+ return '';
 675+
 676+ // split text into text + templates/links/tables
 677+ $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
 678+ // first capture group is for detecting nested templates/links/tables/references
 679+ $endPatterns = array(
 680+ 1 => '/(\{\{)|(\}\})/', // template
 681+ 2 => '/(\[\[)|(\]\])/', // image
 682+ 3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table
 683+
 684+ // FIXME: this should prolly be a hook or something
 685+ if(function_exists('wfCite')){
 686+ $spat .= '|(<ref>)'; // references via cite extension
 687+ $endPatterns[4] = '/(<ref>)|(<\/ref>)/';
 688+ }
 689+ $spat .= '/';
 690+ $textExt = array(); // text extracts
 691+ $otherExt = array(); // other extracts
 692+ wfProfileIn( "$fname-split" );
 693+ $start = 0;
 694+ $textLen = strlen($text);
 695+ $count = 0; // sequence number to maintain ordering
 696+ while( $start < $textLen ){
 697+ // find start of template/image/table
 698+ if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){
 699+ $epat = '';
 700+ foreach($matches as $key => $val){
 701+ if($key > 0 && $val[1] != -1){
 702+ if($key == 2){
 703+ // see if this is an image link
 704+ $ns = substr($val[0],2,-1);
 705+ if( $wgContLang->getNsIndex($ns) != NS_IMAGE )
 706+ break;
 707+
 708+ }
 709+ $epat = $endPatterns[$key];
 710+ $this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
 711+ $start = $val[1];
 712+ break;
 713+ }
 714+ }
 715+ if( $epat ){
 716+ // find end (and detect any nested elements)
 717+ $level = 0;
 718+ $offset = $start + 1;
 719+ $found = false;
 720+ while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){
 721+ if( array_key_exists(2,$endMatches) ){
 722+ // found end
 723+ if($level == 0){
 724+ $len = strlen($endMatches[2][0]);
 725+ $off = $endMatches[2][1];
 726+ $this->splitAndAdd( $otherExt, $count,
 727+ substr( $text, $start, $off + $len - $start ) );
 728+ $start = $off + $len;
 729+ $found = true;
 730+ break;
 731+ } else{
 732+ // end of nested element
 733+ $level -= 1;
 734+ }
 735+ } else{
 736+ // nested
 737+ $level += 1;
 738+ }
 739+ $offset = $endMatches[0][1] + strlen($endMatches[0][0]);
 740+ }
 741+ if( ! $found ){
 742+ // couldn't find appropriate closing tag, skip
 743+ $this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) );
 744+ $start += strlen($matches[0][0]);
 745+ }
 746+ continue;
 747+ }
 748+ }
 749+ // else: add as text extract
 750+ $this->splitAndAdd( $textExt, $count, substr($text,$start) );
 751+ break;
 752+ }
 753+
 754+ $all = $textExt + $otherExt; // these have disjunct key sets
 755+
 756+ wfProfileOut( "$fname-split" );
 757+
 758+ // prepare regexps
 759+ foreach( $terms as $index => $term ) {
 760+ $terms[$index] = preg_quote( $term, '/' );
 761+ // manually do upper/lowercase stuff for utf-8 since PHP won't do it
 762+ if(preg_match('/[\x80-\xff]/', $term) ){
 763+ $terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]);
 764+ }
 765+
 766+
 767+ }
 768+ $anyterm = implode( '|', $terms );
 769+ $phrase = implode("$wgSearchHighlightBoundaries+", $terms );
 770+
 771+ // FIXME: a hack to scale contextchars, a correct solution
 772+ // would be to have contextchars actually be char and not byte
 773+ // length, and do proper utf-8 substrings and lengths everywhere,
 774+ // but PHP is making that very hard and unclean to implement :(
 775+ $scale = strlen($anyterm) / mb_strlen($anyterm);
 776+ $contextchars = intval( $contextchars * $scale );
 777+
 778+ $patPre = "(^|$wgSearchHighlightBoundaries)";
 779+ $patPost = "($wgSearchHighlightBoundaries|$)";
 780+
 781+ $pat1 = "/(".$phrase.")/ui";
 782+ $pat2 = "/$patPre(".$anyterm.")$patPost/ui";
 783+
 784+ wfProfileIn( "$fname-extract" );
 785+
 786+ $left = $contextlines;
 787+
 788+ $snippets = array();
 789+ $offsets = array();
 790+
 791+ // show beginning only if it contains all words
 792+ $first = 0;
 793+ $firstText = '';
 794+ foreach($textExt as $index => $line){
 795+ if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){
 796+ $firstText = $this->extract( $line, 0, $contextchars * $contextlines );
 797+ $first = $index;
 798+ break;
 799+ }
 800+ }
 801+ if( $firstText ){
 802+ $succ = true;
 803+ // check if first text contains all terms
 804+ foreach($terms as $term){
 805+ if( ! preg_match("/$patPre".$term."$patPost/ui", $firstText) ){
 806+ $succ = false;
 807+ break;
 808+ }
 809+ }
 810+ if( $succ ){
 811+ $snippets[$first] = $firstText;
 812+ $offsets[$first] = 0;
 813+ }
 814+ }
 815+ if( ! $snippets ) {
 816+ // match whole query on text
 817+ $this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets);
 818+ // match whole query on templates/tables/images
 819+ $this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets);
 820+ // match any words on text
 821+ $this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets);
 822+ // match any words on templates/tables/images
 823+ $this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets);
 824+
 825+ ksort($snippets);
 826+ }
 827+
 828+ // add extra chars to each snippet to make snippets constant size
 829+ $extended = array();
 830+ if( count( $snippets ) == 0){
 831+ // couldn't find the target words, just show beginning of article
 832+ $targetchars = $contextchars * $contextlines;
 833+ $snippets[$first] = '';
 834+ $offsets[$first] = 0;
 835+ } else{
 836+ // if begin of the article contains the whole phrase, show only that !!
 837+ if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first])
 838+ && $offsets[$first] < $contextchars * 2 ){
 839+ $snippets = array ($first => $snippets[$first]);
 840+ }
 841+
 842+ // calc by how much to extend existing snippets
 843+ $targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) );
 844+ }
 845+
 846+ foreach($snippets as $index => $line){
 847+ $extended[$index] = $line;
 848+ $len = strlen($line);
 849+ if( $len < $targetchars - 20 ){
 850+ // complete this line
 851+ if($len < strlen( $all[$index] )){
 852+ $extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]);
 853+ $len = strlen( $extended[$index] );
 854+ }
 855+
 856+ // add more lines
 857+ $add = $index + 1;
 858+ while( $len < $targetchars - 20
 859+ && array_key_exists($add,$all)
 860+ && !array_key_exists($add,$snippets) ){
 861+ $offsets[$add] = 0;
 862+ $tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
 863+ $extended[$add] = $tt;
 864+ $len += strlen( $tt );
 865+ $add++;
 866+ }
 867+ }
 868+ }
 869+
 870+ //$snippets = array_map('htmlspecialchars', $extended);
 871+ $snippets = $extended;
 872+ $last = -1;
 873+ $extract = '';
 874+ foreach($snippets as $index => $line){
 875+ if($last == -1)
 876+ $extract .= $line; // first line
 877+ elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last]))
 878+ $extract .= " ".$line; // continous lines
 879+ else
 880+ $extract .= '<b> ... </b>' . $line;
 881+
 882+ $last = $index;
 883+ }
 884+ if( $extract )
 885+ $extract .= '<b> ... </b>';
 886+
 887+ $processed = array();
 888+ foreach($terms as $term){
 889+ if( ! isset($processed[$term]) ){
 890+ $pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word
 891+ $extract = preg_replace( $pat3,
 892+ "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
 893+ $processed[$term] = true;
 894+ }
 895+ }
 896+
 897+ wfProfileOut( "$fname-extract" );
 898+
 899+ return $extract;
 900+ }
 901+
 902+ /**
 903+ * Split text into lines and add it to extracts array
 904+ *
 905+ * @param array $extracts index -> $line
 906+ * @param int $count
 907+ * @param string $text
 908+ */
 909+ function splitAndAdd(&$extracts, &$count, $text){
 910+ $split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text );
 911+ foreach($split as $line){
 912+ $tt = trim($line);
 913+ if( $tt )
 914+ $extracts[$count++] = $tt;
 915+ }
 916+ }
 917+
 918+ /**
 919+ * Do manual case conversion for non-ascii chars
 920+ *
 921+ * @param unknown_type $matches
 922+ */
 923+ function caseCallback($matches){
 924+ global $wgContLang;
 925+ if( strlen($matches[0]) > 1 ){
 926+ return '['.$wgContLang->lc($matches[0]).$wgContLang->uc($matches[0]).']';
 927+ } else
 928+ return $matches[0];
 929+ }
 930+
 931+ /**
 932+ * Extract part of the text from start to end, but by
 933+ * not chopping up words
 934+ * @param string $text
 935+ * @param int $start
 936+ * @param int $end
 937+ * @param int $posStart (out) actual start position
 938+ * @param int $posEnd (out) actual end position
 939+ * @return string
 940+ */
 941+ function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){
 942+ global $wgContLang;
 943+
 944+ if( $start != 0)
 945+ $start = $this->position( $text, $start, 1 );
 946+ if( $end >= strlen($text) )
 947+ $end = strlen($text);
 948+ else
 949+ $end = $this->position( $text, $end );
 950+
 951+ if(!is_null($posStart))
 952+ $posStart = $start;
 953+ if(!is_null($posEnd))
 954+ $posEnd = $end;
 955+
 956+ if($end > $start)
 957+ return substr($text, $start, $end-$start);
 958+ else
 959+ return '';
 960+ }
 961+
 962+ /**
 963+ * Find a nonletter near a point (index) in the text
 964+ *
 965+ * @param string $text
 966+ * @param int $point
 967+ * @param int $offset to found index
 968+ * @return int nearest nonletter index, or beginning of utf8 char if none
 969+ */
 970+ function position($text, $point, $offset=0 ){
 971+ $tolerance = 10;
 972+ $s = max( 0, $point - $tolerance );
 973+ $l = min( strlen($text), $point + $tolerance ) - $s;
 974+ $m = array();
 975+ if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){
 976+ return $m[0][1] + $s + $offset;
 977+ } else{
 978+ // check if point is on a valid first UTF8 char
 979+ $char = ord( $text[$point] );
 980+ while( $char >= 0x80 && $char < 0xc0 ) {
 981+ // skip trailing bytes
 982+ $point++;
 983+ if($point >= strlen($text))
 984+ return strlen($text);
 985+ $char = ord( $text[$point] );
 986+ }
 987+ return $point;
 988+
 989+ }
 990+ }
 991+
 992+ /**
 993+ * Search extracts for a pattern, and return snippets
 994+ *
 995+ * @param string $pattern regexp for matching lines
 996+ * @param array $extracts extracts to search
 997+ * @param int $linesleft number of extracts to make
 998+ * @param int $contextchars length of snippet
 999+ * @param array $out map for highlighted snippets
 1000+ * @param array $offsets map of starting points of snippets
 1001+ * @protected
 1002+ */
 1003+ function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){
 1004+ if($linesleft == 0)
 1005+ return; // nothing to do
 1006+ foreach($extracts as $index => $line){
 1007+ if( array_key_exists($index,$out) )
 1008+ continue; // this line already highlighted
 1009+
 1010+ $m = array();
 1011+ if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
 1012+ continue;
 1013+
 1014+ $offset = $m[0][1];
 1015+ $len = strlen($m[0][0]);
 1016+ if($offset + $len < $contextchars)
 1017+ $begin = 0;
 1018+ elseif( $len > $contextchars)
 1019+ $begin = $offset;
 1020+ else
 1021+ $begin = $offset + intval( ($len - $contextchars) / 2 );
 1022+
 1023+ $end = $begin + $contextchars;
 1024+
 1025+ $posBegin = $begin;
 1026+ // basic snippet from this line
 1027+ $out[$index] = $this->extract($line,$begin,$end,$posBegin);
 1028+ $offsets[$index] = $posBegin;
 1029+ $linesleft--;
 1030+ if($linesleft == 0)
 1031+ return;
 1032+ }
 1033+ }
 1034+
 1035+ /**
 1036+ * Basic wikitext removal
 1037+ * @protected
 1038+ */
 1039+ function removeWiki($text) {
 1040+ $fname = __METHOD__;
 1041+ wfProfileIn( $fname );
 1042+
 1043+ //$text = preg_replace("/'{2,5}/", "", $text);
 1044+ //$text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text);
 1045+ //$text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text);
 1046+ //$text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text);
 1047+ //$text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text);
 1048+ //$text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text);
 1049+ $text = preg_replace("/\\{\\{([^|]+?)\\}\\}/", "", $text);
 1050+ $text = preg_replace("/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text);
 1051+ $text = preg_replace("/\\[\\[([^|]+?)\\]\\]/", "\\1", $text);
 1052+ $text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text);
 1053+ //$text = preg_replace("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", "\\2", $text);
 1054+ $text = preg_replace("/<\/?[^>]+>/", "", $text);
 1055+ $text = preg_replace("/'''''/", "", $text);
 1056+ $text = preg_replace("/('''|<\/?[iIuUbB]>)/", "", $text);
 1057+ $text = preg_replace("/''/", "", $text);
 1058+
 1059+ wfProfileOut( $fname );
 1060+ return $text;
 1061+ }
 1062+
 1063+ /**
 1064+ * callback to replace [[target|caption]] kind of links, if
 1065+ * the target is category or image, leave it
 1066+ *
 1067+ * @param array $matches
 1068+ */
 1069+ function linkReplace($matches){
 1070+ $colon = strpos( $matches[1], ':' );
 1071+ if( $colon === false )
 1072+ return $matches[2]; // replace with caption
 1073+ global $wgContLang;
 1074+ $ns = substr( $matches[1], 0, $colon );
 1075+ $index = $wgContLang->getNsIndex($ns);
 1076+ if( $index !== false && ($index == NS_IMAGE || $index == NS_CATEGORY) )
 1077+ return $matches[0]; // return the whole thing
 1078+ else
 1079+ return $matches[2];
 1080+
 1081+ }
 1082+
 1083+ /**
 1084+ * Simple & fast snippet extraction, but gives completely unrelevant
 1085+ * snippets
 1086+ *
 1087+ * @param string $text
 1088+ * @param array $terms
 1089+ * @param int $contextlines
 1090+ * @param int $contextchars
 1091+ * @return string
 1092+ */
 1093+ public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
 1094+ global $wgLang, $wgContLang;
 1095+ $fname = __METHOD__;
 1096+
 1097+ $lines = explode( "\n", $text );
 1098+
 1099+ $terms = implode( '|', $terms );
 1100+ $terms = str_replace( '/', "\\/", $terms);
 1101+ $max = intval( $contextchars ) + 1;
 1102+ $pat1 = "/(.*)($terms)(.{0,$max})/i";
 1103+
 1104+ $lineno = 0;
 1105+
 1106+ $extract = "";
 1107+ wfProfileIn( "$fname-extract" );
 1108+ foreach ( $lines as $line ) {
 1109+ if ( 0 == $contextlines ) {
 1110+ break;
 1111+ }
 1112+ ++$lineno;
 1113+ $m = array();
 1114+ if ( ! preg_match( $pat1, $line, $m ) ) {
 1115+ continue;
 1116+ }
 1117+ --$contextlines;
 1118+ $pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' );
 1119+
 1120+ if ( count( $m ) < 3 ) {
 1121+ $post = '';
 1122+ } else {
 1123+ $post = $wgContLang->truncate( $m[3], $contextchars, ' ... ' );
 1124+ }
 1125+
 1126+ $found = $m[2];
 1127+
 1128+ $line = htmlspecialchars( $pre . $found . $post );
 1129+ $pat2 = '/(' . $terms . ")/i";
 1130+ $line = preg_replace( $pat2,
 1131+ "<span class='searchmatch'>\\1</span>", $line );
 1132+
 1133+ $extract .= "${line}\n";
 1134+ }
 1135+ wfProfileOut( "$fname-extract" );
 1136+
 1137+ return $extract;
 1138+ }
 1139+
 1140+}
 1141+
 1142+/**
 1143+ * @ingroup Search
 1144+ */
 1145+class SearchEngineDummy {
 1146+ function search( $term ) {
 1147+ return null;
 1148+ }
 1149+ function setLimitOffset($l, $o) {}
 1150+ function legalSearchChars() {}
 1151+ function update() {}
 1152+ function setnamespaces() {}
 1153+ function searchtitle() {}
 1154+ function searchtext() {}
 1155+}
Property changes on: trunk/phase3/includes/search/Engine.php
___________________________________________________________________
Added: svn:keywords
11156 + Author Date Id Revision
Added: svn:eol-style
21157 + native
Index: trunk/phase3/includes/search/MySQL.php
@@ -0,0 +1,262 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
 26+/**
 27+ * Search engine hook for MySQL 4+
 28+ * @ingroup Search
 29+ */
 30+class SearchMySQL extends SearchEngine {
 31+ var $strictMatching = true;
 32+
 33+ /** @todo document */
 34+ function __construct( $db ) {
 35+ $this->db = $db;
 36+ }
 37+
 38+ /** @todo document */
 39+ function parseQuery( $filteredText, $fulltext ) {
 40+ global $wgContLang;
 41+ $lc = SearchEngine::legalSearchChars(); // Minus format chars
 42+ $searchon = '';
 43+ $this->searchTerms = array();
 44+
 45+ # FIXME: This doesn't handle parenthetical expressions.
 46+ $m = array();
 47+ if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
 48+ $filteredText, $m, PREG_SET_ORDER ) ) {
 49+ foreach( $m as $terms ) {
 50+ if( $searchon !== '' ) $searchon .= ' ';
 51+ if( $this->strictMatching && ($terms[1] == '') ) {
 52+ $terms[1] = '+';
 53+ }
 54+ $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] );
 55+ if( !empty( $terms[3] ) ) {
 56+ // Match individual terms in result highlighting...
 57+ $regexp = preg_quote( $terms[3], '/' );
 58+ if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+";
 59+ } else {
 60+ // Match the quoted term in result highlighting...
 61+ $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
 62+ }
 63+ $this->searchTerms[] = $regexp;
 64+ }
 65+ wfDebug( "Would search with '$searchon'\n" );
 66+ wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" );
 67+ } else {
 68+ wfDebug( "Can't understand search query '{$filteredText}'\n" );
 69+ }
 70+
 71+ $searchon = $this->db->strencode( $searchon );
 72+ $field = $this->getIndexField( $fulltext );
 73+ return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) ";
 74+ }
 75+
 76+ public static function legalSearchChars() {
 77+ return "\"*" . parent::legalSearchChars();
 78+ }
 79+
 80+ /**
 81+ * Perform a full text search query and return a result set.
 82+ *
 83+ * @param string $term - Raw search term
 84+ * @return MySQLSearchResultSet
 85+ * @access public
 86+ */
 87+ function searchText( $term ) {
 88+ $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) );
 89+ return new MySQLSearchResultSet( $resultSet, $this->searchTerms );
 90+ }
 91+
 92+ /**
 93+ * Perform a title-only search query and return a result set.
 94+ *
 95+ * @param string $term - Raw search term
 96+ * @return MySQLSearchResultSet
 97+ * @access public
 98+ */
 99+ function searchTitle( $term ) {
 100+ $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) );
 101+ return new MySQLSearchResultSet( $resultSet, $this->searchTerms );
 102+ }
 103+
 104+
 105+ /**
 106+ * Return a partial WHERE clause to exclude redirects, if so set
 107+ * @return string
 108+ * @private
 109+ */
 110+ function queryRedirect() {
 111+ if( $this->showRedirects ) {
 112+ return '';
 113+ } else {
 114+ return 'AND page_is_redirect=0';
 115+ }
 116+ }
 117+
 118+ /**
 119+ * Return a partial WHERE clause to limit the search to the given namespaces
 120+ * @return string
 121+ * @private
 122+ */
 123+ function queryNamespaces() {
 124+ if( is_null($this->namespaces) )
 125+ return ''; # search all
 126+ $namespaces = implode( ',', $this->namespaces );
 127+ if ($namespaces == '') {
 128+ $namespaces = '0';
 129+ }
 130+ return 'AND page_namespace IN (' . $namespaces . ')';
 131+ }
 132+
 133+ /**
 134+ * Return a LIMIT clause to limit results on the query.
 135+ * @return string
 136+ * @private
 137+ */
 138+ function queryLimit() {
 139+ return $this->db->limitResult( '', $this->limit, $this->offset );
 140+ }
 141+
 142+ /**
 143+ * Does not do anything for generic search engine
 144+ * subclasses may define this though
 145+ * @return string
 146+ * @private
 147+ */
 148+ function queryRanking( $filteredTerm, $fulltext ) {
 149+ return '';
 150+ }
 151+
 152+ /**
 153+ * Construct the full SQL query to do the search.
 154+ * The guts shoulds be constructed in queryMain()
 155+ * @param string $filteredTerm
 156+ * @param bool $fulltext
 157+ * @private
 158+ */
 159+ function getQuery( $filteredTerm, $fulltext ) {
 160+ return $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
 161+ $this->queryRedirect() . ' ' .
 162+ $this->queryNamespaces() . ' ' .
 163+ $this->queryRanking( $filteredTerm, $fulltext ) . ' ' .
 164+ $this->queryLimit();
 165+ }
 166+
 167+
 168+ /**
 169+ * Picks which field to index on, depending on what type of query.
 170+ * @param bool $fulltext
 171+ * @return string
 172+ */
 173+ function getIndexField( $fulltext ) {
 174+ return $fulltext ? 'si_text' : 'si_title';
 175+ }
 176+
 177+ /**
 178+ * Get the base part of the search query.
 179+ * The actual match syntax will depend on the server
 180+ * version; MySQL 3 and MySQL 4 have different capabilities
 181+ * in their fulltext search indexes.
 182+ *
 183+ * @param string $filteredTerm
 184+ * @param bool $fulltext
 185+ * @return string
 186+ * @private
 187+ */
 188+ function queryMain( $filteredTerm, $fulltext ) {
 189+ $match = $this->parseQuery( $filteredTerm, $fulltext );
 190+ $page = $this->db->tableName( 'page' );
 191+ $searchindex = $this->db->tableName( 'searchindex' );
 192+ return 'SELECT page_id, page_namespace, page_title ' .
 193+ "FROM $page,$searchindex " .
 194+ 'WHERE page_id=si_page AND ' . $match;
 195+ }
 196+
 197+ /**
 198+ * Create or update the search index record for the given page.
 199+ * Title and text should be pre-processed.
 200+ *
 201+ * @param int $id
 202+ * @param string $title
 203+ * @param string $text
 204+ */
 205+ function update( $id, $title, $text ) {
 206+ $dbw = wfGetDB( DB_MASTER );
 207+ $dbw->replace( 'searchindex',
 208+ array( 'si_page' ),
 209+ array(
 210+ 'si_page' => $id,
 211+ 'si_title' => $title,
 212+ 'si_text' => $text
 213+ ), __METHOD__ );
 214+ }
 215+
 216+ /**
 217+ * Update a search index record's title only.
 218+ * Title should be pre-processed.
 219+ *
 220+ * @param int $id
 221+ * @param string $title
 222+ */
 223+ function updateTitle( $id, $title ) {
 224+ $dbw = wfGetDB( DB_MASTER );
 225+
 226+ $dbw->update( 'searchindex',
 227+ array( 'si_title' => $title ),
 228+ array( 'si_page' => $id ),
 229+ __METHOD__,
 230+ array( $dbw->lowPriorityOption() ) );
 231+ }
 232+}
 233+
 234+/**
 235+ * @ingroup Search
 236+ */
 237+class MySQLSearchResultSet extends SearchResultSet {
 238+ function MySQLSearchResultSet( $resultSet, $terms ) {
 239+ $this->mResultSet = $resultSet;
 240+ $this->mTerms = $terms;
 241+ }
 242+
 243+ function termMatches() {
 244+ return $this->mTerms;
 245+ }
 246+
 247+ function numRows() {
 248+ return $this->mResultSet->numRows();
 249+ }
 250+
 251+ function next() {
 252+ $row = $this->mResultSet->fetchObject();
 253+ if( $row === false ) {
 254+ return false;
 255+ } else {
 256+ return new SearchResult( $row );
 257+ }
 258+ }
 259+
 260+ function free() {
 261+ $this->mResultSet->free();
 262+ }
 263+}
Property changes on: trunk/phase3/includes/search/MySQL.php
___________________________________________________________________
Added: svn:keywords
1264 + Author Date Id Revision
Added: svn:eol-style
2265 + native
Index: trunk/phase3/includes/search/Oracle.php
@@ -0,0 +1,240 @@
 2+<?php
 3+# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
 26+/**
 27+ * Search engine hook base class for Oracle (ConText).
 28+ * @ingroup Search
 29+ */
 30+class SearchOracle extends SearchEngine {
 31+ function __construct($db) {
 32+ $this->db = $db;
 33+ }
 34+
 35+ /**
 36+ * Perform a full text search query and return a result set.
 37+ *
 38+ * @param string $term - Raw search term
 39+ * @return OracleSearchResultSet
 40+ * @access public
 41+ */
 42+ function searchText( $term ) {
 43+ $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), true)));
 44+ return new OracleSearchResultSet($resultSet, $this->searchTerms);
 45+ }
 46+
 47+ /**
 48+ * Perform a title-only search query and return a result set.
 49+ *
 50+ * @param string $term - Raw search term
 51+ * @return ORacleSearchResultSet
 52+ * @access public
 53+ */
 54+ function searchTitle($term) {
 55+ $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false)));
 56+ return new MySQLSearchResultSet($resultSet, $this->searchTerms);
 57+ }
 58+
 59+
 60+ /**
 61+ * Return a partial WHERE clause to exclude redirects, if so set
 62+ * @return string
 63+ * @private
 64+ */
 65+ function queryRedirect() {
 66+ if ($this->showRedirects) {
 67+ return '';
 68+ } else {
 69+ return 'AND page_is_redirect=0';
 70+ }
 71+ }
 72+
 73+ /**
 74+ * Return a partial WHERE clause to limit the search to the given namespaces
 75+ * @return string
 76+ * @private
 77+ */
 78+ function queryNamespaces() {
 79+ if( is_null($this->namespaces) )
 80+ return '';
 81+ $namespaces = implode(',', $this->namespaces);
 82+ if ($namespaces == '') {
 83+ $namespaces = '0';
 84+ }
 85+ return 'AND page_namespace IN (' . $namespaces . ')';
 86+ }
 87+
 88+ /**
 89+ * Return a LIMIT clause to limit results on the query.
 90+ * @return string
 91+ * @private
 92+ */
 93+ function queryLimit($sql) {
 94+ return $this->db->limitResult($sql, $this->limit, $this->offset);
 95+ }
 96+
 97+ /**
 98+ * Does not do anything for generic search engine
 99+ * subclasses may define this though
 100+ * @return string
 101+ * @private
 102+ */
 103+ function queryRanking($filteredTerm, $fulltext) {
 104+ return ' ORDER BY score(1)';
 105+ }
 106+
 107+ /**
 108+ * Construct the full SQL query to do the search.
 109+ * The guts shoulds be constructed in queryMain()
 110+ * @param string $filteredTerm
 111+ * @param bool $fulltext
 112+ * @private
 113+ */
 114+ function getQuery( $filteredTerm, $fulltext ) {
 115+ return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' .
 116+ $this->queryRedirect() . ' ' .
 117+ $this->queryNamespaces() . ' ' .
 118+ $this->queryRanking( $filteredTerm, $fulltext ) . ' ');
 119+ }
 120+
 121+
 122+ /**
 123+ * Picks which field to index on, depending on what type of query.
 124+ * @param bool $fulltext
 125+ * @return string
 126+ */
 127+ function getIndexField($fulltext) {
 128+ return $fulltext ? 'si_text' : 'si_title';
 129+ }
 130+
 131+ /**
 132+ * Get the base part of the search query.
 133+ *
 134+ * @param string $filteredTerm
 135+ * @param bool $fulltext
 136+ * @return string
 137+ * @private
 138+ */
 139+ function queryMain( $filteredTerm, $fulltext ) {
 140+ $match = $this->parseQuery($filteredTerm, $fulltext);
 141+ $page = $this->db->tableName('page');
 142+ $searchindex = $this->db->tableName('searchindex');
 143+ return 'SELECT page_id, page_namespace, page_title ' .
 144+ "FROM $page,$searchindex " .
 145+ 'WHERE page_id=si_page AND ' . $match;
 146+ }
 147+
 148+ /** @todo document */
 149+ function parseQuery($filteredText, $fulltext) {
 150+ global $wgContLang;
 151+ $lc = SearchEngine::legalSearchChars();
 152+ $this->searchTerms = array();
 153+
 154+ # FIXME: This doesn't handle parenthetical expressions.
 155+ $m = array();
 156+ $q = array();
 157+
 158+ if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
 159+ $filteredText, $m, PREG_SET_ORDER)) {
 160+ foreach($m as $terms) {
 161+ $q[] = $terms[1] . $wgContLang->stripForSearch($terms[2]);
 162+
 163+ if (!empty($terms[3])) {
 164+ $regexp = preg_quote( $terms[3], '/' );
 165+ if ($terms[4])
 166+ $regexp .= "[0-9A-Za-z_]+";
 167+ } else {
 168+ $regexp = preg_quote(str_replace('"', '', $terms[2]), '/');
 169+ }
 170+ $this->searchTerms[] = $regexp;
 171+ }
 172+ }
 173+
 174+ $searchon = $this->db->strencode(join(',', $q));
 175+ $field = $this->getIndexField($fulltext);
 176+ return " CONTAINS($field, '$searchon', 1) > 0 ";
 177+ }
 178+
 179+ /**
 180+ * Create or update the search index record for the given page.
 181+ * Title and text should be pre-processed.
 182+ *
 183+ * @param int $id
 184+ * @param string $title
 185+ * @param string $text
 186+ */
 187+ function update($id, $title, $text) {
 188+ $dbw = wfGetDB(DB_MASTER);
 189+ $dbw->replace('searchindex',
 190+ array('si_page'),
 191+ array(
 192+ 'si_page' => $id,
 193+ 'si_title' => $title,
 194+ 'si_text' => $text
 195+ ), 'SearchOracle::update' );
 196+ $dbw->query("CALL ctx_ddl.sync_index('si_text_idx')");
 197+ $dbw->query("CALL ctx_ddl.sync_index('si_title_idx')");
 198+ }
 199+
 200+ /**
 201+ * Update a search index record's title only.
 202+ * Title should be pre-processed.
 203+ *
 204+ * @param int $id
 205+ * @param string $title
 206+ */
 207+ function updateTitle($id, $title) {
 208+ $dbw = wfGetDB(DB_MASTER);
 209+
 210+ $dbw->update('searchindex',
 211+ array('si_title' => $title),
 212+ array('si_page' => $id),
 213+ 'SearchOracle::updateTitle',
 214+ array());
 215+ }
 216+}
 217+
 218+/**
 219+ * @ingroup Search
 220+ */
 221+class OracleSearchResultSet extends SearchResultSet {
 222+ function __construct($resultSet, $terms) {
 223+ $this->mResultSet = $resultSet;
 224+ $this->mTerms = $terms;
 225+ }
 226+
 227+ function termMatches() {
 228+ return $this->mTerms;
 229+ }
 230+
 231+ function numRows() {
 232+ return $this->mResultSet->numRows();
 233+ }
 234+
 235+ function next() {
 236+ $row = $this->mResultSet->fetchObject();
 237+ if ($row === false)
 238+ return false;
 239+ return new SearchResult($row);
 240+ }
 241+}
Property changes on: trunk/phase3/includes/search/Oracle.php
___________________________________________________________________
Added: svn:eol-style
1242 + native
Index: trunk/phase3/includes/search/Postgres.php
@@ -0,0 +1,255 @@
 2+<?php
 3+# Copyright (C) 2006-2007 Greg Sabino Mullane <greg@turnstep.com>
 4+# http://www.mediawiki.org/
 5+#
 6+# This program is free software; you can redistribute it and/or modify
 7+# it under the terms of the GNU General Public License as published by
 8+# the Free Software Foundation; either version 2 of the License, or
 9+# (at your option) any later version.
 10+#
 11+# This program is distributed in the hope that it will be useful,
 12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14+# GNU General Public License for more details.
 15+#
 16+# You should have received a copy of the GNU General Public License along
 17+# with this program; if not, write to the Free Software Foundation, Inc.,
 18+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 19+# http://www.gnu.org/copyleft/gpl.html
 20+
 21+/**
 22+ * @file
 23+ * @ingroup Search
 24+ */
 25+
 26+/**
 27+ * Search engine hook base class for Postgres
 28+ * @ingroup Search
 29+ */
 30+class SearchPostgres extends SearchEngine {
 31+
 32+ function SearchPostgres( $db ) {
 33+ $this->db = $db;
 34+ }
 35+
 36+ /**
 37+ * Perform a full text search query via tsearch2 and return a result set.
 38+ * Currently searches a page's current title (page.page_title) and
 39+ * latest revision article text (pagecontent.old_text)
 40+ *
 41+ * @param string $term - Raw search term
 42+ * @return PostgresSearchResultSet
 43+ * @access public
 44+ */
 45+ function searchTitle( $term ) {
 46+ $q = $this->searchQuery( $term , 'titlevector', 'page_title' );
 47+ $olderror = error_reporting(E_ERROR);
 48+ $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
 49+ error_reporting($olderror);
 50+ if (!$resultSet) {
 51+ // Needed for "Query requires full scan, GIN doesn't support it"
 52+ return new SearchResultTooMany();
 53+ }
 54+ return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
 55+ }
 56+ function searchText( $term ) {
 57+ $q = $this->searchQuery( $term, 'textvector', 'old_text' );
 58+ $olderror = error_reporting(E_ERROR);
 59+ $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
 60+ error_reporting($olderror);
 61+ if (!$resultSet) {
 62+ return new SearchResultTooMany();
 63+ }
 64+ return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
 65+ }
 66+
 67+
 68+ /*
 69+ * Transform the user's search string into a better form for tsearch2
 70+ */
 71+ function parseQuery( $term ) {
 72+
 73+ wfDebug( "parseQuery received: $term" );
 74+
 75+ ## No backslashes allowed
 76+ $term = preg_replace('/\\\/', '', $term);
 77+
 78+ ## Collapse parens into nearby words:
 79+ $term = preg_replace('/\s*\(\s*/', ' (', $term);
 80+ $term = preg_replace('/\s*\)\s*/', ') ', $term);
 81+
 82+ ## Treat colons as word separators:
 83+ $term = preg_replace('/:/', ' ', $term);
 84+
 85+ $searchstring = '';
 86+ $m = array();
 87+ if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
 88+ foreach( $m as $terms ) {
 89+ if (strlen($terms[1])) {
 90+ $searchstring .= ' & !';
 91+ }
 92+ if (strtolower($terms[2]) === 'and') {
 93+ $searchstring .= ' & ';
 94+ }
 95+ else if (strtolower($terms[2]) === 'or' or $terms[2] === '|') {
 96+ $searchstring .= ' | ';
 97+ }
 98+ else if (strtolower($terms[2]) === 'not') {
 99+ $searchstring .= ' & !';
 100+ }
 101+ else {
 102+ $searchstring .= " & $terms[2]";
 103+ }
 104+ }
 105+ }
 106+
 107+ ## Strip out leading junk
 108+ $searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring);
 109+
 110+ ## Remove any doubled-up operators
 111+ $searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring);
 112+
 113+ ## Remove any non-spaced operators (e.g. "Zounds!")
 114+ $searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring);
 115+
 116+ ## Remove any trailing whitespace or operators
 117+ $searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring);
 118+
 119+ ## Remove unnecessary quotes around everything
 120+ $searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring);
 121+
 122+ ## Quote the whole thing
 123+ $searchstring = $this->db->addQuotes($searchstring);
 124+
 125+ wfDebug( "parseQuery returned: $searchstring" );
 126+
 127+ return $searchstring;
 128+
 129+ }
 130+
 131+ /**
 132+ * Construct the full SQL query to do the search.
 133+ * @param string $filteredTerm
 134+ * @param string $fulltext
 135+ * @private
 136+ */
 137+ function searchQuery( $term, $fulltext, $colname ) {
 138+ global $wgDBversion;
 139+
 140+ if ( !isset( $wgDBversion ) ) {
 141+ $this->db->getServerVersion();
 142+ $wgDBversion = $this->db->numeric_version;
 143+ }
 144+ $prefix = $wgDBversion < 8.3 ? "'default'," : '';
 145+
 146+ $searchstring = $this->parseQuery( $term );
 147+
 148+ ## We need a separate query here so gin does not complain about empty searches
 149+ $SQL = "SELECT to_tsquery($prefix $searchstring)";
 150+ $res = $this->db->doQuery($SQL);
 151+ if (!$res) {
 152+ ## TODO: Better output (example to catch: one 'two)
 153+ die ("Sorry, that was not a valid search string. Please go back and try again");
 154+ }
 155+ $top = pg_fetch_result($res,0,0);
 156+
 157+ if ($top === "") { ## e.g. if only stopwords are used XXX return something better
 158+ $query = "SELECT page_id, page_namespace, page_title, 0 AS score ".
 159+ "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
 160+ "AND r.rev_text_id = c.old_id AND 1=0";
 161+ }
 162+ else {
 163+ $m = array();
 164+ if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
 165+ foreach( $m as $terms ) {
 166+ $this->searchTerms[$terms[1]] = $terms[1];
 167+ }
 168+ }
 169+
 170+ $rankscore = $wgDBversion > 8.2 ? 5 : 1;
 171+ $rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank';
 172+ $query = "SELECT page_id, page_namespace, page_title, ".
 173+ "$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ".
 174+ "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
 175+ "AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)";
 176+ }
 177+
 178+ ## Redirects
 179+ if (! $this->showRedirects)
 180+ $query .= ' AND page_is_redirect = 0';
 181+
 182+ ## Namespaces - defaults to 0
 183+ if( !is_null($this->namespaces) ){ // null -> search all
 184+ if ( count($this->namespaces) < 1)
 185+ $query .= ' AND page_namespace = 0';
 186+ else {
 187+ $namespaces = implode( ',', $this->namespaces );
 188+ $query .= " AND page_namespace IN ($namespaces)";
 189+ }
 190+ }
 191+
 192+ $query .= " ORDER BY score DESC, page_id DESC";
 193+
 194+ $query .= $this->db->limitResult( '', $this->limit, $this->offset );
 195+
 196+ wfDebug( "searchQuery returned: $query" );
 197+
 198+ return $query;
 199+ }
 200+
 201+ ## Most of the work of these two functions is done automatically via triggers
 202+
 203+ function update( $pageid, $title, $text ) {
 204+ ## We don't want to index older revisions
 205+ $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ".
 206+ "(SELECT rev_text_id FROM revision WHERE rev_page = $pageid ".
 207+ "ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)";
 208+ $this->db->doQuery($SQL);
 209+ return true;
 210+ }
 211+
 212+ function updateTitle( $id, $title ) {
 213+ return true;
 214+ }
 215+
 216+} ## end of the SearchPostgres class
 217+
 218+/**
 219+ * @ingroup Search
 220+ */
 221+class PostgresSearchResult extends SearchResult {
 222+ function PostgresSearchResult( $row ) {
 223+ $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
 224+ $this->score = $row->score;
 225+ }
 226+ function getScore() {
 227+ return $this->score;
 228+ }
 229+}
 230+
 231+/**
 232+ * @ingroup Search
 233+ */
 234+class PostgresSearchResultSet extends SearchResultSet {
 235+ function PostgresSearchResultSet( $resultSet, $terms ) {
 236+ $this->mResultSet = $resultSet;
 237+ $this->mTerms = $terms;
 238+ }
 239+
 240+ function termMatches() {
 241+ return $this->mTerms;
 242+ }
 243+
 244+ function numRows() {
 245+ return $this->mResultSet->numRows();
 246+ }
 247+
 248+ function next() {
 249+ $row = $this->mResultSet->fetchObject();
 250+ if( $row === false ) {
 251+ return false;
 252+ } else {
 253+ return new PostgresSearchResult( $row );
 254+ }
 255+ }
 256+}
Property changes on: trunk/phase3/includes/search/Postgres.php
___________________________________________________________________
Added: svn:eol-style
1257 + native
Index: trunk/phase3/includes/AutoLoader.php
@@ -126,10 +126,10 @@
127127 'MimeMagic' => 'includes/MimeMagic.php',
128128 'MWException' => 'includes/Exception.php',
129129 'MWNamespace' => 'includes/Namespace.php',
130 - 'MySQLSearchResultSet' => 'includes/SearchMySQL.php',
 130+ 'MySQLSearchResultSet' => 'includes/search/MySQL.php',
131131 'Namespace' => 'includes/NamespaceCompat.php', // Compat
132132 'OldChangesList' => 'includes/ChangesList.php',
133 - 'OracleSearchResultSet' => 'includes/SearchOracle.php',
 133+ 'OracleSearchResultSet' => 'includes/search/Oracle.php',
134134 'OutputPage' => 'includes/OutputPage.php',
135135 'PageHistory' => 'includes/PageHistory.php',
136136 'PageHistoryPager' => 'includes/PageHistory.php',
@@ -137,8 +137,8 @@
138138 'Pager' => 'includes/Pager.php',
139139 'PasswordError' => 'includes/User.php',
140140 'PatrolLog' => 'includes/PatrolLog.php',
141 - 'PostgresSearchResult' => 'includes/SearchPostgres.php',
142 - 'PostgresSearchResultSet' => 'includes/SearchPostgres.php',
 141+ 'PostgresSearchResult' => 'includes/search/Postgres.php',
 142+ 'PostgresSearchResultSet' => 'includes/search/Postgres.php',
143143 'PrefixSearch' => 'includes/PrefixSearch.php',
144144 'Profiler' => 'includes/Profiler.php',
145145 'ProfilerSimple' => 'includes/ProfilerSimple.php',
@@ -158,18 +158,18 @@
159159 'Revision' => 'includes/Revision.php',
160160 'RSSFeed' => 'includes/Feed.php',
161161 'Sanitizer' => 'includes/Sanitizer.php',
162 - 'SearchEngineDummy' => 'includes/SearchEngine.php',
163 - 'SearchEngine' => 'includes/SearchEngine.php',
164 - 'SearchHighlighter' => 'includes/SearchEngine.php',
165 - 'SearchMySQL4' => 'includes/SearchMySQL4.php',
166 - 'SearchMySQL' => 'includes/SearchMySQL.php',
167 - 'SearchOracle' => 'includes/SearchOracle.php',
168 - 'SearchPostgres' => 'includes/SearchPostgres.php',
169 - 'SearchResult' => 'includes/SearchEngine.php',
170 - 'SearchResultSet' => 'includes/SearchEngine.php',
171 - 'SearchResultTooMany' => 'includes/SearchEngine.php',
172 - 'SearchUpdate' => 'includes/SearchUpdate.php',
173 - 'SearchUpdateMyISAM' => 'includes/SearchUpdate.php',
 162+ 'SearchEngineDummy' => 'includes/search/Engine.php',
 163+ 'SearchEngine' => 'includes/search/Engine.php',
 164+ 'SearchHighlighter' => 'includes/search/Engine.php',
 165+ 'SearchMySQL4' => 'includes/search/MySQL4.php',
 166+ 'SearchMySQL' => 'includes/search/MySQL.php',
 167+ 'SearchOracle' => 'includes/search/Oracle.php',
 168+ 'SearchPostgres' => 'includes/search/Postgres.php',
 169+ 'SearchResult' => 'includes/search/Engine.php',
 170+ 'SearchResultSet' => 'includes/search/Engine.php',
 171+ 'SearchResultTooMany' => 'includes/search/Engine.php',
 172+ 'SearchUpdate' => 'includes/search/Update.php',
 173+ 'SearchUpdateMyISAM' => 'includes/search/Update.php',
174174 'SiteConfiguration' => 'includes/SiteConfiguration.php',
175175 'SiteStats' => 'includes/SiteStats.php',
176176 'SiteStatsUpdate' => 'includes/SiteStats.php',

Follow-up revisions

Revision | Commit summary | Author | Date
r36480 | Revert r36413 -- renaming of search files into 'search' subdirectory... | brion | 21:02, 19 June 2008

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r36403 | More ~/includes cleanup. Moving all the Search*.php files to ~/includes/search. | demon | 20:58, 17 June 2008

Status & tagging log