Index: trunk/phase3/includes/SearchEngine.php |
— | — | @@ -1,1154 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * @defgroup Search Search |
5 | | - * |
6 | | - * @file |
7 | | - * @ingroup Search |
8 | | - */ |
9 | | - |
10 | | -/** |
11 | | - * Contains the search engine class used by special pages |
12 | | - * @ingroup Search |
13 | | - */ |
class SearchEngine {
	var $limit = 10;
	var $offset = 0;
	var $searchTerms = array();
	var $namespaces = array( NS_MAIN );
	var $showRedirects = false;

	/**
	 * Run a full text search and hand back the result set.
	 * This base implementation performs no search and returns null.
	 *
	 * @param string $term Raw search term
	 * @return SearchResultSet|null
	 */
	function searchText( $term ) {
		return null;
	}

	/**
	 * Run a title-only search and hand back the result set.
	 * This base implementation performs no search and returns null.
	 *
	 * @param string $term Raw search term
	 * @return SearchResultSet|null
	 */
	function searchTitle( $term ) {
		return null;
	}

	/**
	 * Look for a page whose title matches the search term exactly, or
	 * differs from it only in letter case. Language variants of the term
	 * are tried as well when the content language has variants.
	 *
	 * @param string $searchterm Term as entered by the user
	 * @return Title|null The matching title, or NULL if nothing matched
	 */
	public static function getNearMatch( $searchterm ) {
		global $wgContLang, $wgCapitalLinks;

		$candidates = array( $searchterm );
		if ( $wgContLang->hasVariants() ) {
			$candidates = array_merge( $candidates, $wgContLang->convertLinkToAllVariants( $searchterm ) );
		}

		// Content-language case transformations, tried in this order once
		// the literal term has failed: all lower case, capitalized words,
		// all upper case, and caps at word breaks (hyphenated names etc).
		$caseMethods = array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' );
		// Only relevant when first letters are not auto-capitalized.
		$firstLetterMethods = array( 'ucfirst', 'lcfirst' );

		foreach ( $candidates as $term ) {
			// The literal term: an unparseable title ends the whole lookup.
			$title = Title::newFromText( $term );
			if ( is_null( $title ) ) {
				return NULL;
			}

			if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal()
				|| $title->exists() ) {
				return $title;
			}

			foreach ( $caseMethods as $method ) {
				$title = Title::newFromText( $wgContLang->$method( $term ) );
				if ( $title && $title->exists() ) {
					return $title;
				}
			}

			if ( !$wgCapitalLinks ) {
				// Catch titles that differ by first-letter case only
				foreach ( $firstLetterMethods as $method ) {
					$title = Title::newFromText( $wgContLang->$method( $term ) );
					if ( $title && $title->exists() ) {
						return $title;
					}
				}
			}

			// Let hooks propose a better match; a false return from the
			// hook run means the hook's $title is final.
			$title = null;
			if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
				return $title;
			}
		}

		$title = Title::newFromText( $searchterm );

		// An IP address leads to that address's contributions page
		if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
			|| User::isIP( trim( $searchterm ) ) ) {
			return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
		}

		// A user name leads to the user page whether or not it exists
		if ( $title->getNamespace() == NS_USER ) {
			return $title;
		}

		// A file that can be found is returned even without a local page
		if ( $title->getNamespace() == NS_IMAGE ) {
			if ( wfFindFile( $title ) ) {
				return $title;
			}
		}

		// MediaWiki namespace: return the title with its first letter
		// upper-cased, whether or not the page exists
		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
		}

		// A fully quoted term is retried without its quotes
		$quoted = array();
		if ( preg_match( '/^"([^"]+)"$/', $searchterm, $quoted ) ) {
			return self::getNearMatch( $quoted[1] );
		}

		return NULL;
	}

	/**
	 * Character-class fragment (for use inside a regex bracket expression)
	 * listing the characters filter() keeps.
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return "A-Za-z_'0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set how many results to return at most and how many to skip first.
	 * Both values are coerced to integers.
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	function setLimitOffset( $limit, $offset = 0 ) {
		$this->offset = intval( $offset );
		$this->limit = intval( $limit );
	}

	/**
	 * Choose the namespaces to search.
	 *
	 * @param array $namespaces Namespace index numbers
	 */
	function setNamespaces( $namespaces ) {
		$this->namespaces = $namespaces;
	}

	/**
	 * Interpret a leading "<searchall message>:" or "<namespace name>:"
	 * prefix in the query. A recognised prefix updates $this->namespaces
	 * (null for the search-all keyword) and is stripped from the query.
	 *
	 * @param string $query
	 * @return string The query without the prefix, or the unchanged query
	 *  when no prefix was recognised or nothing but the prefix was given
	 */
	function replacePrefixes( $query ) {
		global $wgContLang;

		$colonPos = strpos( $query, ':' );
		if ( $colonPos === false ) {
			// No colon, so there cannot be a prefix
			return $query;
		}

		$remainder = $query;
		$allkeyword = wfMsgForContent('searchall').":";
		$keywordLength = strlen( $allkeyword );

		if ( strncmp( $query, $allkeyword, $keywordLength ) == 0 ) {
			$this->namespaces = null;
			$remainder = substr( $query, $keywordLength );
		} else {
			$prefix = substr( $query, 0, $colonPos );
			$nsIndex = $wgContLang->getNsIndex( $prefix );
			if ( $nsIndex !== false ) {
				$this->namespaces = array( $nsIndex );
				$remainder = substr( $query, strlen( $prefix ) + 1 );
			}
		}

		// The prefix made up the entire query: hand the original back
		return ( trim( $remainder ) == '' ) ? $query : $remainder;
	}

	/**
	 * List the namespaces with an index of NS_MAIN or above, keyed by
	 * index, with the names reported by the content language.
	 *
	 * @return array
	 */
	public static function searchableNamespaces() {
		global $wgContLang;
		$searchable = array();
		foreach ( $wgContLang->getNamespaces() as $index => $label ) {
			if ( $index < NS_MAIN ) {
				continue;
			}
			$searchable[$index] = $label;
		}
		return $searchable;
	}

	/**
	 * Collect the namespaces a user has switched on for searching
	 * (options named 'searchNs<index>').
	 *
	 * @param User $user
	 * @return array List of namespace index numbers
	 */
	public static function userNamespaces( &$user ) {
		$selected = array();
		foreach ( array_keys( self::searchableNamespaces() ) as $index ) {
			if ( $user->getOption( 'searchNs' . $index ) ) {
				$selected[] = $index;
			}
		}
		return $selected;
	}

	/**
	 * Snippet highlight settings for a user. The user's own options are
	 * not consulted; fixed values are returned.
	 *
	 * @param User $user
	 * @return array ( contextlines, contextchars )
	 */
	public static function userHighlightPrefs( &$user ) {
		return array( 2, 75 );
	}

	/**
	 * Namespace indexes searched by default, taken from the keys of
	 * $wgNamespacesToBeSearchedDefault whose value compares equal to true.
	 *
	 * @return array
	 */
	public static function defaultNamespaces() {
		global $wgNamespacesToBeSearchedDefault;

		return array_keys( $wgNamespacesToBeSearchedDefault, true );
	}

	/**
	 * Replace every character outside legalSearchChars() with a space and
	 * trim the result.
	 *
	 * @param string $text
	 * @return string
	 */
	function filter( $text ) {
		$legal = $this->legalSearchChars();
		$cleaned = preg_replace( "/[^{$legal}]/", " ", $text );
		return trim( $cleaned );
	}

	/**
	 * Instantiate the search engine class named by $wgSearchType or,
	 * failing that, the one matching the database type. The instance is
	 * created with a DB_SLAVE connection and limit/offset set to 0.
	 *
	 * @return SearchEngine
	 */
	public static function create() {
		global $wgDBtype, $wgSearchType;

		if ( $wgSearchType ) {
			$class = $wgSearchType;
		} else {
			switch ( $wgDBtype ) {
				case 'mysql':
					$class = 'SearchMySQL';
					break;
				case 'postgres':
					$class = 'SearchPostgres';
					break;
				case 'oracle':
					$class = 'SearchOracle';
					break;
				default:
					$class = 'SearchEngineDummy';
			}
		}

		$engine = new $class( wfGetDB( DB_SLAVE ) );
		$engine->setLimitOffset(0,0);
		return $engine;
	}

	/**
	 * Create or update the search index record for a page.
	 * Does nothing in this base class.
	 *
	 * @param int $id
	 * @param string $title Pre-processed title
	 * @param string $text Pre-processed text
	 */
	function update( $id, $title, $text ) {
		// intentionally empty
	}

	/**
	 * Update only the title of a search index record.
	 * Does nothing in this base class.
	 *
	 * @param int $id
	 * @param string $title Pre-processed title
	 */
	function updateTitle( $id, $title ) {
		// intentionally empty
	}

	/**
	 * URL template for OpenSearch suggestions: $wgOpenSearchTemplate when
	 * set, otherwise an api.php URL restricted to the default namespaces.
	 *
	 * @return string
	 */
	public static function getOpenSearchTemplate() {
		global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;

		if ( $wgOpenSearchTemplate ) {
			return $wgOpenSearchTemplate;
		}

		$ns = implode( ',', self::defaultNamespaces() );
		if ( !$ns ) {
			$ns = "0";
		}
		return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace='.$ns;
	}

	/**
	 * URL template for the internal MediaWiki suggest feature:
	 * $wgMWSuggestTemplate when set, otherwise an api.php URL with a
	 * {namespaces} placeholder.
	 *
	 * @return string
	 */
	public static function getMWSuggestTemplate() {
		global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;

		if ( $wgMWSuggestTemplate ) {
			return $wgMWSuggestTemplate;
		}
		return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}';
	}
}
370 | | - |
371 | | -/** |
372 | | - * @ingroup Search |
373 | | - */ |
class SearchResultSet {
	/**
	 * Regular expression fragments that match the search terms, as parsed
	 * by the engine, inside a text extract.
	 * This base implementation has none.
	 *
	 * @return array
	 */
	function termMatches() {
		return array();
	}

	/**
	 * Number of rows in this result set; always 0 here.
	 *
	 * @return int
	 */
	function numRows() {
		return 0;
	}

	/**
	 * Whether this result set holds any results; always false here.
	 *
	 * @return bool
	 */
	function hasResults() {
		return false;
	}

	/**
	 * Total hit count for the query across the entire article database,
	 * for search modes that provide one. The count may include pages in
	 * namespaces that the current settings would not match.
	 *
	 * @return int|null Null when no total is supported
	 */
	function getTotalHits() {
		return null;
	}

	/**
	 * Whether a suggested alternate term is available for this set.
	 *
	 * @return bool
	 */
	function hasSuggestion() {
		return false;
	}

	/**
	 * @return string|null Suggested query, null if there is none
	 */
	function getSuggestionQuery() {
		return null;
	}

	/**
	 * @return string Highlighted suggested query, '' if there is none
	 */
	function getSuggestionSnippet() {
		return '';
	}

	/**
	 * Diagnostic information about how and from where the results were
	 * fetched. This base implementation provides none.
	 *
	 * @return string|null
	 */
	function getInfo() {
		return null;
	}

	/**
	 * Result set of hits on other wikis associated with this one.
	 *
	 * @return SearchResultSet|null
	 */
	function getInterwikiResults() {
		return null;
	}

	/**
	 * Whether getInterwikiResults() yields something that does not
	 * compare equal to null.
	 *
	 * @return boolean
	 */
	function hasInterwikiResults() {
		$interwiki = $this->getInterwikiResults();
		return $interwiki != null;
	}

	/**
	 * Fetch the next search result.
	 *
	 * @return SearchResult|false False when there is no further result
	 */
	function next() {
		return false;
	}

	/**
	 * Release the result set where that applies; nothing to do here.
	 */
	function free() {
		// nothing to release in the base class
	}
}
487 | | - |
488 | | - |
489 | | -/** |
490 | | - * @ingroup Search |
491 | | - */ |
class SearchResultTooMany {
	// Deliberately empty: some search engines may bail out
	// if too many matches are found.
}
495 | | - |
496 | | - |
497 | | -/** |
498 | | - * @ingroup Search |
499 | | - */ |
class SearchResult {
	var $mRevision = null;
	// Declared explicitly; both were previously created on first assignment.
	var $mTitle = null;
	var $mText = null;

	/**
	 * Build a result from a database row: the title comes from the row's
	 * page_namespace and page_title fields, the revision is looked up
	 * from that title.
	 *
	 * @param object $row
	 */
	function SearchResult( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		if( !is_null($this->mTitle) )
			$this->mRevision = Revision::newFromTitle( $this->mTitle );
	}

	/**
	 * Check whether this result points to an invalid title
	 *
	 * @return boolean
	 */
	function isBrokenTitle(){
		return is_null( $this->mTitle );
	}

	/**
	 * Check whether the target page is missing, which happens when the
	 * index is out of date
	 *
	 * @return boolean
	 */
	function isMissingRevision(){
		return !$this->mRevision;
	}

	/**
	 * @return Title
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * @return double or null if not supported
	 */
	function getScore() {
		return null;
	}

	/**
	 * Lazy initialization of the article text from the revision.
	 * When no revision is available the text is set to '' so that the
	 * text-based accessors keep working instead of calling a method on
	 * a non-object.
	 */
	protected function initText(){
		if( !isset($this->mText) ){
			$this->mText = $this->mRevision ? $this->mRevision->getText() : '';
		}
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted text snippet, null (and not '') if not supported
	 */
	function getTextSnippet($terms){
		global $wgUser, $wgAdvancedSearchHighlighting;
		$this->initText();
		list($contextlines,$contextchars) = SearchEngine::userHighlightPrefs($wgUser);
		$h = new SearchHighlighter();
		if( $wgAdvancedSearchHighlighting )
			return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
		else
			return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted title, '' if not supported
	 */
	function getTitleSnippet($terms){
		return '';
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted redirect name (redirect to this page), '' if none or not supported
	 */
	function getRedirectSnippet($terms){
		return '';
	}

	/**
	 * @return Title object for the redirect to this page, null if none or not supported
	 */
	function getRedirectTitle(){
		return null;
	}

	/**
	 * @return string highlighted relevant section name, '' if none or not supported
	 */
	function getSectionSnippet(){
		return '';
	}

	/**
	 * @return Title object (pagename+fragment) for the section, null if none or not supported
	 */
	function getSectionTitle(){
		return null;
	}

	/**
	 * @return string timestamp of the revision, '' when the revision is missing
	 */
	function getTimestamp(){
		return $this->mRevision ? $this->mRevision->getTimestamp() : '';
	}

	/**
	 * @return int number of words, as counted by str_word_count()
	 */
	function getWordCount(){
		$this->initText();
		return str_word_count( $this->mText );
	}

	/**
	 * @return int size in bytes
	 */
	function getByteSize(){
		$this->initText();
		return strlen( $this->mText );
	}

	/**
	 * @return boolean if hit has related articles
	 */
	function hasRelated(){
		return false;
	}

	/**
	 * @return interwiki prefix of the title (return iw even if title is broken)
	 */
	function getInterwikiPrefix(){
		return '';
	}
}
646 | | - |
647 | | -/** |
648 | | - * Highlight bits of wikitext |
649 | | - * |
650 | | - * @ingroup Search |
651 | | - */ |
652 | | -class SearchHighlighter { |
653 | | - var $mCleanWikitext = true; |
654 | | - |
function SearchHighlighter( $cleanupWikitext = true ) {
	// Stored flag; splitAndAdd() consults it to decide whether text is
	// passed through removeWiki() before being split into lines.
	$this->mCleanWikitext = $cleanupWikitext;
}
658 | | - |
659 | | - /** |
660 | | - * Default implementation of wikitext highlighting |
661 | | - * |
662 | | - * @param string $text |
663 | | - * @param array $terms Terms to highlight (unescaped) |
664 | | - * @param int $contextlines |
665 | | - * @param int $contextchars |
666 | | - * @return string |
667 | | - */ |
public function highlightText( $text, $terms, $contextlines, $contextchars ) {
	// Works in three phases: (1) split the text into plain-text lines and
	// template/image/table (and, with Cite, <ref>) lines, (2) pick the
	// lines that match the terms and pad them to a roughly constant size,
	// (3) join the pieces and wrap the matched terms in
	// <span class='searchmatch'>.
	global $wgContLang;
	global $wgSearchHighlightBoundaries;
	$fname = __METHOD__;

	if($text == '')
		return '';

	// split text into text + templates/links/tables
	$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
	// in each end pattern the first capture group detects a nested
	// opener and the second one the closer
	$endPatterns = array(
		1 => '/(\{\{)|(\}\})/', // template
		2 => '/(\[\[)|(\]\])/', // image
		3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table

	// FIXME: this should prolly be a hook or something
	if(function_exists('wfCite')){
		$spat .= '|(<ref>)'; // references via cite extension
		$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
	}
	$spat .= '/';
	$textExt = array(); // text extracts
	$otherExt = array(); // other extracts
	wfProfileIn( "$fname-split" );
	$start = 0;
	$textLen = strlen($text);
	$count = 0; // sequence number to maintain ordering
	while( $start < $textLen ){
		// find start of template/image/table
		if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){
			$epat = '';
			foreach($matches as $key => $val){
				// the first capture group that actually matched tells
				// which kind of element starts here
				if($key > 0 && $val[1] != -1){
					if($key == 2){
						// see if this is an image link
						$ns = substr($val[0],2,-1);
						if( $wgContLang->getNsIndex($ns) != NS_IMAGE )
							break;

					}
					$epat = $endPatterns[$key];
					// everything before the element is plain text
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
					$start = $val[1];
					break;
				}
			}
			if( $epat ){
				// find end (and detect any nested elements)
				$level = 0;
				$offset = $start + 1;
				$found = false;
				while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){
					if( array_key_exists(2,$endMatches) ){
						// found end
						if($level == 0){
							$len = strlen($endMatches[2][0]);
							$off = $endMatches[2][1];
							$this->splitAndAdd( $otherExt, $count,
								substr( $text, $start, $off + $len - $start ) );
							$start = $off + $len;
							$found = true;
							break;
						} else{
							// end of nested element
							$level -= 1;
						}
					} else{
						// nested
						$level += 1;
					}
					$offset = $endMatches[0][1] + strlen($endMatches[0][0]);
				}
				if( ! $found ){
					// couldn't find appropriate closing tag, skip
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) );
					$start += strlen($matches[0][0]);
				}
				continue;
			}
		}
		// else: add as text extract
		$this->splitAndAdd( $textExt, $count, substr($text,$start) );
		break;
	}

	$all = $textExt + $otherExt; // these have disjunct key sets

	wfProfileOut( "$fname-split" );

	// prepare regexps
	foreach( $terms as $index => $term ) {
		$terms[$index] = preg_quote( $term, '/' );
		// manually do upper/lowercase stuff for utf-8 since PHP won't do it
		if(preg_match('/[\x80-\xff]/', $term) ){
			$terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]);
		}
	}
	$anyterm = implode( '|', $terms );
	$phrase = implode("$wgSearchHighlightBoundaries+", $terms );

	// FIXME: a hack to scale contextchars, a correct solution
	// would be to have contextchars actually be char and not byte
	// length, and do proper utf-8 substrings and lengths everywhere,
	// but PHP is making that very hard and unclean to implement :(
	// With no terms $anyterm is empty; leave contextchars unscaled in
	// that case instead of dividing by zero.
	$anytermChars = mb_strlen($anyterm);
	$scale = $anytermChars > 0 ? strlen($anyterm) / $anytermChars : 1;
	$contextchars = intval( $contextchars * $scale );

	$patPre = "(^|$wgSearchHighlightBoundaries)";
	$patPost = "($wgSearchHighlightBoundaries|$)";

	$pat1 = "/(".$phrase.")/ui";
	$pat2 = "/$patPre(".$anyterm.")$patPost/ui";

	wfProfileIn( "$fname-extract" );

	$left = $contextlines;

	$snippets = array();
	$offsets = array();

	// show beginning only if it contains all words
	$first = 0;
	$firstText = '';
	foreach($textExt as $index => $line){
		// the first text line not starting with ';' or ':'
		if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){
			$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
			$first = $index;
			break;
		}
	}
	if( $firstText ){
		$succ = true;
		// check if first text contains all terms
		foreach($terms as $term){
			if( ! preg_match("/$patPre".$term."$patPost/ui", $firstText) ){
				$succ = false;
				break;
			}
		}
		if( $succ ){
			$snippets[$first] = $firstText;
			$offsets[$first] = 0;
		}
	}
	if( ! $snippets ) {
		// match whole query on text
		$this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets);
		// match whole query on templates/tables/images
		$this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets);
		// match any words on text
		$this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets);
		// match any words on templates/tables/images
		$this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets);

		ksort($snippets);
	}

	// add extra chars to each snippet to make snippets constant size
	$extended = array();
	if( count( $snippets ) == 0){
		// couldn't find the target words, just show beginning of article
		$targetchars = $contextchars * $contextlines;
		$snippets[$first] = '';
		$offsets[$first] = 0;
	} else{
		// if begin of the article contains the whole phrase, show only that !!
		if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first])
			&& $offsets[$first] < $contextchars * 2 ){
			$snippets = array ($first => $snippets[$first]);
		}

		// calc by how much to extend existing snippets
		$targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) );
	}

	foreach($snippets as $index => $line){
		$extended[$index] = $line;
		$len = strlen($line);
		if( $len < $targetchars - 20 ){
			// complete this line
			if($len < strlen( $all[$index] )){
				$extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]);
				$len = strlen( $extended[$index] );
			}

			// add more lines
			$add = $index + 1;
			while( $len < $targetchars - 20
				&& array_key_exists($add,$all)
				&& !array_key_exists($add,$snippets) ){
				$offsets[$add] = 0;
				$tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
				$extended[$add] = $tt;
				$len += strlen( $tt );
				$add++;
			}
		}
	}

	//$snippets = array_map('htmlspecialchars', $extended);
	$snippets = $extended;
	$last = -1;
	$extract = '';
	foreach($snippets as $index => $line){
		if($last == -1)
			$extract .= $line; // first line
		elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last]))
			$extract .= " ".$line; // continuous lines
		else
			$extract .= '<b> ... </b>' . $line;

		$last = $index;
	}
	if( $extract )
		$extract .= '<b> ... </b>';

	// highlight each distinct term once
	$processed = array();
	foreach($terms as $term){
		if( ! isset($processed[$term]) ){
			$pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word
			$extract = preg_replace( $pat3,
				"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
			$processed[$term] = true;
		}
	}

	wfProfileOut( "$fname-extract" );

	return $extract;
}
901 | | - |
902 | | - /** |
903 | | - * Split text into lines and add it to extracts array |
904 | | - * |
905 | | - * @param array $extracts index -> $line |
906 | | - * @param int $count |
907 | | - * @param string $text |
908 | | - */ |
function splitAndAdd(&$extracts, &$count, $text){
	// Splits $text at newlines (after removeWiki() when mCleanWikitext is
	// set) and appends every non-empty trimmed line to $extracts under
	// the next sequence number, advancing $count for each line added.
	$split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text );
	foreach($split as $line){
		$tt = trim($line);
		// compare against '' explicitly: a plain truthiness test would
		// also drop a line consisting of just "0"
		if( $tt !== '' )
			$extracts[$count++] = $tt;
	}
}
917 | | - |
918 | | - /** |
919 | | - * Do manual case conversion for non-ascii chars |
920 | | - * |
921 | | - * @param unknown_type $matches |
922 | | - */ |
function caseCallback($matches){
	// Single-byte matches pass through untouched; a multi-byte match is
	// turned into a character class holding its lower- and upper-case
	// forms as produced by the content language.
	global $wgContLang;
	$char = $matches[0];
	if( strlen($char) <= 1 ){
		return $char;
	}
	$lower = $wgContLang->lc($char);
	$upper = $wgContLang->uc($char);
	return '['.$lower.$upper.']';
}
930 | | - |
/**
 * Cut the part of $text between two byte offsets without chopping words.
 *
 * Both offsets are moved by position(): a non-zero $start is moved with
 * an offset of 1, and $end is moved unless it already lies at or beyond
 * the end of the text, in which case it is clamped to the text length.
 *
 * @param string $text
 * @param int $start requested start offset
 * @param int $end requested end offset
 * @param int $posStart (out) actual start position; only written when the
 *   caller passes a non-null variable
 * @param int $posEnd (out) actual end position; only written when the
 *   caller passes a non-null variable
 * @return string the extracted text, or '' when the adjusted range is empty
 */
function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){
	$length = strlen( $text );

	if ( $start != 0 ) {
		$start = $this->position( $text, $start, 1 );
	}
	$end = ( $end >= $length ) ? $length : $this->position( $text, $end );

	if ( $posStart !== null ) {
		$posStart = $start;
	}
	if ( $posEnd !== null ) {
		$posEnd = $end;
	}

	return ( $end > $start ) ? substr( $text, $start, $end - $start ) : '';
}
961 | | - |
/**
 * Find a word boundary near a byte offset in the text.
 *
 * A window of 10 bytes on either side of $point is scanned and the FIRST
 * separator (space or ASCII punctuation) inside that window is used, not
 * necessarily the one closest to $point. If the window has no separator,
 * $point is advanced past any UTF-8 continuation bytes so the result
 * never lands in the middle of a multi-byte character.
 *
 * @param string $text
 * @param int $point byte offset to search around
 * @param int $offset added to the index of a separator that was found;
 *   not applied in the fallback case
 * @return int index of the separator plus $offset, otherwise the start of
 *   a UTF-8 character at or after $point, at most strlen( $text )
 */
function position($text, $point, $offset=0 ){
	$tolerance = 10;
	$length = strlen( $text );
	$s = max( 0, $point - $tolerance );
	$l = min( $length, $point + $tolerance ) - $s;
	$m = array();
	if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){
		return $m[0][1] + $s + $offset;
	}

	// No separator nearby. Keep the point inside the string before
	// indexing into it; reading past the end raised a notice and gave
	// back an offset beyond the text.
	if ( $point >= $length ) {
		return $length;
	}
	if ( $point < 0 ) {
		$point = 0;
	}

	// Bytes 0x80-0xBF are UTF-8 continuation bytes: skip forward until
	// the start of the next character.
	$char = ord( $text[$point] );
	while ( $char >= 0x80 && $char < 0xc0 ) {
		$point++;
		if ( $point >= $length ) {
			return $length;
		}
		$char = ord( $text[$point] );
	}
	return $point;
}
991 | | - |
/**
 * Search extracts for a pattern and collect a snippet for each matching line.
 *
 * Lines whose index is already present in $out are skipped. For every
 * other line that matches, a window of $contextchars bytes is placed
 * around the match and cut out with extract().
 *
 * @param string $pattern regexp for matching lines
 * @param array $extracts extracts to search, index -> line
 * @param int $linesleft (in/out) number of snippets still to make;
 *   decremented for each snippet, processing stops when it reaches 0
 * @param int $contextchars length of a snippet
 * @param array $out (in/out) map of index -> snippet
 * @param array $offsets (in/out) map of index -> actual snippet start
 * @protected
 */
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){
	if ( $linesleft == 0 ) {
		return; // nothing to do
	}

	foreach ( $extracts as $index => $line ) {
		if ( array_key_exists( $index, $out ) ) {
			continue; // this line already has a snippet
		}

		$match = array();
		if ( !preg_match( $pattern, $line, $match, PREG_OFFSET_CAPTURE ) ) {
			continue;
		}

		$matchStart = $match[0][1];
		$matchLen = strlen( $match[0][0] );

		if ( $matchStart + $matchLen < $contextchars ) {
			// match ends inside the first window: start at the line start
			$begin = 0;
		} elseif ( $matchLen > $contextchars ) {
			// match is longer than the window: start at the match
			$begin = $matchStart;
		} else {
			// centre the window on the match
			$begin = $matchStart + intval( ( $matchLen - $contextchars ) / 2 );
		}

		// extract() reports the start it actually used through this variable
		$actualBegin = $begin;
		$out[$index] = $this->extract( $line, $begin, $begin + $contextchars, $actualBegin );
		$offsets[$index] = $actualBegin;

		if ( --$linesleft == 0 ) {
			return;
		}
	}
}
1034 | | - |
/**
 * Basic wikitext removal.
 *
 * Runs a fixed sequence of regex replacements: templates, internal
 * links (piped links go through linkReplace()), angle-bracket tags and
 * quote-based bold/italic markers. The order of the steps matters.
 *
 * @param string $text
 * @return string
 * @protected
 */
function removeWiki($text) {
	wfProfileIn( __METHOD__ );

	// Array-form preg_replace applies the patterns one after another,
	// each on the result of the previous one.
	$text = preg_replace(
		array(
			"/\\{\\{([^|]+?)\\}\\}/",          // {{template}} without arguments
			"/\\{\\{([^|]+\\|)(.*?)\\}\\}/",   // {{template|rest}} -> rest
			"/\\[\\[([^|]+?)\\]\\]/",          // [[target]] -> target
		),
		array( "", "\\2", "\\1" ),
		$text
	);

	// [[target|caption]]: linkReplace() decides what is kept
	$text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text);

	$text = preg_replace(
		array(
			"/<\/?[^>]+>/",
			"/'''''/",
			"/('''|<\/?[iIuUbB]>)/",
			"/''/",
		),
		"",
		$text
	);

	wfProfileOut( __METHOD__ );
	return $text;
}
1062 | | - |
/**
 * Callback replacing [[target|caption]] links with their caption.
 *
 * If the text before the first colon of the target resolves, through
 * $wgContLang->getNsIndex(), to NS_IMAGE or NS_CATEGORY, the whole link
 * is kept as it is.
 *
 * @param array $matches [0] whole link, [1] target part, [2] caption
 * @return string
 */
function linkReplace($matches){
	$target = $matches[1];
	$colon = strpos( $target, ':' );

	if ( $colon !== false ) {
		global $wgContLang;
		$index = $wgContLang->getNsIndex( substr( $target, 0, $colon ) );
		if ( $index !== false && ( $index == NS_IMAGE || $index == NS_CATEGORY ) ) {
			return $matches[0]; // return the whole thing
		}
	}

	return $matches[2]; // replace with caption
}
1082 | | - |
/**
 * Simple and fast snippet extraction; the snippets are not ranked by
 * relevance in any way.
 *
 * The text is scanned line by line. Each of the first $contextlines
 * lines that contain a term produces one output line: the text before
 * the match truncated to $contextchars, the match itself and the text
 * after it truncated to $contextchars, HTML-escaped, with every term
 * occurrence wrapped in a "searchmatch" span.
 *
 * NOTE(review): highlighting runs on the already escaped line, so a term
 * can also match inside an HTML entity produced by htmlspecialchars().
 *
 * @param string $text
 * @param array $terms regex fragments; they are joined with '|' and used
 *   inside a pattern with only '/' escaped
 * @param int $contextlines maximum number of snippet lines
 * @param int $contextchars context length on each side of a match
 * @return string snippet lines, each terminated by "\n"
 */
public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
	global $wgContLang;
	$fname = __METHOD__;

	$lines = explode( "\n", $text );

	$terms = implode( '|', $terms );
	$terms = str_replace( '/', "\\/", $terms);
	$max = intval( $contextchars ) + 1;
	$pat1 = "/(.*)($terms)(.{0,$max})/i";
	// Highlight pattern does not depend on the line: build it once.
	$pat2 = '/(' . $terms . ")/i";

	$extract = "";
	wfProfileIn( "$fname-extract" );
	foreach ( $lines as $line ) {
		if ( 0 == $contextlines ) {
			break;
		}
		$m = array();
		if ( ! preg_match( $pat1, $line, $m ) ) {
			continue;
		}
		--$contextlines;

		// negative length: keep the END of the text before the match
		$pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' );
		$found = $m[2];
		$post = isset( $m[3] )
			? $wgContLang->truncate( $m[3], $contextchars, ' ... ' )
			: '';

		$line = htmlspecialchars( $pre . $found . $post );
		$line = preg_replace( $pat2,
			"<span class='searchmatch'>\\1</span>", $line );

		$extract .= $line . "\n";
	}
	wfProfileOut( "$fname-extract" );

	return $extract;
}
1139 | | - |
1140 | | -} |
1141 | | - |
/**
 * Do-nothing search engine: every method accepts the call, performs no
 * work and yields null.
 *
 * @ingroup Search
 */
class SearchEngineDummy {
	/**
	 * @param string $term ignored
	 * @return null
	 */
	function search( $term ) {
		return null;
	}

	/**
	 * @param mixed $l ignored
	 * @param mixed $o ignored
	 */
	function setLimitOffset($l, $o) {
	}

	/** No-op. */
	function legalSearchChars() {
	}

	/** No-op. */
	function update() {
	}

	/** No-op. */
	function setnamespaces() {
	}

	/** No-op. */
	function searchtitle() {
	}

	/** No-op. */
	function searchtext() {
	}
}
Index: trunk/phase3/includes/SearchPostgres.php |
— | — | @@ -1,255 +0,0 @@ |
2 | | -<?php |
3 | | -# Copyright (C) 2006-2007 Greg Sabino Mullane <greg@turnstep.com> |
4 | | -# http://www.mediawiki.org/ |
5 | | -# |
6 | | -# This program is free software; you can redistribute it and/or modify |
7 | | -# it under the terms of the GNU General Public License as published by |
8 | | -# the Free Software Foundation; either version 2 of the License, or |
9 | | -# (at your option) any later version. |
10 | | -# |
11 | | -# This program is distributed in the hope that it will be useful, |
12 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | -# GNU General Public License for more details. |
15 | | -# |
16 | | -# You should have received a copy of the GNU General Public License along |
17 | | -# with this program; if not, write to the Free Software Foundation, Inc., |
18 | | -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | | -# http://www.gnu.org/copyleft/gpl.html |
20 | | - |
21 | | -/** |
22 | | - * @file |
23 | | - * @ingroup Search |
24 | | - */ |
25 | | - |
26 | | -/** |
27 | | - * Search engine hook base class for Postgres |
28 | | - * @ingroup Search |
29 | | - */ |
30 | | -class SearchPostgres extends SearchEngine { |
31 | | - |
/**
 * @param object $db database handle; stored in $this->db and used by the
 *   other methods of this class for all queries
 */
function __construct( $db ) {
	$this->db = $db;
}

/**
 * PHP 4 style constructor name, kept as a plain method so existing
 * explicit calls such as parent::SearchPostgres( $db ) keep working.
 * A method named after the class is no longer a constructor on PHP 8.
 *
 * @param object $db
 */
function SearchPostgres( $db ) {
	$this->__construct( $db );
}
35 | | - |
/**
 * Perform a title-only search query via tsearch2 and return a result set.
 * Matches against the 'titlevector' column.
 *
 * @param string $term - Raw search term
 * @return PostgresSearchResultSet, or SearchResultTooMany when the query
 *   produced no result object
 * @access public
 */
function searchTitle( $term ) {
	return $this->runSearch( $this->searchQuery( $term, 'titlevector', 'page_title' ) );
}

/**
 * Perform a full text search query via tsearch2 and return a result set.
 * Matches against the 'textvector' column.
 *
 * @param string $term - Raw search term
 * @return PostgresSearchResultSet, or SearchResultTooMany when the query
 *   produced no result object
 * @access public
 */
function searchText( $term ) {
	return $this->runSearch( $this->searchQuery( $term, 'textvector', 'old_text' ) );
}

/**
 * Run a prepared search query with PHP error reporting lowered to
 * E_ERROR and wrap the outcome in a result set object.
 *
 * @param string $q complete SQL query as built by searchQuery()
 * @return PostgresSearchResultSet|SearchResultTooMany
 */
private function runSearch( $q ) {
	$olderror = error_reporting( E_ERROR );
	try {
		$resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
	} catch ( Exception $e ) {
		// Do not leave the lowered reporting level behind on failure
		error_reporting( $olderror );
		throw $e;
	}
	error_reporting( $olderror );

	if ( !$resultSet ) {
		// Needed for "Query requires full scan, GIN doesn't support it"
		return new SearchResultTooMany();
	}
	return new PostgresSearchResultSet( $resultSet, $this->searchTerms );
}
66 | | - |
67 | | - |
/**
 * Transform the user's search string into a better form for tsearch2.
 *
 * Words are joined with '&'; the words "and", "or" (or '|') and "not"
 * and a leading '-' or '!' become the matching tsquery operators.
 * The result is cleaned of stray operators and quoted with
 * $this->db->addQuotes().
 *
 * @param string $term raw search string
 * @return string quoted tsquery expression
 */
function parseQuery( $term ) {

	wfDebug( "parseQuery received: $term" );

	// Applied in order: drop backslashes, collapse parens into nearby
	// words, treat colons as word separators.
	$term = preg_replace(
		array( '/\\\/', '/\s*\(\s*/', '/\s*\)\s*/', '/:/' ),
		array( '', ' (', ') ', ' ' ),
		$term
	);

	$operators = array(
		'and' => ' & ',
		'or'  => ' | ',
		'|'   => ' | ',
		'not' => ' & !',
	);

	$searchstring = '';
	$tokens = array();
	if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $tokens, PREG_SET_ORDER ) ) {
		foreach( $tokens as $token ) {
			$negation = $token[1];
			$word = $token[2];

			if ( strlen( $negation ) ) {
				$searchstring .= ' & !';
			}
			$key = strtolower( $word );
			$searchstring .= isset( $operators[$key] ) ? $operators[$key] : " & $word";
		}
	}

	// Applied in order: strip leading junk, remove doubled-up operators,
	// remove non-spaced operators (e.g. "Zounds!"), remove trailing
	// whitespace or operators, remove quotes around everything.
	$searchstring = preg_replace(
		array(
			'/^[\s\&\|]+/',
			'/([\!\&\|]) +(?:[\&\|] +)+/',
			'/([^ ])[\!\&\|]/',
			'/[\s\!\&\|]+$/',
			'/^[\'"](.*)[\'"]$/',
		),
		array( '', "$1 ", "$1", '', "$1" ),
		$searchstring
	);

	// Quote the whole thing
	$searchstring = $this->db->addQuotes($searchstring);

	wfDebug( "parseQuery returned: $searchstring" );

	return $searchstring;

}
130 | | - |
/**
 * Construct the full SQL query to do the search.
 *
 * Also fills $this->searchTerms with the quoted lexemes found in the
 * tsquery that PostgreSQL builds from the parsed search string.
 *
 * @param string $term raw search term; passed through parseQuery()
 * @param string $fulltext name of the tsvector column to match against
 * @param string $colname not used by this method
 * @return string SQL query
 * @private
 */
function searchQuery( $term, $fulltext, $colname ) {
	global $wgDBversion;

	if ( !isset( $wgDBversion ) ) {
		$this->db->getServerVersion();
		$wgDBversion = $this->db->numeric_version;
	}
	// Servers older than 8.3 get an extra 'default' first argument
	$prefix = $wgDBversion < 8.3 ? "'default'," : '';

	$searchstring = $this->parseQuery( $term );

	## We need a separate query here so gin does not complain about empty searches
	$SQL = "SELECT to_tsquery($prefix $searchstring)";
	$res = $this->db->doQuery($SQL);
	if (!$res) {
		## TODO: Better output (example to catch: one 'two)
		die ("Sorry, that was not a valid search string. Please go back and try again");
	}
	$top = pg_fetch_result($res,0,0);

	if ($top === "") { ## e.g. if only stopwords are used XXX return something better
		// "1=0" makes this query return no rows
		$query = "SELECT page_id, page_namespace, page_title, 0 AS score ".
		"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
		"AND r.rev_text_id = c.old_id AND 1=0";
	}
	else {
		$m = array();
		if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
			foreach( $m as $terms ) {
				$this->searchTerms[$terms[1]] = $terms[1];
			}
		}

		$rankscore = $wgDBversion > 8.2 ? 5 : 1;
		$rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank';
		$query = "SELECT page_id, page_namespace, page_title, ".
		"$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ".
		"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
		"AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)";
	}

	## Redirects
	if (! $this->showRedirects)
		$query .= ' AND page_is_redirect = 0';

	## Namespaces - defaults to 0
	if( !is_null($this->namespaces) ){ // null -> search all
		if ( count($this->namespaces) < 1)
			$query .= ' AND page_namespace = 0';
		else {
			// The values go straight into the SQL text: force integers
			$namespaces = implode( ',', array_map( 'intval', $this->namespaces ) );
			$query .= " AND page_namespace IN ($namespaces)";
		}
	}

	$query .= " ORDER BY score DESC, page_id DESC";

	$query .= $this->db->limitResult( '', $this->limit, $this->offset );

	wfDebug( "searchQuery returned: $query" );

	return $query;
}
200 | | - |
201 | | - ## Most of the work of these two functions are done automatically via triggers |
202 | | - |
/**
 * Clear the text vector of an older revision of a page so that it is
 * no longer indexed.
 *
 * The row is chosen by the subquery: the second entry of the page's
 * revisions when ordered by rev_text_id descending.
 *
 * @param int $pageid page id
 * @param string $title not used by this method
 * @param string $text not used by this method
 * @return bool always true
 */
function update( $pageid, $title, $text ) {
	// The id goes straight into the SQL text: force it to an integer
	$pageid = intval( $pageid );

	## We don't want to index older revisions
	$SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ".
		"(SELECT rev_text_id FROM revision WHERE rev_page = $pageid ".
		"ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)";
	$this->db->doQuery($SQL);
	return true;
}
211 | | - |
/**
 * Title-only index update; this implementation performs no work.
 *
 * @param int $id not used
 * @param string $title not used
 * @return bool always true
 */
function updateTitle( $id, $title ) {
	return true;
}
215 | | - |
216 | | -} ## end of the SearchPostgres class |
217 | | - |
/**
 * Single search hit built from a result row that carries
 * page_namespace, page_title and score.
 *
 * @ingroup Search
 */
class PostgresSearchResult extends SearchResult {
	/**
	 * @param object $row result row with page_namespace, page_title and
	 *   score members
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$this->score = $row->score;
	}

	/**
	 * PHP 4 style constructor name, kept as a plain method for existing
	 * explicit calls; it is not a constructor on PHP 8.
	 *
	 * @param object $row
	 */
	function PostgresSearchResult( $row ) {
		$this->__construct( $row );
	}

	/**
	 * @return mixed the score value taken from the result row
	 */
	function getScore() {
		return $this->score;
	}
}
230 | | - |
/**
 * Result set wrapping a database result object together with the
 * matched search terms.
 *
 * @ingroup Search
 */
class PostgresSearchResultSet extends SearchResultSet {
	/**
	 * @param object $resultSet result object providing numRows() and
	 *   fetchObject()
	 * @param array $terms matched search terms
	 */
	function __construct( $resultSet, $terms ) {
		$this->mResultSet = $resultSet;
		$this->mTerms = $terms;
	}

	/**
	 * PHP 4 style constructor name, kept as a plain method for existing
	 * explicit calls; it is not a constructor on PHP 8.
	 *
	 * @param object $resultSet
	 * @param array $terms
	 */
	function PostgresSearchResultSet( $resultSet, $terms ) {
		$this->__construct( $resultSet, $terms );
	}

	/**
	 * @return array the terms given to the constructor
	 */
	function termMatches() {
		return $this->mTerms;
	}

	/**
	 * @return int row count reported by the wrapped result object
	 */
	function numRows() {
		return $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next hit.
	 *
	 * @return PostgresSearchResult|false false when no row is left
	 */
	function next() {
		$row = $this->mResultSet->fetchObject();
		if ( $row === false ) {
			return false;
		}
		return new PostgresSearchResult( $row );
	}
}
Index: trunk/phase3/includes/SearchUpdate.php |
— | — | @@ -1,113 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * See deferred.txt |
5 | | - * @ingroup Search |
6 | | - */ |
7 | | -class SearchUpdate { |
8 | | - |
/* private */ var $mId = 0, $mNamespace, $mTitle, $mText;
/* private */ var $mTitleWords;
// Assigned by the constructor; declared so it is not created dynamically
/* private */ var $mTextWords;

/**
 * Prepare a search index update for one page.
 *
 * If $title cannot be parsed by Title::newFromText() a debug message is
 * logged and the object keeps mId = 0, which makes doUpdate() a no-op.
 *
 * @param int $id page id
 * @param string $title page title text
 * @param string|bool $text page text, or false for a title-only update
 */
function __construct( $id, $title, $text = false ) {
	$nt = Title::newFromText( $title );
	if ( !$nt ) {
		wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
		return;
	}

	$this->mId = $id;
	$this->mText = $text;

	$this->mNamespace = $nt->getNamespace();
	$this->mTitle = $nt->getText(); # Discard namespace

	$this->mTitleWords = $this->mTextWords = array();
}

/**
 * PHP 4 style constructor name, kept as a plain method for existing
 * explicit calls. On PHP 8 it is not a constructor, so without
 * __construct() neither this class nor its subclasses would be
 * initialised by "new".
 *
 * @param int $id
 * @param string $title
 * @param string|bool $text
 */
function SearchUpdate( $id, $title, $text = false ) {
	$this->__construct( $id, $title, $text );
}
26 | | - |
/**
 * Normalise the page text and hand it to the search engine for indexing.
 *
 * Does nothing when search updates are disabled or no page id is set.
 * When no text was given (mText === false) only the title is updated.
 * Otherwise the text is lower-cased and stripped of HTML tags, external
 * URLs, image link syntax and every character outside the engine's
 * legal search characters before the 'SearchUpdate' hook and the
 * engine's update() are called.
 *
 * @return bool|null false when the update is skipped, otherwise nothing
 */
function doUpdate() {
	global $wgContLang, $wgDisableSearchUpdate;

	if( $wgDisableSearchUpdate || !$this->mId ) {
		return false;
	}

	$section = 'SearchUpdate::doUpdate';
	$regexSection = $section . '-regexps';
	wfProfileIn( $section );

	$engine = SearchEngine::create();
	$legal = SearchEngine::legalSearchChars() . '&#;';

	if( $this->mText === false ) {
		# Title-only update
		$indexTitle = Title::indexTitle( $this->mNamespace, $this->mTitle );
		$engine->updateTitle( $this->mId, $indexTitle );
		wfProfileOut( $section );
		return;
	}

	# Language-specific strip/conversion
	$text = $wgContLang->stripForSearch( $this->mText );

	wfProfileIn( $regexSection );

	# Strip HTML markup
	$padded = strtolower( " " . $text . " " );
	$text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/", ' ', $padded );

	# Emphasize headings by repeating their text
	$text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD", "\\1\\2 \\2 \\2\\3", $text );

	# Strip external URLs: bare ones first, then bracketed ones without
	# and with a caption
	$urlChars = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
	$schemes = "http|https|ftp|mailto|news|gopher";
	$text = preg_replace( "/(^|[^\\[])({$schemes}):[{$urlChars}]+([^{$urlChars}]|$)/", "\\1 \\3", $text );
	$text = preg_replace( "/([^\\[])\\[({$schemes}):[{$urlChars}]+]/", "\\1 ", $text );
	$text = preg_replace( "/([^\\[])\\[({$schemes}):[{$urlChars}]+\\s+([^\\]]+)]/", "\\1 \\3 ", $text );

	# Internal image links
	$text = preg_replace( "/\\[\\[image:([{$urlChars}]+)\\.(gif|png|jpg|jpeg)([^{$urlChars}])/i", " \\1 \\3", $text );

	# Handle [[game]]s
	$text = preg_replace( "/([^{$legal}])([{$legal}]+)]]([a-z]+)/", "\\1\\2 \\2\\3", $text );

	# Strip all remaining non-search characters
	$text = preg_replace( "/[^{$legal}]+/", " ", $text );

	# Handle 's and s'. The patterns run on the reversed text: the
	# straightforward tail-anchored forms are extremely slow on long
	# unspaced "words" (e.g. Japanese or Chinese text expanded to hex
	# codes), the reversed forms are consistently fast.
	$reversed = preg_replace( "/ s'([{$legal}]+)/", " s'\\1 \\1", strrev( $text ) );
	$text = strrev( $reversed );
	$reversed = preg_replace( "/ 's([{$legal}]+)/", " s\\1", strrev( $text ) );
	$text = strrev( $reversed );

	# Strip wiki '' and '''
	$text = preg_replace( "/''[']*/", " ", $text );

	wfProfileOut( $regexSection );

	wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) );

	# Perform the actual update
	$indexTitle = Title::indexTitle( $this->mNamespace, $this->mTitle );
	$engine->update( $this->mId, $indexTitle, $text );

	wfProfileOut( $section );
}
106 | | -} |
107 | | - |
/**
 * Placeholder subclass: adds nothing to SearchUpdate.
 *
 * @ingroup Search
 */
class SearchUpdateMyISAM extends SearchUpdate {
	// Everything is inherited from SearchUpdate.
}
Index: trunk/phase3/includes/SearchOracle.php |
— | — | @@ -1,240 +0,0 @@ |
2 | | -<?php |
3 | | -# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
4 | | -# http://www.mediawiki.org/ |
5 | | -# |
6 | | -# This program is free software; you can redistribute it and/or modify |
7 | | -# it under the terms of the GNU General Public License as published by |
8 | | -# the Free Software Foundation; either version 2 of the License, or |
9 | | -# (at your option) any later version. |
10 | | -# |
11 | | -# This program is distributed in the hope that it will be useful, |
12 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | -# GNU General Public License for more details. |
15 | | -# |
16 | | -# You should have received a copy of the GNU General Public License along |
17 | | -# with this program; if not, write to the Free Software Foundation, Inc., |
18 | | -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | | -# http://www.gnu.org/copyleft/gpl.html |
20 | | - |
21 | | -/** |
22 | | - * @file |
23 | | - * @ingroup Search |
24 | | - */ |
25 | | - |
26 | | -/** |
27 | | - * Search engine hook base class for Oracle (ConText). |
28 | | - * @ingroup Search |
29 | | - */ |
30 | | -class SearchOracle extends SearchEngine { |
/**
 * @param object $db database handle; kept in $this->db and used by the
 *   query methods of this class
 */
function __construct($db) {
	$this->db = $db;
}
34 | | - |
/**
 * Run a full text search and wrap the outcome in a result set.
 *
 * @param string $term - Raw search term
 * @return OracleSearchResultSet
 * @access public
 */
function searchText( $term ) {
	$sql = $this->getQuery( $this->filter( $term ), true );
	$resultSet = $this->db->resultObject( $this->db->query( $sql ) );
	return new OracleSearchResultSet( $resultSet, $this->searchTerms );
}
46 | | - |
/**
 * Perform a title-only search query and return a result set.
 *
 * @param string $term - Raw search term
 * @return OracleSearchResultSet
 * @access public
 */
function searchTitle($term) {
	$sql = $this->getQuery( $this->filter( $term ), false );
	$resultSet = $this->db->resultObject( $this->db->query( $sql ) );
	// Same result set class as searchText(); this used to build a
	// MySQLSearchResultSet.
	return new OracleSearchResultSet( $resultSet, $this->searchTerms );
}
58 | | - |
59 | | - |
/**
 * Partial WHERE clause excluding redirects, unless $this->showRedirects
 * is set, in which case the clause is empty.
 *
 * @return string
 * @private
 */
function queryRedirect() {
	return $this->showRedirects ? '' : 'AND page_is_redirect=0';
}
72 | | - |
/**
 * Return a partial WHERE clause to limit the search to the given namespaces.
 *
 * A null namespace list means "search everything" and yields an empty
 * clause; an empty list falls back to namespace 0.
 *
 * @return string
 * @private
 */
function queryNamespaces() {
	if( is_null($this->namespaces) )
		return '';
	// The values go straight into the SQL text: force integers
	$namespaces = implode( ',', array_map( 'intval', $this->namespaces ) );
	if ( $namespaces === '' ) {
		$namespaces = '0';
	}
	return 'AND page_namespace IN (' . $namespaces . ')';
}
87 | | - |
/**
 * Apply the configured limit and offset to a query by delegating to
 * $this->db->limitResult().
 *
 * @param string $sql query to limit
 * @return string
 * @private
 */
function queryLimit($sql) {
	$limited = $this->db->limitResult( $sql, $this->limit, $this->offset );
	return $limited;
}
96 | | - |
/**
 * Ordering clause for the search query: sorts by score(1).
 * Both parameters are ignored.
 *
 * NOTE(review): no sort direction is given, so the default (ascending)
 * applies - confirm that this is the intended ranking order.
 *
 * @param string $filteredTerm not used
 * @param bool $fulltext not used
 * @return string
 * @private
 */
function queryRanking($filteredTerm, $fulltext) {
	$orderBy = ' ORDER BY score(1)';
	return $orderBy;
}
106 | | - |
/**
 * Assemble the complete SQL search query.
 *
 * The parts - main query, redirect filter, namespace filter, ranking -
 * are joined with single spaces, followed by a trailing space, and the
 * result is passed through queryLimit().
 *
 * @param string $filteredTerm
 * @param bool $fulltext
 * @return string
 * @private
 */
function getQuery( $filteredTerm, $fulltext ) {
	$parts = array(
		$this->queryMain( $filteredTerm, $fulltext ),
		$this->queryRedirect(),
		$this->queryNamespaces(),
		$this->queryRanking( $filteredTerm, $fulltext ),
	);
	return $this->queryLimit( implode( ' ', $parts ) . ' ' );
}
120 | | - |
121 | | - |
/**
 * Name of the index column to search: 'si_text' for full text
 * queries, 'si_title' for title queries.
 *
 * @param bool $fulltext
 * @return string
 */
function getIndexField($fulltext) {
	if ( $fulltext ) {
		return 'si_text';
	}
	return 'si_title';
}
130 | | - |
/**
 * Build the base part of the search query: a SELECT of page_id,
 * page_namespace and page_title joining the page and searchindex
 * tables, with the match condition from parseQuery() appended.
 *
 * @param string $filteredTerm
 * @param bool $fulltext
 * @return string
 * @private
 */
function queryMain( $filteredTerm, $fulltext ) {
	$condition = $this->parseQuery( $filteredTerm, $fulltext );
	$pageTable = $this->db->tableName( 'page' );
	$indexTable = $this->db->tableName( 'searchindex' );

	$sql = 'SELECT page_id, page_namespace, page_title ';
	$sql .= "FROM $pageTable,$indexTable ";
	$sql .= 'WHERE page_id=si_page AND ' . $condition;
	return $sql;
}
147 | | - |
/**
 * Turn the filtered search text into a CONTAINS() condition and record
 * a highlighting regexp for every term in $this->searchTerms.
 *
 * Each token is either a run of legal search characters with an
 * optional trailing '*', or a double-quoted phrase, optionally preceded
 * by one of - + < > ~. The tokens are joined with ',' for the
 * CONTAINS() expression.
 *
 * @param string $filteredText search text
 * @param bool $fulltext selects the index column, see getIndexField()
 * @return string SQL condition
 */
function parseQuery($filteredText, $fulltext) {
	global $wgContLang;
	$lc = SearchEngine::legalSearchChars();
	$this->searchTerms = array();

	# FIXME: This doesn't handle parenthetical expressions.
	$m = array();
	$q = array();

	if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			  $filteredText, $m, PREG_SET_ORDER)) {
		foreach($m as $terms) {
			$q[] = $terms[1] . $wgContLang->stripForSearch($terms[2]);

			// Group 3 is only present for plain words. Test it against ''
			// rather than with empty(), which would also reject the word "0".
			if ( isset( $terms[3] ) && $terms[3] !== '' ) {
				$regexp = preg_quote( $terms[3], '/' );
				// trailing '*' wildcard: allow any word characters to follow
				if ( isset( $terms[4] ) && $terms[4] !== '' )
					$regexp .= "[0-9A-Za-z_]+";
			} else {
				// quoted phrase: match it literally, without the quotes
				$regexp = preg_quote(str_replace('"', '', $terms[2]), '/');
			}
			$this->searchTerms[] = $regexp;
		}
	}

	$searchon = $this->db->strencode(join(',', $q));
	$field = $this->getIndexField($fulltext);
	return " CONTAINS($field, '$searchon', 1) > 0 ";
}
178 | | - |
/**
 * Create or update the search index record for the given page, then
 * ask for the si_text_idx and si_title_idx indexes to be synchronised.
 * Title and text should be pre-processed.
 *
 * @param int $id
 * @param string $title
 * @param string $text
 */
function update($id, $title, $text) {
	$row = array(
		'si_page' => $id,
		'si_title' => $title,
		'si_text' => $text
	);

	$dbw = wfGetDB(DB_MASTER);
	$dbw->replace( 'searchindex', array('si_page'), $row, 'SearchOracle::update' );

	$dbw->query("CALL ctx_ddl.sync_index('si_text_idx')");
	$dbw->query("CALL ctx_ddl.sync_index('si_title_idx')");
}
199 | | - |
/**
 * Change only the title stored in a page's search index record.
 * Title should be pre-processed.
 *
 * @param int $id
 * @param string $title
 */
function updateTitle($id, $title) {
	$values = array('si_title' => $title);
	$conditions = array('si_page' => $id);

	$dbw = wfGetDB(DB_MASTER);
	$dbw->update( 'searchindex', $values, $conditions, 'SearchOracle::updateTitle', array() );
}
216 | | -} |
217 | | - |
/**
 * Result set wrapping a database result object together with the
 * search terms that were matched.
 *
 * @ingroup Search
 */
class OracleSearchResultSet extends SearchResultSet {
	/**
	 * @param object $resultSet result object providing numRows() and
	 *   fetchObject()
	 * @param array $terms matched search terms
	 */
	function __construct($resultSet, $terms) {
		$this->mTerms = $terms;
		$this->mResultSet = $resultSet;
	}

	/**
	 * @return array the terms given to the constructor
	 */
	function termMatches() {
		return $this->mTerms;
	}

	/**
	 * @return int row count reported by the wrapped result object
	 */
	function numRows() {
		return $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next hit.
	 *
	 * @return SearchResult|false false when no row is left
	 */
	function next() {
		$row = $this->mResultSet->fetchObject();
		return ( $row === false ) ? false : new SearchResult( $row );
	}
}
Index: trunk/phase3/includes/SearchTsearch2.php |
— | — | @@ -1,120 +0,0 @@ |
2 | | -<?php |
3 | | -# Copyright (C) 2004 Brion Vibber <brion@pobox.com>, Domas Mituzas <domas.mituzas@gmail.com> |
4 | | -# http://www.mediawiki.org/ |
5 | | -# |
6 | | -# This program is free software; you can redistribute it and/or modify |
7 | | -# it under the terms of the GNU General Public License as published by |
8 | | -# the Free Software Foundation; either version 2 of the License, or |
9 | | -# (at your option) any later version. |
10 | | -# |
11 | | -# This program is distributed in the hope that it will be useful, |
12 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | -# GNU General Public License for more details. |
15 | | -# |
16 | | -# You should have received a copy of the GNU General Public License along |
17 | | -# with this program; if not, write to the Free Software Foundation, Inc., |
18 | | -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | | -# http://www.gnu.org/copyleft/gpl.html |
20 | | - |
21 | | -/** |
22 | | - * Search engine hook for PostgreSQL / Tsearch2 |
23 | | - * @file |
24 | | - * @ingroup Search |
25 | | - */ |
26 | | - |
27 | | -/** |
28 | | - * @todo document |
29 | | - * @ingroup Search |
30 | | - */ |
31 | | -class SearchTsearch2 extends SearchEngine { |
32 | | - var $strictMatching = false; |
33 | | - |
34 | | - function __construct( $db ) { |
35 | | - $this->db = $db; |
36 | | - $this->mRanking = true; |
37 | | - } |
38 | | - |
39 | | - function getIndexField( $fulltext ) { |
40 | | - return $fulltext ? 'si_text' : 'si_title'; |
41 | | - } |
42 | | - |
43 | | - function parseQuery( $filteredText, $fulltext ) { |
44 | | - global $wgContLang; |
45 | | - $lc = SearchEngine::legalSearchChars(); |
46 | | - $searchon = ''; |
47 | | - $this->searchTerms = array(); |
48 | | - |
49 | | - # FIXME: This doesn't handle parenthetical expressions. |
50 | | - $m = array(); |
51 | | - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', |
52 | | - $filteredText, $m, PREG_SET_ORDER ) ) { |
53 | | - foreach( $m as $terms ) { |
54 | | - if( $searchon !== '' ) $searchon .= ' '; |
55 | | - if( $this->strictMatching && ($terms[1] == '') ) { |
56 | | - $terms[1] = '+'; |
57 | | - } |
58 | | - $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); |
59 | | - if( !empty( $terms[3] ) ) { |
60 | | - $regexp = preg_quote( $terms[3], '/' ); |
61 | | - if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; |
62 | | - } else { |
63 | | - $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); |
64 | | - } |
65 | | - $this->searchTerms[] = $regexp; |
66 | | - } |
67 | | - wfDebug( "Would search with '$searchon'\n" ); |
68 | | - wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" ); |
69 | | - } else { |
70 | | - wfDebug( "Can't understand search query '{$this->filteredText}'\n" ); |
71 | | - } |
72 | | - |
73 | | - $searchon = preg_replace( '/(\s+)/', '&', $searchon ); |
74 | | - $searchon = $this->db->strencode( $searchon ); |
75 | | - return $searchon; |
76 | | - } |
77 | | - |
78 | | - function queryRanking( $filteredTerm, $fulltext ) { |
79 | | - $field = $this->getIndexField( $fulltext ); |
80 | | - $searchon = $this->parseQuery( $filteredTerm, $fulltext ); |
81 | | - if ($this->mRanking) |
82 | | - return " ORDER BY rank($field,to_tsquery('$searchon')) DESC"; |
83 | | - else |
84 | | - return ""; |
85 | | - } |
86 | | - |
87 | | - |
88 | | - function queryMain( $filteredTerm, $fulltext ) { |
89 | | - $match = $this->parseQuery( $filteredTerm, $fulltext ); |
90 | | - $field = $this->getIndexField( $fulltext ); |
91 | | - $cur = $this->db->tableName( 'cur' ); |
92 | | - $searchindex = $this->db->tableName( 'searchindex' ); |
93 | | - return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' . |
94 | | - "FROM $cur,$searchindex " . |
95 | | - 'WHERE cur_id=si_page AND ' . |
96 | | - " $field @@ to_tsquery ('$match') " ; |
97 | | - } |
98 | | - |
99 | | - function update( $id, $title, $text ) { |
100 | | - $dbw = wfGetDB( DB_MASTER ); |
101 | | - $searchindex = $dbw->tableName( 'searchindex' ); |
102 | | - $sql = "DELETE FROM $searchindex WHERE si_page={$id}"; |
103 | | - $dbw->query( $sql, __METHOD__ ); |
104 | | - $sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ". |
105 | | - " VALUES ( $id, to_tsvector('". |
106 | | - $dbw->strencode($title). |
107 | | - "'),to_tsvector('". |
108 | | - $dbw->strencode( $text)."')) "; |
109 | | - $dbw->query($sql, __METHOD__ ); |
110 | | - } |
111 | | - |
112 | | - function updateTitle($id,$title) { |
113 | | - $dbw = wfGetDB(DB_MASTER); |
114 | | - $searchindex = $dbw->tableName( 'searchindex' ); |
115 | | - $sql = "UPDATE $searchindex SET si_title=to_tsvector('" . |
116 | | - $dbw->strencode( $title ) . |
117 | | - "') WHERE si_page={$id}"; |
118 | | - |
119 | | - $dbw->query( $sql, __METHOD__ ); |
120 | | - } |
121 | | -} |
Index: trunk/phase3/includes/SearchMySQL4.php |
— | — | @@ -1,34 +0,0 @@ |
2 | | -<?php |
3 | | -# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
4 | | -# http://www.mediawiki.org/ |
5 | | -# |
6 | | -# This program is free software; you can redistribute it and/or modify |
7 | | -# it under the terms of the GNU General Public License as published by |
8 | | -# the Free Software Foundation; either version 2 of the License, or |
9 | | -# (at your option) any later version. |
10 | | -# |
11 | | -# This program is distributed in the hope that it will be useful, |
12 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | -# GNU General Public License for more details. |
15 | | -# |
16 | | -# You should have received a copy of the GNU General Public License along |
17 | | -# with this program; if not, write to the Free Software Foundation, Inc., |
18 | | -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | | -# http://www.gnu.org/copyleft/gpl.html |
20 | | - |
21 | | -/** |
22 | | - * @file |
23 | | - * @ingroup Search |
24 | | - */ |
25 | | - |
26 | | -/** |
27 | | - * Search engine hook for MySQL 4+ |
28 | | - * This class retained for backwards compatibility... |
29 | | - * The meat's been moved to SearchMySQL, since the 3.x variety is gone. |
30 | | - * @ingroup Search |
31 | | - * @deprecated |
32 | | - */ |
33 | | -class SearchMySQL4 extends SearchMySQL { |
34 | | - /* whee */ |
35 | | -} |
Index: trunk/phase3/includes/SearchMySQL.php |
— | — | @@ -1,262 +0,0 @@ |
2 | | -<?php |
3 | | -# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
4 | | -# http://www.mediawiki.org/ |
5 | | -# |
6 | | -# This program is free software; you can redistribute it and/or modify |
7 | | -# it under the terms of the GNU General Public License as published by |
8 | | -# the Free Software Foundation; either version 2 of the License, or |
9 | | -# (at your option) any later version. |
10 | | -# |
11 | | -# This program is distributed in the hope that it will be useful, |
12 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | -# GNU General Public License for more details. |
15 | | -# |
16 | | -# You should have received a copy of the GNU General Public License along |
17 | | -# with this program; if not, write to the Free Software Foundation, Inc., |
18 | | -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | | -# http://www.gnu.org/copyleft/gpl.html |
20 | | - |
21 | | -/** |
22 | | - * @file |
23 | | - * @ingroup Search |
24 | | - */ |
25 | | - |
26 | | -/** |
27 | | - * Search engine hook for MySQL 4+ |
28 | | - * @ingroup Search |
29 | | - */ |
30 | | -class SearchMySQL extends SearchEngine { |
31 | | - var $strictMatching = true; |
32 | | - |
33 | | - /** @todo document */ |
34 | | - function __construct( $db ) { |
35 | | - $this->db = $db; |
36 | | - } |
37 | | - |
38 | | - /** @todo document */ |
39 | | - function parseQuery( $filteredText, $fulltext ) { |
40 | | - global $wgContLang; |
41 | | - $lc = SearchEngine::legalSearchChars(); // Minus format chars |
42 | | - $searchon = ''; |
43 | | - $this->searchTerms = array(); |
44 | | - |
45 | | - # FIXME: This doesn't handle parenthetical expressions. |
46 | | - $m = array(); |
47 | | - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', |
48 | | - $filteredText, $m, PREG_SET_ORDER ) ) { |
49 | | - foreach( $m as $terms ) { |
50 | | - if( $searchon !== '' ) $searchon .= ' '; |
51 | | - if( $this->strictMatching && ($terms[1] == '') ) { |
52 | | - $terms[1] = '+'; |
53 | | - } |
54 | | - $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); |
55 | | - if( !empty( $terms[3] ) ) { |
56 | | - // Match individual terms in result highlighting... |
57 | | - $regexp = preg_quote( $terms[3], '/' ); |
58 | | - if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; |
59 | | - } else { |
60 | | - // Match the quoted term in result highlighting... |
61 | | - $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); |
62 | | - } |
63 | | - $this->searchTerms[] = $regexp; |
64 | | - } |
65 | | - wfDebug( "Would search with '$searchon'\n" ); |
66 | | - wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" ); |
67 | | - } else { |
68 | | - wfDebug( "Can't understand search query '{$filteredText}'\n" ); |
69 | | - } |
70 | | - |
71 | | - $searchon = $this->db->strencode( $searchon ); |
72 | | - $field = $this->getIndexField( $fulltext ); |
73 | | - return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) "; |
74 | | - } |
75 | | - |
76 | | - public static function legalSearchChars() { |
77 | | - return "\"*" . parent::legalSearchChars(); |
78 | | - } |
79 | | - |
80 | | - /** |
81 | | - * Perform a full text search query and return a result set. |
82 | | - * |
83 | | - * @param string $term - Raw search term |
84 | | - * @return MySQLSearchResultSet |
85 | | - * @access public |
86 | | - */ |
87 | | - function searchText( $term ) { |
88 | | - $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); |
89 | | - return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); |
90 | | - } |
91 | | - |
92 | | - /** |
93 | | - * Perform a title-only search query and return a result set. |
94 | | - * |
95 | | - * @param string $term - Raw search term |
96 | | - * @return MySQLSearchResultSet |
97 | | - * @access public |
98 | | - */ |
99 | | - function searchTitle( $term ) { |
100 | | - $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); |
101 | | - return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); |
102 | | - } |
103 | | - |
104 | | - |
105 | | - /** |
106 | | - * Return a partial WHERE clause to exclude redirects, if so set |
107 | | - * @return string |
108 | | - * @private |
109 | | - */ |
110 | | - function queryRedirect() { |
111 | | - if( $this->showRedirects ) { |
112 | | - return ''; |
113 | | - } else { |
114 | | - return 'AND page_is_redirect=0'; |
115 | | - } |
116 | | - } |
117 | | - |
118 | | - /** |
119 | | - * Return a partial WHERE clause to limit the search to the given namespaces |
120 | | - * @return string |
121 | | - * @private |
122 | | - */ |
123 | | - function queryNamespaces() { |
124 | | - if( is_null($this->namespaces) ) |
125 | | - return ''; # search all |
126 | | - $namespaces = implode( ',', $this->namespaces ); |
127 | | - if ($namespaces == '') { |
128 | | - $namespaces = '0'; |
129 | | - } |
130 | | - return 'AND page_namespace IN (' . $namespaces . ')'; |
131 | | - } |
132 | | - |
133 | | - /** |
134 | | - * Return a LIMIT clause to limit results on the query. |
135 | | - * @return string |
136 | | - * @private |
137 | | - */ |
138 | | - function queryLimit() { |
139 | | - return $this->db->limitResult( '', $this->limit, $this->offset ); |
140 | | - } |
141 | | - |
142 | | - /** |
143 | | - * Does not do anything for generic search engine |
144 | | - * subclasses may define this though |
145 | | - * @return string |
146 | | - * @private |
147 | | - */ |
148 | | - function queryRanking( $filteredTerm, $fulltext ) { |
149 | | - return ''; |
150 | | - } |
151 | | - |
152 | | - /** |
153 | | - * Construct the full SQL query to do the search. |
154 | | - * The guts shoulds be constructed in queryMain() |
155 | | - * @param string $filteredTerm |
156 | | - * @param bool $fulltext |
157 | | - * @private |
158 | | - */ |
159 | | - function getQuery( $filteredTerm, $fulltext ) { |
160 | | - return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . |
161 | | - $this->queryRedirect() . ' ' . |
162 | | - $this->queryNamespaces() . ' ' . |
163 | | - $this->queryRanking( $filteredTerm, $fulltext ) . ' ' . |
164 | | - $this->queryLimit(); |
165 | | - } |
166 | | - |
167 | | - |
168 | | - /** |
169 | | - * Picks which field to index on, depending on what type of query. |
170 | | - * @param bool $fulltext |
171 | | - * @return string |
172 | | - */ |
173 | | - function getIndexField( $fulltext ) { |
174 | | - return $fulltext ? 'si_text' : 'si_title'; |
175 | | - } |
176 | | - |
177 | | - /** |
178 | | - * Get the base part of the search query. |
179 | | - * The actual match syntax will depend on the server |
180 | | - * version; MySQL 3 and MySQL 4 have different capabilities |
181 | | - * in their fulltext search indexes. |
182 | | - * |
183 | | - * @param string $filteredTerm |
184 | | - * @param bool $fulltext |
185 | | - * @return string |
186 | | - * @private |
187 | | - */ |
188 | | - function queryMain( $filteredTerm, $fulltext ) { |
189 | | - $match = $this->parseQuery( $filteredTerm, $fulltext ); |
190 | | - $page = $this->db->tableName( 'page' ); |
191 | | - $searchindex = $this->db->tableName( 'searchindex' ); |
192 | | - return 'SELECT page_id, page_namespace, page_title ' . |
193 | | - "FROM $page,$searchindex " . |
194 | | - 'WHERE page_id=si_page AND ' . $match; |
195 | | - } |
196 | | - |
197 | | - /** |
198 | | - * Create or update the search index record for the given page. |
199 | | - * Title and text should be pre-processed. |
200 | | - * |
201 | | - * @param int $id |
202 | | - * @param string $title |
203 | | - * @param string $text |
204 | | - */ |
205 | | - function update( $id, $title, $text ) { |
206 | | - $dbw = wfGetDB( DB_MASTER ); |
207 | | - $dbw->replace( 'searchindex', |
208 | | - array( 'si_page' ), |
209 | | - array( |
210 | | - 'si_page' => $id, |
211 | | - 'si_title' => $title, |
212 | | - 'si_text' => $text |
213 | | - ), __METHOD__ ); |
214 | | - } |
215 | | - |
216 | | - /** |
217 | | - * Update a search index record's title only. |
218 | | - * Title should be pre-processed. |
219 | | - * |
220 | | - * @param int $id |
221 | | - * @param string $title |
222 | | - */ |
223 | | - function updateTitle( $id, $title ) { |
224 | | - $dbw = wfGetDB( DB_MASTER ); |
225 | | - |
226 | | - $dbw->update( 'searchindex', |
227 | | - array( 'si_title' => $title ), |
228 | | - array( 'si_page' => $id ), |
229 | | - __METHOD__, |
230 | | - array( $dbw->lowPriorityOption() ) ); |
231 | | - } |
232 | | -} |
233 | | - |
234 | | -/** |
235 | | - * @ingroup Search |
236 | | - */ |
237 | | -class MySQLSearchResultSet extends SearchResultSet { |
238 | | - function MySQLSearchResultSet( $resultSet, $terms ) { |
239 | | - $this->mResultSet = $resultSet; |
240 | | - $this->mTerms = $terms; |
241 | | - } |
242 | | - |
243 | | - function termMatches() { |
244 | | - return $this->mTerms; |
245 | | - } |
246 | | - |
247 | | - function numRows() { |
248 | | - return $this->mResultSet->numRows(); |
249 | | - } |
250 | | - |
251 | | - function next() { |
252 | | - $row = $this->mResultSet->fetchObject(); |
253 | | - if( $row === false ) { |
254 | | - return false; |
255 | | - } else { |
256 | | - return new SearchResult( $row ); |
257 | | - } |
258 | | - } |
259 | | - |
260 | | - function free() { |
261 | | - $this->mResultSet->free(); |
262 | | - } |
263 | | -} |
Index: trunk/phase3/includes/search/MySQL4.php |
— | — | @@ -0,0 +1,34 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook for MySQL 4+ |
| 28 | + * This class retained for backwards compatibility... |
| 29 | + * The meat's been moved to SearchMySQL, since the 3.x variety is gone. |
| 30 | + * @ingroup Search |
| 31 | + * @deprecated |
| 32 | + */ |
| 33 | +class SearchMySQL4 extends SearchMySQL { |
| 34 | + /* whee */ |
| 35 | +} |
Property changes on: trunk/phase3/includes/search/MySQL4.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 36 | + native |
Index: trunk/phase3/includes/search/Tsearch2.php |
— | — | @@ -0,0 +1,120 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com>, Domas Mituzas <domas.mituzas@gmail.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * Search engine hook for PostgreSQL / Tsearch2 |
| 23 | + * @file |
| 24 | + * @ingroup Search |
| 25 | + */ |
| 26 | + |
| 27 | +/** |
| 28 | + * @todo document |
| 29 | + * @ingroup Search |
| 30 | + */ |
| 31 | +class SearchTsearch2 extends SearchEngine { |
| 32 | + var $strictMatching = false; |
| 33 | + |
| 34 | + function __construct( $db ) { |
| 35 | + $this->db = $db; |
| 36 | + $this->mRanking = true; |
| 37 | + } |
| 38 | + |
| 39 | + function getIndexField( $fulltext ) { |
| 40 | + return $fulltext ? 'si_text' : 'si_title'; |
| 41 | + } |
| 42 | + |
| 43 | + function parseQuery( $filteredText, $fulltext ) { # Builds the escaped query string and fills $this->searchTerms |
| 44 | + global $wgContLang; |
| 45 | + $lc = SearchEngine::legalSearchChars(); |
| 46 | + $searchon = ''; |
| 47 | + $this->searchTerms = array(); |
| 48 | + |
| 49 | + # FIXME: This doesn't handle parenthetical expressions. |
| 50 | + $m = array(); |
| 51 | + if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', |
| 52 | + $filteredText, $m, PREG_SET_ORDER ) ) { |
| 53 | + foreach( $m as $terms ) { |
| 54 | + if( $searchon !== '' ) $searchon .= ' '; |
| 55 | + if( $this->strictMatching && ($terms[1] == '') ) { |
| 56 | + $terms[1] = '+'; # No explicit operator: default to '+' in strict mode |
| 57 | + } |
| 58 | + $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); |
| 59 | + if( !empty( $terms[3] ) ) { |
| 60 | + $regexp = preg_quote( $terms[3], '/' ); |
| 61 | + if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; |
| 62 | + } else { |
| 63 | + $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); |
| 64 | + } |
| 65 | + $this->searchTerms[] = $regexp; |
| 66 | + } |
| 67 | + wfDebug( "Would search with '$searchon'\n" ); |
| 68 | + wfDebug( 'Match with /\b' . implode( '\b|\b', $this->searchTerms ) . "\b/\n" ); |
| 69 | + } else { |
| 70 | + wfDebug( "Can't understand search query '{$filteredText}'\n" ); # Log the argument; no filteredText property is set in this class |
| 71 | + } |
| 72 | + |
| 73 | + $searchon = preg_replace( '/(\s+)/', '&', $searchon ); # Join the terms with '&' |
| 74 | + $searchon = $this->db->strencode( $searchon ); |
| 75 | + return $searchon; |
| 76 | + } |
| 77 | + |
| 78 | + function queryRanking( $filteredTerm, $fulltext ) { |
| 79 | + $field = $this->getIndexField( $fulltext ); |
| 80 | + $searchon = $this->parseQuery( $filteredTerm, $fulltext ); |
| 81 | + if ($this->mRanking) |
| 82 | + return " ORDER BY rank($field,to_tsquery('$searchon')) DESC"; |
| 83 | + else |
| 84 | + return ""; |
| 85 | + } |
| 86 | + |
| 87 | + |
| 88 | + function queryMain( $filteredTerm, $fulltext ) { |
| 89 | + $match = $this->parseQuery( $filteredTerm, $fulltext ); |
| 90 | + $field = $this->getIndexField( $fulltext ); |
| 91 | + $cur = $this->db->tableName( 'cur' ); |
| 92 | + $searchindex = $this->db->tableName( 'searchindex' ); |
| 93 | + return 'SELECT cur_id, cur_namespace, cur_title, cur_text ' . |
| 94 | + "FROM $cur,$searchindex " . |
| 95 | + 'WHERE cur_id=si_page AND ' . |
| 96 | + " $field @@ to_tsquery ('$match') " ; |
| 97 | + } |
| 98 | + |
| 99 | + function update( $id, $title, $text ) { |
| 100 | + $dbw = wfGetDB( DB_MASTER ); |
| 101 | + $searchindex = $dbw->tableName( 'searchindex' ); |
| 102 | + $sql = "DELETE FROM $searchindex WHERE si_page={$id}"; |
| 103 | + $dbw->query( $sql, __METHOD__ ); |
| 104 | + $sql = "INSERT INTO $searchindex (si_page,si_title,si_text) ". |
| 105 | + " VALUES ( $id, to_tsvector('". |
| 106 | + $dbw->strencode($title). |
| 107 | + "'),to_tsvector('". |
| 108 | + $dbw->strencode( $text)."')) "; |
| 109 | + $dbw->query($sql, __METHOD__ ); |
| 110 | + } |
| 111 | + |
| 112 | + function updateTitle($id,$title) { |
| 113 | + $dbw = wfGetDB(DB_MASTER); |
| 114 | + $searchindex = $dbw->tableName( 'searchindex' ); |
| 115 | + $sql = "UPDATE $searchindex SET si_title=to_tsvector('" . |
| 116 | + $dbw->strencode( $title ) . |
| 117 | + "') WHERE si_page={$id}"; |
| 118 | + |
| 119 | + $dbw->query( $sql, __METHOD__ ); |
| 120 | + } |
| 121 | +} |
Property changes on: trunk/phase3/includes/search/Tsearch2.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 122 | + native |
Index: trunk/phase3/includes/search/Update.php |
— | — | @@ -0,0 +1,113 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * See deferred.txt |
| 5 | + * @ingroup Search |
| 6 | + */ |
| 7 | +class SearchUpdate { |
| 8 | + |
| 9 | + /* private */ var $mId = 0, $mNamespace, $mTitle, $mText; |
| 10 | + /* private */ var $mTitleWords; |
| 11 | + |
| 12 | + function SearchUpdate( $id, $title, $text = false ) { |
| 13 | + $nt = Title::newFromText( $title ); |
| 14 | + if( $nt ) { |
| 15 | + $this->mId = $id; |
| 16 | + $this->mText = $text; |
| 17 | + |
| 18 | + $this->mNamespace = $nt->getNamespace(); |
| 19 | + $this->mTitle = $nt->getText(); # Discard namespace |
| 20 | + |
| 21 | + $this->mTitleWords = $this->mTextWords = array(); |
| 22 | + } else { |
| 23 | + wfDebug( "SearchUpdate object created with invalid title '$title'\n" ); |
| 24 | + } |
| 25 | + } |
| 26 | + |
| 27 | + function doUpdate() { |
| 28 | + global $wgContLang, $wgDisableSearchUpdate; |
| 29 | + |
| 30 | + if( $wgDisableSearchUpdate || !$this->mId ) { |
| 31 | + return false; |
| 32 | + } |
| 33 | + $fname = 'SearchUpdate::doUpdate'; |
| 34 | + wfProfileIn( $fname ); |
| 35 | + |
| 36 | + $search = SearchEngine::create(); |
| 37 | + $lc = SearchEngine::legalSearchChars() . '&#;'; |
| 38 | + |
| 39 | + if( $this->mText === false ) { |
| 40 | + $search->updateTitle($this->mId, |
| 41 | + Title::indexTitle( $this->mNamespace, $this->mTitle )); |
| 42 | + wfProfileOut( $fname ); |
| 43 | + return; |
| 44 | + } |
| 45 | + |
| 46 | + # Language-specific strip/conversion |
| 47 | + $text = $wgContLang->stripForSearch( $this->mText ); |
| 48 | + |
| 49 | + wfProfileIn( $fname.'-regexps' ); |
| 50 | + $text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/", |
| 51 | + ' ', strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup |
| 52 | + $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD", |
| 53 | + "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings |
| 54 | + |
| 55 | + # Strip external URLs |
| 56 | + $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF"; |
| 57 | + $protos = "http|https|ftp|mailto|news|gopher"; |
| 58 | + $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/"; |
| 59 | + $text = preg_replace( $pat, "\\1 \\3", $text ); |
| 60 | + |
| 61 | + $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/"; |
| 62 | + $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/"; |
| 63 | + $text = preg_replace( $p1, "\\1 ", $text ); |
| 64 | + $text = preg_replace( $p2, "\\1 \\3 ", $text ); |
| 65 | + |
| 66 | + # Internal image links |
| 67 | + $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i"; |
| 68 | + $text = preg_replace( $pat2, " \\1 \\3", $text ); |
| 69 | + |
| 70 | + $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/", |
| 71 | + "\\1\\2 \\2\\3", $text ); # Handle [[game]]s |
| 72 | + |
| 73 | + # Strip all remaining non-search characters |
| 74 | + $text = preg_replace( "/[^{$lc}]+/", " ", $text ); |
| 75 | + |
| 76 | + # Handle 's, s' |
| 77 | + # |
| 78 | + # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text ); |
| 79 | + # $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text ); |
| 80 | + # |
| 81 | + # These tail-anchored regexps are insanely slow. The worst case comes |
| 82 | + # when Japanese or Chinese text (ie, no word spacing) is written on |
| 83 | + # a wiki configured for Western UTF-8 mode. The Unicode characters are |
| 84 | + # expanded to hex codes and the "words" are very long paragraph-length |
| 85 | + # monstrosities. On a large page the above regexps may take over 20 |
| 86 | + # seconds *each* on a 1GHz-level processor. |
| 87 | + # |
| 88 | + # Following are reversed versions which are consistently fast |
| 89 | + # (about 3 milliseconds on 1GHz-level processor). |
| 90 | + # |
| 91 | + $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) ); |
| 92 | + $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) ); |
| 93 | + |
| 94 | + # Strip wiki '' and ''' |
| 95 | + $text = preg_replace( "/''[']*/", " ", $text ); |
| 96 | + wfProfileOut( "$fname-regexps" ); |
| 97 | + |
| 98 | + wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) ); |
| 99 | + |
| 100 | + # Perform the actual update |
| 101 | + $search->update($this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ), |
| 102 | + $text); |
| 103 | + |
| 104 | + wfProfileOut( $fname ); |
| 105 | + } |
| 106 | +} |
| 107 | + |
| 108 | +/** |
| 109 | + * Placeholder class |
| 110 | + * @ingroup Search |
| 111 | + */ |
| 112 | +class SearchUpdateMyISAM extends SearchUpdate { |
| 113 | + # Inherits everything |
| 114 | +} |
Property changes on: trunk/phase3/includes/search/Update.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 115 | + native |
Index: trunk/phase3/includes/search/Engine.php |
— | — | @@ -0,0 +1,1154 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * @defgroup Search Search |
| 5 | + * |
| 6 | + * @file |
| 7 | + * @ingroup Search |
| 8 | + */ |
| 9 | + |
| 10 | +/** |
| 11 | + * Contain a class for special pages |
| 12 | + * @ingroup Search |
| 13 | + */ |
| 14 | +class SearchEngine { |
| 15 | + var $limit = 10; |
| 16 | + var $offset = 0; |
| 17 | + var $searchTerms = array(); |
| 18 | + var $namespaces = array( NS_MAIN ); |
| 19 | + var $showRedirects = false; |
| 20 | + |
| 21 | + /** |
| 22 | + * Perform a full text search query and return a result set. |
| 23 | + * If full text searches are not supported or disabled, return null. |
| 24 | + * |
| 25 | + * @param string $term - Raw search term |
| 26 | + * @return SearchResultSet |
| 27 | + * @access public |
| 28 | + * @abstract |
| 29 | + */ |
| 30 | + function searchText( $term ) { |
| 31 | + return null; |
| 32 | + } |
| 33 | + |
| 34 | + /** |
| 35 | + * Perform a title-only search query and return a result set. |
| 36 | + * If title searches are not supported or disabled, return null. |
| 37 | + * |
| 38 | + * @param string $term - Raw search term |
| 39 | + * @return SearchResultSet |
| 40 | + * @access public |
| 41 | + * @abstract |
| 42 | + */ |
| 43 | + function searchTitle( $term ) { |
| 44 | + return null; |
| 45 | + } |
| 46 | + |
| 47 | + /** |
| 48 | + * If an exact title match can be found, or a very slightly close match, |
| 49 | + * return the title. If no match, returns NULL. |
| 50 | + * |
| 51 | + * @param string $searchterm |
| 52 | + * @return Title |
| 53 | + */ |
| 54 | + public static function getNearMatch( $searchterm ) { |
| 55 | + global $wgContLang; |
| 56 | + |
| 57 | + $allSearchTerms = array($searchterm); |
| 58 | + |
| 59 | + if($wgContLang->hasVariants()){ |
| 60 | + $allSearchTerms = array_merge($allSearchTerms,$wgContLang->convertLinkToAllVariants($searchterm)); |
| 61 | + } |
| 62 | + |
| 63 | + foreach($allSearchTerms as $term){ |
| 64 | + |
| 65 | + # Exact match? No need to look further. |
| 66 | + $title = Title::newFromText( $term ); |
| 67 | + if (is_null($title)) |
| 68 | + return NULL; |
| 69 | + |
| 70 | + if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal() |
| 71 | + || $title->exists() ) { |
| 72 | + return $title; |
| 73 | + } |
| 74 | + |
| 75 | + # Now try all lower case (i.e. first letter capitalized) |
| 76 | + # |
| 77 | + $title = Title::newFromText( $wgContLang->lc( $term ) ); |
| 78 | + if ( $title && $title->exists() ) { |
| 79 | + return $title; |
| 80 | + } |
| 81 | + |
| 82 | + # Now try capitalized string |
| 83 | + # |
| 84 | + $title = Title::newFromText( $wgContLang->ucwords( $term ) ); |
| 85 | + if ( $title && $title->exists() ) { |
| 86 | + return $title; |
| 87 | + } |
| 88 | + |
| 89 | + # Now try all upper case |
| 90 | + # |
| 91 | + $title = Title::newFromText( $wgContLang->uc( $term ) ); |
| 92 | + if ( $title && $title->exists() ) { |
| 93 | + return $title; |
| 94 | + } |
| 95 | + |
| 96 | + # Now try Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc |
| 97 | + $title = Title::newFromText( $wgContLang->ucwordbreaks($term) ); |
| 98 | + if ( $title && $title->exists() ) { |
| 99 | + return $title; |
| 100 | + } |
| 101 | + |
| 102 | + global $wgCapitalLinks, $wgContLang; |
| 103 | + if( !$wgCapitalLinks ) { |
| 104 | + // Catch differs-by-first-letter-case-only |
| 105 | + $title = Title::newFromText( $wgContLang->ucfirst( $term ) ); |
| 106 | + if ( $title && $title->exists() ) { |
| 107 | + return $title; |
| 108 | + } |
| 109 | + $title = Title::newFromText( $wgContLang->lcfirst( $term ) ); |
| 110 | + if ( $title && $title->exists() ) { |
| 111 | + return $title; |
| 112 | + } |
| 113 | + } |
| 114 | + |
| 115 | + // Give hooks a chance at better match variants |
| 116 | + $title = null; |
| 117 | + if( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) { |
| 118 | + return $title; |
| 119 | + } |
| 120 | + } |
| 121 | + |
| 122 | + $title = Title::newFromText( $searchterm ); |
| 123 | + |
| 124 | + # Entering an IP address goes to the contributions page |
| 125 | + if ( ( $title->getNamespace() == NS_USER && User::isIP($title->getText() ) ) |
| 126 | + || User::isIP( trim( $searchterm ) ) ) { |
| 127 | + return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() ); |
| 128 | + } |
| 129 | + |
| 130 | + |
| 131 | + # Entering a user goes to the user page whether it's there or not |
| 132 | + if ( $title->getNamespace() == NS_USER ) { |
| 133 | + return $title; |
| 134 | + } |
| 135 | + |
| 136 | + # Go to images that exist even if there's no local page. |
| 137 | + # There may have been a funny upload, or it may be on a shared |
| 138 | + # file repository such as Wikimedia Commons. |
| 139 | + if( $title->getNamespace() == NS_IMAGE ) { |
| 140 | + $image = wfFindFile( $title ); |
| 141 | + if( $image ) { |
| 142 | + return $title; |
| 143 | + } |
| 144 | + } |
| 145 | + |
| 146 | + # MediaWiki namespace? Page may be "implied" if not customized. |
| 147 | + # Just return it, with caps forced as the message system likes it. |
| 148 | + if( $title->getNamespace() == NS_MEDIAWIKI ) { |
| 149 | + return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) ); |
| 150 | + } |
| 151 | + |
| 152 | + # Quoted term? Try without the quotes... |
| 153 | + $matches = array(); |
| 154 | + if( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) { |
| 155 | + return SearchEngine::getNearMatch( $matches[1] ); |
| 156 | + } |
| 157 | + |
| 158 | + return NULL; |
| 159 | + } |
| 160 | + |
/**
 * Characters that may appear in a search term, written as the body of a
 * regular-expression character class (without the surrounding brackets).
 *
 * @return string
 */
public static function legalSearchChars() {
	$charClass = "A-Za-z_'0-9\\x80-\\xFF\\-";
	return $charClass;
}
| 164 | + |
/**
 * Configure paging: the maximum number of results to return and the
 * number of leading results to skip. Both values are coerced to integers.
 *
 * @param int $limit
 * @param int $offset
 * @access public
 */
function setLimitOffset( $limit, $offset = 0 ) {
	$maxResults = intval( $limit );
	$skipResults = intval( $offset );

	$this->limit = $maxResults;
	$this->offset = $skipResults;
}
| 177 | + |
/**
 * Choose the namespaces the search is restricted to.
 * The value is stored as given, without validation.
 *
 * @param array $namespaces namespace index numbers
 * @access public
 */
function setNamespaces( $namespaces ) {
	$selected = $namespaces;
	$this->namespaces = $selected;
}
| 188 | + |
/**
 * Strip a leading "<searchall message>:" or "<namespace name>:" prefix
 * from the query and adjust $this->namespaces to match: null for the
 * search-everything keyword, a one-element array for a namespace name.
 *
 * The original query is returned when it has no colon, or when nothing
 * but whitespace would remain after removing the prefix.
 *
 * @param string $query
 * @return string query with the recognised prefix removed
 */
function replacePrefixes( $query ){
	global $wgContLang;

	$colonPos = strpos( $query, ':' );
	if( $colonPos === false ) {
		return $query; // no prefix possible
	}

	$allPrefix = wfMsgForContent('searchall').":";
	$allLen = strlen( $allPrefix );
	$remainder = $query;

	if( strncmp( $query, $allPrefix, $allLen ) == 0 ) {
		// "all:" keyword, lift the namespace restriction
		$this->namespaces = null;
		$remainder = substr( $query, $allLen );
	} else {
		// text before the first colon may be a namespace name
		$nsName = substr( $query, 0, $colonPos );
		$nsIndex = $wgContLang->getNsIndex( $nsName );
		if( $nsIndex !== false ) {
			$this->namespaces = array( $nsIndex );
			$remainder = substr( $query, $colonPos + 1 );
		}
	}

	// the prefix was the whole query: hand back the input untouched
	return ( trim( $remainder ) == '' ) ? $query : $remainder;
}
| 219 | + |
/**
 * Build a map of namespace index => name for every namespace of the
 * content language whose index is not below NS_MAIN.
 *
 * @return array
 */
public static function searchableNamespaces() {
	global $wgContLang;

	$searchable = array();
	foreach( $wgContLang->getNamespaces() as $index => $label ) {
		if( $index < NS_MAIN ) {
			continue; // indexes below NS_MAIN are left out
		}
		$searchable[$index] = $label;
	}
	return $searchable;
}
| 234 | + |
/**
 * List the searchable namespaces for which the given user has the
 * matching 'searchNs<index>' option switched on.
 *
 * @param User $user
 * @return array namespace index numbers
 * @static
 */
public static function userNamespaces( &$user ) {
	$enabled = array();
	foreach( array_keys( SearchEngine::searchableNamespaces() ) as $index ) {
		if( !$user->getOption( 'searchNs' . $index ) ) {
			continue;
		}
		$enabled[] = $index;
	}
	return $enabled;
}
| 252 | + |
/**
 * Snippet highlight settings. The values are fixed; the $user argument
 * is accepted but its 'contextlines' / 'contextchars' options are not
 * consulted.
 *
 * @param User $user
 * @return array contextlines, contextchars
 * @static
 */
public static function userHighlightPrefs( &$user ){
	// hardcoded on purpose: 2 context lines of 75 characters each
	return array( 2, 75 );
}
| 267 | + |
/**
 * Namespace indexes that are searched by default: the keys of
 * $wgNamespacesToBeSearchedDefault whose value loosely equals true.
 *
 * @return array
 * @static
 */
public static function defaultNamespaces(){
	global $wgNamespacesToBeSearchedDefault;

	$enabled = array_keys( $wgNamespacesToBeSearchedDefault, true );
	return $enabled;
}
| 279 | + |
/**
 * Clean up a search string: every character outside legalSearchChars()
 * becomes a space, then the result is trimmed.
 *
 * @param string $text
 * @return string
 * @access public
 */
function filter( $text ) {
	$legal = $this->legalSearchChars();
	$illegalPattern = "/[^{$legal}]/";
	$spaced = preg_replace( $illegalPattern, " ", $text );
	return trim( $spaced );
}
/**
 * Instantiate the search backend: $wgSearchType when it is set,
 * otherwise the class matching $wgDBtype, otherwise SearchEngineDummy.
 * The instance is built around a DB_SLAVE connection and has its limit
 * and offset reset to 0 before being returned.
 *
 * @return SearchEngine
 */
public static function create() {
	global $wgDBtype, $wgSearchType;

	if( $wgSearchType ) {
		// explicit configuration wins over the database type
		$class = $wgSearchType;
	} else {
		switch( $wgDBtype ) {
			case 'mysql':
				$class = 'SearchMySQL';
				break;
			case 'postgres':
				$class = 'SearchPostgres';
				break;
			case 'oracle':
				$class = 'SearchOracle';
				break;
			default:
				$class = 'SearchEngineDummy';
		}
	}

	$engine = new $class( wfGetDB( DB_SLAVE ) );
	$engine->setLimitOffset(0,0);
	return $engine;
}
| 313 | + |
/**
 * Create or update the search index record for a page.
 * This base implementation does nothing.
 *
 * @param int $id
 * @param string $title pre-processed title
 * @param string $text pre-processed text
 * @abstract
 */
function update( $id, $title, $text ) {
	// intentionally empty
	return;
}
| 326 | + |
/**
 * Update only the title of a search index record.
 * This base implementation does nothing.
 *
 * @param int $id
 * @param string $title pre-processed title
 * @abstract
 */
function updateTitle( $id, $title ) {
	// intentionally empty
	return;
}
| 338 | + |
/**
 * OpenSearch suggestion URL template: $wgOpenSearchTemplate when set,
 * otherwise an api.php opensearch URL limited to the default
 * namespaces ("0" when that list is empty).
 *
 * @return string
 * @static
 */
public static function getOpenSearchTemplate() {
	global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;

	if( $wgOpenSearchTemplate ) {
		return $wgOpenSearchTemplate;
	}

	$nsList = implode( ',', SearchEngine::defaultNamespaces() );
	if( !$nsList ) {
		$nsList = "0";
	}
	$base = $wgServer . $wgScriptPath;
	return $base . '/api.php?action=opensearch&search={searchTerms}&namespace=' . $nsList;
}
| 355 | + |
/**
 * URL template for the internal MediaWiki suggest feature:
 * $wgMWSuggestTemplate when set, otherwise an api.php opensearch URL
 * with {searchTerms} and {namespaces} placeholders.
 *
 * @return string
 * @static
 */
public static function getMWSuggestTemplate() {
	global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;

	$fallback = '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}';
	return $wgMWSuggestTemplate
		? $wgMWSuggestTemplate
		: $wgServer . $wgScriptPath . $fallback;
}
| 369 | +} |
| 370 | + |
/**
 * Base result set. Every method here returns an "empty" answer;
 * the doc comments describe what overriding classes are expected to
 * provide.
 *
 * @ingroup Search
 */
class SearchResultSet {
	/**
	 * Regular expression fragments that match the search terms, as
	 * parsed by the engine, inside a text extract.
	 *
	 * @return array
	 * @access public
	 * @abstract
	 */
	function termMatches() {
		$fragments = array();
		return $fragments;
	}

	/**
	 * Number of rows in this set; always 0 here.
	 *
	 * @return int
	 */
	function numRows() {
		return 0;
	}

	/**
	 * Whether this set contains any results; always false here.
	 *
	 * @return bool
	 * @abstract
	 */
	function hasResults() {
		return false;
	}

	/**
	 * Total hit count for the query across the whole article database,
	 * for search modes that report one. The count may include pages in
	 * namespaces the current settings would not match.
	 *
	 * @return int null if no total hit count is supported
	 * @access public
	 */
	function getTotalHits() {
		return null;
	}

	/**
	 * Whether the set carries a suggested alternate term (some search
	 * modes offer one when there are no exact hits).
	 *
	 * @return bool
	 * @access public
	 */
	function hasSuggestion() {
		return false;
	}

	/**
	 * @return string suggested query, null if none
	 */
	function getSuggestionQuery(){
		return null;
	}

	/**
	 * @return string highlighted suggested query, '' if none
	 */
	function getSuggestionSnippet(){
		return '';
	}

	/**
	 * Diagnostic information on how and from where the results were
	 * fetched.
	 *
	 * @return string null in this base class
	 */
	function getInfo() {
		return null;
	}

	/**
	 * Hits on other wikis associated with this one.
	 *
	 * @return SearchResultSet null in this base class
	 */
	function getInterwikiResults() {
		return null;
	}

	/**
	 * Whether getInterwikiResults() yields anything (loose comparison
	 * against null).
	 *
	 * @return boolean
	 */
	function hasInterwikiResults() {
		$others = $this->getInterwikiResults();
		return $others != null;
	}


	/**
	 * Fetch the next search result.
	 *
	 * @return SearchResult false when there is none
	 * @access public
	 * @abstract
	 */
	function next() {
		return false;
	}

	/**
	 * Release the result set, if applicable. Nothing to release here.
	 * @access public
	 */
	function free() {
		// ...
	}
}
| 487 | + |
| 488 | + |
/**
 * Marker class with no members: some search engines may bail out if
 * too many matches are found.
 *
 * @ingroup Search
 */
class SearchResultTooMany {
}
| 495 | + |
| 496 | + |
/**
 * A single search hit: the page title, its revision and lazily loaded
 * text, plus snippet/metadata accessors that backends may override.
 *
 * @ingroup Search
 */
class SearchResult {
	/** Title built from the result row */
	var $mTitle = null;
	/** Revision looked up for $mTitle; stays null when no title was built */
	var $mRevision = null;
	/** Page text, filled on first use by initText() */
	var $mText = null;

	/**
	 * @param object $row result row providing page_namespace and page_title
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		if( !is_null($this->mTitle) )
			$this->mRevision = Revision::newFromTitle( $this->mTitle );
	}

	/**
	 * PHP 4-style constructor name, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats a method named after its class as
	 * the constructor, hence the real work lives in __construct().
	 *
	 * @param object $row
	 */
	function SearchResult( $row ) {
		$this->__construct( $row );
	}

	/**
	 * Check if this result points to an invalid title
	 *
	 * @return boolean
	 * @access public
	 */
	function isBrokenTitle(){
		return is_null($this->mTitle);
	}

	/**
	 * Check if target page is missing, happens when index is out of date
	 *
	 * @return boolean
	 * @access public
	 */
	function isMissingRevision(){
		return !$this->mRevision;
	}

	/**
	 * @return Title
	 * @access public
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * @return double or null if not supported
	 */
	function getScore() {
		return null;
	}

	/**
	 * Lazy initialization of article text from DB.
	 * A result without a revision (see isMissingRevision()) gets an
	 * empty text instead of triggering a call on a non-object.
	 */
	protected function initText(){
		if( !isset($this->mText) ){
			$this->mText = $this->mRevision ? $this->mRevision->getText() : '';
		}
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted text snippet, null (and not '') if not supported
	 */
	function getTextSnippet($terms){
		global $wgUser, $wgAdvancedSearchHighlighting;
		$this->initText();
		list($contextlines,$contextchars) = SearchEngine::userHighlightPrefs($wgUser);
		$h = new SearchHighlighter();
		if( $wgAdvancedSearchHighlighting )
			return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
		else
			return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted title, '' if not supported
	 */
	function getTitleSnippet($terms){
		return '';
	}

	/**
	 * @param array $terms terms to highlight
	 * @return string highlighted redirect name (redirect to this page), '' if none or not supported
	 */
	function getRedirectSnippet($terms){
		return '';
	}

	/**
	 * @return Title object for the redirect to this page, null if none or not supported
	 */
	function getRedirectTitle(){
		return null;
	}

	/**
	 * @return string highlighted relevant section name, '' if none or not supported
	 */
	function getSectionSnippet(){
		return '';
	}

	/**
	 * @return Title object (pagename+fragment) for the section, null if none or not supported
	 */
	function getSectionTitle(){
		return null;
	}

	/**
	 * @return string timestamp of the revision, null when the revision is missing
	 */
	function getTimestamp(){
		return $this->mRevision ? $this->mRevision->getTimestamp() : null;
	}

	/**
	 * @return int number of words
	 */
	function getWordCount(){
		$this->initText();
		return str_word_count( $this->mText );
	}

	/**
	 * @return int size in bytes
	 */
	function getByteSize(){
		$this->initText();
		return strlen( $this->mText );
	}

	/**
	 * @return boolean if hit has related articles
	 */
	function hasRelated(){
		return false;
	}

	/**
	 * @return interwiki prefix of the title (return iw even if title is broken)
	 */
	function getInterwikiPrefix(){
		return '';
	}
}
| 646 | + |
| 647 | +/** |
| 648 | + * Highlight bits of wikitext |
| 649 | + * |
| 650 | + * @ingroup Search |
| 651 | + */ |
| 652 | +class SearchHighlighter { |
| 653 | + var $mCleanWikitext = true; |
| 654 | + |
/**
 * @param bool $cleanupWikitext whether splitAndAdd() passes text through
 *  removeWiki() before splitting it into lines
 */
function __construct( $cleanupWikitext = true ) {
	$this->mCleanWikitext = $cleanupWikitext;
}

/**
 * PHP 4-style constructor name, kept so existing explicit calls keep
 * working. PHP 8 no longer treats a method named after its class as
 * the constructor, so the flag is actually set in __construct().
 *
 * @param bool $cleanupWikitext
 */
function SearchHighlighter($cleanupWikitext = true){
	$this->__construct( $cleanupWikitext );
}
| 658 | + |
/**
 * Default implementation of wikitext highlighting.
 *
 * The text is first split into plain-text extracts and "other" extracts
 * (templates, image links, tables and, when wfCite exists, <ref> blocks).
 * Lines matching the search terms are then picked via process(), padded
 * towards a roughly constant size, joined with '<b> ... </b>' separators
 * and the matched terms are wrapped in <span class='searchmatch'>.
 *
 * @param string $text
 * @param array $terms Terms to highlight (unescaped)
 * @param int $contextlines
 * @param int $contextchars
 * @return string '' when $text is empty
 */
public function highlightText( $text, $terms, $contextlines, $contextchars ) {
	global $wgContLang;
	global $wgSearchHighlightBoundaries;
	$fname = __METHOD__;

	if($text == '')
		return '';

	// split text into text + templates/links/tables
	$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
	// first capture group is for detecting nested templates/links/tables/references
	$endPatterns = array(
		1 => '/(\{\{)|(\}\})/', // template
		2 => '/(\[\[)|(\]\])/', // image
		3 => "/(\n\\{\\|)|(\n\\|\\})/"); // table

	// FIXME: this should prolly be a hook or something
	if(function_exists('wfCite')){
		$spat .= '|(<ref>)'; // references via cite extension
		$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
	}
	$spat .= '/';
	$textExt = array(); // text extracts
	$otherExt = array(); // other extracts
	wfProfileIn( "$fname-split" );
	$start = 0;
	$textLen = strlen($text);
	$count = 0; // sequence number to maintain ordering
	while( $start < $textLen ){
		// find start of template/image/table
		if( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ){
			$epat = '';
			foreach($matches as $key => $val){
				if($key > 0 && $val[1] != -1){
					if($key == 2){
						// see if this is an image link
						$ns = substr($val[0],2,-1);
						if( $wgContLang->getNsIndex($ns) != NS_IMAGE )
							break;

					}
					$epat = $endPatterns[$key];
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
					$start = $val[1];
					break;
				}
			}
			if( $epat ){
				// find end (and detect any nested elements)
				$level = 0;
				$offset = $start + 1;
				$found = false;
				while( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ){
					if( array_key_exists(2,$endMatches) ){
						// found end
						if($level == 0){
							$len = strlen($endMatches[2][0]);
							$off = $endMatches[2][1];
							$this->splitAndAdd( $otherExt, $count,
								substr( $text, $start, $off + $len - $start ) );
							$start = $off + $len;
							$found = true;
							break;
						} else{
							// end of nested element
							$level -= 1;
						}
					} else{
						// nested
						$level += 1;
					}
					$offset = $endMatches[0][1] + strlen($endMatches[0][0]);
				}
				if( ! $found ){
					// couldn't find appropriate closing tag, skip
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen($matches[0][0]) ) );
					$start += strlen($matches[0][0]);
				}
				continue;
			}
		}
		// else: add as text extract
		$this->splitAndAdd( $textExt, $count, substr($text,$start) );
		break;
	}

	$all = $textExt + $otherExt; // these have disjunct key sets

	wfProfileOut( "$fname-split" );

	// prepare regexps
	foreach( $terms as $index => $term ) {
		$terms[$index] = preg_quote( $term, '/' );
		// manually do upper/lowercase stuff for utf-8 since PHP won't do it
		if(preg_match('/[\x80-\xff]/', $term) ){
			$terms[$index] = preg_replace_callback('/./us',array($this,'caseCallback'),$terms[$index]);
		}
	}
	$anyterm = implode( '|', $terms );
	$phrase = implode("$wgSearchHighlightBoundaries+", $terms );

	// FIXME: a hack to scale contextchars, a correct solution
	// would be to have contextchars actually be char and not byte
	// length, and do proper utf-8 substrings and lengths everywhere,
	// but PHP is making that very hard and unclean to implement :(
	// An empty term list gives an empty $anyterm; fall back to a scale
	// of 1 instead of dividing by zero.
	$anytermChars = mb_strlen($anyterm);
	$scale = $anytermChars > 0 ? strlen($anyterm) / $anytermChars : 1;
	$contextchars = intval( $contextchars * $scale );

	$patPre = "(^|$wgSearchHighlightBoundaries)";
	$patPost = "($wgSearchHighlightBoundaries|$)";

	$pat1 = "/(".$phrase.")/ui";
	$pat2 = "/$patPre(".$anyterm.")$patPost/ui";

	wfProfileIn( "$fname-extract" );

	$left = $contextlines;

	$snippets = array();
	$offsets = array();

	// show beginning only if it contains all words
	$first = 0;
	$firstText = '';
	foreach($textExt as $index => $line){
		if(strlen($line)>0 && $line[0] != ';' && $line[0] != ':'){
			$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
			$first = $index;
			break;
		}
	}
	if( $firstText ){
		$succ = true;
		// check if first text contains all terms
		foreach($terms as $term){
			if( ! preg_match("/$patPre".$term."$patPost/ui", $firstText) ){
				$succ = false;
				break;
			}
		}
		if( $succ ){
			$snippets[$first] = $firstText;
			$offsets[$first] = 0;
		}
	}
	if( ! $snippets ) {
		// match whole query on text
		$this->process($pat1, $textExt, $left, $contextchars, $snippets, $offsets);
		// match whole query on templates/tables/images
		$this->process($pat1, $otherExt, $left, $contextchars, $snippets, $offsets);
		// match any words on text
		$this->process($pat2, $textExt, $left, $contextchars, $snippets, $offsets);
		// match any words on templates/tables/images
		$this->process($pat2, $otherExt, $left, $contextchars, $snippets, $offsets);

		ksort($snippets);
	}

	// add extra chars to each snippet to make snippets constant size
	$extended = array();
	if( count( $snippets ) == 0){
		// couldn't find the target words, just show beginning of article
		$targetchars = $contextchars * $contextlines;
		$snippets[$first] = '';
		$offsets[$first] = 0;
	} else{
		// if begin of the article contains the whole phrase, show only that !!
		if( array_key_exists($first,$snippets) && preg_match($pat1,$snippets[$first])
			&& $offsets[$first] < $contextchars * 2 ){
			$snippets = array ($first => $snippets[$first]);
		}

		// calc by how much to extend existing snippets
		$targetchars = intval( ($contextchars * $contextlines) / count ( $snippets ) );
	}

	foreach($snippets as $index => $line){
		$extended[$index] = $line;
		$len = strlen($line);
		if( $len < $targetchars - 20 ){
			// complete this line; $all may lack $index when the text
			// produced no extracts at all, so check before reading it
			if( array_key_exists($index,$all) && $len < strlen( $all[$index] )){
				$extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index]);
				$len = strlen( $extended[$index] );
			}

			// add more lines
			$add = $index + 1;
			while( $len < $targetchars - 20
				&& array_key_exists($add,$all)
				&& !array_key_exists($add,$snippets) ){
				$offsets[$add] = 0;
				$tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
				$extended[$add] = $tt;
				$len += strlen( $tt );
				$add++;
			}
		}
	}

	//$snippets = array_map('htmlspecialchars', $extended);
	$snippets = $extended;
	$last = -1;
	$extract = '';
	foreach($snippets as $index => $line){
		if($last == -1)
			$extract .= $line; // first line
		elseif($last+1 == $index && $offsets[$last]+strlen($snippets[$last]) >= strlen($all[$last]))
			$extract .= " ".$line; // continuous lines
		else
			$extract .= '<b> ... </b>' . $line;

		$last = $index;
	}
	if( $extract )
		$extract .= '<b> ... </b>';

	$processed = array();
	foreach($terms as $term){
		if( ! isset($processed[$term]) ){
			$pat3 = "/$patPre(".$term.")$patPost/ui"; // highlight word
			$extract = preg_replace( $pat3,
				"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
			$processed[$term] = true;
		}
	}

	wfProfileOut( "$fname-extract" );

	return $extract;
}
| 901 | + |
/**
 * Split text into lines and append every non-empty trimmed line to the
 * extracts array, using $count as the next key and advancing it.
 * The text is passed through removeWiki() first when $mCleanWikitext
 * is set.
 *
 * @param array $extracts index -> $line (modified in place)
 * @param int $count next sequence number (modified in place)
 * @param string $text
 */
function splitAndAdd(&$extracts, &$count, $text){
	$split = explode( "\n", $this->mCleanWikitext? $this->removeWiki($text) : $text );
	foreach($split as $line){
		$tt = trim($line);
		// compare against '' explicitly: a plain truthiness test would
		// also discard a line consisting of the single character "0"
		if( $tt !== '' )
			$extracts[$count++] = $tt;
	}
}
| 917 | + |
/**
 * preg_replace_callback handler doing manual case folding: a match
 * longer than one byte is turned into a two-way character class
 * "[<lowercase><uppercase>]"; a single-byte match is returned as is.
 *
 * @param array $matches
 * @return string
 */
function caseCallback($matches){
	global $wgContLang;

	$char = $matches[0];
	if( strlen( $char ) <= 1 ) {
		return $char;
	}
	return '[' . $wgContLang->lc( $char ) . $wgContLang->uc( $char ) . ']';
}
| 930 | + |
/**
 * Cut the part of the text between start and end, moving both
 * boundaries with position() so that words are not chopped up.
 * The out parameters are only written when the caller passed a
 * non-null variable for them.
 *
 * @param string $text
 * @param int $start
 * @param int $end
 * @param int $posStart (out) actual start position
 * @param int $posEnd (out) actual end position
 * @return string '' when the adjusted end is not past the adjusted start
 */
function extract($text, $start, $end, &$posStart = null, &$posEnd = null ){
	$textLen = strlen( $text );

	// a start of 0 is taken literally, anything else is snapped
	if( $start != 0 ) {
		$start = $this->position( $text, $start, 1 );
	}
	$end = ( $end >= $textLen ) ? $textLen : $this->position( $text, $end );

	if( $posStart !== null ) {
		$posStart = $start;
	}
	if( $posEnd !== null ) {
		$posEnd = $end;
	}

	return ( $end > $start ) ? substr( $text, $start, $end - $start ) : '';
}
| 961 | + |
/**
 * Find a nonletter near a point (index) in the text.
 *
 * A window of 10 bytes on either side of $point is searched and the
 * index of the first space/punctuation character in that window is
 * returned, plus $offset. When the window holds no such character,
 * $point itself is returned after moving it forward past any UTF-8
 * continuation bytes (0x80-0xBF); $offset is not applied in that case.
 *
 * @param string $text
 * @param int $point
 * @param int $offset added to the found index
 * @return int nonletter index, or beginning of utf8 char if none;
 *  strlen($text) when $point lies at or beyond the end of the text
 */
function position($text, $point, $offset=0 ){
	$tolerance = 10;
	$length = strlen($text);
	$s = max( 0, $point - $tolerance );
	$l = min( $length, $point + $tolerance ) - $s;
	$m = array();
	if( preg_match('/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr($text,$s,$l), $m, PREG_OFFSET_CAPTURE ) ){
		return $m[0][1] + $s + $offset;
	} else{
		// nothing to index at or past the end of the text
		if( $point >= $length )
			return $length;
		// check if point is on a valid first UTF8 char
		$char = ord( $text[$point] );
		while( $char >= 0x80 && $char < 0xc0 ) {
			// skip trailing bytes
			$point++;
			if($point >= $length)
				return $length;
			$char = ord( $text[$point] );
		}
		return $point;
	}
}
| 991 | + |
/**
 * Search extracts for a pattern and record one snippet per matching
 * line until the line budget is used up. Lines that already have an
 * entry in $out are skipped.
 *
 * @param string $pattern regexp for matching lines
 * @param array $extracts extracts to search
 * @param int $linesleft number of extracts still to make (decremented per snippet)
 * @param int $contextchars length of snippet
 * @param array $out map for highlighted snippets
 * @param array $offsets map of starting points of snippets
 * @protected
 */
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){
	if( $linesleft == 0 )
		return; // nothing to do

	foreach( $extracts as $key => $content ){
		$hit = array();
		// already highlighted, or no match on this line
		$skip = array_key_exists( $key, $out )
			|| !preg_match( $pattern, $content, $hit, PREG_OFFSET_CAPTURE );
		if( $skip )
			continue;

		$hitPos = $hit[0][1];
		$hitLen = strlen( $hit[0][0] );
		if( $hitPos + $hitLen < $contextchars ) {
			// match ends within the first snippet-length: start at the line start
			$from = 0;
		} elseif( $hitLen > $contextchars ) {
			// match longer than a snippet: start at the match
			$from = $hitPos;
		} else {
			// otherwise centre the match inside the snippet
			$from = $hitPos + intval( ( $hitLen - $contextchars ) / 2 );
		}

		// basic snippet from this line; extract() reports the real start
		$actualFrom = $from;
		$out[$key] = $this->extract( $content, $from, $from + $contextchars, $actualFrom );
		$offsets[$key] = $actualFrom;

		if( --$linesleft == 0 )
			return;
	}
}
| 1034 | + |
/**
 * Basic wikitext removal: drops or unwraps {{...}} and [[...]] markup
 * (piped links go through linkReplace()), then strips tags and
 * quote-based bold/italic markers. Replacements run one after another
 * in the order listed.
 *
 * @param string $text
 * @return string
 * @protected
 */
function removeWiki($text) {
	$fname = __METHOD__;
	wfProfileIn( $fname );

	// pattern => replacement, applied before the piped-link callback
	$braceAndLinkSteps = array(
		"/\\{\\{([^|]+?)\\}\\}/" => "",
		"/\\{\\{([^|]+\\|)(.*?)\\}\\}/" => "\\2",
		"/\\[\\[([^|]+?)\\]\\]/" => "\\1",
	);
	foreach( $braceAndLinkSteps as $pattern => $replacement ) {
		$text = preg_replace( $pattern, $replacement, $text );
	}

	$text = preg_replace_callback("/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array($this,'linkReplace'), $text);

	// everything below is simply deleted
	$strippedPatterns = array(
		"/<\/?[^>]+>/",
		"/'''''/",
		"/('''|<\/?[iIuUbB]>)/",
		"/''/",
	);
	foreach( $strippedPatterns as $pattern ) {
		$text = preg_replace( $pattern, "", $text );
	}

	wfProfileOut( $fname );
	return $text;
}
| 1062 | + |
| 1063 | + /** |
| 1064 | + * callback to replace [[target|caption]] kind of links, if |
| 1065 | + * the target is category or image, leave it |
| 1066 | + * |
| 1067 | + * @param array $matches |
| 1068 | + */ |
| 1069 | + function linkReplace($matches){
| 1070 | + $colon = strpos( $matches[1], ':' ); // $matches[1] is the "target|" part captured by the pattern in removeWiki()
| 1071 | + if( $colon === false )
| 1072 | + return $matches[2]; // replace with caption
| 1073 | + global $wgContLang;
| 1074 | + $ns = substr( $matches[1], 0, $colon ); // text before the first colon: candidate namespace name
| 1075 | + $index = $wgContLang->getNsIndex($ns);
| 1076 | + if( $index !== false && ($index == NS_IMAGE || $index == NS_CATEGORY) )
| 1077 | + return $matches[0]; // return the whole thing
| 1078 | + else
| 1079 | + return $matches[2]; // any other prefix: keep only the caption
| 1080 | +
| 1081 | + }
| 1082 | + |
| 1083 | + /** |
| 1084 | + * Simple & fast snippet extraction, but gives completely irrelevant
| 1085 | + * snippets
| 1086 | + * |
| 1087 | + * @param string $text |
| 1088 | + * @param array $terms |
| 1089 | + * @param int $contextlines |
| 1090 | + * @param int $contextchars |
| 1091 | + * @return string |
| 1092 | + */ |
| 1093 | + public function highlightSimple( $text, $terms, $contextlines, $contextchars ) { |
| 1094 | + global $wgLang, $wgContLang; |
| 1095 | + $fname = __METHOD__; |
| 1096 | + |
| 1097 | + $lines = explode( "\n", $text ); |
| 1098 | + |
| 1099 | + $terms = implode( '|', $terms ); |
| 1100 | + $terms = str_replace( '/', "\\/", $terms); |
| 1101 | + $max = intval( $contextchars ) + 1; |
| 1102 | + $pat1 = "/(.*)($terms)(.{0,$max})/i"; |
| 1103 | + |
| 1104 | + $lineno = 0; |
| 1105 | + |
| 1106 | + $extract = ""; |
| 1107 | + wfProfileIn( "$fname-extract" ); |
| 1108 | + foreach ( $lines as $line ) { |
| 1109 | + if ( 0 == $contextlines ) { |
| 1110 | + break; |
| 1111 | + } |
| 1112 | + ++$lineno; |
| 1113 | + $m = array(); |
| 1114 | + if ( ! preg_match( $pat1, $line, $m ) ) { |
| 1115 | + continue; |
| 1116 | + } |
| 1117 | + --$contextlines; |
| 1118 | + $pre = $wgContLang->truncate( $m[1], -$contextchars, ' ... ' ); |
| 1119 | + |
| 1120 | + if ( count( $m ) < 3 ) { |
| 1121 | + $post = ''; |
| 1122 | + } else { |
| 1123 | + $post = $wgContLang->truncate( $m[3], $contextchars, ' ... ' ); |
| 1124 | + } |
| 1125 | + |
| 1126 | + $found = $m[2]; |
| 1127 | + |
| 1128 | + $line = htmlspecialchars( $pre . $found . $post ); |
| 1129 | + $pat2 = '/(' . $terms . ")/i"; |
| 1130 | + $line = preg_replace( $pat2, |
| 1131 | + "<span class='searchmatch'>\\1</span>", $line ); |
| 1132 | + |
| 1133 | + $extract .= "${line}\n"; |
| 1134 | + } |
| 1135 | + wfProfileOut( "$fname-extract" ); |
| 1136 | + |
| 1137 | + return $extract; |
| 1138 | + } |
| 1139 | + |
| 1140 | +} |
| 1141 | + |
| 1142 | +/** |
| 1143 | + * @ingroup Search |
| 1144 | + */ |
| 1145 | +class SearchEngineDummy { |
| 1146 | + function search( $term ) { |
| 1147 | + return null; |
| 1148 | + } |
| 1149 | + function setLimitOffset($l, $o) {} |
| 1150 | + function legalSearchChars() {} |
| 1151 | + function update() {} |
| 1152 | + function setnamespaces() {} |
| 1153 | + function searchtitle() {} |
| 1154 | + function searchtext() {} |
| 1155 | +} |
Property changes on: trunk/phase3/includes/search/Engine.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 1156 | + native |
Index: trunk/phase3/includes/search/MySQL.php |
— | — | @@ -0,0 +1,262 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook for MySQL 4+ |
| 28 | + * @ingroup Search |
| 29 | + */ |
| 30 | +class SearchMySQL extends SearchEngine { |
| 31 | + var $strictMatching = true; |
| 32 | + |
| 33 | + /** @todo document */ |
| 34 | + function __construct( $db ) { |
| 35 | + $this->db = $db; |
| 36 | + } |
| 37 | + |
| 38 | + /** @todo document */ |
| 39 | + function parseQuery( $filteredText, $fulltext ) { |
| 40 | + global $wgContLang; |
| 41 | + $lc = SearchEngine::legalSearchChars(); // Minus format chars |
| 42 | + $searchon = ''; |
| 43 | + $this->searchTerms = array(); |
| 44 | + |
| 45 | + # FIXME: This doesn't handle parenthetical expressions. |
| 46 | + $m = array(); |
| 47 | + if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', |
| 48 | + $filteredText, $m, PREG_SET_ORDER ) ) { |
| 49 | + foreach( $m as $terms ) { |
| 50 | + if( $searchon !== '' ) $searchon .= ' '; |
| 51 | + if( $this->strictMatching && ($terms[1] == '') ) { |
| 52 | + $terms[1] = '+'; |
| 53 | + } |
| 54 | + $searchon .= $terms[1] . $wgContLang->stripForSearch( $terms[2] ); |
| 55 | + if( !empty( $terms[3] ) ) { |
| 56 | + // Match individual terms in result highlighting... |
| 57 | + $regexp = preg_quote( $terms[3], '/' ); |
| 58 | + if( $terms[4] ) $regexp .= "[0-9A-Za-z_]+"; |
| 59 | + } else { |
| 60 | + // Match the quoted term in result highlighting... |
| 61 | + $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); |
| 62 | + } |
| 63 | + $this->searchTerms[] = $regexp; |
| 64 | + } |
| 65 | + wfDebug( "Would search with '$searchon'\n" ); |
| 66 | + wfDebug( 'Match with /' . implode( '|', $this->searchTerms ) . "/\n" ); |
| 67 | + } else { |
| 68 | + wfDebug( "Can't understand search query '{$filteredText}'\n" ); |
| 69 | + } |
| 70 | + |
| 71 | + $searchon = $this->db->strencode( $searchon ); |
| 72 | + $field = $this->getIndexField( $fulltext ); |
| 73 | + return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) "; |
| 74 | + } |
| 75 | + |
| 76 | + public static function legalSearchChars() { |
| 77 | + return "\"*" . parent::legalSearchChars(); |
| 78 | + } |
| 79 | + |
| 80 | + /** |
| 81 | + * Perform a full text search query and return a result set. |
| 82 | + * |
| 83 | + * @param string $term - Raw search term |
| 84 | + * @return MySQLSearchResultSet |
| 85 | + * @access public |
| 86 | + */ |
| 87 | + function searchText( $term ) { |
| 88 | + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); |
| 89 | + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); |
| 90 | + } |
| 91 | + |
| 92 | + /** |
| 93 | + * Perform a title-only search query and return a result set. |
| 94 | + * |
| 95 | + * @param string $term - Raw search term |
| 96 | + * @return MySQLSearchResultSet |
| 97 | + * @access public |
| 98 | + */ |
| 99 | + function searchTitle( $term ) { |
| 100 | + $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); |
| 101 | + return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); |
| 102 | + } |
| 103 | + |
| 104 | + |
| 105 | + /** |
| 106 | + * Return a partial WHERE clause to exclude redirects, if so set |
| 107 | + * @return string |
| 108 | + * @private |
| 109 | + */ |
| 110 | + function queryRedirect() { |
| 111 | + if( $this->showRedirects ) { |
| 112 | + return ''; |
| 113 | + } else { |
| 114 | + return 'AND page_is_redirect=0'; |
| 115 | + } |
| 116 | + } |
| 117 | + |
| 118 | + /** |
| 119 | + * Return a partial WHERE clause to limit the search to the given namespaces |
| 120 | + * @return string |
| 121 | + * @private |
| 122 | + */ |
| 123 | + function queryNamespaces() {
| 124 | + if( is_null($this->namespaces) )
| 125 | + return ''; # search all
| 126 | + $namespaces = implode( ',', array_map( 'intval', $this->namespaces ) ); # force integers: the list is spliced into raw SQL
| 127 | + if ($namespaces == '') {
| 128 | + $namespaces = '0'; # empty list: fall back to namespace 0
| 129 | + }
| 130 | + return 'AND page_namespace IN (' . $namespaces . ')';
| 131 | + }
| 132 | + |
| 133 | + /** |
| 134 | + * Return a LIMIT clause to limit results on the query. |
| 135 | + * @return string |
| 136 | + * @private |
| 137 | + */ |
| 138 | + function queryLimit() { |
| 139 | + return $this->db->limitResult( '', $this->limit, $this->offset ); |
| 140 | + } |
| 141 | + |
| 142 | + /** |
| 143 | + * Does not do anything for generic search engine |
| 144 | + * subclasses may define this though |
| 145 | + * @return string |
| 146 | + * @private |
| 147 | + */ |
| 148 | + function queryRanking( $filteredTerm, $fulltext ) { |
| 149 | + return ''; |
| 150 | + } |
| 151 | + |
| 152 | + /** |
| 153 | + * Construct the full SQL query to do the search. |
| 154 | + * The guts should be constructed in queryMain()
| 155 | + * @param string $filteredTerm |
| 156 | + * @param bool $fulltext |
| 157 | + * @private |
| 158 | + */ |
| 159 | + function getQuery( $filteredTerm, $fulltext ) { |
| 160 | + return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . |
| 161 | + $this->queryRedirect() . ' ' . |
| 162 | + $this->queryNamespaces() . ' ' . |
| 163 | + $this->queryRanking( $filteredTerm, $fulltext ) . ' ' . |
| 164 | + $this->queryLimit(); |
| 165 | + } |
| 166 | + |
| 167 | + |
| 168 | + /** |
| 169 | + * Picks which field to index on, depending on what type of query. |
| 170 | + * @param bool $fulltext |
| 171 | + * @return string |
| 172 | + */ |
| 173 | + function getIndexField( $fulltext ) { |
| 174 | + return $fulltext ? 'si_text' : 'si_title'; |
| 175 | + } |
| 176 | + |
| 177 | + /** |
| 178 | + * Get the base part of the search query. |
| 179 | + * The actual match syntax will depend on the server |
| 180 | + * version; MySQL 3 and MySQL 4 have different capabilities |
| 181 | + * in their fulltext search indexes. |
| 182 | + * |
| 183 | + * @param string $filteredTerm |
| 184 | + * @param bool $fulltext |
| 185 | + * @return string |
| 186 | + * @private |
| 187 | + */ |
| 188 | + function queryMain( $filteredTerm, $fulltext ) { |
| 189 | + $match = $this->parseQuery( $filteredTerm, $fulltext ); |
| 190 | + $page = $this->db->tableName( 'page' ); |
| 191 | + $searchindex = $this->db->tableName( 'searchindex' ); |
| 192 | + return 'SELECT page_id, page_namespace, page_title ' . |
| 193 | + "FROM $page,$searchindex " . |
| 194 | + 'WHERE page_id=si_page AND ' . $match; |
| 195 | + } |
| 196 | + |
| 197 | + /** |
| 198 | + * Create or update the search index record for the given page. |
| 199 | + * Title and text should be pre-processed. |
| 200 | + * |
| 201 | + * @param int $id |
| 202 | + * @param string $title |
| 203 | + * @param string $text |
| 204 | + */ |
| 205 | + function update( $id, $title, $text ) { |
| 206 | + $dbw = wfGetDB( DB_MASTER ); |
| 207 | + $dbw->replace( 'searchindex', |
| 208 | + array( 'si_page' ), |
| 209 | + array( |
| 210 | + 'si_page' => $id, |
| 211 | + 'si_title' => $title, |
| 212 | + 'si_text' => $text |
| 213 | + ), __METHOD__ ); |
| 214 | + } |
| 215 | + |
| 216 | + /** |
| 217 | + * Update a search index record's title only. |
| 218 | + * Title should be pre-processed. |
| 219 | + * |
| 220 | + * @param int $id |
| 221 | + * @param string $title |
| 222 | + */ |
| 223 | + function updateTitle( $id, $title ) { |
| 224 | + $dbw = wfGetDB( DB_MASTER ); |
| 225 | + |
| 226 | + $dbw->update( 'searchindex', |
| 227 | + array( 'si_title' => $title ), |
| 228 | + array( 'si_page' => $id ), |
| 229 | + __METHOD__, |
| 230 | + array( $dbw->lowPriorityOption() ) ); |
| 231 | + } |
| 232 | +} |
| 233 | + |
| 234 | +/** |
| 235 | + * @ingroup Search |
| 236 | + */ |
| 237 | +class MySQLSearchResultSet extends SearchResultSet {
| 238 | + function __construct( $resultSet, $terms ) { // was a PHP 4-style constructor named after the class, which PHP 8 no longer runs on "new"
| 239 | + $this->mResultSet = $resultSet;
| 240 | + $this->mTerms = $terms;
| 241 | + }
| 242 | +
| 243 | + function termMatches() {
| 244 | + return $this->mTerms; // the $terms value handed to the constructor
| 245 | + }
| 246 | +
| 247 | + function numRows() {
| 248 | + return $this->mResultSet->numRows();
| 249 | + }
| 250 | +
| 251 | + function next() {
| 252 | + $row = $this->mResultSet->fetchObject();
| 253 | + if( $row === false ) {
| 254 | + return false; // no further rows
| 255 | + } else {
| 256 | + return new SearchResult( $row );
| 257 | + }
| 258 | + }
| 259 | +
| 260 | + function free() {
| 261 | + $this->mResultSet->free();
| 262 | + }
| 263 | +}
Property changes on: trunk/phase3/includes/search/MySQL.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 264 | + native |
Index: trunk/phase3/includes/search/Oracle.php |
— | — | @@ -0,0 +1,240 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook base class for Oracle (ConText). |
| 28 | + * @ingroup Search |
| 29 | + */ |
| 30 | +class SearchOracle extends SearchEngine { |
| 31 | + function __construct($db) { |
| 32 | + $this->db = $db; |
| 33 | + } |
| 34 | + |
| 35 | + /** |
| 36 | + * Perform a full text search query and return a result set. |
| 37 | + * |
| 38 | + * @param string $term - Raw search term |
| 39 | + * @return OracleSearchResultSet |
| 40 | + * @access public |
| 41 | + */ |
| 42 | + function searchText( $term ) { |
| 43 | + $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), true))); |
| 44 | + return new OracleSearchResultSet($resultSet, $this->searchTerms); |
| 45 | + } |
| 46 | + |
| 47 | + /**
| 48 | + * Perform a title-only search query and return a result set.
| 49 | + *
| 50 | + * @param string $term - Raw search term
| 51 | + * @return OracleSearchResultSet
| 52 | + * @access public
| 53 | + */
| 54 | + function searchTitle($term) {
| 55 | + $resultSet = $this->db->resultObject($this->db->query($this->getQuery($this->filter($term), false)));
| 56 | + return new OracleSearchResultSet($resultSet, $this->searchTerms);
| 57 | + }
| 58 | + |
| 59 | + |
| 60 | + /** |
| 61 | + * Return a partial WHERE clause to exclude redirects, if so set |
| 62 | + * @return string |
| 63 | + * @private |
| 64 | + */ |
| 65 | + function queryRedirect() { |
| 66 | + if ($this->showRedirects) { |
| 67 | + return ''; |
| 68 | + } else { |
| 69 | + return 'AND page_is_redirect=0'; |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + /** |
| 74 | + * Return a partial WHERE clause to limit the search to the given namespaces |
| 75 | + * @return string |
| 76 | + * @private |
| 77 | + */ |
| 78 | + function queryNamespaces() {
| 79 | + if( is_null($this->namespaces) )
| 80 | + return ''; # null means no namespace restriction
| 81 | + $namespaces = implode(',', array_map('intval', $this->namespaces)); # force integers: the list is spliced into raw SQL
| 82 | + if ($namespaces == '') {
| 83 | + $namespaces = '0'; # empty list: fall back to namespace 0
| 84 | + }
| 85 | + return 'AND page_namespace IN (' . $namespaces . ')';
| 86 | + }
| 87 | + |
| 88 | + /** |
| 89 | + * Apply the configured limit and offset to the given SQL query.
| 90 | + * @return string |
| 91 | + * @private |
| 92 | + */ |
| 93 | + function queryLimit($sql) { |
| 94 | + return $this->db->limitResult($sql, $this->limit, $this->offset); |
| 95 | + } |
| 96 | + |
| 97 | + /** |
| 98 | + * Return an ORDER BY clause that sorts results by the score of
| 99 | + * the CONTAINS() condition labelled 1 in parseQuery()
| 100 | + * @return string |
| 101 | + * @private |
| 102 | + */ |
| 103 | + function queryRanking($filteredTerm, $fulltext) { |
| 104 | + return ' ORDER BY score(1)'; |
| 105 | + } |
| 106 | + |
| 107 | + /** |
| 108 | + * Construct the full SQL query to do the search. |
| 109 | + * The guts should be constructed in queryMain()
| 110 | + * @param string $filteredTerm |
| 111 | + * @param bool $fulltext |
| 112 | + * @private |
| 113 | + */ |
| 114 | + function getQuery( $filteredTerm, $fulltext ) { |
| 115 | + return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' . |
| 116 | + $this->queryRedirect() . ' ' . |
| 117 | + $this->queryNamespaces() . ' ' . |
| 118 | + $this->queryRanking( $filteredTerm, $fulltext ) . ' '); |
| 119 | + } |
| 120 | + |
| 121 | + |
| 122 | + /** |
| 123 | + * Picks which field to index on, depending on what type of query. |
| 124 | + * @param bool $fulltext |
| 125 | + * @return string |
| 126 | + */ |
| 127 | + function getIndexField($fulltext) { |
| 128 | + return $fulltext ? 'si_text' : 'si_title'; |
| 129 | + } |
| 130 | + |
| 131 | + /** |
| 132 | + * Get the base part of the search query. |
| 133 | + * |
| 134 | + * @param string $filteredTerm |
| 135 | + * @param bool $fulltext |
| 136 | + * @return string |
| 137 | + * @private |
| 138 | + */ |
| 139 | + function queryMain( $filteredTerm, $fulltext ) { |
| 140 | + $match = $this->parseQuery($filteredTerm, $fulltext); |
| 141 | + $page = $this->db->tableName('page'); |
| 142 | + $searchindex = $this->db->tableName('searchindex'); |
| 143 | + return 'SELECT page_id, page_namespace, page_title ' . |
| 144 | + "FROM $page,$searchindex " . |
| 145 | + 'WHERE page_id=si_page AND ' . $match; |
| 146 | + } |
| 147 | + |
| 148 | + /** @todo document */ |
| 149 | + function parseQuery($filteredText, $fulltext) { |
| 150 | + global $wgContLang; |
| 151 | + $lc = SearchEngine::legalSearchChars(); |
| 152 | + $this->searchTerms = array(); |
| 153 | + |
| 154 | + # FIXME: This doesn't handle parenthetical expressions. |
| 155 | + $m = array(); |
| 156 | + $q = array(); |
| 157 | + |
| 158 | + if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', |
| 159 | + $filteredText, $m, PREG_SET_ORDER)) { |
| 160 | + foreach($m as $terms) { |
| 161 | + $q[] = $terms[1] . $wgContLang->stripForSearch($terms[2]); |
| 162 | + |
| 163 | + if (!empty($terms[3])) { |
| 164 | + $regexp = preg_quote( $terms[3], '/' ); |
| 165 | + if ($terms[4]) |
| 166 | + $regexp .= "[0-9A-Za-z_]+"; |
| 167 | + } else { |
| 168 | + $regexp = preg_quote(str_replace('"', '', $terms[2]), '/'); |
| 169 | + } |
| 170 | + $this->searchTerms[] = $regexp; |
| 171 | + } |
| 172 | + } |
| 173 | + |
| 174 | + $searchon = $this->db->strencode(join(',', $q)); |
| 175 | + $field = $this->getIndexField($fulltext); |
| 176 | + return " CONTAINS($field, '$searchon', 1) > 0 "; |
| 177 | + } |
| 178 | + |
| 179 | + /** |
| 180 | + * Create or update the search index record for the given page. |
| 181 | + * Title and text should be pre-processed. |
| 182 | + * |
| 183 | + * @param int $id |
| 184 | + * @param string $title |
| 185 | + * @param string $text |
| 186 | + */ |
| 187 | + function update($id, $title, $text) { |
| 188 | + $dbw = wfGetDB(DB_MASTER); |
| 189 | + $dbw->replace('searchindex', |
| 190 | + array('si_page'), |
| 191 | + array( |
| 192 | + 'si_page' => $id, |
| 193 | + 'si_title' => $title, |
| 194 | + 'si_text' => $text |
| 195 | + ), 'SearchOracle::update' ); |
| 196 | + $dbw->query("CALL ctx_ddl.sync_index('si_text_idx')"); |
| 197 | + $dbw->query("CALL ctx_ddl.sync_index('si_title_idx')"); |
| 198 | + } |
| 199 | + |
| 200 | + /** |
| 201 | + * Update a search index record's title only. |
| 202 | + * Title should be pre-processed. |
| 203 | + * |
| 204 | + * @param int $id |
| 205 | + * @param string $title |
| 206 | + */ |
| 207 | + function updateTitle($id, $title) { |
| 208 | + $dbw = wfGetDB(DB_MASTER); |
| 209 | + |
| 210 | + $dbw->update('searchindex', |
| 211 | + array('si_title' => $title), |
| 212 | + array('si_page' => $id), |
| 213 | + 'SearchOracle::updateTitle', |
| 214 | + array()); |
| 215 | + } |
| 216 | +} |
| 217 | + |
| 218 | +/** |
| 219 | + * @ingroup Search |
| 220 | + */ |
| 221 | +class OracleSearchResultSet extends SearchResultSet { |
| 222 | + function __construct($resultSet, $terms) { |
| 223 | + $this->mResultSet = $resultSet; |
| 224 | + $this->mTerms = $terms; |
| 225 | + } |
| 226 | + |
| 227 | + function termMatches() { |
| 228 | + return $this->mTerms; |
| 229 | + } |
| 230 | + |
| 231 | + function numRows() { |
| 232 | + return $this->mResultSet->numRows(); |
| 233 | + } |
| 234 | + |
| 235 | + function next() { |
| 236 | + $row = $this->mResultSet->fetchObject(); |
| 237 | + if ($row === false) |
| 238 | + return false; |
| 239 | + return new SearchResult($row); |
| 240 | + } |
| 241 | +} |
Property changes on: trunk/phase3/includes/search/Oracle.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 242 | + native |
Index: trunk/phase3/includes/search/Postgres.php |
— | — | @@ -0,0 +1,255 @@ |
| 2 | +<?php |
| 3 | +# Copyright (C) 2006-2007 Greg Sabino Mullane <greg@turnstep.com> |
| 4 | +# http://www.mediawiki.org/ |
| 5 | +# |
| 6 | +# This program is free software; you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation; either version 2 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# This program is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License along |
| 17 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 18 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | +# http://www.gnu.org/copyleft/gpl.html |
| 20 | + |
| 21 | +/** |
| 22 | + * @file |
| 23 | + * @ingroup Search |
| 24 | + */ |
| 25 | + |
| 26 | +/** |
| 27 | + * Search engine hook base class for Postgres |
| 28 | + * @ingroup Search |
| 29 | + */ |
| 30 | +class SearchPostgres extends SearchEngine { |
| 31 | + |
| 32 | + function __construct( $db ) { ## PHP 5 constructor; a method named after the class is not run by "new" on PHP 8
| 33 | + $this->db = $db;
| 34 | + }
| 35 | + |
| 36 | + /** |
| 37 | + * Perform a full text search query via tsearch2 and return a result set. |
| 38 | + * Currently searches a page's current title (page.page_title) and |
| 39 | + * latest revision article text (pagecontent.old_text) |
| 40 | + * |
| 41 | + * @param string $term - Raw search term |
| 42 | + * @return PostgresSearchResultSet |
| 43 | + * @access public |
| 44 | + */ |
| 45 | + function searchTitle( $term ) { |
| 46 | + $q = $this->searchQuery( $term , 'titlevector', 'page_title' ); |
| 47 | + $olderror = error_reporting(E_ERROR); |
| 48 | + $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) ); |
| 49 | + error_reporting($olderror); |
| 50 | + if (!$resultSet) { |
| 51 | + // Needed for "Query requires full scan, GIN doesn't support it" |
| 52 | + return new SearchResultTooMany(); |
| 53 | + } |
| 54 | + return new PostgresSearchResultSet( $resultSet, $this->searchTerms ); |
| 55 | + } |
| 56 | + function searchText( $term ) { |
| 57 | + $q = $this->searchQuery( $term, 'textvector', 'old_text' ); |
| 58 | + $olderror = error_reporting(E_ERROR); |
| 59 | + $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) ); |
| 60 | + error_reporting($olderror); |
| 61 | + if (!$resultSet) { |
| 62 | + return new SearchResultTooMany(); |
| 63 | + } |
| 64 | + return new PostgresSearchResultSet( $resultSet, $this->searchTerms ); |
| 65 | + } |
| 66 | + |
| 67 | + |
| 68 | + /* |
| 69 | + * Transform the user's search string into a better form for tsearch2 |
| 70 | + */ |
| 71 | + function parseQuery( $term ) { |
| 72 | + |
| 73 | + wfDebug( "parseQuery received: $term" ); |
| 74 | + |
| 75 | + ## No backslashes allowed |
| 76 | + $term = preg_replace('/\\\/', '', $term); |
| 77 | + |
| 78 | + ## Collapse parens into nearby words: |
| 79 | + $term = preg_replace('/\s*\(\s*/', ' (', $term); |
| 80 | + $term = preg_replace('/\s*\)\s*/', ') ', $term); |
| 81 | + |
| 82 | + ## Treat colons as word separators: |
| 83 | + $term = preg_replace('/:/', ' ', $term); |
| 84 | + |
| 85 | + $searchstring = ''; |
| 86 | + $m = array(); |
| 87 | + if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) { |
| 88 | + foreach( $m as $terms ) { |
| 89 | + if (strlen($terms[1])) { |
| 90 | + $searchstring .= ' & !'; |
| 91 | + } |
| 92 | + if (strtolower($terms[2]) === 'and') { |
| 93 | + $searchstring .= ' & '; |
| 94 | + } |
| 95 | + else if (strtolower($terms[2]) === 'or' or $terms[2] === '|') { |
| 96 | + $searchstring .= ' | '; |
| 97 | + } |
| 98 | + else if (strtolower($terms[2]) === 'not') { |
| 99 | + $searchstring .= ' & !'; |
| 100 | + } |
| 101 | + else { |
| 102 | + $searchstring .= " & $terms[2]"; |
| 103 | + } |
| 104 | + } |
| 105 | + } |
| 106 | + |
| 107 | + ## Strip out leading junk |
| 108 | + $searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring); |
| 109 | + |
| 110 | + ## Remove any doubled-up operators |
| 111 | + $searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring); |
| 112 | + |
| 113 | + ## Remove any non-spaced operators (e.g. "Zounds!") |
| 114 | + $searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring); |
| 115 | + |
| 116 | + ## Remove any trailing whitespace or operators |
| 117 | + $searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring); |
| 118 | + |
| 119 | + ## Remove unnecessary quotes around everything |
| 120 | + $searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring); |
| 121 | + |
| 122 | + ## Quote the whole thing |
| 123 | + $searchstring = $this->db->addQuotes($searchstring); |
| 124 | + |
| 125 | + wfDebug( "parseQuery returned: $searchstring" ); |
| 126 | + |
| 127 | + return $searchstring; |
| 128 | + |
| 129 | + } |
| 130 | + |
| 131 | + /** |
| 132 | + * Construct the full SQL query to do the search. |
| 133 | + * @param string $term Raw search term
| 134 | + * @param string $fulltext Name of the tsvector column to match against ($colname is not used in the body)
| 135 | + * @private |
| 136 | + */ |
| 137 | + function searchQuery( $term, $fulltext, $colname ) { |
| 138 | + global $wgDBversion; |
| 139 | + |
| 140 | + if ( !isset( $wgDBversion ) ) { |
| 141 | + $this->db->getServerVersion(); |
| 142 | + $wgDBversion = $this->db->numeric_version; |
| 143 | + } |
| 144 | + $prefix = $wgDBversion < 8.3 ? "'default'," : ''; |
| 145 | + |
| 146 | + $searchstring = $this->parseQuery( $term ); |
| 147 | + |
| 148 | + ## We need a separate query here so gin does not complain about empty searches |
| 149 | + $SQL = "SELECT to_tsquery($prefix $searchstring)"; |
| 150 | + $res = $this->db->doQuery($SQL); |
| 151 | + if (!$res) { |
| 152 | + ## TODO: Better output (example to catch: one 'two) |
| 153 | + die ("Sorry, that was not a valid search string. Please go back and try again"); |
| 154 | + } |
| 155 | + $top = pg_fetch_result($res,0,0); |
| 156 | + |
| 157 | + if ($top === "") { ## e.g. if only stopwords are used XXX return something better |
| 158 | + $query = "SELECT page_id, page_namespace, page_title, 0 AS score ". |
| 159 | + "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " . |
| 160 | + "AND r.rev_text_id = c.old_id AND 1=0"; |
| 161 | + } |
| 162 | + else { |
| 163 | + $m = array(); |
| 164 | + if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) { |
| 165 | + foreach( $m as $terms ) { |
| 166 | + $this->searchTerms[$terms[1]] = $terms[1]; |
| 167 | + } |
| 168 | + } |
| 169 | + |
| 170 | + $rankscore = $wgDBversion > 8.2 ? 5 : 1; |
| 171 | + $rank = $wgDBversion < 8.3 ? 'rank' : 'ts_rank'; |
| 172 | + $query = "SELECT page_id, page_namespace, page_title, ". |
| 173 | + "$rank($fulltext, to_tsquery($prefix $searchstring), $rankscore) AS score ". |
| 174 | + "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " . |
| 175 | + "AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($prefix $searchstring)"; |
| 176 | + } |
| 177 | + |
| 178 | + ## Redirects |
| 179 | + if (! $this->showRedirects) |
| 180 | + $query .= ' AND page_is_redirect = 0'; |
| 181 | + |
| 182 | + ## Namespaces - defaults to 0 |
| 183 | + if( !is_null($this->namespaces) ){ // null -> search all |
| 184 | + if ( count($this->namespaces) < 1) |
| 185 | + $query .= ' AND page_namespace = 0'; |
| 186 | + else { |
| 187 | + $namespaces = implode( ',', $this->namespaces ); |
| 188 | + $query .= " AND page_namespace IN ($namespaces)"; |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | + $query .= " ORDER BY score DESC, page_id DESC"; |
| 193 | + |
| 194 | + $query .= $this->db->limitResult( '', $this->limit, $this->offset ); |
| 195 | + |
| 196 | + wfDebug( "searchQuery returned: $query" ); |
| 197 | + |
| 198 | + return $query; |
| 199 | + } |
| 200 | + |
| 201 | + ## Most of the work of these two functions are done automatically via triggers |
| 202 | + |
| 203 | + function update( $pageid, $title, $text ) { ## $title and $text are not used in this body
| 204 | + ## We don't want to index older revisions
| 205 | + $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id = ".
| 206 | + "(SELECT rev_text_id FROM revision WHERE rev_page = " . intval( $pageid ) . " ". ## integer cast: the id is spliced into raw SQL
| 207 | + "ORDER BY rev_text_id DESC LIMIT 1 OFFSET 1)";
| 208 | + $this->db->doQuery($SQL);
| 209 | + return true;
| 210 | + }
| 211 | + |
| 212 | + function updateTitle( $id, $title ) { |
| 213 | + return true; |
| 214 | + } |
| 215 | + |
| 216 | +} ## end of the SearchPostgres class |
| 217 | + |
| 218 | +/** |
| 219 | + * @ingroup Search |
| 220 | + */ |
| 221 | +class PostgresSearchResult extends SearchResult {
| 222 | + function __construct( $row ) { // was a PHP 4-style constructor named after the class, which PHP 8 no longer runs on "new"
| 223 | + $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
| 224 | + $this->score = $row->score; // "score" column selected by the search query
| 225 | + }
| 226 | + function getScore() {
| 227 | + return $this->score;
| 228 | + }
| 229 | +}
| 230 | + |
| 231 | +/** |
| 232 | + * @ingroup Search |
| 233 | + */ |
| 234 | +class PostgresSearchResultSet extends SearchResultSet {
| 235 | + function __construct( $resultSet, $terms ) { // was a PHP 4-style constructor named after the class, which PHP 8 no longer runs on "new"
| 236 | + $this->mResultSet = $resultSet;
| 237 | + $this->mTerms = $terms;
| 238 | + }
| 239 | +
| 240 | + function termMatches() {
| 241 | + return $this->mTerms; // the $terms value handed to the constructor
| 242 | + }
| 243 | +
| 244 | + function numRows() {
| 245 | + return $this->mResultSet->numRows();
| 246 | + }
| 247 | +
| 248 | + function next() {
| 249 | + $row = $this->mResultSet->fetchObject();
| 250 | + if( $row === false ) {
| 251 | + return false; // no further rows
| 252 | + } else {
| 253 | + return new PostgresSearchResult( $row );
| 254 | + }
| 255 | + }
| 256 | +}
Property changes on: trunk/phase3/includes/search/Postgres.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 257 | + native |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -126,10 +126,8 @@ |
127 | 127 | 'MimeMagic' => 'includes/MimeMagic.php', |
128 | 128 | 'MWException' => 'includes/Exception.php', |
129 | 129 | 'MWNamespace' => 'includes/Namespace.php', |
130 | | - 'MySQLSearchResultSet' => 'includes/SearchMySQL.php', |
131 | 130 | 'Namespace' => 'includes/NamespaceCompat.php', // Compat |
132 | 131 | 'OldChangesList' => 'includes/ChangesList.php', |
133 | | - 'OracleSearchResultSet' => 'includes/SearchOracle.php', |
134 | 132 | 'OutputPage' => 'includes/OutputPage.php', |
135 | 133 | 'PageHistory' => 'includes/PageHistory.php', |
136 | 134 | 'PageHistoryPager' => 'includes/PageHistory.php', |
— | — | @@ -137,8 +135,6 @@ |
138 | 136 | 'Pager' => 'includes/Pager.php', |
139 | 137 | 'PasswordError' => 'includes/User.php', |
140 | 138 | 'PatrolLog' => 'includes/PatrolLog.php', |
141 | | - 'PostgresSearchResult' => 'includes/SearchPostgres.php', |
142 | | - 'PostgresSearchResultSet' => 'includes/SearchPostgres.php', |
143 | 139 | 'PrefixSearch' => 'includes/PrefixSearch.php', |
144 | 140 | 'Profiler' => 'includes/Profiler.php', |
145 | 141 | 'ProfilerSimple' => 'includes/ProfilerSimple.php', |
— | — | @@ -158,18 +154,6 @@ |
159 | 155 | 'Revision' => 'includes/Revision.php', |
160 | 156 | 'RSSFeed' => 'includes/Feed.php', |
161 | 157 | 'Sanitizer' => 'includes/Sanitizer.php', |
162 | | - 'SearchEngineDummy' => 'includes/SearchEngine.php', |
163 | | - 'SearchEngine' => 'includes/SearchEngine.php', |
164 | | - 'SearchHighlighter' => 'includes/SearchEngine.php', |
165 | | - 'SearchMySQL4' => 'includes/SearchMySQL4.php', |
166 | | - 'SearchMySQL' => 'includes/SearchMySQL.php', |
167 | | - 'SearchOracle' => 'includes/SearchOracle.php', |
168 | | - 'SearchPostgres' => 'includes/SearchPostgres.php', |
169 | | - 'SearchResult' => 'includes/SearchEngine.php', |
170 | | - 'SearchResultSet' => 'includes/SearchEngine.php', |
171 | | - 'SearchResultTooMany' => 'includes/SearchEngine.php', |
172 | | - 'SearchUpdate' => 'includes/SearchUpdate.php', |
173 | | - 'SearchUpdateMyISAM' => 'includes/SearchUpdate.php', |
174 | 158 | 'SiteConfiguration' => 'includes/SiteConfiguration.php', |
175 | 159 | 'SiteStats' => 'includes/SiteStats.php', |
176 | 160 | 'SiteStatsUpdate' => 'includes/SiteStats.php', |
— | — | @@ -376,6 +360,24 @@ |
377 | 361 | 'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php', |
378 | 362 | 'StripState' => 'includes/parser/Parser.php', |
379 | 363 | |
| 364 | + # includes/search
| 365 | + 'OracleSearchResultSet' => 'includes/search/Oracle.php',
| 366 | + 'PostgresSearchResult' => 'includes/search/Postgres.php',
| 367 | + 'PostgresSearchResultSet' => 'includes/search/Postgres.php',
| 368 | + 'MySQLSearchResultSet' => 'includes/search/MySQL.php',
| 369 | + 'SearchEngineDummy' => 'includes/search/Engine.php',
| 370 | + 'SearchEngine' => 'includes/search/Engine.php',
| 371 | + 'SearchHighlighter' => 'includes/search/Engine.php',
| 372 | + 'SearchMySQL4' => 'includes/search/MySQL4.php',
| 373 | + 'SearchMySQL' => 'includes/search/MySQL.php',
| 374 | + 'SearchOracle' => 'includes/search/Oracle.php',
| 375 | + 'SearchPostgres' => 'includes/search/Postgres.php',
| 376 | + 'SearchResult' => 'includes/search/Engine.php',
| 377 | + 'SearchResultSet' => 'includes/search/Engine.php',
| 378 | + 'SearchResultTooMany' => 'includes/search/Engine.php',
| 379 | + 'SearchUpdate' => 'includes/search/Update.php',
| 380 | + 'SearchUpdateMyISAM' => 'includes/search/Update.php',
| 381 | + |
380 | 382 | # includes/specials |
381 | 383 | 'AncientPagesPage' => 'includes/specials/Ancientpages.php', |
382 | 384 | 'BrokenRedirectsPage' => 'includes/specials/BrokenRedirects.php', |