Index: trunk/extensions/MWSearch/MWSearch.php |
— | — | @@ -59,8 +59,7 @@ |
60 | 60 | $wgLuceneSearchCacheExpiry = 0; |
61 | 61 | |
62 | 62 | # Not a valid entry point, skip unless MEDIAWIKI is defined |
63 | | -if (defined('MEDIAWIKI')) { |
64 | | -$wgExtensionFunctions[] = "wfLuceneSearch"; |
| 63 | +if( defined('MEDIAWIKI') ){ |
65 | 64 | |
66 | 65 | $wgExtensionCredits['other'][] = array( |
67 | 66 | 'name' => 'MWSearch', |
— | — | @@ -70,719 +69,17 @@ |
71 | 70 | 'descriptionmsg' => 'mwsearch-desc', |
72 | 71 | 'url' => 'http://www.mediawiki.org/wiki/Extension:MWSearch', |
73 | 72 | ); |
74 | | -$wgExtensionMessagesFiles['MWSearch'] = dirname(__FILE__) . '/MWSearch.i18n.php'; |
75 | 73 | |
| 74 | +$dir = dirname(__FILE__) . '/'; |
| 75 | + |
| 76 | +$wgExtensionMessagesFiles['MWSearch'] = $dir . 'MWSearch.i18n.php'; |
| 77 | + |
76 | 78 | if($wgLuceneSearchVersion >= 2.1 && $wgEnableLucenePrefixSearch) |
77 | 79 | $wgHooks['PrefixSearchBackend'][] = 'LuceneSearch::prefixSearch'; |
78 | 80 | |
79 | | -function wfLuceneSearch() { |
| 81 | +$wgAutoloadClasses['LuceneSearch'] = $dir . 'MWSearch_body.php'; |
| 82 | +$wgAutoloadClasses['LuceneResult'] = $dir . 'MWSearch_body.php'; |
| 83 | +$wgAutoloadClasses['LuceneSearchSet'] = $dir . 'MWSearch_body.php'; |
80 | 84 | |
81 | | -require_once( 'search/Engine.php' ); |
82 | | - |
83 | | -class LuceneSearch extends SearchEngine { |
84 | | - /** |
85 | | - * Perform a full text search query and return a result set. |
86 | | - * |
87 | | - * @param string $term - Raw search term |
88 | | - * @return LuceneSearchSet |
89 | | - * @access public |
90 | | - */ |
91 | | - function searchText( $term ) { |
92 | | - return LuceneSearchSet::newFromQuery( isset($this->related)? 'related' : 'search', |
93 | | - $term, $this->namespaces, $this->limit, $this->offset ); |
94 | | - } |
95 | | - |
96 | | - /** |
97 | | - * Perform a title-only search query and return a result set. |
98 | | - * |
99 | | - * @param string $term - Raw search term |
100 | | - * @return LuceneSearchSet |
101 | | - * @access public |
102 | | - */ |
103 | | - function searchTitle( $term ) { |
104 | | - return null; |
105 | | - } |
106 | | - |
107 | | - /** |
108 | | - * PrefixSearchBackend override for OpenSearch results |
109 | | - */ |
110 | | - static function prefixSearch( $ns, $search, $limit, &$results ) { |
111 | | - $it = LuceneSearchSet::newFromQuery( 'prefix', $search, $ns, $limit, 0 ); |
112 | | - $results = array(); |
113 | | - while( $res = $it->next() ) { |
114 | | - $results[] = $res->getTitle()->getPrefixedText(); |
115 | | - } |
116 | | - |
117 | | - return false; |
118 | | - } |
119 | | - |
120 | | - /** |
121 | | - * Prepare query for the lucene-search daemon: |
122 | | - * |
123 | | - * 1) rewrite namespaces into standardized form |
124 | | - * e.g. image:clouds -> [6]:clouds |
125 | | - * e.g. help,wp:npov -> [12,4]:npov |
126 | | - * |
127 | | - * 2) rewrite localizations of "search everything" keyword |
128 | | - * e.g. alle:heidegger -> all:heidegger |
129 | | - * |
130 | | - * @param string query |
131 | | - * @return string rewritten query |
132 | | - * @access private |
133 | | - */ |
134 | | - function replacePrefixes( $query ) { |
135 | | - global $wgContLang, $wgLuceneUseRelated; |
136 | | - $fname = 'LuceneSearch::replacePrefixes'; |
137 | | - wfProfileIn($fname); |
138 | | - $qlen = strlen($query); |
139 | | - $start = 0; $len = 0; // token start pos and length |
140 | | - $rewritten = ''; // rewritten query |
141 | | - $rindex = 0; // point to last rewritten character |
142 | | - $inquotes = false; |
143 | | - |
144 | | - // quick check, most of the time we don't need any rewriting |
145 | | - if(strpos($query,':')===false){ |
146 | | - wfProfileOut($fname); |
147 | | - return $query; |
148 | | - } |
149 | | - |
150 | | - // check if this is query for related articles |
151 | | - $relatedkey = wfMsgForContent('searchrelated').':'; |
152 | | - if($wgLuceneUseRelated && strncmp($query, $relatedkey, strlen($relatedkey)) == 0){ |
153 | | - $this->related = true; |
154 | | - list($dummy,$ret) = explode(":",$query,2); |
155 | | - wfProfileOut($fname); |
156 | | - return trim($ret); |
157 | | - } |
158 | | - |
159 | | - // "search everything" |
160 | | - // might not be at the beginning for complex queries |
161 | | - $allkeyword = wfMsgForContent('searchall'); |
162 | | - |
163 | | - for($i = 0 ; $i < $qlen ; $i++){ |
164 | | - $c = $query[$i]; |
165 | | - |
166 | | - // ignore chars in quotes |
167 | | - if($inquotes && $c!='"'); |
168 | | - // check if $c is valid prefix character |
169 | | - else if(($c >= 'a' && $c <= 'z') || |
170 | | - ($c >= 'A' && $c <= 'Z') || |
171 | | - $c == '_' || $c == '-' || $c ==','){ |
172 | | - if($len == 0){ |
173 | | - $start = $i; // begin of token |
174 | | - $len = 1; |
175 | | - } else |
176 | | - $len++; |
177 | | - // check for utf-8 chars |
178 | | - } else if(($c >= "\xc0" && $c <= "\xff")){ |
179 | | - $utf8len = 1; |
180 | | - for($j = $i+1; $j < $qlen; $j++){ // fetch extra utf-8 bytes |
181 | | - if($query[$j] >= "\x80" && $query[$j] <= "\xbf") |
182 | | - $utf8len++; |
183 | | - else |
184 | | - break; |
185 | | - } |
186 | | - if($len == 0){ |
187 | | - $start = $i; |
188 | | - $len = $utf8len; |
189 | | - } else |
190 | | - $len += $utf8len; |
191 | | - $i = $j - 1; // we consumed the chars |
192 | | - // check for end of prefix (i.e. semicolon) |
193 | | - } else if($c == ':' && $len !=0){ |
194 | | - $rewrite = array(); // here we collect namespaces |
195 | | - $prefixes = explode(',',substr($query,$start,$len)); |
196 | | - // iterate thru comma-separated list of prefixes |
197 | | - foreach($prefixes as $prefix){ |
198 | | - $index = $wgContLang->getNsIndex($prefix); |
199 | | - |
200 | | - // check for special prefixes all/incategory |
201 | | - if($prefix == $allkeyword){ |
202 | | - $rewrite = 'all'; |
203 | | - break; |
204 | | - // check for localized names of namespaces |
205 | | - } else if($index !== false) |
206 | | - $rewrite[] = $index; |
207 | | - } |
208 | | - $translated = null; |
209 | | - if($rewrite === 'all') |
210 | | - $translated = $rewrite; |
211 | | - else if(count($rewrite) != 0) |
212 | | - $translated = '['.implode(',',array_unique($rewrite)).']'; |
213 | | - |
214 | | - if(isset($translated)){ |
215 | | - // append text before the prefix, and then the prefix |
216 | | - $rewritten .= substr($query,$rindex,$start-$rindex); |
217 | | - $rewritten .= $translated . ':'; |
218 | | - $rindex = $i+1; |
219 | | - } |
220 | | - |
221 | | - $len = 0; |
222 | | - } else{ // end of token |
223 | | - if($c == '"') // get in/out of quotes |
224 | | - $inquotes = !$inquotes; |
225 | | - |
226 | | - $len = 0; |
227 | | - } |
228 | | - |
229 | | - } |
230 | | - // add rest of the original query that doesn't need rewritting |
231 | | - $rewritten .= substr($query,$rindex,$qlen-$rindex); |
232 | | - wfProfileOut($fname); |
233 | | - return $rewritten; |
234 | | - } |
235 | | -} |
236 | | - |
237 | | -class LuceneResult extends SearchResult { |
238 | | - /** |
239 | | - * Construct a result object from single result line |
240 | | - * |
241 | | - * @param array $lines |
242 | | - * @return array (float, Title) |
243 | | - * @access private |
244 | | - */ |
245 | | - function LuceneResult( $lines ) { |
246 | | - global $wgContLang; |
247 | | - |
248 | | - $score = null; |
249 | | - $interwiki = null; |
250 | | - $namespace = null; |
251 | | - $title = null; |
252 | | - |
253 | | - $line = $lines['result']; |
254 | | - wfDebug( "Lucene line: '$line'\n" ); |
255 | | - |
256 | | - # detect format |
257 | | - $parts = explode(' ', $line); |
258 | | - if(count($parts) == 3) |
259 | | - list( $score, $namespace, $title ) = $parts; |
260 | | - else |
261 | | - list( $score, $interwiki, $namespace, $nsText, $title ) = $parts; |
262 | | - |
263 | | - $score = floatval( $score ); |
264 | | - $namespace = intval( $namespace ); |
265 | | - $title = urldecode( $title ); |
266 | | - if(!isset($nsText)) |
267 | | - $nsText = $wgContLang->getNsText($namespace); |
268 | | - else |
269 | | - $nsText = urldecode($nsText); |
270 | | - |
271 | | - $this->mInterwiki = ''; |
272 | | - // make title |
273 | | - if( is_null($interwiki)){ |
274 | | - $this->mTitle = Title::makeTitle( $namespace, $title ); |
275 | | - } else{ |
276 | | - $interwiki = urldecode( $interwiki ); |
277 | | - // there might be a better way to make an interwiki link |
278 | | - $t = $interwiki.':'.$nsText.':'.str_replace( '_', ' ', $title ); |
279 | | - $this->mTitle = Title::newFromText( $t ); |
280 | | - $this->mInterwiki = $interwiki; |
281 | | - } |
282 | | - |
283 | | - $this->mScore = $score; |
284 | | - |
285 | | - $this->mWordCount = null; |
286 | | - if(array_key_exists("#h.wordcount",$lines)) |
287 | | - $this->mWordCount = intval($lines["#h.wordcount"][0]); |
288 | | - |
289 | | - $this->mSize = null; |
290 | | - if(array_key_exists("#h.size",$lines)) |
291 | | - $this->mSize = intval($lines["#h.size"][0]); |
292 | | - |
293 | | - $this->mDate = null; |
294 | | - if(array_key_exists("#h.date",$lines)) |
295 | | - $this->mDate = $lines["#h.date"][0]; |
296 | | - |
297 | | - // various snippets |
298 | | - list( $this->mHighlightTitle, $dummy ) = $this->extractSnippet($lines,$nsText,"#h.title"); |
299 | | - if( is_null($this->mHighlightTitle) && $this->isInterwiki() ){ |
300 | | - // construct highlighted interwiki title without the interwiki part |
301 | | - $this->mHighlightTitle = ($nsText==''? '' : $nsText.':') . str_replace( '_', ' ', $title ); |
302 | | - } |
303 | | - |
304 | | - list( $this->mHighlightText, $dummy ) = $this->extractSnippet($lines,'',"#h.text",true); |
305 | | - |
306 | | - list( $this->mHighlightRedirect, $redirect ) = $this->extractSnippet($lines,$nsText,"#h.redirect"); |
307 | | - $this->mRedirectTitle = null; |
308 | | - if( !is_null($redirect)){ |
309 | | - # build redirect Title object |
310 | | - if($interwiki != ''){ |
311 | | - $t = $interwiki.':'.$redirect; |
312 | | - $this->mRedirectTitle = Title::newFromText( $t ); |
313 | | - } else{ |
314 | | - $parts = explode(":",$redirect,2); |
315 | | - $redirectNs = intval($parts[0]); |
316 | | - $redirectText = str_replace('_', ' ', $parts[1]); |
317 | | - $this->mRedirectTitle = Title::makeTitle($redirectNs,$redirectText); |
318 | | - } |
319 | | - } |
320 | | - |
321 | | - list( $this->mHighlightSection, $section) = $this->extractSnippet($lines,'',"#h.section"); |
322 | | - $this->mSectionTitle = null; |
323 | | - if( !is_null($section)){ |
324 | | - # build title + fragment Title object |
325 | | - $t = $nsText.':'.str_replace( '_', ' ', $title ).'#'.$section; |
326 | | - $this->mSectionTitle = Title::newFromText($t); |
327 | | - } |
328 | | - |
329 | | - if($this->mInterwiki == '') |
330 | | - $this->mRevision = Revision::newFromTitle( $this->mTitle ); |
331 | | - } |
332 | | - |
333 | | - /** |
334 | | - * Get the pair [highlighted snippet, unmodified text] for highlighted text |
335 | | - * |
336 | | - * @param string $lines |
337 | | - * @param string $nsText textual form of namespace |
338 | | - * @param string $type |
339 | | - * @param boolean $useFinalSeparator |
340 | | - * @return array (highlighted, unmodified text) |
341 | | - */ |
342 | | - function extractSnippet($lines, $nsText, $type, $useFinalSeparator=false){ |
343 | | - if(!array_key_exists($type,$lines)) |
344 | | - return array(null,null); |
345 | | - $ret = ""; |
346 | | - $original = null; |
347 | | - foreach($lines[$type] as $h){ |
348 | | - list($s,$o) = $this->extractSnippetLine($h,$useFinalSeparator); |
349 | | - $ret .= $s; |
350 | | - $original = $o; |
351 | | - } |
352 | | - if($nsText!='') |
353 | | - $ret = $nsText.':'.$ret; |
354 | | - return array($ret,$original); |
355 | | - } |
356 | | - |
357 | | - /** |
358 | | - * Parse one line of a snippet |
359 | | - * |
360 | | - * @param string $line |
361 | | - * @param boolean $useFinalSeparator if "..." is to be appended to the end of snippet |
362 | | - * @access protected |
363 | | - * @return array(snippet,unmodified text) |
364 | | - */ |
365 | | - function extractSnippetLine($line, $useFinalSeparator){ |
366 | | - $parts = explode(" ",$line); |
367 | | - if(count($parts)!=4 && count($parts)!=5){ |
368 | | - wfDebug("Bad result line:".$line."\n"); |
369 | | - return ""; |
370 | | - } |
371 | | - $splits = $this->stripBracketsSplit($parts[0]); |
372 | | - $highlight = $this->stripBracketsSplit($parts[1]); |
373 | | - $suffix = urldecode($this->stripBrackets($parts[2])); |
374 | | - $text = urldecode($parts[3]); |
375 | | - $original = null; |
376 | | - if(count($parts) > 4) |
377 | | - $original = urldecode($parts[4]); |
378 | | - |
379 | | - $splits[] = strlen($text); |
380 | | - $start = 0; |
381 | | - $snippet = ""; |
382 | | - $hi = 0; |
383 | | - |
384 | | - foreach($splits as $sp){ |
385 | | - $sp = intval($sp); |
386 | | - // highlight words! |
387 | | - while($hi < count($highlight) && intval($highlight[$hi]) < $sp){ |
388 | | - $s = intval($highlight[$hi]); |
389 | | - $e = intval($highlight[$hi+1]); |
390 | | - $snippet .= substr($text,$start,$s-$start)."<span class='searchmatch'>".substr($text,$s,$e-$s)."</span>"; |
391 | | - $start = $e; |
392 | | - $hi += 2; |
393 | | - } |
394 | | - // copy till split point |
395 | | - $snippet .= substr($text,$start,$sp-$start); |
396 | | - if($sp == strlen($text) && $suffix != '') |
397 | | - $snippet .= $suffix; |
398 | | - else if($useFinalSeparator) |
399 | | - $snippet .= " <b>...</b> "; |
400 | | - |
401 | | - $start = $sp; |
402 | | - } |
403 | | - return array($snippet,$original); |
404 | | - } |
405 | | - |
406 | | - |
407 | | - /** |
408 | | - * @access private |
409 | | - */ |
410 | | - function stripBrackets($str){ |
411 | | - if($str == '[]') |
412 | | - return ''; |
413 | | - return substr($str,1,strlen($str)-2); |
414 | | - } |
415 | | - |
416 | | - /** |
417 | | - * @access private |
418 | | - * @return array |
419 | | - */ |
420 | | - function stripBracketsSplit($str){ |
421 | | - $strip = $this->stripBrackets($str); |
422 | | - if($strip == '') |
423 | | - return array(); |
424 | | - else |
425 | | - return explode(",",$strip); |
426 | | - } |
427 | | - |
428 | | - function getTitle() { |
429 | | - return $this->mTitle; |
430 | | - } |
431 | | - |
432 | | - function getScore() { |
433 | | - return null; // lucene scores are meaningless to the user... |
434 | | - } |
435 | | - |
436 | | - function getTitleSnippet($terms){ |
437 | | - if( is_null($this->mHighlightTitle) ) |
438 | | - return ''; |
439 | | - return $this->mHighlightTitle; |
440 | | - } |
441 | | - |
442 | | - function getTextSnippet($terms) { |
443 | | - if( is_null($this->mHighlightText) ) |
444 | | - return parent::getTextSnippet($terms); |
445 | | - return $this->mHighlightText; |
446 | | - } |
447 | | - |
448 | | - function getRedirectSnippet($terms) { |
449 | | - if( is_null($this->mHighlightRedirect) ) |
450 | | - return ''; |
451 | | - return $this->mHighlightRedirect; |
452 | | - } |
453 | | - |
454 | | - function getRedirectTitle(){ |
455 | | - return $this->mRedirectTitle; |
456 | | - } |
457 | | - |
458 | | - function getSectionSnippet(){ |
459 | | - if( is_null($this->mHighlightSection) ) |
460 | | - return ''; |
461 | | - return $this->mHighlightSection; |
462 | | - } |
463 | | - |
464 | | - function getSectionTitle(){ |
465 | | - return $this->mSectionTitle; |
466 | | - } |
467 | | - |
468 | | - function getInterwikiPrefix(){ |
469 | | - return $this->mInterwiki; |
470 | | - } |
471 | | - |
472 | | - function isInterwiki(){ |
473 | | - return $this->mInterwiki != ''; |
474 | | - } |
475 | | - |
476 | | - function getTimestamp(){ |
477 | | - if( is_null($this->mDate) ) |
478 | | - return parent::getTimestamp(); |
479 | | - return $this->mDate; |
480 | | - } |
481 | | - |
482 | | - function getWordCount(){ |
483 | | - if( is_null($this->mWordCount) ) |
484 | | - return parent::getWordCount(); |
485 | | - return $this->mWordCount; |
486 | | - } |
487 | | - |
488 | | - function getByteSize(){ |
489 | | - if( is_null($this->mSize) ) |
490 | | - return parent::getByteSize(); |
491 | | - return $this->mSize; |
492 | | - } |
493 | | - |
494 | | - function hasRelated(){ |
495 | | - global $wgLuceneSearchVersion, $wgLuceneUseRelated; |
496 | | - return $wgLuceneSearchVersion >= 2.1 && $wgLuceneUseRelated; |
497 | | - } |
498 | | -} |
499 | | - |
500 | | -class LuceneSearchSet extends SearchResultSet { |
501 | | - /** |
502 | | - * Contact the MWDaemon search server and return a wrapper |
503 | | - * object with the set of results. Results may be cached. |
504 | | - * |
505 | | - * @param string $method The protocol verb to use |
506 | | - * @param string $query |
507 | | - * @param int $limit |
508 | | - * @return array |
509 | | - * @access public |
510 | | - * @static |
511 | | - */ |
512 | | - function newFromQuery( $method, $query, $namespaces = array(), $limit = 20, $offset = 0 ) { |
513 | | - $fname = 'LuceneSearchSet::newFromQuery'; |
514 | | - wfProfileIn( $fname ); |
515 | | - |
516 | | - global $wgLuceneHost, $wgLucenePort, $wgDBname, $wgMemc; |
517 | | - global $wgLuceneSearchVersion, $wgLuceneSearchCacheExpiry; |
518 | | - |
519 | | - if( is_array( $wgLuceneHost ) ) { |
520 | | - $pick = mt_rand( 0, count( $wgLuceneHost ) - 1 ); |
521 | | - $host = $wgLuceneHost[$pick]; |
522 | | - } else { |
523 | | - $host = $wgLuceneHost; |
524 | | - } |
525 | | - |
526 | | - $enctext = rawurlencode( trim( $query ) ); |
527 | | - $searchUrl = "http://$host:$wgLucenePort/$method/$wgDBname/$enctext?" . |
528 | | - wfArrayToCGI( array( |
529 | | - 'namespaces' => implode( ',', $namespaces ), |
530 | | - 'offset' => $offset, |
531 | | - 'limit' => $limit, |
532 | | - 'version' => $wgLuceneSearchVersion, |
533 | | - 'iwlimit' => 10, |
534 | | - ) ); |
535 | | - |
536 | | - // try to fetch cached if caching is turned on |
537 | | - if($wgLuceneSearchCacheExpiry > 0){ |
538 | | - $key = "$wgDBname:lucene:" . md5( $searchUrl ); |
539 | | - $resultSet = $wgMemc->get( $key ); |
540 | | - if( is_object( $resultSet ) ) { |
541 | | - wfDebug( "$fname: got cached lucene results for key $key\n" ); |
542 | | - wfProfileOut( $fname ); |
543 | | - return $resultSet; |
544 | | - } |
545 | | - } |
546 | | - |
547 | | - wfDebug( "Fetching search data from $searchUrl\n" ); |
548 | | - wfSuppressWarnings(); |
549 | | - wfProfileIn( $fname.'-contact-'.$host ); |
550 | | - $data = Http::get( $searchUrl ); |
551 | | - wfProfileOut( $fname.'-contact-'.$host ); |
552 | | - wfRestoreWarnings(); |
553 | | - if( $data === false ) { |
554 | | - // Network error or server error |
555 | | - wfProfileOut( $fname ); |
556 | | - return null; |
557 | | - } else { |
558 | | - $inputLines = explode( "\n", trim( $data ) ); |
559 | | - $resultLines = array_map( 'trim', $inputLines ); |
560 | | - } |
561 | | - |
562 | | - $suggestion = null; |
563 | | - $totalHits = null; |
564 | | - $info = null; |
565 | | - $interwiki = null; |
566 | | - |
567 | | - # All methods have same syntax... |
568 | | - $totalHits = array_shift( $resultLines ); |
569 | | - if( $totalHits === false ) { |
570 | | - # I/O error? this shouldn't happen |
571 | | - wfDebug( "Couldn't read summary line...\n" ); |
572 | | - } else { |
573 | | - $totalHits = intval( $totalHits ); |
574 | | - wfDebug( "total [$totalHits] hits\n" ); |
575 | | - if($wgLuceneSearchVersion >= 2.1){ |
576 | | - # second line is info |
577 | | - list($dummy,$info) = explode(' ',array_shift($resultLines),2); |
578 | | - # third line is suggestions |
579 | | - $s = array_shift($resultLines); |
580 | | - if(self::startsWith($s,'#suggest ')) |
581 | | - $suggestion = $s; |
582 | | - |
583 | | - # fifth line is interwiki info line |
584 | | - $iwHeading = array_shift($resultLines); |
585 | | - list($dummy,$iwCount,$iwTotal) = explode(' ',$iwHeading); |
586 | | - if($iwCount>0){ |
587 | | - # pack interwiki lines into a separate result set |
588 | | - $interwikiLen = 0; |
589 | | - while(!self::startsWith($resultLines[$interwikiLen],"#results")) |
590 | | - $interwikiLen++; |
591 | | - $interwikiLines = array_splice($resultLines,0,$interwikiLen); |
592 | | - $interwiki = new LuceneSearchSet( $query, $interwikiLines, intval($iwCount), intval($iwTotal) ); |
593 | | - } |
594 | | - |
595 | | - # how many results we got |
596 | | - list($dummy,$resultCount) = explode(" ",array_shift($resultLines)); |
597 | | - $resultCount = intval($resultCount); |
598 | | - } else{ |
599 | | - $resultCount = count($resultLines); |
600 | | - } |
601 | | - } |
602 | | - |
603 | | - |
604 | | - $resultSet = new LuceneSearchSet( $query, $resultLines, $resultCount, $totalHits, |
605 | | - $suggestion, $info, $interwiki ); |
606 | | - |
607 | | - if($wgLuceneSearchCacheExpiry > 0){ |
608 | | - wfDebug( "$fname: caching lucene results for key $key\n" ); |
609 | | - $wgMemc->add( $key, $resultSet, $wgLuceneSearchCacheExpiry ); |
610 | | - } |
611 | | - |
612 | | - wfProfileOut( $fname ); |
613 | | - return $resultSet; |
614 | | - } |
615 | | - |
616 | | - static function startsWith($source, $prefix){ |
617 | | - return strncmp($source, $prefix, strlen($prefix)) == 0; |
618 | | - } |
619 | | - |
620 | | - /** |
621 | | - * Private constructor. Use LuceneSearchSet::newFromQuery(). |
622 | | - * |
623 | | - * @param string $query |
624 | | - * @param array $lines |
625 | | - * @param int $resultCount |
626 | | - * @param int $totalHits |
627 | | - * @param string $suggestion |
628 | | - * @param string $info |
629 | | - * @access private |
630 | | - */ |
631 | | - function LuceneSearchSet( $query, $lines, $resultCount, $totalHits = null, $suggestion = null, $info = null, $interwiki = null ) { |
632 | | - $this->mQuery = $query; |
633 | | - $this->mTotalHits = $totalHits; |
634 | | - $this->mResults = $lines; |
635 | | - $this->mResultCount = $resultCount; |
636 | | - $this->mPos = 0; |
637 | | - $this->mSuggestionQuery = null; |
638 | | - $this->mSuggestionSnippet = ''; |
639 | | - $this->parseSuggestion($suggestion); |
640 | | - $this->mInfo = $info; |
641 | | - $this->mInterwiki = $interwiki; |
642 | | - } |
643 | | - |
644 | | - /** Get suggestions from a suggestion result line */ |
645 | | - function parseSuggestion($suggestion){ |
646 | | - if( is_null($suggestion) ) |
647 | | - return; |
648 | | - // parse split points and highlight changes |
649 | | - list($dummy,$points,$sug) = explode(" ",$suggestion); |
650 | | - $sug = urldecode($sug); |
651 | | - $points = explode(",",substr($points,1,-1)); |
652 | | - array_unshift($points,0); |
653 | | - $suggestText = ""; |
654 | | - for($i=1;$i<count($points);$i+=2){ |
655 | | - $suggestText .= substr($sug,$points[$i-1],$points[$i]-$points[$i-1]); |
656 | | - $suggestText .= "<i>".substr($sug,$points[$i],$points[$i+1]-$points[$i])."</i>"; |
657 | | - } |
658 | | - $suggestText .= substr($sug,end($points)); |
659 | | - |
660 | | - $this->mSuggestionQuery = $this->replaceGenericPrefixes($sug); |
661 | | - $this->mSuggestionSnippet = $this->replaceGenericPrefixes($suggestText); |
662 | | - } |
663 | | - |
664 | | - /** replace prefixes like [2]: that are not in phrases */ |
665 | | - function replaceGenericPrefixes($text){ |
666 | | - $out = ""; |
667 | | - $phrases = explode('"',$text); |
668 | | - for($i=0;$i<count($phrases);$i+=2){ |
669 | | - $out .= preg_replace_callback('/\[([0-9]+)\]:/', array($this,'genericPrefixCallback'), $phrases[$i]); |
670 | | - if($i+1 < count($phrases)) |
671 | | - $out .= '"'.$phrases[$i+1].'"'; // phrase text |
672 | | - } |
673 | | - return $out; |
674 | | - } |
675 | | - |
676 | | - function genericPrefixCallback($matches){ |
677 | | - global $wgContLang; |
678 | | - return $wgContLang->getFormattedNsText($matches[1]).":"; |
679 | | - } |
680 | | - |
681 | | - function numRows() { |
682 | | - return $this->mResultCount; |
683 | | - } |
684 | | - |
685 | | - function termMatches() { |
686 | | - $resq = preg_replace( "/\\[.*?\\]:/", " ", $this->mQuery ); # generic prefixes |
687 | | - $resq = preg_replace( "/all:/", " ", $resq ); |
688 | | - $resq = trim( preg_replace( "/[ |\\[\\]()\"{}+\\-_@!?%&*=\\|:;><,.\\/]+/", " ", $resq ) ); |
689 | | - $terms = array_map( array( &$this, 'regexQuote' ), |
690 | | - explode( ' ', $resq ) ); |
691 | | - return $terms; |
692 | | - } |
693 | | - |
694 | | - /** |
695 | | - * Stupid hack around PHP's limited lambda support |
696 | | - * @access private |
697 | | - */ |
698 | | - function regexQuote( $term ) { |
699 | | - return preg_quote( $term, '/' ); |
700 | | - } |
701 | | - |
702 | | - function hasResults() { |
703 | | - return count( $this->mResults ) > 0; |
704 | | - } |
705 | | - |
706 | | - /** |
707 | | - * Some search modes return a total hit count for the query |
708 | | - * in the entire article database. This may include pages |
709 | | - * in namespaces that would not be matched on the given |
710 | | - * settings. |
711 | | - * |
712 | | - * @return int |
713 | | - * @access public |
714 | | - */ |
715 | | - function getTotalHits() { |
716 | | - return $this->mTotalHits; |
717 | | - } |
718 | | - |
719 | | - /** |
720 | | - * Return information about how and from where the results were fetched, |
721 | | - * should be useful for diagnostics and debugging |
722 | | - * |
723 | | - * @return string |
724 | | - */ |
725 | | - function getInfo() { |
726 | | - if( is_null($this->mInfo) ) |
727 | | - return null; |
728 | | - return "Search results fetched via ".$this->mInfo; |
729 | | - } |
730 | | - |
731 | | - /** |
732 | | - * Return a result set of hits on other (multiple) wikis associated with this one |
733 | | - * |
734 | | - * @return SearchResultSet |
735 | | - */ |
736 | | - function getInterwikiResults() { |
737 | | - return $this->mInterwiki; |
738 | | - } |
739 | | - |
740 | | - /** |
741 | | - * Some search modes return a suggested alternate term if there are |
742 | | - * no exact hits. Returns true if there is one on this set. |
743 | | - * |
744 | | - * @return bool |
745 | | - * @access public |
746 | | - */ |
747 | | - function hasSuggestion() { |
748 | | - return is_string( $this->mSuggestionQuery ) && $this->mSuggestionQuery != ''; |
749 | | - } |
750 | | - |
751 | | - function getSuggestionQuery(){ |
752 | | - return $this->mSuggestionQuery; |
753 | | - } |
754 | | - |
755 | | - function getSuggestionSnippet(){ |
756 | | - return $this->mSuggestionSnippet; |
757 | | - } |
758 | | - |
759 | | - /** |
760 | | - * Fetches next search result, or false. |
761 | | - * @return LuceneResult |
762 | | - * @access public |
763 | | - * @abstract |
764 | | - */ |
765 | | - function next() { |
766 | | - # Group together lines belonging to one hit |
767 | | - $group = array(); |
768 | | - |
769 | | - for(;$this->mPos < count($this->mResults);$this->mPos++){ |
770 | | - $l = trim($this->mResults[$this->mPos]); |
771 | | - if(count($group) == 0) // main line |
772 | | - $group['result'] = $l; |
773 | | - else if($l[0] == '#'){ // additional meta |
774 | | - list($meta,$value) = explode(" ",$l,2); |
775 | | - $group[$meta][] = $value; |
776 | | - } else |
777 | | - break; |
778 | | - } |
779 | | - if($group == false) |
780 | | - return false; |
781 | | - else |
782 | | - return new LuceneResult( $group ); |
783 | | - } |
784 | | - |
785 | | -} |
786 | | - |
787 | | -} # End of extension function |
788 | 85 | } # End of invocation guard |
789 | 86 | |
Index: trunk/extensions/MWSearch/MWSearch_body.php |
— | — | @@ -0,0 +1,704 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +class LuceneSearch extends SearchEngine { |
| 5 | + /** |
| 6 | + * Perform a full text search query and return a result set. |
| 7 | + * |
| 8 | + * @param string $term - Raw search term |
| 9 | + * @return LuceneSearchSet |
| 10 | + * @access public |
| 11 | + */ |
| 12 | + function searchText( $term ) { |
| 13 | + return LuceneSearchSet::newFromQuery( isset($this->related)? 'related' : 'search', |
| 14 | + $term, $this->namespaces, $this->limit, $this->offset ); |
| 15 | + } |
| 16 | + |
| 17 | + /** |
| 18 | + * Perform a title-only search query and return a result set. |
| 19 | + * |
| 20 | + * @param string $term - Raw search term |
| 21 | + * @return LuceneSearchSet |
| 22 | + * @access public |
| 23 | + */ |
| 24 | + function searchTitle( $term ) { |
| 25 | + return null; |
| 26 | + } |
| 27 | + |
| 28 | + /** |
| 29 | + * PrefixSearchBackend override for OpenSearch results |
| 30 | + */ |
| 31 | + static function prefixSearch( $ns, $search, $limit, &$results ) { |
| 32 | + $it = LuceneSearchSet::newFromQuery( 'prefix', $search, $ns, $limit, 0 ); |
| 33 | + $results = array(); |
| 34 | + while( $res = $it->next() ) { |
| 35 | + $results[] = $res->getTitle()->getPrefixedText(); |
| 36 | + } |
| 37 | + |
| 38 | + return false; |
| 39 | + } |
| 40 | + |
| 41 | + /** |
| 42 | + * Prepare query for the lucene-search daemon: |
| 43 | + * |
| 44 | + * 1) rewrite namespaces into standardized form |
| 45 | + * e.g. image:clouds -> [6]:clouds |
| 46 | + * e.g. help,wp:npov -> [12,4]:npov |
| 47 | + * |
| 48 | + * 2) rewrite localizations of "search everything" keyword |
| 49 | + * e.g. alle:heidegger -> all:heidegger |
| 50 | + * |
 | 51 | + * @param string $query |
| 52 | + * @return string rewritten query |
| 53 | + * @access private |
| 54 | + */ |
| 55 | + function replacePrefixes( $query ) { |
| 56 | + global $wgContLang, $wgLuceneUseRelated; |
| 57 | + $fname = 'LuceneSearch::replacePrefixes'; |
| 58 | + wfProfileIn($fname); |
| 59 | + $qlen = strlen($query); |
| 60 | + $start = 0; $len = 0; // token start pos and length |
| 61 | + $rewritten = ''; // rewritten query |
| 62 | + $rindex = 0; // point to last rewritten character |
| 63 | + $inquotes = false; |
| 64 | + |
| 65 | + // quick check, most of the time we don't need any rewriting |
| 66 | + if(strpos($query,':')===false){ |
| 67 | + wfProfileOut($fname); |
| 68 | + return $query; |
| 69 | + } |
| 70 | + |
 | 71 | + // check if this is a query for related articles |
| 72 | + $relatedkey = wfMsgForContent('searchrelated').':'; |
| 73 | + if($wgLuceneUseRelated && strncmp($query, $relatedkey, strlen($relatedkey)) == 0){ |
| 74 | + $this->related = true; |
| 75 | + list($dummy,$ret) = explode(":",$query,2); |
| 76 | + wfProfileOut($fname); |
| 77 | + return trim($ret); |
| 78 | + } |
| 79 | + |
| 80 | + // "search everything" |
| 81 | + // might not be at the beginning for complex queries |
| 82 | + $allkeyword = wfMsgForContent('searchall'); |
| 83 | + |
| 84 | + for($i = 0 ; $i < $qlen ; $i++){ |
| 85 | + $c = $query[$i]; |
| 86 | + |
| 87 | + // ignore chars in quotes |
| 88 | + if($inquotes && $c!='"'); |
| 89 | + // check if $c is valid prefix character |
| 90 | + else if(($c >= 'a' && $c <= 'z') || |
| 91 | + ($c >= 'A' && $c <= 'Z') || |
| 92 | + $c == '_' || $c == '-' || $c ==','){ |
| 93 | + if($len == 0){ |
| 94 | + $start = $i; // begin of token |
| 95 | + $len = 1; |
| 96 | + } else |
| 97 | + $len++; |
| 98 | + // check for utf-8 chars |
| 99 | + } else if(($c >= "\xc0" && $c <= "\xff")){ |
| 100 | + $utf8len = 1; |
| 101 | + for($j = $i+1; $j < $qlen; $j++){ // fetch extra utf-8 bytes |
| 102 | + if($query[$j] >= "\x80" && $query[$j] <= "\xbf") |
| 103 | + $utf8len++; |
| 104 | + else |
| 105 | + break; |
| 106 | + } |
| 107 | + if($len == 0){ |
| 108 | + $start = $i; |
| 109 | + $len = $utf8len; |
| 110 | + } else |
| 111 | + $len += $utf8len; |
| 112 | + $i = $j - 1; // we consumed the chars |
 | 113 | + // check for end of prefix (i.e. colon) |
| 114 | + } else if($c == ':' && $len !=0){ |
| 115 | + $rewrite = array(); // here we collect namespaces |
| 116 | + $prefixes = explode(',',substr($query,$start,$len)); |
| 117 | + // iterate thru comma-separated list of prefixes |
| 118 | + foreach($prefixes as $prefix){ |
| 119 | + $index = $wgContLang->getNsIndex($prefix); |
| 120 | + |
| 121 | + // check for special prefixes all/incategory |
| 122 | + if($prefix == $allkeyword){ |
| 123 | + $rewrite = 'all'; |
| 124 | + break; |
| 125 | + // check for localized names of namespaces |
| 126 | + } else if($index !== false) |
| 127 | + $rewrite[] = $index; |
| 128 | + } |
| 129 | + $translated = null; |
| 130 | + if($rewrite === 'all') |
| 131 | + $translated = $rewrite; |
| 132 | + else if(count($rewrite) != 0) |
| 133 | + $translated = '['.implode(',',array_unique($rewrite)).']'; |
| 134 | + |
| 135 | + if(isset($translated)){ |
| 136 | + // append text before the prefix, and then the prefix |
| 137 | + $rewritten .= substr($query,$rindex,$start-$rindex); |
| 138 | + $rewritten .= $translated . ':'; |
| 139 | + $rindex = $i+1; |
| 140 | + } |
| 141 | + |
| 142 | + $len = 0; |
| 143 | + } else{ // end of token |
| 144 | + if($c == '"') // get in/out of quotes |
| 145 | + $inquotes = !$inquotes; |
| 146 | + |
| 147 | + $len = 0; |
| 148 | + } |
| 149 | + |
| 150 | + } |
| 151 | + // add rest of the original query that doesn't need rewritting |
| 152 | + $rewritten .= substr($query,$rindex,$qlen-$rindex); |
| 153 | + wfProfileOut($fname); |
| 154 | + return $rewritten; |
| 155 | + } |
| 156 | +} |
| 157 | + |
/**
 * One search hit, built from the raw lines the Lucene search daemon
 * returned for it.
 *
 * The constructor receives the lines that LuceneSearchSet::next() grouped
 * together: the main result line under the key 'result', plus optional
 * metadata lines keyed by their "#h.*" tag (title/text/redirect/section
 * snippets, word count, size, date).
 */
class LuceneResult extends SearchResult {
	/**
	 * Construct a result object from the lines describing a single hit.
	 *
	 * The main line is either "score namespace title" (3 fields) or
	 * "score interwiki namespace nsText title" (5 fields); title, nsText
	 * and interwiki are url-encoded.
	 *
	 * @param array $lines 'result' => main result line (string); every
	 *                     other key ("#h.title", "#h.text", ...) maps to an
	 *                     array of raw metadata lines
	 * @access private
	 */
	function __construct( $lines ) {
		global $wgContLang;

		$score = null;
		$interwiki = null;
		$namespace = null;
		$title = null;
		$nsText = null;

		$line = $lines['result'];
		wfDebug( "Lucene line: '$line'\n" );

		# detect format
		$parts = explode(' ', $line);
		if(count($parts) == 3){
			list( $score, $namespace, $title ) = $parts;
		} else{
			# pad truncated lines so that missing fields become null
			# instead of triggering undefined-offset notices
			$parts = array_pad( $parts, 5, null );
			list( $score, $interwiki, $namespace, $nsText, $title ) = $parts;
		}

		$score = floatval( $score );
		$namespace = intval( $namespace );
		$title = urldecode( $title );
		if(!isset($nsText))
			$nsText = $wgContLang->getNsText($namespace);
		else
			$nsText = urldecode($nsText);

		$this->mInterwiki = '';
		// make title
		if( is_null($interwiki)){
			$this->mTitle = Title::makeTitle( $namespace, $title );
		} else{
			$interwiki = urldecode( $interwiki );
			// there might be a better way to make an interwiki link
			$t = $interwiki.':'.$nsText.':'.str_replace( '_', ' ', $title );
			$this->mTitle = Title::newFromText( $t );
			$this->mInterwiki = $interwiki;
		}

		$this->mScore = $score;

		$this->mWordCount = null;
		if(array_key_exists("#h.wordcount",$lines))
			$this->mWordCount = intval($lines["#h.wordcount"][0]);

		$this->mSize = null;
		if(array_key_exists("#h.size",$lines))
			$this->mSize = intval($lines["#h.size"][0]);

		$this->mDate = null;
		if(array_key_exists("#h.date",$lines))
			$this->mDate = $lines["#h.date"][0];

		// various snippets
		list( $this->mHighlightTitle, $unused ) = $this->extractSnippet($lines,$nsText,"#h.title");
		if( is_null($this->mHighlightTitle) && $this->isInterwiki() ){
			// construct highlighted interwiki title without the interwiki part
			$this->mHighlightTitle = ($nsText==''? '' : $nsText.':') . str_replace( '_', ' ', $title );
		}

		list( $this->mHighlightText, $unused ) = $this->extractSnippet($lines,'',"#h.text",true);

		list( $this->mHighlightRedirect, $redirect ) = $this->extractSnippet($lines,$nsText,"#h.redirect");
		$this->mRedirectTitle = null;
		if( !is_null($redirect)){
			# build redirect Title object
			if($interwiki != ''){
				$t = $interwiki.':'.$redirect;
				$this->mRedirectTitle = Title::newFromText( $t );
			} else{
				# local redirects come as "<namespace index>:<title>"
				$redirectParts = explode(":",$redirect,2);
				$redirectNs = intval($redirectParts[0]);
				$redirectText = isset($redirectParts[1])
					? str_replace('_', ' ', $redirectParts[1])
					: '';
				$this->mRedirectTitle = Title::makeTitle($redirectNs,$redirectText);
			}
		}

		list( $this->mHighlightSection, $section) = $this->extractSnippet($lines,'',"#h.section");
		$this->mSectionTitle = null;
		if( !is_null($section)){
			# build title + fragment Title object
			$t = $nsText.':'.str_replace( '_', ' ', $title ).'#'.$section;
			$this->mSectionTitle = Title::newFromText($t);
		}

		// only local pages have a revision we can load
		if($this->mInterwiki == '')
			$this->mRevision = Revision::newFromTitle( $this->mTitle );
	}

	/**
	 * Get the pair [highlighted snippet, unmodified text] for one kind of
	 * highlight metadata.
	 *
	 * All lines stored under $type are parsed and their snippets
	 * concatenated; the "unmodified text" is the one from the last line.
	 *
	 * @param array $lines grouped result lines, as passed to the constructor
	 * @param string $nsText textual form of namespace; when non-empty it is
	 *                       prepended to the snippet as "nsText:"
	 * @param string $type metadata key, e.g. "#h.title"
	 * @param boolean $useFinalSeparator passed through to extractSnippetLine()
	 * @return array (highlighted, unmodified text); (null, null) when $lines
	 *               has no entry for $type
	 */
	function extractSnippet($lines, $nsText, $type, $useFinalSeparator=false){
		if(!array_key_exists($type,$lines))
			return array(null,null);
		$ret = "";
		$original = null;
		foreach($lines[$type] as $h){
			list($s,$o) = $this->extractSnippetLine($h,$useFinalSeparator);
			$ret .= $s;
			$original = $o;
		}
		if($nsText!='')
			$ret = $nsText.':'.$ret;
		return array($ret,$original);
	}

	/**
	 * Parse one line of a snippet.
	 *
	 * A line has 4 or 5 space-separated fields:
	 * "[split,points] [hl_start,hl_end,...] [suffix] text [original]",
	 * where suffix, text and original are url-encoded.
	 *
	 * @param string $line
	 * @param boolean $useFinalSeparator if "..." is to be appended after a
	 *                                   fragment that does not end with the suffix
	 * @access protected
	 * @return array (snippet, unmodified text); ('', null) for a malformed line
	 */
	function extractSnippetLine($line, $useFinalSeparator){
		$parts = explode(" ",$line);
		if(count($parts)!=4 && count($parts)!=5){
			wfDebug("Bad result line:".$line."\n");
			// keep the documented pair shape: the caller destructures it with list()
			return array('',null);
		}
		$splits = $this->stripBracketsSplit($parts[0]);
		$highlight = $this->stripBracketsSplit($parts[1]);
		$suffix = urldecode($this->stripBrackets($parts[2]));
		$text = urldecode($parts[3]);
		$original = null;
		if(count($parts) > 4)
			$original = urldecode($parts[4]);

		// highlight offsets come in (start,end) pairs; drop a dangling start
		if(count($highlight) % 2 != 0)
			array_pop($highlight);

		$splits[] = strlen($text);
		$start = 0;
		$snippet = "";
		$hi = 0;

		// NOTE(review): $text and $suffix are placed into HTML unescaped;
		// confirm that the search daemon already HTML-escapes snippet text.
		foreach($splits as $sp){
			$sp = intval($sp);
			// highlight words!
			while($hi < count($highlight) && intval($highlight[$hi]) < $sp){
				$s = intval($highlight[$hi]);
				$e = intval($highlight[$hi+1]);
				$snippet .= substr($text,$start,$s-$start)."<span class='searchmatch'>".substr($text,$s,$e-$s)."</span>";
				$start = $e;
				$hi += 2;
			}
			// copy till split point
			$snippet .= substr($text,$start,$sp-$start);
			if($sp == strlen($text) && $suffix != '')
				$snippet .= $suffix;
			else if($useFinalSeparator)
				$snippet .= " <b>...</b> ";

			$start = $sp;
		}
		return array($snippet,$original);
	}


	/**
	 * Remove the first and last character (the enclosing brackets) of $str.
	 *
	 * @param string $str
	 * @return string '' for the empty list "[]"
	 * @access private
	 */
	function stripBrackets($str){
		if($str == '[]')
			return '';
		return substr($str,1,strlen($str)-2);
	}

	/**
	 * Strip the enclosing brackets and split the rest on commas.
	 *
	 * @param string $str
	 * @access private
	 * @return array empty when there is nothing between the brackets
	 */
	function stripBracketsSplit($str){
		$strip = $this->stripBrackets($str);
		if($strip == '')
			return array();
		else
			return explode(",",$strip);
	}

	/** @return Title title built in the constructor */
	function getTitle() {
		return $this->mTitle;
	}

	/** @return null always */
	function getScore() {
		return null; // lucene scores are meaningless to the user...
	}

	/**
	 * @param array $terms unused
	 * @return string highlighted title, '' when there is none
	 */
	function getTitleSnippet($terms){
		if( is_null($this->mHighlightTitle) )
			return '';
		return $this->mHighlightTitle;
	}

	/**
	 * @param array $terms only used by the parent fallback
	 * @return string highlighted text; falls back to the parent
	 *                implementation when no "#h.text" data was supplied
	 */
	function getTextSnippet($terms) {
		if( is_null($this->mHighlightText) )
			return parent::getTextSnippet($terms);
		return $this->mHighlightText;
	}

	/**
	 * @param array $terms unused
	 * @return string highlighted redirect, '' when there is none
	 */
	function getRedirectSnippet($terms) {
		if( is_null($this->mHighlightRedirect) )
			return '';
		return $this->mHighlightRedirect;
	}

	/** @return Title|null title of the matching redirect, if any */
	function getRedirectTitle(){
		return $this->mRedirectTitle;
	}

	/** @return string highlighted section heading, '' when there is none */
	function getSectionSnippet(){
		if( is_null($this->mHighlightSection) )
			return '';
		return $this->mHighlightSection;
	}

	/** @return Title|null title including the section fragment, if any */
	function getSectionTitle(){
		return $this->mSectionTitle;
	}

	/** @return string interwiki prefix, '' for local results */
	function getInterwikiPrefix(){
		return $this->mInterwiki;
	}

	/** @return boolean true when the result has an interwiki prefix */
	function isInterwiki(){
		return $this->mInterwiki != '';
	}

	/** @return string "#h.date" value, or the parent's timestamp when absent */
	function getTimestamp(){
		if( is_null($this->mDate) )
			return parent::getTimestamp();
		return $this->mDate;
	}

	/** @return int "#h.wordcount" value, or the parent's count when absent */
	function getWordCount(){
		if( is_null($this->mWordCount) )
			return parent::getWordCount();
		return $this->mWordCount;
	}

	/** @return int "#h.size" value, or the parent's size when absent */
	function getByteSize(){
		if( is_null($this->mSize) )
			return parent::getByteSize();
		return $this->mSize;
	}

	/**
	 * @return boolean true when $wgLuceneSearchVersion is at least 2.1 and
	 *                 $wgLuceneUseRelated is enabled
	 */
	function hasRelated(){
		global $wgLuceneSearchVersion, $wgLuceneUseRelated;
		return $wgLuceneSearchVersion >= 2.1 && $wgLuceneUseRelated;
	}
}
| 420 | + |
/**
 * Result set holding the raw lines returned by the Lucene search daemon;
 * next() turns them into LuceneResult objects one hit at a time.
 */
class LuceneSearchSet extends SearchResultSet {
	/**
	 * Contact the MWDaemon search server and return a wrapper
	 * object with the set of results. Results may be cached.
	 *
	 * @param string $method The protocol verb to use
	 * @param string $query
	 * @param array $namespaces namespace indexes to search in
	 * @param int $limit
	 * @param int $offset
	 * @return LuceneSearchSet|null null when the HTTP request failed
	 * @access public
	 */
	static function newFromQuery( $method, $query, $namespaces = array(), $limit = 20, $offset = 0 ) {
		$fname = 'LuceneSearchSet::newFromQuery';
		wfProfileIn( $fname );

		global $wgLuceneHost, $wgLucenePort, $wgDBname, $wgMemc;
		global $wgLuceneSearchVersion, $wgLuceneSearchCacheExpiry;

		// pick a random backend when several hosts are configured
		if( is_array( $wgLuceneHost ) ) {
			$pick = mt_rand( 0, count( $wgLuceneHost ) - 1 );
			$host = $wgLuceneHost[$pick];
		} else {
			$host = $wgLuceneHost;
		}

		$enctext = rawurlencode( trim( $query ) );
		$searchUrl = "http://$host:$wgLucenePort/$method/$wgDBname/$enctext?" .
			wfArrayToCGI( array(
				'namespaces' => implode( ',', $namespaces ),
				'offset'     => $offset,
				'limit'      => $limit,
				'version'    => $wgLuceneSearchVersion,
				'iwlimit'	 => 10,
			) );

		// try to fetch cached if caching is turned on
		$key = null;
		if($wgLuceneSearchCacheExpiry > 0){
			$key = "$wgDBname:lucene:" . md5( $searchUrl );
			$resultSet = $wgMemc->get( $key );
			if( is_object( $resultSet ) ) {
				wfDebug( "$fname: got cached lucene results for key $key\n" );
				wfProfileOut( $fname );
				return $resultSet;
			}
		}

		wfDebug( "Fetching search data from $searchUrl\n" );
		wfSuppressWarnings();
		wfProfileIn( $fname.'-contact-'.$host );
		$data = Http::get( $searchUrl );
		wfProfileOut( $fname.'-contact-'.$host );
		wfRestoreWarnings();
		if( $data === false ) {
			// Network error or server error
			wfProfileOut( $fname );
			return null;
		}

		$inputLines = explode( "\n", trim( $data ) );
		$resultLines = array_map( 'trim', $inputLines );

		$suggestion = null;
		$info = null;
		$interwiki = null;
		$resultCount = 0;

		# All methods have same syntax: first line is the total hit count.
		# array_shift() yields NULL (never false) when nothing is left, and
		# an empty response leaves a single '' line behind.
		$summary = array_shift( $resultLines );
		if( is_null( $summary ) || $summary === '' ) {
			# I/O error? this shouldn't happen
			wfDebug( "Couldn't read summary line...\n" );
		}
		$totalHits = intval( $summary );
		wfDebug( "total [$totalHits] hits\n" );

		if($wgLuceneSearchVersion >= 2.1){
			# second line is info
			$infoParts = explode( ' ', (string)array_shift( $resultLines ), 2 );
			$info = isset( $infoParts[1] ) ? $infoParts[1] : null;

			# third line is suggestions
			$s = array_shift( $resultLines );
			if( is_string( $s ) && self::startsWith( $s, '#suggest ' ) )
				$suggestion = $s;

			# next line is the interwiki info line: "<tag> <count> <total>"
			$iwParts = array_pad( explode( ' ', (string)array_shift( $resultLines ) ), 3, 0 );
			$iwCount = intval( $iwParts[1] );
			$iwTotal = intval( $iwParts[2] );
			if( $iwCount > 0 ){
				# pack interwiki lines into a separate result set; the bound
				# check stops the scan if the "#results" line is missing
				$interwikiLen = 0;
				$numLines = count( $resultLines );
				while( $interwikiLen < $numLines
					&& !self::startsWith( $resultLines[$interwikiLen], "#results" ) )
					$interwikiLen++;
				$interwikiLines = array_splice( $resultLines, 0, $interwikiLen );
				$interwiki = new LuceneSearchSet( $query, $interwikiLines, $iwCount, $iwTotal );
			}

			# how many results we got
			$countParts = explode( " ", (string)array_shift( $resultLines ) );
			$resultCount = isset( $countParts[1] ) ? intval( $countParts[1] ) : 0;
		} else{
			$resultCount = count($resultLines);
		}

		$resultSet = new LuceneSearchSet( $query, $resultLines, $resultCount, $totalHits,
			$suggestion, $info, $interwiki );

		if($wgLuceneSearchCacheExpiry > 0){
			wfDebug( "$fname: caching lucene results for key $key\n" );
			$wgMemc->add( $key, $resultSet, $wgLuceneSearchCacheExpiry );
		}

		wfProfileOut( $fname );
		return $resultSet;
	}

	/**
	 * @param string $source
	 * @param string $prefix
	 * @return boolean true when $source begins with $prefix
	 */
	static function startsWith($source, $prefix){
		return strncmp($source, $prefix, strlen($prefix)) == 0;
	}

	/**
	 * Private constructor. Use LuceneSearchSet::newFromQuery().
	 *
	 * @param string $query
	 * @param array $lines raw result lines
	 * @param int $resultCount
	 * @param int $totalHits
	 * @param string $suggestion raw "#suggest" line, or null
	 * @param string $info
	 * @param LuceneSearchSet $interwiki result set for other wikis, or null
	 * @access private
	 */
	function __construct( $query, $lines, $resultCount, $totalHits = null, $suggestion = null, $info = null, $interwiki = null ) {
		$this->mQuery             = $query;
		$this->mTotalHits         = $totalHits;
		$this->mResults           = $lines;
		$this->mResultCount       = $resultCount;
		$this->mPos               = 0;
		$this->mSuggestionQuery   = null;
		$this->mSuggestionSnippet = '';
		$this->parseSuggestion($suggestion);
		$this->mInfo              = $info;
		$this->mInterwiki         = $interwiki;
	}

	/**
	 * Get suggestions from a suggestion result line.
	 *
	 * The line has the form "#suggest [s1,e1,s2,e2,...] url-encoded-query";
	 * each (start,end) pair marks a changed range that is wrapped in <i>
	 * in the snippet. Malformed lines are ignored.
	 *
	 * @param string $suggestion raw line, or null for "no suggestion"
	 */
	function parseSuggestion($suggestion){
		if( is_null($suggestion) )
			return;
		$parts = explode(" ",$suggestion);
		if( count($parts) < 3 ){
			wfDebug("Bad suggestion line:".$suggestion."\n");
			return;
		}
		list($unused,$points,$sug) = $parts;
		$sug = urldecode($sug);

		// parse split points and highlight changes
		$inner = substr($points,1,-1);
		if( $inner === false || $inner === '' )
			$points = array(); // "[]": nothing to highlight
		else
			$points = array_map( 'intval', explode(",",$inner) );
		array_unshift($points,0);

		// NOTE(review): $sug is placed into HTML unescaped; confirm the
		// caller or the search daemon escapes it.
		$suggestText = "";
		// $i walks over range starts; requiring $i+1 skips a dangling start
		for($i=1;$i+1<count($points);$i+=2){
			$suggestText .= substr($sug,$points[$i-1],$points[$i]-$points[$i-1]);
			$suggestText .= "<i>".substr($sug,$points[$i],$points[$i+1]-$points[$i])."</i>";
		}
		$suggestText .= substr($sug,end($points));

		$this->mSuggestionQuery = $this->replaceGenericPrefixes($sug);
		$this->mSuggestionSnippet = $this->replaceGenericPrefixes($suggestText);
	}

	/**
	 * Replace prefixes like [2]: that are not in phrases.
	 *
	 * @param string $text
	 * @return string $text with "[N]:" outside of double quotes replaced
	 *                via genericPrefixCallback()
	 */
	function replaceGenericPrefixes($text){
		$out = "";
		// even chunks are outside quotes, odd chunks are quoted phrases
		$phrases = explode('"',$text);
		for($i=0;$i<count($phrases);$i+=2){
			$out .= preg_replace_callback('/\[([0-9]+)\]:/', array($this,'genericPrefixCallback'), $phrases[$i]);
			if($i+1 < count($phrases))
				$out .= '"'.$phrases[$i+1].'"'; // phrase text
		}
		return $out;
	}

	/**
	 * preg_replace_callback() handler for replaceGenericPrefixes().
	 *
	 * @param array $matches $matches[1] is the namespace index
	 * @return string formatted namespace name followed by ":"
	 */
	function genericPrefixCallback($matches){
		global $wgContLang;
		return $wgContLang->getFormattedNsText($matches[1]).":";
	}

	/** @return int result count passed to the constructor */
	function numRows() {
		return $this->mResultCount;
	}

	/**
	 * Split the query into regex-quoted terms, dropping generic prefixes,
	 * "all:" and punctuation.
	 *
	 * @return array
	 */
	function termMatches() {
		$resq = preg_replace( "/\\[.*?\\]:/", " ", $this->mQuery ); # generic prefixes
		$resq = preg_replace( "/all:/", " ", $resq );
		$resq = trim( preg_replace( "/[ |\\[\\]()\"{}+\\-_@!?%&*=\\|:;><,.\\/]+/", " ", $resq ) );
		$terms = array_map( array( $this, 'regexQuote' ),
			explode( ' ', $resq ) );
		return $terms;
	}

	/**
	 * Stupid hack around PHP's limited lambda support
	 * @access private
	 */
	function regexQuote( $term ) {
		return preg_quote( $term, '/' );
	}

	/** @return boolean true when at least one raw result line is held */
	function hasResults() {
		return count( $this->mResults ) > 0;
	}

	/**
	 * Some search modes return a total hit count for the query
	 * in the entire article database. This may include pages
	 * in namespaces that would not be matched on the given
	 * settings.
	 *
	 * @return int
	 * @access public
	 */
	function getTotalHits() {
		return $this->mTotalHits;
	}

	/**
	 * Return information about how and from where the results were fetched,
	 * should be useful for diagnostics and debugging
	 *
	 * @return string null when no info line was available
	 */
	function getInfo() {
		if( is_null($this->mInfo) )
			return null;
		return "Search results fetched via ".$this->mInfo;
	}

	/**
	 * Return a result set of hits on other (multiple) wikis associated with this one
	 *
	 * @return SearchResultSet
	 */
	function getInterwikiResults() {
		return $this->mInterwiki;
	}

	/**
	 * Some search modes return a suggested alternate term if there are
	 * no exact hits. Returns true if there is one on this set.
	 *
	 * @return bool
	 * @access public
	 */
	function hasSuggestion() {
		return is_string( $this->mSuggestionQuery ) && $this->mSuggestionQuery != '';
	}

	/** @return string suggested query, null when there is none */
	function getSuggestionQuery(){
		return $this->mSuggestionQuery;
	}

	/** @return string suggested query with changes wrapped in <i> */
	function getSuggestionSnippet(){
		return $this->mSuggestionSnippet;
	}

	/**
	 * Fetches next search result, or false.
	 *
	 * A hit consists of one main line followed by any number of metadata
	 * lines starting with '#'; the first non-'#' line after the main line
	 * begins the next hit.
	 *
	 * @return LuceneResult
	 * @access public
	 */
	function next() {
		# Group together lines belonging to one hit
		$group = array();

		$numLines = count($this->mResults);
		for(;$this->mPos < $numLines;$this->mPos++){
			$l = trim($this->mResults[$this->mPos]);
			if($l === '')
				continue; // blank lines carry no data and have no $l[0]
			if(count($group) == 0) // main line
				$group['result'] = $l;
			else if($l[0] == '#'){ // additional meta
				$metaParts = explode(" ",$l,2);
				$group[$metaParts[0]][] = isset($metaParts[1]) ? $metaParts[1] : '';
			} else
				break;
		}
		if(count($group) == 0)
			return false;
		else
			return new LuceneResult( $group );
	}

}
Property changes on: trunk/extensions/MWSearch/MWSearch_body.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 706 | + native |