r36405 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r36404 | r36405 | r36406 >
Date: 21:37, 17 June 2008
Author: ialex
Status: old
Tags:
Comment:
Moved classes defined in wfLuceneSearch() and put them in MWSearch_body.php
Modified paths:
  • /trunk/extensions/MWSearch/MWSearch.php (modified) (history)
  • /trunk/extensions/MWSearch/MWSearch_body.php (added) (history)

Diff [purge]

Index: trunk/extensions/MWSearch/MWSearch.php
@@ -59,8 +59,7 @@
60 60 $wgLuceneSearchCacheExpiry = 0;
61 61
62 62 # Not a valid entry point, skip unless MEDIAWIKI is defined
63 -if (defined('MEDIAWIKI')) {
64 -$wgExtensionFunctions[] = "wfLuceneSearch";
 63+if( defined('MEDIAWIKI') ){
65 64
66 65 $wgExtensionCredits['other'][] = array(
67 66 'name' => 'MWSearch',
@@ -70,719 +69,17 @@
71 70 'descriptionmsg' => 'mwsearch-desc',
72 71 'url' => 'http://www.mediawiki.org/wiki/Extension:MWSearch',
73 72 );
74 -$wgExtensionMessagesFiles['MWSearch'] = dirname(__FILE__) . '/MWSearch.i18n.php';
75 73
 74+$dir = dirname(__FILE__) . '/';
 75+
 76+$wgExtensionMessagesFiles['MWSearch'] = $dir . 'MWSearch.i18n.php';
 77+
76 78 if($wgLuceneSearchVersion >= 2.1 && $wgEnableLucenePrefixSearch)
77 79 $wgHooks['PrefixSearchBackend'][] = 'LuceneSearch::prefixSearch';
78 80
79 -function wfLuceneSearch() {
 81+$wgAutoloadClasses['LuceneSearch'] = $dir . 'MWSearch_body.php';
 82+$wgAutoloadClasses['LuceneResult'] = $dir . 'MWSearch_body.php';
 83+$wgAutoloadClasses['LuceneSearchSet'] = $dir . 'MWSearch_body.php';
80 84
81 -require_once( 'search/Engine.php' );
82 -
83 -class LuceneSearch extends SearchEngine {
84 - /**
85 - * Perform a full text search query and return a result set.
86 - *
87 - * @param string $term - Raw search term
88 - * @return LuceneSearchSet
89 - * @access public
90 - */
91 - function searchText( $term ) {
92 - return LuceneSearchSet::newFromQuery( isset($this->related)? 'related' : 'search',
93 - $term, $this->namespaces, $this->limit, $this->offset );
94 - }
95 -
96 - /**
97 - * Perform a title-only search query and return a result set.
98 - *
99 - * @param string $term - Raw search term
100 - * @return LuceneSearchSet
101 - * @access public
102 - */
103 - function searchTitle( $term ) {
104 - return null;
105 - }
106 -
107 - /**
108 - * PrefixSearchBackend override for OpenSearch results
109 - */
110 - static function prefixSearch( $ns, $search, $limit, &$results ) {
111 - $it = LuceneSearchSet::newFromQuery( 'prefix', $search, $ns, $limit, 0 );
112 - $results = array();
113 - while( $res = $it->next() ) {
114 - $results[] = $res->getTitle()->getPrefixedText();
115 - }
116 -
117 - return false;
118 - }
119 -
120 - /**
121 - * Prepare query for the lucene-search daemon:
122 - *
123 - * 1) rewrite namespaces into standardized form
124 - * e.g. image:clouds -> [6]:clouds
125 - * e.g. help,wp:npov -> [12,4]:npov
126 - *
127 - * 2) rewrite localizations of "search everything" keyword
128 - * e.g. alle:heidegger -> all:heidegger
129 - *
130 - * @param string query
131 - * @return string rewritten query
132 - * @access private
133 - */
134 - function replacePrefixes( $query ) {
135 - global $wgContLang, $wgLuceneUseRelated;
136 - $fname = 'LuceneSearch::replacePrefixes';
137 - wfProfileIn($fname);
138 - $qlen = strlen($query);
139 - $start = 0; $len = 0; // token start pos and length
140 - $rewritten = ''; // rewritten query
141 - $rindex = 0; // point to last rewritten character
142 - $inquotes = false;
143 -
144 - // quick check, most of the time we don't need any rewriting
145 - if(strpos($query,':')===false){
146 - wfProfileOut($fname);
147 - return $query;
148 - }
149 -
150 - // check if this is query for related articles
151 - $relatedkey = wfMsgForContent('searchrelated').':';
152 - if($wgLuceneUseRelated && strncmp($query, $relatedkey, strlen($relatedkey)) == 0){
153 - $this->related = true;
154 - list($dummy,$ret) = explode(":",$query,2);
155 - wfProfileOut($fname);
156 - return trim($ret);
157 - }
158 -
159 - // "search everything"
160 - // might not be at the beginning for complex queries
161 - $allkeyword = wfMsgForContent('searchall');
162 -
163 - for($i = 0 ; $i < $qlen ; $i++){
164 - $c = $query[$i];
165 -
166 - // ignore chars in quotes
167 - if($inquotes && $c!='"');
168 - // check if $c is valid prefix character
169 - else if(($c >= 'a' && $c <= 'z') ||
170 - ($c >= 'A' && $c <= 'Z') ||
171 - $c == '_' || $c == '-' || $c ==','){
172 - if($len == 0){
173 - $start = $i; // begin of token
174 - $len = 1;
175 - } else
176 - $len++;
177 - // check for utf-8 chars
178 - } else if(($c >= "\xc0" && $c <= "\xff")){
179 - $utf8len = 1;
180 - for($j = $i+1; $j < $qlen; $j++){ // fetch extra utf-8 bytes
181 - if($query[$j] >= "\x80" && $query[$j] <= "\xbf")
182 - $utf8len++;
183 - else
184 - break;
185 - }
186 - if($len == 0){
187 - $start = $i;
188 - $len = $utf8len;
189 - } else
190 - $len += $utf8len;
191 - $i = $j - 1; // we consumed the chars
192 - // check for end of prefix (i.e. semicolon)
193 - } else if($c == ':' && $len !=0){
194 - $rewrite = array(); // here we collect namespaces
195 - $prefixes = explode(',',substr($query,$start,$len));
196 - // iterate thru comma-separated list of prefixes
197 - foreach($prefixes as $prefix){
198 - $index = $wgContLang->getNsIndex($prefix);
199 -
200 - // check for special prefixes all/incategory
201 - if($prefix == $allkeyword){
202 - $rewrite = 'all';
203 - break;
204 - // check for localized names of namespaces
205 - } else if($index !== false)
206 - $rewrite[] = $index;
207 - }
208 - $translated = null;
209 - if($rewrite === 'all')
210 - $translated = $rewrite;
211 - else if(count($rewrite) != 0)
212 - $translated = '['.implode(',',array_unique($rewrite)).']';
213 -
214 - if(isset($translated)){
215 - // append text before the prefix, and then the prefix
216 - $rewritten .= substr($query,$rindex,$start-$rindex);
217 - $rewritten .= $translated . ':';
218 - $rindex = $i+1;
219 - }
220 -
221 - $len = 0;
222 - } else{ // end of token
223 - if($c == '"') // get in/out of quotes
224 - $inquotes = !$inquotes;
225 -
226 - $len = 0;
227 - }
228 -
229 - }
230 - // add rest of the original query that doesn't need rewritting
231 - $rewritten .= substr($query,$rindex,$qlen-$rindex);
232 - wfProfileOut($fname);
233 - return $rewritten;
234 - }
235 -}
236 -
237 -class LuceneResult extends SearchResult {
238 - /**
239 - * Construct a result object from single result line
240 - *
241 - * @param array $lines
242 - * @return array (float, Title)
243 - * @access private
244 - */
245 - function LuceneResult( $lines ) {
246 - global $wgContLang;
247 -
248 - $score = null;
249 - $interwiki = null;
250 - $namespace = null;
251 - $title = null;
252 -
253 - $line = $lines['result'];
254 - wfDebug( "Lucene line: '$line'\n" );
255 -
256 - # detect format
257 - $parts = explode(' ', $line);
258 - if(count($parts) == 3)
259 - list( $score, $namespace, $title ) = $parts;
260 - else
261 - list( $score, $interwiki, $namespace, $nsText, $title ) = $parts;
262 -
263 - $score = floatval( $score );
264 - $namespace = intval( $namespace );
265 - $title = urldecode( $title );
266 - if(!isset($nsText))
267 - $nsText = $wgContLang->getNsText($namespace);
268 - else
269 - $nsText = urldecode($nsText);
270 -
271 - $this->mInterwiki = '';
272 - // make title
273 - if( is_null($interwiki)){
274 - $this->mTitle = Title::makeTitle( $namespace, $title );
275 - } else{
276 - $interwiki = urldecode( $interwiki );
277 - // there might be a better way to make an interwiki link
278 - $t = $interwiki.':'.$nsText.':'.str_replace( '_', ' ', $title );
279 - $this->mTitle = Title::newFromText( $t );
280 - $this->mInterwiki = $interwiki;
281 - }
282 -
283 - $this->mScore = $score;
284 -
285 - $this->mWordCount = null;
286 - if(array_key_exists("#h.wordcount",$lines))
287 - $this->mWordCount = intval($lines["#h.wordcount"][0]);
288 -
289 - $this->mSize = null;
290 - if(array_key_exists("#h.size",$lines))
291 - $this->mSize = intval($lines["#h.size"][0]);
292 -
293 - $this->mDate = null;
294 - if(array_key_exists("#h.date",$lines))
295 - $this->mDate = $lines["#h.date"][0];
296 -
297 - // various snippets
298 - list( $this->mHighlightTitle, $dummy ) = $this->extractSnippet($lines,$nsText,"#h.title");
299 - if( is_null($this->mHighlightTitle) && $this->isInterwiki() ){
300 - // construct highlighted interwiki title without the interwiki part
301 - $this->mHighlightTitle = ($nsText==''? '' : $nsText.':') . str_replace( '_', ' ', $title );
302 - }
303 -
304 - list( $this->mHighlightText, $dummy ) = $this->extractSnippet($lines,'',"#h.text",true);
305 -
306 - list( $this->mHighlightRedirect, $redirect ) = $this->extractSnippet($lines,$nsText,"#h.redirect");
307 - $this->mRedirectTitle = null;
308 - if( !is_null($redirect)){
309 - # build redirect Title object
310 - if($interwiki != ''){
311 - $t = $interwiki.':'.$redirect;
312 - $this->mRedirectTitle = Title::newFromText( $t );
313 - } else{
314 - $parts = explode(":",$redirect,2);
315 - $redirectNs = intval($parts[0]);
316 - $redirectText = str_replace('_', ' ', $parts[1]);
317 - $this->mRedirectTitle = Title::makeTitle($redirectNs,$redirectText);
318 - }
319 - }
320 -
321 - list( $this->mHighlightSection, $section) = $this->extractSnippet($lines,'',"#h.section");
322 - $this->mSectionTitle = null;
323 - if( !is_null($section)){
324 - # build title + fragment Title object
325 - $t = $nsText.':'.str_replace( '_', ' ', $title ).'#'.$section;
326 - $this->mSectionTitle = Title::newFromText($t);
327 - }
328 -
329 - if($this->mInterwiki == '')
330 - $this->mRevision = Revision::newFromTitle( $this->mTitle );
331 - }
332 -
333 - /**
334 - * Get the pair [highlighted snippet, unmodified text] for highlighted text
335 - *
336 - * @param string $lines
337 - * @param string $nsText textual form of namespace
338 - * @param string $type
339 - * @param boolean $useFinalSeparator
340 - * @return array (highlighted, unmodified text)
341 - */
342 - function extractSnippet($lines, $nsText, $type, $useFinalSeparator=false){
343 - if(!array_key_exists($type,$lines))
344 - return array(null,null);
345 - $ret = "";
346 - $original = null;
347 - foreach($lines[$type] as $h){
348 - list($s,$o) = $this->extractSnippetLine($h,$useFinalSeparator);
349 - $ret .= $s;
350 - $original = $o;
351 - }
352 - if($nsText!='')
353 - $ret = $nsText.':'.$ret;
354 - return array($ret,$original);
355 - }
356 -
357 - /**
358 - * Parse one line of a snippet
359 - *
360 - * @param string $line
361 - * @param boolean $useFinalSeparator if "..." is to be appended to the end of snippet
362 - * @access protected
363 - * @return array(snippet,unmodified text)
364 - */
365 - function extractSnippetLine($line, $useFinalSeparator){
366 - $parts = explode(" ",$line);
367 - if(count($parts)!=4 && count($parts)!=5){
368 - wfDebug("Bad result line:".$line."\n");
369 - return "";
370 - }
371 - $splits = $this->stripBracketsSplit($parts[0]);
372 - $highlight = $this->stripBracketsSplit($parts[1]);
373 - $suffix = urldecode($this->stripBrackets($parts[2]));
374 - $text = urldecode($parts[3]);
375 - $original = null;
376 - if(count($parts) > 4)
377 - $original = urldecode($parts[4]);
378 -
379 - $splits[] = strlen($text);
380 - $start = 0;
381 - $snippet = "";
382 - $hi = 0;
383 -
384 - foreach($splits as $sp){
385 - $sp = intval($sp);
386 - // highlight words!
387 - while($hi < count($highlight) && intval($highlight[$hi]) < $sp){
388 - $s = intval($highlight[$hi]);
389 - $e = intval($highlight[$hi+1]);
390 - $snippet .= substr($text,$start,$s-$start)."<span class='searchmatch'>".substr($text,$s,$e-$s)."</span>";
391 - $start = $e;
392 - $hi += 2;
393 - }
394 - // copy till split point
395 - $snippet .= substr($text,$start,$sp-$start);
396 - if($sp == strlen($text) && $suffix != '')
397 - $snippet .= $suffix;
398 - else if($useFinalSeparator)
399 - $snippet .= " <b>...</b> ";
400 -
401 - $start = $sp;
402 - }
403 - return array($snippet,$original);
404 - }
405 -
406 -
407 - /**
408 - * @access private
409 - */
410 - function stripBrackets($str){
411 - if($str == '[]')
412 - return '';
413 - return substr($str,1,strlen($str)-2);
414 - }
415 -
416 - /**
417 - * @access private
418 - * @return array
419 - */
420 - function stripBracketsSplit($str){
421 - $strip = $this->stripBrackets($str);
422 - if($strip == '')
423 - return array();
424 - else
425 - return explode(",",$strip);
426 - }
427 -
428 - function getTitle() {
429 - return $this->mTitle;
430 - }
431 -
432 - function getScore() {
433 - return null; // lucene scores are meaningless to the user...
434 - }
435 -
436 - function getTitleSnippet($terms){
437 - if( is_null($this->mHighlightTitle) )
438 - return '';
439 - return $this->mHighlightTitle;
440 - }
441 -
442 - function getTextSnippet($terms) {
443 - if( is_null($this->mHighlightText) )
444 - return parent::getTextSnippet($terms);
445 - return $this->mHighlightText;
446 - }
447 -
448 - function getRedirectSnippet($terms) {
449 - if( is_null($this->mHighlightRedirect) )
450 - return '';
451 - return $this->mHighlightRedirect;
452 - }
453 -
454 - function getRedirectTitle(){
455 - return $this->mRedirectTitle;
456 - }
457 -
458 - function getSectionSnippet(){
459 - if( is_null($this->mHighlightSection) )
460 - return '';
461 - return $this->mHighlightSection;
462 - }
463 -
464 - function getSectionTitle(){
465 - return $this->mSectionTitle;
466 - }
467 -
468 - function getInterwikiPrefix(){
469 - return $this->mInterwiki;
470 - }
471 -
472 - function isInterwiki(){
473 - return $this->mInterwiki != '';
474 - }
475 -
476 - function getTimestamp(){
477 - if( is_null($this->mDate) )
478 - return parent::getTimestamp();
479 - return $this->mDate;
480 - }
481 -
482 - function getWordCount(){
483 - if( is_null($this->mWordCount) )
484 - return parent::getWordCount();
485 - return $this->mWordCount;
486 - }
487 -
488 - function getByteSize(){
489 - if( is_null($this->mSize) )
490 - return parent::getByteSize();
491 - return $this->mSize;
492 - }
493 -
494 - function hasRelated(){
495 - global $wgLuceneSearchVersion, $wgLuceneUseRelated;
496 - return $wgLuceneSearchVersion >= 2.1 && $wgLuceneUseRelated;
497 - }
498 -}
499 -
500 -class LuceneSearchSet extends SearchResultSet {
501 - /**
502 - * Contact the MWDaemon search server and return a wrapper
503 - * object with the set of results. Results may be cached.
504 - *
505 - * @param string $method The protocol verb to use
506 - * @param string $query
507 - * @param int $limit
508 - * @return array
509 - * @access public
510 - * @static
511 - */
512 - function newFromQuery( $method, $query, $namespaces = array(), $limit = 20, $offset = 0 ) {
513 - $fname = 'LuceneSearchSet::newFromQuery';
514 - wfProfileIn( $fname );
515 -
516 - global $wgLuceneHost, $wgLucenePort, $wgDBname, $wgMemc;
517 - global $wgLuceneSearchVersion, $wgLuceneSearchCacheExpiry;
518 -
519 - if( is_array( $wgLuceneHost ) ) {
520 - $pick = mt_rand( 0, count( $wgLuceneHost ) - 1 );
521 - $host = $wgLuceneHost[$pick];
522 - } else {
523 - $host = $wgLuceneHost;
524 - }
525 -
526 - $enctext = rawurlencode( trim( $query ) );
527 - $searchUrl = "http://$host:$wgLucenePort/$method/$wgDBname/$enctext?" .
528 - wfArrayToCGI( array(
529 - 'namespaces' => implode( ',', $namespaces ),
530 - 'offset' => $offset,
531 - 'limit' => $limit,
532 - 'version' => $wgLuceneSearchVersion,
533 - 'iwlimit' => 10,
534 - ) );
535 -
536 - // try to fetch cached if caching is turned on
537 - if($wgLuceneSearchCacheExpiry > 0){
538 - $key = "$wgDBname:lucene:" . md5( $searchUrl );
539 - $resultSet = $wgMemc->get( $key );
540 - if( is_object( $resultSet ) ) {
541 - wfDebug( "$fname: got cached lucene results for key $key\n" );
542 - wfProfileOut( $fname );
543 - return $resultSet;
544 - }
545 - }
546 -
547 - wfDebug( "Fetching search data from $searchUrl\n" );
548 - wfSuppressWarnings();
549 - wfProfileIn( $fname.'-contact-'.$host );
550 - $data = Http::get( $searchUrl );
551 - wfProfileOut( $fname.'-contact-'.$host );
552 - wfRestoreWarnings();
553 - if( $data === false ) {
554 - // Network error or server error
555 - wfProfileOut( $fname );
556 - return null;
557 - } else {
558 - $inputLines = explode( "\n", trim( $data ) );
559 - $resultLines = array_map( 'trim', $inputLines );
560 - }
561 -
562 - $suggestion = null;
563 - $totalHits = null;
564 - $info = null;
565 - $interwiki = null;
566 -
567 - # All methods have same syntax...
568 - $totalHits = array_shift( $resultLines );
569 - if( $totalHits === false ) {
570 - # I/O error? this shouldn't happen
571 - wfDebug( "Couldn't read summary line...\n" );
572 - } else {
573 - $totalHits = intval( $totalHits );
574 - wfDebug( "total [$totalHits] hits\n" );
575 - if($wgLuceneSearchVersion >= 2.1){
576 - # second line is info
577 - list($dummy,$info) = explode(' ',array_shift($resultLines),2);
578 - # third line is suggestions
579 - $s = array_shift($resultLines);
580 - if(self::startsWith($s,'#suggest '))
581 - $suggestion = $s;
582 -
583 - # fifth line is interwiki info line
584 - $iwHeading = array_shift($resultLines);
585 - list($dummy,$iwCount,$iwTotal) = explode(' ',$iwHeading);
586 - if($iwCount>0){
587 - # pack interwiki lines into a separate result set
588 - $interwikiLen = 0;
589 - while(!self::startsWith($resultLines[$interwikiLen],"#results"))
590 - $interwikiLen++;
591 - $interwikiLines = array_splice($resultLines,0,$interwikiLen);
592 - $interwiki = new LuceneSearchSet( $query, $interwikiLines, intval($iwCount), intval($iwTotal) );
593 - }
594 -
595 - # how many results we got
596 - list($dummy,$resultCount) = explode(" ",array_shift($resultLines));
597 - $resultCount = intval($resultCount);
598 - } else{
599 - $resultCount = count($resultLines);
600 - }
601 - }
602 -
603 -
604 - $resultSet = new LuceneSearchSet( $query, $resultLines, $resultCount, $totalHits,
605 - $suggestion, $info, $interwiki );
606 -
607 - if($wgLuceneSearchCacheExpiry > 0){
608 - wfDebug( "$fname: caching lucene results for key $key\n" );
609 - $wgMemc->add( $key, $resultSet, $wgLuceneSearchCacheExpiry );
610 - }
611 -
612 - wfProfileOut( $fname );
613 - return $resultSet;
614 - }
615 -
616 - static function startsWith($source, $prefix){
617 - return strncmp($source, $prefix, strlen($prefix)) == 0;
618 - }
619 -
620 - /**
621 - * Private constructor. Use LuceneSearchSet::newFromQuery().
622 - *
623 - * @param string $query
624 - * @param array $lines
625 - * @param int $resultCount
626 - * @param int $totalHits
627 - * @param string $suggestion
628 - * @param string $info
629 - * @access private
630 - */
631 - function LuceneSearchSet( $query, $lines, $resultCount, $totalHits = null, $suggestion = null, $info = null, $interwiki = null ) {
632 - $this->mQuery = $query;
633 - $this->mTotalHits = $totalHits;
634 - $this->mResults = $lines;
635 - $this->mResultCount = $resultCount;
636 - $this->mPos = 0;
637 - $this->mSuggestionQuery = null;
638 - $this->mSuggestionSnippet = '';
639 - $this->parseSuggestion($suggestion);
640 - $this->mInfo = $info;
641 - $this->mInterwiki = $interwiki;
642 - }
643 -
644 - /** Get suggestions from a suggestion result line */
645 - function parseSuggestion($suggestion){
646 - if( is_null($suggestion) )
647 - return;
648 - // parse split points and highlight changes
649 - list($dummy,$points,$sug) = explode(" ",$suggestion);
650 - $sug = urldecode($sug);
651 - $points = explode(",",substr($points,1,-1));
652 - array_unshift($points,0);
653 - $suggestText = "";
654 - for($i=1;$i<count($points);$i+=2){
655 - $suggestText .= substr($sug,$points[$i-1],$points[$i]-$points[$i-1]);
656 - $suggestText .= "<i>".substr($sug,$points[$i],$points[$i+1]-$points[$i])."</i>";
657 - }
658 - $suggestText .= substr($sug,end($points));
659 -
660 - $this->mSuggestionQuery = $this->replaceGenericPrefixes($sug);
661 - $this->mSuggestionSnippet = $this->replaceGenericPrefixes($suggestText);
662 - }
663 -
664 - /** replace prefixes like [2]: that are not in phrases */
665 - function replaceGenericPrefixes($text){
666 - $out = "";
667 - $phrases = explode('"',$text);
668 - for($i=0;$i<count($phrases);$i+=2){
669 - $out .= preg_replace_callback('/\[([0-9]+)\]:/', array($this,'genericPrefixCallback'), $phrases[$i]);
670 - if($i+1 < count($phrases))
671 - $out .= '"'.$phrases[$i+1].'"'; // phrase text
672 - }
673 - return $out;
674 - }
675 -
676 - function genericPrefixCallback($matches){
677 - global $wgContLang;
678 - return $wgContLang->getFormattedNsText($matches[1]).":";
679 - }
680 -
681 - function numRows() {
682 - return $this->mResultCount;
683 - }
684 -
685 - function termMatches() {
686 - $resq = preg_replace( "/\\[.*?\\]:/", " ", $this->mQuery ); # generic prefixes
687 - $resq = preg_replace( "/all:/", " ", $resq );
688 - $resq = trim( preg_replace( "/[ |\\[\\]()\"{}+\\-_@!?%&*=\\|:;><,.\\/]+/", " ", $resq ) );
689 - $terms = array_map( array( &$this, 'regexQuote' ),
690 - explode( ' ', $resq ) );
691 - return $terms;
692 - }
693 -
694 - /**
695 - * Stupid hack around PHP's limited lambda support
696 - * @access private
697 - */
698 - function regexQuote( $term ) {
699 - return preg_quote( $term, '/' );
700 - }
701 -
702 - function hasResults() {
703 - return count( $this->mResults ) > 0;
704 - }
705 -
706 - /**
707 - * Some search modes return a total hit count for the query
708 - * in the entire article database. This may include pages
709 - * in namespaces that would not be matched on the given
710 - * settings.
711 - *
712 - * @return int
713 - * @access public
714 - */
715 - function getTotalHits() {
716 - return $this->mTotalHits;
717 - }
718 -
719 - /**
720 - * Return information about how and from where the results were fetched,
721 - * should be useful for diagnostics and debugging
722 - *
723 - * @return string
724 - */
725 - function getInfo() {
726 - if( is_null($this->mInfo) )
727 - return null;
728 - return "Search results fetched via ".$this->mInfo;
729 - }
730 -
731 - /**
732 - * Return a result set of hits on other (multiple) wikis associated with this one
733 - *
734 - * @return SearchResultSet
735 - */
736 - function getInterwikiResults() {
737 - return $this->mInterwiki;
738 - }
739 -
740 - /**
741 - * Some search modes return a suggested alternate term if there are
742 - * no exact hits. Returns true if there is one on this set.
743 - *
744 - * @return bool
745 - * @access public
746 - */
747 - function hasSuggestion() {
748 - return is_string( $this->mSuggestionQuery ) && $this->mSuggestionQuery != '';
749 - }
750 -
751 - function getSuggestionQuery(){
752 - return $this->mSuggestionQuery;
753 - }
754 -
755 - function getSuggestionSnippet(){
756 - return $this->mSuggestionSnippet;
757 - }
758 -
759 - /**
760 - * Fetches next search result, or false.
761 - * @return LuceneResult
762 - * @access public
763 - * @abstract
764 - */
765 - function next() {
766 - # Group together lines belonging to one hit
767 - $group = array();
768 -
769 - for(;$this->mPos < count($this->mResults);$this->mPos++){
770 - $l = trim($this->mResults[$this->mPos]);
771 - if(count($group) == 0) // main line
772 - $group['result'] = $l;
773 - else if($l[0] == '#'){ // additional meta
774 - list($meta,$value) = explode(" ",$l,2);
775 - $group[$meta][] = $value;
776 - } else
777 - break;
778 - }
779 - if($group == false)
780 - return false;
781 - else
782 - return new LuceneResult( $group );
783 - }
784 -
785 -}
786 -
787 -} # End of extension function
788 85 } # End of invocation guard
789 86
Index: trunk/extensions/MWSearch/MWSearch_body.php
@@ -0,0 +1,704 @@
 2+<?php
 3+
 4+class LuceneSearch extends SearchEngine {
 5+ /**
 6+ * Perform a full text search query and return a result set.
 7+ *
 8+ * @param string $term - Raw search term
 9+ * @return LuceneSearchSet
 10+ * @access public
 11+ */
 12+ function searchText( $term ) {
 13+ return LuceneSearchSet::newFromQuery( isset($this->related)? 'related' : 'search',
 14+ $term, $this->namespaces, $this->limit, $this->offset );
 15+ }
 16+
 17+ /**
 18+ * Perform a title-only search query and return a result set.
 19+ *
 20+ * @param string $term - Raw search term
 21+ * @return LuceneSearchSet
 22+ * @access public
 23+ */
 24+ function searchTitle( $term ) {
 25+ return null;
 26+ }
 27+
 28+ /**
 29+ * PrefixSearchBackend override for OpenSearch results
 30+ */
 31+ static function prefixSearch( $ns, $search, $limit, &$results ) {
 32+ $it = LuceneSearchSet::newFromQuery( 'prefix', $search, $ns, $limit, 0 );
 33+ $results = array();
 34+ while( $res = $it->next() ) {
 35+ $results[] = $res->getTitle()->getPrefixedText();
 36+ }
 37+
 38+ return false;
 39+ }
 40+
 41+ /**
 42+ * Prepare query for the lucene-search daemon:
 43+ *
 44+ * 1) rewrite namespaces into standardized form
 45+ * e.g. image:clouds -> [6]:clouds
 46+ * e.g. help,wp:npov -> [12,4]:npov
 47+ *
 48+ * 2) rewrite localizations of "search everything" keyword
 49+ * e.g. alle:heidegger -> all:heidegger
 50+ *
 51+ * @param string query
 52+ * @return string rewritten query
 53+ * @access private
 54+ */
 55+ function replacePrefixes( $query ) {
 56+ global $wgContLang, $wgLuceneUseRelated;
 57+ $fname = 'LuceneSearch::replacePrefixes';
 58+ wfProfileIn($fname);
 59+ $qlen = strlen($query);
 60+ $start = 0; $len = 0; // token start pos and length
 61+ $rewritten = ''; // rewritten query
 62+ $rindex = 0; // point to last rewritten character
 63+ $inquotes = false;
 64+
 65+ // quick check, most of the time we don't need any rewriting
 66+ if(strpos($query,':')===false){
 67+ wfProfileOut($fname);
 68+ return $query;
 69+ }
 70+
 71+ // check if this is query for related articles
 72+ $relatedkey = wfMsgForContent('searchrelated').':';
 73+ if($wgLuceneUseRelated && strncmp($query, $relatedkey, strlen($relatedkey)) == 0){
 74+ $this->related = true;
 75+ list($dummy,$ret) = explode(":",$query,2);
 76+ wfProfileOut($fname);
 77+ return trim($ret);
 78+ }
 79+
 80+ // "search everything"
 81+ // might not be at the beginning for complex queries
 82+ $allkeyword = wfMsgForContent('searchall');
 83+
 84+ for($i = 0 ; $i < $qlen ; $i++){
 85+ $c = $query[$i];
 86+
 87+ // ignore chars in quotes
 88+ if($inquotes && $c!='"');
 89+ // check if $c is valid prefix character
 90+ else if(($c >= 'a' && $c <= 'z') ||
 91+ ($c >= 'A' && $c <= 'Z') ||
 92+ $c == '_' || $c == '-' || $c ==','){
 93+ if($len == 0){
 94+ $start = $i; // begin of token
 95+ $len = 1;
 96+ } else
 97+ $len++;
 98+ // check for utf-8 chars
 99+ } else if(($c >= "\xc0" && $c <= "\xff")){
 100+ $utf8len = 1;
 101+ for($j = $i+1; $j < $qlen; $j++){ // fetch extra utf-8 bytes
 102+ if($query[$j] >= "\x80" && $query[$j] <= "\xbf")
 103+ $utf8len++;
 104+ else
 105+ break;
 106+ }
 107+ if($len == 0){
 108+ $start = $i;
 109+ $len = $utf8len;
 110+ } else
 111+ $len += $utf8len;
 112+ $i = $j - 1; // we consumed the chars
 113+ // check for end of prefix (i.e. semicolon)
 114+ } else if($c == ':' && $len !=0){
 115+ $rewrite = array(); // here we collect namespaces
 116+ $prefixes = explode(',',substr($query,$start,$len));
 117+ // iterate thru comma-separated list of prefixes
 118+ foreach($prefixes as $prefix){
 119+ $index = $wgContLang->getNsIndex($prefix);
 120+
 121+ // check for special prefixes all/incategory
 122+ if($prefix == $allkeyword){
 123+ $rewrite = 'all';
 124+ break;
 125+ // check for localized names of namespaces
 126+ } else if($index !== false)
 127+ $rewrite[] = $index;
 128+ }
 129+ $translated = null;
 130+ if($rewrite === 'all')
 131+ $translated = $rewrite;
 132+ else if(count($rewrite) != 0)
 133+ $translated = '['.implode(',',array_unique($rewrite)).']';
 134+
 135+ if(isset($translated)){
 136+ // append text before the prefix, and then the prefix
 137+ $rewritten .= substr($query,$rindex,$start-$rindex);
 138+ $rewritten .= $translated . ':';
 139+ $rindex = $i+1;
 140+ }
 141+
 142+ $len = 0;
 143+ } else{ // end of token
 144+ if($c == '"') // get in/out of quotes
 145+ $inquotes = !$inquotes;
 146+
 147+ $len = 0;
 148+ }
 149+
 150+ }
 151+ // add rest of the original query that doesn't need rewritting
 152+ $rewritten .= substr($query,$rindex,$qlen-$rindex);
 153+ wfProfileOut($fname);
 154+ return $rewritten;
 155+ }
 156+}
 157+
 158+class LuceneResult extends SearchResult {
 159+ /**
 160+ * Construct a result object from single result line
 161+ *
 162+ * @param array $lines
 163+ * @return array (float, Title)
 164+ * @access private
 165+ */
 166+ function LuceneResult( $lines ) {
 167+ global $wgContLang;
 168+
 169+ $score = null;
 170+ $interwiki = null;
 171+ $namespace = null;
 172+ $title = null;
 173+
 174+ $line = $lines['result'];
 175+ wfDebug( "Lucene line: '$line'\n" );
 176+
 177+ # detect format
 178+ $parts = explode(' ', $line);
 179+ if(count($parts) == 3)
 180+ list( $score, $namespace, $title ) = $parts;
 181+ else
 182+ list( $score, $interwiki, $namespace, $nsText, $title ) = $parts;
 183+
 184+ $score = floatval( $score );
 185+ $namespace = intval( $namespace );
 186+ $title = urldecode( $title );
 187+ if(!isset($nsText))
 188+ $nsText = $wgContLang->getNsText($namespace);
 189+ else
 190+ $nsText = urldecode($nsText);
 191+
 192+ $this->mInterwiki = '';
 193+ // make title
 194+ if( is_null($interwiki)){
 195+ $this->mTitle = Title::makeTitle( $namespace, $title );
 196+ } else{
 197+ $interwiki = urldecode( $interwiki );
 198+ // there might be a better way to make an interwiki link
 199+ $t = $interwiki.':'.$nsText.':'.str_replace( '_', ' ', $title );
 200+ $this->mTitle = Title::newFromText( $t );
 201+ $this->mInterwiki = $interwiki;
 202+ }
 203+
 204+ $this->mScore = $score;
 205+
 206+ $this->mWordCount = null;
 207+ if(array_key_exists("#h.wordcount",$lines))
 208+ $this->mWordCount = intval($lines["#h.wordcount"][0]);
 209+
 210+ $this->mSize = null;
 211+ if(array_key_exists("#h.size",$lines))
 212+ $this->mSize = intval($lines["#h.size"][0]);
 213+
 214+ $this->mDate = null;
 215+ if(array_key_exists("#h.date",$lines))
 216+ $this->mDate = $lines["#h.date"][0];
 217+
 218+ // various snippets
 219+ list( $this->mHighlightTitle, $dummy ) = $this->extractSnippet($lines,$nsText,"#h.title");
 220+ if( is_null($this->mHighlightTitle) && $this->isInterwiki() ){
 221+ // construct highlighted interwiki title without the interwiki part
 222+ $this->mHighlightTitle = ($nsText==''? '' : $nsText.':') . str_replace( '_', ' ', $title );
 223+ }
 224+
 225+ list( $this->mHighlightText, $dummy ) = $this->extractSnippet($lines,'',"#h.text",true);
 226+
 227+ list( $this->mHighlightRedirect, $redirect ) = $this->extractSnippet($lines,$nsText,"#h.redirect");
 228+ $this->mRedirectTitle = null;
 229+ if( !is_null($redirect)){
 230+ # build redirect Title object
 231+ if($interwiki != ''){
 232+ $t = $interwiki.':'.$redirect;
 233+ $this->mRedirectTitle = Title::newFromText( $t );
 234+ } else{
 235+ $parts = explode(":",$redirect,2);
 236+ $redirectNs = intval($parts[0]);
 237+ $redirectText = str_replace('_', ' ', $parts[1]);
 238+ $this->mRedirectTitle = Title::makeTitle($redirectNs,$redirectText);
 239+ }
 240+ }
 241+
 242+ list( $this->mHighlightSection, $section) = $this->extractSnippet($lines,'',"#h.section");
 243+ $this->mSectionTitle = null;
 244+ if( !is_null($section)){
 245+ # build title + fragment Title object
 246+ $t = $nsText.':'.str_replace( '_', ' ', $title ).'#'.$section;
 247+ $this->mSectionTitle = Title::newFromText($t);
 248+ }
 249+
 250+ if($this->mInterwiki == '')
 251+ $this->mRevision = Revision::newFromTitle( $this->mTitle );
 252+ }
 253+
 254+ /**
 255+ * Get the pair [highlighted snippet, unmodified text] for highlighted text
 256+ *
 257+ * @param string $lines
 258+ * @param string $nsText textual form of namespace
 259+ * @param string $type
 260+ * @param boolean $useFinalSeparator
 261+ * @return array (highlighted, unmodified text)
 262+ */
 263+ function extractSnippet($lines, $nsText, $type, $useFinalSeparator=false){
 264+ if(!array_key_exists($type,$lines))
 265+ return array(null,null);
 266+ $ret = "";
 267+ $original = null;
 268+ foreach($lines[$type] as $h){
 269+ list($s,$o) = $this->extractSnippetLine($h,$useFinalSeparator);
 270+ $ret .= $s;
 271+ $original = $o;
 272+ }
 273+ if($nsText!='')
 274+ $ret = $nsText.':'.$ret;
 275+ return array($ret,$original);
 276+ }
 277+
 278+ /**
 279+ * Parse one line of a snippet
 280+ *
 281+ * @param string $line
 282+ * @param boolean $useFinalSeparator if "..." is to be appended to the end of snippet
 283+ * @access protected
 284+ * @return array(snippet,unmodified text)
 285+ */
 286+ function extractSnippetLine($line, $useFinalSeparator){
 287+ $parts = explode(" ",$line);
 288+ if(count($parts)!=4 && count($parts)!=5){
 289+ wfDebug("Bad result line:".$line."\n");
 290+ return "";
 291+ }
 292+ $splits = $this->stripBracketsSplit($parts[0]);
 293+ $highlight = $this->stripBracketsSplit($parts[1]);
 294+ $suffix = urldecode($this->stripBrackets($parts[2]));
 295+ $text = urldecode($parts[3]);
 296+ $original = null;
 297+ if(count($parts) > 4)
 298+ $original = urldecode($parts[4]);
 299+
 300+ $splits[] = strlen($text);
 301+ $start = 0;
 302+ $snippet = "";
 303+ $hi = 0;
 304+
 305+ foreach($splits as $sp){
 306+ $sp = intval($sp);
 307+ // highlight words!
 308+ while($hi < count($highlight) && intval($highlight[$hi]) < $sp){
 309+ $s = intval($highlight[$hi]);
 310+ $e = intval($highlight[$hi+1]);
 311+ $snippet .= substr($text,$start,$s-$start)."<span class='searchmatch'>".substr($text,$s,$e-$s)."</span>";
 312+ $start = $e;
 313+ $hi += 2;
 314+ }
 315+ // copy till split point
 316+ $snippet .= substr($text,$start,$sp-$start);
 317+ if($sp == strlen($text) && $suffix != '')
 318+ $snippet .= $suffix;
 319+ else if($useFinalSeparator)
 320+ $snippet .= " <b>...</b> ";
 321+
 322+ $start = $sp;
 323+ }
 324+ return array($snippet,$original);
 325+ }
 326+
 327+
 328+ /**
 329+ * @access private
 330+ */
 331+ function stripBrackets($str){
 332+ if($str == '[]')
 333+ return '';
 334+ return substr($str,1,strlen($str)-2);
 335+ }
 336+
 337+ /**
 338+ * @access private
 339+ * @return array
 340+ */
 341+ function stripBracketsSplit($str){
 342+ $strip = $this->stripBrackets($str);
 343+ if($strip == '')
 344+ return array();
 345+ else
 346+ return explode(",",$strip);
 347+ }
 348+
 349+ function getTitle() {
 350+ return $this->mTitle;
 351+ }
 352+
 353+ function getScore() {
 354+ return null; // lucene scores are meaningless to the user...
 355+ }
 356+
 357+ function getTitleSnippet($terms){
 358+ if( is_null($this->mHighlightTitle) )
 359+ return '';
 360+ return $this->mHighlightTitle;
 361+ }
 362+
 363+ function getTextSnippet($terms) {
 364+ if( is_null($this->mHighlightText) )
 365+ return parent::getTextSnippet($terms);
 366+ return $this->mHighlightText;
 367+ }
 368+
 369+ function getRedirectSnippet($terms) {
 370+ if( is_null($this->mHighlightRedirect) )
 371+ return '';
 372+ return $this->mHighlightRedirect;
 373+ }
 374+
 375+ function getRedirectTitle(){
 376+ return $this->mRedirectTitle;
 377+ }
 378+
 379+ function getSectionSnippet(){
 380+ if( is_null($this->mHighlightSection) )
 381+ return '';
 382+ return $this->mHighlightSection;
 383+ }
 384+
 385+ function getSectionTitle(){
 386+ return $this->mSectionTitle;
 387+ }
 388+
 389+ function getInterwikiPrefix(){
 390+ return $this->mInterwiki;
 391+ }
 392+
 393+ function isInterwiki(){
 394+ return $this->mInterwiki != '';
 395+ }
 396+
 397+ function getTimestamp(){
 398+ if( is_null($this->mDate) )
 399+ return parent::getTimestamp();
 400+ return $this->mDate;
 401+ }
 402+
 403+ function getWordCount(){
 404+ if( is_null($this->mWordCount) )
 405+ return parent::getWordCount();
 406+ return $this->mWordCount;
 407+ }
 408+
 409+ function getByteSize(){
 410+ if( is_null($this->mSize) )
 411+ return parent::getByteSize();
 412+ return $this->mSize;
 413+ }
 414+
 415+ function hasRelated(){
 416+ global $wgLuceneSearchVersion, $wgLuceneUseRelated;
 417+ return $wgLuceneSearchVersion >= 2.1 && $wgLuceneUseRelated;
 418+ }
 419+}
 420+
 421+class LuceneSearchSet extends SearchResultSet {
 422+ /**
 423+ * Contact the MWDaemon search server and return a wrapper
 424+ * object with the set of results. Results may be cached.
 425+ *
 426+ * @param string $method The protocol verb to use
 427+ * @param string $query
 428+ * @param int $limit
 429+ * @return array
 430+ * @access public
 431+ */
 432+ static function newFromQuery( $method, $query, $namespaces = array(), $limit = 20, $offset = 0 ) {
 433+ $fname = 'LuceneSearchSet::newFromQuery';
 434+ wfProfileIn( $fname );
 435+
 436+ global $wgLuceneHost, $wgLucenePort, $wgDBname, $wgMemc;
 437+ global $wgLuceneSearchVersion, $wgLuceneSearchCacheExpiry;
 438+
 439+ if( is_array( $wgLuceneHost ) ) {
 440+ $pick = mt_rand( 0, count( $wgLuceneHost ) - 1 );
 441+ $host = $wgLuceneHost[$pick];
 442+ } else {
 443+ $host = $wgLuceneHost;
 444+ }
 445+
 446+ $enctext = rawurlencode( trim( $query ) );
 447+ $searchUrl = "http://$host:$wgLucenePort/$method/$wgDBname/$enctext?" .
 448+ wfArrayToCGI( array(
 449+ 'namespaces' => implode( ',', $namespaces ),
 450+ 'offset' => $offset,
 451+ 'limit' => $limit,
 452+ 'version' => $wgLuceneSearchVersion,
 453+ 'iwlimit' => 10,
 454+ ) );
 455+
 456+ // try to fetch cached if caching is turned on
 457+ if($wgLuceneSearchCacheExpiry > 0){
 458+ $key = "$wgDBname:lucene:" . md5( $searchUrl );
 459+ $resultSet = $wgMemc->get( $key );
 460+ if( is_object( $resultSet ) ) {
 461+ wfDebug( "$fname: got cached lucene results for key $key\n" );
 462+ wfProfileOut( $fname );
 463+ return $resultSet;
 464+ }
 465+ }
 466+
 467+ wfDebug( "Fetching search data from $searchUrl\n" );
 468+ wfSuppressWarnings();
 469+ wfProfileIn( $fname.'-contact-'.$host );
 470+ $data = Http::get( $searchUrl );
 471+ wfProfileOut( $fname.'-contact-'.$host );
 472+ wfRestoreWarnings();
 473+ if( $data === false ) {
 474+ // Network error or server error
 475+ wfProfileOut( $fname );
 476+ return null;
 477+ } else {
 478+ $inputLines = explode( "\n", trim( $data ) );
 479+ $resultLines = array_map( 'trim', $inputLines );
 480+ }
 481+
 482+ $suggestion = null;
 483+ $totalHits = null;
 484+ $info = null;
 485+ $interwiki = null;
 486+
 487+ # All methods have same syntax...
 488+ $totalHits = array_shift( $resultLines );
 489+ if( $totalHits === false ) {
 490+ # I/O error? this shouldn't happen
 491+ wfDebug( "Couldn't read summary line...\n" );
 492+ } else {
 493+ $totalHits = intval( $totalHits );
 494+ wfDebug( "total [$totalHits] hits\n" );
 495+ if($wgLuceneSearchVersion >= 2.1){
 496+ # second line is info
 497+ list($dummy,$info) = explode(' ',array_shift($resultLines),2);
 498+ # third line is suggestions
 499+ $s = array_shift($resultLines);
 500+ if(self::startsWith($s,'#suggest '))
 501+ $suggestion = $s;
 502+
 503+ # fifth line is interwiki info line
 504+ $iwHeading = array_shift($resultLines);
 505+ list($dummy,$iwCount,$iwTotal) = explode(' ',$iwHeading);
 506+ if($iwCount>0){
 507+ # pack interwiki lines into a separate result set
 508+ $interwikiLen = 0;
 509+ while(!self::startsWith($resultLines[$interwikiLen],"#results"))
 510+ $interwikiLen++;
 511+ $interwikiLines = array_splice($resultLines,0,$interwikiLen);
 512+ $interwiki = new LuceneSearchSet( $query, $interwikiLines, intval($iwCount), intval($iwTotal) );
 513+ }
 514+
 515+ # how many results we got
 516+ list($dummy,$resultCount) = explode(" ",array_shift($resultLines));
 517+ $resultCount = intval($resultCount);
 518+ } else{
 519+ $resultCount = count($resultLines);
 520+ }
 521+ }
 522+
 523+
 524+ $resultSet = new LuceneSearchSet( $query, $resultLines, $resultCount, $totalHits,
 525+ $suggestion, $info, $interwiki );
 526+
 527+ if($wgLuceneSearchCacheExpiry > 0){
 528+ wfDebug( "$fname: caching lucene results for key $key\n" );
 529+ $wgMemc->add( $key, $resultSet, $wgLuceneSearchCacheExpiry );
 530+ }
 531+
 532+ wfProfileOut( $fname );
 533+ return $resultSet;
 534+ }
 535+
 536+ static function startsWith($source, $prefix){
 537+ return strncmp($source, $prefix, strlen($prefix)) == 0;
 538+ }
 539+
 540+ /**
 541+ * Private constructor. Use LuceneSearchSet::newFromQuery().
 542+ *
 543+ * @param string $query
 544+ * @param array $lines
 545+ * @param int $resultCount
 546+ * @param int $totalHits
 547+ * @param string $suggestion
 548+ * @param string $info
 549+ * @access private
 550+ */
 551+ function LuceneSearchSet( $query, $lines, $resultCount, $totalHits = null, $suggestion = null, $info = null, $interwiki = null ) {
 552+ $this->mQuery = $query;
 553+ $this->mTotalHits = $totalHits;
 554+ $this->mResults = $lines;
 555+ $this->mResultCount = $resultCount;
 556+ $this->mPos = 0;
 557+ $this->mSuggestionQuery = null;
 558+ $this->mSuggestionSnippet = '';
 559+ $this->parseSuggestion($suggestion);
 560+ $this->mInfo = $info;
 561+ $this->mInterwiki = $interwiki;
 562+ }
 563+
 564+ /** Get suggestions from a suggestion result line */
 565+ function parseSuggestion($suggestion){
 566+ if( is_null($suggestion) )
 567+ return;
 568+ // parse split points and highlight changes
 569+ list($dummy,$points,$sug) = explode(" ",$suggestion);
 570+ $sug = urldecode($sug);
 571+ $points = explode(",",substr($points,1,-1));
 572+ array_unshift($points,0);
 573+ $suggestText = "";
 574+ for($i=1;$i<count($points);$i+=2){
 575+ $suggestText .= substr($sug,$points[$i-1],$points[$i]-$points[$i-1]);
 576+ $suggestText .= "<i>".substr($sug,$points[$i],$points[$i+1]-$points[$i])."</i>";
 577+ }
 578+ $suggestText .= substr($sug,end($points));
 579+
 580+ $this->mSuggestionQuery = $this->replaceGenericPrefixes($sug);
 581+ $this->mSuggestionSnippet = $this->replaceGenericPrefixes($suggestText);
 582+ }
 583+
 584+ /** replace prefixes like [2]: that are not in phrases */
 585+ function replaceGenericPrefixes($text){
 586+ $out = "";
 587+ $phrases = explode('"',$text);
 588+ for($i=0;$i<count($phrases);$i+=2){
 589+ $out .= preg_replace_callback('/\[([0-9]+)\]:/', array($this,'genericPrefixCallback'), $phrases[$i]);
 590+ if($i+1 < count($phrases))
 591+ $out .= '"'.$phrases[$i+1].'"'; // phrase text
 592+ }
 593+ return $out;
 594+ }
 595+
 596+ function genericPrefixCallback($matches){
 597+ global $wgContLang;
 598+ return $wgContLang->getFormattedNsText($matches[1]).":";
 599+ }
 600+
 601+ function numRows() {
 602+ return $this->mResultCount;
 603+ }
 604+
 605+ function termMatches() {
 606+ $resq = preg_replace( "/\\[.*?\\]:/", " ", $this->mQuery ); # generic prefixes
 607+ $resq = preg_replace( "/all:/", " ", $resq );
 608+ $resq = trim( preg_replace( "/[ |\\[\\]()\"{}+\\-_@!?%&*=\\|:;><,.\\/]+/", " ", $resq ) );
 609+ $terms = array_map( array( &$this, 'regexQuote' ),
 610+ explode( ' ', $resq ) );
 611+ return $terms;
 612+ }
 613+
 614+ /**
 615+ * Stupid hack around PHP's limited lambda support
 616+ * @access private
 617+ */
 618+ function regexQuote( $term ) {
 619+ return preg_quote( $term, '/' );
 620+ }
 621+
 622+ function hasResults() {
 623+ return count( $this->mResults ) > 0;
 624+ }
 625+
 626+ /**
 627+ * Some search modes return a total hit count for the query
 628+ * in the entire article database. This may include pages
 629+ * in namespaces that would not be matched on the given
 630+ * settings.
 631+ *
 632+ * @return int
 633+ * @access public
 634+ */
 635+ function getTotalHits() {
 636+ return $this->mTotalHits;
 637+ }
 638+
 639+ /**
 640+ * Return information about how and from where the results were fetched,
 641+ * should be useful for diagnostics and debugging
 642+ *
 643+ * @return string
 644+ */
 645+ function getInfo() {
 646+ if( is_null($this->mInfo) )
 647+ return null;
 648+ return "Search results fetched via ".$this->mInfo;
 649+ }
 650+
 651+ /**
 652+ * Return a result set of hits on other (multiple) wikis associated with this one
 653+ *
 654+ * @return SearchResultSet
 655+ */
 656+ function getInterwikiResults() {
 657+ return $this->mInterwiki;
 658+ }
 659+
 660+ /**
 661+ * Some search modes return a suggested alternate term if there are
 662+ * no exact hits. Returns true if there is one on this set.
 663+ *
 664+ * @return bool
 665+ * @access public
 666+ */
 667+ function hasSuggestion() {
 668+ return is_string( $this->mSuggestionQuery ) && $this->mSuggestionQuery != '';
 669+ }
 670+
 671+ function getSuggestionQuery(){
 672+ return $this->mSuggestionQuery;
 673+ }
 674+
 675+ function getSuggestionSnippet(){
 676+ return $this->mSuggestionSnippet;
 677+ }
 678+
 679+ /**
 680+ * Fetches next search result, or false.
 681+ * @return LuceneResult
 682+ * @access public
 683+ * @abstract
 684+ */
 685+ function next() {
 686+ # Group together lines belonging to one hit
 687+ $group = array();
 688+
 689+ for(;$this->mPos < count($this->mResults);$this->mPos++){
 690+ $l = trim($this->mResults[$this->mPos]);
 691+ if(count($group) == 0) // main line
 692+ $group['result'] = $l;
 693+ else if($l[0] == '#'){ // additional meta
 694+ list($meta,$value) = explode(" ",$l,2);
 695+ $group[$meta][] = $value;
 696+ } else
 697+ break;
 698+ }
 699+ if($group == false)
 700+ return false;
 701+ else
 702+ return new LuceneResult( $group );
 703+ }
 704+
 705+}
Property changes on: trunk/extensions/MWSearch/MWSearch_body.php
___________________________________________________________________
Added: svn:eol-style
1706 + native

Status & tagging log