Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -187,6 +187,7 @@ |
188 | 188 | |
189 | 189 | $wgDisableCounters = false; |
190 | 190 | $wgDisableTextSearch = false; |
| 191 | +$wgDisableFuzzySearch = false; /* If true, the fuzzy title search that follows the "nogomatch" message is skipped. */ |
191 | 192 | $wgDisableSearchUpdate = false; # If you've disabled search semi-permanently, this also disables updates to the table. If you ever re-enable, be sure to rebuild the search table. |
192 | 193 | $wgDisableUploads = true; # Uploads have to be specially set up to be secure |
193 | 194 | $wgRemoteUploads = false; # Set to true to enable the upload _link_ while local uploads are disabled. Assumes that the special page link will be bounced to another server where uploads do work. |
Index: trunk/phase3/includes/SearchEngine.php |
— | — | @@ -458,30 +458,109 @@ |
459 | 459 | $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) ); |
460 | 460 | return; |
461 | 461 | } |
| 462 | + $wgOut->addHTML( wfMsg("nogomatch", |
| 463 | + htmlspecialchars( wfLocalUrl( ucfirst($this->mUsertext), "action=edit") ) ) |
| 464 | + . "\n<p>" ); |
462 | 465 | |
463 | | - # Try a near match |
464 | | - # |
465 | | - if( !$wgDisableTextSearch ) { |
466 | | - $this->parseQuery(); |
467 | | - $sql = "SELECT cur_id,cur_title,cur_namespace,si_page FROM cur,searchindex " . |
468 | | - "WHERE cur_id=si_page AND {$this->mTitlecond} ORDER BY cur_namespace LIMIT 1"; |
469 | | - |
470 | | - if ( "" != $this->mTitlecond ) { |
471 | | - $res = wfQuery( $sql, DB_READ, $fname ); |
472 | | - } |
473 | | - if ( isset( $res ) && 0 != wfNumRows( $res ) ) { |
474 | | - $s = wfFetchObject( $res ); |
475 | | - |
476 | | - $t = Title::makeTitle( $s->cur_namespace, $s->cur_title ); |
477 | | - $wgOut->redirect( wfLocalUrl( $t->getPrefixedURL() ) ); |
478 | | - return; |
| 466 | + # Try a fuzzy title search |
| 467 | + $anyhit = false; |
| 468 | + global $wgDisableFuzzySearch; |
| 469 | + if(! $wgDisableFuzzySearch ){ |
| 470 | + foreach( array(NS_MAIN, NS_WP, NS_USER, NS_IMAGE, NS_MEDIAWIKI) as $namespace){ |
| 471 | + $anyhit |= SearchEngine::doFuzzyTitleSearch( $search, $namespace ); |
479 | 472 | } |
| 473 | + } |
| 474 | + if( ! $anyhit ){ |
| 475 | + $wgOut->addHTML( wfMsg("notitlematches") ); |
480 | 476 | } |
481 | | - $wgOut->addHTML( wfMsg("nogomatch", |
482 | | - htmlspecialchars( wfLocalUrl( ucfirst($this->mUsertext), "action=edit") ) ) |
483 | | - . "\n<p>" ); |
484 | | - $this->showResults(); |
485 | 477 | } |
| 478 | + |
| 479 | + /* static */ function doFuzzyTitleSearch( $search, $namespace ){ |
| 480 | +     # Outputs, as a wikitext list, up to 10 titles in $namespace that fuzzily match $search, |
| 481 | +     # each with a closeness percentage and a star rating. Returns true if anything was output, else false. |
| 482 | +     global $wgLang, $wgOut; |
| 483 | +     $sstr = str_replace(" ", "_", ucfirst($search)); # matched in underscore form; turned back into spaces for display below |
| 484 | +     $fuzzymatches = array_slice( SearchEngine::fuzzyTitles( $sstr, $namespace ), 0, 10 ); |
| 485 | +     $slen = strlen( $search ); # loop-invariant, so computed once rather than twice per match |
| 486 | +     $wikitext = ""; |
| 487 | +     foreach($fuzzymatches as $res){ |
| 488 | +         $t = str_replace("_", " ", $res[1]); |
| 489 | +         # Main-namespace links are bare; other namespaces are piped ("Namespace:Title|Title") so only the title is shown. |
| 490 | +         $tfull = ( $namespace == NS_MAIN ) ? "$t" : $wgLang->getNsText( $namespace ) . ":$t|$t"; |
| 491 | +         $distance = $res[0]; |
| 492 | +         $closeness = ($slen - $distance) / $slen; # 1.0 when the distance is 0, lower as the distance grows |
| 493 | +         $percent = intval( $closeness * 100 ) . "%"; |
| 494 | +         # ceil() rounds up, so any positive closeness is shown with at least one star (five at most). |
| 495 | +         $stars = str_repeat("*", ceil(5 * $closeness) ); |
| 496 | +         $wikitext .= "* [[$tfull]] $percent ($stars)\n"; |
| 497 | +     } |
| 498 | +     if( $wikitext ){ |
| 499 | +         if( $namespace != NS_MAIN ) |
| 500 | +             $wikitext = "=== " . $wgLang->getNsText( $namespace ) . " ===\n" . $wikitext; |
| 501 | +         $wgOut->addWikiText( $wikitext ); |
| 502 | +         return true; |
| 503 | +     } |
| 504 | +     return false; |
| 505 | + } |
| 506 | + |
| 507 | + /* static */ function fuzzyTitles( $sstr, $namespace = NS_MAIN ){ |
| 508 | +     # Returns array( distance, title ) pairs, smallest Levenshtein distance first, for titles in $namespace close to $sstr. |
| 509 | +     $span = 0.10; // weed on title length before doing levenshtein: only lengths within this fraction of the query are fetched |
| 510 | +     $tolerance = 0.35; // allowed fraction of erroneous characters |
| 511 | +     $slen = strlen($sstr); |
| 512 | +     $tolerance_count = ceil($tolerance * $slen); |
| 513 | +     $spanabs = ceil($slen * (1 + $span)) - $slen; |
| 514 | +     $result = array(); |
| 515 | +     for( $i=0; $i <= $spanabs; $i++ ){ |
| 516 | +         $lengths = ( $i != 0 ) ? array( $slen + $i, $slen - $i ) : array( $slen ); # longer bucket first, then the shorter one |
| 517 | +         foreach( $lengths as $len ){ |
| 518 | +             $titles = SearchEngine::getTitlesByLength( $len, $namespace ); |
| 519 | +             if( !is_array( $titles ) ) continue; # a length with no titles may not come back as an array |
| 520 | +             foreach($titles as $t){ |
| 521 | +                 $d = levenshtein($sstr, $t); |
| 522 | +                 if($d < $tolerance_count) $result[] = array($d, $t); |
| 523 | +             } |
| 524 | +         } |
| 525 | +     } |
| 526 | +     usort($result, "SearchEngine_pcmp"); |
| 527 | +     return $result; |
| 528 | + } |
| 529 | + |
| 530 | + /* static */ function getTitlesByLength($aLength, $aNamespace = 0){ |
| 531 | +     # Returns the list of cur_title values of byte length $aLength in namespace $aNamespace (always an array). |
| 532 | +     # Served from per-length/namespace memcached buckets; the cur table is rescanned once the createtime stamp is an hour old. |
| 533 | +     global $wgMemc, $wgDBname; |
| 534 | + |
| 535 | +     $mkey = "$wgDBname:titlesbylength:$aLength:$aNamespace"; |
| 536 | +     $mkeyts = "$wgDBname:titlesbylength:createtime"; |
| 537 | +     $ts = $wgMemc->get( $mkeyts ); |
| 538 | +     $result = $wgMemc->get( $mkey ); |
| 539 | + |
| 540 | +     if( time() - $ts < 3600 ){ |
| 541 | +         // note: in case of insufficient memcached space, we return |
| 542 | +         // an empty list instead of starting to hit the DB. |
| 543 | +         return is_array( $result ) ? $result : array(); |
| 544 | +     } |
| 545 | + |
| 546 | +     $wgMemc->set( $mkeyts, time() ); // presumably stamped before the scan so concurrent requests take the cached branch instead of rescanning too |
| 547 | +     $res = wfQuery("SELECT cur_title, cur_namespace FROM cur", DB_READ); |
| 548 | +     $titles = array(); // $titles[length][namespace] = list of titles |
| 549 | +     while( $obj = wfFetchObject( $res ) ){ |
| 550 | +         $titles[strlen( $obj->cur_title )][$obj->cur_namespace][] = $obj->cur_title; |
| 551 | +     } |
| 552 | +     foreach($titles as $length => $length_arr){ |
| 553 | +         foreach($length_arr as $ns => $title_arr){ |
| 554 | +             $wgMemc->set( "$wgDBname:titlesbylength:$length:$ns", $title_arr, 3600 * 24 ); |
| 555 | +         } |
| 556 | +     } |
| 557 | +     // No title of this length/namespace: return an empty array, as the cached branch does, not an undefined index (null). |
| 558 | +     if( !isset( $titles[$aLength][$aNamespace] ) ) return array(); |
| 559 | +     return $titles[$aLength][$aNamespace]; |
| 560 | + } |
486 | 561 | } |
487 | 562 | |
| 563 | +/* private static: usort() comparator for SearchEngine::fuzzyTitles(); orders array( distance, title ) pairs by ascending distance (element 0) */ function SearchEngine_pcmp($a, $b){ return $a[0] - $b[0]; } |
| 564 | + |
| 565 | + |
| 566 | + |
488 | 567 | ?> |