Index: trunk/extensions/SphinxSearch/SphinxSearch.php |
— | — | @@ -11,7 +11,7 @@ |
12 | 12 | |
13 | 13 | $wgExtensionCredits['specialpage'][] = array( |
14 | 14 | 'path' => __FILE__, |
15 | | - 'version' => '0.7.1', |
| 15 | + 'version' => '0.7.2', |
16 | 16 | 'name' => 'SphinxSearch', |
17 | 17 | 'author' => array( 'Svemir Brkic', 'Paul Grinberg' ), |
18 | 18 | 'email' => 'svemir at deveblog dot com, gri6507 at yahoo dot com', |
— | — | @@ -21,29 +21,35 @@ |
22 | 22 | |
23 | 23 | $dir = dirname( __FILE__ ) . '/'; |
24 | 24 | |
25 | | -$wgAutoloadClasses['SphinxSearch'] = $dir . 'SphinxSearch_body.php'; |
26 | 25 | $wgExtensionMessagesFiles['SphinxSearch'] = $dir . 'SphinxSearch.i18n.php'; |
27 | | -$wgExtensionAliasesFiles['SphinxSearch'] = $dir . 'SphinxSearch.alias.php'; |
28 | 26 | |
29 | 27 | # To completely disable the default search and replace it with SphinxSearch, |
30 | 28 | # set this BEFORE including SphinxSearch.php in LocalSettings.php |
31 | 29 | # $wgSearchType = 'SphinxSearch'; |
32 | | - |
33 | | -if ( $wgSearchType == 'SphinxSearch' ) { |
34 | | - $wgDisableInternalSearch = true; |
35 | | - $wgDisableSearchUpdate = true; |
36 | | - $wgSpecialPages['Search'] = 'SphinxSearch'; |
| 30 | + # To use the new approach (added in 0.7.2), set $wgSearchType to 'SphinxMWSearch' instead |
| 31 | +if ( $wgSearchType == 'SphinxMWSearch' ) { |
| 32 | + $wgAutoloadClasses['SphinxMWSearch'] = $dir . 'SphinxMWSearch.php'; |
37 | 33 | } else { |
38 | | - $wgSpecialPages['SphinxSearch'] = 'SphinxSearch'; |
| 34 | + if ( $wgSearchType == 'SphinxSearch' ) { |
| 35 | + $wgAutoloadClasses['SphinxSearch'] = $dir . 'SphinxSearch_body.php'; |
| 36 | + $wgDisableInternalSearch = true; |
| 37 | + $wgDisableSearchUpdate = true; |
| 38 | + $wgSpecialPages['Search'] = 'SphinxSearch'; |
| 39 | + $wgDisableSearchUpdate = true; |
| 40 | + } else { |
| 41 | + $wgExtensionAliasesFiles['SphinxSearch'] = $dir . 'SphinxSearch.alias.php'; |
| 42 | + $wgSpecialPages['SphinxSearch'] = 'SphinxSearch'; |
| 43 | + } |
39 | 44 | } |
40 | 45 | |
41 | 46 | # this assumes you have copied sphinxapi.php from your Sphinx |
42 | 47 | # installation folder to your SphinxSearch extension folder |
| 48 | +# not needed if you install http://pecl.php.net/package/sphinx |
43 | 49 | if ( !class_exists( 'SphinxClient' ) ) { |
44 | 50 | require_once ( $dir . "sphinxapi.php" ); |
45 | 51 | } |
46 | 52 | |
47 | | -# Host and port on which searchd deamon is tunning |
| 53 | + # Host and port on which searchd daemon is running |
48 | 54 | $wgSphinxSearch_host = 'localhost'; |
49 | 55 | $wgSphinxSearch_port = 9312; |
50 | 56 | |
— | — | @@ -70,44 +76,43 @@ |
71 | 77 | $wgSphinxSearch_sortmode = SPH_SORT_RELEVANCE; |
72 | 78 | $wgSphinxSearch_sortby = ''; |
73 | 79 | |
74 | | -# By default, search will return articles that match any of the words in the search |
75 | | -# To change that to require all words to match by default, set the following to true |
76 | | -$wgSphinxMatchAll = false; |
| 80 | +if ( $wgSearchType == 'SphinxMWSearch' ) { |
| 81 | + # Following settings apply only in the new search model |
77 | 82 | |
78 | | -# Number of matches to display at once |
79 | | -$wgSphinxSearch_matches = 10; |
80 | | -# How many matches searchd will keep in RAM while searching |
81 | | -$wgSphinxSearch_maxmatches = 1000; |
82 | | -# When to stop searching all together (if not zero) |
83 | | -$wgSphinxSearch_cutoff = 0; |
| 83 | + # Set to true to use MW's default search snippets and highlighting |
| 84 | + $wgSphinxSearchMWHighlighter = false; |
| 85 | +} else { |
| 86 | + # Following settings apply only in the old search model |
84 | 87 | |
85 | | -# Weights of individual indexed columns. This gives page titles extra weight |
86 | | -$wgSphinxSearch_weights = array( |
87 | | - 'old_text' => 1, |
88 | | - 'page_title' => 100 |
89 | | -); |
| 88 | + # By default, search will return articles that match any of the words in the search |
| 89 | + # To change that to require all words to match by default, set the following to true |
| 90 | + $wgSphinxMatchAll = false; |
| 91 | + |
| 92 | + # Number of matches to display at once |
| 93 | + $wgSphinxSearch_matches = 10; |
90 | 94 | |
91 | | -# To enable hierarchical category search, specify the top category of your hierarchy |
92 | | -$wgSphinxTopSearchableCategory = ''; |
| 95 | + # To enable hierarchical category search, specify the top category of your hierarchy |
| 96 | + $wgSphinxTopSearchableCategory = ''; |
| 97 | + |
| 98 | + # This will fetch sub-categories as parent categories are checked |
| 99 | + # Requires $wgUseAjax to be true |
| 100 | + $wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren'; |
| 101 | + |
| 102 | + # Allow excluding selected categories when filtering |
| 103 | + $wgUseExcludes = false; |
93 | 104 | |
94 | | -# This will fetch sub-categories as parent categories are checked |
95 | | -# Requires $wgUseAjax to be true |
96 | | -$wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren'; |
| 105 | + # Web-accessible path to the extension's folder |
| 106 | + $wgSphinxSearchExtPath = $wgScriptPath . '/extensions/SphinxSearch'; |
| 107 | + |
| 108 | + # Web-accessible path to the folder with SphinxSearch.js file (if different from $wgSphinxSearchExtPath) |
| 109 | + $wgSphinxSearchJSPath = ''; |
| 110 | +} |
97 | 111 | |
98 | | -# EXPERIMENTAL: allow excluding selected categories when filtering |
99 | | -$wgUseExcludes = false; |
100 | | - |
101 | | -# Web-accessible path to the extension's folder |
102 | | -$wgSphinxSearchExtPath = $wgScriptPath . '/extensions/SphinxSearch'; |
103 | | - |
104 | | -# Web-accessible path to the folder with SphinxSearch.js file (if different from $wgSphinxSearchExtPath) |
105 | | -$wgSphinxSearchJSPath = ''; |
106 | | - |
107 | 112 | # ######################################################### |
108 | 113 | # Use Aspell to suggest corrections for possible misspellings. This can be provided via |
109 | 114 | # the PHP pspell module (http://www.php.net/manual/en/ref.pspell.php) |
110 | 115 | # or the command-line interface to Aspell |
111 | | - |
| 116 | + |
112 | 117 | # Should the suggestion mode be enabled? |
113 | 118 | $wgSphinxSuggestMode = false; |
114 | 119 | |
— | — | @@ -119,3 +124,15 @@ |
120 | 125 | |
121 | 126 | # Path to aspell location and language data files. Do not set if not sure. |
122 | 127 | $wgSphinxSearchPspellDictionaryDir = ''; |
| 128 | + |
| 129 | +# How many matches searchd will keep in RAM while searching |
| 130 | +$wgSphinxSearch_maxmatches = 1000; |
| 131 | + |
| 132 | + # When to stop searching altogether (if not zero) |
| 133 | +$wgSphinxSearch_cutoff = 0; |
| 134 | + |
| 135 | +# Weights of individual indexed columns. This gives page titles extra weight |
| 136 | +$wgSphinxSearch_weights = array( |
| 137 | + 'old_text' => 1, |
| 138 | + 'page_title' => 100 |
| 139 | +); |
Index: trunk/extensions/SphinxSearch/SphinxMWSearch.php |
— | — | @@ -0,0 +1,340 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * Class file for the SphinxMWSearch extension |
| 6 | + * |
| 7 | + * http://www.mediawiki.org/wiki/Extension:SphinxSearch |
| 8 | + * |
| 9 | + * Released under GNU General Public License (see http://www.fsf.org/licenses/gpl.html) |
| 10 | + * |
| 11 | + * @file |
| 12 | + * @ingroup Extensions |
| 13 | + * @author Svemir Brkic <svemir@deveblog.com> |
| 14 | + */ |
| 15 | + |
| 16 | + class SphinxMWSearch extends SearchEngine { |
| 17 | + |
| 18 | + var $categories = array(); |
| 19 | + var $exc_categories = array(); |
| 20 | + var $db; |
| 21 | + var $sphinx_client = null; |
| 22 | + |
| 23 | + function __construct( $db ) { |
| 24 | + $this->db = $db; |
| 25 | + } |
| 26 | + |
| 27 | + /** |
| 28 | + * Perform a full text search query and return a result set. |
| 29 | + * |
| 30 | + * @param string $term - Raw search term |
| 31 | + * @return SphinxMWSearchResultSet, or null if no client could be prepared or the query returned false |
| 32 | + * @access public |
| 33 | + */ |
| 34 | + function searchText( $term ) { |
| 35 | + global $wgSphinxSearch_index_list; |
| 36 | + |
| 37 | + if ( !$this->sphinx_client ) { |
| 38 | + $this->sphinx_client = $this->prepareSphinxClient( $term ); |
| 39 | + } |
| 40 | + |
| 41 | + if ( $this->sphinx_client ) { |
| 42 | + $this->searchTerms = $term; |
| 43 | + $escape = '/'; |
| 44 | + $delims = array( |
| 45 | + '(' => ')', |
| 46 | + '[' => ']', |
| 47 | + '"' => '', |
| 48 | + ); |
| 49 | + // temporarily replace already escaped characters |
| 50 | + $placeholders = array( |
| 51 | + '\\(' => '_PLC_O_PAR_', |
| 52 | + '\\)' => '_PLC_C_PAR_', |
| 53 | + '\\[' => '_PLC_O_BRA_', |
| 54 | + '\\]' => '_PLC_C_BRA_', |
| 55 | + '\\"' => '_PLC_QUOTE_', |
| 56 | + ); |
| 57 | + $term = str_replace(array_keys($placeholders), $placeholders, $term); |
| 58 | + foreach ($delims as $open => $close) { |
| 59 | + $open_cnt = substr_count( $term, $open ); |
| 60 | + if ($close) { |
| 61 | + // if counts do not match, escape them all |
| 62 | + $close_cnt = substr_count( $term, $close ); |
| 63 | + if ($open_cnt != $close_cnt) { |
| 64 | + $escape .= $open . $close; |
| 65 | + } |
| 66 | + } elseif ($open_cnt % 2 == 1) { |
| 67 | + // if there is no closing symbol, count should be even |
| 68 | + $escape .= $open; |
| 69 | + } |
| 70 | + } |
| 71 | + $term = str_replace($placeholders, array_keys($placeholders), $term); |
| 72 | + $resultSet = $this->sphinx_client->Query( |
| 73 | + addcslashes( $term, $escape ), |
| 74 | + $wgSphinxSearch_index_list |
| 75 | + ); |
| 76 | + } else { |
| 77 | + $resultSet = false; |
| 78 | + } |
| 79 | + |
| 80 | + if ( $resultSet === false ) { |
| 81 | + return null; |
| 82 | + } else { |
| 83 | + return new SphinxMWSearchResultSet( $resultSet, $term, $this->sphinx_client, $this->db ); |
| 84 | + } |
| 85 | + } |
| 86 | + |
| 87 | + /** |
| 88 | + * We do a weighted title/body search, no need to return titles separately |
| 89 | + * |
| 90 | + * @param string $term - Raw search term |
| 91 | + * @return null - title matches are never returned separately |
| 92 | + * @access public |
| 93 | + */ |
| 94 | + function searchTitle( $term ) { |
| 95 | + return null; |
| 96 | + } |
| 97 | + |
| 98 | + /** |
| 99 | + * @return SphinxClient: ready to run or false if term is empty |
| 100 | + */ |
| 101 | + function prepareSphinxClient( &$term ) { |
| 102 | + global $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby, $wgSphinxSearch_host, |
| 103 | + $wgSphinxSearch_port, $wgSphinxSearch_index_weights, $wgSphinxSearch_index, |
| 104 | + $wgSphinxSearch_mode, $wgSphinxMatchAll, $wgSphinxSearch_maxmatches, |
| 105 | + $wgSphinxSearch_cutoff, $wgSphinxSearch_weights; |
| 106 | + |
| 107 | + // don't do anything for blank searches |
| 108 | + if ( trim( $term ) === '' ) { |
| 109 | + return false; |
| 110 | + } |
| 111 | + |
| 112 | + wfRunHooks( 'SphinxSearchBeforeResults', array( |
| 113 | + &$term, |
| 114 | + &$this->offset, |
| 115 | + &$this->namespaces, |
| 116 | + &$this->categories, |
| 117 | + &$this->exc_categories |
| 118 | + ) ); |
| 119 | + |
| 120 | + $cl = new SphinxClient(); |
| 121 | + |
| 122 | + // setup the options for searching |
| 123 | + if ( isset( $wgSphinxSearch_host ) && isset( $wgSphinxSearch_port ) ) { |
| 124 | + $cl->SetServer( $wgSphinxSearch_host, $wgSphinxSearch_port ); |
| 125 | + } |
| 126 | + if ( count( $wgSphinxSearch_weights ) ) { |
| 127 | + $cl->SetFieldWeights( $wgSphinxSearch_weights ); |
| 128 | + } |
| 129 | + if ( is_array( $wgSphinxSearch_index_weights ) ) { |
| 130 | + $cl->SetIndexWeights( $wgSphinxSearch_index_weights ); |
| 131 | + } |
| 132 | + if ( isset( $wgSphinxSearch_mode ) ) { |
| 133 | + $cl->SetMatchMode( $wgSphinxSearch_mode ); |
| 134 | + } |
| 135 | + if ( count( $this->namespaces ) ) { |
| 136 | + $cl->SetFilter( 'page_namespace', $this->namespaces ); |
| 137 | + } |
| 138 | + if( !$this->showRedirects ) { |
| 139 | + $cl->SetFilter( 'page_is_redirect', array( 0 ) ); |
| 140 | + } |
| 141 | + if ( count( $this->categories ) ) { |
| 142 | + $cl->SetFilter( 'category', $this->categories ); |
| 143 | + } |
| 144 | + if ( count( $this->exc_categories ) ) { |
| 145 | + $cl->SetFilter( 'category', $this->exc_categories, true ); |
| 146 | + } |
| 147 | + $cl->SetSortMode( $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby ); |
| 148 | + $cl->SetLimits( |
| 149 | + $this->offset, |
| 150 | + $this->limit, |
| 151 | + $wgSphinxSearch_maxmatches, |
| 152 | + $wgSphinxSearch_cutoff |
| 153 | + ); |
| 154 | + |
| 155 | + wfRunHooks( 'SphinxSearchBeforeQuery', array( &$term, &$cl ) ); |
| 156 | + |
| 157 | + return $cl; |
| 158 | + } |
| 159 | + |
| 160 | + /** |
| 161 | + * @return Boolean: can we list/unlist redirects |
| 162 | + */ |
| 163 | + function acceptListRedirects() { |
| 164 | + return true; |
| 165 | + } |
| 166 | + |
| 167 | + /** |
| 168 | + * @return String: allowed query characters |
| 169 | + */ |
| 170 | + public static function legalSearchChars() { |
| 171 | + return "A-Za-z_'./\"!~0-9\\x80-\\xFF\\-"; |
| 172 | + } |
| 173 | + |
| 174 | + } |
| 175 | + |
| 176 | +/** |
| 177 | + * @ingroup Search |
| 178 | + */ |
| 179 | +class SphinxMWSearchResultSet extends SearchResultSet { |
| 180 | + var $mNdx = 0; |
| 181 | + var $sphinx_client = null; |
| 182 | + var $mSuggestion = ''; |
| 183 | + |
| 184 | + function __construct( $resultSet, $terms, $sphinx_client, $dbr ) { |
| 185 | + global $wgSphinxSearch_index; |
| 186 | + |
| 187 | + $this->sphinx_client = $sphinx_client; |
| 188 | + $this->mResultSet = array(); |
| 189 | + |
| 190 | + if ( is_array( $resultSet ) && is_array( $resultSet['matches'] ) ) { |
| 191 | + foreach ( $resultSet['matches'] as $id => $docinfo ) { |
| 192 | + $res = $dbr->select( |
| 193 | + 'page', |
| 194 | + array( 'page_id', 'page_title', 'page_namespace' ), |
| 195 | + array( 'page_id' => $id ), |
| 196 | + __METHOD__, |
| 197 | + array() |
| 198 | + ); |
| 199 | + if ( $dbr->numRows( $res ) > 0 ) { |
| 200 | + $this->mResultSet[] = $dbr->fetchObject( $res ); |
| 201 | + } |
| 202 | + } |
| 203 | + } |
| 204 | + $this->mNdx = 0; |
| 205 | + $this->mTerms = preg_split('/\W+/', $terms); |
| 206 | + } |
| 207 | + |
| 208 | + /** |
| 209 | + * Some search modes return a suggested alternate term if there are |
| 210 | + * no exact hits. Returns true if there is one on this set. |
| 211 | + * |
| 212 | + * @return Boolean |
| 213 | + */ |
| 214 | + function hasSuggestion() { |
| 215 | + global $wgSphinxSuggestMode; |
| 216 | + |
| 217 | + if ( $wgSphinxSuggestMode ) { |
| 218 | + // Initial (weak) implementation - will be replaced |
| 219 | + $dbr = wfGetDB( DB_SLAVE ); |
| 220 | + $res = $dbr->select( |
| 221 | + array( 'page' ), |
| 222 | + array( 'page_title' ), |
| 223 | + array( "page_title SOUNDS LIKE " . $dbr->addQuotes($this->mTerms[0]) ), |
| 224 | + __METHOD__, |
| 225 | + array( |
| 226 | + 'ORDER BY' => 'page_counter desc', |
| 227 | + 'LIMIT' => 1 |
| 228 | + ) |
| 229 | + ); |
| 230 | + $suggestion = $dbr->fetchObject ( $res ); |
| 231 | + $this->mSuggestion = $suggestion->page_title; |
| 232 | + if ($this->mSuggestion) { |
| 233 | + return true; |
| 234 | + } |
| 235 | + } |
| 236 | + return false; |
| 237 | + } |
| 238 | + |
| 239 | + /** |
| 240 | + * @return String: suggested query, null if none |
| 241 | + */ |
| 242 | + function getSuggestionQuery(){ |
| 243 | + return $this->mSuggestion; |
| 244 | + } |
| 245 | + |
| 246 | + /** |
| 247 | + * @return String: HTML highlighted suggested query, '' if none |
| 248 | + */ |
| 249 | + function getSuggestionSnippet(){ |
| 250 | + return $this->mSuggestion; |
| 251 | + } |
| 252 | + |
| 253 | + /** |
| 254 | + * @return Array: search terms |
| 255 | + */ |
| 256 | + function termMatches() { |
| 257 | + return $this->mTerms; |
| 258 | + } |
| 259 | + |
| 260 | + /** |
| 261 | + * @return Integer: number of results |
| 262 | + */ |
| 263 | + function numRows() { |
| 264 | + return count( $this->mResultSet ); |
| 265 | + } |
| 266 | + |
| 267 | + /** |
| 268 | + * @return SphinxMWSearchResult: next result, false if none |
| 269 | + */ |
| 270 | + function next() { |
| 271 | + if ( isset( $this->mResultSet[$this->mNdx] ) ) { |
| 272 | + $row = $this->mResultSet[$this->mNdx]; |
| 273 | + ++$this->mNdx; |
| 274 | + return new SphinxMWSearchResult( $row, $this->sphinx_client ); |
| 275 | + } else { |
| 276 | + return false; |
| 277 | + } |
| 278 | + } |
| 279 | + |
| 280 | + function free() { |
| 281 | + unset( $this->mResultSet ); |
| 282 | + } |
| 283 | + |
| 284 | +} |
| 285 | + |
| 286 | +class SphinxMWSearchResult extends SearchResult { |
| 287 | + |
| 288 | + var $sphinx_client = null; |
| 289 | + |
| 290 | + function __construct( $row, $sphinx_client ) { |
| 291 | + $this->sphinx_client = $sphinx_client; |
| 292 | + parent::__construct( $row ); |
| 293 | + } |
| 294 | + |
| 295 | + /** |
| 296 | + * @param $terms Array: terms to highlight |
| 297 | + * @return String: highlighted text snippet, null (and not '') if not supported |
| 298 | + */ |
| 299 | + function getTextSnippet( $terms ){ |
| 300 | + global $wgUser, $wgSphinxSearchMWHighlighter, $wgSphinxSearch_index; |
| 301 | + |
| 302 | + if ( $wgSphinxSearchMWHighlighter ) { |
| 303 | + return parent::getTextSnippet( $terms ); |
| 304 | + } |
| 305 | + |
| 306 | + $this->initText(); |
| 307 | + |
| 308 | + list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser ); |
| 309 | + |
| 310 | + $excerpts_opt = array( |
| 311 | + "before_match" => "<span class='searchmatch'>", |
| 312 | + "after_match" => "</span>", |
| 313 | + "chunk_separator" => " ... ", |
| 314 | + "limit" => $contextlines * $contextchars, |
| 315 | + "around" => $contextchars |
| 316 | + ); |
| 317 | + |
| 318 | + $excerpts = $this->sphinx_client->BuildExcerpts( |
| 319 | + array( $this->mText ), |
| 320 | + $wgSphinxSearch_index, |
| 321 | + join(' ', $terms), |
| 322 | + $excerpts_opt |
| 323 | + ); |
| 324 | + |
| 325 | + if ( is_array( $excerpts ) ) { |
| 326 | + $ret = ''; |
| 327 | + foreach ( $excerpts as $entry ) { |
| 328 | + // remove some wiki markup |
| 329 | + $entry = preg_replace( '/([\[\]\{\}\*\#\|\!]+|==+)/', |
| 330 | + ' ', |
| 331 | + strip_tags( $entry, '<span><br>' ) |
| 332 | + ); |
| 333 | + $ret .= "<div style='margin: 0.2em 1em 0.2em 1em;'>$entry</div>\n"; |
| 334 | + } |
| 335 | + } else { |
| 336 | + $ret = wfMsg( 'sphinxSearchWarning', $this->sphinx_client->GetLastError() ); |
| 337 | + } |
| 338 | + return $ret; |
| 339 | + } |
| 340 | + |
| 341 | +} |
\ No newline at end of file |