Index: trunk/extensions/SphinxSearch/SphinxSearch_setup.php |
— | — | @@ -0,0 +1,60 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Sets up myspell dictionary for search suggestions |
| 5 | + * |
| 6 | + * Run without any arguments to see instructions. |
| 7 | + * |
| 8 | + * @author Svemir Brkic |
| 9 | + * @file |
| 10 | + * @ingroup extensions |
| 11 | + */ |
| 12 | + |
| 13 | +require_once( '../../maintenance/Maintenance.php' ); |
| 14 | + |
/**
 * Maintenance script that builds the myspell dictionary pair (sphinx.dic and
 * sphinx.aff) read by the enchant-based search suggestions.
 *
 * The word list is produced by running the Sphinx indexer in --buildstops
 * mode; the script then prepends the word count that the myspell .dic format
 * expects and writes a minimal .aff file declaring UTF-8.
 */
class SphinxSearch_setup extends Maintenance {

	public function __construct() {
		parent::__construct();

		$this->mDescription = "Sets up myspell dictionary (sphinx.dic and sphinx.aff) ";
		$this->mDescription .= "for search suggestions (suggestWithEnchant method.)\n";
		$this->mDescription .= "Uses Sphinx indexer to create a list ";
		$this->mDescription .= "of all indexed words, sorted by frequency.";
	}

	/**
	 * Override parameters setup because we do not need some of the default ones.
	 *
	 * NOTE(review): the required option is spelled 'spinxconf' (sic). The
	 * spelling is kept because existing command lines depend on it.
	 */
	protected function addDefaultParams() {
		$this->addOption( 'spinxconf', 'Location of Sphinx configuration file', true, true );
		$this->addOption( 'indexer', 'Full path to Sphinx indexer if not in the path', false, true );
		$this->addOption( 'useindex', 'Sphinx index to use (defaults to wiki_main)', false, true );
		$this->addOption( 'maxwords', 'Maximum number of words to extract (defaults to 10000)', false, true );
		$this->addOption( 'help', "Display this help message" );
		$this->addOption( 'quiet', "Whether to suppress non-error output" );
	}

	/**
	 * Run the indexer and turn its word list into sphinx.dic / sphinx.aff.
	 *
	 * Both files are written next to this script, which is the directory
	 * the enchant lookup uses as its dictionary path.
	 */
	public function execute() {
		$max_words = intval( $this->getOption( 'maxwords', 10000 ) );
		if ( $max_words < 1 ) {
			$this->error( "maxwords must be a positive integer.", true );
		}
		$indexer = wfEscapeShellArg( $this->getOption( 'indexer', 'indexer' ) );
		$index = wfEscapeShellArg( $this->getOption( 'useindex', 'wiki_main' ) );
		$conf = wfEscapeShellArg( $this->getOption( 'spinxconf' ) );

		// Anchor the output to the script directory instead of whatever the
		// current working directory happens to be.
		$dir = dirname( __FILE__ );
		$dic_file = $dir . '/sphinx.dic';
		$aff_file = $dir . '/sphinx.aff';
		$dic = wfEscapeShellArg( $dic_file );

		$cmd = "$indexer --config $conf $index --buildstops $dic $max_words";
		$retval = null;
		$this->output( wfShellExec( $cmd, $retval ) );

		// A failed run may leave a dictionary from an earlier run in place.
		// Processing it again would prepend a second count line and corrupt it.
		if ( intval( $retval ) !== 0 ) {
			$this->error( "Sphinx indexer failed (exit code $retval); dictionary was not updated.", true );
		}

		$words = file_exists( $dic_file ) ? file( $dic_file ) : false;
		if ( $words === false ) {
			$this->error( "Could not read word list from $dic_file.", true );
		}

		$cnt = count( $words );
		if ( !$cnt ) {
			// Nothing was indexed; leave the files alone, as before.
			$this->error( "Sphinx indexer returned no words; dictionary was not updated." );
			return;
		}

		// myspell expects the number of entries on the first line of the .dic file
		if ( file_put_contents( $dic_file, $cnt . "\n" . join( '', $words ) ) === false ) {
			$this->error( "Could not write $dic_file.", true );
		}
		if ( file_put_contents( $aff_file, "SET UTF-8\n" ) === false ) {
			$this->error( "Could not write $aff_file.", true );
		}
	}

}
| 55 | + |
| 56 | +$maintClass = "SphinxSearch_setup"; |
| 57 | + |
| 58 | +// Avoid E_ALL notice caused by ob_end_flush() in Maintenance::setup() |
| 59 | +ob_start(); |
| 60 | + |
| 61 | +require_once( DO_MAINTENANCE ); |
Property changes on: trunk/extensions/SphinxSearch/SphinxSearch_setup.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 62 | + native |
Index: trunk/extensions/SphinxSearch/SphinxMWSearch.php |
— | — | @@ -18,7 +18,7 @@ |
19 | 19 | var $exc_categories = array(); |
20 | 20 | var $db; |
21 | 21 | var $sphinx_client = null; |
22 | | - |
| 22 | + |
/**
 * Keep the given database handle in $this->db.
 *
 * @param $db Mixed: presumably a MediaWiki database connection - confirm against callers
 */
function __construct( $db ) {
	$this->db = $db;
}
— | — | @@ -75,7 +75,7 @@ |
76 | 76 | } else { |
77 | 77 | $resultSet = false; |
78 | 78 | } |
79 | | - |
| 79 | + |
80 | 80 | if ( $resultSet === false ) { |
81 | 81 | return null; |
82 | 82 | } else { |
— | — | @@ -85,12 +85,8 @@ |
86 | 86 | |
/**
 * We do a weighted title/body search, no need to return titles separately
 *
 * NOTE(review): presumably overrides SearchEngine::searchTitle( $term ); the
 * parent class is not visible here - confirm. The parameter is optional so
 * calls with and without a term both work.
 *
 * @param $term String: raw search term (ignored)
 * @return null always
 */
function searchTitle( $term = null ) {
	return null;
}
97 | 93 | |
— | — | @@ -170,23 +166,20 @@ |
171 | 167 | return "A-Za-z_'./\"!~0-9\\x80-\\xFF\\-"; |
172 | 168 | } |
173 | 169 | |
174 | | - } |
| 170 | +} |
175 | 171 | |
176 | | -/** |
177 | | - * @ingroup Search |
178 | | - */ |
179 | 172 | class SphinxMWSearchResultSet extends SearchResultSet { |
180 | 173 | var $mNdx = 0; |
181 | 174 | var $sphinx_client = null; |
182 | 175 | var $mSuggestion = ''; |
183 | | - |
| 176 | + |
184 | 177 | function __construct( $resultSet, $terms, $sphinx_client, $dbr ) { |
185 | 178 | global $wgSphinxSearch_index; |
186 | 179 | |
187 | 180 | $this->sphinx_client = $sphinx_client; |
188 | 181 | $this->mResultSet = array(); |
189 | 182 | |
190 | | - if ( is_array( $resultSet ) && is_array( $resultSet['matches'] ) ) { |
| 183 | + if ( is_array( $resultSet ) && isset( $resultSet['matches'] ) ) { |
191 | 184 | foreach ( $resultSet['matches'] as $id => $docinfo ) { |
192 | 185 | $res = $dbr->select( |
193 | 186 | 'page', |
— | — | @@ -212,22 +205,14 @@ |
213 | 206 | */ |
214 | 207 | function hasSuggestion() { |
215 | 208 | global $wgSphinxSuggestMode; |
216 | | - |
| 209 | + |
217 | 210 | if ( $wgSphinxSuggestMode ) { |
218 | | - // Initial (weak) implementation - will be replaced |
219 | | - $dbr = wfGetDB( DB_SLAVE ); |
220 | | - $res = $dbr->select( |
221 | | - array( 'page' ), |
222 | | - array( 'page_title' ), |
223 | | - array( "page_title SOUNDS LIKE " . $dbr->addQuotes($this->mTerms[0]) ), |
224 | | - __METHOD__, |
225 | | - array( |
226 | | - 'ORDER BY' => 'page_counter desc', |
227 | | - 'LIMIT' => 1 |
228 | | - ) |
229 | | - ); |
230 | | - $suggestion = $dbr->fetchObject ( $res ); |
231 | | - $this->mSuggestion = $suggestion->page_title; |
| 211 | + $this->mSuggestion = ''; |
| 212 | + if ( $wgSphinxSuggestMode == 'enchant' ) { |
| 213 | + $this->suggestWithEnchant(); |
| 214 | + } else { |
| 215 | + $this->suggestWithSoundex(); |
| 216 | + } |
232 | 217 | if ($this->mSuggestion) { |
233 | 218 | return true; |
234 | 219 | } |
— | — | @@ -236,6 +221,66 @@ |
237 | 222 | } |
238 | 223 | |
/**
 * Wiki-specific search suggestions using enchant library.
 * Use SphinxSearch_setup.php to create the dictionary
 *
 * Each term in $this->mTerms is checked against the 'sphinx' myspell
 * dictionary looked up in this file's directory. A term that fails the check
 * is replaced by the first suggestion that is not merely a case variant of
 * it. $this->mSuggestion is set to the re-joined terms only when at least one
 * term was replaced; otherwise it is left untouched.
 *
 * Does nothing when the enchant extension, the broker or the dictionary is
 * unavailable.
 */
function suggestWithEnchant() {
	// enchant is an optional PHP extension; calling into it blindly is a fatal error
	if ( !function_exists( 'enchant_broker_init' )
		|| !function_exists( 'enchant_broker_set_dict_path' )
	) {
		return;
	}

	$broker = enchant_broker_init();
	if ( !$broker ) {
		return;
	}
	enchant_broker_set_dict_path( $broker, ENCHANT_MYSPELL, dirname( __FILE__ ) );

	$dict = false;
	if ( enchant_broker_dict_exists( $broker, 'sphinx' ) ) {
		$dict = enchant_broker_request_dict( $broker, 'sphinx' );
	}

	if ( $dict ) {
		// Prefer multibyte lowercasing so non-ASCII case variants are recognised
		$use_mb = function_exists( 'mb_strtolower' );
		$suggestion_found = false;
		$full_suggestion = '';

		foreach ( $this->mTerms as $word ) {
			if ( $word !== '' && !enchant_dict_check( $dict, $word ) ) {
				$suggestions = enchant_dict_suggest( $dict, $word );
				if ( !is_array( $suggestions ) ) {
					// no suggestions available for this word
					$suggestions = array();
				}
				$word_lc = $use_mb ? mb_strtolower( $word, 'UTF-8' ) : strtolower( $word );
				foreach ( $suggestions as $candidate ) {
					$candidate_lc = $use_mb
						? mb_strtolower( $candidate, 'UTF-8' )
						: strtolower( $candidate );
					// strict comparison: loose != treats numeric-looking
					// strings such as "1e3" and "1000" as equal
					if ( $candidate_lc !== $word_lc ) {
						$word = $candidate;
						$suggestion_found = true;
						break;
					}
				}
			}
			$full_suggestion .= $word . ' ';
		}

		enchant_broker_free_dict( $dict );
		if ( $suggestion_found ) {
			$this->mSuggestion = trim( $full_suggestion );
		}
	}

	enchant_broker_free( $broker );
}
| 257 | + |
/**
 * Default (weak) suggestions implementation relies on MySQL soundex
 *
 * Looks up one page title that SOUNDS LIKE the joined search terms and stores
 * it in $this->mSuggestion, with underscores turned back into spaces.
 * Leaves $this->mSuggestion untouched when there are no terms or no match.
 */
function suggestWithSoundex() {
	$typed = trim( join( ' ', $this->mTerms ) );
	if ( $typed === '' ) {
		// nothing to compare against
		return;
	}

	$dbr = wfGetDB( DB_SLAVE );
	// MediaWiki stores page_title with underscores in place of spaces, so the
	// "same as what was typed" filter has to use that form.
	$typed_key = str_replace( ' ', '_', $typed );
	$res = $dbr->select(
		array( 'page' ),
		array( 'page_title' ),
		array(
			"page_title SOUNDS LIKE " . $dbr->addQuotes( $typed ),
			// avoid (re)recommending the search string; plain inequality is
			// used because LIKE would treat % and _ in the terms as wildcards
			"page_title != " . $dbr->addQuotes( $typed_key )
		),
		__METHOD__,
		array(
			'ORDER BY' => 'page_counter desc',
			'LIMIT' => 1
		)
	);
	$suggestion = $dbr->fetchObject( $res );
	if ( is_object( $suggestion ) ) {
		$this->mSuggestion = trim( str_replace( '_', ' ', $suggestion->page_title ) );
	}
}
| 283 | + |
| 284 | + /** |
240 | 285 | * @return String: suggested query, null if none |
241 | 286 | */ |
242 | 287 | function getSuggestionQuery(){ |
— | — | @@ -285,12 +330,12 @@ |
286 | 331 | class SphinxMWSearchResult extends SearchResult { |
287 | 332 | |
288 | 333 | var $sphinx_client = null; |
289 | | - |
| 334 | + |
/**
 * Keep the Sphinx client on the object, then let the parent class
 * initialise itself from the row.
 *
 * @param $row Mixed: passed unchanged to the parent constructor
 * @param $sphinx_client Mixed: stored in $this->sphinx_client (presumably a SphinxClient instance - confirm)
 */
function __construct( $row, $sphinx_client ) {
	$this->sphinx_client = $sphinx_client;
	parent::__construct( $row );
}
294 | | - |
| 339 | + |
295 | 340 | /** |
296 | 341 | * @param $terms Array: terms to highlight |
297 | 342 | * @return String: highlighted text snippet, null (and not '') if not supported |
— | — | @@ -333,7 +378,7 @@ |
334 | 379 | } |
335 | 380 | } else { |
336 | 381 | $ret = wfMsg( 'sphinxSearchWarning', $this->sphinx_client->GetLastError() ); |
337 | | - } |
| 382 | + } |
338 | 383 | return $ret; |
339 | 384 | } |
340 | 385 | |
Index: trunk/extensions/SphinxSearch/sphinx.conf |
— | — | @@ -45,7 +45,7 @@ |
46 | 46 | source src_wiki_incremental : src_wiki_main |
47 | 47 | { |
48 | 48 | # adjust this query based on the time you run the full index |
49 | | - # in this case, full index runs at 3 AM (server time) which translates to 7 AM UTC |
| 49 | + # in this case, full index runs at 7 AM UTC |
50 | 50 | sql_query = SELECT page_id, page_title, page_namespace, page_is_redirect, old_id, old_text FROM page, revision, text WHERE rev_id=page_latest AND old_id=rev_text_id AND page_touched>=DATE_FORMAT(CURDATE(), '%Y%m%d070000') |
51 | 51 | |
52 | 52 | # all other parameters are copied from the parent source, |
— | — | @@ -113,14 +113,9 @@ |
114 | 114 | # searchd settings |
115 | 115 | searchd |
116 | 116 | { |
117 | | - # IP address on which search daemon will bind and accept |
118 | | - # optional, default is to listen on all addresses, |
119 | | - # ie. listen = 0.0.0.0 |
120 | | - listen = 127.0.0.1 |
| 117 | + # IP address and port on which the search daemon will bind and accept connections |
| 118 | + listen = 127.0.0.1:9312 |
121 | 119 | |
122 | | - # port on which search daemon will listen |
123 | | - port = 9312 |
124 | | - |
125 | 120 | # searchd run info is logged here - create or change the folder |
126 | 121 | log = /var/log/sphinx/searchd.log |
127 | 122 | |