r64941 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r64940‎ | r64941 | r64942 >
Date:15:59, 11 April 2010
Author:svemir
Status:deferred (Comments)
Tags:
Comment:
* Make sure all globals are initialized in SphinxSearch.php
* Better default for $wgSphinxSearchExtPath
* Use a colon (:) instead of a question mark (?) with "Did you mean" (easier to translate and more common)
* Added localization for pspell error message
* Got rid of "personal dictionary editor" bit - too much trouble for what it's worth
* Merged spelling stuff into the main class and got rid of levenshtein stuff
* use selectField instead of direct query
Modified paths:
  • /trunk/extensions/SphinxSearch/SphinxSearch.i18n.php (modified) (history)
  • /trunk/extensions/SphinxSearch/SphinxSearch.php (modified) (history)
  • /trunk/extensions/SphinxSearch/SphinxSearch_PersonalDict.php (deleted) (history)
  • /trunk/extensions/SphinxSearch/SphinxSearch_body.php (modified) (history)
  • /trunk/extensions/SphinxSearch/SphinxSearch_spell.php (deleted) (history)

Diff [purge]

Index: trunk/extensions/SphinxSearch/SphinxSearch_spell.php
@@ -1,147 +0,0 @@
2 -<?php
3 -
4 -class SphinxSearch_spell {
5 -
6 - var $string; // what to check
7 - var $words; // words from $string
8 - var $suggestion_needed; // is the suggestion needed
9 - var $suggestion; // the actual suggestion
10 -
11 - function spell ( $string ) {
12 - $this->string = str_replace( '"', '', $string );
13 - $this->words = preg_split( '/(\s+|\|)/', $this->string, - 1, PREG_SPLIT_NO_EMPTY );
14 - if ( function_exists( 'pspell_check' ) ) {
15 - $this->suggestion = $this->builtin_spell();
16 - } else {
17 - $this->suggestion = $this->nonnative_spell();
18 - }
19 - if ( $this->suggestion_needed )
20 - return $this->suggestion;
21 - else
22 - return '';
23 - }
24 -
25 - function builtin_spell () {
26 - global $wgUser, $wgSphinxSearchPersonalDictionary, $wgSphinxSearchPspellDictionaryDir;
27 -
28 - $ret = '';
29 - $this->suggestion_needed = false;
30 - foreach ( $this->words as $word ) {
31 - $pspell_config = pspell_config_create(
32 - $wgUser->getDefaultOption( 'language' ),
33 - $wgUser->getDefaultOption( 'variant' ) );
34 - if ( $wgSphinxSearchPspellDictionaryDir ) {
35 - pspell_config_data_dir( $pspell_config, $wgSphinxSearchPspellDictionaryDir );
36 - pspell_config_dict_dir( $pspell_config, $wgSphinxSearchPspellDictionaryDir );
37 - }
38 - pspell_config_mode( $pspell_config, PSPELL_FAST | PSPELL_RUN_TOGETHER );
39 - if ( $wgSphinxSearchPersonalDictionary )
40 - pspell_config_personal( $pspell_config, $wgSphinxSearchPersonalDictionary );
41 - $pspell_link = pspell_new_config( $pspell_config );
42 -
43 - if ( !$pspell_link )
44 - return "Error starting pspell personal dictionary\n";
45 -
46 - if ( !pspell_check( $pspell_link, $word ) ) {
47 - $suggestions = pspell_suggest( $pspell_link, $word );
48 - $guess = $this->bestguess( $word, $suggestions );
49 - if ( strtolower( $word ) == strtolower( $guess ) ) {
50 - $ret .= "$word ";
51 - } else {
52 - $ret .= "$guess ";
53 - $this->suggestion_needed = true;
54 - }
55 - unset( $suggestion );
56 - unset( $guess );
57 - } else {
58 - $ret .= "$word ";
59 - }
60 - }
61 -
62 - unset( $pspell_config );
63 - unset( $pspell_link );
64 - return trim( $ret );
65 -
66 - }
67 -
68 - function nonnative_spell () {
69 - global $wgUser, $wgSphinxSearchPersonalDictionary, $wgSphinxSearchAspellPath;
70 -
71 - // aspell will only return mis-spelled words, so remember all here
72 - $word_suggestions = array();
73 - foreach ( $this->words as $word ) {
74 - $word_suggestions[$word] = $word;
75 - }
76 -
77 - // prepare the system call with optional dictionary
78 - $aspellcommand = 'echo ' . escapeshellarg( $this->string ) .
79 - ' | ' . escapeshellarg( $wgSphinxSearchAspellPath ) .
80 - ' -a --ignore-accents --ignore-case';
81 - if ( $wgUser ) {
82 - $aspellcommand .= ' --lang=' . $wgUser->getDefaultOption( 'language' );
83 - }
84 - if ( $wgSphinxSearchPersonalDictionary ) {
85 - $aspellcommand .= ' --home-dir=' . dirname( $wgSphinxSearchPersonalDictionary );
86 - $aspellcommand .= ' -p ' . basename( $wgSphinxSearchPersonalDictionary );
87 - }
88 -
89 - // run aspell
90 - $shell_return = shell_exec( $aspellcommand );
91 -
92 - // parse return line by line
93 - $returnarray = explode( "\n", $shell_return );
94 - $this->suggestion_needed = false;
95 - foreach ( $returnarray as $key => $value ) {
96 - // lines with suggestions start with &
97 - if ( substr( $value, 0, 1 ) == "&" ) {
98 - $correction = explode( " ", $value );
99 - $word = $correction[1];
100 - $suggstart = strpos( $value, ":" ) + 2;
101 - $suggestions = substr( $value, $suggstart );
102 - $suggestionarray = explode( ", ", $suggestions );
103 - $guess = $this->bestguess( $word, $suggestionarray );
104 -
105 - if ( strtolower( $word ) != strtolower( $guess ) ) {
106 - $word_suggestions[$word] = $guess;
107 - $this->suggestion_needed = true;
108 - }
109 - }
110 - }
111 -
112 - return join( ' ', $word_suggestions );
113 - }
114 -
115 - /* This function takes a word, and an array of suggested words
116 - * and figure out which suggestion is closest sounding to
117 - * the word. Thif is made possible with the use of the
118 - * levenshtein() function.
119 - */
120 - function bestguess( $word, $suggestions ) {
121 - $shortest = - 1;
122 -
123 - if ( preg_match( '/^[^a-zA-Z]*$/', $word ) )
124 - return $word;
125 -
126 - foreach ( $suggestions as $suggested ) {
127 - $lev = levenshtein( strtolower( $word ), strtolower( $suggested ) );
128 - if ( $lev == 0 ) {
129 - // closest word is this one (exact match)
130 - $closest = $word;
131 - $shortest = 0;
132 -
133 - // break out of the loop; we've found an exact match
134 - break;
135 - }
136 -
137 - // if this distance is less than the next found shortest
138 - // distance, OR if a next shortest word has not yet been found
139 - if ( $lev <= $shortest || $shortest < 0 ) {
140 - // set the closest match, and shortest distance
141 - $closest = $suggested;
142 - $shortest = $lev;
143 - }
144 - }
145 -
146 - return $closest;
147 - }
148 -}
Index: trunk/extensions/SphinxSearch/SphinxSearch_PersonalDict.php
@@ -1,238 +0,0 @@
2 -<?php
3 -
4 -/**
5 - * SphinxSearch extension code for MediaWiki
6 - *
7 - * http://www.mediawiki.org/wiki/Extension:SphinxSearch
8 - *
9 - * Developed by Paul Grinberg and Svemir Brkic
10 - *
11 - * Released under GNU General Public License (see http://www.fsf.org/licenses/gpl.html)
12 - *
13 - */
14 -
15 -class SphinxSearchPersonalDict extends SpecialPage {
16 -
17 - function SphinxSearchPersonalDict() {
18 - SpecialPage::SpecialPage( "SphinxSearchPersonalDict", 'delete' );
19 - self::loadMessages();
20 - return true;
21 - }
22 -
23 - function loadMessages() {
24 - static $messagesLoaded = false;
25 - global $wgMessageCache;
26 - if ( $messagesLoaded ) {
27 - return;
28 - }
29 - $messagesLoaded = true;
30 -
31 - $allMessages = array(
32 - 'en' => array(
33 - 'sphinxsearchpersonaldict' => 'Wiki-specific Sphinx search spellcheck dictionary',
34 - 'sphinxsearchindictionary' => 'Already in personal dictionary',
35 - 'sphinxsearchtobeadded' => 'To be added to personal dictionary',
36 - 'sphinxsearchnotadded' => "Word '''%s''' was not added to dictionary because it contained non alphabetic characters",
37 - 'sphinxsearchcantpersonaldict' => 'You are not allowed to modify the {{SITENAME}} specific dictionary',
38 - )
39 - );
40 -
41 - foreach ( $allMessages as $lang => $langMessages ) {
42 - $wgMessageCache->addMessages( $langMessages, $lang );
43 - }
44 - return true;
45 - }
46 -
47 - function execute( $par ) {
48 - global $wgRequest, $wgOut, $wgUser;
49 -
50 - $this->setHeaders();
51 - $wgOut->setPagetitle( wfMsg( 'sphinxsearchpersonaldict' ) );
52 -
53 - if ( !$wgUser->isAllowed( "delete" ) ) {
54 - $wgOut->addWikiText( wfMsg( 'sphinxsearchcantpersonaldict' ) );
55 - $wgOut->addWikiText( '----' );
56 - }
57 -
58 - $toberemoved = $wgRequest->getArray( 'indictionary', array() );
59 - $tobeadded = $wgRequest->getVal( 'tobeadded', '' );
60 - $tobeadded = preg_split( '/\s/', trim( $tobeadded ), - 1, PREG_SPLIT_NO_EMPTY );
61 -
62 - $this->deleteFromPersonalDictionary( $toberemoved );
63 - $this->addToPersonalDictionary( $tobeadded );
64 -
65 - $this->CreateForm( $wgUser->isAllowed( "delete" ) );
66 - }
67 -
68 - function CreateForm( $allowed_to_add ) {
69 - global $wgOut;
70 - global $wgSphinxSearchPersonalDictionary;
71 -
72 - $wgOut->addHTML( "<form method=post>" );
73 - $wgOut->addHTML( "<div style=\"border: thin solid #000000; width:90%;\"><table cellpadding=\"15\" width=\"100%\" cellspacing=\"0\" border=\"0\">" );
74 - $wgOut->addHTML( "<tr><td valign=top>" );
75 - $wgOut->addWikiText( "<center>'''" . wfMsg( 'sphinxsearchindictionary' ) . "'''</center><p>" );
76 - $wgOut->addHTML( '<select name="indictionary[]" size="15" multiple="multiple">' );
77 -
78 - if ( file_exists( $wgSphinxSearchPersonalDictionary ) ) {
79 - $this->readPersonalDictionary( $langauge, $numwords, $words );
80 - sort( $words );
81 -
82 - if ( sizeof( $words ) > 0 ) {
83 - foreach ( $words as $w )
84 - $wgOut->addHTML( "<option value='$w'>$w</option>" );
85 - } else {
86 - $wgOut->addHTML( "<option disabled value=''>Dictionary empty</option>" );
87 - }
88 - } else {
89 - $wgOut->addHTML( "<option disabled value=''>Dictionary not found</option>" );
90 - }
91 -
92 - $wgOut->addHTML( '</select></td><td valign=top>' );
93 - if ( $allowed_to_add ) {
94 - $wgOut->addWikiText( "<center>'''" . wfMsg( 'sphinxsearchtobeadded' ) . "'''</center><p>" );
95 - $wgOut->addHTML( "<textarea name=\"tobeadded\" cols=\"30\" rows=\"15\"></textarea>" );
96 - $wgOut->addHTML( '</td></tr><tr><td colspan=2>' );
97 - $wgOut->addHTML( "<center><input type=\"submit\" value=\"Execute\" /></center>" );
98 - }
99 - $wgOut->addHTML( "</td></tr></table></div></form>" );
100 - }
101 -
102 - function addToPersonalDictionary( $list ) {
103 - if ( function_exists( 'pspell_config_create' ) ) {
104 - $this->builtin_addword( $list );
105 - } else {
106 - $this->nonnative_addword( $list );
107 - }
108 - }
109 -
110 - function getSearchLanguage() {
111 - global $wgUser, $wgLanguageCode;
112 -
113 - // Try to read the default language from $wgUser:
114 - $language = trim( $wgUser->getDefaultOption( 'language' ) );
115 -
116 - // Use global variable: $wgLanguageCode (from LocalSettings.php) as fallback:
117 - if ( empty( $language ) ) { $language = trim( $wgLanguageCode ); }
118 -
119 - // If we still don't have a valid language yet, assume English:
120 - if ( empty( $language ) ) { $language = 'en'; }
121 -
122 - return $language;
123 - }
124 -
125 - function builtin_addword( $list ) {
126 - global $wgUser, $wgOut;
127 - global $wgSphinxSearchPersonalDictionary;
128 - global $wgSphinxSearchPspellDictionaryDir;
129 -
130 - $language = $this->getSearchLanguage();
131 -
132 - $pspell_config = pspell_config_create(
133 - $language,
134 - $wgUser->getDefaultOption( 'variant' ) );
135 - if ( $wgSphinxSearchPspellDictionaryDir ) {
136 - pspell_config_data_dir( $pspell_config, $wgSphinxSearchPspellDictionaryDir );
137 - pspell_config_dict_dir( $pspell_config, $wgSphinxSearchPspellDictionaryDir );
138 - }
139 - pspell_config_mode( $pspell_config, PSPELL_FAST | PSPELL_RUN_TOGETHER );
140 - if ( $wgSphinxSearchPersonalDictionary )
141 - pspell_config_personal( $pspell_config, $wgSphinxSearchPersonalDictionary );
142 - $pspell_link = pspell_new_config( $pspell_config );
143 -
144 - $write_needed = false;
145 - foreach ( $list as $word ) {
146 - if ( $word == '' )
147 - continue;
148 - if ( preg_match( '/[^a-zA-Z]/', $word ) ) {
149 - $wgOut->addWikiText( sprintf( wfMsg( 'sphinxsearchnotadded' ), $word ) );
150 - continue;
151 - }
152 - pspell_add_to_personal( $pspell_link, $word );
153 - $write_needed = true;
154 - }
155 -
156 - if ( $write_needed ) {
157 - pspell_save_wordlist( $pspell_link );
158 - }
159 - }
160 -
161 - function nonnative_addword( $list ) {
162 - global $wgUser;
163 - global $wgSphinxSearchPersonalDictionary;
164 -
165 - if ( !file_exists( $wgSphinxSearchPersonalDictionary ) ) {
166 - // create the personal dictionary file if it does not already exist
167 - $language = $this->getSearchLanguage();
168 - $numwords = 0;
169 - $words = array();
170 - } else {
171 - $this->readPersonalDictionary( $language, $numwords, $words );
172 - }
173 -
174 - $write_needed = false;
175 - foreach ( $list as $word ) {
176 - if ( !in_array( $word, $words ) ) {
177 - $numwords++;
178 - array_push( $words, $word );
179 - $write_needed = true;
180 - }
181 - }
182 -
183 - if ( $write_needed )
184 - $this->writePersonalDictionary( $language, $numwords, $words );
185 - }
186 -
187 - function writePersonalDictionary( $language, $numwords, $words ) {
188 - global $wgSphinxSearchPersonalDictionary;
189 -
190 - $handle = fopen( $wgSphinxSearchPersonalDictionary, "wt" );
191 - if ( $handle ) {
192 - fwrite( $handle, "personal_ws-1.1 $language $numwords\n" );
193 - foreach ( $words as $w ) {
194 - fwrite( $handle, "$w\n" );
195 - }
196 - fclose( $handle );
197 - }
198 - }
199 -
200 - function readPersonalDictionary( &$language, &$numwords, &$words ) {
201 - global $wgSphinxSearchPersonalDictionary;
202 -
203 - $words = array();
204 - $lines = explode( "\n", file_get_contents( $wgSphinxSearchPersonalDictionary ) );
205 - foreach ( $lines as $line ) {
206 - trim( $line );
207 - if ( preg_match( '/\s(\w+)\s(\d+)/', $line, $matches ) ) {
208 - $language = $matches[1];
209 - $numwords = $matches[2];
210 - } else
211 - if ( $line )
212 - array_push( $words, $line );
213 - }
214 -
215 - // Make sure that we have a valid value for language if it wasn't in the .pws file:
216 - if ( empty( $language ) ) { $language = $this->getSearchLanguage(); }
217 - }
218 -
219 - function deleteFromPersonalDictionary( $list ) {
220 - // there is no built in way to delete from the personal dictionary.
221 -
222 - $this->readPersonalDictionary( $language, $numwords, $words );
223 -
224 - $write_needed = false;
225 - foreach ( $list as $w ) {
226 - if ( $w == '' )
227 - continue;
228 - if ( in_array( $w, $words ) ) {
229 - $index = array_keys( $words, $w );
230 - unset( $words[$index[0]] );
231 - $numwords--;
232 - $write_needed = true;
233 - }
234 - }
235 -
236 - if ( $write_needed )
237 - $this->writePersonalDictionary( $language, $numwords, $words );
238 - }
239 -}
Index: trunk/extensions/SphinxSearch/SphinxSearch_body.php
@@ -26,7 +26,7 @@
2727 * @return string
2828 */
2929 function SphinxSearch() {
30 - global $wgDisableInternalSearch, $wgSphinxSuggestMode, $wgAutoloadClasses;
 30+ global $wgDisableInternalSearch, $wgAutoloadClasses;
3131
3232 if ( $wgDisableInternalSearch ) {
3333 SpecialPage::SpecialPage( 'Search' );
@@ -34,10 +34,6 @@
3535 SpecialPage::SpecialPage( 'SphinxSearch' );
3636 }
3737
38 - if ( $wgSphinxSuggestMode ) {
39 - $wgAutoloadClasses['SphinxSearch_spell'] = dirname( __FILE__ ) . '/SphinxSearch_spell.php';
40 - }
41 -
4238 if ( function_exists( 'wfLoadExtensionMessages' ) ) {
4339 wfLoadExtensionMessages( 'SphinxSearch' );
4440 } else {
@@ -51,6 +47,7 @@
5248 }
5349 }
5450 }
 51+
5552 return true;
5653 }
5754
@@ -61,9 +58,9 @@
6259 */
6360 function searchableNamespaces() {
6461 $namespaces = SearchEngine::searchableNamespaces();
65 -
 62+
6663 wfRunHooks( 'SphinxSearchFilterSearchableNamespaces', array( &$namespaces ) );
67 -
 64+
6865 return $namespaces;
6966 }
7067
@@ -80,9 +77,9 @@
8178 } else {
8279 $categories = array();
8380 }
84 -
 81+
8582 wfRunHooks( 'SphinxSearchGetSearchableCategories', array( &$categories ) );
86 -
 83+
8784 return $categories;
8885 }
8986
@@ -100,6 +97,7 @@
10198 $cache_key = $wgDBname . ':sphinx_cats:' . md5( $parent );
10299 $categories = $wgMemc->get( $cache_key );
103100 }
 101+
104102 if ( !is_array( $categories ) ) {
105103 $categories = array();
106104 $dbr = wfGetDB( DB_SLAVE );
@@ -110,7 +108,7 @@
111109 'cl_from = page_id',
112110 'cl_to' => $parent,
113111 'page_namespace' => NS_CATEGORY ),
114 - 'epSearchableCategories',
 112+ __METHOD__,
115113 array( 'ORDER BY' => 'cl_sortkey' )
116114 );
117115 while ( $x = $dbr->fetchObject ( $res ) ) {
@@ -332,9 +330,9 @@
333331 * Returns the number of matches.
334332 */
335333 function prepareSphinxClient( $term, $match_titles_only = false ) {
336 - global $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby,
337 - $wgSphinxSearch_host, $wgSphinxSearch_port, $wgSphinxSearch_index_weights,
338 - $wgSphinxSearch_index, $wgSphinxSearch_matches, $wgSphinxSearch_mode, $wgSphinxSearch_weights,
 334+ global $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby, $wgSphinxSearch_host,
 335+ $wgSphinxSearch_port, $wgSphinxSearch_index_weights, $wgSphinxSearch_index,
 336+ $wgSphinxSearch_matches, $wgSphinxSearch_mode, $wgSphinxSearch_weights,
339337 $wgSphinxMatchAll, $wgSphinxSearch_maxmatches, $wgSphinxSearch_cutoff;
340338
341339 # don't do anything for blank searches
@@ -410,7 +408,8 @@
411409
412410 function wfSphinxDisplayResults( $term, $res, $cl ) {
413411
414 - global $wgOut, $wgSphinxSuggestMode, $wgSphinxSearch_matches, $wgSphinxSearch_index, $wgSphinxSearch_maxmatches;
 412+ global $wgOut, $wgSphinxSuggestMode, $wgSphinxSearch_matches,
 413+ $wgSphinxSearch_index, $wgSphinxSearch_maxmatches;
415414
416415 if ($cl->GetLastWarning()) {
417416 $wgOut->addWikiText( wfMsg( 'sphinxSearchWarning', $cl->GetLastWarning() ) . "\n\n");
@@ -418,13 +417,12 @@
419418 $found = $res['total_found'];
420419
421420 if ( $wgSphinxSuggestMode ) {
422 - $sc = new SphinxSearch_spell;
423 - $didyoumean = $sc->spell( $this->search_term );
 421+ $didyoumean = $this->spell();
424422 if ( $didyoumean ) {
425423 $wgOut->addhtml( wfMsg( 'sphinxSearchDidYouMean' ) .
426424 " <b><a href='" .
427425 $this->getActionURL( $didyoumean, $this->namespaces ) .
428 - "1'>" . $didyoumean . '</a></b>?'
 426+ "1'>" . $didyoumean . '</a></b>'
429427 );
430428 }
431429 }
@@ -461,7 +459,7 @@
462460
463461 if ( isset( $res["matches"] ) && is_array( $res["matches"] ) ) {
464462 $wgOut->addWikiText( "----" );
465 - $db = wfGetDB( DB_SLAVE );
 463+ $dbr = wfGetDB( DB_SLAVE );
466464 $excerpts_opt = array(
467465 "before_match" => "<span style='color:red'>",
468466 "after_match" => "</span>",
@@ -469,22 +467,30 @@
470468 "limit" => 400,
471469 "around" => 15
472470 );
473 -
474471 foreach ( $res["matches"] as $doc => $docinfo ) {
475 - $sql = "SELECT old_text FROM " . $db->tableName( 'text' ) . " WHERE old_id=" . $docinfo['attrs']['old_id'];
476 - $res = $db->query( $sql, __METHOD__ );
477 - if ( $db->numRows( $res ) ) {
478 - $row = $db->fetchRow( $res );
 472+ $page_content = $dbr->selectField(
 473+ 'text', 'old_text',
 474+ array(
 475+ 'old_id' => $docinfo['attrs']['old_id']
 476+ ),
 477+ __METHOD__
 478+ );
 479+ if ( $page_content ) {
479480 $title_obj = Title::newFromID( $doc );
480481 if ( is_object( $title_obj ) ) {
481482 $wiki_title = $title_obj->getPrefixedText();
482483 $wiki_path = $title_obj->getPrefixedDBkey();
483484 $wgOut->addWikiText( "* <span style='font-size:110%;'>[[:$wiki_path|$wiki_title]]</span>" );
484 -
 485+
485486 # uncomment this line to see the weights etc. as HTML comments in the source of the page
486487 # $wgOut->addHTML("<!-- page_id: ".$doc."\ninfo: ".print_r($docinfo, true)." -->");
487 -
488 - $excerpts = $cl->BuildExcerpts( array( $row[0] ), $wgSphinxSearch_index, $term, $excerpts_opt );
 488+
 489+ $excerpts = $cl->BuildExcerpts(
 490+ array( $page_content ),
 491+ $wgSphinxSearch_index,
 492+ $term,
 493+ $excerpts_opt
 494+ );
489495 if ( !is_array( $excerpts ) ) {
490496 $excerpts = array( wfMsg( 'sphinxSearchWarning', $cl->GetLastError() ) );
491497 }
@@ -498,7 +504,6 @@
499505 }
500506 }
501507 }
502 - $db->freeResult( $res );
503508 }
504509 $time = number_format( microtime( true ) - $start_time, 3);
505510 $wgOut->addWikiText( wfMsg( 'sphinxSearchEpilogue', $time ) );
@@ -586,8 +591,9 @@
587592 }
588593
589594 function createNewSearchForm( $term ) {
590 - global $wgOut, $wgDisableInternalSearch, $wgSphinxSearch_mode, $wgSphinxMatchAll, $wgUseExcludes;
591 - global $wgUseAjax, $wgJsMimeType, $wgScriptPath, $wgSphinxSearchExtPath, $wgSphinxSearchJSPath, $wgRequest;
 595+ global $wgOut, $wgDisableInternalSearch, $wgSphinxSearch_mode, $wgSphinxMatchAll,
 596+ $wgUseExcludes, $wgUseAjax, $wgJsMimeType, $wgScriptPath,
 597+ $wgSphinxSearchExtPath, $wgSphinxSearchJSPath, $wgRequest;
592598
593599 $search_title = ( $wgDisableInternalSearch ? 'Search' : 'SphinxSearch' );
594600 $titleObj = SpecialPage::getTitleFor( $search_title );
@@ -647,7 +653,7 @@
648654 $wgOut->addHTML('<br />');
649655 $wgOut->addHTML( $this->getCategoryCheckboxes( $all_categories, '', $cat_parents ) );
650656 }
651 - $wgOut->addHTML( "</div></form><br clear='both'>" );
 657+ $wgOut->addHTML( "</div></form><br clear='both' />" );
652658
653659 # Put a Sphinx label for this search
654660 $wgOut->addHTML( "<div style='text-align:center'>" .
@@ -707,6 +713,108 @@
708714 return $html;
709715 }
710716
 717+ function spell() {
 718+ $string = str_replace( '"', '', $this->search_term );
 719+ $words = preg_split( '/(\s+|\|)/', $string, -1, PREG_SPLIT_NO_EMPTY );
 720+ if ( function_exists( 'pspell_check' ) ) {
 721+ $suggestion = $this->builtin_spell($words);
 722+ } else {
 723+ $suggestion = $this->nonnative_spell($words);
 724+ }
 725+ return $suggestion;
 726+ }
 727+
 728+ function builtin_spell($words) {
 729+ global $wgUser, $wgSphinxSearchPersonalDictionary, $wgSphinxSearchPspellDictionaryDir;
 730+
 731+ $ret = '';
 732+ $suggestion_needed = false;
 733+ foreach ( $words as $word ) {
 734+ $pspell_config = pspell_config_create(
 735+ $wgUser->getDefaultOption( 'language' ),
 736+ $wgUser->getDefaultOption( 'variant' )
 737+ );
 738+ if ( $wgSphinxSearchPspellDictionaryDir ) {
 739+ pspell_config_data_dir( $pspell_config, $wgSphinxSearchPspellDictionaryDir );
 740+ pspell_config_dict_dir( $pspell_config, $wgSphinxSearchPspellDictionaryDir );
 741+ }
 742+ pspell_config_mode( $pspell_config, PSPELL_FAST | PSPELL_RUN_TOGETHER );
 743+ if ( $wgSphinxSearchPersonalDictionary ) {
 744+ pspell_config_personal( $pspell_config, $wgSphinxSearchPersonalDictionary );
 745+ }
 746+ $pspell_link = pspell_new_config( $pspell_config );
 747+
 748+ if ( !$pspell_link ) {
 749+ return wfMsg( 'sphinxPspellError' );
 750+ }
 751+ if ( !pspell_check( $pspell_link, $word ) ) {
 752+ $suggestions = pspell_suggest( $pspell_link, $word );
 753+ if ( count( $suggestions ) ) {
 754+ $guess = array_shift($suggestions);
 755+ } else {
 756+ $guess = '';
 757+ }
 758+ if ( !$guess || (strtolower( $word ) == strtolower( $guess )) ) {
 759+ $ret .= "$word ";
 760+ } else {
 761+ $ret .= "$guess ";
 762+ $suggestion_needed = true;
 763+ }
 764+ } else {
 765+ $ret .= "$word ";
 766+ }
 767+ }
 768+
 769+ return ( $suggestion_needed ? trim( $ret ) : '' );
 770+ }
 771+
 772+ function nonnative_spell($words) {
 773+ global $wgUser, $wgSphinxSearchPersonalDictionary, $wgSphinxSearchAspellPath;
 774+
 775+ // aspell will only return mis-spelled words, so remember all here
 776+ $word_suggestions = array();
 777+ foreach ( $words as $word ) {
 778+ $word_suggestions[$word] = $word;
 779+ }
 780+
 781+ // prepare the system call with optional dictionary
 782+ $aspellcommand = 'echo ' . escapeshellarg( join( ' ', $words ) ) .
 783+ ' | ' . escapeshellarg( $wgSphinxSearchAspellPath ) .
 784+ ' -a --ignore-accents --ignore-case';
 785+ if ( $wgUser ) {
 786+ $aspellcommand .= ' --lang=' . $wgUser->getDefaultOption( 'language' );
 787+ }
 788+ if ( $wgSphinxSearchPersonalDictionary ) {
 789+ $aspellcommand .= ' --home-dir=' . dirname( $wgSphinxSearchPersonalDictionary );
 790+ $aspellcommand .= ' -p ' . basename( $wgSphinxSearchPersonalDictionary );
 791+ }
 792+
 793+ // run aspell
 794+ $shell_return = shell_exec( $aspellcommand );
 795+
 796+ // parse return line by line
 797+ $returnarray = explode( "\n", $shell_return );
 798+ $suggestion_needed = false;
 799+ foreach ( $returnarray as $key => $value ) {
 800+ // lines with suggestions start with &
 801+ if ( substr( $value, 0, 1 ) == "&" ) {
 802+ $correction = explode( " ", $value );
 803+ $word = $correction[1];
 804+ $suggestions = substr( $value, strpos( $value, ":" ) + 2 );
 805+ $suggestions = explode( ", ", $suggestions );
 806+ if (count($suggestions)) {
 807+ $guess = array_shift($suggestions);
 808+ if ( strtolower( $word ) != strtolower( $guess ) ) {
 809+ $word_suggestions[$word] = $guess;
 810+ $suggestion_needed = true;
 811+ }
 812+ }
 813+ }
 814+ }
 815+
 816+ return ( $suggestion_needed ? join( ' ', $word_suggestions ) : '' );
 817+ }
 818+
711819 }
712820
713821 /**
Index: trunk/extensions/SphinxSearch/SphinxSearch.i18n.php
@@ -17,7 +17,7 @@
1818 'sphinxSearchStatsInfo' => 'Above numbers may include documents not listed due to search options.',
1919 'sphinxSearchButton' => 'Search',
2020 'sphinxSearchEpilogue' => 'Additional database time was $1 sec.',
21 - 'sphinxSearchDidYouMean' => 'Did you mean',
 21+ 'sphinxSearchDidYouMean' => 'Did you mean:',
2222 'sphinxMatchAny' => 'match any word',
2323 'sphinxMatchAll' => 'match all words',
2424 'sphinxMatchTitles' => 'match titles only',
@@ -25,7 +25,8 @@
2626 'sphinxPowered' => 'Powered by $1',
2727 'sphinxClientFailed' => 'Could not instantiate Sphinx client.',
2828 'sphinxSearchFailed' => 'Query failed: $1',
29 - 'sphinxSearchWarning' => 'Warning: $1'
 29+ 'sphinxSearchWarning' => 'Warning: $1',
 30+ 'sphinxPspellError' => 'Could not invoke pspell extension.'
3031 );
3132
3233 /** Belarusian (Taraškievica orthography) (Беларуская (тарашкевіца))
@@ -46,7 +47,7 @@
4748 'sphinxSearchStatsInfo' => 'Прыведзеныя лічбы могуць утрымліваць дакумэнты, не паказаныя з-за установак пошуку.',
4849 'sphinxSearchButton' => 'Шукаць',
4950 'sphinxSearchEpilogue' => 'Дадатковы час базы зьвестак склаў $1 с.',
50 - 'sphinxSearchDidYouMean' => 'Вы мелі на ўвазе',
 51+ 'sphinxSearchDidYouMean' => 'Вы мелі на ўвазе:',
5152 'sphinxMatchAny' => 'супадзеньне з любым словам',
5253 'sphinxMatchAll' => 'супадзеньне па ўсім словам',
5354 'sphinxMatchTitles' => 'супадзеньне толькі загалоўкаў',
@@ -76,7 +77,7 @@
7778 'sphinxNextPage' => "War-lerc'h",
7879 'sphinxSearchStats' => '"$1" bet kavet $2 gwech e $3 teul',
7980 'sphinxSearchButton' => 'Klask',
80 - 'sphinxSearchDidYouMean' => "N'hoc'h eus ket soñjet kentoc'h e",
 81+ 'sphinxSearchDidYouMean' => "N'hoc'h eus ket soñjet kentoc'h e :",
8182 'sphinxMatchAny' => 'kavout forzh peseurt ger',
8283 'sphinxMatchAll' => 'kavout an holl gerioù',
8384 'sphinxMatchTitles' => 'klask en titloù hepken',
@@ -187,7 +188,7 @@
188189 'sphinxSearchStatsInfo' => 'In däre Zahl wäre villicht Dokumänt mitzellt, wu wäge bstimmte Suechyystellige nit ufglischtet wäre',
189190 'sphinxSearchButton' => 'Sueche',
190191 'sphinxSearchEpilogue' => 'Zuesätzligi Datebankzyt isch $1 Sekunde gsi.',
191 - 'sphinxSearchDidYouMean' => 'Hesch gmeint',
 192+ 'sphinxSearchDidYouMean' => 'Hesch gmeint:',
192193 'sphinxMatchAny' => 'irged e Wort',
193194 'sphinxMatchAll' => 'alli Werter',
194195 'sphinxMatchTitles' => 'nume Sytename',
@@ -215,7 +216,7 @@
216217 'sphinxSearchStatsInfo' => 'Le numeros hic supra pote includer documentos non listate debite al optiones de recerca.',
217218 'sphinxSearchButton' => 'Cercar',
218219 'sphinxSearchEpilogue' => 'Le tempore additional del base de datos esseva $1 secundas.',
219 - 'sphinxSearchDidYouMean' => 'Esque tu vole dicer',
 220+ 'sphinxSearchDidYouMean' => 'Esque tu vole dicer:',
220221 'sphinxMatchAny' => 'trovar qualcunque parola',
221222 'sphinxMatchAll' => 'trovar tote le parolas',
222223 'sphinxMatchTitles' => 'cercar solmente in titulos',
@@ -237,7 +238,7 @@
238239 'sphinxNextPage' => '次',
239240 'sphinxSearchStats' => '"$1"が、$2の文書で、$3コ見つかりました',
240241 'sphinxSearchButton' => '検索',
241 - 'sphinxSearchDidYouMean' => 'もしかして',
 242+ 'sphinxSearchDidYouMean' => 'もしかして:',
242243 'sphinxPowered' => '$1の提供',
243244 'sphinxSearchWarning' => '警告: $1',
244245 );
@@ -255,7 +256,7 @@
256257 'sphinxNextPage' => 'Nächst',
257258 'sphinxSearchStats' => '"$1" gouf $2 mol an $3 Dokumenter fonnt',
258259 'sphinxSearchButton' => 'Sichen',
259 - 'sphinxSearchDidYouMean' => 'Mengt Dir',
 260+ 'sphinxSearchDidYouMean' => 'Mengt Dir:',
260261 'sphinxMatchTitles' => 'nëmmen an den Titele sichen',
261262 'sphinxLoading' => 'Lueden...',
262263 'sphinxSearchFailed' => 'Ufro huet net fonctionnéiert: $1',
@@ -279,15 +280,16 @@
280281 'sphinxSearchStatsInfo' => 'Горенаведените бројки може да содржат документи кои не се наведени поради нагодувањата на пребарувањето.',
281282 'sphinxSearchButton' => 'Пребарај',
282283 'sphinxSearchEpilogue' => 'Дополнителното време за базата на податоци изнесуваше $1 сек.',
283 - 'sphinxSearchDidYouMean' => 'Дали мислевте на',
 284+ 'sphinxSearchDidYouMean' => 'Дали мислевте на:',
284285 'sphinxMatchAny' => 'барај било кој збор',
285286 'sphinxMatchAll' => 'барај само зборови',
286287 'sphinxMatchTitles' => 'барај само наслови',
287288 'sphinxLoading' => 'Вчитувам...',
288289 'sphinxPowered' => 'Овозможено од $1',
289 - 'sphinxClientFailed' => 'Не можев да го повикам Sphinx клиент.',
 290+ 'sphinxClientFailed' => 'Не можев да повикам Sphinx клиент.',
290291 'sphinxSearchFailed' => 'Барањето не успеа $1',
291292 'sphinxSearchWarning' => 'Предупредување $1',
 293+ 'sphinxPspellError' => 'Не можев да повикам pspell проширување.'
292294 );
293295
294296 /** Dutch (Nederlands)
@@ -305,7 +307,7 @@
306308 'sphinxSearchPreamble' => 'Resultaten $1 tot $2 van $3 worden weergegeven voor zoekopdracht "<nowiki>$4</nowiki>". Zoektijd: $5 seconden',
307309 'sphinxSearchButton' => 'Zoeken',
308310 'sphinxSearchEpilogue' => 'Aanvullende databasetijd was $1 seconden.',
309 - 'sphinxSearchDidYouMean' => 'Bedoelde u',
 311+ 'sphinxSearchDidYouMean' => 'Bedoelde u:',
310312 'sphinxMatchAny' => 'ieder woord',
311313 'sphinxMatchAll' => 'alle woorden',
312314 'sphinxMatchTitles' => 'alleen paginanamen',
@@ -361,7 +363,7 @@
362364 'sphinxSearchStatsInfo' => "Ij nùmer sota a peulo anclude document pa listà a motiv dj'opsion d'arserca",
363365 'sphinxSearchButton' => 'Sërca',
364366 'sphinxSearchEpilogue' => "Temp adissional ëd database a l'é stàit \$1 sec.",
365 - 'sphinxSearchDidYouMean' => 'Vorìi-lo pa dì',
 367+ 'sphinxSearchDidYouMean' => 'Vorìi-lo pa dì:',
366368 'sphinxMatchAny' => 'confronta minca paròla',
367369 'sphinxMatchAll' => 'confranta tute paròle',
368370 'sphinxMatchTitles' => 'confronta mach ij tìtoj',
Index: trunk/extensions/SphinxSearch/SphinxSearch.php
@@ -1,11 +1,10 @@
22 <?php
33
4 -# Alert the user that this is not a valid entry point to MediaWiki if they try to access the special pages file directly.
 4+# Alert the user that this is not a valid entry point to MediaWiki if they try to access the file directly.
55 if ( !defined( 'MEDIAWIKI' ) ) {
66 echo <<<EOT
77 To install SphinxSearch extension, put the following line in LocalSettings.php:
88 require_once( "\$IP/extensions/SphinxSearch/SphinxSearch.php" );
9 -
109 EOT;
1110 exit( 1 );
1211 }
@@ -26,11 +25,9 @@
2726 $wgExtensionMessagesFiles['SphinxSearch'] = $dir . 'SphinxSearch.i18n.php';
2827 $wgExtensionAliasesFiles['SphinxSearch'] = $dir . 'SphinxSearch.alias.php';
2928
30 -# #########################################################
3129 # To completely disable the default search and replace it with SphinxSearch,
3230 # set this BEFORE including SphinxSearch.php in LocalSettings.php
3331 # $wgSearchType = 'SphinxSearch';
34 -# #########################################################
3532
3633 if ( $wgSearchType == 'SphinxSearch' ) {
3734 $wgDisableInternalSearch = true;
@@ -64,6 +61,7 @@
6562 # "wiki_main" => 100,
6663 # "wiki_incremental" => 10
6764 # );
 65+$wgSphinxSearch_index_weights = null;
6866
6967 # Default Sphinx search mode
7068 $wgSphinxSearch_mode = SPH_MATCH_EXTENDED;
@@ -73,33 +71,37 @@
7472 $wgSphinxSearch_sortby = '';
7573
7674 # By default, search will return articles that match any of the words in the search
77 -# To change that to require all words to match by default, set the following to true
 75+# To change that to require all words to match by default, set the following to true
7876 $wgSphinxMatchAll = false;
7977
8078 # Number of matches to display at once
8179 $wgSphinxSearch_matches = 10;
8280 # How many matches searchd will keep in RAM while searching
8381 $wgSphinxSearch_maxmatches = 1000;
84 -# When to stop searching all together (if different from zero)
 82+# When to stop searching all together (if not zero)
8583 $wgSphinxSearch_cutoff = 0;
8684
8785 # Weights of individual indexed columns. This gives page titles extra weight
88 -$wgSphinxSearch_weights = array( 'old_text' => 1, 'page_title' => 100 );
 86+$wgSphinxSearch_weights = array(
 87+ 'old_text' => 1,
 88+ 'page_title' => 100
 89+);
8990
90 -# If you want to enable hierarchical category search, specify the top category of your hierarchy like this
91 -# $wgSphinxTopSearchableCategory = 'Subject_areas';
 91+# To enable hierarchical category search, specify the top category of your hierarchy
 92+$wgSphinxTopSearchableCategory = '';
9293
93 -# If you want sub-categories to be fetched as parent categories are checked,
94 -# also set $wgUseAjax to true in your LocalSettings file, so that the following can be used:
95 -# $wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren';
 94+# This will fetch sub-categories as parent categories are checked
 95+# Requires $wgUseAjax to be true
 96+$wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren';
9697
9798 # EXPERIMENTAL: allow excluding selected categories when filtering
98 -# $wgUseExcludes = true;
 99+$wgUseExcludes = false;
99100
100101 # Web-accessible path to the extension's folder
101 -$wgSphinxSearchExtPath = '/extensions/SphinxSearch';
 102+$wgSphinxSearchExtPath = $wgScriptPath . '/extensions/SphinxSearch';
 103+
102104 # Web-accessible path to the folder with SphinxSearch.js file (if different from $wgSphinxSearchExtPath)
103 -# $wgSphinxSearchJSPath = '';
 105+$wgSphinxSearchJSPath = '';
104106
105107 # #########################################################
106108 # Use Aspell to suggest possible misspellings. This can be provided via
@@ -110,20 +112,10 @@
111113 $wgSphinxSuggestMode = false;
112114
113115 # Path to personal dictionary (for example personal.en.pws.) Needed only if using a personal dictionary
114 -# Should be set BEFORE SphinxSearch.php is included in LocalSettings
115 -if ( !isset( $wgSphinxSearchPersonalDictionary ) ) {
116 - $wgSphinxSearchPersonalDictionary = "";
117 -}
 116+$wgSphinxSearchPersonalDictionary = '';
118117
119 -# Here is why above var needs to be set before SphinxSearch is included.
120 -# We setup a special page to edit the personal dictionary.
121 -if ( $wgSphinxSearchPersonalDictionary ) {
122 - $wgAutoloadClasses['SphinxSearchPersonalDict'] = $dir . 'SphinxSearch_PersonalDict.php';
123 - $wgSpecialPages['SphinxSearchPersonalDict'] = 'SphinxSearchPersonalDict';
124 -}
125 -
126118 # Path to Aspell. Used only if your PHP does not have the pspell extension.
127119 $wgSphinxSearchAspellPath = "/usr/bin/aspell";
128120
129121 # Path to aspell location and language data files. Do not set if not sure.
130 -# $wgSphinxSearchPspellDictionaryDir = "/usr/lib/aspell";
 122+$wgSphinxSearchPspellDictionaryDir = '';

Comments

#Comment by Siebrand (talk | contribs)   16:42, 11 April 2010

Please don't update translations for languages other than English. There are a few reasons for that: (a) you probably do not speak all of them, so you will be making incorrect assumptions; (b) it takes up your time, and that is time you could have spent working on functionality. The guys at translatewiki will take care of cleaning and tagging translations, and for us it's easier if we see as few changes as possible to languages other than English made outside of our environment.

#Comment by Svemir Brkic (talk | contribs)   17:10, 11 April 2010

Thanks, good to know. I was not sure how well integrated things are when existing messages are modified, so I felt I should touch up on those where I added the colons and numbered placeholders into the messages. I do speak or understand three of the languages listed, but for others I just followed what was there already - if they had a colon or a period where English had them as well, I added those in the new places too. Now I am glad I did not add a few more languages in there. I do have more things to work on, but those translators better catch up soon :-)

Status & tagging log