Index: trunk/extensions/SphinxSearch/SphinxSearch.php |
— | — | @@ -65,7 +65,7 @@ |
66 | 66 | $wgSphinxSearch_index_weights = null; |
67 | 67 | |
68 | 68 | # Default Sphinx search mode |
69 | | -$wgSphinxSearch_mode = SPH_MATCH_EXTENDED; |
| 69 | +$wgSphinxSearch_mode = SPH_MATCH_EXTENDED2; |
70 | 70 | |
71 | 71 | # Default sort mode |
72 | 72 | $wgSphinxSearch_sortmode = SPH_SORT_RELEVANCE; |
Index: trunk/extensions/SphinxSearch/SphinxMWSearch.php |
— | — | @@ -18,6 +18,12 @@ |
19 | 19 | var $exc_categories = array(); |
20 | 20 | var $db; |
21 | 21 | var $sphinx_client = null; |
| 22 | + var $prefix_handlers = array( |
| 23 | + 'all' => 'searchAllNamespaces', |
| 24 | + 'intitle' => 'filterByTitle', |
| 25 | + 'incategory' => 'filterByCategory', |
| 26 | + 'prefix' => 'filterByPrefix', |
| 27 | + ); |
22 | 28 | |
23 | 29 | /** |
24 | 30 | * Do not go to a near match if query prefixed with ~ |
— | — | @@ -117,29 +123,26 @@ |
118 | 124 | |
119 | 125 | $cl = new SphinxClient(); |
120 | 126 | |
121 | | - // setup the options for searching |
122 | | - if ( isset( $wgSphinxSearch_host ) && isset( $wgSphinxSearch_port ) ) { |
123 | | - $cl->SetServer( $wgSphinxSearch_host, $wgSphinxSearch_port ); |
124 | | - } |
125 | | - if ( count( $wgSphinxSearch_weights ) ) { |
| 127 | + $cl->SetServer( $wgSphinxSearch_host, $wgSphinxSearch_port ); |
| 128 | + if ( $wgSphinxSearch_weights && count( $wgSphinxSearch_weights ) ) { |
126 | 129 | $cl->SetFieldWeights( $wgSphinxSearch_weights ); |
127 | 130 | } |
128 | 131 | if ( is_array( $wgSphinxSearch_index_weights ) ) { |
129 | 132 | $cl->SetIndexWeights( $wgSphinxSearch_index_weights ); |
130 | 133 | } |
131 | | - if ( isset( $wgSphinxSearch_mode ) ) { |
| 134 | + if ( $wgSphinxSearch_mode ) { |
132 | 135 | $cl->SetMatchMode( $wgSphinxSearch_mode ); |
133 | 136 | } |
134 | | - if ( count( $this->namespaces ) ) { |
| 137 | + if ( $this->namespaces && count( $this->namespaces ) ) { |
135 | 138 | $cl->SetFilter( 'page_namespace', $this->namespaces ); |
136 | 139 | } |
137 | 140 | if( !$this->showRedirects ) { |
138 | 141 | $cl->SetFilter( 'page_is_redirect', array( 0 ) ); |
139 | 142 | } |
140 | | - if ( count( $this->categories ) ) { |
| 143 | + if ( $this->categories && count( $this->categories ) ) { |
141 | 144 | $cl->SetFilter( 'category', $this->categories ); |
142 | 145 | } |
143 | | - if ( count( $this->exc_categories ) ) { |
| 146 | + if ( $this->exc_categories && count( $this->exc_categories ) ) { |
144 | 147 | $cl->SetFilter( 'category', $this->exc_categories, true ); |
145 | 148 | } |
146 | 149 | $cl->SetSortMode( $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby ); |
— | — | @@ -167,6 +170,136 @@ |
168 | 171 | return array( $contextlines, $contextchars ); |
169 | 172 | } |
170 | 173 | |
/**
 * Prepare query for sphinx search daemon
 *
 * A leading "~" is removed, text inside double quotes is copied through
 * untouched, and in the unquoted parts every "prefix:term" token is passed to
 * replaceQueryPrefix() while " OR " / " AND " become " | " / " & ".
 *
 * @param string $query
 * @return string rewritten query
 */
function replacePrefixes( $query ) {
	$query = (string)$query;
	// Nothing to rewrite; also avoids reading offset 0 of an empty string
	if ( $query === '' ) {
		return '';
	}

	// ~ prefix is used to avoid near-term search, remove it now
	if ( $query[ 0 ] === '~' ) {
		// cast: substr() yields false instead of '' on older PHP versions
		$query = (string)substr( $query, 1 );
	}

	// built on first use, then shared by every unquoted part
	$pattern = null;

	$parts = preg_split( '/(")/', $query, -1, PREG_SPLIT_DELIM_CAPTURE );
	$inquotes = false;
	$rewritten = '';
	foreach ( $parts as $part ) {
		if ( $part === '"' ) {
			// a quote character toggles the "inside quotes" state
			$rewritten .= $part;
			$inquotes = !$inquotes;
			continue;
		}
		if ( $inquotes ) {
			// stuff in quotes doesn't get rewritten
			$rewritten .= $part;
			continue;
		}

		// the pattern requires a colon, so parts without one are skipped
		if ( strpos( $part, ':' ) !== false ) {
			if ( $pattern === null ) {
				$pattern = '/(^|[| :])(' . $this->preparePrefixRegexp() . '):([^ ]+)/i';
			}
			$replaced = preg_replace_callback(
				$pattern,
				array( $this, 'replaceQueryPrefix' ),
				$part
			);
			// null signals a PCRE failure; keep the user's text in that case
			if ( $replaced !== null ) {
				$part = $replaced;
			}
		}

		$rewritten .= str_replace(
			array( ' OR ', ' AND ' ),
			array( ' | ', ' & ' ),
			$part
		);
	}
	return $rewritten;
}
| 213 | + |
/**
 * Build the alternation of every prefix recognised in front of a colon.
 *
 * Collects the content-language namespace names, the canonical namespace
 * names, the namespace aliases and the keys of $this->prefix_handlers.
 * As a side effect the localised "searchall" keyword is registered in
 * $this->prefix_handlers as an alias for searchAllNamespaces.
 *
 * @return string Regexp to match namespaces and other prefixes; each
 *   alternative is escaped for use inside a "/"-delimited pattern
 */
function preparePrefixRegexp() {
	global $wgContLang, $wgCanonicalNamespaceNames, $wgNamespaceAliases;

	$nsNamesRaw = array_merge(
		$wgContLang->getNamespaces(),
		$wgCanonicalNamespaceNames,
		array_keys( array_merge( $wgNamespaceAliases, $wgContLang->getNamespaceAliases() ) )
	);

	// "search everything" keyword; an empty keyword would turn into an empty
	// alternative that matches a bare ":term", so it is not registered
	$allkeyword = wfMsgForContent( 'searchall' );
	if ( is_string( $allkeyword ) && $allkeyword !== '' ) {
		$this->prefix_handlers[ $allkeyword ] = 'searchAllNamespaces';
	}

	$alternatives = array();

	// add all namespace names w/o spaces
	foreach ( $nsNamesRaw as $ns ) {
		if ( (string)$ns === '' ) {
			// skip the empty name
			continue;
		}
		// escape regex metacharacters so a name cannot corrupt the pattern
		$alternatives[] = preg_quote( str_replace( ' ', '_', $ns ), '/' );
	}

	// add other kinds of prefixes we support
	foreach ( array_keys( $this->prefix_handlers ) as $keyword ) {
		$alternatives[] = preg_quote( (string)$keyword, '/' );
	}

	return implode( '|', array_unique( $alternatives ) );
}
| 243 | + |
/**
 * preg callback to process foo: prefixes in the query
 *
 * Dispatches to the method registered for the prefix in
 * $this->prefix_handlers; any other prefix is handed to filterByNamespace().
 *
 * @param array $matches preg match; index 2 holds the prefix name
 * @return string replacement text produced by the chosen handler
 */
function replaceQueryPrefix( $matches ) {
	$prefix = $matches[ 2 ];

	// exact hit first
	if ( isset( $this->prefix_handlers[ $prefix ] ) ) {
		$callback = $this->prefix_handlers[ $prefix ];
		return $this->$callback( $matches );
	}

	// The pattern that feeds this callback is case-insensitive, so the
	// lookup has to be as well: "Intitle:foo" must reach filterByTitle
	foreach ( $this->prefix_handlers as $keyword => $callback ) {
		if ( strcasecmp( (string)$keyword, $prefix ) === 0 ) {
			return $this->$callback( $matches );
		}
	}

	return $this->filterByNamespace( $matches );
}
| 258 | + |
/**
 * Handle a "Namespace:term" token by adding the namespace to the filter.
 *
 * @param array $matches preg match: 0 = whole token, 1 = delimiter found
 *   before the prefix, 2 = namespace name, 3 = search term
 * @return string the whole token unchanged when the name does not resolve to
 *   a namespace index, otherwise the delimiter followed by the bare term
 */
function filterByNamespace( $matches ) {
	global $wgContLang;

	$inx = $wgContLang->getNsIndex( str_replace( ' ', '_', $matches[ 2 ] ) );
	if ( $inx === false ) {
		// not a namespace after all, leave the text alone
		return $matches[ 0 ];
	}

	$this->namespaces[] = $inx;
	// Put the delimiter back: without it "foo help:bar" collapses to "foobar"
	return $matches[ 1 ] . $matches[ 3 ];
}
| 269 | + |
/**
 * Handle the "search everything" prefix by clearing the namespace list.
 *
 * @param array $matches preg match: 1 = delimiter found before the prefix,
 *   3 = search term
 * @return string the delimiter followed by the bare term
 */
function searchAllNamespaces( $matches ) {
	$this->namespaces = null;
	// Put the delimiter back so the term is not glued to the preceding word
	return $matches[ 1 ] . $matches[ 3 ];
}
| 274 | + |
/**
 * Handle an "intitle:term" token by limiting the term to the page_title field.
 *
 * @param array $matches preg match: 1 = delimiter found before the prefix,
 *   3 = search term
 * @return string the delimiter followed by "@page_title " and the term
 */
function filterByTitle( $matches ) {
	// Put the delimiter back: dropping it glues the field operator to the
	// preceding word and loses a "|" (OR) written before the prefix
	return $matches[ 1 ] . '@page_title ' . $matches[ 3 ];
}
| 278 | + |
/**
 * Handle a "prefix:Title" or "prefix:Namespace:Title" token.
 *
 * When the text before the first colon resolves to a namespace index, the
 * search is limited to that namespace and only the remainder is used as the
 * title prefix. Otherwise the whole text is used as the title prefix.
 *
 * @param array $matches preg match: 1 = delimiter found before the prefix
 *   keyword, 3 = text after "prefix:"
 * @return string the delimiter followed by a "@page_title ^...*" expression
 */
function filterByPrefix( $matches ) {
	global $wgContLang;

	$prefix = $matches[ 3 ];
	if ( strpos( $prefix, ':' ) !== false ) {
		// limit 2: later colons stay in the title instead of being cut off
		list( $ns, $rest ) = explode( ':', $prefix, 2 );
		$inx = $wgContLang->getNsIndex( str_replace( ' ', '_', $ns ) );
		if ( $inx !== false ) {
			$this->namespaces = array( $inx );
			// strip the namespace only once it is known to be one
			$prefix = $rest;
		}
	}

	// Put the delimiter back so the expression stays separated from the
	// word in front of it
	return $matches[ 1 ] . '@page_title ^' . $prefix . '*';
}
| 291 | + |
/**
 * Handle an "incategory:Name" token.
 *
 * Looks up the page_id of the page titled Name in the category namespace and
 * appends its integer value to $this->categories. The token itself is removed
 * from the query text.
 *
 * @param array $matches preg match; index 3 holds the category title
 * @return string always the empty string
 */
function filterByCategory( $matches ) {
	$conditions = array(
		'page_title' => $matches[ 3 ],
		'page_namespace' => NS_CATEGORY
	);
	$categoryPageId = $this->db->selectField( 'page', 'page_id', $conditions, __METHOD__ );

	// intval() turns a non-numeric lookup result into 0
	// NOTE(review): presumably selectField returns false when no row matches - confirm
	$this->categories[] = intval( $categoryPageId );

	return '';
}
| 303 | + |
171 | 304 | } |
172 | 305 | |
173 | 306 | class SphinxMWSearchResultSet extends SearchResultSet { |