Index: branches/MWSearch-2.0/MWSearchUpdateHook.php |
— | — | @@ -33,7 +33,7 @@ |
34 | 34 | } |
35 | 35 | |
36 | 36 | $redirText = '#REDIRECT [[' . $to->getPrefixedText() . "]]\n"; |
37 | | - MWSearchUpdater::updatePage( $wgDBname, $from, $redirText ); |
| 37 | + MWSearchUpdater::updatePage( $wgDBname, $from, $redirText, 1 ); |
38 | 38 | return true; |
39 | 39 | } |
40 | 40 | ?> |
Index: branches/MWSearch-2.0/MWSearchUpdater.php |
— | — | @@ -6,9 +6,13 @@ |
7 | 7 | require_once( 'XML/RPC.php' ); |
8 | 8 | |
9 | 9 | $mwSearchUpdateHost = 'localhost'; |
10 | | -$mwSearchUpdatePort = 8124; |
| 10 | +$mwSearchUpdatePort = 8321; # HTTP default port is 8321 |
11 | 11 | $mwSearchUpdateDebug = false; |
12 | 12 | |
| 13 | +// Backend instance that the static MWSearchUpdater methods delegate to (HTTP transport).
| 14 | +$mwSearchUpdater = new HttpMWSearchUpdater;
| 15 | + |
| 16 | +/** Delegate class to either HttpMWSearchUpdater or XMLRPCMWSearchUpdater */ |
13 | 17 | class MWSearchUpdater { |
14 | 18 | /** |
15 | 19 | * Queue a request to update a page in the search index. |
— | — | @@ -19,12 +23,261 @@ |
20 | 24 | * @return bool |
21 | 25 | * @static |
22 | 26 | */ |
23 | | - function updatePage( $dbname, $title, $text ) { |
24 | | - return MWSearchUpdater::sendRPC( 'searchupdater.updatePage', |
25 | | - array( $dbname, $title, $text ) ); |
| 27 | + function updatePage( $dbname, $title, $text, $isredirect=0) { // $isredirect: truthy when the page is a redirect
| 28 | + global $mwSearchUpdater; // backend object that performs the actual request
| 29 | + return $mwSearchUpdater->updatePage( $dbname, $title, $text, $isredirect); // hand the backend's result back to the caller
26 | 30 | }
27 | 31 | |
28 | 32 | /**
| 33 | + * Queue a request to delete a page from the search index.
| 34 | + *
| 35 | + * @param string $dbname
| 36 | + * @param Title $title
| 37 | + * @return mixed whatever the configured backend's deletePage() returns
| 38 | + * @static
| 39 | + */
| 40 | + function deletePage( $dbname, $title ) {
| 41 | + global $mwSearchUpdater; // backend object that performs the actual request
| 42 | + return $mwSearchUpdater->deletePage( $dbname, $title );
| 43 | + }
| 44 | + |
| 45 | + /**
| 46 | + * Get a brief bit of status info on the update daemon, as reported by the configured backend.
| 47 | + * @return string|null status text from the backend; the HTTP backend returns null when it cannot open its socket
| 48 | + * @static
| 49 | + */
| 50 | + function getStatus() {
| 51 | + global $mwSearchUpdater; // backend object that performs the actual request
| 52 | + return $mwSearchUpdater->getStatus();
| 53 | + }
| 54 | + |
| 55 | + /**
| 56 | + * Request that the daemon start applying updates if it's stopped.
| 57 | + * @return mixed whatever the configured backend's start() returns
| 58 | + * @static
| 59 | + */
| 60 | + function start() {
| 61 | + global $mwSearchUpdater; // backend object that performs the actual request
| 62 | + return $mwSearchUpdater->start();
| 63 | + }
| 64 | + |
| 65 | + /**
| 66 | + * Request that the daemon stop applying updates and close open indexes.
| 67 | + * @return mixed whatever the configured backend's stop() returns
| 68 | + * @static
| 69 | + */
| 70 | + function stop() {
| 71 | + global $mwSearchUpdater; // backend object that performs the actual request
| 72 | + return $mwSearchUpdater->stop();
| 73 | + }
| 74 | + |
| 75 | + /**
| 76 | + * Send the daemon its quit command. NOTE(review): the previous text was a copy of stop()'s description; presumably this shuts the daemon down - confirm.
| 77 | + * @return mixed whatever the configured backend's quit() returns
| 78 | + * @static
| 79 | + */
| 80 | + function quit() {
| 81 | + global $mwSearchUpdater; // backend object that performs the actual request
| 82 | + return $mwSearchUpdater->quit();
| 83 | + }
| 84 | + |
| 85 | + /**
| 86 | + * Request that the daemon flush and reopen all indexes, without changing
| 87 | + * the global is-running state.
| 88 | + * @return mixed whatever the configured backend's flushAll() returns
| 89 | + * @static
| 90 | + */
| 91 | + function flushAll() {
| 92 | + global $mwSearchUpdater; // backend object that performs the actual request
| 93 | + return $mwSearchUpdater->flushAll();
| 94 | + }
| 95 | + |
| 96 | + /**
| 97 | + * Request that the daemon flush and reopen all indexes, without changing
| 98 | + * the global is-running state, and that indexes should be optimized when
| 99 | + * closed.
| 100 | + * @return mixed whatever the configured backend's optimize() returns
| 101 | + * @static
| 102 | + */
| 103 | + function optimize() {
| 104 | + global $mwSearchUpdater; // backend object that performs the actual request
| 105 | + return $mwSearchUpdater->optimize();
| 106 | + }
| 107 | + |
| 108 | + /**
| 109 | + * Request that the daemon flush and reopen the index named by $dbname, without changing
| 110 | + * the global is-running state.
| 111 | + * @return mixed whatever the configured backend's flush() returns
| 112 | + * @static
| 113 | + */
| 114 | + function flush( $dbname ) {
| 115 | + global $mwSearchUpdater; // backend object that performs the actual request
| 116 | + return $mwSearchUpdater->flush($dbname);
| 117 | + }
| 118 | + |
| 119 | + /**
| 120 | + * Request that the daemon make a snapshot of all indexes
| 121 | + * (the HTTP backend sends this as a POST to /makeSnapshots).
| 122 | + * @return mixed whatever the configured backend's snapshot() returns
| 123 | + * @static
| 124 | + */
| 125 | + function snapshot() {
| 126 | + global $mwSearchUpdater; // backend object that performs the actual request
| 127 | + return $mwSearchUpdater->snapshot();
| 128 | + }
| 129 | + |
| 130 | +} |
| 131 | + |
| 132 | +class HttpMWSearchUpdater{ // Backend that sends each command to the update daemon as an HTTP/1.0 POST
| 133 | +
| 134 | + /**
| 135 | + * Call a remote method via the special http server and return the reply body.
| 136 | + * URI: /method?param1=value1&param2=value2
| 137 | + * (all values urlencoded); $content, when not null, is sent as the POST body. Returns null if the socket cannot be opened.
| 138 | + */
| 139 | + function invokeRemote( $uri, $content = null){
| 140 | + global $mwSearchUpdateHost, $mwSearchUpdatePort, $mwSearchUpdateDebug;
| 141 | + // a fresh connection is opened for every call (HTTP/1.0, no keep-alive)
| 142 | + $host = $mwSearchUpdateHost;
| 143 | + $port = $mwSearchUpdatePort;
| 144 | +
| 145 | + if($content === null){
| 146 | + $req =
| 147 | + "POST $uri HTTP/1.0\r\n".
| 148 | + "\r\n";
| 149 | + } else{
| 150 | + $contentLength = strlen($content); // byte length, as Content-Length requires
| 151 | + $req =
| 152 | + "POST $uri HTTP/1.0\r\n".
| 153 | + "Content-Type: application/octet-stream\r\n".
| 154 | + "Content-Length: $contentLength\r\n\r\n".
| 155 | + "$content";
| 156 | + }
| 157 | + // open socket (10 second connect timeout)
| 158 | + $socket = fsockopen($host, $port, $errno, $errstr, 10);
| 159 | + if(!$socket){
| 160 | + $debug = "MWSearchUpdater.php: Error opening socket\n";
| 161 | + wfDebug($debug);
| 162 | + if( $mwSearchUpdateDebug )
| 163 | + print($debug);
| 164 | + return null;
| 165 | + }
| 166 | +
| 167 | + // send request
| 168 | + fwrite($socket, $req);
| 169 | +
| 170 | + // read server reply: header lines up to the first blank line (or EOF)
| 171 | + $headers = "";
| 172 | + while ($str = trim(fgets($socket, 4096)))
| 173 | + $headers .= "$str\n";
| 174 | +
| 175 | + $body = "";
| 176 | + while (!feof($socket) && ($chunk = fgets($socket, 4096)) !== false) // stop on read failure: feof() stays false on a timed-out socket
| 177 | + $body .= $chunk;
| 178 | +
| 179 | + // no keep-alive, just close the connection
| 180 | + fclose($socket);
| 181 | +
| 182 | + // process headers, just read the http code
| 183 | + $headerLines = explode("\n",$headers);
| 184 | + $code = $headerLines[0];
| 185 | + $bits = array_pad( explode( ' ', $code, 3 ), 3, '' ); // always [version, status, reason phrase], even for an empty reply
| 186 | +
| 187 | + // report if there was an error (the body is still returned to the caller)
| 188 | + if($bits[1]!=="200"){
| 189 | + $debug = "MWSearchUpdater.php: Error invoking remote procedure with uri $uri, got: ".$bits[1].' '.$bits[2]."\n";
| 190 | +
| 191 | + wfDebug( $debug );
| 192 | + if( $mwSearchUpdateDebug ) {
| 193 | + echo $debug;
| 194 | + }
| 195 | + }
| 196 | + // get reply if any
| 197 | + $ret = $body;
| 198 | +
| 199 | + return $ret;
| 200 | + }
| 201 | +
| 202 | + function updatePage( $dbname, $title, $text, $isredirect=0 ) { // POST the page text to /updatePage
| 203 | + $ns = $title->getNamespace();
| 204 | + $titleText = urlencode($title->getText());
| 205 | + if($text == null) $text = ""; // "" (not null) makes invokeRemote send Content-Type/Content-Length headers
| 206 | + return $this->invokeRemote("/updatePage?db=".urlencode($dbname)."&namespace=$ns&title=$titleText&isredirect=".(int)$isredirect,$text); // (int) so false/null become 0, not an empty value
| 207 | + }
| 208 | +
| 209 | + function addNGram( $dbname, $title, $text ) { // POST the text to /addNgram for the given title
| 210 | + $ns = $title->getNamespace();
| 211 | + $titleText = urlencode($title->getText());
| 212 | + if($text == null) $text = ""; // "" (not null) makes invokeRemote send Content-Type/Content-Length headers
| 213 | + return $this->invokeRemote("/addNgram?db=".urlencode($dbname)."&namespace=$ns&title=$titleText",$text);
| 214 | +
| 215 | + }
| 216 | +
| 217 | + function flushNGram( $dbname) { // POST to /flushNgram for one database
| 218 | + return $this->invokeRemote("/flushNgram?db=".urlencode($dbname));
| 219 | + }
| 220 | +
| 221 | +
| 222 | + function deletePage( $dbname, $title ) { // POST to /deletePage; no request body
| 223 | + $ns = $title->getNamespace();
| 224 | + $titleText = urlencode($title->getText());
| 225 | + return $this->invokeRemote("/deletePage?db=".urlencode($dbname)."&namespace=$ns&title=$titleText");
| 226 | + }
| 227 | +
| 228 | +
| 229 | + function getStatus() { // returns the reply body of /getStatus
| 230 | + return $this->invokeRemote("/getStatus");
| 231 | + }
| 232 | +
| 233 | + function start() { // POST to /start
| 234 | + return $this->invokeRemote("/start");
| 235 | + }
| 236 | +
| 237 | + function stop() { // POST to /stop
| 238 | + return $this->invokeRemote("/stop");
| 239 | + }
| 240 | +
| 241 | + function quit() { // POST to /quit
| 242 | + return $this->invokeRemote("/quit");
| 243 | + }
| 244 | +
| 245 | + function flushAll() { // POST to /flushAll
| 246 | + return $this->invokeRemote("/flushAll");
| 247 | + }
| 248 | +
| 249 | + function snapshot() { // note: the remote method name is /makeSnapshots
| 250 | + return $this->invokeRemote("/makeSnapshots");
| 251 | + }
| 252 | +
| 253 | +
| 254 | + function optimize() { // POST to /optimize
| 255 | + return $this->invokeRemote("/optimize");
| 256 | + }
| 257 | +
| 258 | + function flush( $dbname ) { // POST to /flush for one database
| 259 | + return $this->invokeRemote("/flush?db=".urlencode($dbname));
| 260 | + }
| 261 | +}
| 262 | + |
| 263 | + |
| 264 | + |
| 265 | +class XMLRPCMWSearchUpdater { |
| 266 | + /**
| 267 | + * Queue a request to update a page in the search index.
| 268 | + * @param string $dbname
| 269 | + * @param Title $title
| 270 | + * @param string $text
| 271 | + * @param mixed $isRedirect redirect flag, forwarded unchanged as the fourth RPC parameter; NOTE(review): confirm outParam() accepts its type
| 272 | + * @return bool
| 273 | + * @static
| 274 | + */
| 275 | + function updatePage( $dbname, $title, $text, $isRedirect = 0 ) { // default matches the delegate and the HTTP backend
| 276 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.updatePage',
| 277 | + array( $dbname, $title, $text, $isRedirect) );
| 278 | + }
| 279 | + |
| 280 | + |
| 281 | + /** |
29 | 282 | * Queue a request to update a page in the search index, |
30 | 283 | * including metadata fields. |
31 | 284 | * |
— | — | @@ -41,7 +294,7 @@ |
42 | 295 | list( $key, $value ) = explode( '=', $pair, 2 ); |
43 | 296 | $translated[] = array( 'Key' => $key, 'Value' => $value ); |
44 | 297 | } |
45 | | - return MWSearchUpdater::sendRPC( 'searchupdater.updatePageData', |
| 298 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.updatePageData', |
46 | 299 | array( $dbname, $title, $text, $translated ) ); |
47 | 300 | } |
48 | 301 | |
— | — | @@ -54,7 +307,7 @@ |
55 | 308 | * @static |
56 | 309 | */ |
57 | 310 | function deletePage( $dbname, $title ) { |
58 | | - return MWSearchUpdater::sendRPC( 'searchupdater.deletePage', |
| 311 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.deletePage', |
59 | 312 | array( $dbname, $title ) ); |
60 | 313 | } |
61 | 314 | |
— | — | @@ -64,7 +317,7 @@ |
65 | 318 | * @static |
66 | 319 | */ |
67 | 320 | function getStatus() { |
68 | | - return MWSearchUpdater::sendRPC( 'searchupdater.getStatus' ); |
| 321 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.getStatus' ); |
69 | 322 | } |
70 | 323 | |
71 | 324 | /** |
— | — | @@ -73,7 +326,7 @@ |
74 | 327 | * @static |
75 | 328 | */ |
76 | 329 | function start() { |
77 | | - return MWSearchUpdater::sendRPC( 'searchupdater.start' ); |
| 330 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.start' ); |
78 | 331 | } |
79 | 332 | |
80 | 333 | /** |
— | — | @@ -82,7 +335,7 @@ |
83 | 336 | * @static |
84 | 337 | */ |
85 | 338 | function stop() { |
86 | | - return MWSearchUpdater::sendRPC( 'searchupdater.stop' ); |
| 339 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.stop' ); |
87 | 340 | } |
88 | 341 | |
89 | 342 | /** |
— | — | @@ -91,7 +344,7 @@ |
92 | 345 | * @static |
93 | 346 | */ |
94 | 347 | function quit() { |
95 | | - return MWSearchUpdater::sendRPC( 'searchupdater.quit' ); |
| 348 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.quit' ); |
96 | 349 | } |
97 | 350 | |
98 | 351 | /** |
— | — | @@ -101,7 +354,7 @@ |
102 | 355 | * @static |
103 | 356 | */ |
104 | 357 | function flushAll() { |
105 | | - return MWSearchUpdater::sendRPC( 'searchupdater.flushAll' ); |
| 358 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.flushAll' ); |
106 | 359 | } |
107 | 360 | |
108 | 361 | /** |
— | — | @@ -112,7 +365,7 @@ |
113 | 366 | * @static |
114 | 367 | */ |
115 | 368 | function optimize() { |
116 | | - return MWSearchUpdater::sendRPC( 'searchupdater.optimize' ); |
| 369 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.optimize' ); |
117 | 370 | } |
118 | 371 | |
119 | 372 | /** |
— | — | @@ -122,7 +375,7 @@ |
123 | 376 | * @static |
124 | 377 | */ |
125 | 378 | function flush( $dbname ) { |
126 | | - return MWSearchUpdater::sendRPC( 'searchupdater.flush', |
| 379 | + return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.flush', |
127 | 380 | array( $dbname ) ); |
128 | 381 | } |
129 | 382 | |
— | — | @@ -134,8 +387,8 @@ |
135 | 388 | if( is_object( $param ) && is_a( $param, 'Title' ) ) { |
136 | 389 | return new XML_RPC_Value( |
137 | 390 | array( |
138 | | - 'Namespace' => new XML_RPC_Value( $param->getNamespace(), 'int' ), |
139 | | - 'Text' => new XML_RPC_Value( $param->getText(), 'string' ) ), |
| 391 | + 'namespace' => new XML_RPC_Value( $param->getNamespace(), 'int' ), |
| 392 | + 'title' => new XML_RPC_Value( $param->getText(), 'string' ) ), |
140 | 393 | 'struct' ); |
141 | 394 | } elseif( is_string( $param ) ) { |
142 | 395 | return new XML_RPC_Value( $param, 'string' ); |
— | — | @@ -147,10 +400,10 @@ |
148 | 401 | $type = 'struct'; |
149 | 402 | } |
150 | 403 | } |
151 | | - $translated = array_map( array( 'MWSearchUpdater', 'outParam' ), $param ); |
| 404 | + $translated = array_map( array( 'XMLRPCMWSearchUpdater', 'outParam' ), $param ); |
152 | 405 | return new XML_RPC_Value( $translated, $type ); |
153 | 406 | } else { |
154 | | - return new WikiError( 'MWSearchUpdater::sendRPC given bogus parameter' ); |
| 407 | + return new WikiError( 'XMLRPCMWSearchUpdater::sendRPC given bogus parameter' ); |
155 | 408 | } |
156 | 409 | } |
157 | 410 | |
— | — | @@ -165,7 +418,7 @@ |
166 | 419 | $client->debug = true; |
167 | 420 | } |
168 | 421 | |
169 | | - $rpcParams = array_map( array( 'MWSearchUpdater', 'outParam' ), $params ); |
| 422 | + $rpcParams = array_map( array( 'XMLRPCMWSearchUpdater', 'outParam' ), $params ); |
170 | 423 | |
171 | 424 | $message = new XML_RPC_Message( $method, $rpcParams ); |
172 | 425 | wfSuppressWarnings(); |
— | — | @@ -174,7 +427,7 @@ |
175 | 428 | $delta = wfTime() - $start; |
176 | 429 | wfRestoreWarnings(); |
177 | 430 | |
178 | | - $debug = sprintf( "MWSearchUpdater::sendRPC for %s took %0.2fms\n", |
| 431 | + $debug = sprintf( "XMLRPCMWSearchUpdater::sendRPC for %s took %0.2fms\n", |
179 | 432 | $method, $delta * 1000.0 ); |
180 | 433 | wfDebug( $debug ); |
181 | 434 | if( $mwSearchUpdateDebug ) { |
Index: branches/MWSearch-2.0/luceneUpdate.php |
— | — | @@ -46,7 +46,8 @@ |
47 | 47 | $rev = Revision::newFromTitle( $title ); |
48 | 48 | if( $rev ) { |
49 | 49 | $text = $rev->getText(); |
50 | | - $ret = MWSearchUpdater::updatePage( $wgDBname, $title, $text ); |
| 50 | + $ar = new Article($title); |
| 51 | + $ret = MWSearchUpdater::updatePage( $wgDBname, $title, $text, $ar->isRedirect() ); |
51 | 52 | } else { |
52 | 53 | $ret = MWSearchUpdater::deletePage( $wgDBname, $title ); |
53 | 54 | } |
— | — | @@ -168,7 +169,7 @@ |
169 | 170 | * See if the daemon's getting overloaded and pause if so |
170 | 171 | */ |
171 | 172 | function wait() { |
172 | | - $cutoff = 500; |
| 173 | + $cutoff = 5000; |
173 | 174 | $waittime = 10; |
174 | 175 | |
175 | 176 | while( true ) { |
— | — | @@ -234,7 +235,7 @@ |
235 | 236 | } |
236 | 237 | |
237 | 238 | $result = $this->dbstream->select( array( 'page' ), |
238 | | - array( 'page_namespace', 'page_title', 'page_latest' ), |
| 239 | + array( 'page_namespace', 'page_title', 'page_latest', 'page_is_redirect' ), |
239 | 240 | '', |
240 | 241 | $fname, |
241 | 242 | $limit ); |
— | — | @@ -251,7 +252,7 @@ |
252 | 253 | } |
253 | 254 | |
254 | 255 | $text = $rev->getText(); |
255 | | - $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $text ); |
| 256 | + $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $text, $row->page_is_redirect); |
256 | 257 | |
257 | 258 | if( WikiError::isError( $hit ) ) { |
258 | 259 | echo "ERROR: " . $hit->getMessage() . "\n"; |
— | — | @@ -363,7 +364,7 @@ |
364 | 365 | $rev = new Revision( $row ); |
365 | 366 | if( is_object( $rev ) ) { |
366 | 367 | $title = Title::makeTitle( $row->page_namespace, $row->page_title ); |
367 | | - $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $rev->getText() ); |
| 368 | + $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $rev->getText(), $row->page_is_redirect ); |
368 | 369 | if( WikiError::isError( $hit ) ) { |
369 | 370 | echo "ERROR: " . $hit->getMessage() . "\n"; |
370 | 371 | $lastError = $hit; |