r21626 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r21625‎ | r21626 | r21627 >
Date:17:08, 26 April 2007
Author:rainman
Status:old
Tags:
Comment:

Adding support for an HTTP frontend for the indexer, since XMLRPC implementations for Java (e.g. Apache xmlrpc) tend to be very slow and introduce unnecessary overhead.
The hook still needs some tuning, since it is not called on some events (e.g. undeletion of an article).
Modified paths:
  • /branches/MWSearch-2.0/MWSearchUpdateHook.php (modified) (history)
  • /branches/MWSearch-2.0/MWSearchUpdater.php (modified) (history)
  • /branches/MWSearch-2.0/luceneUpdate.php (modified) (history)

Diff [purge]

Index: branches/MWSearch-2.0/MWSearchUpdateHook.php
@@ -33,7 +33,7 @@
3434 }
3535
3636 $redirText = '#REDIRECT [[' . $to->getPrefixedText() . "]]\n";
37 - MWSearchUpdater::updatePage( $wgDBname, $from, $redirText );
 37+ MWSearchUpdater::updatePage( $wgDBname, $from, $redirText, 1 );
3838 return true;
3939 }
4040 ?>
Index: branches/MWSearch-2.0/MWSearchUpdater.php
@@ -6,9 +6,13 @@
77 require_once( 'XML/RPC.php' );
88
99 $mwSearchUpdateHost = 'localhost';
10 -$mwSearchUpdatePort = 8124;
 10+$mwSearchUpdatePort = 8321; # HTTP default port is 8321
1111 $mwSearchUpdateDebug = false;
1212
 13+// the interface
 14+$mwSearchUpdater = new HTTPMWSearchUpdater;
 15+
 16+/** Delegate class to either HttpMWSearchUpdater or XMLRPCMWSearchUpdater */
1317 class MWSearchUpdater {
1418 /**
1519 * Queue a request to update a page in the search index.
@@ -19,12 +23,261 @@
2024 * @return bool
2125 * @static
2226 */
23 - function updatePage( $dbname, $title, $text ) {
24 - return MWSearchUpdater::sendRPC( 'searchupdater.updatePage',
25 - array( $dbname, $title, $text ) );
 27+ function updatePage( $dbname, $title, $text, $isredirect=0) {
 28+ global $mwSearchUpdater;
 29+ $mwSearchUpdater->updatePage( $dbname, $title, $text, $isredirect);
2630 }
2731
2832 /**
 33+ * Queue a request to delete a page from the search index.
 34+ *
 35+ * @param string $dbname
 36+ * @param Title $title
 37+ * @return bool
 38+ * @static
 39+ */
 40+ function deletePage( $dbname, $title ) {
 41+ global $mwSearchUpdater;
 42+ $mwSearchUpdater->deletePage( $dbname, $title );
 43+ }
 44+
 45+ /**
 46+ * Get a brief bit of status info on the update daemon.
 47+ * @return string
 48+ * @static
 49+ */
 50+ function getStatus() {
 51+ global $mwSearchUpdater;
 52+ return $mwSearchUpdater->getStatus();
 53+ }
 54+
 55+ /**
 56+ * Request that the daemon start applying updates if it's stopped.
 57+ * @return bool
 58+ * @static
 59+ */
 60+ function start() {
 61+ global $mwSearchUpdater;
 62+ $mwSearchUpdater->start();
 63+ }
 64+
 65+ /**
 66+ * Request that the daemon stop applying updates and close open indexes.
 67+ * @return bool
 68+ * @static
 69+ */
 70+ function stop() {
 71+ global $mwSearchUpdater;
 72+ $mwSearchUpdater->stop();
 73+ }
 74+
 75+ /**
 76+ * Request that the daemon stop applying updates and close open indexes.
 77+ * @return bool
 78+ * @static
 79+ */
 80+ function quit() {
 81+ global $mwSearchUpdater;
 82+ $mwSearchUpdater->quit();
 83+ }
 84+
 85+ /**
 86+ * Request that the daemon flush and reopen all indexes, without changing
 87+ * the global is-running state.
 88+ * @return bool
 89+ * @static
 90+ */
 91+ function flushAll() {
 92+ global $mwSearchUpdater;
 93+ $mwSearchUpdater->flushAll();
 94+ }
 95+
 96+ /**
 97+ * Request that the daemon flush and reopen all indexes, without changing
 98+ * the global is-running state, and that indexes should be optimized when
 99+ * closed.
 100+ * @return bool
 101+ * @static
 102+ */
 103+ function optimize() {
 104+ global $mwSearchUpdater;
 105+ $mwSearchUpdater->optimize();
 106+ }
 107+
 108+ /**
 109+ * Request that the daemon flush and reopen a given index, without changing
 110+ * the global is-running state.
 111+ * @return bool
 112+ * @static
 113+ */
 114+ function flush( $dbname ) {
 115+ global $mwSearchUpdater;
 116+ $mwSearchUpdater->flush($dbname);
 117+ }
 118+
 119+ /**
 120+ * Request that the daemon make a snapshot of all indexes, without changing
 121+ * the global is-running state.
 122+ * @return bool
 123+ * @static
 124+ */
 125+ function snapshot() {
 126+ global $mwSearchUpdater;
 127+ $mwSearchUpdater->snapshot();
 128+ }
 129+
 130+}
 131+
 132+class HttpMWSearchUpdater{
 133+
 134+ /**
 135+ * Call remote method via the special http server
 136+ * URI: /method?param1=value1&param2=value2
 137+ * (all values urlencoded);
 138+ */
 139+ function invokeRemote( $uri, $content = null){
 140+ global $mwSearchUpdateHost, $mwSearchUpdatePort, $mwSearchUpdateDebug;
 141+ //global $socket;
 142+ $host = $mwSearchUpdateHost;
 143+ $port = $mwSearchUpdatePort;
 144+
 145+ if($content === null){
 146+ $req =
 147+ "POST $uri HTTP/1.0\r\n".
 148+ "\r\n";
 149+ } else{
 150+ $contentLength = strlen($content);
 151+ $req =
 152+ "POST $uri HTTP/1.0\r\n".
 153+ "Content-Type: application/octet-stream\r\n".
 154+ "Content-Length: $contentLength\r\n\r\n".
 155+ "$content";
 156+ }
 157+ // open socket
 158+ $socket = fsockopen($host, $port, $errno, $errstr, 10);
 159+ if(!$socket){
 160+ $debug = "MWSearchUpdater.php: Error opening socket\n";
 161+ wfDebug($debug);
 162+ if( $mwSearchUpdateDebug )
 163+ print($debug);
 164+ return null;
 165+ }
 166+
 167+ // send request
 168+ fwrite($socket, $req);
 169+
 170+ // read server reply
 171+ $headers = "";
 172+ while ($str = trim(fgets($socket, 4096)))
 173+ $headers .= "$str\n";
 174+
 175+ $body = "";
 176+ while (!feof($socket))
 177+ $body .= fgets($socket, 4096);
 178+
 179+ // no keep-alive, just close the connection
 180+ fclose($socket);
 181+
 182+ // process headers, just read the http code
 183+ $headerLines = explode("\n",$headers);
 184+ $code = $headerLines[0];
 185+ $bits = explode(' ',$code);
 186+
 187+ // report if there was an error
 188+ if($bits[1]!="200"){
 189+ $debug = "MWSearchUpdater.php: Error invoking remote procedure with uri $uri, got: ".$bits[1].' '.$bits[2];
 190+
 191+ wfDebug( $debug );
 192+ if( $mwSearchUpdateDebug ) {
 193+ echo $debug;
 194+ }
 195+ }
 196+ // get reply if any
 197+ $ret = $body;
 198+
 199+ return $ret;
 200+ }
 201+
 202+ function updatePage( $dbname, $title, $text, $isredirect=0 ) {
 203+ $ns = $title->getNamespace();
 204+ $titleText = urlencode($title->getText());
 205+ if($text == null) $text = "";
 206+ return $this->invokeRemote("/updatePage?db=$dbname&namespace=$ns&title=$titleText&isredirect=$isredirect",$text);
 207+ }
 208+
 209+ function addNGram( $dbname, $title, $text ) {
 210+ $ns = $title->getNamespace();
 211+ $titleText = urlencode($title->getText());
 212+ if($text == null) $text = "";
 213+ return $this->invokeRemote("/addNgram?db=$dbname&namespace=$ns&title=$titleText",$text);
 214+
 215+ }
 216+
 217+ function flushNGram( $dbname) {
 218+ return $this->invokeRemote("/flushNgram?db=$dbname");;
 219+ }
 220+
 221+
 222+ function deletePage( $dbname, $title ) {
 223+ $ns = $title->getNamespace();
 224+ $titleText = urlencode($title->getText());
 225+ return $this->invokeRemote("/deletePage?db=$dbname&namespace=$ns&title=$titleText");;
 226+ }
 227+
 228+
 229+ function getStatus() {
 230+ return $this->invokeRemote("/getStatus");
 231+ }
 232+
 233+ function start() {
 234+ return $this->invokeRemote("/start");
 235+ }
 236+
 237+ function stop() {
 238+ return $this->invokeRemote("/stop");
 239+ }
 240+
 241+ function quit() {
 242+ return $this->invokeRemote("/quit");
 243+ }
 244+
 245+ function flushAll() {
 246+ return $this->invokeRemote("/flushAll");
 247+ }
 248+
 249+ function snapshot() {
 250+ return $this->invokeRemote("/makeSnapshots");
 251+ }
 252+
 253+
 254+ function optimize() {
 255+ return $this->invokeRemote("/optimize");
 256+ }
 257+
 258+ function flush( $dbname ) {
 259+ return $this->invokeRemote("/flush?db=$dbname");
 260+ }
 261+}
 262+
 263+
 264+
 265+class XMLRPCMWSearchUpdater {
 266+ /**
 267+ * Queue a request to update a page in the search index.
 268+ *
 269+ * @param string $dbname
 270+ * @param Title $title
 271+ * @param string $text
 272+ * @return bool
 273+ * @static
 274+ */
 275+ function updatePage( $dbname, $title, $text, $isRedirect ) {
 276+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.updatePage',
 277+ array( $dbname, $title, $text, $isRedirect) );
 278+ }
 279+
 280+
 281+ /**
29282 * Queue a request to update a page in the search index,
30283 * including metadata fields.
31284 *
@@ -41,7 +294,7 @@
42295 list( $key, $value ) = explode( '=', $pair, 2 );
43296 $translated[] = array( 'Key' => $key, 'Value' => $value );
44297 }
45 - return MWSearchUpdater::sendRPC( 'searchupdater.updatePageData',
 298+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.updatePageData',
46299 array( $dbname, $title, $text, $translated ) );
47300 }
48301
@@ -54,7 +307,7 @@
55308 * @static
56309 */
57310 function deletePage( $dbname, $title ) {
58 - return MWSearchUpdater::sendRPC( 'searchupdater.deletePage',
 311+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.deletePage',
59312 array( $dbname, $title ) );
60313 }
61314
@@ -64,7 +317,7 @@
65318 * @static
66319 */
67320 function getStatus() {
68 - return MWSearchUpdater::sendRPC( 'searchupdater.getStatus' );
 321+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.getStatus' );
69322 }
70323
71324 /**
@@ -73,7 +326,7 @@
74327 * @static
75328 */
76329 function start() {
77 - return MWSearchUpdater::sendRPC( 'searchupdater.start' );
 330+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.start' );
78331 }
79332
80333 /**
@@ -82,7 +335,7 @@
83336 * @static
84337 */
85338 function stop() {
86 - return MWSearchUpdater::sendRPC( 'searchupdater.stop' );
 339+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.stop' );
87340 }
88341
89342 /**
@@ -91,7 +344,7 @@
92345 * @static
93346 */
94347 function quit() {
95 - return MWSearchUpdater::sendRPC( 'searchupdater.quit' );
 348+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.quit' );
96349 }
97350
98351 /**
@@ -101,7 +354,7 @@
102355 * @static
103356 */
104357 function flushAll() {
105 - return MWSearchUpdater::sendRPC( 'searchupdater.flushAll' );
 358+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.flushAll' );
106359 }
107360
108361 /**
@@ -112,7 +365,7 @@
113366 * @static
114367 */
115368 function optimize() {
116 - return MWSearchUpdater::sendRPC( 'searchupdater.optimize' );
 369+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.optimize' );
117370 }
118371
119372 /**
@@ -122,7 +375,7 @@
123376 * @static
124377 */
125378 function flush( $dbname ) {
126 - return MWSearchUpdater::sendRPC( 'searchupdater.flush',
 379+ return XMLRPCMWSearchUpdater::sendRPC( 'searchupdater.flush',
127380 array( $dbname ) );
128381 }
129382
@@ -134,8 +387,8 @@
135388 if( is_object( $param ) && is_a( $param, 'Title' ) ) {
136389 return new XML_RPC_Value(
137390 array(
138 - 'Namespace' => new XML_RPC_Value( $param->getNamespace(), 'int' ),
139 - 'Text' => new XML_RPC_Value( $param->getText(), 'string' ) ),
 391+ 'namespace' => new XML_RPC_Value( $param->getNamespace(), 'int' ),
 392+ 'title' => new XML_RPC_Value( $param->getText(), 'string' ) ),
140393 'struct' );
141394 } elseif( is_string( $param ) ) {
142395 return new XML_RPC_Value( $param, 'string' );
@@ -147,10 +400,10 @@
148401 $type = 'struct';
149402 }
150403 }
151 - $translated = array_map( array( 'MWSearchUpdater', 'outParam' ), $param );
 404+ $translated = array_map( array( 'XMLRPCMWSearchUpdater', 'outParam' ), $param );
152405 return new XML_RPC_Value( $translated, $type );
153406 } else {
154 - return new WikiError( 'MWSearchUpdater::sendRPC given bogus parameter' );
 407+ return new WikiError( 'XMLRPCMWSearchUpdater::sendRPC given bogus parameter' );
155408 }
156409 }
157410
@@ -165,7 +418,7 @@
166419 $client->debug = true;
167420 }
168421
169 - $rpcParams = array_map( array( 'MWSearchUpdater', 'outParam' ), $params );
 422+ $rpcParams = array_map( array( 'XMLRPCMWSearchUpdater', 'outParam' ), $params );
170423
171424 $message = new XML_RPC_Message( $method, $rpcParams );
172425 wfSuppressWarnings();
@@ -174,7 +427,7 @@
175428 $delta = wfTime() - $start;
176429 wfRestoreWarnings();
177430
178 - $debug = sprintf( "MWSearchUpdater::sendRPC for %s took %0.2fms\n",
 431+ $debug = sprintf( "XMLRPCMWSearchUpdater::sendRPC for %s took %0.2fms\n",
179432 $method, $delta * 1000.0 );
180433 wfDebug( $debug );
181434 if( $mwSearchUpdateDebug ) {
Index: branches/MWSearch-2.0/luceneUpdate.php
@@ -46,7 +46,8 @@
4747 $rev = Revision::newFromTitle( $title );
4848 if( $rev ) {
4949 $text = $rev->getText();
50 - $ret = MWSearchUpdater::updatePage( $wgDBname, $title, $text );
 50+ $ar = new Article($title);
 51+ $ret = MWSearchUpdater::updatePage( $wgDBname, $title, $text, $ar->isRedirect() );
5152 } else {
5253 $ret = MWSearchUpdater::deletePage( $wgDBname, $title );
5354 }
@@ -168,7 +169,7 @@
169170 * See if the daemon's getting overloaded and pause if so
170171 */
171172 function wait() {
172 - $cutoff = 500;
 173+ $cutoff = 5000;
173174 $waittime = 10;
174175
175176 while( true ) {
@@ -234,7 +235,7 @@
235236 }
236237
237238 $result = $this->dbstream->select( array( 'page' ),
238 - array( 'page_namespace', 'page_title', 'page_latest' ),
 239+ array( 'page_namespace', 'page_title', 'page_latest', 'page_is_redirect' ),
239240 '',
240241 $fname,
241242 $limit );
@@ -251,7 +252,7 @@
252253 }
253254
254255 $text = $rev->getText();
255 - $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $text );
 256+ $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $text, $row->page_is_redirect);
256257
257258 if( WikiError::isError( $hit ) ) {
258259 echo "ERROR: " . $hit->getMessage() . "\n";
@@ -363,7 +364,7 @@
364365 $rev = new Revision( $row );
365366 if( is_object( $rev ) ) {
366367 $title = Title::makeTitle( $row->page_namespace, $row->page_title );
367 - $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $rev->getText() );
 368+ $hit = MWSearchUpdater::updatePage( $wgDBname, $title, $rev->getText(), $row->page_is_redirect );
368369 if( WikiError::isError( $hit ) ) {
369370 echo "ERROR: " . $hit->getMessage() . "\n";
370371 $lastError = $hit;