r60811 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r60810‎ | r60811 | r60812 >
Date:21:57, 7 January 2010
Author:dale
Status:resolved (Comments)
Tags:
Comment:
* fix for bug 20512 ( in both trunk and js2-work branch )
** Add 'proxy' option to Http $opts array
** Setting 'proxy' option to "false" will bypass the proxy
* Updated MWSearch_body.php to use new "false" option
Modified paths:
  • /branches/js2-work/phase3/includes/HttpFunctions.php (modified) (history)
  • /trunk/extensions/MWSearch/MWSearch_body.php (modified) (history)
  • /trunk/phase3/includes/HttpFunctions.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/HttpFunctions.php
@@ -8,111 +8,252 @@
99 * @ingroup HTTP
1010 */
1111 class Http {
 12+ // Syncronous download (in a single request)
 13+ const SYNC_DOWNLOAD = 1;
1214
 15+ // Asynchronous download ( background process with multiple requests )
 16+ const ASYNC_DOWNLOAD = 2;
 17+
1318 /**
 19+ * Get the contents of a file by HTTP
 20+ * @param $method string HTTP method. Usually GET/POST
 21+ * @param $url string Full URL to act on
 22+ * @param $timeout int Seconds to timeout. 'default' falls to $wgHTTPTimeout
 23+ * @param $curlOptions array Optional array of extra params to pass
 24+ * to curl_setopt()
 25+ */
 26+ public static function request( $method, $url, $opts = array() ) {
 27+ $opts['method'] = ( strtoupper( $method ) == 'GET' || strtoupper( $method ) == 'POST' )
 28+ ? strtoupper( $method ) : null;
 29+ $req = HttpRequest::newRequest( $url, $opts );
 30+ $status = $req->doRequest();
 31+ if( $status->isOK() ) {
 32+ return $status->value;
 33+ } else {
 34+ wfDebug( 'http error: ' . $status->getWikiText() );
 35+ return false;
 36+ }
 37+ }
 38+
 39+ /**
1440 * Simple wrapper for Http::request( 'GET' )
1541 * @see Http::request()
1642 */
17 - public static function get( $url, $timeout = 'default', $opts = array() ) {
18 - return Http::request( "GET", $url, $timeout, $opts );
 43+ public static function get( $url, $timeout = false, $opts = array() ) {
 44+ global $wgSyncHTTPTimeout;
 45+ if( $timeout )
 46+ $opts['timeout'] = $timeout;
 47+ return Http::request( 'GET', $url, $opts );
1948 }
2049
2150 /**
2251 * Simple wrapper for Http::request( 'POST' )
2352 * @see Http::request()
2453 */
25 - public static function post( $url, $timeout = 'default', $opts = array() ) {
26 - return Http::request( "POST", $url, $timeout, $opts );
 54+ public static function post( $url, $opts = array() ) {
 55+ return Http::request( 'POST', $url, $opts );
2756 }
2857
 58+ public static function doDownload( $url, $target_file_path, $dl_mode = self::SYNC_DOWNLOAD,
 59+ $redirectCount = 0 )
 60+ {
 61+ global $wgPhpCli, $wgMaxUploadSize, $wgMaxRedirects;
 62+ // do a quick check to HEAD to insure the file size is not > $wgMaxUploadSize
 63+ $headRequest = HttpRequest::newRequest( $url, array( 'headers_only' => true ) );
 64+ $headResponse = $headRequest->doRequest();
 65+ if( !$headResponse->isOK() ) {
 66+ return $headResponse;
 67+ }
 68+ $head = $headResponse->value;
 69+
 70+ // check for redirects:
 71+ if( isset( $head['Location'] ) && strrpos( $head[0], '302' ) !== false ) {
 72+ if( $redirectCount < $wgMaxRedirects ) {
 73+ if( self::isValidURI( $head['Location'] ) ) {
 74+ return self::doDownload( $head['Location'], $target_file_path,
 75+ $dl_mode, $redirectCount++ );
 76+ } else {
 77+ return Status::newFatal( 'upload-proto-error' );
 78+ }
 79+ } else {
 80+ return Status::newFatal( 'upload-too-many-redirects' );
 81+ }
 82+ }
 83+ // we did not get a 200 ok response:
 84+ if( strrpos( $head[0], '200 OK' ) === false ) {
 85+ return Status::newFatal( 'upload-http-error', htmlspecialchars( $head[0] ) );
 86+ }
 87+
 88+ $content_length = ( isset( $head['Content-Length'] ) ) ? $head['Content-Length'] : null;
 89+ if( $content_length ) {
 90+ if( $content_length > $wgMaxUploadSize ) {
 91+ return Status::newFatal( 'requested file length ' . $content_length .
 92+ ' is greater than $wgMaxUploadSize: ' . $wgMaxUploadSize );
 93+ }
 94+ }
 95+
 96+ // check if we can find phpCliPath (for doing a background shell request to
 97+ // php to do the download:
 98+ if( $wgPhpCli && wfShellExecEnabled() && $dl_mode == self::ASYNC_DOWNLOAD ) {
 99+ wfDebug( __METHOD__ . "\nASYNC_DOWNLOAD\n" );
 100+ //setup session and shell call:
 101+ return self::initBackgroundDownload( $url, $target_file_path, $content_length );
 102+ } else {
 103+ wfDebug( __METHOD__ . "\nSYNC_DOWNLOAD\n" );
 104+ // SYNC_DOWNLOAD download as much as we can in the time we have to execute
 105+ $opts['method'] = 'GET';
 106+ $opts['target_file_path'] = $target_file_path;
 107+ $req = HttpRequest::newRequest( $url, $opts );
 108+ return $req->doRequest();
 109+ }
 110+ }
 111+
29112 /**
30 - * Get the contents of a file by HTTP
31 - * @param $method string HTTP method. Usually GET/POST
32 - * @param $url string Full URL to act on
33 - * @param $timeout int Seconds to timeout. 'default' falls to $wgHTTPTimeout
34 - * @param $curlOptions array Optional array of extra params to pass
35 - * to curl_setopt()
 113+ * a non blocking request (generally an exit point in the application)
 114+ * should write to a file location and give updates
 115+ *
36116 */
37 - public static function request( $method, $url, $timeout = 'default', $curlOptions = array() ) {
38 - global $wgHTTPTimeout, $wgHTTPProxy, $wgTitle;
 117+ private static function initBackgroundDownload( $url, $target_file_path,
 118+ $content_length = null )
 119+ {
 120+ global $IP, $wgPhpCli, $wgServer;
 121+ $status = Status::newGood();
39122
40 - // Go ahead and set the timeout if not otherwise specified
41 - if ( $timeout == 'default' ) {
42 - $timeout = $wgHTTPTimeout;
 123+ // generate a session id with all the details for the download (pid, target_file_path )
 124+ $upload_session_key = self::getUploadSessionKey();
 125+ $session_id = session_id();
 126+
 127+ // store the url and target path:
 128+ $_SESSION['wsDownload'][$upload_session_key]['url'] = $url;
 129+ $_SESSION['wsDownload'][$upload_session_key]['target_file_path'] = $target_file_path;
 130+ // since we request from the cmd line we lose the original host name pass in the session:
 131+ $_SESSION['wsDownload'][$upload_session_key]['orgServer'] = $wgServer;
 132+
 133+ if( $content_length )
 134+ $_SESSION['wsDownload'][$upload_session_key]['content_length'] = $content_length;
 135+
 136+ // set initial loaded bytes:
 137+ $_SESSION['wsDownload'][$upload_session_key]['loaded'] = 0;
 138+
 139+ // run the background download request:
 140+ $cmd = $wgPhpCli . ' ' . $IP . "/maintenance/http_session_download.php " .
 141+ "--sid {$session_id} --usk {$upload_session_key} --wiki " . wfWikiId();
 142+ $pid = wfShellBackgroundExec( $cmd );
 143+ // the pid is not of much use since we won't be visiting this same apache any-time soon.
 144+ if( !$pid )
 145+ return Status::newFatal( 'could not run background shell exec' );
 146+
 147+ // update the status value with the $upload_session_key (for the user to
 148+ // check on the status of the upload)
 149+ $status->value = $upload_session_key;
 150+
 151+ // return good status
 152+ return $status;
 153+ }
 154+
 155+ static function getUploadSessionKey() {
 156+ $key = mt_rand( 0, 0x7fffffff );
 157+ $_SESSION['wsUploadData'][$key] = array();
 158+ return $key;
 159+ }
 160+
 161+ /**
 162+ * used to run a session based download. Is initiated via the shell.
 163+ *
 164+ * @param $session_id String: the session id to grab download details from
 165+ * @param $upload_session_key String: the key of the given upload session
 166+ * (a given client could have started a few http uploads at once)
 167+ */
 168+ public static function doSessionIdDownload( $session_id, $upload_session_key ) {
 169+ global $wgUser, $wgEnableWriteAPI, $wgAsyncHTTPTimeout, $wgServer,
 170+ $wgSessionsInMemcached, $wgSessionHandler, $wgSessionStarted;
 171+ wfDebug( __METHOD__ . "\n\n doSessionIdDownload :\n\n" );
 172+ // set session to the provided key:
 173+ session_id( $session_id );
 174+ //fire up mediaWiki session system:
 175+ wfSetupSession();
 176+
 177+ // start the session
 178+ if( session_start() === false ) {
 179+ wfDebug( __METHOD__ . ' could not start session' );
43180 }
 181+ // get all the vars we need from session_id
 182+ if( !isset( $_SESSION[ 'wsDownload' ][$upload_session_key] ) ) {
 183+ wfDebug( __METHOD__ . ' Error:could not find upload session');
 184+ exit();
 185+ }
 186+ // setup the global user from the session key we just inherited
 187+ $wgUser = User::newFromSession();
44188
45 - wfDebug( __METHOD__ . ": $method $url\n" );
46 - # Use curl if available
47 - if ( function_exists( 'curl_init' ) ) {
48 - $c = curl_init( $url );
49 - if ( self::isLocalURL( $url ) ) {
50 - curl_setopt( $c, CURLOPT_PROXY, 'localhost:80' );
51 - } else if ($wgHTTPProxy) {
52 - curl_setopt($c, CURLOPT_PROXY, $wgHTTPProxy);
53 - }
 189+ // grab the session data to setup the request:
 190+ $sd =& $_SESSION['wsDownload'][$upload_session_key];
54191
55 - curl_setopt( $c, CURLOPT_TIMEOUT, $timeout );
56 - curl_setopt( $c, CURLOPT_USERAGENT, self :: userAgent() );
57 - if ( $method == 'POST' ) {
58 - curl_setopt( $c, CURLOPT_POST, true );
59 - curl_setopt( $c, CURLOPT_POSTFIELDS, '' );
60 - }
61 - else
62 - curl_setopt( $c, CURLOPT_CUSTOMREQUEST, $method );
 192+ // update the wgServer var ( since cmd line thinks we are localhost
 193+ // when we are really orgServer)
 194+ if( isset( $sd['orgServer'] ) && $sd['orgServer'] ) {
 195+ $wgServer = $sd['orgServer'];
 196+ }
 197+ // close down the session so we can other http queries can get session
 198+ // updates: (if not $wgSessionsInMemcached)
 199+ if( !$wgSessionsInMemcached )
 200+ session_write_close();
63201
64 - # Set the referer to $wgTitle, even in command-line mode
65 - # This is useful for interwiki transclusion, where the foreign
66 - # server wants to know what the referring page is.
67 - # $_SERVER['REQUEST_URI'] gives a less reliable indication of the
68 - # referring page.
69 - if ( is_object( $wgTitle ) ) {
70 - curl_setopt( $c, CURLOPT_REFERER, $wgTitle->getFullURL() );
71 - }
72 -
73 - if ( is_array( $curlOptions ) ) {
74 - foreach( $curlOptions as $option => $value ) {
75 - curl_setopt( $c, $option, $value );
76 - }
77 - }
 202+ $req = HttpRequest::newRequest( $sd['url'], array(
 203+ 'target_file_path' => $sd['target_file_path'],
 204+ 'upload_session_key'=> $upload_session_key,
 205+ 'timeout' => $wgAsyncHTTPTimeout,
 206+ 'do_close_session_update' => true
 207+ ) );
 208+ // run the actual request .. (this can take some time)
 209+ wfDebug( __METHOD__ . 'do Session Download :: ' . $sd['url'] . ' tf: ' .
 210+ $sd['target_file_path'] . "\n\n");
 211+ $status = $req->doRequest();
 212+ //wfDebug("done with req status is: ". $status->isOK(). ' '.$status->getWikiText(). "\n");
78213
79 - ob_start();
80 - curl_exec( $c );
81 - $text = ob_get_contents();
82 - ob_end_clean();
 214+ // start up the session again:
 215+ if( session_start() === false ) {
 216+ wfDebug( __METHOD__ . ' ERROR:: Could not start session');
 217+ }
 218+ // grab the updated session data pointer
 219+ $sd =& $_SESSION['wsDownload'][$upload_session_key];
 220+ // if error update status:
 221+ if( !$status->isOK() ) {
 222+ $sd['apiUploadResult'] = FormatJson::encode(
 223+ array( 'error' => $status->getWikiText() )
 224+ );
 225+ }
 226+ // if status okay process upload using fauxReq to api:
 227+ if( $status->isOK() ){
 228+ // setup the FauxRequest
 229+ $fauxReqData = $sd['mParams'];
83230
84 - # Don't return the text of error messages, return false on error
85 - $retcode = curl_getinfo( $c, CURLINFO_HTTP_CODE );
86 - if ( $retcode != 200 ) {
87 - wfDebug( __METHOD__ . ": HTTP return code $retcode\n" );
88 - $text = false;
 231+ // Fix boolean parameters
 232+ foreach( $fauxReqData as $k => $v ) {
 233+ if( $v === false )
 234+ unset( $fauxReqData[$k] );
89235 }
90 - # Don't return truncated output
91 - $errno = curl_errno( $c );
92 - if ( $errno != CURLE_OK ) {
93 - $errstr = curl_error( $c );
94 - wfDebug( __METHOD__ . ": CURL error code $errno: $errstr\n" );
95 - $text = false;
96 - }
97 - curl_close( $c );
98 - } else {
99 - # Otherwise use file_get_contents...
100 - # This doesn't have local fetch capabilities...
101236
102 - $headers = array( "User-Agent: " . self :: userAgent() );
103 - if( strcasecmp( $method, 'post' ) == 0 ) {
104 - // Required for HTTP 1.0 POSTs
105 - $headers[] = "Content-Length: 0";
106 - }
107 - $opts = array(
108 - 'http' => array(
109 - 'method' => $method,
110 - 'header' => implode( "\r\n", $headers ),
111 - 'timeout' => $timeout ) );
112 - $ctx = stream_context_create($opts);
 237+ $fauxReqData['action'] = 'upload';
 238+ $fauxReqData['format'] = 'json';
 239+ $fauxReqData['internalhttpsession'] = $upload_session_key;
 240+ // evil but no other clean way about it:
 241+ $faxReq = new FauxRequest( $fauxReqData, true );
 242+ $processor = new ApiMain( $faxReq, $wgEnableWriteAPI );
113243
114 - $text = file_get_contents( $url, false, $ctx );
 244+ //init the mUpload var for the $processor
 245+ $processor->execute();
 246+ $processor->getResult()->cleanUpUTF8();
 247+ $printer = $processor->createPrinterByName( 'json' );
 248+ $printer->initPrinter( false );
 249+ ob_start();
 250+ $printer->execute();
 251+ $apiUploadResult = ob_get_clean();
 252+
 253+ // the status updates runner will grab the result form the session:
 254+ $sd['apiUploadResult'] = $apiUploadResult;
115255 }
116 - return $text;
 256+ // close the session:
 257+ session_write_close();
117258 }
118259
119260 /**
@@ -148,7 +289,7 @@
149290 }
150291 return false;
151292 }
152 -
 293+
153294 /**
154295 * Return a standard user-agent we can use for external requests.
155296 */
@@ -156,4 +297,387 @@
157298 global $wgVersion;
158299 return "MediaWiki/$wgVersion";
159300 }
 301+
 302+ /**
 303+ * Checks that the given URI is a valid one
 304+ * @param $uri Mixed: URI to check for validity
 305+ */
 306+ public static function isValidURI( $uri ){
 307+ return preg_match(
 308+ '/(ftp|http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?/',
 309+ $uri,
 310+ $matches
 311+ );
 312+ }
160313 }
 314+
 315+class HttpRequest {
 316+ var $target_file_path;
 317+ var $upload_session_key;
 318+ function __construct( $url, $opt ){
 319+
 320+ global $wgSyncHTTPTimeout;
 321+ $this->url = $url;
 322+ // set the timeout to default sync timeout (unless the timeout option is provided)
 323+ $this->timeout = ( isset( $opt['timeout'] ) ) ? $opt['timeout'] : $wgSyncHTTPTimeout;
 324+ //check special key default
 325+ if($this->timeout == 'default'){
 326+ $opts['timeout'] = $wgSyncHTTPTimeout;
 327+ }
 328+
 329+ $this->method = ( isset( $opt['method'] ) ) ? $opt['method'] : 'GET';
 330+ $this->target_file_path = ( isset( $opt['target_file_path'] ) )
 331+ ? $opt['target_file_path'] : false;
 332+ $this->upload_session_key = ( isset( $opt['upload_session_key'] ) )
 333+ ? $opt['upload_session_key'] : false;
 334+ $this->headers_only = ( isset( $opt['headers_only'] ) ) ? $opt['headers_only'] : false;
 335+ $this->do_close_session_update = isset( $opt['do_close_session_update'] );
 336+ $this->postData = isset( $opt['postdata'] ) ? $opt['postdata'] : '';
 337+
 338+ $this->proxy = isset( $opt['proxy'] )? $opt['proxy'] : '';
 339+
 340+ $this->ssl_verifyhost = (isset( $opt['ssl_verifyhost'] ))? $opt['ssl_verifyhost']: false;
 341+
 342+ $this->cainfo = (isset( $opt['cainfo'] ))? $op['cainfo']: false;
 343+
 344+ }
 345+
 346+ public static function newRequest($url, $opt){
 347+ # select the handler (use curl if available)
 348+ if ( function_exists( 'curl_init' ) ) {
 349+ return new curlHttpRequest($url, $opt);
 350+ } else {
 351+ return new phpHttpRequest($url, $opt);
 352+ }
 353+ }
 354+
 355+ /**
 356+ * Get the contents of a file by HTTP
 357+ * @param $url string Full URL to act on
 358+ * @param $Opt associative array Optional array of options:
 359+ * 'method' => 'GET', 'POST' etc.
 360+ * 'target_file_path' => if curl should output to a target file
 361+ * 'adapter' => 'curl', 'soket'
 362+ */
 363+ public function doRequest() {
 364+ # Make sure we have a valid url
 365+ if( !Http::isValidURI( $this->url ) )
 366+ return Status::newFatal('bad-url');
 367+ //do the actual request:
 368+ return $this->doReq();
 369+ }
 370+}
 371+class curlHttpRequest extends HttpRequest {
 372+ public function doReq(){
 373+ global $wgHTTPProxy, $wgTitle;
 374+
 375+ $status = Status::newGood();
 376+ $c = curl_init( $this->url );
 377+
 378+ // only do proxy setup if ( not suppressed $this->proxy === false )
 379+ if( $this->proxy !== false ){
 380+ if( $this->proxy ){
 381+ curl_setopt( $c, CURLOPT_PROXY, $this->proxy );
 382+ } else if ( Http::isLocalURL( $this->url ) ) {
 383+ curl_setopt( $c, CURLOPT_PROXY, 'localhost:80' );
 384+ } else if ( $wgHTTPProxy ) {
 385+ curl_setopt( $c, CURLOPT_PROXY, $wgHTTPProxy );
 386+ }
 387+ }
 388+
 389+ curl_setopt( $c, CURLOPT_TIMEOUT, $this->timeout );
 390+ curl_setopt( $c, CURLOPT_USERAGENT, Http::userAgent() );
 391+
 392+ if( $this->ssl_verifyhost )
 393+ curl_setopt( $c, CURLOPT_SSL_VERIFYHOST, $this->ssl_verifyhost);
 394+
 395+ if( $this->cainfo )
 396+ curl_setopt( $c, CURLOPT_CAINFO, $this->cainfo);
 397+
 398+ if ( $this->headers_only ) {
 399+ curl_setopt( $c, CURLOPT_NOBODY, true );
 400+ curl_setopt( $c, CURLOPT_HEADER, true );
 401+ } elseif ( $this->method == 'POST' ) {
 402+ curl_setopt( $c, CURLOPT_POST, true );
 403+ curl_setopt( $c, CURLOPT_POSTFIELDS, $this->postData );
 404+ // Suppress 'Expect: 100-continue' header, as some servers
 405+ // will reject it with a 417 and Curl won't auto retry
 406+ // with HTTP 1.0 fallback
 407+ curl_setopt( $c, CURLOPT_HTTPHEADER, array( 'Expect:' ) );
 408+ } else {
 409+ curl_setopt( $c, CURLOPT_CUSTOMREQUEST, $this->method );
 410+ }
 411+
 412+ # Set the referer to $wgTitle, even in command-line mode
 413+ # This is useful for interwiki transclusion, where the foreign
 414+ # server wants to know what the referring page is.
 415+ # $_SERVER['REQUEST_URI'] gives a less reliable indication of the
 416+ # referring page.
 417+ if ( is_object( $wgTitle ) ) {
 418+ curl_setopt( $c, CURLOPT_REFERER, $wgTitle->getFullURL() );
 419+ }
 420+
 421+ // set the write back function (if we are writing to a file)
 422+ if( $this->target_file_path ) {
 423+ $cwrite = new simpleFileWriter( $this->target_file_path,
 424+ $this->upload_session_key,
 425+ $this->do_close_session_update
 426+ );
 427+ if( !$cwrite->status->isOK() ) {
 428+ wfDebug( __METHOD__ . "ERROR in setting up simpleFileWriter\n" );
 429+ $status = $cwrite->status;
 430+ return $status;
 431+ }
 432+ curl_setopt( $c, CURLOPT_WRITEFUNCTION, array( $cwrite, 'callbackWriteBody' ) );
 433+ }
 434+
 435+ // start output grabber:
 436+ if( !$this->target_file_path )
 437+ ob_start();
 438+
 439+ //run the actual curl_exec:
 440+ try {
 441+ if ( false === curl_exec( $c ) ) {
 442+ $error_txt ='Error sending request: #' . curl_errno( $c ) .' '. curl_error( $c );
 443+ wfDebug( __METHOD__ . $error_txt . "\n" );
 444+ $status = Status::newFatal( $error_txt );
 445+ }
 446+ } catch ( Exception $e ) {
 447+ // do something with curl exec error?
 448+ }
 449+ // if direct request output the results to the stats value:
 450+ if( !$this->target_file_path && $status->isOK() ) {
 451+ $status->value = ob_get_contents();
 452+ ob_end_clean();
 453+ }
 454+ // if we wrote to a target file close up or return error
 455+ if( $this->target_file_path ) {
 456+ $cwrite->close();
 457+ if( !$cwrite->status->isOK() ) {
 458+ return $cwrite->status;
 459+ }
 460+ }
 461+
 462+ if ( $this->headers_only ) {
 463+ $headers = explode( "\n", $status->value );
 464+ $headerArray = array();
 465+ foreach ( $headers as $header ) {
 466+ if ( !strlen( trim( $header ) ) )
 467+ continue;
 468+ $headerParts = explode( ':', $header, 2 );
 469+ if ( count( $headerParts ) == 1 ) {
 470+ $headerArray[] = trim( $header );
 471+ } else {
 472+ list( $key, $val ) = $headerParts;
 473+ $headerArray[trim( $key )] = trim( $val );
 474+ }
 475+ }
 476+ $status->value = $headerArray;
 477+ } else {
 478+ # Don't return the text of error messages, return false on error
 479+ $retcode = curl_getinfo( $c, CURLINFO_HTTP_CODE );
 480+ if ( $retcode != 200 ) {
 481+ wfDebug( __METHOD__ . ": HTTP return code $retcode\n" );
 482+ $status = Status::newFatal( "HTTP return code $retcode\n" );
 483+ }
 484+ # Don't return truncated output
 485+ $errno = curl_errno( $c );
 486+ if ( $errno != CURLE_OK ) {
 487+ $errstr = curl_error( $c );
 488+ wfDebug( __METHOD__ . ": CURL error code $errno: $errstr\n" );
 489+ $status = Status::newFatal( " CURL error code $errno: $errstr\n" );
 490+ }
 491+ }
 492+
 493+ curl_close( $c );
 494+ // return the result obj
 495+ return $status;
 496+ }
 497+}
 498+class phpHttpRequest extends HttpRequest {
 499+ public function doReq() {
 500+ global $wgTitle, $wgHTTPProxy;
 501+ # Check for php.ini allow_url_fopen
 502+ if( !ini_get( 'allow_url_fopen' ) ) {
 503+ return Status::newFatal( 'allow_url_fopen needs to be enabled for http copy to work' );
 504+ }
 505+
 506+ // start with good status:
 507+ $status = Status::newGood();
 508+
 509+ if ( $this->headers_only ) {
 510+ $status->value = get_headers( $this->url, 1 );
 511+ return $status;
 512+ }
 513+
 514+ // setup the headers
 515+ $headers = array( "User-Agent: " . Http::userAgent() );
 516+ if ( is_object( $wgTitle ) ) {
 517+ $headers[] = "Referer: ". $wgTitle->getFullURL();
 518+ }
 519+
 520+ if( strcasecmp( $this->method, 'post' ) == 0 ) {
 521+ // Required for HTTP 1.0 POSTs
 522+ $headers[] = "Content-Length: 0";
 523+ }
 524+
 525+ $httpContextOptions = array(
 526+ 'method' => $this->method,
 527+ 'header' => implode( "\r\n", $headers ),
 528+ 'timeout' => $this->timeout
 529+ );
 530+
 531+ // Proxy setup:
 532+ if( $this->proxy ){
 533+ $httpContextOptions['proxy'] = 'tcp://' . $this->proxy;
 534+ }else if ( Http::isLocalURL( $this->url ) ) {
 535+ $httpContextOptions['proxy'] = 'tcp://localhost:80';
 536+ } elseif ( $wgHTTPProxy ) {
 537+ $httpContextOptions['proxy'] = 'tcp://' . $wgHTTPProxy ;
 538+ }
 539+
 540+ $fcontext = stream_context_create (
 541+ array(
 542+ 'http' => $httpContextOptions
 543+ )
 544+ );
 545+
 546+ $fh = fopen( $this->url, "r", false, $fcontext);
 547+
 548+ // set the write back function (if we are writing to a file)
 549+ if( $this->target_file_path ) {
 550+ $cwrite = new simpleFileWriter( $this->target_file_path,
 551+ $this->upload_session_key, $this->do_close_session_update );
 552+ if( !$cwrite->status->isOK() ) {
 553+ wfDebug( __METHOD__ . "ERROR in setting up simpleFileWriter\n" );
 554+ $status = $cwrite->status;
 555+ return $status;
 556+ }
 557+
 558+ // Read $fh into the simpleFileWriter (grab in 64K chunks since
 559+ // it's likely a ~large~ media file)
 560+ while ( !feof( $fh ) ) {
 561+ $contents = fread( $fh, 65536 );
 562+ $cwrite->callbackWriteBody( $fh, $contents );
 563+ }
 564+ $cwrite->close();
 565+ // check for simpleFileWriter error:
 566+ if( !$cwrite->status->isOK() ) {
 567+ return $cwrite->status;
 568+ }
 569+ } else {
 570+ // read $fh into status->value
 571+ $status->value = @stream_get_contents( $fh );
 572+ }
 573+ //close the url file wrapper
 574+ fclose( $fh );
 575+
 576+ // check for "false"
 577+ if( $status->value === false ) {
 578+ $status->error( 'file_get_contents-failed' );
 579+ }
 580+ return $status;
 581+ }
 582+
 583+}
 584+
 585+/**
 586+ * SimpleFileWriter with session id updates
 587+ */
 588+class simpleFileWriter {
 589+ var $target_file_path;
 590+ var $status = null;
 591+ var $session_id = null;
 592+ var $session_update_interval = 0; // how often to update the session while downloading
 593+
 594+ function simpleFileWriter( $target_file_path, $upload_session_key,
 595+ $do_close_session_update = false )
 596+ {
 597+ $this->target_file_path = $target_file_path;
 598+ $this->upload_session_key = $upload_session_key;
 599+ $this->status = Status::newGood();
 600+ $this->do_close_session_update = $do_close_session_update;
 601+ // open the file:
 602+ $this->fp = fopen( $this->target_file_path, 'w' );
 603+ if( $this->fp === false ) {
 604+ $this->status = Status::newFatal( 'HTTP::could-not-open-file-for-writing' );
 605+ }
 606+ // true start time
 607+ $this->prevTime = time();
 608+ }
 609+
 610+ public function callbackWriteBody( $ch, $data_packet ) {
 611+ global $wgMaxUploadSize, $wgLang;
 612+
 613+ // write out the content
 614+ if( fwrite( $this->fp, $data_packet ) === false ) {
 615+ wfDebug( __METHOD__ ." ::could-not-write-to-file\n" );
 616+ $this->status = Status::newFatal( 'HTTP::could-not-write-to-file' );
 617+ return 0;
 618+ }
 619+
 620+ // check file size:
 621+ clearstatcache();
 622+ $this->current_fsize = filesize( $this->target_file_path );
 623+
 624+ if( $this->current_fsize > $wgMaxUploadSize ) {
 625+ wfDebug( __METHOD__ . " ::http download too large\n" );
 626+ $this->status = Status::newFatal( 'HTTP::file-has-grown-beyond-upload-limit-killing: ' .
 627+ 'downloaded more than ' .
 628+ $wgLang->formatSize( $wgMaxUploadSize ) . ' ' );
 629+ return 0;
 630+ }
 631+ // if more than session_update_interval second have passed update_session_progress
 632+ if( $this->do_close_session_update && $this->upload_session_key &&
 633+ ( ( time() - $this->prevTime ) > $this->session_update_interval ) ) {
 634+ $this->prevTime = time();
 635+ $session_status = $this->update_session_progress();
 636+ if( !$session_status->isOK() ) {
 637+ $this->status = $session_status;
 638+ wfDebug( __METHOD__ . ' update session failed or was canceled');
 639+ return 0;
 640+ }
 641+ }
 642+ return strlen( $data_packet );
 643+ }
 644+
 645+ public function update_session_progress() {
 646+ global $wgSessionsInMemcached;
 647+ $status = Status::newGood();
 648+ // start the session (if necessary)
 649+ if( !$wgSessionsInMemcached ) {
 650+ wfSuppressWarnings();
 651+ if( session_start() === false ) {
 652+ wfDebug( __METHOD__ . ' could not start session' );
 653+ exit( 0 );
 654+ }
 655+ wfRestoreWarnings();
 656+ }
 657+ $sd =& $_SESSION['wsDownload'][ $this->upload_session_key ];
 658+ // check if the user canceled the request:
 659+ if( isset( $sd['user_cancel'] ) && $sd['user_cancel'] == true ) {
 660+ //@@todo kill the download
 661+ return Status::newFatal( 'user-canceled-request' );
 662+ }
 663+ // update the progress bytes download so far:
 664+ $sd['loaded'] = $this->current_fsize;
 665+
 666+ // close down the session so we can other http queries can get session updates:
 667+ if( !$wgSessionsInMemcached )
 668+ session_write_close();
 669+
 670+ return $status;
 671+ }
 672+
 673+ public function close() {
 674+ // do a final session update:
 675+ if( $this->do_close_session_update ) {
 676+ $this->update_session_progress();
 677+ }
 678+ // close up the file handle:
 679+ if( false === fclose( $this->fp ) ) {
 680+ $this->status = Status::newFatal( 'HTTP::could-not-close-file' );
 681+ }
 682+ }
 683+
 684+}
Index: trunk/extensions/MWSearch/MWSearch_body.php
@@ -484,12 +484,12 @@
485485
486486 // Search server will be in local network but may not trigger checks on
487487 // Http::isLocal(), so suppress usage of $wgHTTPProxy if enabled.
488 - $curlOpts = array( CURLOPT_PROXY => '' );
 488+ $httpOpts = array( 'proxy' => false );
489489
490490 wfDebug( "Fetching search data from $searchUrl\n" );
491491 wfSuppressWarnings();
492492 wfProfileIn( $fname.'-contact-'.$host );
493 - $data = Http::get( $searchUrl, $wgLuceneSearchTimeout, $curlOpts );
 493+ $data = Http::get( $searchUrl, $wgLuceneSearchTimeout, $httpOpts);
494494 wfProfileOut( $fname.'-contact-'.$host );
495495 wfRestoreWarnings();
496496 if( $data === false ) {
Index: branches/js2-work/phase3/includes/HttpFunctions.php
@@ -1,17 +1,27 @@
22 <?php
33 /**
4 - * HTTP handling class
54 * @defgroup HTTP HTTP
6 - * @file
 5+ */
 6+
 7+/**
 8+ * Various HTTP related functions
79 * @ingroup HTTP
810 */
9 -
1011 class Http {
11 - const SYNC_DOWNLOAD = 1; // syncronous upload (in a single request)
12 - const ASYNC_DOWNLOAD = 2; // asynchronous upload
 12+ // Syncronous download (in a single request)
 13+ const SYNC_DOWNLOAD = 1;
1314
14 - var $body = '';
 15+ // Asynchronous download ( background process with multiple requests )
 16+ const ASYNC_DOWNLOAD = 2;
1517
 18+ /**
 19+ * Get the contents of a file by HTTP
 20+ * @param $method string HTTP method. Usually GET/POST
 21+ * @param $url string Full URL to act on
 22+ * @param $timeout int Seconds to timeout. 'default' falls to $wgHTTPTimeout
 23+ * @param $curlOptions array Optional array of extra params to pass
 24+ * to curl_setopt()
 25+ */
1626 public static function request( $method, $url, $opts = array() ) {
1727 $opts['method'] = ( strtoupper( $method ) == 'GET' || strtoupper( $method ) == 'POST' )
1828 ? strtoupper( $method ) : null;
@@ -27,6 +37,7 @@
2838
2939 /**
3040 * Simple wrapper for Http::request( 'GET' )
 41+ * @see Http::request()
3142 */
3243 public static function get( $url, $timeout = false, $opts = array() ) {
3344 global $wgSyncHTTPTimeout;
@@ -37,6 +48,7 @@
3849
3950 /**
4051 * Simple wrapper for Http::request( 'POST' )
 52+ * @see Http::request()
4153 */
4254 public static function post( $url, $opts = array() ) {
4355 return Http::request( 'POST', $url, $opts );
@@ -322,6 +334,8 @@
323335 $this->do_close_session_update = isset( $opt['do_close_session_update'] );
324336 $this->postData = isset( $opt['postdata'] ) ? $opt['postdata'] : '';
325337
 338+ $this->proxy = isset( $opt['proxy'] )? $opt['proxy'] : '';
 339+
326340 $this->ssl_verifyhost = (isset( $opt['ssl_verifyhost'] ))? $opt['ssl_verifyhost']: false;
327341
328342 $this->cainfo = (isset( $opt['cainfo'] ))? $op['cainfo']: false;
@@ -360,20 +374,24 @@
361375 $status = Status::newGood();
362376 $c = curl_init( $this->url );
363377
364 - // proxy setup:
365 - if ( Http::isLocalURL( $this->url ) ) {
366 - curl_setopt( $c, CURLOPT_PROXY, 'localhost:80' );
367 - } elseif ( $wgHTTPProxy ) {
368 - curl_setopt( $c, CURLOPT_PROXY, $wgHTTPProxy );
 378+ // only do proxy setup if ( not suppressed $this->proxy === false )
 379+ if( $this->proxy !== false ){
 380+ if( $this->proxy ){
 381+ curl_setopt( $c, CURLOPT_PROXY, $this->proxy );
 382+ } else if ( Http::isLocalURL( $this->url ) ) {
 383+ curl_setopt( $c, CURLOPT_PROXY, 'localhost:80' );
 384+ } else if ( $wgHTTPProxy ) {
 385+ curl_setopt( $c, CURLOPT_PROXY, $wgHTTPProxy );
 386+ }
369387 }
370388
371389 curl_setopt( $c, CURLOPT_TIMEOUT, $this->timeout );
372390 curl_setopt( $c, CURLOPT_USERAGENT, Http::userAgent() );
373391
374 - if($this->ssl_verifyhost)
 392+ if( $this->ssl_verifyhost )
375393 curl_setopt( $c, CURLOPT_SSL_VERIFYHOST, $this->ssl_verifyhost);
376394
377 - if($this->cainfo)
 395+ if( $this->cainfo )
378396 curl_setopt( $c, CURLOPT_CAINFO, $this->cainfo);
379397
380398 if ( $this->headers_only ) {
@@ -502,13 +520,28 @@
503521 // Required for HTTP 1.0 POSTs
504522 $headers[] = "Content-Length: 0";
505523 }
506 - $fcontext = stream_context_create ( array(
507 - 'http' => array(
508 - 'method' => $this->method,
509 - 'header' => implode( "\r\n", $headers ),
510 - 'timeout' => $this->timeout )
 524+
 525+ $httpContextOptions = array(
 526+ 'method' => $this->method,
 527+ 'header' => implode( "\r\n", $headers ),
 528+ 'timeout' => $this->timeout
 529+ );
 530+
 531+ // Proxy setup:
 532+ if( $this->proxy ){
 533+ $httpContextOptions['proxy'] = 'tcp://' . $this->proxy;
 534+ }else if ( Http::isLocalURL( $this->url ) ) {
 535+ $httpContextOptions['proxy'] = 'tcp://localhost:80';
 536+ } elseif ( $wgHTTPProxy ) {
 537+ $httpContextOptions['proxy'] = 'tcp://' . $wgHTTPProxy ;
 538+ }
 539+
 540+ $fcontext = stream_context_create (
 541+ array(
 542+ 'http' => $httpContextOptions
511543 )
512544 );
 545+
513546 $fh = fopen( $this->url, "r", false, $fcontext);
514547
515548 // set the write back function (if we are writing to a file)
@@ -521,7 +554,7 @@
522555 return $status;
523556 }
524557
525 - // read $fh into the simpleFileWriter (grab in 64K chunks since
 558+ // Read $fh into the simpleFileWriter (grab in 64K chunks since
526559 // it's likely a ~large~ media file)
527560 while ( !feof( $fh ) ) {
528561 $contents = fread( $fh, 65536 );
@@ -549,7 +582,7 @@
550583 }
551584
552585 /**
553 - * a simpleFileWriter with session id updates
 586+ * SimpleFileWriter with session id updates
554587 */
555588 class simpleFileWriter {
556589 var $target_file_path;

Follow-up revisions

Revision | Commit summary | Author | Date
r60996 | * update to r60811 to check ->proxy for "ignore" / false condition for php up... | dale | 23:50, 12 January 2010
r61078 | follow-up r60811 clean up code, write some tests for the existing uses of Htt... | mah | 05:56, 15 January 2010
r77073 | Add PhpHttpRequest, added in r60811 | platonides | 14:35, 21 November 2010

Comments

#Comment by Tim Starling (talk | contribs)   05:47, 21 January 2010

Marking fixme since the buggy async download code partially reintroduced to trunk here needs to be removed again.

#Comment by Mdale (talk | contribs)   22:25, 21 January 2010

Is the idea to remove it for the release ?

We have already deployed the API side for Commons (although it is presently disabled; see bug 20512). Copy-by-URL downloads depend on async download for client updates of progress.

#Comment by Saper (talk | contribs)   00:59, 22 January 2010

A patch in bug 22224 fixes the timeout = 0 problem. ForeignAPIRepo was broken.

#Comment by Mdale (talk | contribs)   03:37, 12 February 2010

Marked resolved since it was refactored in subsequent commits by mah.

Status & tagging log