r61352 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r61351 | r61352 | r61353 >
Date:02:17, 22 January 2010
Author:mah
Status:resolved (Comments)
Tags:
Comment:
follow up r61078, fixes bug 22224
Rewrite of HttpFunctions.php
Stripping async download, simple file writer stuff.
Modified paths:
  • /trunk/phase3/includes/HttpFunctions.php (modified) (history)
  • /trunk/phase3/tests/HttpTest.php (modified) (history)
  • /trunk/phase3/tests/MediaWiki_TestCase.php (modified) (history)
  • /trunk/phase3/tests/phpunit.xml (modified) (history)

Diff [purge]

Index: trunk/phase3/tests/HttpTest.php
@@ -1,28 +1,46 @@
22 <?php
33
4 -class HttpTest extends PHPUnit_Framework_TestCase {
 4+class HttpTest extends PhpUnit_Framework_TestCase {
55 static $content;
66 static $headers;
7 - var $test_geturl = array( "http://www.example.com/",
8 - "http://pecl.php.net/feeds/pkg_apc.rss",
9 - "http://toolserver.org/~jan/poll/dev/main.php?page=wiki_output&id=3",
10 - "http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw",
11 - "http://www.mediawiki.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:MediaWiki_hooks&cmlimit=500&format=php",
12 - );
13 - var $test_requesturl = array( "http://en.wikipedia.org/wiki/Special:Export/User:MarkAHershberger/Weekly_reports/2010-W01" );
 7+ static $has_curl;
 8+ static $has_proxy = false;
 9+ static $proxy = "http://hulk:8080/";
 10+ var $test_geturl = array(
 11+ "http://www.example.com/",
 12+ "http://pecl.php.net/feeds/pkg_apc.rss",
 13+ "http://toolserver.org/~jan/poll/dev/main.php?page=wiki_output&id=3",
 14+ "http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw",
 15+ "http://www.mediawiki.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:MediaWiki_hooks&format=php",
 16+ );
 17+ var $test_requesturl = array( "http://en.wikipedia.org/wiki/Special:Export/User:MarkAHershberger" );
1418
1519 var $test_posturl = array( "http://www.comp.leeds.ac.uk/cgi-bin/Perl/environment-example" => "review=test" );
1620
1721 function setup() {
 22+ putenv("http_proxy"); /* Remove any proxy env var, so curl doesn't get confused */
1823 if ( is_array( self::$content ) ) {
1924 return;
2025 }
 26+ self::$has_curl = function_exists( 'curl_init' );
 27+
 28+ if ( !file_exists("/usr/bin/curl") ) {
 29+ $this->markTestIncomplete("This test requires the curl binary at /usr/bin/curl. If you have curl, please file a bug on this test, or, better yet, provide a patch.");
 30+ }
 31+
2132 $content = tempnam( sys_get_temp_dir(), "" );
2233 $headers = tempnam( sys_get_temp_dir(), "" );
2334 if ( !$content && !$headers ) {
2435 die( "Couldn't create temp file!" );
2536 }
2637
 38+ // This probably isn't the best test for a proxy, but it works on my system!
 39+ system("curl -0 -o $content -s ".self::$proxy);
 40+ $out = file_get_contents( $content );
 41+ if( $out ) {
 42+ self::$has_proxy = true;
 43+ }
 44+
2745 /* Maybe use wget instead of curl here ... just to use a different codebase? */
2846 foreach ( $this->test_geturl as $u ) {
2947 system( "curl -0 -s -D $headers '$u' -o $content" );
@@ -43,30 +61,112 @@
4462 unlink( $headers );
4563 }
4664
 65+
 66+ function testInstantiation() {
 67+ global $wgHTTPEngine;
 68+
 69+ unset($wgHTTPEngine);
 70+ $r = new HttpRequest("http://www.example.com/");
 71+ if ( self::$has_curl ) {
 72+ $this->isInstanceOf( $r, 'CurlHttpRequest' );
 73+ } else {
 74+ $this->isInstanceOf( $r, 'PhpHttpRequest' );
 75+ }
 76+ unset($r);
 77+
 78+ $wgHTTPEngine = 'php';
 79+ $r = new HttpRequest("http://www.example.com/");
 80+ $this->isInstanceOf( $r, 'PhpHttpRequest' );
 81+ unset($r);
 82+
 83+ if( !self::$has_curl ) {
 84+ $this->setExpectedException( 'MWException' );
 85+ }
 86+ $wgHTTPEngine = 'curl';
 87+ $r = new HttpRequest("http://www.example.com/");
 88+ if( self::$has_curl ) {
 89+ $this->isInstanceOf( $r, 'CurlHttpRequest' );
 90+ }
 91+ }
 92+
 93+ function runHTTPFailureChecks() {
 94+ global $wgHTTPEngine;
 95+ // Each of the following requests should result in a failure.
 96+
 97+ $timeout = 1;
 98+ $start_time = time();
 99+ $r = HTTP::get( "http://www.example.com:1/", $timeout);
 100+ $end_time = time();
 101+ $this->assertLessThan($timeout+2, $end_time - $start_time,
 102+ "Request took less than {$timeout}s via $wgHTTPEngine");
 103+ $this->assertEquals($r, false, "false -- what we get on error from Http::get()");
 104+ }
 105+
 106+ function testFailureDefault() {
 107+ global $wgHTTPEngine;
 108+
 109+ unset($wgHTTPEngine);
 110+ self::runHTTPFailureChecks();
 111+ }
 112+
 113+ function testFailurePhp() {
 114+ global $wgHTTPEngine;
 115+
 116+ $wgHTTPEngine = "php";
 117+ self::runHTTPFailureChecks();
 118+ }
 119+
 120+ function testFailureCurl() {
 121+ global $wgHTTPEngine;
 122+
 123+ if (!self::$has_curl ) {
 124+ $this->markTestIncomplete("This test requires curl.");
 125+ }
 126+
 127+ $wgHTTPEngine = "curl";
 128+ self::runHTTPFailureChecks();
 129+ }
 130+
47131 /* ./phase3/includes/Import.php:1108: $data = Http::request( $method, $url ); */
48132 /* ./includes/Import.php:1124: $link = Title::newFromText( "$interwiki:Special:Export/$page" ); */
49133 /* ./includes/Import.php:1134: return ImportStreamSource::newFromURL( $url, "POST" ); */
50 - function runHTTPRequests() {
51 - global $wgForceHTTPEngine;
 134+ function runHTTPRequests($proxy=null) {
 135+ global $wgHTTPEngine;
 136+ $opt = array();
52137
 138+ if($proxy) {
 139+ $opt['proxy'] = $proxy;
 140+ }
 141+
53142 /* no postdata here because the only request I could find in code so far didn't have any */
54143 foreach ( $this->test_requesturl as $u ) {
55 - $r = Http::request( "POST", $u );
56 - $this->assertEquals( self::$content["POST $u"], $r, "POST $u with $wgForceHTTPEngine" );
 144+ $r = Http::request( "POST", $u, $opt );
 145+ $this->assertEquals( self::$content["POST $u"], "$r", "POST $u with $wgHTTPEngine" );
57146 }
58147 }
59148
60 - function testRequestPHP() {
61 - global $wgForceHTTPEngine;
 149+ function testRequestDefault() {
 150+ global $wgHTTPEngine;
62151
63 - $wgForceHTTPEngine = "php";
 152+ unset($wgHTTPEngine);
64153 self::runHTTPRequests();
65154 }
66155
 156+ function testRequestPhp() {
 157+ global $wgHTTPEngine;
 158+
 159+ $wgHTTPEngine = "php";
 160+ self::runHTTPRequests();
 161+ }
 162+
67163 function testRequestCurl() {
68 - global $wgForceHTTPEngine;
 164+ global $wgHTTPEngine;
69165
70 - $wgForceHTTPEngine = "curl";
 166+ if (!self::$has_curl ) {
 167+ $this->markTestIncomplete("This test requires curl.");
 168+ }
 169+
 170+ $wgHTTPEngine = "curl";
71171 self::runHTTPRequests();
72172 }
73173
@@ -114,72 +214,131 @@
115215 /* ./extensions/APC/SpecialAPC.php:245: $rss = Http::get( 'http://pecl.php.net/feeds/pkg_apc.rss' ); */
116216 /* ./extensions/Interlanguage/Interlanguage.php:56: $a = Http::get( $url ); */
117217 /* ./extensions/MWSearch/MWSearch_body.php:492: $data = Http::get( $searchUrl, $wgLuceneSearchTimeout, $httpOpts); */
118 - function runHTTPGets() {
119 - global $wgForceHTTPEngine;
 218+ function runHTTPGets($proxy=null) {
 219+ global $wgHTTPEngine;
 220+ $opt = array();
120221
 222+ if($proxy) {
 223+ $opt['proxy'] = $proxy;
 224+ }
 225+
121226 foreach ( $this->test_geturl as $u ) {
122 - $r = Http::get( $u );
123 - $this->assertEquals( self::$content["GET $u"], $r, "Get $u with $wgForceHTTPEngine" );
 227+ $r = Http::get( $u, 30, $opt ); /* timeout of 30s */
 228+ $this->assertEquals( self::$content["GET $u"], "$r", "Get $u with $wgHTTPEngine" );
124229 }
125230 }
126231
127 - function testGetPHP() {
128 - global $wgForceHTTPEngine;
 232+ function testGetDefault() {
 233+ global $wgHTTPEngine;
129234
130 - $wgForceHTTPEngine = "php";
 235+ unset($wgHTTPEngine);
131236 self::runHTTPGets();
132237 }
133238
 239+ function testGetPhp() {
 240+ global $wgHTTPEngine;
 241+
 242+ $wgHTTPEngine = "php";
 243+ self::runHTTPGets();
 244+ }
 245+
134246 function testGetCurl() {
135 - global $wgForceHTTPEngine;
 247+ global $wgHTTPEngine;
136248
137 - $wgForceHTTPEngine = "curl";
 249+ if (!self::$has_curl ) {
 250+ $this->markTestIncomplete("This test requires curl.");
 251+ }
 252+
 253+ $wgHTTPEngine = "curl";
138254 self::runHTTPGets();
139255 }
140256
141257 /* ./phase3/maintenance/parserTests.inc:1618: return Http::post( $url, array( 'postdata' => wfArrayToCGI( $data ) ) ); */
142 - function runHTTPPosts() {
143 - global $wgForceHTTPEngine;
 258+ function runHTTPPosts($proxy=null) {
 259+ global $wgHTTPEngine;
 260+ $opt = array();
144261
 262+ if($proxy) {
 263+ $opt['proxy'] = $proxy;
 264+ }
 265+
145266 foreach ( $this->test_posturl as $u => $postdata ) {
146 - $r = Http::post( $u, array( "postdata" => $postdata ) );
147 - $this->assertEquals( self::$content["POST $u => $postdata"], $r, "POST $u (postdata=$postdata) with $wgForceHTTPEngine" );
 267+ $opt['postdata'] = $postdata;
 268+ $r = Http::post( $u, $opt );
 269+ $this->assertEquals( self::$content["POST $u => $postdata"], "$r",
 270+ "POST $u (postdata=$postdata) with $wgHTTPEngine" );
148271 }
149272 }
150273
151 - function testPostPHP() {
152 - global $wgForceHTTPEngine;
 274+ function testPostDefault() {
 275+ global $wgHTTPEngine;
153276
154 - $wgForceHTTPEngine = "php";
 277+ unset($wgHTTPEngine);
155278 self::runHTTPPosts();
156279 }
157280
 281+ function testPostPhp() {
 282+ global $wgHTTPEngine;
 283+
 284+ $wgHTTPEngine = "php";
 285+ self::runHTTPPosts();
 286+ }
 287+
158288 function testPostCurl() {
159 - global $wgForceHTTPEngine;
 289+ global $wgHTTPEngine;
160290
161 - $wgForceHTTPEngine = "curl";
 291+ if (!self::$has_curl ) {
 292+ $this->markTestIncomplete("This test requires curl.");
 293+ }
 294+
 295+ $wgHTTPEngine = "curl";
162296 self::runHTTPPosts();
163297 }
164298
165 - function testDoDownload() {
 299+ function runProxyRequests() {
 300+ global $wgHTTPEngine;
 301+
 302+ if(!self::$has_proxy) {
 303+ $this->markTestIncomplete("This test requires a proxy.");
 304+ }
 305+ self::runHTTPGets(self::$proxy);
 306+ self::runHTTPPosts(self::$proxy);
 307+ self::runHTTPRequests(self::$proxy);
166308 }
167309
168 - function testStartBackgroundDownload() {
 310+ function testProxyDefault() {
 311+ global $wgHTTPEngine;
 312+
 313+ unset($wgHTTPEngine);
 314+ self::runProxyRequests();
169315 }
170316
171 - function testGetUploadSessionKey() {
 317+ function testProxyPhp() {
 318+ global $wgHTTPEngine;
 319+
 320+ $wgHTTPEngine = 'php';
 321+ self::runProxyRequests();
172322 }
173323
174 - function testDoSessionIdDownload() {
 324+ function testProxyCurl() {
 325+ global $wgHTTPEngine;
 326+
 327+ if (!self::$has_curl ) {
 328+ $this->markTestIncomplete("This test requires curl.");
 329+ }
 330+
 331+ $wgHTTPEngine = 'curl';
 332+ self::runProxyRequests();
175333 }
176334
177 - function testIsLocalURL() {
 335+ function testIsLocalUrl() {
178336 }
179337
180338 /* ./extensions/DonationInterface/payflowpro_gateway/payflowpro_gateway.body.php:559: $user_agent = Http::userAgent(); */
181339 function testUserAgent() {
182340 }
183341
184 - function testIsValidURI() {
 342+ function testIsValidUrl() {
185343 }
186 -}
 344+
 345+}
\ No newline at end of file
Index: trunk/phase3/tests/MediaWiki_TestCase.php
@@ -7,8 +7,9 @@
88 */
99 protected function buildTestDatabase( $tables ) {
1010 global $testOptions, $wgDBprefix, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
 11+ $this->markTestIncomplete("This test requires DB admin user credentials.");
1112 $wgDBprefix = 'parsertest_';
12 - $this->markTestIncomplete("This test requires DB admin user credentials.");
 13+
1314 $db = new DatabaseMysql(
1415 $wgDBserver,
1516 $wgDBadminuser,
Index: trunk/phase3/tests/phpunit.xml
@@ -1,10 +1,10 @@
22 <!-- See http://www.phpunit.de/manual/3.3/en/appendixes.configuration.html -->
33 <phpunit bootstrap="./bootstrap.php"
44 colors="false"
5 - convertErrorsToExceptions="true"
6 - convertNoticesToExceptions="true"
7 - convertWarningsToExceptions="true"
8 - stopOnFailure="true">
 5+ stopOnFailure="false">
 6+ <!-- convertErrorsToExceptions="true" -->
 7+ <!-- convertNoticesToExceptions="true" -->
 8+ <!-- convertWarningsToExceptions="true" -->
99 <testsuite name="MediaWiki Test Suite">
1010 <directory>.</directory>
1111 </testsuite>
Index: trunk/phase3/includes/HttpFunctions.php
@@ -8,29 +8,23 @@
99 * @ingroup HTTP
1010 */
1111 class Http {
12 - // Syncronous download (in a single request)
13 - const SYNC_DOWNLOAD = 1;
14 -
15 - // Asynchronous download ( background process with multiple requests )
16 - const ASYNC_DOWNLOAD = 2;
17 -
1812 /**
19 - * Get the contents of a file by HTTP
 13+ * Perform an HTTP request
2014 * @param $method string HTTP method. Usually GET/POST
2115 * @param $url string Full URL to act on
22 - * @param $timeout int Seconds to timeout. 'default' falls to $wgHTTPTimeout
23 - * @param $curlOptions array Optional array of extra params to pass
24 - * to curl_setopt()
 16+ * @param $opts options to pass to HttpRequest object
 17+ * @returns mixed (bool)false on failure or a string on success
2518 */
2619 public static function request( $method, $url, $opts = array() ) {
27 - $opts['method'] = ( strtoupper( $method ) == 'GET' || strtoupper( $method ) == 'POST' )
28 - ? strtoupper( $method ) : null;
29 - $req = HttpRequest::newRequest( $url, $opts );
30 - $status = $req->doRequest();
31 - if( $status->isOK() ) {
32 - return $status->value;
 20+ $opts['method'] = strtoupper( $method );
 21+ if ( !array_key_exists( 'timeout', $opts ) ) {
 22+ $opts['timeout'] = 'default';
 23+ }
 24+ $req = HttpRequest::factory( $url, $opts );
 25+ $status = $req->execute();
 26+ if ( $status->isOK() ) {
 27+ return $req;
3328 } else {
34 - wfDebug( 'http error: ' . $status->getWikiText() );
3529 return false;
3630 }
3731 }
@@ -39,10 +33,8 @@
4034 * Simple wrapper for Http::request( 'GET' )
4135 * @see Http::request()
4236 */
43 - public static function get( $url, $timeout = false, $opts = array() ) {
44 - global $wgSyncHTTPTimeout;
45 - if( $timeout )
46 - $opts['timeout'] = $timeout;
 37+ public static function get( $url, $timeout = 'default', $opts = array() ) {
 38+ $opts['timeout'] = $timeout;
4739 return Http::request( 'GET', $url, $opts );
4840 }
4941
@@ -54,209 +46,7 @@
5547 return Http::request( 'POST', $url, $opts );
5648 }
5749
58 - public static function doDownload( $url, $target_file_path, $dl_mode = self::SYNC_DOWNLOAD,
59 - $redirectCount = 0 )
60 - {
61 - global $wgPhpCli, $wgMaxUploadSize, $wgMaxRedirects;
62 - // do a quick check to HEAD to insure the file size is not > $wgMaxUploadSize
63 - $headRequest = HttpRequest::newRequest( $url, array( 'headers_only' => true ) );
64 - $headResponse = $headRequest->doRequest();
65 - if( !$headResponse->isOK() ) {
66 - return $headResponse;
67 - }
68 - $head = $headResponse->value;
69 -
70 - // check for redirects:
71 - if( isset( $head['Location'] ) && strrpos( $head[0], '302' ) !== false ) {
72 - if( $redirectCount < $wgMaxRedirects ) {
73 - if( self::isValidURI( $head['Location'] ) ) {
74 - return self::doDownload( $head['Location'], $target_file_path,
75 - $dl_mode, $redirectCount++ );
76 - } else {
77 - return Status::newFatal( 'upload-proto-error' );
78 - }
79 - } else {
80 - return Status::newFatal( 'upload-too-many-redirects' );
81 - }
82 - }
83 - // we did not get a 200 ok response:
84 - if( strrpos( $head[0], '200 OK' ) === false ) {
85 - return Status::newFatal( 'upload-http-error', htmlspecialchars( $head[0] ) );
86 - }
87 -
88 - $content_length = ( isset( $head['Content-Length'] ) ) ? $head['Content-Length'] : null;
89 - if( $content_length ) {
90 - if( $content_length > $wgMaxUploadSize ) {
91 - return Status::newFatal( 'requested file length ' . $content_length .
92 - ' is greater than $wgMaxUploadSize: ' . $wgMaxUploadSize );
93 - }
94 - }
95 -
96 - // check if we can find phpCliPath (for doing a background shell request to
97 - // php to do the download:
98 - if( $wgPhpCli && wfShellExecEnabled() && $dl_mode == self::ASYNC_DOWNLOAD ) {
99 - wfDebug( __METHOD__ . "\nASYNC_DOWNLOAD\n" );
100 - //setup session and shell call:
101 - return self::initBackgroundDownload( $url, $target_file_path, $content_length );
102 - } else {
103 - wfDebug( __METHOD__ . "\nSYNC_DOWNLOAD\n" );
104 - // SYNC_DOWNLOAD download as much as we can in the time we have to execute
105 - $opts['method'] = 'GET';
106 - $opts['target_file_path'] = $target_file_path;
107 - $req = HttpRequest::newRequest( $url, $opts );
108 - return $req->doRequest();
109 - }
110 - }
111 -
11250 /**
113 - * a non blocking request (generally an exit point in the application)
114 - * should write to a file location and give updates
115 - *
116 - */
117 - private static function initBackgroundDownload( $url, $target_file_path,
118 - $content_length = null )
119 - {
120 - global $IP, $wgPhpCli, $wgServer;
121 - $status = Status::newGood();
122 -
123 - // generate a session id with all the details for the download (pid, target_file_path )
124 - $upload_session_key = self::getUploadSessionKey();
125 - $session_id = session_id();
126 -
127 - // store the url and target path:
128 - $_SESSION['wsDownload'][$upload_session_key]['url'] = $url;
129 - $_SESSION['wsDownload'][$upload_session_key]['target_file_path'] = $target_file_path;
130 - // since we request from the cmd line we lose the original host name pass in the session:
131 - $_SESSION['wsDownload'][$upload_session_key]['orgServer'] = $wgServer;
132 -
133 - if( $content_length )
134 - $_SESSION['wsDownload'][$upload_session_key]['content_length'] = $content_length;
135 -
136 - // set initial loaded bytes:
137 - $_SESSION['wsDownload'][$upload_session_key]['loaded'] = 0;
138 -
139 - // run the background download request:
140 - $cmd = $wgPhpCli . ' ' . $IP . "/maintenance/http_session_download.php " .
141 - "--sid {$session_id} --usk {$upload_session_key} --wiki " . wfWikiId();
142 - $pid = wfShellBackgroundExec( $cmd );
143 - // the pid is not of much use since we won't be visiting this same apache any-time soon.
144 - if( !$pid )
145 - return Status::newFatal( 'could not run background shell exec' );
146 -
147 - // update the status value with the $upload_session_key (for the user to
148 - // check on the status of the upload)
149 - $status->value = $upload_session_key;
150 -
151 - // return good status
152 - return $status;
153 - }
154 -
155 - static function getUploadSessionKey() {
156 - $key = mt_rand( 0, 0x7fffffff );
157 - $_SESSION['wsUploadData'][$key] = array();
158 - return $key;
159 - }
160 -
161 - /**
162 - * used to run a session based download. Is initiated via the shell.
163 - *
164 - * @param $session_id String: the session id to grab download details from
165 - * @param $upload_session_key String: the key of the given upload session
166 - * (a given client could have started a few http uploads at once)
167 - */
168 - public static function doSessionIdDownload( $session_id, $upload_session_key ) {
169 - global $wgUser, $wgEnableWriteAPI, $wgAsyncHTTPTimeout, $wgServer,
170 - $wgSessionsInMemcached, $wgSessionHandler, $wgSessionStarted;
171 - wfDebug( __METHOD__ . "\n\n doSessionIdDownload :\n\n" );
172 - // set session to the provided key:
173 - session_id( $session_id );
174 - //fire up mediaWiki session system:
175 - wfSetupSession();
176 -
177 - // start the session
178 - if( session_start() === false ) {
179 - wfDebug( __METHOD__ . ' could not start session' );
180 - }
181 - // get all the vars we need from session_id
182 - if( !isset( $_SESSION[ 'wsDownload' ][$upload_session_key] ) ) {
183 - wfDebug( __METHOD__ . ' Error:could not find upload session');
184 - exit();
185 - }
186 - // setup the global user from the session key we just inherited
187 - $wgUser = User::newFromSession();
188 -
189 - // grab the session data to setup the request:
190 - $sd =& $_SESSION['wsDownload'][$upload_session_key];
191 -
192 - // update the wgServer var ( since cmd line thinks we are localhost
193 - // when we are really orgServer)
194 - if( isset( $sd['orgServer'] ) && $sd['orgServer'] ) {
195 - $wgServer = $sd['orgServer'];
196 - }
197 - // close down the session so we can other http queries can get session
198 - // updates: (if not $wgSessionsInMemcached)
199 - if( !$wgSessionsInMemcached )
200 - session_write_close();
201 -
202 - $req = HttpRequest::newRequest( $sd['url'], array(
203 - 'target_file_path' => $sd['target_file_path'],
204 - 'upload_session_key'=> $upload_session_key,
205 - 'timeout' => $wgAsyncHTTPTimeout,
206 - 'do_close_session_update' => true
207 - ) );
208 - // run the actual request .. (this can take some time)
209 - wfDebug( __METHOD__ . 'do Session Download :: ' . $sd['url'] . ' tf: ' .
210 - $sd['target_file_path'] . "\n\n");
211 - $status = $req->doRequest();
212 - //wfDebug("done with req status is: ". $status->isOK(). ' '.$status->getWikiText(). "\n");
213 -
214 - // start up the session again:
215 - if( session_start() === false ) {
216 - wfDebug( __METHOD__ . ' ERROR:: Could not start session');
217 - }
218 - // grab the updated session data pointer
219 - $sd =& $_SESSION['wsDownload'][$upload_session_key];
220 - // if error update status:
221 - if( !$status->isOK() ) {
222 - $sd['apiUploadResult'] = FormatJson::encode(
223 - array( 'error' => $status->getWikiText() )
224 - );
225 - }
226 - // if status okay process upload using fauxReq to api:
227 - if( $status->isOK() ){
228 - // setup the FauxRequest
229 - $fauxReqData = $sd['mParams'];
230 -
231 - // Fix boolean parameters
232 - foreach( $fauxReqData as $k => $v ) {
233 - if( $v === false )
234 - unset( $fauxReqData[$k] );
235 - }
236 -
237 - $fauxReqData['action'] = 'upload';
238 - $fauxReqData['format'] = 'json';
239 - $fauxReqData['internalhttpsession'] = $upload_session_key;
240 - // evil but no other clean way about it:
241 - $faxReq = new FauxRequest( $fauxReqData, true );
242 - $processor = new ApiMain( $faxReq, $wgEnableWriteAPI );
243 -
244 - //init the mUpload var for the $processor
245 - $processor->execute();
246 - $processor->getResult()->cleanUpUTF8();
247 - $printer = $processor->createPrinterByName( 'json' );
248 - $printer->initPrinter( false );
249 - ob_start();
250 - $printer->execute();
251 - $apiUploadResult = ob_get_clean();
252 -
253 - // the status updates runner will grab the result form the session:
254 - $sd['apiUploadResult'] = $apiUploadResult;
255 - }
256 - // close the session:
257 - session_write_close();
258 - }
259 -
260 - /**
26151 * Check if the URL can be served by localhost
26252 * @param $url string Full url to check
26353 * @return bool
@@ -291,7 +81,8 @@
29282 }
29383
29484 /**
295 - * Return a standard user-agent we can use for external requests.
 85+ * A standard user-agent we can use for external requests.
 86+ * @returns string
29687 */
29788 public static function userAgent() {
29889 global $wgVersion;
@@ -301,8 +92,9 @@
30293 /**
30394 * Checks that the given URI is a valid one
30495 * @param $uri Mixed: URI to check for validity
 96+ * @returns bool
30597 */
306 - public static function isValidURI( $uri ){
 98+ public static function isValidURI( $uri ) {
30799 return preg_match(
308100 '/(ftp|http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?/',
309101 $uri,
@@ -311,373 +103,335 @@
312104 }
313105 }
314106
 107+/**
 108+ * This wrapper class will call out to curl (if available) or fallback
 109+ * to regular PHP if necessary for handling internal HTTP requests.
 110+ */
315111 class HttpRequest {
316 - var $target_file_path;
317 - var $upload_session_key;
318 - function __construct( $url, $opt ){
 112+ protected $content;
 113+ protected $timeout = 'default';
 114+ protected $headersOnly = null;
 115+ protected $postdata = null;
 116+ protected $proxy = null;
 117+ protected $no_proxy = false;
 118+ protected $sslVerifyHost = true;
 119+ protected $caInfo = null;
 120+ protected $method = "GET";
 121+ protected $url;
 122+ protected $parsed_url;
 123+ public $status;
319124
320 - global $wgSyncHTTPTimeout;
 125+ /**
 126+ * @param $url string url to use
 127+ * @param $options array (optional) extra params to pass
 128+ * Possible keys for the array:
 129+ * method
 130+ * timeout
 131+ * targetFilePath
 132+ * requestKey
 133+ * headersOnly
 134+ * postdata
 135+ * proxy
 136+ * no_proxy
 137+ * sslVerifyHost
 138+ * caInfo
 139+ */
 140+ function __construct( $url = null, $opt = array()) {
 141+ global $wgHTTPTimeout, $wgTitle;
 142+
321143 $this->url = $url;
322 - // set the timeout to default sync timeout (unless the timeout option is provided)
323 - $this->timeout = ( isset( $opt['timeout'] ) ) ? $opt['timeout'] : $wgSyncHTTPTimeout;
324 - //check special key default
325 - if($this->timeout == 'default'){
326 - $opts['timeout'] = $wgSyncHTTPTimeout;
 144+ $this->parsed_url = parse_url($url);
 145+
 146+ if ( !ini_get( 'allow_url_fopen' ) ) {
 147+ throw new MWException( 'allow_url_fopen needs to be enabled for http requests to work' );
 148+ } elseif ( !Http::isValidURI( $this->url ) ) {
 149+ throw new MWException( 'bad-uri' );
 150+ } else {
 151+ $this->status = Status::newGood( 100 ); // continue
327152 }
328153
329 - $this->method = ( isset( $opt['method'] ) ) ? $opt['method'] : 'GET';
330 - $this->target_file_path = ( isset( $opt['target_file_path'] ) )
331 - ? $opt['target_file_path'] : false;
332 - $this->upload_session_key = ( isset( $opt['upload_session_key'] ) )
333 - ? $opt['upload_session_key'] : false;
334 - $this->headers_only = ( isset( $opt['headers_only'] ) ) ? $opt['headers_only'] : false;
335 - $this->do_close_session_update = isset( $opt['do_close_session_update'] );
336 - $this->postData = isset( $opt['postdata'] ) ? $opt['postdata'] : '';
 154+ if ( array_key_exists( 'timeout', $opt ) && $opt['timeout'] != 'default' ) {
 155+ $this->timeout = $opt['timeout'];
 156+ } else {
 157+ $this->timeout = $wgHTTPTimeout;
 158+ }
337159
338 - $this->proxy = isset( $opt['proxy'] )? $opt['proxy'] : '';
 160+ $members = array( "targetFilePath", "requestKey", "headersOnly", "postdata",
 161+ "proxy", "no_proxy", "sslVerifyHost", "caInfo", "method" );
 162+ foreach ( $members as $o ) {
 163+ if ( array_key_exists( $o, $opt ) ) {
 164+ $this->$o = $opt[$o];
 165+ }
 166+ }
339167
340 - $this->ssl_verifyhost = (isset( $opt['ssl_verifyhost'] ))? $opt['ssl_verifyhost']: false;
 168+ if ( is_array( $this->postdata ) ) {
 169+ $this->postdata = wfArrayToCGI( $this->postdata );
 170+ }
341171
342 - $this->cainfo = (isset( $opt['cainfo'] ))? $op['cainfo']: false;
 172+ $this->initRequest();
343173
344 - }
 174+ if ( !$this->no_proxy ) {
 175+ $this->proxySetup();
 176+ }
345177
346 - public static function newRequest($url, $opt){
347 - # select the handler (use curl if available)
348 - if ( function_exists( 'curl_init' ) ) {
349 - return new curlHttpRequest($url, $opt);
350 - } else {
351 - return new phpHttpRequest($url, $opt);
 178+ # Set the referer to $wgTitle, even in command-line mode
 179+ # This is useful for interwiki transclusion, where the foreign
 180+ # server wants to know what the referring page is.
 181+ # $_SERVER['REQUEST_URI'] gives a less reliable indication of the
 182+ # referring page.
 183+ if ( is_object( $wgTitle ) ) {
 184+ $this->setReferrer( $wgTitle->getFullURL() );
352185 }
353186 }
354187
355188 /**
356 - * Get the contents of a file by HTTP
357 - * @param $url string Full URL to act on
358 - * @param $Opt associative array Optional array of options:
359 - * 'method' => 'GET', 'POST' etc.
360 - * 'target_file_path' => if curl should output to a target file
361 - * 'adapter' => 'curl', 'soket'
 189+ * For backwards compatibility, we provide a __toString method so
 190+ * that any code that expects a string result from Http::Get()
 191+ * will see the content of the request.
362192 */
363 - public function doRequest() {
364 - # Make sure we have a valid url
365 - if( !Http::isValidURI( $this->url ) )
366 - return Status::newFatal('bad-url');
367 - //do the actual request:
368 - return $this->doReq();
 193+ function __toString() {
 194+ return $this->content;
369195 }
370 -}
371 -class curlHttpRequest extends HttpRequest {
372 - public function doReq(){
373 - global $wgHTTPProxy, $wgTitle;
374196
375 - $status = Status::newGood();
376 - $c = curl_init( $this->url );
 197+ /**
 198+ * Generate a new request object
 199+ * @see HttpRequest::__construct
 200+ */
 201+ public static function factory( $url, $opt ) {
 202+ global $wgHTTPEngine;
 203+ $engine = $wgHTTPEngine;
377204
378 - // only do proxy setup if ( not suppressed $this->proxy === false )
379 - if( $this->proxy !== false ){
380 - if( $this->proxy ){
381 - curl_setopt( $c, CURLOPT_PROXY, $this->proxy );
382 - } else if ( Http::isLocalURL( $this->url ) ) {
383 - curl_setopt( $c, CURLOPT_PROXY, 'localhost:80' );
384 - } else if ( $wgHTTPProxy ) {
385 - curl_setopt( $c, CURLOPT_PROXY, $wgHTTPProxy );
386 - }
 205+ if ( !$wgHTTPEngine ) {
 206+ $wgHTTPEngine = function_exists( 'curl_init' ) ? 'curl' : 'php';
 207+ } elseif ( $wgHTTPEngine == 'curl' && !function_exists( 'curl_init' ) ) {
 208+ throw new MWException( 'FIXME' );
387209 }
388210
389 - curl_setopt( $c, CURLOPT_TIMEOUT, $this->timeout );
390 - curl_setopt( $c, CURLOPT_USERAGENT, Http::userAgent() );
 211+ switch( $wgHTTPEngine ) {
 212+ case 'curl':
 213+ return new CurlHttpRequest( $url, $opt );
 214+ case 'php':
 215+ return new PhpHttpRequest( $url, $opt );
 216+ default:
 217+ throw new MWException( 'FIXME' );
 218+ }
 219+ }
391220
392 - if( $this->ssl_verifyhost )
393 - curl_setopt( $c, CURLOPT_SSL_VERIFYHOST, $this->ssl_verifyhost);
 221+ public function getContent() {
 222+ return $this->content;
 223+ }
394224
395 - if( $this->cainfo )
396 - curl_setopt( $c, CURLOPT_CAINFO, $this->cainfo);
 225+ public function initRequest() {}
 226+ public function proxySetup() {}
 227+ public function setReferrer( $url ) {}
 228+ public function setCallback( $cb ) {}
 229+ public function read($fh, $content) {}
 230+ public function getCode() {}
 231+ public function execute() {}
 232+}
397233
398 - if ( $this->headers_only ) {
399 - curl_setopt( $c, CURLOPT_NOBODY, true );
400 - curl_setopt( $c, CURLOPT_HEADER, true );
401 - } elseif ( $this->method == 'POST' ) {
402 - curl_setopt( $c, CURLOPT_POST, true );
403 - curl_setopt( $c, CURLOPT_POSTFIELDS, $this->postData );
404 - // Suppress 'Expect: 100-continue' header, as some servers
405 - // will reject it with a 417 and Curl won't auto retry
406 - // with HTTP 1.0 fallback
407 - curl_setopt( $c, CURLOPT_HTTPHEADER, array( 'Expect:' ) );
408 - } else {
409 - curl_setopt( $c, CURLOPT_CUSTOMREQUEST, $this->method );
 234+/**
 235+ * HttpRequest implemented using internal curl compiled into PHP
 236+ */
 237+class CurlHttpRequest extends HttpRequest {
 238+ protected $curlHandle;
 239+ protected $curlCBSet;
 240+
 241+ public function initRequest() {
 242+ $this->curlHandle = curl_init( $this->url );
 243+ }
 244+
 245+ public function proxySetup() {
 246+ global $wgHTTPProxy;
 247+
 248+ if ( is_string( $this->proxy ) ) {
 249+ curl_setopt( $this->curlHandle, CURLOPT_PROXY, $this->proxy );
 250+ } else if ( Http::isLocalURL( $this->url ) ) { /* Not sure this makes any sense. */
 251+ curl_setopt( $this->curlHandle, CURLOPT_PROXY, 'localhost:80' );
 252+ } else if ( $wgHTTPProxy ) {
 253+ curl_setopt( $this->curlHandle, CURLOPT_PROXY, $wgHTTPProxy );
410254 }
 255+ }
411256
412 - # Set the referer to $wgTitle, even in command-line mode
413 - # This is useful for interwiki transclusion, where the foreign
414 - # server wants to know what the referring page is.
415 - # $_SERVER['REQUEST_URI'] gives a less reliable indication of the
416 - # referring page.
417 - if ( is_object( $wgTitle ) ) {
418 - curl_setopt( $c, CURLOPT_REFERER, $wgTitle->getFullURL() );
 257+ public function setCallback( $cb ) {
 258+ if ( !$this->curlCBSet ) {
 259+ $this->curlCBSet = true;
 260+ curl_setopt( $this->curlHandle, CURLOPT_WRITEFUNCTION, $cb );
419261 }
 262+ }
420263
421 - // set the write back function (if we are writing to a file)
422 - if( $this->target_file_path ) {
423 - $cwrite = new simpleFileWriter( $this->target_file_path,
424 - $this->upload_session_key,
425 - $this->do_close_session_update
426 - );
427 - if( !$cwrite->status->isOK() ) {
428 - wfDebug( __METHOD__ . "ERROR in setting up simpleFileWriter\n" );
429 - $status = $cwrite->status;
430 - return $status;
431 - }
432 - curl_setopt( $c, CURLOPT_WRITEFUNCTION, array( $cwrite, 'callbackWriteBody' ) );
 264+ public function execute() {
 265+ if( !$this->status->isOK() ) {
 266+ return $this->status;
433267 }
434268
435 - // start output grabber:
436 - if( !$this->target_file_path )
437 - ob_start();
 269+ $this->setCallback( array($this, 'read') );
438270
439 - //run the actual curl_exec:
440 - try {
441 - if ( false === curl_exec( $c ) ) {
442 - $error_txt ='Error sending request: #' . curl_errno( $c ) .' '. curl_error( $c );
443 - wfDebug( __METHOD__ . $error_txt . "\n" );
444 - $status = Status::newFatal( $error_txt );
445 - }
446 - } catch ( Exception $e ) {
447 - // do something with curl exec error?
 271+ curl_setopt( $this->curlHandle, CURLOPT_TIMEOUT, $this->timeout );
 272+ curl_setopt( $this->curlHandle, CURLOPT_USERAGENT, Http::userAgent() );
 273+ curl_setopt( $this->curlHandle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0 );
 274+
 275+ if ( $this->sslVerifyHost ) {
 276+ curl_setopt( $this->curlHandle, CURLOPT_SSL_VERIFYHOST, $this->sslVerifyHost );
448277 }
449 - // if direct request output the results to the stats value:
450 - if( !$this->target_file_path && $status->isOK() ) {
451 - $status->value = ob_get_contents();
452 - ob_end_clean();
 278+
 279+ if ( $this->caInfo ) {
 280+ curl_setopt( $this->curlHandle, CURLOPT_CAINFO, $this->caInfo );
453281 }
454 - // if we wrote to a target file close up or return error
455 - if( $this->target_file_path ) {
456 - $cwrite->close();
457 - if( !$cwrite->status->isOK() ) {
458 - return $cwrite->status;
459 - }
 282+
 283+ if ( $this->headersOnly ) {
 284+ curl_setopt( $this->curlHandle, CURLOPT_NOBODY, true );
 285+ curl_setopt( $this->curlHandle, CURLOPT_HEADER, true );
 286+ } elseif ( $this->method == 'POST' ) {
 287+ curl_setopt( $this->curlHandle, CURLOPT_POST, true );
 288+ curl_setopt( $this->curlHandle, CURLOPT_POSTFIELDS, $this->postdata );
 289+ // Suppress 'Expect: 100-continue' header, as some servers
 290+ // will reject it with a 417 and Curl won't auto retry
 291+ // with HTTP 1.0 fallback
 292+ curl_setopt( $this->curlHandle, CURLOPT_HTTPHEADER, array( 'Expect:' ) );
 293+ } else {
 294+ curl_setopt( $this->curlHandle, CURLOPT_CUSTOMREQUEST, $this->method );
460295 }
461296
462 - if ( $this->headers_only ) {
463 - $headers = explode( "\n", $status->value );
464 - $headerArray = array();
465 - foreach ( $headers as $header ) {
466 - if ( !strlen( trim( $header ) ) )
467 - continue;
468 - $headerParts = explode( ':', $header, 2 );
469 - if ( count( $headerParts ) == 1 ) {
470 - $headerArray[] = trim( $header );
471 - } else {
472 - list( $key, $val ) = $headerParts;
473 - $headerArray[trim( $key )] = trim( $val );
474 - }
 297+ try {
 298+ if ( false === curl_exec( $this->curlHandle ) ) {
 299+ $error_txt = 'Error sending request: #' . curl_errno( $this->curlHandle ) . ' ' .
 300+ curl_error( $this->curlHandle );
 301+ wfDebug( __METHOD__ . $error_txt . "\n" );
 302+ $this->status->fatal( $error_txt ); /* i18n? */
475303 }
476 - $status->value = $headerArray;
477 - } else {
478 - # Don't return the text of error messages, return false on error
479 - $retcode = curl_getinfo( $c, CURLINFO_HTTP_CODE );
480 - if ( $retcode != 200 ) {
481 - wfDebug( __METHOD__ . ": HTTP return code $retcode\n" );
482 - $status = Status::newFatal( "HTTP return code $retcode\n" );
483 - }
484 - # Don't return truncated output
485 - $errno = curl_errno( $c );
 304+ } catch ( Exception $e ) {
 305+ $errno = curl_errno( $this->curlHandle );
486306 if ( $errno != CURLE_OK ) {
487 - $errstr = curl_error( $c );
 307+ $errstr = curl_error( $this->curlHandle );
488308 wfDebug( __METHOD__ . ": CURL error code $errno: $errstr\n" );
489 - $status = Status::newFatal( " CURL error code $errno: $errstr\n" );
 309+ $this->status->fatal( "CURL error code $errno: $errstr\n" ); /* i18n? */
490310 }
491311 }
492312
493 - curl_close( $c );
494 - // return the result obj
495 - return $status;
 313+ curl_close( $this->curlHandle );
 314+
 315+ return $this->status;
496316 }
497 -}
498 -class phpHttpRequest extends HttpRequest {
499 - public function doReq() {
500 - global $wgTitle, $wgHTTPProxy;
501 - # Check for php.ini allow_url_fopen
502 - if( !ini_get( 'allow_url_fopen' ) ) {
503 - return Status::newFatal( 'allow_url_fopen needs to be enabled for http copy to work' );
504 - }
505317
506 - // start with good status:
507 - $status = Status::newGood();
 318+ public function read( $curlH, $content ) {
 319+ $this->content .= $content;
 320+ return strlen( $content );
 321+ }
508322
509 - if ( $this->headers_only ) {
510 - $status->value = get_headers( $this->url, 1 );
511 - return $status;
 323+ public function getCode() {
 324+ # Don't return truncated output
 325+ $code = curl_getinfo( $this->curlHandle, CURLINFO_HTTP_CODE );
 326+ if ( $code < 400 ) {
 327+ $this->status->setResult( true, $code );
 328+ } else {
 329+ $this->status->setResult( false, $code );
512330 }
 331+ }
 332+}
513333
514 - // setup the headers
515 - $headers = array( "User-Agent: " . Http::userAgent() );
516 - if ( is_object( $wgTitle ) ) {
517 - $headers[] = "Referer: ". $wgTitle->getFullURL();
518 - }
 334+class PhpHttpRequest extends HttpRequest {
 335+ private $reqHeaders;
 336+ private $callback;
 337+ private $fh;
519338
520 - if( strcasecmp( $this->method, 'post' ) == 0 ) {
 339+ public function initRequest() {
 340+ $this->setCallback( array( $this, 'read' ) );
 341+
 342+ $this->reqHeaders[] = "User-Agent: " . Http::userAgent();
 343+ $this->reqHeaders[] = "Accept: */*";
 344+ if ( $this->method == 'POST' ) {
521345 // Required for HTTP 1.0 POSTs
522 - $headers[] = "Content-Length: 0";
 346+ $this->reqHeaders[] = "Content-Length: " . strlen( $this->postdata );
 347+ $this->reqHeaders[] = "Content-type: application/x-www-form-urlencoded";
523348 }
524349
525 - $httpContextOptions = array(
526 - 'method' => $this->method,
527 - 'header' => implode( "\r\n", $headers ),
528 - 'timeout' => $this->timeout
529 - );
 350+ if( $this->parsed_url['scheme'] != 'http' ) {
 351+ $this->status->fatal( "Only http:// is supported currently." );
 352+ }
 353+ }
530354
531 - // Proxy setup:
532 - if( $this->proxy ){
533 - $httpContextOptions['proxy'] = 'tcp://' . $this->proxy;
534 - }else if ( Http::isLocalURL( $this->url ) ) {
535 - $httpContextOptions['proxy'] = 'tcp://localhost:80';
536 - } elseif ( $wgHTTPProxy ) {
537 - $httpContextOptions['proxy'] = 'tcp://' . $wgHTTPProxy ;
538 - }
 355+ protected function urlToTcp($url) {
 356+ $parsed_url = parse_url($url);
539357
540 - $fcontext = stream_context_create (
541 - array(
542 - 'http' => $httpContextOptions
543 - )
544 - );
 358+ return 'tcp://'.$parsed_url['host'].':'.$parsed_url['port'];
 359+ }
545360
546 - $fh = fopen( $this->url, "r", false, $fcontext);
 361+ public function proxySetup() {
 362+ global $wgHTTPProxy;
547363
548 - // set the write back function (if we are writing to a file)
549 - if( $this->target_file_path ) {
550 - $cwrite = new simpleFileWriter( $this->target_file_path,
551 - $this->upload_session_key, $this->do_close_session_update );
552 - if( !$cwrite->status->isOK() ) {
553 - wfDebug( __METHOD__ . "ERROR in setting up simpleFileWriter\n" );
554 - $status = $cwrite->status;
555 - return $status;
556 - }
557 -
558 - // Read $fh into the simpleFileWriter (grab in 64K chunks since
559 - // it's likely a ~large~ media file)
560 - while ( !feof( $fh ) ) {
561 - $contents = fread( $fh, 65536 );
562 - $cwrite->callbackWriteBody( $fh, $contents );
563 - }
564 - $cwrite->close();
565 - // check for simpleFileWriter error:
566 - if( !$cwrite->status->isOK() ) {
567 - return $cwrite->status;
568 - }
569 - } else {
570 - // read $fh into status->value
571 - $status->value = @stream_get_contents( $fh );
 364+ if ( Http::isLocalURL( $this->url ) ) {
 365+ $this->proxy = 'http://localhost:80/';
 366+ } elseif ( $wgHTTPProxy ) {
 367+ $this->proxy = $wgHTTPProxy ;
572368 }
573 - //close the url file wrapper
574 - fclose( $fh );
 369+ }
575370
576 - // check for "false"
577 - if( $status->value === false ) {
578 - $status->error( 'file_get_contents-failed' );
579 - }
580 - return $status;
 371+ public function setReferrer( $url ) {
 372+ $this->reqHeaders[] = "Referer: $url";
581373 }
582374
583 -}
 375+ public function setCallback( $cb ) {
 376+ $this->callback = $cb;
 377+ }
584378
585 -/**
586 - * SimpleFileWriter with session id updates
587 - */
588 -class simpleFileWriter {
589 - var $target_file_path;
590 - var $status = null;
591 - var $session_id = null;
592 - var $session_update_interval = 0; // how often to update the session while downloading
593 -
594 - function simpleFileWriter( $target_file_path, $upload_session_key,
595 - $do_close_session_update = false )
596 - {
597 - $this->target_file_path = $target_file_path;
598 - $this->upload_session_key = $upload_session_key;
599 - $this->status = Status::newGood();
600 - $this->do_close_session_update = $do_close_session_update;
601 - // open the file:
602 - $this->fp = fopen( $this->target_file_path, 'w' );
603 - if( $this->fp === false ) {
604 - $this->status = Status::newFatal( 'HTTP::could-not-open-file-for-writing' );
 379+ public function read( $fh, $contents ) {
 380+ if ( $this->headersOnly ) {
 381+ return false;
605382 }
606 - // true start time
607 - $this->prevTime = time();
 383+ $this->content .= $contents;
 384+
 385+ return strlen( $contents );
608386 }
609387
610 - public function callbackWriteBody( $ch, $data_packet ) {
611 - global $wgMaxUploadSize, $wgLang;
 388+ public function execute() {
 389+ if( !$this->status->isOK() ) {
 390+ return $this->status;
 391+ }
612392
613 - // write out the content
614 - if( fwrite( $this->fp, $data_packet ) === false ) {
615 - wfDebug( __METHOD__ ." ::could-not-write-to-file\n" );
616 - $this->status = Status::newFatal( 'HTTP::could-not-write-to-file' );
617 - return 0;
 393+ $opts = array();
 394+ if ( $this->proxy && !$this->no_proxy ) {
 395+ $opts['proxy'] = $this->urlToTCP($this->proxy);
 396+ $opts['request_fulluri'] = true;
618397 }
619398
620 - // check file size:
621 - clearstatcache();
622 - $this->current_fsize = filesize( $this->target_file_path );
 399+ $opts['method'] = $this->method;
 400+ $opts['timeout'] = $this->timeout;
 401+ $opts['header'] = implode( "\r\n", $this->reqHeaders );
 402+ // FOR NOW: Force everyone to HTTP 1.0
 403+ /* if ( version_compare( "5.3.0", phpversion(), ">" ) ) { */
 404+ $opts['protocol_version'] = "1.0";
 405+ /* } else { */
 406+ /* $opts['protocol_version'] = "1.1"; */
 407+ /* } */
623408
624 - if( $this->current_fsize > $wgMaxUploadSize ) {
625 - wfDebug( __METHOD__ . " ::http download too large\n" );
626 - $this->status = Status::newFatal( 'HTTP::file-has-grown-beyond-upload-limit-killing: ' .
627 - 'downloaded more than ' .
628 - $wgLang->formatSize( $wgMaxUploadSize ) . ' ' );
629 - return 0;
 409+ if ( $this->postdata ) {
 410+ $opts['content'] = $this->postdata;
630411 }
631 - // if more than session_update_interval second have passed update_session_progress
632 - if( $this->do_close_session_update && $this->upload_session_key &&
633 - ( ( time() - $this->prevTime ) > $this->session_update_interval ) ) {
634 - $this->prevTime = time();
635 - $session_status = $this->update_session_progress();
636 - if( !$session_status->isOK() ) {
637 - $this->status = $session_status;
638 - wfDebug( __METHOD__ . ' update session failed or was canceled');
639 - return 0;
640 - }
 412+
 413+ $context = stream_context_create( array( 'http' => $opts ) );
 414+ try {
 415+ $this->fh = fopen( $this->url, "r", false, $context );
 416+ } catch (Exception $e) {
 417+ $this->status->fatal($e->getMessage());
 418+ return $this->status;
641419 }
642 - return strlen( $data_packet );
643 - }
644420
645 - public function update_session_progress() {
646 - global $wgSessionsInMemcached;
647 - $status = Status::newGood();
648 - // start the session (if necessary)
649 - if( !$wgSessionsInMemcached ) {
650 - wfSuppressWarnings();
651 - if( session_start() === false ) {
652 - wfDebug( __METHOD__ . ' could not start session' );
653 - exit( 0 );
654 - }
655 - wfRestoreWarnings();
 421+ $result = stream_get_meta_data( $this->fh );
 422+ if ( $result['timed_out'] ) {
 423+ $this->status->error( __CLASS__ . '::timed-out-in-headers' );
656424 }
657 - $sd =& $_SESSION['wsDownload'][ $this->upload_session_key ];
658 - // check if the user canceled the request:
659 - if( isset( $sd['user_cancel'] ) && $sd['user_cancel'] == true ) {
660 - //@@todo kill the download
661 - return Status::newFatal( 'user-canceled-request' );
662 - }
663 - // update the progress bytes download so far:
664 - $sd['loaded'] = $this->current_fsize;
665425
666 - // close down the session so we can other http queries can get session updates:
667 - if( !$wgSessionsInMemcached )
668 - session_write_close();
 426+ $this->headers = $result['wrapper_data'];
669427
670 - return $status;
671 - }
672 -
673 - public function close() {
674 - // do a final session update:
675 - if( $this->do_close_session_update ) {
676 - $this->update_session_progress();
 428+ $end = false;
 429+ while ( !$end ) {
 430+ $contents = fread( $this->fh, 8192 );
 431+ $size = call_user_func_array( $this->callback, array( $this->fh, $contents ) );
 432+ $end = ( $size == 0 ) || feof( $this->fh );
677433 }
678 - // close up the file handle:
679 - if( false === fclose( $this->fp ) ) {
680 - $this->status = Status::newFatal( 'HTTP::could-not-close-file' );
681 - }
682 - }
 434+ fclose( $this->fh );
683435
 436+ return $this->status;
 437+ }
684438 }

Follow-up revisions

Revision | Commit summary | Author | Date
r61356 | follow up r61352 forgot to add the new global. | mah | 04:55, 22 January 2010
r89180 | Kill httpSessionDownload.php, uses Http::doSessionIdDownload() that was remov... | ialex | 18:28, 30 May 2011

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r61078 | follow-up r60811 clean up code, write some tests for the existing uses of Htt... | mah | 05:56, 15 January 2010

Comments

#Comment by Mdale (talk | contribs)   03:25, 22 January 2010

If stripping the async download, what is the plan to enable downloading files that take longer than the PHP execution time?

Why was this not included as part of the refactor? Will this add even more delay to bug 20512?

#Comment by MarkAHershberger (talk | contribs)   03:57, 22 January 2010

Tim asked me to take it out. It will probably be re-done as something in the JobQueue.

#Comment by Tim Starling (talk | contribs)   04:01, 22 January 2010

Async download should be done with concurrency control, it shouldn't be a forkbomb exploitable by anyone with upload privileges. We have a system for this already: the job queue. For example, when EmailNotification needs to send emails to hundreds of users due to a change to a commonly-watched article, it does it by queueing EnotifNotifyJob jobs, which are then executed, with minimal delay, by dedicated runJobs.php threads which run with --type=enotifNotify. The same system could easily be used for async download.

However, async download is not a high priority compared to restoring basic MW 1.15 behaviour such as a working Http::get(), in preparation for a 1.16 release.


#Comment by Tim Starling (talk | contribs)   04:33, 22 January 2010

Since you're calling it a rewrite, I'll review the whole file as of r61353:

if ( !array_key_exists( 'timeout', $opts ) ) {

Use isset. Several other instances.

protected $no_proxy = false;
protected $parsed_url;

Use camel case not underscores.

if ( !ini_get( 'allow_url_fopen' ) ) {
	throw new MWException( 'allow_url_fopen needs to be enabled for http requests to work' );

That would be partially true if this code were in PhpHttpRequest, it's certainly wrong in the parent HttpRequest. allow_url_fopen is a security vulnerability and people should not be encouraged to enable it in their php.ini. The error message should promote curl as the preferred solution.

throw new MWException( 'Invalid URL' );

Exception messages should have enough context to allow programmers to work out what the problem is, when users report the error message only with backtraces disabled. That usually means something like:

throw new MWException( __METHOD__.': Invalid URL' );

However, if it's possible for the URL to be user input, then this error should be returned in the status instead of thrown as an exception.

function __construct( $url = null, $opt = array() ) {

It's a bit weird to have a default of null and then throw an exception if the user accepts that default. I think the URL sanity check should be moved to execute(), where a Status object with a fatal error message can be returned.

Usually array variable names use the plural, e.g. $options not $opt or $option.

if ( is_array( $this->postdata ) ) {

Use camel case, e.g. postData.

$this->initRequest();

Init functions are usually an indication of a fragile architecture with confused data flow. initRequest() is undocumented, and there is no documentation of what it initialises, or of what functions will break if they are called before it. I suggest using an accessor function CurlHttpRequest::getHandle() which initialises the member variable on the first request.

The calls of proxySetup() and setReferrer() should be moved to execute() so that initialisation of the curl handle can be deferred until then, and so that the user can do further configuration of the object using set*() functions before execute() is called.

	/**
	 * For backwards compatibility, we provide a __toString method so
	 * that any code that expects a string result from Http::Get()
	 * will see the content of the request.
	 */
	function __toString() {
		return $this->content;
	}

Http::get() should continue to return a string, since the implementation of __toString() in PHP has a few gaps where conversion is not done, and bugs can appear. This __toString() should be removed. Callers which wish to have an HttpRequest can construct one, using HttpRequest::factory().

To be continued.

#Comment by Tim Starling (talk | contribs)   07:01, 22 January 2010
throw new MWException( 'FIXME' );

Needs informative error message.

Another thing on this:

if ( !ini_get( 'allow_url_fopen' ) ) {

You need to use wfIniGetBool() to get boolean ini parameters, since ini_get() can return strings such as "off", which PHP will interpret as true because any non-empty string other than "0" is truthy.

$this->parsed_url = parse_url( $url );

parse_url() spews warnings if you give it malformed input, so if this is user input then it needs to be wrapped with wfSuppressWarnings()/wfRestoreWarnings(). One other instance.

if ( $this->headersOnly ) {
	curl_setopt( $this->curlHandle, CURLOPT_NOBODY, true );
	curl_setopt( $this->curlHandle, CURLOPT_HEADER, true );

It seems kind of weird to me that this is an option to Http::get() instead of being requested using Http::request('HEAD', ...). It's like the weird curl_setopt() interface is being copied by MW when it could be hidden.

$this->setCallback( array( $this, 'read' ) );

Theoretically you could use CURLOPT_RETURNTRANSFER instead of a callback if $this->curlCBSet is false, it might save a few lines of code and a few microseconds. It would also allow you to remove the condition from setCallback() so that calling code like this would work:

$req->setCallback( array( $this, 'foo' ) );
if ( $useBar ) {
    $req->setCallback( array( $this, 'bar' ) );
}

I see there's no documentation about the fact that this doesn't work presently.

$this->status->fatal( 'Error sending request (#$1): $2',

The first parameter to Status::fatal() is a message key, not English text. Several other instances.

try {
	if ( false === curl_exec( $this->curlHandle ) ) {
	...
} catch ( Exception $e ) {

I'm not sure what the try{} is for, very few PHP functions throw exceptions, and curl_exec() is not one of them. Maybe it could be used to catch errors thrown from the CURLOPT_WRITEFUNCTION callback, but if so, it lacks any method for getting an error message from the exception and passing it on to the user. And if it's meant to catch exceptions thrown from MW, it should be catching MWException instead of Exception.

CurlHttpRequest::getCode() seems to be broken, it doesn't return anything. If the aim is to set some member variable of $this->status then maybe it should be called something different.

while ( !$end ) {
	$contents = fread( $this->fh, 8192 );
	$size = call_user_func_array( $this->callback, array( $this->fh, $contents ) );
	$end = ( $size == 0 )  || feof( $this->fh );
}

fread() will return false if there is an error, in which case the callback should not be called, the loop should exit immediately, and a meaningful error message should be returned to the user. fread() will return an empty string on EOF, although this is not a documented method of determining EOF, so it may do it in other situations as well. I suggest using feof() only to determine when the file has ended.

#Comment by MarkAHershberger (talk | contribs)   04:19, 28 January 2010

Handled in r61357

Status & tagging log