r91440 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r91439‎ | r91440 | r91441 >
Date:06:35, 5 July 2011
Author:tstarling
Status:ok
Tags:
Comment:
* Merged r89628 from 1.17: bug 28840 IE URL extension fixes.
* MFT r91153: fix strpos/strrpos confusion from the above
Modified paths:
  • /branches/wmf/1.17wmf1/api.php (modified) (history)
  • /branches/wmf/1.17wmf1/img_auth.php (modified) (history)
  • /branches/wmf/1.17wmf1/includes/AutoLoader.php (modified) (history)
  • /branches/wmf/1.17wmf1/includes/RawPage.php (modified) (history)
  • /branches/wmf/1.17wmf1/includes/WebRequest.php (modified) (history)
  • /branches/wmf/1.17wmf1/includes/libs/IEUrlExtension.php (added) (history)
  • /branches/wmf/1.17wmf1/load.php (modified) (history)

Diff [purge]

Index: branches/wmf/1.17wmf1/includes/WebRequest.php
@@ -736,6 +736,62 @@
737737 }
738738
739739 /**
 740+ * Check if Internet Explorer will detect an incorrect cache extension in
 741+ * PATH_INFO or QUERY_STRING. If the request can't be allowed, show an error
 742+ * message or redirect to a safer URL. Returns true if the URL is OK, and
 743+ * false if an error message has been shown and the request should be aborted.
 744+ */
 745+ public function checkUrlExtension( $extWhitelist = array() ) {
 746+ global $wgScriptExtension;
 747+ $extWhitelist[] = ltrim( $wgScriptExtension, '.' );
 748+ if ( IEUrlExtension::areServerVarsBad( $_SERVER, $extWhitelist ) ) {
 749+ if ( !$this->wasPosted() ) {
 750+ $newUrl = IEUrlExtension::fixUrlForIE6(
 751+ $this->getFullRequestURL(), $extWhitelist );
 752+ if ( $newUrl !== false ) {
 753+ $this->doSecurityRedirect( $newUrl );
 754+ return false;
 755+ }
 756+ }
 757+ wfHttpError( 403, 'Forbidden',
 758+ 'Invalid file extension found in the path info or query string.' );
 759+
 760+ return false;
 761+ }
 762+ return true;
 763+ }
 764+
 765+ /**
 766+ * Attempt to redirect to a URL with a QUERY_STRING that's not dangerous in
 767+ * IE 6. Returns true if it was successful, false otherwise.
 768+ */
 769+ protected function doSecurityRedirect( $url ) {
 770+ header( 'Location: ' . $url );
 771+ header( 'Content-Type: text/html' );
 772+ $encUrl = htmlspecialchars( $url );
 773+ echo <<<HTML
 774+<html>
 775+<head>
 776+<title>Security redirect</title>
 777+</head>
 778+<body>
 779+<h1>Security redirect</h1>
 780+<p>
 781+We can't serve non-HTML content from the URL you have requested, because
 782+Internet Explorer would interpret it as an incorrect and potentially dangerous
 783+content type.</p>
 784+<p>Instead, please use <a href="$encUrl">this URL</a>, which is the same as the URL you have requested, except that
 785+"&amp;*" is appended. This prevents Internet Explorer from seeing a bogus file
 786+extension.
 787+</p>
 788+</body>
 789+</html>
 790+HTML;
 791+ echo "\n";
 792+ return true;
 793+ }
 794+
 795+ /**
740796 * Returns true if the PATH_INFO ends with an extension other than a script
741797 * extension. This could confuse IE for scripts that send arbitrary data which
742798 * is not HTML but may be detected as such.
@@ -753,30 +809,8 @@
754810 */
755811 public function isPathInfoBad() {
756812 global $wgScriptExtension;
757 -
758 - if ( isset( $_SERVER['QUERY_STRING'] )
759 - && preg_match( '/\.[^\\/:*?"<>|%]+(#|\?|$)/i', $_SERVER['QUERY_STRING'] ) )
760 - {
761 - // Bug 28235
762 - // Block only Internet Explorer, and requests with missing UA
763 - // headers that could be IE users behind a privacy proxy.
764 - if ( !isset( $_SERVER['HTTP_USER_AGENT'] )
765 - || preg_match( '/; *MSIE/', $_SERVER['HTTP_USER_AGENT'] ) )
766 - {
767 - return true;
768 - }
769 - }
770 -
771 - if ( !isset( $_SERVER['PATH_INFO'] ) ) {
772 - return false;
773 - }
774 - $pi = $_SERVER['PATH_INFO'];
775 - $dotPos = strrpos( $pi, '.' );
776 - if ( $dotPos === false ) {
777 - return false;
778 - }
779 - $ext = substr( $pi, $dotPos );
780 - return !in_array( $ext, array( $wgScriptExtension, '.php', '.php5' ) );
 813+ $extWhitelist[] = ltrim( $wgScriptExtension, '.' );
 814+ return IEUrlExtension::areServerVarsBad( $_SERVER, $extWhitelist );
781815 }
782816
783817 /**
@@ -1025,7 +1059,11 @@
10261060 $this->session[$key] = $data;
10271061 }
10281062
1029 - public function isPathInfoBad() {
 1063+ public function isPathInfoBad( $extWhitelist = array() ) {
10301064 return false;
10311065 }
 1066+
 1067+ public function checkUrlExtension( $extWhitelist = array() ) {
 1068+ return true;
 1069+ }
10321070 }
Property changes on: branches/wmf/1.17wmf1/includes/WebRequest.php
___________________________________________________________________
Modified: svn:mergeinfo
10331071 Merged /branches/REL1_18/phase3/includes/templates/WebRequest.php:r89627
10341072 Merged /branches/REL1_17/phase3/includes/WebRequest.php:r89628
10351073 Merged /branches/REL1_18/phase3/includes/WebRequest.php:r89627
10361074 Merged /trunk/phase3/includes/WebRequest.php:r89558
10371075 Merged /branches/REL1_18/phase3/includes/libs/WebRequest.php:r89627
Index: branches/wmf/1.17wmf1/includes/AutoLoader.php
@@ -463,6 +463,7 @@
464464
465465 # includes/libs
466466 'IEContentAnalyzer' => 'includes/libs/IEContentAnalyzer.php',
 467+ 'IEUrlExtension' => 'includes/libs/IEUrlExtension.php',
467468 'Spyc' => 'includes/libs/spyc.php',
468469
469470 # includes/media
Property changes on: branches/wmf/1.17wmf1/includes/AutoLoader.php
___________________________________________________________________
Modified: svn:mergeinfo
470471 Merged /trunk/phase3/includes/AutoLoader.php:r89558
471472 Merged /branches/REL1_18/phase3/includes/libs/AutoLoader.php:r89627
472473 Merged /branches/REL1_18/phase3/includes/templates/AutoLoader.php:r89627
473474 Merged /branches/REL1_17/phase3/includes/AutoLoader.php:r89628
474475 Merged /branches/REL1_18/phase3/includes/AutoLoader.php:r89627
Index: branches/wmf/1.17wmf1/includes/RawPage.php
@@ -118,22 +118,8 @@
119119 function view() {
120120 global $wgOut, $wgRequest;
121121
122 - if( $wgRequest->isPathInfoBad() ) {
123 - # Internet Explorer will ignore the Content-Type header if it
124 - # thinks it sees a file extension it recognizes. Make sure that
125 - # all raw requests are done through the script node, which will
126 - # have eg '.php' and should remain safe.
127 - #
128 - # We used to redirect to a canonical-form URL as a general
129 - # backwards-compatibility / good-citizen nice thing. However
130 - # a lot of servers are set up in buggy ways, resulting in
131 - # redirect loops which hang the browser until the CSS load
132 - # times out.
133 - #
134 - # Just return a 403 Forbidden and get it over with.
135 - wfHttpError( 403, 'Forbidden',
136 - 'Invalid file extension found in PATH_INFO or QUERY_STRING. ' .
137 - 'Raw pages must be accessed through the primary script entry point.' );
 122+ if( !$wgRequest->checkUrlExtension() ) {
 123+ $wgOut->disable();
138124 return;
139125 }
140126
Property changes on: branches/wmf/1.17wmf1/includes/RawPage.php
___________________________________________________________________
Modified: svn:mergeinfo
141127 Merged /branches/REL1_18/phase3/includes/templates/RawPage.php:r89627
142128 Merged /branches/REL1_17/phase3/includes/RawPage.php:r89628
143129 Merged /branches/REL1_18/phase3/includes/RawPage.php:r89627
144130 Merged /trunk/phase3/includes/RawPage.php:r89397,89558
145131 Merged /branches/REL1_18/phase3/includes/libs/RawPage.php:r89627
Index: branches/wmf/1.17wmf1/includes/libs/IEUrlExtension.php
@@ -0,0 +1,247 @@
 2+<?php
 3+
 4+/**
 5+ * Internet Explorer derives a cache filename from a URL, and then in certain
 6+ * circumstances, uses the extension of the resulting file to determine the
 7+ * content type of the data, ignoring the Content-Type header.
 8+ *
 9+ * This can be a problem, especially when non-HTML content is sent by MediaWiki,
 10+ * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
 11+ *
 12+ * Usually the script filename (e.g. api.php) is present in the URL, and this
 13+ * makes Internet Explorer think the extension is a harmless script extension.
 14+ * But Internet Explorer 6 and earlier allows the script extension to be
 15+ * obscured by encoding the dot as "%2E".
 16+ *
 17+ * This class contains functions which help in detecting and dealing with this
 18+ * situation.
 19+ *
 20+ * Checking the URL for a bad extension is somewhat complicated due to the fact
 21+ * that CGI doesn't provide a standard method to determine the URL. Instead it
 22+ * is necessary to pass a subset of $_SERVER variables, which we then attempt
 23+ * to use to guess parts of the URL.
 24+ */
 25+class IEUrlExtension {
 26+ /**
 27+ * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
 28+ * to see if it indicates that the request was sent with a bad file
 29+ * extension. Returns true if the request should be denied or modified,
 30+ * false otherwise. The relevant $_SERVER elements are:
 31+ *
 32+ * - SERVER_SOFTWARE
 33+ * - REQUEST_URI
 34+ * - QUERY_STRING
 35+ * - PATH_INFO
 36+ *
 37+ * If the a variable is unset in $_SERVER, it should be unset in $vars.
 38+ *
 39+ * @param $vars A subset of $_SERVER.
 40+ * @param $extWhitelist Extensions which are allowed, assumed harmless.
 41+ */
 42+ public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
 43+ // Check QUERY_STRING or REQUEST_URI
 44+ if ( isset( $vars['SERVER_SOFTWARE'] )
 45+ && isset( $vars['REQUEST_URI'] )
 46+ && self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] ) )
 47+ {
 48+ $urlPart = $vars['REQUEST_URI'];
 49+ } elseif ( isset( $vars['QUERY_STRING'] ) ) {
 50+ $urlPart = $vars['QUERY_STRING'];
 51+ } else {
 52+ $urlPart = '';
 53+ }
 54+
 55+ if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
 56+ return true;
 57+ }
 58+
 59+ // Some servers have PATH_INFO but not REQUEST_URI, so we check both
 60+ // to be on the safe side.
 61+ if ( isset( $vars['PATH_INFO'] )
 62+ && self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
 63+ {
 64+ return true;
 65+ }
 66+
 67+ // All checks passed
 68+ return false;
 69+ }
 70+
 71+ /**
 72+ * Given a right-hand portion of a URL, determine whether IE would detect
 73+ * a potentially harmful file extension.
 74+ *
 75+ * @param $urlPart The right-hand portion of a URL
 76+ * @param $extWhitelist An array of file extensions which may occur in this
 77+ * URL, and which should be allowed.
 78+ * @return bool
 79+ */
 80+ public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
 81+ if ( strval( $urlPart ) === '' ) {
 82+ return false;
 83+ }
 84+
 85+ $extension = self::findIE6Extension( $urlPart );
 86+ if ( strval( $extension ) === '' ) {
 87+ // No extension or empty extension
 88+ return false;
 89+ }
 90+
 91+ if ( in_array( $extension, array( 'php', 'php5' ) ) ) {
 92+ // Script extension, OK
 93+ return false;
 94+ }
 95+ if ( in_array( $extension, $extWhitelist ) ) {
 96+ // Whitelisted extension
 97+ return false;
 98+ }
 99+
 100+ if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
 101+ // Non-alphanumeric extension, unlikely to be registered.
 102+ //
 103+ // The regex above is known to match all registered file extensions
 104+ // in a default Windows XP installation. It's important to allow
 105+ // extensions with ampersands and percent signs, since that reduces
 106+ // the number of false positives substantially.
 107+ return false;
 108+ }
 109+
 110+ // Possibly bad extension
 111+ return true;
 112+ }
 113+
 114+ /**
 115+ * Returns a variant of $url which will pass isUrlExtensionBad() but has the
 116+ * same GET parameters, or false if it can't figure one out.
 117+ */
 118+ public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
 119+ $questionPos = strpos( $url, '?' );
 120+ if ( $questionPos === false ) {
 121+ $beforeQuery = $url . '?';
 122+ $query = '';
 123+ } elseif ( $questionPos === strlen( $url ) - 1 ) {
 124+ $beforeQuery = $url;
 125+ $query = '';
 126+ } else {
 127+ $beforeQuery = substr( $url, 0, $questionPos + 1 );
 128+ $query = substr( $url, $questionPos + 1 );
 129+ }
 130+
 131+ // Multiple question marks cause problems. Encode the second and
 132+ // subsequent question mark.
 133+ $query = str_replace( '?', '%3E', $query );
 134+ // Append an invalid path character so that IE6 won't see the end of the
 135+ // query string as an extension
 136+ $query .= '&*';
 137+ // Put the URL back together
 138+ $url = $beforeQuery . $query;
 139+ if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
 140+ // Avoid a redirect loop
 141+ return false;
 142+ }
 143+ return $url;
 144+ }
 145+
 146+ /**
 147+ * Determine what extension IE6 will infer from a certain query string.
 148+ * If the URL has an extension before the question mark, IE6 will use
 149+ * that and ignore the query string, but per the comment at
 150+ * isPathInfoBad() we don't have a reliable way to determine the URL,
 151+ * so isPathInfoBad() just passes in the query string for $url.
 152+ * All entry points have safe extensions (php, php5) anyway, so
 153+ * checking the query string is possibly overly paranoid but never
 154+ * insecure.
 155+ *
 156+ * The criteria for finding an extension are as follows:
 157+ * - a possible extension is a dot followed by one or more characters not
 158+ * in <>\"/:|?.#
 159+ * - if we find a possible extension followed by the end of the string or
 160+ * a #, that's our extension
 161+ * - if we find a possible extension followed by a ?, that's our extension
 162+ * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
 163+ * searching for another possible extension
 164+ * - if we find a possible extension followed by a dot or another illegal
 165+ * character, we ignore it and continue searching
 166+ *
 167+ * @param $url string URL
 168+ * @return mixed Detected extension (string), or false if none found
 169+ */
 170+ public static function findIE6Extension( $url ) {
 171+ $pos = 0;
 172+ $hashPos = strpos( $url, '#' );
 173+ if ( $hashPos !== false ) {
 174+ $urlLength = $hashPos;
 175+ } else {
 176+ $urlLength = strlen( $url );
 177+ }
 178+ $remainingLength = $urlLength;
 179+ while ( $remainingLength > 0 ) {
 180+ // Skip ahead to the next dot
 181+ $pos += strcspn( $url, '.', $pos, $remainingLength );
 182+ if ( $pos >= $urlLength ) {
 183+ // End of string, we're done
 184+ return false;
 185+ }
 186+
 187+ // We found a dot. Skip past it
 188+ $pos++;
 189+ $remainingLength = $urlLength - $pos;
 190+
 191+ // Check for illegal characters in our prospective extension,
 192+ // or for another dot
 193+ $nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
 194+ if ( $nextPos >= $urlLength ) {
 195+ // No illegal character or next dot
 196+ // We have our extension
 197+ return substr( $url, $pos, $urlLength - $pos );
 198+ }
 199+ if ( $url[$nextPos] === '?' ) {
 200+ // We've found a legal extension followed by a question mark
 201+ // If the extension is NOT exe, dll or cgi, return it
 202+ $extension = substr( $url, $pos, $nextPos - $pos );
 203+ if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' ) &&
 204+ strcasecmp( $extension, 'cgi' ) )
 205+ {
 206+ return $extension;
 207+ }
 208+ // Else continue looking
 209+ }
 210+ // We found an illegal character or another dot
 211+ // Skip to that character and continue the loop
 212+ $pos = $nextPos + 1;
 213+ $remainingLength = $urlLength - $pos;
 214+ }
 215+ return false;
 216+ }
 217+
 218+ /**
 219+ * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
 220+ * returns true if that server is known to have a REQUEST_URI variable
 221+ * with %2E not decoded to ".". On such a server, it is possible to detect
 222+ * whether the script filename has been obscured.
 223+ *
 224+ * The function returns false if the server is not known to have this
 225+ * behaviour. Microsoft IIS in particular is known to decode escaped script
 226+ * filenames.
 227+ *
 228+ * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
 229+ * or a specification in the style of a User-Agent header, such as
 230+ * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
 231+ *
 232+ * @param $serverSoftware
 233+ * @return bool
 234+ *
 235+ */
 236+ public static function haveUndecodedRequestUri( $serverSoftware ) {
 237+ static $whitelist = array(
 238+ 'Apache',
 239+ 'Zeus',
 240+ 'LiteSpeed' );
 241+ if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
 242+ return in_array( $m[1], $whitelist );
 243+ } else {
 244+ return false;
 245+ }
 246+ }
 247+
 248+}
Property changes on: branches/wmf/1.17wmf1/includes/libs/IEUrlExtension.php
___________________________________________________________________
Added: svn:mergeinfo
1249 Merged /branches/REL1_15/phase3/includes/libs/IEUrlExtension.php:r51646
2250 Merged /branches/sqlite/includes/libs/IEUrlExtension.php:r58211-58321
3251 Merged /branches/new-installer/phase3/includes/libs/IEUrlExtension.php:r43664-66004
4252 Merged /branches/wmf-deployment/includes/libs/IEUrlExtension.php:r53381
Added: svn:eol-style
5253 + native
Index: branches/wmf/1.17wmf1/img_auth.php
@@ -37,13 +37,6 @@
3838 wfForbidden('img-auth-accessdenied','img-auth-public');
3939 }
4040
41 -// Check for bug 28235: QUERY_STRING overriding the correct extension
42 -if ( isset( $_SERVER['QUERY_STRING'] )
43 - && preg_match( '/\.[^\\/:*?"<>|%]+(#|\?|$)/i', $_SERVER['QUERY_STRING'] ) )
44 -{
45 - wfForbidden( 'img-auth-accessdenied', 'img-auth-bad-query-string' );
46 -}
47 -
4841 // Extract path and image information
4942 if( !isset( $_SERVER['PATH_INFO'] ) ) {
5043 $path = $wgRequest->getText( 'path' );
@@ -55,6 +48,17 @@
5649 $path = $_SERVER['PATH_INFO'];
5750 }
5851
 52+// Check for bug 28235: QUERY_STRING overriding the correct extension
 53+$dotPos = strrpos( $path, '.' );
 54+$whitelist = array();
 55+if ( $dotPos !== false ) {
 56+ $whitelist[] = substr( $path, $dotPos + 1 );
 57+}
 58+if ( !$wgRequest->checkUrlExtension( $whitelist ) )
 59+{
 60+ return;
 61+}
 62+
5963 $filename = realpath( $wgUploadDirectory . $path );
6064 $realUpload = realpath( $wgUploadDirectory );
6165
Property changes on: branches/wmf/1.17wmf1/img_auth.php
___________________________________________________________________
Modified: svn:mergeinfo
6266 Merged /branches/REL1_17/phase3/img_auth.php:r89628
6367 Merged /branches/REL1_18/phase3/img_auth.php:r89627
6468 Merged /trunk/phase3/img_auth.php:r91153
Index: branches/wmf/1.17wmf1/api.php
@@ -44,18 +44,7 @@
4545 $starttime = microtime( true );
4646
4747 // URL safety checks
48 -//
49 -// See RawPage.php for details; summary is that MSIE can override the
50 -// Content-Type if it sees a recognized extension on the URL, such as
51 -// might be appended via PATH_INFO after 'api.php'.
52 -//
53 -// Some data formats can end up containing unfiltered user-provided data
54 -// which will end up triggering HTML detection and execution, hence
55 -// XSS injection and all that entails.
56 -//
57 -if ( $wgRequest->isPathInfoBad() ) {
58 - wfHttpError( 403, 'Forbidden',
59 - 'Invalid file extension found in PATH_INFO or QUERY_STRING.' );
 48+if ( !$wgRequest->checkUrlExtension() ) {
6049 return;
6150 }
6251
Property changes on: branches/wmf/1.17wmf1/api.php
___________________________________________________________________
Modified: svn:mergeinfo
6352 Merged /branches/REL1_17/phase3/api.php:r89628
6453 Merged /branches/REL1_18/phase3/api.php:r89627
6554 Merged /trunk/phase3/api.php:r87632,87636,87640,87644,89397
Index: branches/wmf/1.17wmf1/load.php
@@ -27,17 +27,7 @@
2828 wfProfileIn( 'load.php' );
2929
3030 // URL safety checks
31 -//
32 -// See RawPage.php for details; summary is that MSIE can override the
33 -// Content-Type if it sees a recognized extension on the URL, such as
34 -// might be appended via PATH_INFO after 'load.php'.
35 -//
36 -// Some resources can contain HTML-like strings (e.g. in messages)
37 -// which will end up triggering HTML detection and execution.
38 -//
39 -if ( $wgRequest->isPathInfoBad() ) {
40 - wfHttpError( 403, 'Forbidden',
41 - 'Invalid file extension found in PATH_INFO or QUERY_STRING.' );
 31+if ( !$wgRequest->checkUrlExtension() ) {
4232 return;
4333 }
4434
Property changes on: branches/wmf/1.17wmf1/load.php
___________________________________________________________________
Modified: svn:mergeinfo
4535 Merged /branches/REL1_17/phase3/load.php:r89628
4636 Merged /branches/REL1_18/phase3/load.php:r89627
4737 Merged /trunk/phase3/load.php:r87632,87636,87640,87644,89397

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r87711(bug 28840) Commit patch by bawolff that encodes dots in ResourceLoader modul...catrope13:10, 9 May 2011
r88883(bug 28840) URLs with dots break because of IE6 security check...catrope09:49, 26 May 2011
r89397(bug 28840) If the query string hits bug 28235, redirect to a safer URL inste...tstarling05:32, 3 June 2011
r89627MFT r89397, r89558, etc.: bug 28840 IE URL extensiontstarling05:57, 7 June 2011
r89628Merge r89627 from 1.18, equivalent to trunk r89558, r89397, etc.: bug 28840 I...tstarling07:00, 7 June 2011
r91153* (bug 29531) r89628 breaks img_auth.php...reedy01:44, 30 June 2011

Status & tagging log