r89627 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r89626‎ | r89627 | r89628 >
Date:05:57, 7 June 2011
Author:tstarling
Status:ok
Tags:
Comment:
MFT r89397, r89558, etc.: bug 28840 IE URL extension
Modified paths:
  • /branches/REL1_18/phase3/api.php (modified) (history)
  • /branches/REL1_18/phase3/img_auth.php (modified) (history)
  • /branches/REL1_18/phase3/includes/AutoLoader.php (modified) (history)
  • /branches/REL1_18/phase3/includes/RawPage.php (modified) (history)
  • /branches/REL1_18/phase3/includes/WebRequest.php (modified) (history)
  • /branches/REL1_18/phase3/includes/libs/IEUrlExtension.php (added) (history)
  • /branches/REL1_18/phase3/load.php (modified) (history)

Diff [purge]

Index: branches/REL1_18/phase3/includes/WebRequest.php
@@ -767,6 +767,62 @@
768768 }
769769
770770 /**
 771+ * Check if Internet Explorer will detect an incorrect cache extension in
 772+ * PATH_INFO or QUERY_STRING. If the request can't be allowed, show an error
 773+ * message or redirect to a safer URL. Returns true if the URL is OK, and
 774+ * false if an error message has been shown and the request should be aborted.
 775+ */
 776+ public function checkUrlExtension( $extWhitelist = array() ) {
 777+ global $wgScriptExtension;
 778+ $extWhitelist[] = ltrim( $wgScriptExtension, '.' );
 779+ if ( IEUrlExtension::areServerVarsBad( $_SERVER, $extWhitelist ) ) {
 780+ if ( !$this->wasPosted() ) {
 781+ $newUrl = IEUrlExtension::fixUrlForIE6(
 782+ $this->getFullRequestURL(), $extWhitelist );
 783+ if ( $newUrl !== false ) {
 784+ $this->doSecurityRedirect( $newUrl );
 785+ return false;
 786+ }
 787+ }
 788+ wfHttpError( 403, 'Forbidden',
 789+ 'Invalid file extension found in the path info or query string.' );
 790+
 791+ return false;
 792+ }
 793+ return true;
 794+ }
 795+
 796+ /**
 797+ * Attempt to redirect to a URL with a QUERY_STRING that's not dangerous in
 798+ * IE 6. Returns true if it was successful, false otherwise.
 799+ */
 800+ protected function doSecurityRedirect( $url ) {
 801+ header( 'Location: ' . $url );
 802+ header( 'Content-Type: text/html' );
 803+ $encUrl = htmlspecialchars( $url );
 804+ echo <<<HTML
 805+<html>
 806+<head>
 807+<title>Security redirect</title>
 808+</head>
 809+<body>
 810+<h1>Security redirect</h1>
 811+<p>
 812+We can't serve non-HTML content from the URL you have requested, because
 813+Internet Explorer would interpret it as an incorrect and potentially dangerous
 814+content type.</p>
 815+<p>Instead, please use <a href="$encUrl">this URL</a>, which is the same as the URL you have requested, except that
 816+"&amp;*" is appended. This prevents Internet Explorer from seeing a bogus file
 817+extension.
 818+</p>
 819+</body>
 820+</html>
 821+HTML;
 822+ echo "\n";
 823+ return true;
 824+ }
 825+
 826+ /**
771827 * Returns true if the PATH_INFO ends with an extension other than a script
772828 * extension. This could confuse IE for scripts that send arbitrary data which
773829 * is not HTML but may be detected as such.
@@ -784,30 +840,8 @@
785841 */
786842 public function isPathInfoBad() {
787843 global $wgScriptExtension;
788 -
789 - if ( isset( $_SERVER['QUERY_STRING'] )
790 - && preg_match( '/\.[^\\/:*?"<>|%]+(#|\?|$)/i', $_SERVER['QUERY_STRING'] ) )
791 - {
792 - // Bug 28235
793 - // Block only Internet Explorer, and requests with missing UA
794 - // headers that could be IE users behind a privacy proxy.
795 - if ( !isset( $_SERVER['HTTP_USER_AGENT'] )
796 - || preg_match( '/; *MSIE/', $_SERVER['HTTP_USER_AGENT'] ) )
797 - {
798 - return true;
799 - }
800 - }
801 -
802 - if ( !isset( $_SERVER['PATH_INFO'] ) ) {
803 - return false;
804 - }
805 - $pi = $_SERVER['PATH_INFO'];
806 - $dotPos = strrpos( $pi, '.' );
807 - if ( $dotPos === false ) {
808 - return false;
809 - }
810 - $ext = substr( $pi, $dotPos );
811 - return !in_array( $ext, array( $wgScriptExtension, '.php', '.php5' ) );
 844+ $extWhitelist[] = ltrim( $wgScriptExtension, '.' );
 845+ return IEUrlExtension::areServerVarsBad( $_SERVER, $extWhitelist );
812846 }
813847
814848 /**
@@ -1045,8 +1079,12 @@
10461080 public function getSessionArray() {
10471081 return $this->session;
10481082 }
1049 -
1050 - public function isPathInfoBad() {
 1083+
 1084+ public function isPathInfoBad( $extWhitelist = array() ) {
10511085 return false;
10521086 }
 1087+
 1088+ public function checkUrlExtension( $extWhitelist = array() ) {
 1089+ return true;
 1090+ }
10531091 }
Property changes on: branches/REL1_18/phase3/includes/WebRequest.php
___________________________________________________________________
Added: svn:mergeinfo
10541092 Merged /branches/REL1_15/phase3/includes/WebRequest.php:r51646
10551093 Merged /branches/sqlite/includes/WebRequest.php:r58211-58321
10561094 Merged /trunk/phase3/includes/WebRequest.php:r89558
10571095 Merged /branches/new-installer/phase3/includes/WebRequest.php:r43664-66004
10581096 Merged /branches/wmf-deployment/includes/WebRequest.php:r53381
Index: branches/REL1_18/phase3/includes/AutoLoader.php
@@ -502,6 +502,7 @@
503503 'CSSJanus' => 'includes/libs/CSSJanus.php',
504504 'CSSMin' => 'includes/libs/CSSMin.php',
505505 'IEContentAnalyzer' => 'includes/libs/IEContentAnalyzer.php',
 506+ 'IEUrlExtension' => 'includes/libs/IEUrlExtension.php',
506507 'JavaScriptMinifier' => 'includes/libs/JavaScriptMinifier.php',
507508 'Spyc' => 'includes/libs/spyc.php',
508509
Property changes on: branches/REL1_18/phase3/includes/AutoLoader.php
___________________________________________________________________
Modified: svn:mergeinfo
509510 Merged /trunk/phase3/includes/AutoLoader.php:r89558
Index: branches/REL1_18/phase3/includes/RawPage.php
@@ -118,22 +118,8 @@
119119 function view() {
120120 global $wgOut, $wgRequest;
121121
122 - if( $wgRequest->isPathInfoBad() ) {
123 - # Internet Explorer will ignore the Content-Type header if it
124 - # thinks it sees a file extension it recognizes. Make sure that
125 - # all raw requests are done through the script node, which will
126 - # have eg '.php' and should remain safe.
127 - #
128 - # We used to redirect to a canonical-form URL as a general
129 - # backwards-compatibility / good-citizen nice thing. However
130 - # a lot of servers are set up in buggy ways, resulting in
131 - # redirect loops which hang the browser until the CSS load
132 - # times out.
133 - #
134 - # Just return a 403 Forbidden and get it over with.
135 - wfHttpError( 403, 'Forbidden',
136 - 'Invalid file extension found in PATH_INFO or QUERY_STRING. ' .
137 - 'Raw pages must be accessed through the primary script entry point.' );
 122+ if( !$wgRequest->checkUrlExtension() ) {
 123+ $wgOut->disable();
138124 return;
139125 }
140126
Property changes on: branches/REL1_18/phase3/includes/RawPage.php
___________________________________________________________________
Added: svn:mergeinfo
141127 Merged /branches/REL1_15/phase3/includes/RawPage.php:r51646
142128 Merged /branches/sqlite/includes/RawPage.php:r58211-58321
143129 Merged /trunk/phase3/includes/RawPage.php:r89397,89558
144130 Merged /branches/new-installer/phase3/includes/RawPage.php:r43664-66004
145131 Merged /branches/wmf-deployment/includes/RawPage.php:r53381
Index: branches/REL1_18/phase3/includes/libs/IEUrlExtension.php
@@ -0,0 +1,247 @@
 2+<?php
 3+
 4+/**
 5+ * Internet Explorer derives a cache filename from a URL, and then in certain
 6+ * circumstances, uses the extension of the resulting file to determine the
 7+ * content type of the data, ignoring the Content-Type header.
 8+ *
 9+ * This can be a problem, especially when non-HTML content is sent by MediaWiki,
 10+ * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
 11+ *
 12+ * Usually the script filename (e.g. api.php) is present in the URL, and this
 13+ * makes Internet Explorer think the extension is a harmless script extension.
 14+ * But Internet Explorer 6 and earlier allows the script extension to be
 15+ * obscured by encoding the dot as "%2E".
 16+ *
 17+ * This class contains functions which help in detecting and dealing with this
 18+ * situation.
 19+ *
 20+ * Checking the URL for a bad extension is somewhat complicated due to the fact
 21+ * that CGI doesn't provide a standard method to determine the URL. Instead it
 22+ * is necessary to pass a subset of $_SERVER variables, which we then attempt
 23+ * to use to guess parts of the URL.
 24+ */
 25+class IEUrlExtension {
 26+ /**
 27+ * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
 28+ * to see if it indicates that the request was sent with a bad file
 29+ * extension. Returns true if the request should be denied or modified,
 30+ * false otherwise. The relevant $_SERVER elements are:
 31+ *
 32+ * - SERVER_SOFTWARE
 33+ * - REQUEST_URI
 34+ * - QUERY_STRING
 35+ * - PATH_INFO
 36+ *
 37+ * If the a variable is unset in $_SERVER, it should be unset in $vars.
 38+ *
 39+ * @param $vars A subset of $_SERVER.
 40+ * @param $extWhitelist Extensions which are allowed, assumed harmless.
 41+ */
 42+ public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
 43+ // Check QUERY_STRING or REQUEST_URI
 44+ if ( isset( $vars['SERVER_SOFTWARE'] )
 45+ && isset( $vars['REQUEST_URI'] )
 46+ && self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] ) )
 47+ {
 48+ $urlPart = $vars['REQUEST_URI'];
 49+ } elseif ( isset( $vars['QUERY_STRING'] ) ) {
 50+ $urlPart = $vars['QUERY_STRING'];
 51+ } else {
 52+ $urlPart = '';
 53+ }
 54+
 55+ if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
 56+ return true;
 57+ }
 58+
 59+ // Some servers have PATH_INFO but not REQUEST_URI, so we check both
 60+ // to be on the safe side.
 61+ if ( isset( $vars['PATH_INFO'] )
 62+ && self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
 63+ {
 64+ return true;
 65+ }
 66+
 67+ // All checks passed
 68+ return false;
 69+ }
 70+
 71+ /**
 72+ * Given a right-hand portion of a URL, determine whether IE would detect
 73+ * a potentially harmful file extension.
 74+ *
 75+ * @param $urlPart The right-hand portion of a URL
 76+ * @param $extWhitelist An array of file extensions which may occur in this
 77+ * URL, and which should be allowed.
 78+ * @return bool
 79+ */
 80+ public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
 81+ if ( strval( $urlPart ) === '' ) {
 82+ return false;
 83+ }
 84+
 85+ $extension = self::findIE6Extension( $urlPart );
 86+ if ( strval( $extension ) === '' ) {
 87+ // No extension or empty extension
 88+ return false;
 89+ }
 90+
 91+ if ( in_array( $extension, array( 'php', 'php5' ) ) ) {
 92+ // Script extension, OK
 93+ return false;
 94+ }
 95+ if ( in_array( $extension, $extWhitelist ) ) {
 96+ // Whitelisted extension
 97+ return false;
 98+ }
 99+
 100+ if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
 101+ // Non-alphanumeric extension, unlikely to be registered.
 102+ //
 103+ // The regex above is known to match all registered file extensions
 104+ // in a default Windows XP installation. It's important to allow
 105+ // extensions with ampersands and percent signs, since that reduces
 106+ // the number of false positives substantially.
 107+ return false;
 108+ }
 109+
 110+ // Possibly bad extension
 111+ return true;
 112+ }
 113+
 114+ /**
 115+ * Returns a variant of $url which will pass isUrlExtensionBad() but has the
 116+ * same GET parameters, or false if it can't figure one out.
 117+ */
 118+ public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
 119+ $questionPos = strpos( $url, '?' );
 120+ if ( $questionPos === false ) {
 121+ $beforeQuery = $url . '?';
 122+ $query = '';
 123+ } elseif ( $questionPos === strlen( $url ) - 1 ) {
 124+ $beforeQuery = $url;
 125+ $query = '';
 126+ } else {
 127+ $beforeQuery = substr( $url, 0, $questionPos + 1 );
 128+ $query = substr( $url, $questionPos + 1 );
 129+ }
 130+
 131+ // Multiple question marks cause problems. Encode the second and
 132+ // subsequent question mark.
 133+ $query = str_replace( '?', '%3E', $query );
 134+ // Append an invalid path character so that IE6 won't see the end of the
 135+ // query string as an extension
 136+ $query .= '&*';
 137+ // Put the URL back together
 138+ $url = $beforeQuery . $query;
 139+ if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
 140+ // Avoid a redirect loop
 141+ return false;
 142+ }
 143+ return $url;
 144+ }
 145+
 146+ /**
 147+ * Determine what extension IE6 will infer from a certain query string.
 148+ * If the URL has an extension before the question mark, IE6 will use
 149+ * that and ignore the query string, but per the comment at
 150+ * isPathInfoBad() we don't have a reliable way to determine the URL,
 151+ * so isPathInfoBad() just passes in the query string for $url.
 152+ * All entry points have safe extensions (php, php5) anyway, so
 153+ * checking the query string is possibly overly paranoid but never
 154+ * insecure.
 155+ *
 156+ * The criteria for finding an extension are as follows:
 157+ * - a possible extension is a dot followed by one or more characters not
 158+ * in <>\"/:|?.#
 159+ * - if we find a possible extension followed by the end of the string or
 160+ * a #, that's our extension
 161+ * - if we find a possible extension followed by a ?, that's our extension
 162+ * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
 163+ * searching for another possible extension
 164+ * - if we find a possible extension followed by a dot or another illegal
 165+ * character, we ignore it and continue searching
 166+ *
 167+ * @param $url string URL
 168+ * @return mixed Detected extension (string), or false if none found
 169+ */
 170+ public static function findIE6Extension( $url ) {
 171+ $pos = 0;
 172+ $hashPos = strpos( $url, '#' );
 173+ if ( $hashPos !== false ) {
 174+ $urlLength = $hashPos;
 175+ } else {
 176+ $urlLength = strlen( $url );
 177+ }
 178+ $remainingLength = $urlLength;
 179+ while ( $remainingLength > 0 ) {
 180+ // Skip ahead to the next dot
 181+ $pos += strcspn( $url, '.', $pos, $remainingLength );
 182+ if ( $pos >= $urlLength ) {
 183+ // End of string, we're done
 184+ return false;
 185+ }
 186+
 187+ // We found a dot. Skip past it
 188+ $pos++;
 189+ $remainingLength = $urlLength - $pos;
 190+
 191+ // Check for illegal characters in our prospective extension,
 192+ // or for another dot
 193+ $nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
 194+ if ( $nextPos >= $urlLength ) {
 195+ // No illegal character or next dot
 196+ // We have our extension
 197+ return substr( $url, $pos, $urlLength - $pos );
 198+ }
 199+ if ( $url[$nextPos] === '?' ) {
 200+ // We've found a legal extension followed by a question mark
 201+ // If the extension is NOT exe, dll or cgi, return it
 202+ $extension = substr( $url, $pos, $nextPos - $pos );
 203+ if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' ) &&
 204+ strcasecmp( $extension, 'cgi' ) )
 205+ {
 206+ return $extension;
 207+ }
 208+ // Else continue looking
 209+ }
 210+ // We found an illegal character or another dot
 211+ // Skip to that character and continue the loop
 212+ $pos = $nextPos + 1;
 213+ $remainingLength = $urlLength - $pos;
 214+ }
 215+ return false;
 216+ }
 217+
 218+ /**
 219+ * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
 220+ * returns true if that server is known to have a REQUEST_URI variable
 221+ * with %2E not decoded to ".". On such a server, it is possible to detect
 222+ * whether the script filename has been obscured.
 223+ *
 224+ * The function returns false if the server is not known to have this
 225+ * behaviour. Microsoft IIS in particular is known to decode escaped script
 226+ * filenames.
 227+ *
 228+ * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
 229+ * or a specification in the style of a User-Agent header, such as
 230+ * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
 231+ *
 232+ * @param $serverSoftware
 233+ * @return bool
 234+ *
 235+ */
 236+ public static function haveUndecodedRequestUri( $serverSoftware ) {
 237+ static $whitelist = array(
 238+ 'Apache',
 239+ 'Zeus',
 240+ 'LiteSpeed' );
 241+ if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
 242+ return in_array( $m[1], $whitelist );
 243+ } else {
 244+ return false;
 245+ }
 246+ }
 247+
 248+}
Property changes on: branches/REL1_18/phase3/includes/libs/IEUrlExtension.php
___________________________________________________________________
Added: svn:mergeinfo
1249 Merged /branches/REL1_15/phase3/includes/libs/IEUrlExtension.php:r51646
2250 Merged /branches/sqlite/includes/libs/IEUrlExtension.php:r58211-58321
3251 Merged /branches/new-installer/phase3/includes/libs/IEUrlExtension.php:r43664-66004
4252 Merged /branches/wmf-deployment/includes/libs/IEUrlExtension.php:r53381
Added: svn:eol-style
5253 + native
Index: branches/REL1_18/phase3/img_auth.php
@@ -38,15 +38,20 @@
3939 wfForbidden('img-auth-accessdenied','img-auth-public');
4040 }
4141
 42+$matches = WebRequest::getPathInfo();
 43+$path = $matches['title'];
 44+
4245 // Check for bug 28235: QUERY_STRING overriding the correct extension
43 -if ( isset( $_SERVER['QUERY_STRING'] )
44 - && preg_match( '/\.[^\\/:*?"<>|%]+(#|\?|$)/i', $_SERVER['QUERY_STRING'] ) )
 46+$dotPos = strpos( $path, '.' );
 47+$whitelist = array();
 48+if ( $dotPos !== false ) {
 49+ $whitelist[] = substr( $path, $dotPos + 1 );
 50+}
 51+if ( !$wgRequest->checkUrlExtension( $whitelist ) )
4552 {
46 - wfForbidden( 'img-auth-accessdenied', 'img-auth-bad-query-string' );
47 -}
 53+ return;
 54+}
4855
49 -$matches = WebRequest::getPathInfo();
50 -$path = $matches['title'];
5156 $filename = realpath( $wgUploadDirectory . $path );
5257 $realUpload = realpath( $wgUploadDirectory );
5358
Property changes on: branches/REL1_18/phase3/img_auth.php
___________________________________________________________________
Added: svn:mergeinfo
5459 Merged /branches/REL1_15/phase3/img_auth.php:r51646
5560 Merged /branches/REL1_17/phase3/img_auth.php:r81445,81448
5661 Merged /branches/sqlite/img_auth.php:r58211-58321
5762 Merged /trunk/phase3/img_auth.php:r87632,87636,87640,87644,89558
5863 Merged /branches/new-installer/phase3/img_auth.php:r43664-66004
Index: branches/REL1_18/phase3/api.php
@@ -57,18 +57,7 @@
5858 $starttime = microtime( true );
5959
6060 // URL safety checks
61 -//
62 -// See RawPage.php for details; summary is that MSIE can override the
63 -// Content-Type if it sees a recognized extension on the URL, such as
64 -// might be appended via PATH_INFO after 'api.php'.
65 -//
66 -// Some data formats can end up containing unfiltered user-provided data
67 -// which will end up triggering HTML detection and execution, hence
68 -// XSS injection and all that entails.
69 -//
70 -if ( $wgRequest->isPathInfoBad() ) {
71 - wfHttpError( 403, 'Forbidden',
72 - 'Invalid file extension found in PATH_INFO or QUERY_STRING.' );
 61+if ( !$wgRequest->checkUrlExtension() ) {
7362 return;
7463 }
7564
Property changes on: branches/REL1_18/phase3/api.php
___________________________________________________________________
Added: svn:mergeinfo
7665 Merged /branches/REL1_17/phase3/api.php:r81445,81448
7766 Merged /branches/sqlite/api.php:r58211-58321
7867 Merged /trunk/phase3/api.php:r87632,87636,87640,87644,89397
7968 Merged /branches/new-installer/phase3/api.php:r43664-66004
8069 Merged /branches/REL1_15/phase3/api.php:r51646
Index: branches/REL1_18/phase3/load.php
@@ -40,17 +40,7 @@
4141 wfProfileIn( 'load.php' );
4242
4343 // URL safety checks
44 -//
45 -// See RawPage.php for details; summary is that MSIE can override the
46 -// Content-Type if it sees a recognized extension on the URL, such as
47 -// might be appended via PATH_INFO after 'load.php'.
48 -//
49 -// Some resources can contain HTML-like strings (e.g. in messages)
50 -// which will end up triggering HTML detection and execution.
51 -//
52 -if ( $wgRequest->isPathInfoBad() ) {
53 - wfHttpError( 403, 'Forbidden',
54 - 'Invalid file extension found in PATH_INFO or QUERY_STRING.' );
 44+if ( !$wgRequest->checkUrlExtension() ) {
5545 return;
5646 }
5747
Property changes on: branches/REL1_18/phase3/load.php
___________________________________________________________________
Added: svn:mergeinfo
5848 Merged /branches/new-installer/phase3/load.php:r43664-66004
5949 Merged /branches/REL1_15/phase3/load.php:r51646
6050 Merged /branches/REL1_17/phase3/load.php:r81445,81448
6151 Merged /branches/sqlite/load.php:r58211-58321
6252 Merged /trunk/phase3/load.php:r87632,87636,87640,87644,89397

Follow-up revisions

RevisionCommit summaryAuthorDate
r89628Merge r89627 from 1.18, equivalent to trunk r89558, r89397, etc.: bug 28840 I...tstarling07:00, 7 June 2011
r91440* Merged r89628 from 1.17: bug 28840 IE URL extension fixes....tstarling06:35, 5 July 2011

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r87711(bug 28840) Commit patch by bawolff that encodes dots in ResourceLoader modul...catrope13:10, 9 May 2011
r88883(bug 28840) URLs with dots break because of IE6 security check...catrope09:49, 26 May 2011
r89397(bug 28840) If the query string hits bug 28235, redirect to a safer URL inste...tstarling05:32, 3 June 2011
r89558* Added a REQUEST_URI check to the bug 28235 handling....tstarling11:59, 6 June 2011

Status & tagging log