r44599 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r44598‎ | r44599 | r44600 >
Date:03:28, 15 December 2008
Author:tstarling
Status:deferred
Tags:
Comment:
Backports from 1.13.
Modified paths:
  • /branches/REL1_12/phase3 (modified) (history)
  • /branches/REL1_12/phase3/RELEASE-NOTES (modified) (history)
  • /branches/REL1_12/phase3/img_auth.php (modified) (history)
  • /branches/REL1_12/phase3/includes/AutoLoader.php (modified) (history)
  • /branches/REL1_12/phase3/includes/DefaultSettings.php (modified) (history)
  • /branches/REL1_12/phase3/includes/Exception.php (modified) (history)
  • /branches/REL1_12/phase3/includes/IEContentAnalyzer.php (added) (history)
  • /branches/REL1_12/phase3/includes/MimeMagic.php (modified) (history)
  • /branches/REL1_12/phase3/includes/SpecialImport.php (modified) (history)
  • /branches/REL1_12/phase3/includes/SpecialUndelete.php (modified) (history)
  • /branches/REL1_12/phase3/includes/SpecialUpload.php (modified) (history)
  • /branches/REL1_12/phase3/includes/StreamFile.php (modified) (history)
  • /branches/REL1_12/phase3/includes/Title.php (modified) (history)
  • /branches/REL1_12/phase3/includes/XmlTypeCheck.php (modified) (history)
  • /branches/REL1_12/phase3/includes/filerepo/FSRepo.php (modified) (history)
  • /branches/REL1_12/phase3/languages/messages/MessagesEn.php (modified) (history)
  • /branches/REL1_12/phase3/profileinfo.php (modified) (history)

Diff [purge]

Index: branches/REL1_12/phase3/includes/SpecialImport.php
@@ -42,26 +42,30 @@
4343 if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
4444 $isUpload = false;
4545 $namespace = $wgRequest->getIntOrNull( 'namespace' );
 46+ $sourceName = $wgRequest->getVal( "source" );
4647
47 - switch( $wgRequest->getVal( "source" ) ) {
48 - case "upload":
 48+ if ( !$wgUser->matchEditToken( $wgRequest->getVal( 'editToken' ) ) ) {
 49+ $source = new WikiErrorMsg( 'import-token-mismatch' );
 50+ } elseif ( $sourceName == 'upload' ) {
4951 $isUpload = true;
5052 if( $wgUser->isAllowed( 'importupload' ) ) {
5153 $source = ImportStreamSource::newFromUpload( "xmlimport" );
5254 } else {
5355 return $wgOut->permissionRequired( 'importupload' );
5456 }
55 - break;
56 - case "interwiki":
 57+ } elseif ( $sourceName == "interwiki" ) {
5758 $interwiki = $wgRequest->getVal( 'interwiki' );
58 - $history = $wgRequest->getCheck( 'interwikiHistory' );
59 - $frompage = $wgRequest->getText( "frompage" );
60 - $source = ImportStreamSource::newFromInterwiki(
61 - $interwiki,
62 - $frompage,
63 - $history );
64 - break;
65 - default:
 59+ if ( !in_array( $interwiki, $wgImportSources ) ) {
 60+ $source = new WikiErrorMsg( "import-invalid-interwiki" );
 61+ } else {
 62+ $history = $wgRequest->getCheck( 'interwikiHistory' );
 63+ $frompage = $wgRequest->getText( "frompage" );
 64+ $source = ImportStreamSource::newFromInterwiki(
 65+ $interwiki,
 66+ $frompage,
 67+ $history );
 68+ }
 69+ } else {
6670 $source = new WikiErrorMsg( "importunknownsource" );
6771 }
6872
@@ -105,6 +109,7 @@
106110 Xml::hidden( 'action', 'submit' ) .
107111 Xml::hidden( 'source', 'upload' ) .
108112 "<input type='file' name='xmlimport' value='' size='30' />" . // No Xml function for type=file? Todo?
 113+ Xml::hidden( 'editToken', $wgUser->editToken() ) .
109114 Xml::submitButton( wfMsg( 'uploadbtn' ) ) .
110115 Xml::closeElement( 'form' ) .
111116 Xml::closeElement( 'fieldset' )
@@ -123,6 +128,7 @@
124129 wfMsgExt( 'import-interwiki-text', array( 'parse' ) ) .
125130 Xml::hidden( 'action', 'submit' ) .
126131 Xml::hidden( 'source', 'interwiki' ) .
 132+ Xml::hidden( 'editToken', $wgUser->editToken() ) .
127133 Xml::openElement( 'table' ) .
128134 "<tr>
129135 <td>" .
Index: branches/REL1_12/phase3/includes/MimeMagic.php
@@ -100,6 +100,10 @@
101101 */
102102 var $mExtToMime= NULL;
103103
 104+ /** IEContentAnalyzer instance
 105+ */
 106+ var $mIEAnalyzer;
 107+
104108 /** The singleton instance
105109 */
106110 private static $instance;
@@ -733,6 +737,29 @@
734738
735739 return MEDIATYPE_UNKNOWN;
736740 }
 741+
 742+ /**
 743+ * Get the MIME types that various versions of Internet Explorer would
 744+ * detect from a chunk of the content.
 745+ *
 746+ * @param string $fileName The file name (unused at present)
 747+ * @param string $chunk The first 256 bytes of the file
 748+ * @param string $proposed The MIME type proposed by the server
 749+ */
 750+ public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
 751+ $ca = $this->getIEContentAnalyzer();
 752+ return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
 753+ }
 754+
 755+ /**
 756+ * Get a cached instance of IEContentAnalyzer
 757+ */
 758+ protected function getIEContentAnalyzer() {
 759+ if ( is_null( $this->mIEAnalyzer ) ) {
 760+ $this->mIEAnalyzer = new IEContentAnalyzer;
 761+ }
 762+ return $this->mIEAnalyzer;
 763+ }
737764 }
738765
739766
Index: branches/REL1_12/phase3/includes/filerepo/FSRepo.php
@@ -146,10 +146,8 @@
147147 if ( !wfMkdirParents( $dstDir ) ) {
148148 return $this->newFatal( 'directorycreateerror', $dstDir );
149149 }
150 - // In the deleted zone, seed new directories with a blank
151 - // index.html, to prevent crawling
152150 if ( $dstZone == 'deleted' ) {
153 - file_put_contents( "$dstDir/index.html", '' );
 151+ $this->initDeletedDir( $dstDir );
154152 }
155153 }
156154
@@ -212,6 +210,20 @@
213211 }
214212
215213 /**
 214+ * Take all available measures to prevent web accessibility of new deleted
 215+ * directories, in case the user has not configured offline storage
 216+ */
 217+ protected function initDeletedDir( $dir ) {
 218+ // Add a .htaccess file to the root of the deleted zone
 219+ $root = $this->getZonePath( 'deleted' );
 220+ if ( !file_exists( "$root/.htaccess" ) ) {
 221+ file_put_contents( "$root/.htaccess", "Deny from all\n" );
 222+ }
 223+ // Seed new directories with a blank index.html, to prevent crawling
 224+ file_put_contents( "$dir/index.html", '' );
 225+ }
 226+
 227+ /**
216228 * Pick a random name in the temp zone and store a file to it.
217229 * @param string $originalName The base name of the file as specified
218230 * by the user. The file extension will be maintained.
@@ -387,8 +399,7 @@
388400 $status->fatal( 'directorycreateerror', $archiveDir );
389401 continue;
390402 }
391 - // Seed new directories with a blank index.html, to prevent crawling
392 - file_put_contents( "$archiveDir/index.html", '' );
 403+ $this->initDeletedDir( $archiveDir );
393404 }
394405 // Check if the archive directory is writable
395406 // This doesn't appear to work on NTFS
Index: branches/REL1_12/phase3/includes/SpecialUpload.php
@@ -1214,11 +1214,11 @@
12151215 $magic=& MimeMagic::singleton();
12161216 $mime= $magic->guessMimeType($tmpfile,false);
12171217
 1218+
12181219 #check mime type, if desired
12191220 global $wgVerifyMimeType;
12201221 if ($wgVerifyMimeType) {
1221 -
1222 - wfDebug ( "\n\nmime: <$mime> extension: <$extension>\n\n");
 1222+ wfDebug ( "\n\nmime: <$mime> extension: <$extension>\n\n");
12231223 #check mime type against file extension
12241224 if( !$this->verifyExtension( $mime, $extension ) ) {
12251225 return new WikiErrorMsg( 'uploadcorrupt' );
@@ -1226,9 +1226,22 @@
12271227
12281228 #check mime type blacklist
12291229 global $wgMimeTypeBlacklist;
1230 - if( isset($wgMimeTypeBlacklist) && !is_null($wgMimeTypeBlacklist)
1231 - && $this->checkFileExtension( $mime, $wgMimeTypeBlacklist ) ) {
1232 - return new WikiErrorMsg( 'filetype-badmime', htmlspecialchars( $mime ) );
 1230+ if( isset($wgMimeTypeBlacklist) && !is_null($wgMimeTypeBlacklist) ) {
 1231+ if ( $this->checkFileExtension( $mime, $wgMimeTypeBlacklist ) ) {
 1232+ return new WikiErrorMsg( 'filetype-badmime', htmlspecialchars( $mime ) );
 1233+ }
 1234+
 1235+ # Check IE type
 1236+ $fp = fopen( $tmpfile, 'rb' );
 1237+ $chunk = fread( $fp, 256 );
 1238+ fclose( $fp );
 1239+ $extMime = $magic->guessTypesForExtension( $extension );
 1240+ $ieTypes = $magic->getIEMimeTypes( $tmpfile, $chunk, $extMime );
 1241+ foreach ( $ieTypes as $ieType ) {
 1242+ if ( $this->checkFileExtension( $ieType, $wgMimeTypeBlacklist ) ) {
 1243+ return new WikiErrorMsg( 'filetype-bad-ie-mime', $ieType );
 1244+ }
 1245+ }
12331246 }
12341247 }
12351248
@@ -1236,6 +1249,11 @@
12371250 if( $this->detectScript ( $tmpfile, $mime, $extension ) ) {
12381251 return new WikiErrorMsg( 'uploadscripted' );
12391252 }
 1253+ if( $extension == 'svg' || $mime == 'image/svg+xml' ) {
 1254+ if( $this->detectScriptInSvg( $tmpfile ) ) {
 1255+ return new WikiErrorMsg( 'uploadscripted' );
 1256+ }
 1257+ }
12401258
12411259 /**
12421260 * Scan the uploaded file for viruses
@@ -1249,6 +1267,7 @@
12501268 return true;
12511269 }
12521270
 1271+
12531272 /**
12541273 * Checks if the mime type of the uploaded file matches the file extension.
12551274 *
@@ -1347,6 +1366,7 @@
13481367 */
13491368
13501369 $tags = array(
 1370+ '<a href',
13511371 '<body',
13521372 '<head',
13531373 '<html', #also in safari
@@ -1385,6 +1405,41 @@
13861406 return false;
13871407 }
13881408
 1409+ function detectScriptInSvg( $filename ) {
 1410+ $check = new XmlTypeCheck( $filename, array( $this, 'checkSvgScriptCallback' ) );
 1411+ return $check->filterMatch;
 1412+ }
 1413+
 1414+ /**
 1415+ * @todo Replace this with a whitelist filter!
 1416+ */
 1417+ function checkSvgScriptCallback( $element, $attribs ) {
 1418+ $stripped = $this->stripXmlNamespace( $element );
 1419+
 1420+ if( $stripped == 'script' ) {
 1421+ wfDebug( __METHOD__ . ": Found script element '$element' in uploaded file.\n" );
 1422+ return true;
 1423+ }
 1424+
 1425+ foreach( $attribs as $attrib => $value ) {
 1426+ $stripped = $this->stripXmlNamespace( $attrib );
 1427+ if( substr( $stripped, 0, 2 ) == 'on' ) {
 1428+ wfDebug( __METHOD__ . ": Found script attribute '$attrib'='value' in uploaded file.\n" );
 1429+ return true;
 1430+ }
 1431+ if( $stripped == 'href' && strpos( strtolower( $value ), 'javascript:' ) !== false ) {
 1432+ wfDebug( __METHOD__ . ": Found script href attribute '$attrib'='$value' in uploaded file.\n" );
 1433+ return true;
 1434+ }
 1435+ }
 1436+ }
 1437+
 1438+ private function stripXmlNamespace( $name ) {
 1439+ // 'http://www.w3.org/2000/svg:script' -> 'script'
 1440+ $parts = explode( ':', strtolower( $name ) );
 1441+ return array_pop( $parts );
 1442+ }
 1443+
13891444 /**
13901445 * Generic wrapper function for a virus scanner program.
13911446 * This relies on the $wgAntivirus and $wgAntivirusSetup variables.
Index: branches/REL1_12/phase3/includes/AutoLoader.php
@@ -99,6 +99,7 @@
100100 'HistoryBlobCurStub' => 'includes/HistoryBlob.php',
101101 'HTMLCacheUpdate' => 'includes/HTMLCacheUpdate.php',
102102 'Http' => 'includes/HttpFunctions.php',
 103+ 'IEContentAnalyzer' => 'includes/IEContentAnalyzer.php',
103104 'IP' => 'includes/IP.php',
104105 'ImageGallery' => 'includes/ImageGallery.php',
105106 'ImagePage' => 'includes/ImagePage.php',
Index: branches/REL1_12/phase3/includes/SpecialUndelete.php
@@ -530,7 +530,7 @@
531531 */
532532 class UndeleteForm {
533533 var $mAction, $mTarget, $mTimestamp, $mRestore, $mTargetObj;
534 - var $mTargetTimestamp, $mAllowed, $mComment;
 534+ var $mTargetTimestamp, $mAllowed, $mComment, $mToken;
535535
536536 function UndeleteForm( $request, $par = "" ) {
537537 global $wgUser;
@@ -547,6 +547,7 @@
548548 $this->mPreview = $request->getCheck( 'preview' ) && $posted;
549549 $this->mDiff = $request->getCheck( 'diff' );
550550 $this->mComment = $request->getText( 'wpComment' );
 551+ $this->mToken = $request->getVal( 'token' );
551552
552553 if( $par != "" ) {
553554 $this->mTarget = $par;
@@ -604,7 +605,12 @@
605606 return $this->showRevision( $this->mTimestamp );
606607 }
607608 if( $this->mFile !== null ) {
608 - return $this->showFile( $this->mFile );
 609+ if ( !$wgUser->matchEditToken( $this->mToken, $this->mFile ) ) {
 610+ $this->showFileConfirmationForm( $this->mFile );
 611+ return false;
 612+ } else {
 613+ return $this->showFile( $this->mFile );
 614+ }
609615 }
610616 if( $this->mRestore && $this->mAction == "submit" ) {
611617 return $this->undelete();
@@ -810,6 +816,29 @@
811817 }
812818
813819 /**
 820+ * Show a form confirming whether a tokenless user really wants to see a file
 821+ */
 822+ private function showFileConfirmationForm( $key ) {
 823+ global $wgOut, $wgUser, $wgLang;
 824+ $file = new ArchivedFile( $this->mTargetObj, '', $this->mFile );
 825+ $wgOut->addWikiMsg( 'undelete-show-file-confirm',
 826+ $this->mTargetObj->getText(),
 827+ $wgLang->timeanddate( $file->getTimestamp() ) );
 828+ $wgOut->addHTML(
 829+ Xml::openElement( 'form', array(
 830+ 'method' => 'POST',
 831+ 'action' => SpecialPage::getTitleFor( 'Undelete' )->getLocalUrl(
 832+ 'target=' . urlencode( $this->mTarget ) .
 833+ '&file=' . urlencode( $key ) .
 834+ '&token=' . urlencode( $wgUser->editToken( $key ) ) )
 835+ )
 836+ ) .
 837+ Xml::submitButton( wfMsg( 'undelete-show-file-submit' ) ) .
 838+ '</form>'
 839+ );
 840+ }
 841+
 842+ /**
814843 * Show a deleted file version requested by the visitor.
815844 */
816845 function showFile( $key ) {
@@ -997,7 +1026,9 @@
9981027 $target = urlencode( $this->mTarget );
9991028 $pageLink = $sk->makeKnownLinkObj( $titleObj,
10001029 $wgLang->timeanddate( $ts, true ),
1001 - "target=$target&file=$key" );
 1030+ "target=$target" .
 1031+ "&file=$key" .
 1032+ "&token=" . urlencode( $wgUser->editToken( $key ) ) );
10021033 } else {
10031034 $checkBox = '';
10041035 $pageLink = $wgLang->timeanddate( $ts, true );
Index: branches/REL1_12/phase3/includes/Title.php
@@ -298,9 +298,13 @@
299299 $m[1] = urldecode( ltrim( $m[1], ':' ) );
300300 }
301301 $title = Title::newFromText( $m[1] );
302 - // Redirects to Special:Userlogout are not permitted
303 - if( $title instanceof Title && !$title->isSpecial( 'Userlogout' ) )
 302+ // Redirects to some special pages are not permitted
 303+ if( $title instanceof Title
 304+ && !$title->isSpecial( 'Userlogout' )
 305+ && !$title->isSpecial( 'Filepath' ) )
 306+ {
304307 return $title;
 308+ }
305309 }
306310 }
307311 return null;
Index: branches/REL1_12/phase3/includes/StreamFile.php
@@ -31,6 +31,12 @@
3232 header('Content-type: application/x-wiki');
3333 }
3434
 35+ // Don't stream it out as text/html if there was a PHP error
 36+ if ( headers_sent() ) {
 37+ echo "Headers already sent, terminating.\n";
 38+ return;
 39+ }
 40+
3541 global $wgContLanguageCode;
3642 header( "Content-Disposition: inline;filename*=utf-8'$wgContLanguageCode'" . urlencode( basename( $fname ) ) );
3743
@@ -53,27 +59,54 @@
5460 }
5561
5662 /** */
57 -function wfGetType( $filename ) {
 63+function wfGetType( $filename, $safe = true ) {
5864 global $wgTrivialMimeDetection;
5965
 66+ $ext = strrchr($filename, '.');
 67+ $ext = $ext === false ? '' : strtolower( substr( $ext, 1 ) );
 68+
6069 # trivial detection by file extension,
6170 # used for thumbnails (thumb.php)
6271 if ($wgTrivialMimeDetection) {
63 - $ext= strtolower(strrchr($filename, '.'));
6472
6573 switch ($ext) {
66 - case '.gif': return 'image/gif';
67 - case '.png': return 'image/png';
68 - case '.jpg': return 'image/jpeg';
69 - case '.jpeg': return 'image/jpeg';
 74+ case 'gif': return 'image/gif';
 75+ case 'png': return 'image/png';
 76+ case 'jpg': return 'image/jpeg';
 77+ case 'jpeg': return 'image/jpeg';
7078 }
7179
7280 return 'unknown/unknown';
7381 }
74 - else {
75 - $magic=& MimeMagic::singleton();
76 - return $magic->guessMimeType($filename); //full fancy mime detection
 82+
 83+ $magic = MimeMagic::singleton();
 84+ // Use the extension only, rather than magic numbers, to avoid opening
 85+ // up vulnerabilities due to uploads of files with allowed extensions
 86+ // but disallowed types.
 87+ $type = $magic->guessTypesForExtension( $ext );
 88+
 89+ /**
 90+ * Double-check some security settings that were done on upload but might
 91+ * have changed since.
 92+ */
 93+ if ( $safe ) {
 94+ global $wgFileBlacklist, $wgCheckFileExtensions, $wgStrictFileExtensions,
 95+ $wgFileExtensions, $wgVerifyMimeType, $wgMimeTypeBlacklist, $wgRequest;
 96+ $form = new UploadForm( $wgRequest );
 97+ list( $partName, $extList ) = $form->splitExtensions( $filename );
 98+ if ( $form->checkFileExtensionList( $extList, $wgFileBlacklist ) ) {
 99+ return 'unknown/unknown';
 100+ }
 101+ if ( $wgCheckFileExtensions && $wgStrictFileExtensions
 102+ && !$form->checkFileExtensionList( $extList, $wgFileExtensions ) )
 103+ {
 104+ return 'unknown/unknown';
 105+ }
 106+ if ( $wgVerifyMimeType && in_array( strtolower( $type ), $wgMimeTypeBlacklist ) ) {
 107+ return 'unknown/unknown';
 108+ }
77109 }
 110+ return $type;
78111 }
79112
80113
Index: branches/REL1_12/phase3/includes/DefaultSettings.php
@@ -1666,6 +1666,8 @@
16671667 'application/x-php', 'text/x-php',
16681668 # Other types that may be interpreted by some servers
16691669 'text/x-python', 'text/x-perl', 'text/x-bash', 'text/x-sh', 'text/x-csh',
 1670+ # Client-side hazards on Internet Explorer
 1671+ 'text/scriptlet', 'application/x-msdownload',
16701672 # Windows metafile, client-side vulnerability on some systems
16711673 'application/x-msmetafile'
16721674 );
Index: branches/REL1_12/phase3/includes/XmlTypeCheck.php
@@ -8,38 +8,37 @@
99 public $wellFormed = false;
1010
1111 /**
 12+ * Will be set to true if the optional element filter returned
 13+ * a match at some point.
 14+ */
 15+ public $filterMatch = false;
 16+
 17+ /**
1218 * Name of the document's root element, including any namespace
1319 * as an expanded URL.
1420 */
1521 public $rootElement = '';
1622
17 - private $softNamespaces;
18 - private $namespaces = array();
19 -
2023 /**
2124 * @param $file string filename
22 - * @param $softNamespaces bool
23 - * If set to true, use of undeclared XML namespaces will be ignored.
24 - * This matches the behavior of rsvg, but more compliant consumers
25 - * such as Firefox will reject such files.
26 - * Leave off for the default, stricter checks.
 25+ * @param $filterCallback callable (optional)
 26+ * Function to call to do additional custom validity checks from the
 27+ * SAX element handler event. This gives you access to the element
 28+ * namespace, name, and attributes, but not to text contents.
 29+ * Filter should return 'true' to toggle on $this->filterMatch
2730 */
28 - function __construct( $file, $softNamespaces=false ) {
29 - $this->softNamespaces = $softNamespaces;
 31+ function __construct( $file, $filterCallback=null ) {
 32+ $this->filterCallback = $filterCallback;
3033 $this->run( $file );
3134 }
3235
3336 private function run( $fname ) {
34 - if( $this->softNamespaces ) {
35 - $parser = xml_parser_create( 'UTF-8' );
36 - } else {
37 - $parser = xml_parser_create_ns( 'UTF-8' );
38 - }
 37+ $parser = xml_parser_create_ns( 'UTF-8' );
3938
4039 // case folding violates XML standard, turn it off
4140 xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
4241
43 - xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );
 42+ xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
4443
4544 $file = fopen( $fname, "rb" );
4645 do {
@@ -59,35 +58,22 @@
6059 xml_parser_free( $parser );
6160 }
6261
63 - private function elementOpen( $parser, $name, $attribs ) {
64 - if( $this->softNamespaces ) {
65 - // Check namespaces manually, so expat doesn't throw
66 - // errors on use of undeclared namespaces.
67 - foreach( $attribs as $attrib => $val ) {
68 - if( $attrib == 'xmlns' ) {
69 - $this->namespaces[''] = $val;
70 - } elseif( substr( $attrib, 0, strlen( 'xmlns:' ) ) == 'xmlns:' ) {
71 - $this->namespaces[substr( $attrib, strlen( 'xmlns:' ) )] = $val;
72 - }
73 - }
74 -
75 - if( strpos( $name, ':' ) === false ) {
76 - $ns = '';
77 - $subname = $name;
78 - } else {
79 - list( $ns, $subname ) = explode( ':', $name, 2 );
80 - }
81 -
82 - if( isset( $this->namespaces[$ns] ) ) {
83 - $name = $this->namespaces[$ns] . ':' . $subname;
84 - } else {
85 - // Technically this is invalid for XML with Namespaces.
86 - // But..... we'll just let it slide in soft mode.
87 - }
88 - }
89 -
90 - // We only need the first open element
 62+ private function rootElementOpen( $parser, $name, $attribs ) {
9163 $this->rootElement = $name;
92 - xml_set_element_handler( $parser, false, false );
 64+
 65+ if( is_callable( $this->filterCallback ) ) {
 66+ xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );
 67+ $this->elementOpen( $parser, $name, $attribs );
 68+ } else {
 69+ // We only need the first open element
 70+ xml_set_element_handler( $parser, false, false );
 71+ }
9372 }
 73+
 74+ private function elementOpen( $parser, $name, $attribs ) {
 75+ if( call_user_func( $this->filterCallback, $name, $attribs ) ) {
 76+ // Filter hit!
 77+ $this->filterMatch = true;
 78+ }
 79+ }
9480 }
Index: branches/REL1_12/phase3/includes/Exception.php
@@ -227,7 +227,16 @@
228228 }
229229 }
230230 } else {
231 - echo $e->__toString();
 231+ $message = "Unexpected non-MediaWiki exception encountered, of type \"" . get_class( $e ) . "\"\n" .
 232+ $e->__toString() . "\n";
 233+ if ( $GLOBALS['wgShowExceptionDetails'] ) {
 234+ $message .= "\n" . $e->getTraceAsString() ."\n";
 235+ }
 236+ if ( !empty( $GLOBALS['wgCommandLineMode'] ) ) {
 237+ wfPrintError( $message );
 238+ } else {
 239+ echo nl2br( htmlspecialchars( $message ) ). "\n";
 240+ }
232241 }
233242 }
234243
Index: branches/REL1_12/phase3/includes/IEContentAnalyzer.php
@@ -0,0 +1,823 @@
 2+<?php
 3+
 4+/**
 5+ * This class simulates Microsoft Internet Explorer's terribly broken and
 6+ * insecure MIME type detection algorithm. It can be used to check web uploads
 7+ * with an apparently safe type, to see if IE will reinterpret them to produce
 8+ * something dangerous.
 9+ *
 10+ * It is full of bugs and strange design choices, and should not under any
 11+ * circumstances be used to determine a MIME type to present to a user or
 12+ * client. (Apple Safari developers, this means you too.)
 13+ *
 14+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
 15+ * attempted to ensure that this code works in exactly the same way as Internet
 16+ * Explorer, it does not share any source code, or creative choices such as
 17+ * variable names, thus I (Tim Starling) claim copyright on it.
 18+ *
 19+ * It may be redistributed without restriction. To aid reuse, this class does
 20+ * not depend on any MediaWiki module.
 21+ */
 22+class IEContentAnalyzer {
 23+ /**
 24+ * Relevant data taken from the type table in IE 5
 25+ */
 26+ protected $baseTypeTable = array(
 27+ 'ambiguous' /*1*/ => array(
 28+ 'text/plain',
 29+ 'application/octet-stream',
 30+ 'application/x-netcdf', // [sic]
 31+ ),
 32+ 'text' /*3*/ => array(
 33+ 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
 34+ 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
 35+ ),
 36+ 'binary' /*4*/ => array(
 37+ 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
 38+ 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
 39+ 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
 40+ 'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
 41+ 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
 42+ 'application/x-msdownload'
 43+ ),
 44+ 'html' /*5*/ => array( 'text/html' ),
 45+ );
 46+
 47+ /**
 48+ * Changes to the type table in later versions of IE
 49+ */
 50+ protected $addedTypes = array(
 51+ 'ie07' => array(
 52+ 'text' => array( 'text/xml', 'application/xml' )
 53+ ),
 54+ );
 55+
 56+ /**
 57+ * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
 58+ * typical Windows installation.
 59+ *
 60+ * Used for extension to MIME type mapping if detection fails.
 61+ */
 62+ protected $registry = array(
 63+ '.323' => 'text/h323',
 64+ '.3g2' => 'video/3gpp2',
 65+ '.3gp' => 'video/3gpp',
 66+ '.3gp2' => 'video/3gpp2',
 67+ '.3gpp' => 'video/3gpp',
 68+ '.aac' => 'audio/aac',
 69+ '.ac3' => 'audio/ac3',
 70+ '.accda' => 'application/msaccess',
 71+ '.accdb' => 'application/msaccess',
 72+ '.accdc' => 'application/msaccess',
 73+ '.accde' => 'application/msaccess',
 74+ '.accdr' => 'application/msaccess',
 75+ '.accdt' => 'application/msaccess',
 76+ '.ade' => 'application/msaccess',
 77+ '.adp' => 'application/msaccess',
 78+ '.adts' => 'audio/aac',
 79+ '.ai' => 'application/postscript',
 80+ '.aif' => 'audio/aiff',
 81+ '.aifc' => 'audio/aiff',
 82+ '.aiff' => 'audio/aiff',
 83+ '.amc' => 'application/x-mpeg',
 84+ '.application' => 'application/x-ms-application',
 85+ '.asf' => 'video/x-ms-asf',
 86+ '.asx' => 'video/x-ms-asf',
 87+ '.au' => 'audio/basic',
 88+ '.avi' => 'video/avi',
 89+ '.bmp' => 'image/bmp',
 90+ '.caf' => 'audio/x-caf',
 91+ '.cat' => 'application/vnd.ms-pki.seccat',
 92+ '.cbo' => 'application/sha',
 93+ '.cdda' => 'audio/aiff',
 94+ '.cer' => 'application/x-x509-ca-cert',
 95+ '.conf' => 'text/plain',
 96+ '.crl' => 'application/pkix-crl',
 97+ '.crt' => 'application/x-x509-ca-cert',
 98+ '.css' => 'text/css',
 99+ '.csv' => 'application/vnd.ms-excel',
 100+ '.der' => 'application/x-x509-ca-cert',
 101+ '.dib' => 'image/bmp',
 102+ '.dif' => 'video/x-dv',
 103+ '.dll' => 'application/x-msdownload',
 104+ '.doc' => 'application/msword',
 105+ '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
 106+ '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
 107+ '.dot' => 'application/msword',
 108+ '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
 109+ '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
 110+ '.dv' => 'video/x-dv',
 111+ '.dwfx' => 'model/vnd.dwfx+xps',
 112+ '.edn' => 'application/vnd.adobe.edn',
 113+ '.eml' => 'message/rfc822',
 114+ '.eps' => 'application/postscript',
 115+ '.etd' => 'application/x-ebx',
 116+ '.exe' => 'application/x-msdownload',
 117+ '.fdf' => 'application/vnd.fdf',
 118+ '.fif' => 'application/fractals',
 119+ '.gif' => 'image/gif',
 120+ '.gsm' => 'audio/x-gsm',
 121+ '.hqx' => 'application/mac-binhex40',
 122+ '.hta' => 'application/hta',
 123+ '.htc' => 'text/x-component',
 124+ '.htm' => 'text/html',
 125+ '.html' => 'text/html',
 126+ '.htt' => 'text/webviewhtml',
 127+ '.hxa' => 'application/xml',
 128+ '.hxc' => 'application/xml',
 129+ '.hxd' => 'application/octet-stream',
 130+ '.hxe' => 'application/xml',
 131+ '.hxf' => 'application/xml',
 132+ '.hxh' => 'application/octet-stream',
 133+ '.hxi' => 'application/octet-stream',
 134+ '.hxk' => 'application/xml',
 135+ '.hxq' => 'application/octet-stream',
 136+ '.hxr' => 'application/octet-stream',
 137+ '.hxs' => 'application/octet-stream',
 138+ '.hxt' => 'application/xml',
 139+ '.hxv' => 'application/xml',
 140+ '.hxw' => 'application/octet-stream',
 141+ '.ico' => 'image/x-icon',
 142+ '.iii' => 'application/x-iphone',
 143+ '.ins' => 'application/x-internet-signup',
 144+ '.iqy' => 'text/x-ms-iqy',
 145+ '.isp' => 'application/x-internet-signup',
 146+ '.jfif' => 'image/jpeg',
 147+ '.jnlp' => 'application/x-java-jnlp-file',
 148+ '.jpe' => 'image/jpeg',
 149+ '.jpeg' => 'image/jpeg',
 150+ '.jpg' => 'image/jpeg',
 151+ '.jtx' => 'application/x-jtx+xps',
 152+ '.latex' => 'application/x-latex',
 153+ '.log' => 'text/plain',
 154+ '.m1v' => 'video/mpeg',
 155+ '.m2v' => 'video/mpeg',
 156+ '.m3u' => 'audio/x-mpegurl',
 157+ '.mac' => 'image/x-macpaint',
 158+ '.man' => 'application/x-troff-man',
 159+ '.mda' => 'application/msaccess',
 160+ '.mdb' => 'application/msaccess',
 161+ '.mde' => 'application/msaccess',
 162+ '.mfp' => 'application/x-shockwave-flash',
 163+ '.mht' => 'message/rfc822',
 164+ '.mhtml' => 'message/rfc822',
 165+ '.mid' => 'audio/mid',
 166+ '.midi' => 'audio/mid',
 167+ '.mod' => 'video/mpeg',
 168+ '.mov' => 'video/quicktime',
 169+ '.mp2' => 'video/mpeg',
 170+ '.mp2v' => 'video/mpeg',
 171+ '.mp3' => 'audio/mpeg',
 172+ '.mp4' => 'video/mp4',
 173+ '.mpa' => 'video/mpeg',
 174+ '.mpe' => 'video/mpeg',
 175+ '.mpeg' => 'video/mpeg',
 176+ '.mpf' => 'application/vnd.ms-mediapackage',
 177+ '.mpg' => 'video/mpeg',
 178+ '.mpv2' => 'video/mpeg',
 179+ '.mqv' => 'video/quicktime',
 180+ '.NMW' => 'application/nmwb',
 181+ '.nws' => 'message/rfc822',
 182+ '.odc' => 'text/x-ms-odc',
 183+ '.ols' => 'application/vnd.ms-publisher',
 184+ '.p10' => 'application/pkcs10',
 185+ '.p12' => 'application/x-pkcs12',
 186+ '.p7b' => 'application/x-pkcs7-certificates',
 187+ '.p7c' => 'application/pkcs7-mime',
 188+ '.p7m' => 'application/pkcs7-mime',
 189+ '.p7r' => 'application/x-pkcs7-certreqresp',
 190+ '.p7s' => 'application/pkcs7-signature',
 191+ '.pct' => 'image/pict',
 192+ '.pdf' => 'application/pdf',
 193+ '.pdx' => 'application/vnd.adobe.pdx',
 194+ '.pfx' => 'application/x-pkcs12',
 195+ '.pic' => 'image/pict',
 196+ '.pict' => 'image/pict',
 197+ '.pinstall' => 'application/x-picasa-detect',
 198+ '.pko' => 'application/vnd.ms-pki.pko',
 199+ '.png' => 'image/png',
 200+ '.pnt' => 'image/x-macpaint',
 201+ '.pntg' => 'image/x-macpaint',
 202+ '.pot' => 'application/vnd.ms-powerpoint',
 203+ '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
 204+ '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
 205+ '.ppa' => 'application/vnd.ms-powerpoint',
 206+ '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
 207+ '.pps' => 'application/vnd.ms-powerpoint',
 208+ '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
 209+ '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
 210+ '.ppt' => 'application/vnd.ms-powerpoint',
 211+ '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
 212+ '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
 213+ '.prf' => 'application/pics-rules',
 214+ '.ps' => 'application/postscript',
 215+ '.pub' => 'application/vnd.ms-publisher',
 216+ '.pwz' => 'application/vnd.ms-powerpoint',
 217+ '.py' => 'text/plain',
 218+ '.pyw' => 'text/plain',
 219+ '.qht' => 'text/x-html-insertion',
 220+ '.qhtm' => 'text/x-html-insertion',
 221+ '.qt' => 'video/quicktime',
 222+ '.qti' => 'image/x-quicktime',
 223+ '.qtif' => 'image/x-quicktime',
 224+ '.qtl' => 'application/x-quicktimeplayer',
 225+ '.rat' => 'application/rat-file',
 226+ '.rmf' => 'application/vnd.adobe.rmf',
 227+ '.rmi' => 'audio/mid',
 228+ '.rqy' => 'text/x-ms-rqy',
 229+ '.rtf' => 'application/msword',
 230+ '.sct' => 'text/scriptlet',
 231+ '.sd2' => 'audio/x-sd2',
 232+ '.sdp' => 'application/sdp',
 233+ '.shtml' => 'text/html',
 234+ '.sit' => 'application/x-stuffit',
 235+ '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
 236+ '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
 237+ '.slk' => 'application/vnd.ms-excel',
 238+ '.snd' => 'audio/basic',
 239+ '.so' => 'application/x-apachemodule',
 240+ '.sol' => 'text/plain',
 241+ '.sor' => 'text/plain',
 242+ '.spc' => 'application/x-pkcs7-certificates',
 243+ '.spl' => 'application/futuresplash',
 244+ '.sst' => 'application/vnd.ms-pki.certstore',
 245+ '.stl' => 'application/vnd.ms-pki.stl',
 246+ '.swf' => 'application/x-shockwave-flash',
 247+ '.thmx' => 'application/vnd.ms-officetheme',
 248+ '.tif' => 'image/tiff',
 249+ '.tiff' => 'image/tiff',
 250+ '.txt' => 'text/plain',
 251+ '.uls' => 'text/iuls',
 252+ '.vcf' => 'text/x-vcard',
 253+ '.vdx' => 'application/vnd.ms-visio.viewer',
 254+ '.vsd' => 'application/vnd.ms-visio.viewer',
 255+ '.vss' => 'application/vnd.ms-visio.viewer',
 256+ '.vst' => 'application/vnd.ms-visio.viewer',
 257+ '.vsx' => 'application/vnd.ms-visio.viewer',
 258+ '.vtx' => 'application/vnd.ms-visio.viewer',
 259+ '.wav' => 'audio/wav',
 260+ '.wax' => 'audio/x-ms-wax',
 261+ '.wbk' => 'application/msword',
 262+ '.wdp' => 'image/vnd.ms-photo',
 263+ '.wiz' => 'application/msword',
 264+ '.wm' => 'video/x-ms-wm',
 265+ '.wma' => 'audio/x-ms-wma',
 266+ '.wmd' => 'application/x-ms-wmd',
 267+ '.wmv' => 'video/x-ms-wmv',
 268+ '.wmx' => 'video/x-ms-wmx',
 269+ '.wmz' => 'application/x-ms-wmz',
 270+ '.wpl' => 'application/vnd.ms-wpl',
 271+ '.wsc' => 'text/scriptlet',
 272+ '.wvx' => 'video/x-ms-wvx',
 273+ '.xaml' => 'application/xaml+xml',
 274+ '.xbap' => 'application/x-ms-xbap',
 275+ '.xdp' => 'application/vnd.adobe.xdp+xml',
 276+ '.xfdf' => 'application/vnd.adobe.xfdf',
 277+ '.xht' => 'application/xhtml+xml',
 278+ '.xhtml' => 'application/xhtml+xml',
 279+ '.xla' => 'application/vnd.ms-excel',
 280+ '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
 281+ '.xlk' => 'application/vnd.ms-excel',
 282+ '.xll' => 'application/vnd.ms-excel',
 283+ '.xlm' => 'application/vnd.ms-excel',
 284+ '.xls' => 'application/vnd.ms-excel',
 285+ '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
 286+ '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
 287+ '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
 288+ '.xlt' => 'application/vnd.ms-excel',
 289+ '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
 290+ '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
 291+ '.xlw' => 'application/vnd.ms-excel',
 292+ '.xml' => 'text/xml',
 293+ '.xps' => 'application/vnd.ms-xpsdocument',
 294+ '.xsl' => 'text/xml',
 295+ );
 296+
 297+ /**
 298+ * IE versions which have been analysed to bring you this class, and for
 299+ * which some substantive difference exists. These will appear as keys
 300+ * in the return value of getRealMimesFromData(). The names are chosen to sort correctly, because version checks in this class compare them as plain strings (e.g. $version >= 'ie07').
 301+ */
 302+ protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
 303+
 304+ /**
 305+ * Type table with versions expanded: version name => ( data format => list of MIME types ). Filled in by the constructor.
 306+ */
 307+ protected $typeTable = array();
 308+
 309+ /** constructor */
 310+ function __construct() {
 311+ // Construct versioned type arrays from the base type array plus additions
 312+ $types = $this->baseTypeTable;
 313+ foreach ( $this->versions as $version ) {
 314+ if ( isset( $this->addedTypes[$version] ) ) {
 315+ foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
 316+ $types[$format] = array_merge( $types[$format], $addedTypes );
 317+ }
 318+ }
 319+ $this->typeTable[$version] = $types;
 320+ }
 321+ }
 322+
 323+ /**
 324+ * Get the MIME types from getMimesFromData(), but convert the result from IE's
 325+ * idiosyncratic private types into something other apps will understand.
 326+ *
 327+ * @param string $fileName The file name (unused at present)
 328+ * @param string $chunk The first 256 bytes of the file
 329+ * @param string $proposed The MIME type proposed by the server
 330+ *
 331+ * @return array Map of IE version to detected mime type
 332+ */
 333+ public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
 334+ $types = $this->getMimesFromData( $fileName, $chunk, $proposed );
 335+ $types = array_map( array( $this, 'translateMimeType' ), $types );
 336+ return $types;
 337+ }
 338+
 339+ /**
 340+ * Translate a MIME type from IE's idiosyncratic private types into
 341+ * more commonly understood type strings
 342+ */
 343+ public function translateMimeType( $type ) {
 344+ static $table = array(
 345+ 'image/pjpeg' => 'image/jpeg',
 346+ 'image/x-png' => 'image/png',
 347+ 'image/x-wmf' => 'application/x-msmetafile',
 348+ 'image/bmp' => 'image/x-bmp',
 349+ 'application/x-zip-compressed' => 'application/zip',
 350+ 'application/x-compressed' => 'application/x-compress',
 351+ 'application/x-gzip-compressed' => 'application/x-gzip',
 352+ 'audio/mid' => 'audio/midi',
 353+ );
 354+ if ( isset( $table[$type] ) ) {
 355+ $type = $table[$type];
 356+ }
 357+ return $type;
 358+ }
 359+
 360+ /**
 361+ * Get the untranslated MIME types for all known versions
 362+ *
 363+ * @param string $fileName The file name (unused at present)
 364+ * @param string $chunk The first 256 bytes of the file
 365+ * @param string $proposed The MIME type proposed by the server
 366+ *
 367+ * @return array Map of IE version to detected mime type
 368+ */
 369+ public function getMimesFromData( $fileName, $chunk, $proposed ) {
 370+ $types = array();
 371+ foreach ( $this->versions as $version ) {
 372+ $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
 373+ }
 374+ return $types;
 375+ }
 376+
 377+ /**
 378+ * Get the MIME type for a given named version
 379+ */
 380+ protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
 381+ // Strip text after a semicolon
 382+ $semiPos = strpos( $proposed, ';' );
 383+ if ( $semiPos !== false ) {
 384+ $proposed = substr( $proposed, 0, $semiPos );
 385+ }
 386+
 387+ $proposedFormat = $this->getDataFormat( $version, $proposed );
 388+ if ( $proposedFormat == 'unknown'
 389+ && $proposed != 'multipart/mixed'
 390+ && $proposed != 'multipart/x-mixed-replace' )
 391+ {
 392+ return $proposed;
 393+ }
 394+ if ( strval( $chunk ) === '' ) {
 395+ return $proposed;
 396+ }
 397+
 398+ // Truncate chunk at 255 bytes
 399+ $chunk = substr( $chunk, 0, 255 );
 400+
 401+ // IE does the Check*Headers() calls last, and instead does the following image
 402+ // type checks by directly looking for the magic numbers. What I do here should
 403+ // have the same effect since the magic number checks are identical in both cases.
 404+ $result = $this->sampleData( $version, $chunk );
 405+ $sampleFound = $result['found'];
 406+ $counters = $result['counters'];
 407+ $binaryType = $this->checkBinaryHeaders( $version, $chunk );
 408+ $textType = $this->checkTextHeaders( $version, $chunk );
 409+
 410+ if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
 411+ return 'text/html';
 412+ }
 413+ if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
 414+ return 'image/gif';
 415+ }
 416+ if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
 417+ && $binaryType == 'image/pjpeg' )
 418+ {
 419+ return $proposed;
 420+ }
 421+ // PNG check added in IE 7
 422+ if ( $version >= 'ie07'
 423+ && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
 424+ && $binaryType == 'image/x-png' )
 425+ {
 426+ return $proposed;
 427+ }
 428+
 429+ // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
 430+ if ( isset( $sampleFound['cdf'] ) ) {
 431+ return 'application/x-cdf';
 432+ }
 433+
 434+ // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
 435+ // previous versions
 436+ if ( isset( $sampleFound['rss'] ) ) {
 437+ return 'application/rss+xml';
 438+ }
 439+ if ( isset( $sampleFound['rdf-tag'] )
 440+ && isset( $sampleFound['rdf-url'] )
 441+ && isset( $sampleFound['rdf-purl'] ) )
 442+ {
 443+ return 'application/rss+xml';
 444+ }
 445+ if ( isset( $sampleFound['atom'] ) ) {
 446+ return 'application/atom+xml';
 447+ }
 448+
 449+ if ( isset( $sampleFound['xml'] ) ) {
 450+ // TODO: I'm not sure under what circumstances this flag is enabled
 451+ if ( strpos( $version, 'strict' ) !== false ) {
 452+ if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
 453+ return 'text/xml';
 454+ }
 455+ } else {
 456+ return 'text/xml';
 457+ }
 458+ }
 459+ if ( isset( $sampleFound['html'] ) ) {
 460+ // TODO: I'm not sure under what circumstances this flag is enabled
 461+ if ( strpos( $version, 'nohtml' ) !== false ) {
 462+ if ( $proposed == 'text/plain' ) {
 463+ return 'text/html';
 464+ }
 465+ } else {
 466+ return 'text/html';
 467+ }
 468+ }
 469+ if ( isset( $sampleFound['xbm'] ) ) {
 470+ return 'image/x-bitmap';
 471+ }
 472+ if ( isset( $sampleFound['binhex'] ) ) {
 473+ return 'application/macbinhex40';
 474+ }
 475+ if ( isset( $sampleFound['scriptlet'] ) ) {
 476+ if ( strpos( $version, 'strict' ) !== false ) {
 477+ if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
 478+ return 'text/scriptlet';
 479+ }
 480+ } else {
 481+ return 'text/scriptlet';
 482+ }
 483+ }
 484+
 485+ // Freaky heuristics to determine if the data is text or binary
 486+ // The heuristic is of course broken for non-ASCII text
 487+ if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
 488+ < ( $counters['ctrl'] + $counters['high'] ) * 16 )
 489+ {
 490+ $kindOfBinary = true;
 491+ $type = $binaryType ? $binaryType : $textType;
 492+ if ( $type === false ) {
 493+ $type = 'application/octet-stream';
 494+ }
 495+ } else {
 496+ $kindOfBinary = false;
 497+ $type = $textType ? $textType : $binaryType;
 498+ if ( $type === false ) {
 499+ $type = 'text/plain';
 500+ }
 501+ }
 502+
 503+ // Check if the output format is ambiguous
 504+ // This generally means that detection failed, real types aren't ambiguous
 505+ $detectedFormat = $this->getDataFormat( $version, $type );
 506+ if ( $detectedFormat != 'ambiguous' ) {
 507+ return $type;
 508+ }
 509+
 510+ if ( $proposedFormat != 'ambiguous' ) {
 511+ // FormatAgreesWithData()
 512+ if ( $proposedFormat == 'text' && !$kindOfBinary ) {
 513+ return $proposed;
 514+ }
 515+ if ( $proposedFormat == 'binary' && $kindOfBinary ) {
 516+ return $proposed;
 517+ }
 518+ if ( $proposedFormat == 'html' ) {
 519+ return $proposed;
 520+ }
 521+ }
 522+
 523+ // Find a MIME type by searching the registry for the file extension.
 524+ $dotPos = strrpos( $fileName, '.' );
 525+ if ( $dotPos === false ) {
 526+ return $type;
 527+ }
 528+ $ext = substr( $fileName, $dotPos );
 529+ if ( isset( $this->registry[$ext] ) ) {
 530+ return $this->registry[$ext];
 531+ }
 532+
 533+ // TODO: If the extension has an application registered to it, IE will return
 534+ // application/octet-stream. We'll skip that, so we could erroneously
 535+ // return text/plain or application/x-netcdf where application/octet-stream
 536+ // would be correct.
 537+
 538+ return $type;
 539+ }
 540+
 541+ /**
 542+ * Check for text headers at the start of the chunk
 543+ * Confirmed same in 5 and 7.
 544+ */
 545+ private function checkTextHeaders( $version, $chunk ) {
 546+ $chunk2 = substr( $chunk, 0, 2 );
 547+ $chunk4 = substr( $chunk, 0, 4 );
 548+ $chunk5 = substr( $chunk, 0, 5 );
 549+ if ( $chunk4 == '%PDF' ) {
 550+ return 'application/pdf';
 551+ }
 552+ if ( $chunk2 == '%!' ) {
 553+ return 'application/postscript';
 554+ }
 555+ if ( $chunk5 == '{\\rtf' ) {
 556+ return 'text/richtext';
 557+ }
 558+ if ( $chunk5 == 'begin' ) {
 559+ return 'application/base64';
 560+ }
 561+ return false;
 562+ }
 563+
 564+ /**
 565+ * Check for binary headers at the start of the chunk
 566+ * Confirmed same in 5 and 7.
 567+ */
 568+ private function checkBinaryHeaders( $version, $chunk ) {
 569+ $chunk2 = substr( $chunk, 0, 2 );
 570+ $chunk3 = substr( $chunk, 0, 3 );
 571+ $chunk4 = substr( $chunk, 0, 4 );
 572+ $chunk5 = substr( $chunk, 0, 5 );
 573+ $chunk8 = substr( $chunk, 0, 8 );
 574+ if ( $chunk5 == 'GIF87' || $chunk5 == 'GIF89' ) {
 575+ return 'image/gif';
 576+ }
 577+ if ( $chunk2 == "\xff\xd8" ) {
 578+ return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
 579+ }
 580+
 581+ if ( $chunk2 == 'BM'
 582+ && substr( $chunk, 6, 2 ) == "\000\000"
 583+ && substr( $chunk, 8, 2 ) != "\000\000" )
 584+ {
 585+ return 'image/bmp'; // another non-standard MIME
 586+ }
 587+ if ( $chunk4 == 'RIFF'
 588+ && substr( $chunk, 8, 4 ) == 'WAVE' )
 589+ {
 590+ return 'audio/wav';
 591+ }
 592+ // These were integer literals in IE
 593+ // Perhaps the author was not sure what the target endianness was
 594+ if ( $chunk4 == ".sd\000"
 595+ || $chunk4 == ".snd"
 596+ || $chunk4 == "\000ds."
 597+ || $chunk4 == "dns." )
 598+ {
 599+ return 'audio/basic';
 600+ }
 601+ if ( $chunk3 == "MM\000" ) {
 602+ return 'image/tiff';
 603+ }
 604+ if ( $chunk2 == 'MZ' ) {
 605+ return 'application/x-msdownload';
 606+ }
 607+ if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
 608+ return 'image/x-png'; // [sic]
 609+ }
 610+ if ( strlen( $chunk ) >= 5 ) {
 611+ $byte2 = ord( $chunk[2] );
 612+ $byte4 = ord( $chunk[4] );
 613+ if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
 614+ return 'image/x-jg';
 615+ }
 616+ }
 617+ // More endian confusion?
 618+ if ( $chunk4 == 'MROF' ) {
 619+ return 'audio/x-aiff';
 620+ }
 621+ $chunk4_8 = substr( $chunk, 8, 4 );
 622+ if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
 623+ return 'audio/x-aiff';
 624+ }
 625+ if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
 626+ return 'video/avi';
 627+ }
 628+ if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
 629+ return 'video/mpeg';
 630+ }
 631+ if ( $chunk4 == "\001\000\000\000"
 632+ && substr( $chunk, 40, 4 ) == ' EMF' )
 633+ {
 634+ return 'image/x-emf';
 635+ }
 636+ if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
 637+ return 'image/x-wmf';
 638+ }
 639+ if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
 640+ return 'application/java';
 641+ }
 642+ if ( $chunk2 == 'PK' ) {
 643+ return 'application/x-zip-compressed';
 644+ }
 645+ if ( $chunk2 == "\x1f\x9d" ) {
 646+ return 'application/x-compressed';
 647+ }
 648+ if ( $chunk2 == "\x1f\x8b" ) {
 649+ return 'application/x-gzip-compressed';
 650+ }
 651+ // Skip redundant check for ZIP
 652+ if ( $chunk5 == "MThd\000" ) {
 653+ return 'audio/mid';
 654+ }
 655+ if ( $chunk4 == '%PDF' ) {
 656+ return 'application/pdf';
 657+ }
 658+ return false;
 659+ }
 660+
 661+ /**
 662+ * Do heuristic checks on the bulk of the data sample.
 663+ * Search for HTML tags.
 664+ */
 665+ protected function sampleData( $version, $chunk ) {
 666+ $found = array();
 667+ $counters = array(
 668+ 'ctrl' => 0,
 669+ 'high' => 0,
 670+ 'low' => 0,
 671+ 'lf' => 0,
 672+ 'cr' => 0,
 673+ 'ff' => 0
 674+ );
 675+ $htmlTags = array(
 676+ 'html',
 677+ 'head',
 678+ 'title',
 679+ 'body',
 680+ 'script',
 681+ 'a href',
 682+ 'pre',
 683+ 'img',
 684+ 'plaintext',
 685+ 'table'
 686+ );
 687+ $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
 688+ $rdfPurl = 'http://purl.org/rss/1.0/';
 689+ $xbmMagic1 = '#define';
 690+ $xbmMagic2 = '_width';
 691+ $xbmMagic3 = '_bits';
 692+ $binhexMagic = 'converted with BinHex';
 693+
 694+ for ( $offset = 0; $offset < strlen( $chunk ); $offset++ ) {
 695+ $curChar = $chunk[$offset];
 696+ if ( $curChar == "\x0a" ) {
 697+ $counters['lf']++;
 698+ continue;
 699+ } elseif ( $curChar == "\x0d" ) {
 700+ $counters['cr']++;
 701+ continue;
 702+ } elseif ( $curChar == "\x0c" ) {
 703+ $counters['ff']++;
 704+ continue;
 705+ } elseif ( $curChar == "\t" ) {
 706+ $counters['low']++;
 707+ continue;
 708+ } elseif ( ord( $curChar ) < 32 ) {
 709+ $counters['ctrl']++;
 710+ continue;
 711+ } elseif ( ord( $curChar ) >= 128 ) {
 712+ $counters['high']++;
 713+ continue;
 714+ }
 715+
 716+ $counters['low']++;
 717+ if ( $curChar == '<' ) {
 718+ // XML
 719+ $remainder = substr( $chunk, $offset + 1 );
 720+ if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
 721+ $nextChar = substr( $chunk, $offset + 5, 1 );
 722+ if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
 723+ $found['xml'] = true;
 724+ }
 725+ }
 726+ // Scriptlet (JSP)
 727+ if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
 728+ $found['scriptlet'] = true;
 729+ break;
 730+ }
 731+ // HTML
 732+ foreach ( $htmlTags as $tag ) {
 733+ if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
 734+ $found['html'] = true;
 735+ }
 736+ }
 737+ // Skip broken check for additional tags (HR etc.)
 738+
 739+ // CHANNEL replaced by RSS, RDF and FEED in IE 7
 740+ if ( $version < 'ie07' ) {
 741+ if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
 742+ $found['cdf'] = true;
 743+ }
 744+ } else {
 745+ // RSS
 746+ if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
 747+ $found['rss'] = true;
 748+ break; // return from SampleData
 749+ }
 750+ if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
 751+ $found['rdf-tag'] = true;
 752+ // no break
 753+ }
 754+ if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
 755+ $found['atom'] = true;
 756+ break;
 757+ }
 758+ }
 759+ continue;
 760+ }
 761+ // Skip broken check for -->
 762+
 763+ // RSS URL checks
 764+ // For some reason both URLs must appear before it is recognised
 765+ $remainder = substr( $chunk, $offset );
 766+ if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
 767+ $found['rdf-url'] = true;
 768+ if ( isset( $found['rdf-tag'] )
 769+ && isset( $found['rdf-purl'] ) ) // [sic]
 770+ {
 771+ break;
 772+ }
 773+ continue;
 774+ }
 775+
 776+ if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
 777+ if ( isset( $found['rdf-tag'] )
 778+ && isset( $found['rdf-url'] ) ) // [sic]
 779+ {
 780+ break;
 781+ }
 782+ continue;
 783+ }
 784+
 785+ // XBM checks
 786+ if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
 787+ $found['xbm1'] = true;
 788+ continue;
 789+ }
 790+ if ( $curChar == '_' ) {
 791+ if ( isset( $found['xbm2'] ) ) {
 792+ if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
 793+ $found['xbm'] = true;
 794+ break;
 795+ }
 796+ } elseif ( isset( $found['xbm1'] ) ) {
 797+ if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
 798+ $found['xbm2'] = true;
 799+ }
 800+ }
 801+ }
 802+
 803+ // BinHex
 804+ if ( !strncasecmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
 805+ $found['binhex'] = true;
 806+ }
 807+ }
 808+ return array( 'found' => $found, 'counters' => $counters );
 809+ }
 810+
 811+ protected function getDataFormat( $version, $type ) {
 812+ $types = $this->typeTable[$version];
 813+ if ( $type == '(null)' || strval( $type ) === '' ) {
 814+ return 'ambiguous';
 815+ }
 816+ foreach ( $types as $format => $list ) {
 817+ if ( in_array( $type, $list ) ) {
 818+ return $format;
 819+ }
 820+ }
 821+ return 'unknown';
 822+ }
 823+}
 824+
Property changes on: branches/REL1_12/phase3/includes/IEContentAnalyzer.php
___________________________________________________________________
Added: svn:eol-style
1825 + native
Index: branches/REL1_12/phase3/img_auth.php
@@ -17,6 +17,12 @@
1818 wfProfileIn( 'img_auth.php' );
1919 require_once( dirname( __FILE__ ) . '/includes/StreamFile.php' );
2020
 21+$perms = User::getGroupPermissions( array( '*' ) );
 22+if ( in_array( 'read', $perms, true ) ) {
 23+ wfDebugLog( 'img_auth', 'Public wiki' );
 24+ wfPublicError();
 25+}
 26+
2127 // Extract path and image information
2228 if( !isset( $_SERVER['PATH_INFO'] ) ) {
2329 wfDebugLog( 'img_auth', 'Missing PATH_INFO' );
@@ -88,3 +94,25 @@
8995 wfLogProfilingData();
9096 exit();
9197 }
 98+
 99+/**
 100+ * Show a 403 error for use when the wiki is public
 101+ */
 102+function wfPublicError() {
 103+ header( 'HTTP/1.0 403 Forbidden' );
 104+ header( 'Content-Type: text/html; charset=utf-8' );
 105+ echo <<<ENDS
 106+<html>
 107+<body>
 108+<h1>Access Denied</h1>
 109+<p>The function of img_auth.php is to output files from a private wiki. This wiki
 110+is configured as a public wiki. For optimal security, img_auth.php is disabled in
 111+this case.
 112+</p>
 113+</body>
 114+</html>
 115+ENDS;
 116+ wfLogProfilingData();
 117+ exit;
 118+}
 119+
Index: branches/REL1_12/phase3/profileinfo.php
@@ -48,7 +48,7 @@
4949
5050 define( 'MW_NO_SETUP', 1 );
5151 require_once( './includes/WebStart.php' );
52 -require_once("./AdminSettings.php");
 52+@include_once("./AdminSettings.php");
5353 require_once( './includes/GlobalFunctions.php' );
5454
5555 if (!$wgEnableProfileInfo) {
Index: branches/REL1_12/phase3/languages/messages/MessagesEn.php
@@ -2008,6 +2008,8 @@
20092009 'undelete-error-long' => 'Errors were encountered while undeleting the file:
20102010
20112011 $1',
 2012+'undelete-show-file-confirm' => 'Are you sure you want to view a deleted revision of the file "<nowiki>$1</nowiki>" from $2?',
 2013+'undelete-show-file-submit' => 'Yes',
20122014
20132015 # Namespace form on various pages
20142016 'namespace' => 'Namespace:',
@@ -2273,6 +2275,8 @@
22742276 'import-noarticle' => 'No page to import!',
22752277 'import-nonewrevisions' => 'All revisions were previously imported.',
22762278 'xml-error-string' => '$1 at line $2, col $3 (byte $4): $5',
 2279+'import-token-mismatch' => 'Loss of session data. Please try again.',
 2280+'import-invalid-interwiki' => 'Cannot import from the specified wiki.',
22772281
22782282 # Import log
22792283 'importlogpage' => 'Import log',
Index: branches/REL1_12/phase3/RELEASE-NOTES
@@ -3,9 +3,9 @@
44 Security reminder: MediaWiki does not require PHP's register_globals
55 setting since version 1.2.0. If you have it on, turn it *off* if you can.
66
7 -== MediaWiki 1.12.1 ==
 7+== MediaWiki 1.12.2 ==
88
9 -October 2, 2008
 9+December 15, 2008
1010
1111 This is a security and bugfix release of the quarterly branch of MediaWiki
1212 for Winter 2008.
@@ -21,6 +21,30 @@
2222 Those wishing to use the latest code instead of a branch release can obtain
2323 it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
2424
 25+=== Changes since 1.12.1 ===
 26+
 27+* Fixed output escaping for reporting of non-MediaWiki exceptions. Potential
 28+ XSS if an extension throws one of these with user input.
 29+* Avoid fatal error in profileinfo.php when not configured. (Rem8)
 30+* Fixed CSRF vulnerability in Special:Import. Fixed input validation in
 31+ transwiki import feature. (Rem10, Rem11)
 32+* Added a .htaccess file to the deleted images directory for additional protection
 33+ against exposure of deleted files with known SHA-1 hashes on default installations.
 34+ (Rem13)
 35+* Fixed XSS vulnerability for Internet Explorer clients, via file uploads which
 36+ are interpreted by IE as HTML. (Rem14)
 37+* Fixed XSS vulnerability for clients with SVG scripting, on wikis where SVG
 38+ uploads are enabled. Firefox 1.5+ is affected. (Rem2, Rem3, Rem5, Rem6)
 39+* Avoid streaming uploaded files to the user via index.php. This allows
 40+ security-conscious users to serve uploaded files via a different domain, and
 41+ thus client-side scripts executed from that domain cannot access the login
 42+ cookies. Affects Special:Undelete, img_auth.php and thumb.php. (Rem12)
 43+* When streaming files via index.php, use the MIME type detected from the
 44+ file extension, not from the data. This reduces the XSS attack surface.
 45+* Blacklist redirects via Special:Filepath. Such redirects exacerbate any
 46+ XSS vulnerabilities involving uploads of files containing scripts. (Rem7)
 47+* Internationalisation updates
 48+
2549 === Changes since 1.12.0 ===
2650
2751 * (bug 13522) Fix fatal error in Parser::extractTagsAndParams
Property changes on: branches/REL1_12/phase3
___________________________________________________________________
Added: svn:mergeinfo
2852 Merged /trunk/phase3:r44570
2953 Merged /branches/REL1_12/phase3:r44570

Follow-up revisions

RevisionCommit summaryAuthorDate
r44603Update messages.inc for r44599 and r44600siebrand08:37, 15 December 2008

Status & tagging log