Index: trunk/phase3/includes/IEContentAnalyzer.php |
— | — | @@ -1,823 +1,823 @@ |
2 | | -<?php
|
3 | | -
|
4 | | -/**
|
5 | | - * This class simulates Microsoft Internet Explorer's terribly broken and
|
6 | | - * insecure MIME type detection algorithm. It can be used to check web uploads
|
7 | | - * with an apparently safe type, to see if IE will reinterpret them to produce
|
8 | | - * something dangerous.
|
9 | | - *
|
10 | | - * It is full of bugs and strange design choices, and should not under any
|
11 | | - * circumstances be used to determine a MIME type to present to a user or
|
12 | | - * client. (Apple Safari developers, this means you too.)
|
13 | | - *
|
14 | | - * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
|
15 | | - * attempted to ensure that this code works in exactly the same way as Internet
|
16 | | - * Explorer, it does not share any source code, or creative choices such as
|
17 | | - * variable names, thus I (Tim Starling) claim copyright on it.
|
18 | | - *
|
19 | | - * It may be redistributed without restriction. To aid reuse, this class does
|
20 | | - * not depend on any MediaWiki module.
|
21 | | - */
|
22 | | -class IEContentAnalyzer {
|
23 | | - /**
|
24 | | - * Relevant data taken from the type table in IE 5
|
25 | | - */
|
26 | | - protected $baseTypeTable = array(
|
27 | | - 'ambiguous' /*1*/ => array(
|
28 | | - 'text/plain',
|
29 | | - 'application/octet-stream',
|
30 | | - 'application/x-netcdf', // [sic]
|
31 | | - ),
|
32 | | - 'text' /*3*/ => array(
|
33 | | - 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
|
34 | | - 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
|
35 | | - ),
|
36 | | - 'binary' /*4*/ => array(
|
37 | | - 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
|
38 | | - 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
|
39 | | - 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
|
40 | | - 'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
|
41 | | - 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
|
42 | | - 'application/x-msdownload'
|
43 | | - ),
|
44 | | - 'html' /*5*/ => array( 'text/html' ),
|
45 | | - );
|
46 | | -
|
47 | | - /**
|
48 | | - * Changes to the type table in later versions of IE
|
49 | | - */
|
50 | | - protected $addedTypes = array(
|
51 | | - 'ie07' => array(
|
52 | | - 'text' => array( 'text/xml', 'application/xml' )
|
53 | | - ),
|
54 | | - );
|
55 | | -
|
56 | | - /**
|
57 | | - * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
|
58 | | - * typical Windows installation.
|
59 | | - *
|
60 | | - * Used for extension to MIME type mapping if detection fails.
|
61 | | - */
|
62 | | - protected $registry = array(
|
63 | | - '.323' => 'text/h323',
|
64 | | - '.3g2' => 'video/3gpp2',
|
65 | | - '.3gp' => 'video/3gpp',
|
66 | | - '.3gp2' => 'video/3gpp2',
|
67 | | - '.3gpp' => 'video/3gpp',
|
68 | | - '.aac' => 'audio/aac',
|
69 | | - '.ac3' => 'audio/ac3',
|
70 | | - '.accda' => 'application/msaccess',
|
71 | | - '.accdb' => 'application/msaccess',
|
72 | | - '.accdc' => 'application/msaccess',
|
73 | | - '.accde' => 'application/msaccess',
|
74 | | - '.accdr' => 'application/msaccess',
|
75 | | - '.accdt' => 'application/msaccess',
|
76 | | - '.ade' => 'application/msaccess',
|
77 | | - '.adp' => 'application/msaccess',
|
78 | | - '.adts' => 'audio/aac',
|
79 | | - '.ai' => 'application/postscript',
|
80 | | - '.aif' => 'audio/aiff',
|
81 | | - '.aifc' => 'audio/aiff',
|
82 | | - '.aiff' => 'audio/aiff',
|
83 | | - '.amc' => 'application/x-mpeg',
|
84 | | - '.application' => 'application/x-ms-application',
|
85 | | - '.asf' => 'video/x-ms-asf',
|
86 | | - '.asx' => 'video/x-ms-asf',
|
87 | | - '.au' => 'audio/basic',
|
88 | | - '.avi' => 'video/avi',
|
89 | | - '.bmp' => 'image/bmp',
|
90 | | - '.caf' => 'audio/x-caf',
|
91 | | - '.cat' => 'application/vnd.ms-pki.seccat',
|
92 | | - '.cbo' => 'application/sha',
|
93 | | - '.cdda' => 'audio/aiff',
|
94 | | - '.cer' => 'application/x-x509-ca-cert',
|
95 | | - '.conf' => 'text/plain',
|
96 | | - '.crl' => 'application/pkix-crl',
|
97 | | - '.crt' => 'application/x-x509-ca-cert',
|
98 | | - '.css' => 'text/css',
|
99 | | - '.csv' => 'application/vnd.ms-excel',
|
100 | | - '.der' => 'application/x-x509-ca-cert',
|
101 | | - '.dib' => 'image/bmp',
|
102 | | - '.dif' => 'video/x-dv',
|
103 | | - '.dll' => 'application/x-msdownload',
|
104 | | - '.doc' => 'application/msword',
|
105 | | - '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
|
106 | | - '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
107 | | - '.dot' => 'application/msword',
|
108 | | - '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
|
109 | | - '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
|
110 | | - '.dv' => 'video/x-dv',
|
111 | | - '.dwfx' => 'model/vnd.dwfx+xps',
|
112 | | - '.edn' => 'application/vnd.adobe.edn',
|
113 | | - '.eml' => 'message/rfc822',
|
114 | | - '.eps' => 'application/postscript',
|
115 | | - '.etd' => 'application/x-ebx',
|
116 | | - '.exe' => 'application/x-msdownload',
|
117 | | - '.fdf' => 'application/vnd.fdf',
|
118 | | - '.fif' => 'application/fractals',
|
119 | | - '.gif' => 'image/gif',
|
120 | | - '.gsm' => 'audio/x-gsm',
|
121 | | - '.hqx' => 'application/mac-binhex40',
|
122 | | - '.hta' => 'application/hta',
|
123 | | - '.htc' => 'text/x-component',
|
124 | | - '.htm' => 'text/html',
|
125 | | - '.html' => 'text/html',
|
126 | | - '.htt' => 'text/webviewhtml',
|
127 | | - '.hxa' => 'application/xml',
|
128 | | - '.hxc' => 'application/xml',
|
129 | | - '.hxd' => 'application/octet-stream',
|
130 | | - '.hxe' => 'application/xml',
|
131 | | - '.hxf' => 'application/xml',
|
132 | | - '.hxh' => 'application/octet-stream',
|
133 | | - '.hxi' => 'application/octet-stream',
|
134 | | - '.hxk' => 'application/xml',
|
135 | | - '.hxq' => 'application/octet-stream',
|
136 | | - '.hxr' => 'application/octet-stream',
|
137 | | - '.hxs' => 'application/octet-stream',
|
138 | | - '.hxt' => 'application/xml',
|
139 | | - '.hxv' => 'application/xml',
|
140 | | - '.hxw' => 'application/octet-stream',
|
141 | | - '.ico' => 'image/x-icon',
|
142 | | - '.iii' => 'application/x-iphone',
|
143 | | - '.ins' => 'application/x-internet-signup',
|
144 | | - '.iqy' => 'text/x-ms-iqy',
|
145 | | - '.isp' => 'application/x-internet-signup',
|
146 | | - '.jfif' => 'image/jpeg',
|
147 | | - '.jnlp' => 'application/x-java-jnlp-file',
|
148 | | - '.jpe' => 'image/jpeg',
|
149 | | - '.jpeg' => 'image/jpeg',
|
150 | | - '.jpg' => 'image/jpeg',
|
151 | | - '.jtx' => 'application/x-jtx+xps',
|
152 | | - '.latex' => 'application/x-latex',
|
153 | | - '.log' => 'text/plain',
|
154 | | - '.m1v' => 'video/mpeg',
|
155 | | - '.m2v' => 'video/mpeg',
|
156 | | - '.m3u' => 'audio/x-mpegurl',
|
157 | | - '.mac' => 'image/x-macpaint',
|
158 | | - '.man' => 'application/x-troff-man',
|
159 | | - '.mda' => 'application/msaccess',
|
160 | | - '.mdb' => 'application/msaccess',
|
161 | | - '.mde' => 'application/msaccess',
|
162 | | - '.mfp' => 'application/x-shockwave-flash',
|
163 | | - '.mht' => 'message/rfc822',
|
164 | | - '.mhtml' => 'message/rfc822',
|
165 | | - '.mid' => 'audio/mid',
|
166 | | - '.midi' => 'audio/mid',
|
167 | | - '.mod' => 'video/mpeg',
|
168 | | - '.mov' => 'video/quicktime',
|
169 | | - '.mp2' => 'video/mpeg',
|
170 | | - '.mp2v' => 'video/mpeg',
|
171 | | - '.mp3' => 'audio/mpeg',
|
172 | | - '.mp4' => 'video/mp4',
|
173 | | - '.mpa' => 'video/mpeg',
|
174 | | - '.mpe' => 'video/mpeg',
|
175 | | - '.mpeg' => 'video/mpeg',
|
176 | | - '.mpf' => 'application/vnd.ms-mediapackage',
|
177 | | - '.mpg' => 'video/mpeg',
|
178 | | - '.mpv2' => 'video/mpeg',
|
179 | | - '.mqv' => 'video/quicktime',
|
180 | | - '.NMW' => 'application/nmwb',
|
181 | | - '.nws' => 'message/rfc822',
|
182 | | - '.odc' => 'text/x-ms-odc',
|
183 | | - '.ols' => 'application/vnd.ms-publisher',
|
184 | | - '.p10' => 'application/pkcs10',
|
185 | | - '.p12' => 'application/x-pkcs12',
|
186 | | - '.p7b' => 'application/x-pkcs7-certificates',
|
187 | | - '.p7c' => 'application/pkcs7-mime',
|
188 | | - '.p7m' => 'application/pkcs7-mime',
|
189 | | - '.p7r' => 'application/x-pkcs7-certreqresp',
|
190 | | - '.p7s' => 'application/pkcs7-signature',
|
191 | | - '.pct' => 'image/pict',
|
192 | | - '.pdf' => 'application/pdf',
|
193 | | - '.pdx' => 'application/vnd.adobe.pdx',
|
194 | | - '.pfx' => 'application/x-pkcs12',
|
195 | | - '.pic' => 'image/pict',
|
196 | | - '.pict' => 'image/pict',
|
197 | | - '.pinstall' => 'application/x-picasa-detect',
|
198 | | - '.pko' => 'application/vnd.ms-pki.pko',
|
199 | | - '.png' => 'image/png',
|
200 | | - '.pnt' => 'image/x-macpaint',
|
201 | | - '.pntg' => 'image/x-macpaint',
|
202 | | - '.pot' => 'application/vnd.ms-powerpoint',
|
203 | | - '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
|
204 | | - '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
|
205 | | - '.ppa' => 'application/vnd.ms-powerpoint',
|
206 | | - '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
|
207 | | - '.pps' => 'application/vnd.ms-powerpoint',
|
208 | | - '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
|
209 | | - '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
|
210 | | - '.ppt' => 'application/vnd.ms-powerpoint',
|
211 | | - '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
|
212 | | - '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
213 | | - '.prf' => 'application/pics-rules',
|
214 | | - '.ps' => 'application/postscript',
|
215 | | - '.pub' => 'application/vnd.ms-publisher',
|
216 | | - '.pwz' => 'application/vnd.ms-powerpoint',
|
217 | | - '.py' => 'text/plain',
|
218 | | - '.pyw' => 'text/plain',
|
219 | | - '.qht' => 'text/x-html-insertion',
|
220 | | - '.qhtm' => 'text/x-html-insertion',
|
221 | | - '.qt' => 'video/quicktime',
|
222 | | - '.qti' => 'image/x-quicktime',
|
223 | | - '.qtif' => 'image/x-quicktime',
|
224 | | - '.qtl' => 'application/x-quicktimeplayer',
|
225 | | - '.rat' => 'application/rat-file',
|
226 | | - '.rmf' => 'application/vnd.adobe.rmf',
|
227 | | - '.rmi' => 'audio/mid',
|
228 | | - '.rqy' => 'text/x-ms-rqy',
|
229 | | - '.rtf' => 'application/msword',
|
230 | | - '.sct' => 'text/scriptlet',
|
231 | | - '.sd2' => 'audio/x-sd2',
|
232 | | - '.sdp' => 'application/sdp',
|
233 | | - '.shtml' => 'text/html',
|
234 | | - '.sit' => 'application/x-stuffit',
|
235 | | - '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
|
236 | | - '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
|
237 | | - '.slk' => 'application/vnd.ms-excel',
|
238 | | - '.snd' => 'audio/basic',
|
239 | | - '.so' => 'application/x-apachemodule',
|
240 | | - '.sol' => 'text/plain',
|
241 | | - '.sor' => 'text/plain',
|
242 | | - '.spc' => 'application/x-pkcs7-certificates',
|
243 | | - '.spl' => 'application/futuresplash',
|
244 | | - '.sst' => 'application/vnd.ms-pki.certstore',
|
245 | | - '.stl' => 'application/vnd.ms-pki.stl',
|
246 | | - '.swf' => 'application/x-shockwave-flash',
|
247 | | - '.thmx' => 'application/vnd.ms-officetheme',
|
248 | | - '.tif' => 'image/tiff',
|
249 | | - '.tiff' => 'image/tiff',
|
250 | | - '.txt' => 'text/plain',
|
251 | | - '.uls' => 'text/iuls',
|
252 | | - '.vcf' => 'text/x-vcard',
|
253 | | - '.vdx' => 'application/vnd.ms-visio.viewer',
|
254 | | - '.vsd' => 'application/vnd.ms-visio.viewer',
|
255 | | - '.vss' => 'application/vnd.ms-visio.viewer',
|
256 | | - '.vst' => 'application/vnd.ms-visio.viewer',
|
257 | | - '.vsx' => 'application/vnd.ms-visio.viewer',
|
258 | | - '.vtx' => 'application/vnd.ms-visio.viewer',
|
259 | | - '.wav' => 'audio/wav',
|
260 | | - '.wax' => 'audio/x-ms-wax',
|
261 | | - '.wbk' => 'application/msword',
|
262 | | - '.wdp' => 'image/vnd.ms-photo',
|
263 | | - '.wiz' => 'application/msword',
|
264 | | - '.wm' => 'video/x-ms-wm',
|
265 | | - '.wma' => 'audio/x-ms-wma',
|
266 | | - '.wmd' => 'application/x-ms-wmd',
|
267 | | - '.wmv' => 'video/x-ms-wmv',
|
268 | | - '.wmx' => 'video/x-ms-wmx',
|
269 | | - '.wmz' => 'application/x-ms-wmz',
|
270 | | - '.wpl' => 'application/vnd.ms-wpl',
|
271 | | - '.wsc' => 'text/scriptlet',
|
272 | | - '.wvx' => 'video/x-ms-wvx',
|
273 | | - '.xaml' => 'application/xaml+xml',
|
274 | | - '.xbap' => 'application/x-ms-xbap',
|
275 | | - '.xdp' => 'application/vnd.adobe.xdp+xml',
|
276 | | - '.xfdf' => 'application/vnd.adobe.xfdf',
|
277 | | - '.xht' => 'application/xhtml+xml',
|
278 | | - '.xhtml' => 'application/xhtml+xml',
|
279 | | - '.xla' => 'application/vnd.ms-excel',
|
280 | | - '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
|
281 | | - '.xlk' => 'application/vnd.ms-excel',
|
282 | | - '.xll' => 'application/vnd.ms-excel',
|
283 | | - '.xlm' => 'application/vnd.ms-excel',
|
284 | | - '.xls' => 'application/vnd.ms-excel',
|
285 | | - '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
|
286 | | - '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
|
287 | | - '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
288 | | - '.xlt' => 'application/vnd.ms-excel',
|
289 | | - '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
|
290 | | - '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
|
291 | | - '.xlw' => 'application/vnd.ms-excel',
|
292 | | - '.xml' => 'text/xml',
|
293 | | - '.xps' => 'application/vnd.ms-xpsdocument',
|
294 | | - '.xsl' => 'text/xml',
|
295 | | - );
|
296 | | -
|
297 | | - /**
|
298 | | - * IE versions which have been analysed to bring you this class, and for
|
299 | | - * which some substantive difference exists. These will appear as keys
|
300 | | - * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
|
301 | | - */
|
302 | | - protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
|
303 | | -
|
304 | | - /**
|
305 | | - * Type table with versions expanded
|
306 | | - */
|
307 | | - protected $typeTable = array();
|
308 | | -
|
309 | | - /** constructor */
|
310 | | - function __construct() {
|
311 | | - // Construct versioned type arrays from the base type array plus additions
|
312 | | - $types = $this->baseTypeTable;
|
313 | | - foreach ( $this->versions as $version ) {
|
314 | | - if ( isset( $this->addedTypes[$version] ) ) {
|
315 | | - foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
|
316 | | - $types[$format] = array_merge( $types[$format], $addedTypes );
|
317 | | - }
|
318 | | - }
|
319 | | - $this->typeTable[$version] = $types;
|
320 | | - }
|
321 | | - }
|
322 | | -
|
323 | | - /**
|
324 | | - * Get the MIME types from getMimesFromData(), but convert the result from IE's
|
325 | | - * idiosyncratic private types into something other apps will understand.
|
326 | | - *
|
327 | | - * @param string $fileName The file name (unused at present)
|
328 | | - * @param string $chunk The first 256 bytes of the file
|
329 | | - * @param string $proposed The MIME type proposed by the server
|
330 | | - *
|
331 | | - * @return array Map of IE version to detected mime type
|
332 | | - */
|
333 | | - public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
|
334 | | - $types = $this->getMimesFromData( $fileName, $chunk, $proposed );
|
335 | | - $types = array_map( array( $this, 'translateMimeType' ), $types );
|
336 | | - return $types;
|
337 | | - }
|
338 | | -
|
339 | | - /**
|
340 | | - * Translate a MIME type from IE's idiosyncratic private types into
|
341 | | - * more commonly understood type strings
|
342 | | - */
|
343 | | - public function translateMimeType( $type ) {
|
344 | | - static $table = array(
|
345 | | - 'image/pjpeg' => 'image/jpeg',
|
346 | | - 'image/x-png' => 'image/png',
|
347 | | - 'image/x-wmf' => 'application/x-msmetafile',
|
348 | | - 'image/bmp' => 'image/x-bmp',
|
349 | | - 'application/x-zip-compressed' => 'application/zip',
|
350 | | - 'application/x-compressed' => 'application/x-compress',
|
351 | | - 'application/x-gzip-compressed' => 'application/x-gzip',
|
352 | | - 'audio/mid' => 'audio/midi',
|
353 | | - );
|
354 | | - if ( isset( $table[$type] ) ) {
|
355 | | - $type = $table[$type];
|
356 | | - }
|
357 | | - return $type;
|
358 | | - }
|
359 | | -
|
360 | | - /**
|
361 | | - * Get the untranslated MIME types for all known versions
|
362 | | - *
|
363 | | - * @param string $fileName The file name (unused at present)
|
364 | | - * @param string $chunk The first 256 bytes of the file
|
365 | | - * @param string $proposed The MIME type proposed by the server
|
366 | | - *
|
367 | | - * @return array Map of IE version to detected mime type
|
368 | | - */
|
369 | | - public function getMimesFromData( $fileName, $chunk, $proposed ) {
|
370 | | - $types = array();
|
371 | | - foreach ( $this->versions as $version ) {
|
372 | | - $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
|
373 | | - }
|
374 | | - return $types;
|
375 | | - }
|
376 | | -
|
377 | | - /**
|
378 | | - * Get the MIME type for a given named version
|
379 | | - */
|
380 | | - protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
|
381 | | - // Strip text after a semicolon
|
382 | | - $semiPos = strpos( $proposed, ';' );
|
383 | | - if ( $semiPos !== false ) {
|
384 | | - $proposed = substr( $proposed, 0, $semiPos );
|
385 | | - }
|
386 | | -
|
387 | | - $proposedFormat = $this->getDataFormat( $version, $proposed );
|
388 | | - if ( $proposedFormat == 'unknown'
|
389 | | - && $proposed != 'multipart/mixed'
|
390 | | - && $proposed != 'multipart/x-mixed-replace' )
|
391 | | - {
|
392 | | - return $proposed;
|
393 | | - }
|
394 | | - if ( strval( $chunk ) === '' ) {
|
395 | | - return $proposed;
|
396 | | - }
|
397 | | -
|
398 | | - // Truncate chunk at 255 bytes
|
399 | | - $chunk = substr( $chunk, 0, 255 );
|
400 | | -
|
401 | | - // IE does the Check*Headers() calls last, and instead does the following image
|
402 | | - // type checks by directly looking for the magic numbers. What I do here should
|
403 | | - // have the same effect since the magic number checks are identical in both cases.
|
404 | | - $result = $this->sampleData( $version, $chunk );
|
405 | | - $sampleFound = $result['found'];
|
406 | | - $counters = $result['counters'];
|
407 | | - $binaryType = $this->checkBinaryHeaders( $version, $chunk );
|
408 | | - $textType = $this->checkTextHeaders( $version, $chunk );
|
409 | | -
|
410 | | - if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
|
411 | | - return 'text/html';
|
412 | | - }
|
413 | | - if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
|
414 | | - return 'image/gif';
|
415 | | - }
|
416 | | - if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
|
417 | | - && $binaryType == 'image/pjpeg' )
|
418 | | - {
|
419 | | - return $proposed;
|
420 | | - }
|
421 | | - // PNG check added in IE 7
|
422 | | - if ( $version >= 'ie07'
|
423 | | - && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
|
424 | | - && $binaryType == 'image/x-png' )
|
425 | | - {
|
426 | | - return $proposed;
|
427 | | - }
|
428 | | -
|
429 | | - // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
|
430 | | - if ( isset( $sampleFound['cdf'] ) ) {
|
431 | | - return 'application/x-cdf';
|
432 | | - }
|
433 | | -
|
434 | | - // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
|
435 | | - // previous versions
|
436 | | - if ( isset( $sampleFound['rss'] ) ) {
|
437 | | - return 'application/rss+xml';
|
438 | | - }
|
439 | | - if ( isset( $sampleFound['rdf-tag'] )
|
440 | | - && isset( $sampleFound['rdf-url'] )
|
441 | | - && isset( $sampleFound['rdf-purl'] ) )
|
442 | | - {
|
443 | | - return 'application/rss+xml';
|
444 | | - }
|
445 | | - if ( isset( $sampleFound['atom'] ) ) {
|
446 | | - return 'application/atom+xml';
|
447 | | - }
|
448 | | -
|
449 | | - if ( isset( $sampleFound['xml'] ) ) {
|
450 | | - // TODO: I'm not sure under what circumstances this flag is enabled
|
451 | | - if ( strpos( $version, 'strict' ) !== false ) {
|
452 | | - if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
|
453 | | - return 'text/xml';
|
454 | | - }
|
455 | | - } else {
|
456 | | - return 'text/xml';
|
457 | | - }
|
458 | | - }
|
459 | | - if ( isset( $sampleFound['html'] ) ) {
|
460 | | - // TODO: I'm not sure under what circumstances this flag is enabled
|
461 | | - if ( strpos( $version, 'nohtml' ) !== false ) {
|
462 | | - if ( $proposed == 'text/plain' ) {
|
463 | | - return 'text/html';
|
464 | | - }
|
465 | | - } else {
|
466 | | - return 'text/html';
|
467 | | - }
|
468 | | - }
|
469 | | - if ( isset( $sampleFound['xbm'] ) ) {
|
470 | | - return 'image/x-bitmap';
|
471 | | - }
|
472 | | - if ( isset( $sampleFound['binhex'] ) ) {
|
473 | | - return 'application/macbinhex40';
|
474 | | - }
|
475 | | - if ( isset( $sampleFound['scriptlet'] ) ) {
|
476 | | - if ( strpos( $version, 'strict' ) !== false ) {
|
477 | | - if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
|
478 | | - return 'text/scriptlet';
|
479 | | - }
|
480 | | - } else {
|
481 | | - return 'text/scriptlet';
|
482 | | - }
|
483 | | - }
|
484 | | -
|
485 | | - // Freaky heuristics to determine if the data is text or binary
|
486 | | - // The heuristic is of course broken for non-ASCII text
|
487 | | - if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
|
488 | | - < ( $counters['ctrl'] + $counters['high'] ) * 16 )
|
489 | | - {
|
490 | | - $kindOfBinary = true;
|
491 | | - $type = $binaryType ? $binaryType : $textType;
|
492 | | - if ( $type === false ) {
|
493 | | - $type = 'application/octet-stream';
|
494 | | - }
|
495 | | - } else {
|
496 | | - $kindOfBinary = false;
|
497 | | - $type = $textType ? $textType : $binaryType;
|
498 | | - if ( $type === false ) {
|
499 | | - $type = 'text/plain';
|
500 | | - }
|
501 | | - }
|
502 | | -
|
503 | | - // Check if the output format is ambiguous
|
504 | | - // This generally means that detection failed, real types aren't ambiguous
|
505 | | - $detectedFormat = $this->getDataFormat( $version, $type );
|
506 | | - if ( $detectedFormat != 'ambiguous' ) {
|
507 | | - return $type;
|
508 | | - }
|
509 | | -
|
510 | | - if ( $proposedFormat != 'ambiguous' ) {
|
511 | | - // FormatAgreesWithData()
|
512 | | - if ( $proposedFormat == 'text' && !$kindOfBinary ) {
|
513 | | - return $proposed;
|
514 | | - }
|
515 | | - if ( $proposedFormat == 'binary' && $kindOfBinary ) {
|
516 | | - return $proposed;
|
517 | | - }
|
518 | | - if ( $proposedFormat == 'html' ) {
|
519 | | - return $proposed;
|
520 | | - }
|
521 | | - }
|
522 | | -
|
523 | | - // Find a MIME type by searching the registry for the file extension.
|
524 | | - $dotPos = strrpos( $fileName, '.' );
|
525 | | - if ( $dotPos === false ) {
|
526 | | - return $type;
|
527 | | - }
|
528 | | - $ext = substr( $fileName, $dotPos );
|
529 | | - if ( isset( $this->registry[$ext] ) ) {
|
530 | | - return $this->registry[$ext];
|
531 | | - }
|
532 | | -
|
533 | | - // TODO: If the extension has an application registered to it, IE will return
|
534 | | - // application/octet-stream. We'll skip that, so we could erroneously
|
535 | | - // return text/plain or application/x-netcdf where application/octet-stream
|
536 | | - // would be correct.
|
537 | | -
|
538 | | - return $type;
|
539 | | - }
|
540 | | -
|
541 | | - /**
|
542 | | - * Check for text headers at the start of the chunk
|
543 | | - * Confirmed same in 5 and 7.
|
544 | | - */
|
545 | | - private function checkTextHeaders( $version, $chunk ) {
|
546 | | - $chunk2 = substr( $chunk, 0, 2 );
|
547 | | - $chunk4 = substr( $chunk, 0, 4 );
|
548 | | - $chunk5 = substr( $chunk, 0, 5 );
|
549 | | - if ( $chunk4 == '%PDF' ) {
|
550 | | - return 'application/pdf';
|
551 | | - }
|
552 | | - if ( $chunk2 == '%!' ) {
|
553 | | - return 'application/postscript';
|
554 | | - }
|
555 | | - if ( $chunk5 == '{\\rtf' ) {
|
556 | | - return 'text/richtext';
|
557 | | - }
|
558 | | - if ( $chunk5 == 'begin' ) {
|
559 | | - return 'application/base64';
|
560 | | - }
|
561 | | - return false;
|
562 | | - }
|
563 | | -
|
564 | | - /**
|
565 | | - * Check for binary headers at the start of the chunk
|
566 | | - * Confirmed same in 5 and 7.
|
567 | | - */
|
568 | | - private function checkBinaryHeaders( $version, $chunk ) {
|
569 | | - $chunk2 = substr( $chunk, 0, 2 );
|
570 | | - $chunk3 = substr( $chunk, 0, 3 );
|
571 | | - $chunk4 = substr( $chunk, 0, 4 );
|
572 | | - $chunk5 = substr( $chunk, 0, 5 );
|
573 | | - $chunk8 = substr( $chunk, 0, 8 );
|
574 | | - if ( $chunk5 == 'GIF87' || $chunk5 == 'GIF89' ) {
|
575 | | - return 'image/gif';
|
576 | | - }
|
577 | | - if ( $chunk2 == "\xff\xd8" ) {
|
578 | | - return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
|
579 | | - }
|
580 | | -
|
581 | | - if ( $chunk2 == 'BM'
|
582 | | - && substr( $chunk, 6, 2 ) == "\000\000"
|
583 | | - && substr( $chunk, 8, 2 ) != "\000\000" )
|
584 | | - {
|
585 | | - return 'image/bmp'; // another non-standard MIME
|
586 | | - }
|
587 | | - if ( $chunk4 == 'RIFF'
|
588 | | - && substr( $chunk, 8, 4 ) == 'WAVE' )
|
589 | | - {
|
590 | | - return 'audio/wav';
|
591 | | - }
|
592 | | - // These were integer literals in IE
|
593 | | - // Perhaps the author was not sure what the target endianness was
|
594 | | - if ( $chunk4 == ".sd\000"
|
595 | | - || $chunk4 == ".snd"
|
596 | | - || $chunk4 == "\000ds."
|
597 | | - || $chunk4 == "dns." )
|
598 | | - {
|
599 | | - return 'audio/basic';
|
600 | | - }
|
601 | | - if ( $chunk3 == "MM\000" ) {
|
602 | | - return 'image/tiff';
|
603 | | - }
|
604 | | - if ( $chunk2 == 'MZ' ) {
|
605 | | - return 'application/x-msdownload';
|
606 | | - }
|
607 | | - if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
|
608 | | - return 'image/x-png'; // [sic]
|
609 | | - }
|
610 | | - if ( strlen( $chunk ) >= 5 ) {
|
611 | | - $byte2 = ord( $chunk[2] );
|
612 | | - $byte4 = ord( $chunk[4] );
|
613 | | - if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
|
614 | | - return 'image/x-jg';
|
615 | | - }
|
616 | | - }
|
617 | | - // More endian confusion?
|
618 | | - if ( $chunk4 == 'MROF' ) {
|
619 | | - return 'audio/x-aiff';
|
620 | | - }
|
621 | | - $chunk4_8 = substr( $chunk, 8, 4 );
|
622 | | - if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
|
623 | | - return 'audio/x-aiff';
|
624 | | - }
|
625 | | - if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
|
626 | | - return 'video/avi';
|
627 | | - }
|
628 | | - if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
|
629 | | - return 'video/mpeg';
|
630 | | - }
|
631 | | - if ( $chunk4 == "\001\000\000\000"
|
632 | | - && substr( $chunk, 40, 4 ) == ' EMF' )
|
633 | | - {
|
634 | | - return 'image/x-emf';
|
635 | | - }
|
636 | | - if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
|
637 | | - return 'image/x-wmf';
|
638 | | - }
|
639 | | - if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
|
640 | | - return 'application/java';
|
641 | | - }
|
642 | | - if ( $chunk2 == 'PK' ) {
|
643 | | - return 'application/x-zip-compressed';
|
644 | | - }
|
645 | | - if ( $chunk2 == "\x1f\x9d" ) {
|
646 | | - return 'application/x-compressed';
|
647 | | - }
|
648 | | - if ( $chunk2 == "\x1f\x8b" ) {
|
649 | | - return 'application/x-gzip-compressed';
|
650 | | - }
|
651 | | - // Skip redundant check for ZIP
|
652 | | - if ( $chunk5 == "MThd\000" ) {
|
653 | | - return 'audio/mid';
|
654 | | - }
|
655 | | - if ( $chunk4 == '%PDF' ) {
|
656 | | - return 'application/pdf';
|
657 | | - }
|
658 | | - return false;
|
659 | | - }
|
660 | | -
|
661 | | - /**
|
662 | | - * Do heuristic checks on the bulk of the data sample.
|
663 | | - * Search for HTML tags.
|
664 | | - */
|
665 | | - protected function sampleData( $version, $chunk ) {
|
666 | | - $found = array();
|
667 | | - $counters = array(
|
668 | | - 'ctrl' => 0,
|
669 | | - 'high' => 0,
|
670 | | - 'low' => 0,
|
671 | | - 'lf' => 0,
|
672 | | - 'cr' => 0,
|
673 | | - 'ff' => 0
|
674 | | - );
|
675 | | - $htmlTags = array(
|
676 | | - 'html',
|
677 | | - 'head',
|
678 | | - 'title',
|
679 | | - 'body',
|
680 | | - 'script',
|
681 | | - 'a href',
|
682 | | - 'pre',
|
683 | | - 'img',
|
684 | | - 'plaintext',
|
685 | | - 'table'
|
686 | | - );
|
687 | | - $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
|
688 | | - $rdfPurl = 'http://purl.org/rss/1.0/';
|
689 | | - $xbmMagic1 = '#define';
|
690 | | - $xbmMagic2 = '_width';
|
691 | | - $xbmMagic3 = '_bits';
|
692 | | - $binhexMagic = 'converted with BinHex';
|
693 | | -
|
694 | | - for ( $offset = 0; $offset < strlen( $chunk ); $offset++ ) {
|
695 | | - $curChar = $chunk[$offset];
|
696 | | - if ( $curChar == "\x0a" ) {
|
697 | | - $counters['lf']++;
|
698 | | - continue;
|
699 | | - } elseif ( $curChar == "\x0d" ) {
|
700 | | - $counters['cr']++;
|
701 | | - continue;
|
702 | | - } elseif ( $curChar == "\x0c" ) {
|
703 | | - $counters['ff']++;
|
704 | | - continue;
|
705 | | - } elseif ( $curChar == "\t" ) {
|
706 | | - $counters['low']++;
|
707 | | - continue;
|
708 | | - } elseif ( ord( $curChar ) < 32 ) {
|
709 | | - $counters['ctrl']++;
|
710 | | - continue;
|
711 | | - } elseif ( ord( $curChar ) >= 128 ) {
|
712 | | - $counters['high']++;
|
713 | | - continue;
|
714 | | - }
|
715 | | -
|
716 | | - $counters['low']++;
|
717 | | - if ( $curChar == '<' ) {
|
718 | | - // XML
|
719 | | - $remainder = substr( $chunk, $offset + 1 );
|
720 | | - if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
|
721 | | - $nextChar = substr( $chunk, $offset + 5, 1 );
|
722 | | - if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
|
723 | | - $found['xml'] = true;
|
724 | | - }
|
725 | | - }
|
726 | | - // Scriptlet (JSP)
|
727 | | - if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
|
728 | | - $found['scriptlet'] = true;
|
729 | | - break;
|
730 | | - }
|
731 | | - // HTML
|
732 | | - foreach ( $htmlTags as $tag ) {
|
733 | | - if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
|
734 | | - $found['html'] = true;
|
735 | | - }
|
736 | | - }
|
737 | | - // Skip broken check for additional tags (HR etc.)
|
738 | | -
|
739 | | - // CHANNEL replaced by RSS, RDF and FEED in IE 7
|
740 | | - if ( $version < 'ie07' ) {
|
741 | | - if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
|
742 | | - $found['cdf'] = true;
|
743 | | - }
|
744 | | - } else {
|
745 | | - // RSS
|
746 | | - if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
|
747 | | - $found['rss'] = true;
|
748 | | - break; // return from SampleData
|
749 | | - }
|
750 | | - if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
|
751 | | - $found['rdf-tag'] = true;
|
752 | | - // no break
|
753 | | - }
|
754 | | - if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
|
755 | | - $found['atom'] = true;
|
756 | | - break;
|
757 | | - }
|
758 | | - }
|
759 | | - continue;
|
760 | | - }
|
761 | | - // Skip broken check for -->
|
762 | | -
|
763 | | - // RSS URL checks
|
764 | | - // For some reason both URLs must appear before it is recognised
|
765 | | - $remainder = substr( $chunk, $offset );
|
766 | | - if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
|
767 | | - $found['rdf-url'] = true;
|
768 | | - if ( isset( $found['rdf-tag'] )
|
769 | | - && isset( $found['rdf-purl'] ) ) // [sic]
|
770 | | - {
|
771 | | - break;
|
772 | | - }
|
773 | | - continue;
|
774 | | - }
|
775 | | -
|
776 | | - if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
|
777 | | - if ( isset( $found['rdf-tag'] )
|
778 | | - && isset( $found['rdf-url'] ) ) // [sic]
|
779 | | - {
|
780 | | - break;
|
781 | | - }
|
782 | | - continue;
|
783 | | - }
|
784 | | -
|
785 | | - // XBM checks
|
786 | | - if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
|
787 | | - $found['xbm1'] = true;
|
788 | | - continue;
|
789 | | - }
|
790 | | - if ( $curChar == '_' ) {
|
791 | | - if ( isset( $found['xbm2'] ) ) {
|
792 | | - if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
|
793 | | - $found['xbm'] = true;
|
794 | | - break;
|
795 | | - }
|
796 | | - } elseif ( isset( $found['xbm1'] ) ) {
|
797 | | - if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
|
798 | | - $found['xbm2'] = true;
|
799 | | - }
|
800 | | - }
|
801 | | - }
|
802 | | -
|
803 | | - // BinHex
|
804 | | - if ( !strncasecmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
|
805 | | - $found['binhex'] = true;
|
806 | | - }
|
807 | | - }
|
808 | | - return array( 'found' => $found, 'counters' => $counters );
|
809 | | - }
|
810 | | -
|
811 | | - protected function getDataFormat( $version, $type ) {
|
812 | | - $types = $this->typeTable[$version];
|
813 | | - if ( $type == '(null)' || strval( $type ) === '' ) {
|
814 | | - return 'ambiguous';
|
815 | | - }
|
816 | | - foreach ( $types as $format => $list ) {
|
817 | | - if ( in_array( $type, $list ) ) {
|
818 | | - return $format;
|
819 | | - }
|
820 | | - }
|
821 | | - return 'unknown';
|
822 | | - }
|
823 | | -}
|
824 | | -
|
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * This class simulates Microsoft Internet Explorer's terribly broken and |
| 6 | + * insecure MIME type detection algorithm. It can be used to check web uploads |
| 7 | + * with an apparently safe type, to see if IE will reinterpret them to produce |
| 8 | + * something dangerous. |
| 9 | + * |
| 10 | + * It is full of bugs and strange design choices, and should not under any |
| 11 | + * circumstances be used to determine a MIME type to present to a user or |
| 12 | + * client. (Apple Safari developers, this means you too.) |
| 13 | + * |
| 14 | + * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have |
| 15 | + * attempted to ensure that this code works in exactly the same way as Internet |
| 16 | + * Explorer, it does not share any source code, or creative choices such as |
| 17 | + * variable names, thus I (Tim Starling) claim copyright on it. |
| 18 | + * |
| 19 | + * It may be redistributed without restriction. To aid reuse, this class does |
| 20 | + * not depend on any MediaWiki module. |
| 21 | + */ |
| 22 | +class IEContentAnalyzer { |
| 23 | + /** |
| 24 | + * Relevant data taken from the type table in IE 5 |
| 25 | + */ |
| 26 | + protected $baseTypeTable = array( |
| 27 | + 'ambiguous' /*1*/ => array( |
| 28 | + 'text/plain', |
| 29 | + 'application/octet-stream', |
| 30 | + 'application/x-netcdf', // [sic] |
| 31 | + ), |
| 32 | + 'text' /*3*/ => array( |
| 33 | + 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64', |
| 34 | + 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet' |
| 35 | + ), |
| 36 | + 'binary' /*4*/ => array( |
| 37 | + 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif', |
| 38 | + 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', |
| 39 | + 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', |
| 40 | + 'video/x-msvideo', 'video/mpeg', 'application/x-compressed', |
| 41 | + 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java', |
| 42 | + 'application/x-msdownload' |
| 43 | + ), |
| 44 | + 'html' /*5*/ => array( 'text/html' ), |
| 45 | + ); |
| 46 | + |
| 47 | + /** |
| 48 | + * Changes to the type table in later versions of IE |
| 49 | + */ |
| 50 | + protected $addedTypes = array( |
| 51 | + 'ie07' => array( |
| 52 | + 'text' => array( 'text/xml', 'application/xml' ) |
| 53 | + ), |
| 54 | + ); |
| 55 | + |
| 56 | + /** |
| 57 | + * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a |
| 58 | + * typical Windows installation. |
| 59 | + * |
| 60 | + * Used for extension to MIME type mapping if detection fails. |
| 61 | + */ |
| 62 | + protected $registry = array( |
| 63 | + '.323' => 'text/h323', |
| 64 | + '.3g2' => 'video/3gpp2', |
| 65 | + '.3gp' => 'video/3gpp', |
| 66 | + '.3gp2' => 'video/3gpp2', |
| 67 | + '.3gpp' => 'video/3gpp', |
| 68 | + '.aac' => 'audio/aac', |
| 69 | + '.ac3' => 'audio/ac3', |
| 70 | + '.accda' => 'application/msaccess', |
| 71 | + '.accdb' => 'application/msaccess', |
| 72 | + '.accdc' => 'application/msaccess', |
| 73 | + '.accde' => 'application/msaccess', |
| 74 | + '.accdr' => 'application/msaccess', |
| 75 | + '.accdt' => 'application/msaccess', |
| 76 | + '.ade' => 'application/msaccess', |
| 77 | + '.adp' => 'application/msaccess', |
| 78 | + '.adts' => 'audio/aac', |
| 79 | + '.ai' => 'application/postscript', |
| 80 | + '.aif' => 'audio/aiff', |
| 81 | + '.aifc' => 'audio/aiff', |
| 82 | + '.aiff' => 'audio/aiff', |
| 83 | + '.amc' => 'application/x-mpeg', |
| 84 | + '.application' => 'application/x-ms-application', |
| 85 | + '.asf' => 'video/x-ms-asf', |
| 86 | + '.asx' => 'video/x-ms-asf', |
| 87 | + '.au' => 'audio/basic', |
| 88 | + '.avi' => 'video/avi', |
| 89 | + '.bmp' => 'image/bmp', |
| 90 | + '.caf' => 'audio/x-caf', |
| 91 | + '.cat' => 'application/vnd.ms-pki.seccat', |
| 92 | + '.cbo' => 'application/sha', |
| 93 | + '.cdda' => 'audio/aiff', |
| 94 | + '.cer' => 'application/x-x509-ca-cert', |
| 95 | + '.conf' => 'text/plain', |
| 96 | + '.crl' => 'application/pkix-crl', |
| 97 | + '.crt' => 'application/x-x509-ca-cert', |
| 98 | + '.css' => 'text/css', |
| 99 | + '.csv' => 'application/vnd.ms-excel', |
| 100 | + '.der' => 'application/x-x509-ca-cert', |
| 101 | + '.dib' => 'image/bmp', |
| 102 | + '.dif' => 'video/x-dv', |
| 103 | + '.dll' => 'application/x-msdownload', |
| 104 | + '.doc' => 'application/msword', |
| 105 | + '.docm' => 'application/vnd.ms-word.document.macroEnabled.12', |
| 106 | + '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', |
| 107 | + '.dot' => 'application/msword', |
| 108 | + '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12', |
| 109 | + '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template', |
| 110 | + '.dv' => 'video/x-dv', |
| 111 | + '.dwfx' => 'model/vnd.dwfx+xps', |
| 112 | + '.edn' => 'application/vnd.adobe.edn', |
| 113 | + '.eml' => 'message/rfc822', |
| 114 | + '.eps' => 'application/postscript', |
| 115 | + '.etd' => 'application/x-ebx', |
| 116 | + '.exe' => 'application/x-msdownload', |
| 117 | + '.fdf' => 'application/vnd.fdf', |
| 118 | + '.fif' => 'application/fractals', |
| 119 | + '.gif' => 'image/gif', |
| 120 | + '.gsm' => 'audio/x-gsm', |
| 121 | + '.hqx' => 'application/mac-binhex40', |
| 122 | + '.hta' => 'application/hta', |
| 123 | + '.htc' => 'text/x-component', |
| 124 | + '.htm' => 'text/html', |
| 125 | + '.html' => 'text/html', |
| 126 | + '.htt' => 'text/webviewhtml', |
| 127 | + '.hxa' => 'application/xml', |
| 128 | + '.hxc' => 'application/xml', |
| 129 | + '.hxd' => 'application/octet-stream', |
| 130 | + '.hxe' => 'application/xml', |
| 131 | + '.hxf' => 'application/xml', |
| 132 | + '.hxh' => 'application/octet-stream', |
| 133 | + '.hxi' => 'application/octet-stream', |
| 134 | + '.hxk' => 'application/xml', |
| 135 | + '.hxq' => 'application/octet-stream', |
| 136 | + '.hxr' => 'application/octet-stream', |
| 137 | + '.hxs' => 'application/octet-stream', |
| 138 | + '.hxt' => 'application/xml', |
| 139 | + '.hxv' => 'application/xml', |
| 140 | + '.hxw' => 'application/octet-stream', |
| 141 | + '.ico' => 'image/x-icon', |
| 142 | + '.iii' => 'application/x-iphone', |
| 143 | + '.ins' => 'application/x-internet-signup', |
| 144 | + '.iqy' => 'text/x-ms-iqy', |
| 145 | + '.isp' => 'application/x-internet-signup', |
| 146 | + '.jfif' => 'image/jpeg', |
| 147 | + '.jnlp' => 'application/x-java-jnlp-file', |
| 148 | + '.jpe' => 'image/jpeg', |
| 149 | + '.jpeg' => 'image/jpeg', |
| 150 | + '.jpg' => 'image/jpeg', |
| 151 | + '.jtx' => 'application/x-jtx+xps', |
| 152 | + '.latex' => 'application/x-latex', |
| 153 | + '.log' => 'text/plain', |
| 154 | + '.m1v' => 'video/mpeg', |
| 155 | + '.m2v' => 'video/mpeg', |
| 156 | + '.m3u' => 'audio/x-mpegurl', |
| 157 | + '.mac' => 'image/x-macpaint', |
| 158 | + '.man' => 'application/x-troff-man', |
| 159 | + '.mda' => 'application/msaccess', |
| 160 | + '.mdb' => 'application/msaccess', |
| 161 | + '.mde' => 'application/msaccess', |
| 162 | + '.mfp' => 'application/x-shockwave-flash', |
| 163 | + '.mht' => 'message/rfc822', |
| 164 | + '.mhtml' => 'message/rfc822', |
| 165 | + '.mid' => 'audio/mid', |
| 166 | + '.midi' => 'audio/mid', |
| 167 | + '.mod' => 'video/mpeg', |
| 168 | + '.mov' => 'video/quicktime', |
| 169 | + '.mp2' => 'video/mpeg', |
| 170 | + '.mp2v' => 'video/mpeg', |
| 171 | + '.mp3' => 'audio/mpeg', |
| 172 | + '.mp4' => 'video/mp4', |
| 173 | + '.mpa' => 'video/mpeg', |
| 174 | + '.mpe' => 'video/mpeg', |
| 175 | + '.mpeg' => 'video/mpeg', |
| 176 | + '.mpf' => 'application/vnd.ms-mediapackage', |
| 177 | + '.mpg' => 'video/mpeg', |
| 178 | + '.mpv2' => 'video/mpeg', |
| 179 | + '.mqv' => 'video/quicktime', |
| 180 | + '.NMW' => 'application/nmwb', |
| 181 | + '.nws' => 'message/rfc822', |
| 182 | + '.odc' => 'text/x-ms-odc', |
| 183 | + '.ols' => 'application/vnd.ms-publisher', |
| 184 | + '.p10' => 'application/pkcs10', |
| 185 | + '.p12' => 'application/x-pkcs12', |
| 186 | + '.p7b' => 'application/x-pkcs7-certificates', |
| 187 | + '.p7c' => 'application/pkcs7-mime', |
| 188 | + '.p7m' => 'application/pkcs7-mime', |
| 189 | + '.p7r' => 'application/x-pkcs7-certreqresp', |
| 190 | + '.p7s' => 'application/pkcs7-signature', |
| 191 | + '.pct' => 'image/pict', |
| 192 | + '.pdf' => 'application/pdf', |
| 193 | + '.pdx' => 'application/vnd.adobe.pdx', |
| 194 | + '.pfx' => 'application/x-pkcs12', |
| 195 | + '.pic' => 'image/pict', |
| 196 | + '.pict' => 'image/pict', |
| 197 | + '.pinstall' => 'application/x-picasa-detect', |
| 198 | + '.pko' => 'application/vnd.ms-pki.pko', |
| 199 | + '.png' => 'image/png', |
| 200 | + '.pnt' => 'image/x-macpaint', |
| 201 | + '.pntg' => 'image/x-macpaint', |
| 202 | + '.pot' => 'application/vnd.ms-powerpoint', |
| 203 | + '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12', |
| 204 | + '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template', |
| 205 | + '.ppa' => 'application/vnd.ms-powerpoint', |
| 206 | + '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12', |
| 207 | + '.pps' => 'application/vnd.ms-powerpoint', |
| 208 | + '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12', |
| 209 | + '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow', |
| 210 | + '.ppt' => 'application/vnd.ms-powerpoint', |
| 211 | + '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12', |
| 212 | + '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation', |
| 213 | + '.prf' => 'application/pics-rules', |
| 214 | + '.ps' => 'application/postscript', |
| 215 | + '.pub' => 'application/vnd.ms-publisher', |
| 216 | + '.pwz' => 'application/vnd.ms-powerpoint', |
| 217 | + '.py' => 'text/plain', |
| 218 | + '.pyw' => 'text/plain', |
| 219 | + '.qht' => 'text/x-html-insertion', |
| 220 | + '.qhtm' => 'text/x-html-insertion', |
| 221 | + '.qt' => 'video/quicktime', |
| 222 | + '.qti' => 'image/x-quicktime', |
| 223 | + '.qtif' => 'image/x-quicktime', |
| 224 | + '.qtl' => 'application/x-quicktimeplayer', |
| 225 | + '.rat' => 'application/rat-file', |
| 226 | + '.rmf' => 'application/vnd.adobe.rmf', |
| 227 | + '.rmi' => 'audio/mid', |
| 228 | + '.rqy' => 'text/x-ms-rqy', |
| 229 | + '.rtf' => 'application/msword', |
| 230 | + '.sct' => 'text/scriptlet', |
| 231 | + '.sd2' => 'audio/x-sd2', |
| 232 | + '.sdp' => 'application/sdp', |
| 233 | + '.shtml' => 'text/html', |
| 234 | + '.sit' => 'application/x-stuffit', |
| 235 | + '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12', |
| 236 | + '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide', |
| 237 | + '.slk' => 'application/vnd.ms-excel', |
| 238 | + '.snd' => 'audio/basic', |
| 239 | + '.so' => 'application/x-apachemodule', |
| 240 | + '.sol' => 'text/plain', |
| 241 | + '.sor' => 'text/plain', |
| 242 | + '.spc' => 'application/x-pkcs7-certificates', |
| 243 | + '.spl' => 'application/futuresplash', |
| 244 | + '.sst' => 'application/vnd.ms-pki.certstore', |
| 245 | + '.stl' => 'application/vnd.ms-pki.stl', |
| 246 | + '.swf' => 'application/x-shockwave-flash', |
| 247 | + '.thmx' => 'application/vnd.ms-officetheme', |
| 248 | + '.tif' => 'image/tiff', |
| 249 | + '.tiff' => 'image/tiff', |
| 250 | + '.txt' => 'text/plain', |
| 251 | + '.uls' => 'text/iuls', |
| 252 | + '.vcf' => 'text/x-vcard', |
| 253 | + '.vdx' => 'application/vnd.ms-visio.viewer', |
| 254 | + '.vsd' => 'application/vnd.ms-visio.viewer', |
| 255 | + '.vss' => 'application/vnd.ms-visio.viewer', |
| 256 | + '.vst' => 'application/vnd.ms-visio.viewer', |
| 257 | + '.vsx' => 'application/vnd.ms-visio.viewer', |
| 258 | + '.vtx' => 'application/vnd.ms-visio.viewer', |
| 259 | + '.wav' => 'audio/wav', |
| 260 | + '.wax' => 'audio/x-ms-wax', |
| 261 | + '.wbk' => 'application/msword', |
| 262 | + '.wdp' => 'image/vnd.ms-photo', |
| 263 | + '.wiz' => 'application/msword', |
| 264 | + '.wm' => 'video/x-ms-wm', |
| 265 | + '.wma' => 'audio/x-ms-wma', |
| 266 | + '.wmd' => 'application/x-ms-wmd', |
| 267 | + '.wmv' => 'video/x-ms-wmv', |
| 268 | + '.wmx' => 'video/x-ms-wmx', |
| 269 | + '.wmz' => 'application/x-ms-wmz', |
| 270 | + '.wpl' => 'application/vnd.ms-wpl', |
| 271 | + '.wsc' => 'text/scriptlet', |
| 272 | + '.wvx' => 'video/x-ms-wvx', |
| 273 | + '.xaml' => 'application/xaml+xml', |
| 274 | + '.xbap' => 'application/x-ms-xbap', |
| 275 | + '.xdp' => 'application/vnd.adobe.xdp+xml', |
| 276 | + '.xfdf' => 'application/vnd.adobe.xfdf', |
| 277 | + '.xht' => 'application/xhtml+xml', |
| 278 | + '.xhtml' => 'application/xhtml+xml', |
| 279 | + '.xla' => 'application/vnd.ms-excel', |
| 280 | + '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12', |
| 281 | + '.xlk' => 'application/vnd.ms-excel', |
| 282 | + '.xll' => 'application/vnd.ms-excel', |
| 283 | + '.xlm' => 'application/vnd.ms-excel', |
| 284 | + '.xls' => 'application/vnd.ms-excel', |
| 285 | + '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12', |
| 286 | + '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12', |
| 287 | + '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', |
| 288 | + '.xlt' => 'application/vnd.ms-excel', |
| 289 | + '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12', |
| 290 | + '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template', |
| 291 | + '.xlw' => 'application/vnd.ms-excel', |
| 292 | + '.xml' => 'text/xml', |
| 293 | + '.xps' => 'application/vnd.ms-xpsdocument', |
| 294 | + '.xsl' => 'text/xml', |
| 295 | + ); |
| 296 | + |
| 297 | + /** |
| 298 | + * IE versions which have been analysed to bring you this class, and for |
| 299 | + * which some substantive difference exists. These will appear as keys |
| 300 | + * in the return value of getRealMimesFromData(). The names are chosen to sort correctly. |
| 301 | + */ |
| 302 | + protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ); |
| 303 | + |
| 304 | + /** |
| 305 | + * Type table with versions expanded |
| 306 | + */ |
| 307 | + protected $typeTable = array(); |
| 308 | + |
| 309 | + /** constructor */ |
| 310 | + function __construct() { |
| 311 | + // Construct versioned type arrays from the base type array plus additions |
| 312 | + $types = $this->baseTypeTable; |
| 313 | + foreach ( $this->versions as $version ) { |
| 314 | + if ( isset( $this->addedTypes[$version] ) ) { |
| 315 | + foreach ( $this->addedTypes[$version] as $format => $addedTypes ) { |
| 316 | + $types[$format] = array_merge( $types[$format], $addedTypes ); |
| 317 | + } |
| 318 | + } |
| 319 | + $this->typeTable[$version] = $types; |
| 320 | + } |
| 321 | + } |
| 322 | + |
| 323 | + /** |
| 324 | + * Get the MIME types from getMimesFromData(), but convert the result from IE's |
| 325 | + * idiosyncratic private types into something other apps will understand. |
| 326 | + * |
| 327 | + * @param string $fileName The file name (used only for the extension-to-MIME registry fallback) |
| 328 | + * @param string $chunk The first 256 bytes of the file (only the first 255 are examined) |
| 329 | + * @param string $proposed The MIME type proposed by the server |
| 330 | + * |
| 331 | + * @return array Map of IE version to detected mime type |
| 332 | + */ |
| 333 | + public function getRealMimesFromData( $fileName, $chunk, $proposed ) { |
| 334 | + $types = $this->getMimesFromData( $fileName, $chunk, $proposed ); |
| 335 | + $types = array_map( array( $this, 'translateMimeType' ), $types ); |
| 336 | + return $types; |
| 337 | + } |
| 338 | + |
| 339 | + /** |
| 340 | + * Translate a MIME type from IE's idiosyncratic private types into |
| 341 | + * more commonly understood type strings |
| 342 | + */ |
| 343 | + public function translateMimeType( $type ) { |
| 344 | + static $table = array( |
| 345 | + 'image/pjpeg' => 'image/jpeg', |
| 346 | + 'image/x-png' => 'image/png', |
| 347 | + 'image/x-wmf' => 'application/x-msmetafile', |
| 348 | + 'image/bmp' => 'image/x-bmp', |
| 349 | + 'application/x-zip-compressed' => 'application/zip', |
| 350 | + 'application/x-compressed' => 'application/x-compress', |
| 351 | + 'application/x-gzip-compressed' => 'application/x-gzip', |
| 352 | + 'audio/mid' => 'audio/midi', |
| 353 | + ); |
| 354 | + if ( isset( $table[$type] ) ) { |
| 355 | + $type = $table[$type]; |
| 356 | + } |
| 357 | + return $type; |
| 358 | + } |
| 359 | + |
| 360 | + /** |
| 361 | + * Get the untranslated MIME types for all known versions |
| 362 | + * |
| 363 | + * @param string $fileName The file name (used only for the extension-to-MIME registry fallback) |
| 364 | + * @param string $chunk The first 256 bytes of the file (only the first 255 are examined) |
| 365 | + * @param string $proposed The MIME type proposed by the server |
| 366 | + * |
| 367 | + * @return array Map of IE version to detected mime type |
| 368 | + */ |
| 369 | + public function getMimesFromData( $fileName, $chunk, $proposed ) { |
| 370 | + $types = array(); |
| 371 | + foreach ( $this->versions as $version ) { |
| 372 | + $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ); |
| 373 | + } |
| 374 | + return $types; |
| 375 | + } |
| 376 | + |
| 377 | + /** |
| 378 | + * Get the MIME type for a given named version |
| 379 | + */ |
| 380 | + protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) { |
| 381 | + // Strip text after a semicolon |
| 382 | + $semiPos = strpos( $proposed, ';' ); |
| 383 | + if ( $semiPos !== false ) { |
| 384 | + $proposed = substr( $proposed, 0, $semiPos ); |
| 385 | + } |
| 386 | + |
| 387 | + $proposedFormat = $this->getDataFormat( $version, $proposed ); |
| 388 | + if ( $proposedFormat == 'unknown' |
| 389 | + && $proposed != 'multipart/mixed' |
| 390 | + && $proposed != 'multipart/x-mixed-replace' ) |
| 391 | + { |
| 392 | + return $proposed; |
| 393 | + } |
| 394 | + if ( strval( $chunk ) === '' ) { |
| 395 | + return $proposed; |
| 396 | + } |
| 397 | + |
| 398 | + // Truncate chunk at 255 bytes |
| 399 | + $chunk = substr( $chunk, 0, 255 ); |
| 400 | + |
| 401 | + // IE does the Check*Headers() calls last, and instead does the following image |
| 402 | + // type checks by directly looking for the magic numbers. What I do here should |
| 403 | + // have the same effect since the magic number checks are identical in both cases. |
| 404 | + $result = $this->sampleData( $version, $chunk ); |
| 405 | + $sampleFound = $result['found']; |
| 406 | + $counters = $result['counters']; |
| 407 | + $binaryType = $this->checkBinaryHeaders( $version, $chunk ); |
| 408 | + $textType = $this->checkTextHeaders( $version, $chunk ); |
| 409 | + |
| 410 | + if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) { |
| 411 | + return 'text/html'; |
| 412 | + } |
| 413 | + if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) { |
| 414 | + return 'image/gif'; |
| 415 | + } |
| 416 | + if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' ) |
| 417 | + && $binaryType == 'image/pjpeg' ) |
| 418 | + { |
| 419 | + return $proposed; |
| 420 | + } |
| 421 | + // PNG check added in IE 7 |
| 422 | + if ( $version >= 'ie07' |
| 423 | + && ( $proposed == 'image/x-png' || $proposed == 'image/png' ) |
| 424 | + && $binaryType == 'image/x-png' ) |
| 425 | + { |
| 426 | + return $proposed; |
| 427 | + } |
| 428 | + |
| 429 | + // CDF was removed in IE 7 so it won't be in $sampleFound for later versions |
| 430 | + if ( isset( $sampleFound['cdf'] ) ) { |
| 431 | + return 'application/x-cdf'; |
| 432 | + } |
| 433 | + |
| 434 | + // RSS and Atom were added in IE 7 so they won't be in $sampleFound for |
| 435 | + // previous versions |
| 436 | + if ( isset( $sampleFound['rss'] ) ) { |
| 437 | + return 'application/rss+xml'; |
| 438 | + } |
| 439 | + if ( isset( $sampleFound['rdf-tag'] ) |
| 440 | + && isset( $sampleFound['rdf-url'] ) |
| 441 | + && isset( $sampleFound['rdf-purl'] ) ) |
| 442 | + { |
| 443 | + return 'application/rss+xml'; |
| 444 | + } |
| 445 | + if ( isset( $sampleFound['atom'] ) ) { |
| 446 | + return 'application/atom+xml'; |
| 447 | + } |
| 448 | + |
| 449 | + if ( isset( $sampleFound['xml'] ) ) { |
| 450 | + // TODO: I'm not sure under what circumstances this flag is enabled |
| 451 | + if ( strpos( $version, 'strict' ) !== false ) { |
| 452 | + if ( $proposed == 'text/html' || $proposed == 'text/xml' ) { |
| 453 | + return 'text/xml'; |
| 454 | + } |
| 455 | + } else { |
| 456 | + return 'text/xml'; |
| 457 | + } |
| 458 | + } |
| 459 | + if ( isset( $sampleFound['html'] ) ) { |
| 460 | + // TODO: I'm not sure under what circumstances this flag is enabled |
| 461 | + if ( strpos( $version, 'nohtml' ) !== false ) { |
| 462 | + if ( $proposed == 'text/plain' ) { |
| 463 | + return 'text/html'; |
| 464 | + } |
| 465 | + } else { |
| 466 | + return 'text/html'; |
| 467 | + } |
| 468 | + } |
| 469 | + if ( isset( $sampleFound['xbm'] ) ) { |
| 470 | + return 'image/x-bitmap'; |
| 471 | + } |
| 472 | + if ( isset( $sampleFound['binhex'] ) ) { |
| 473 | + return 'application/macbinhex40'; |
| 474 | + } |
| 475 | + if ( isset( $sampleFound['scriptlet'] ) ) { |
| 476 | + if ( strpos( $version, 'strict' ) !== false ) { |
| 477 | + if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) { |
| 478 | + return 'text/scriptlet'; |
| 479 | + } |
| 480 | + } else { |
| 481 | + return 'text/scriptlet'; |
| 482 | + } |
| 483 | + } |
| 484 | + |
| 485 | + // Freaky heuristics to determine if the data is text or binary |
| 486 | + // The heuristic is of course broken for non-ASCII text |
| 487 | + if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] ) |
| 488 | + < ( $counters['ctrl'] + $counters['high'] ) * 16 ) |
| 489 | + { |
| 490 | + $kindOfBinary = true; |
| 491 | + $type = $binaryType ? $binaryType : $textType; |
| 492 | + if ( $type === false ) { |
| 493 | + $type = 'application/octet-stream'; |
| 494 | + } |
| 495 | + } else { |
| 496 | + $kindOfBinary = false; |
| 497 | + $type = $textType ? $textType : $binaryType; |
| 498 | + if ( $type === false ) { |
| 499 | + $type = 'text/plain'; |
| 500 | + } |
| 501 | + } |
| 502 | + |
| 503 | + // Check if the output format is ambiguous |
| 504 | + // This generally means that detection failed, real types aren't ambiguous |
| 505 | + $detectedFormat = $this->getDataFormat( $version, $type ); |
| 506 | + if ( $detectedFormat != 'ambiguous' ) { |
| 507 | + return $type; |
| 508 | + } |
| 509 | + |
| 510 | + if ( $proposedFormat != 'ambiguous' ) { |
| 511 | + // FormatAgreesWithData() |
| 512 | + if ( $proposedFormat == 'text' && !$kindOfBinary ) { |
| 513 | + return $proposed; |
| 514 | + } |
| 515 | + if ( $proposedFormat == 'binary' && $kindOfBinary ) { |
| 516 | + return $proposed; |
| 517 | + } |
| 518 | + if ( $proposedFormat == 'html' ) { |
| 519 | + return $proposed; |
| 520 | + } |
| 521 | + } |
| 522 | + |
| 523 | + // Find a MIME type by searching the registry for the file extension. |
| 524 | + $dotPos = strrpos( $fileName, '.' ); |
| 525 | + if ( $dotPos === false ) { |
| 526 | + return $type; |
| 527 | + } |
| 528 | + $ext = substr( $fileName, $dotPos ); |
| 529 | + if ( isset( $this->registry[$ext] ) ) { |
| 530 | + return $this->registry[$ext]; |
| 531 | + } |
| 532 | + |
| 533 | + // TODO: If the extension has an application registered to it, IE will return |
| 534 | + // application/octet-stream. We'll skip that, so we could erroneously |
| 535 | + // return text/plain or application/x-netcdf where application/octet-stream |
| 536 | + // would be correct. |
| 537 | + |
| 538 | + return $type; |
| 539 | + } |
| 540 | + |
| 541 | + /** |
| 542 | + * Check for text headers at the start of the chunk |
| 543 | + * Confirmed same in 5 and 7. |
| 544 | + */ |
| 545 | + private function checkTextHeaders( $version, $chunk ) { |
| 546 | + $chunk2 = substr( $chunk, 0, 2 ); |
| 547 | + $chunk4 = substr( $chunk, 0, 4 ); |
| 548 | + $chunk5 = substr( $chunk, 0, 5 ); |
| 549 | + if ( $chunk4 == '%PDF' ) { |
| 550 | + return 'application/pdf'; |
| 551 | + } |
| 552 | + if ( $chunk2 == '%!' ) { |
| 553 | + return 'application/postscript'; |
| 554 | + } |
| 555 | + if ( $chunk5 == '{\\rtf' ) { |
| 556 | + return 'text/richtext'; |
| 557 | + } |
| 558 | + if ( $chunk5 == 'begin' ) { |
| 559 | + return 'application/base64'; |
| 560 | + } |
| 561 | + return false; |
| 562 | + } |
| 563 | + |
| 564 | + /** |
| 565 | + * Check for binary headers at the start of the chunk |
| 566 | + * Confirmed same in 5 and 7. |
| 567 | + */ |
| 568 | + private function checkBinaryHeaders( $version, $chunk ) { |
| 569 | + $chunk2 = substr( $chunk, 0, 2 ); |
| 570 | + $chunk3 = substr( $chunk, 0, 3 ); |
| 571 | + $chunk4 = substr( $chunk, 0, 4 ); |
| 572 | + $chunk5 = substr( $chunk, 0, 5 ); |
| 573 | + $chunk8 = substr( $chunk, 0, 8 ); |
| 574 | + if ( $chunk5 == 'GIF87' || $chunk5 == 'GIF89' ) { |
| 575 | + return 'image/gif'; |
| 576 | + } |
| 577 | + if ( $chunk2 == "\xff\xd8" ) { |
| 578 | + return 'image/pjpeg'; // actually plain JPEG but this is what IE returns |
| 579 | + } |
| 580 | + |
| 581 | + if ( $chunk2 == 'BM' |
| 582 | + && substr( $chunk, 6, 2 ) == "\000\000" |
| 583 | + && substr( $chunk, 8, 2 ) != "\000\000" ) |
| 584 | + { |
| 585 | + return 'image/bmp'; // another non-standard MIME |
| 586 | + } |
| 587 | + if ( $chunk4 == 'RIFF' |
| 588 | + && substr( $chunk, 8, 4 ) == 'WAVE' ) |
| 589 | + { |
| 590 | + return 'audio/wav'; |
| 591 | + } |
| 592 | + // These were integer literals in IE |
| 593 | + // Perhaps the author was not sure what the target endianness was |
| 594 | + if ( $chunk4 == ".sd\000" |
| 595 | + || $chunk4 == ".snd" |
| 596 | + || $chunk4 == "\000ds." |
| 597 | + || $chunk4 == "dns." ) |
| 598 | + { |
| 599 | + return 'audio/basic'; |
| 600 | + } |
| 601 | + if ( $chunk3 == "MM\000" ) { |
| 602 | + return 'image/tiff'; |
| 603 | + } |
| 604 | + if ( $chunk2 == 'MZ' ) { |
| 605 | + return 'application/x-msdownload'; |
| 606 | + } |
| 607 | + if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) { |
| 608 | + return 'image/x-png'; // [sic] |
| 609 | + } |
| 610 | + if ( strlen( $chunk ) >= 5 ) { |
| 611 | + $byte2 = ord( $chunk[2] ); |
| 612 | + $byte4 = ord( $chunk[4] ); |
| 613 | + if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) { |
| 614 | + return 'image/x-jg'; |
| 615 | + } |
| 616 | + } |
| 617 | + // More endian confusion? |
| 618 | + if ( $chunk4 == 'MROF' ) { |
| 619 | + return 'audio/x-aiff'; |
| 620 | + } |
| 621 | + $chunk4_8 = substr( $chunk, 8, 4 ); |
| 622 | + if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) { |
| 623 | + return 'audio/x-aiff'; |
| 624 | + } |
| 625 | + if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) { |
| 626 | + return 'video/avi'; |
| 627 | + } |
| 628 | + if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) { |
| 629 | + return 'video/mpeg'; |
| 630 | + } |
| 631 | + if ( $chunk4 == "\001\000\000\000" |
| 632 | + && substr( $chunk, 40, 4 ) == ' EMF' ) |
| 633 | + { |
| 634 | + return 'image/x-emf'; |
| 635 | + } |
| 636 | + if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) { |
| 637 | + return 'image/x-wmf'; |
| 638 | + } |
| 639 | + if ( $chunk4 == "\xca\xfe\xba\xbe" ) { |
| 640 | + return 'application/java'; |
| 641 | + } |
| 642 | + if ( $chunk2 == 'PK' ) { |
| 643 | + return 'application/x-zip-compressed'; |
| 644 | + } |
| 645 | + if ( $chunk2 == "\x1f\x9d" ) { |
| 646 | + return 'application/x-compressed'; |
| 647 | + } |
| 648 | + if ( $chunk2 == "\x1f\x8b" ) { |
| 649 | + return 'application/x-gzip-compressed'; |
| 650 | + } |
| 651 | + // Skip redundant check for ZIP |
| 652 | + if ( $chunk5 == "MThd\000" ) { |
| 653 | + return 'audio/mid'; |
| 654 | + } |
| 655 | + if ( $chunk4 == '%PDF' ) { |
| 656 | + return 'application/pdf'; |
| 657 | + } |
| 658 | + return false; |
| 659 | + } |
| 660 | + |
| 661 | + /** |
| 662 | + * Do heuristic checks on the bulk of the data sample. |
| 663 | + * Search for HTML tags. |
| 664 | + */ |
| 665 | + protected function sampleData( $version, $chunk ) { |
| 666 | + $found = array(); |
| 667 | + $counters = array( |
| 668 | + 'ctrl' => 0, |
| 669 | + 'high' => 0, |
| 670 | + 'low' => 0, |
| 671 | + 'lf' => 0, |
| 672 | + 'cr' => 0, |
| 673 | + 'ff' => 0 |
| 674 | + ); |
| 675 | + $htmlTags = array( |
| 676 | + 'html', |
| 677 | + 'head', |
| 678 | + 'title', |
| 679 | + 'body', |
| 680 | + 'script', |
| 681 | + 'a href', |
| 682 | + 'pre', |
| 683 | + 'img', |
| 684 | + 'plaintext', |
| 685 | + 'table' |
| 686 | + ); |
| 687 | + $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; |
| 688 | + $rdfPurl = 'http://purl.org/rss/1.0/'; |
| 689 | + $xbmMagic1 = '#define'; |
| 690 | + $xbmMagic2 = '_width'; |
| 691 | + $xbmMagic3 = '_bits'; |
| 692 | + $binhexMagic = 'converted with BinHex'; |
| 693 | + |
| 694 | + for ( $offset = 0; $offset < strlen( $chunk ); $offset++ ) { |
| 695 | + $curChar = $chunk[$offset]; |
| 696 | + if ( $curChar == "\x0a" ) { |
| 697 | + $counters['lf']++; |
| 698 | + continue; |
| 699 | + } elseif ( $curChar == "\x0d" ) { |
| 700 | + $counters['cr']++; |
| 701 | + continue; |
| 702 | + } elseif ( $curChar == "\x0c" ) { |
| 703 | + $counters['ff']++; |
| 704 | + continue; |
| 705 | + } elseif ( $curChar == "\t" ) { |
| 706 | + $counters['low']++; |
| 707 | + continue; |
| 708 | + } elseif ( ord( $curChar ) < 32 ) { |
| 709 | + $counters['ctrl']++; |
| 710 | + continue; |
| 711 | + } elseif ( ord( $curChar ) >= 128 ) { |
| 712 | + $counters['high']++; |
| 713 | + continue; |
| 714 | + } |
| 715 | + |
| 716 | + $counters['low']++; |
| 717 | + if ( $curChar == '<' ) { |
| 718 | + // XML |
| 719 | + $remainder = substr( $chunk, $offset + 1 ); |
| 720 | + if ( !strncasecmp( $remainder, '?XML', 4 ) ) { |
| 721 | + $nextChar = substr( $chunk, $offset + 5, 1 ); |
| 722 | + if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) { |
| 723 | + $found['xml'] = true; |
| 724 | + } |
| 725 | + } |
| 726 | + // Scriptlet (Microsoft script component, e.g. .sct/.wsc; not JSP) |
| 727 | + if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) { |
| 728 | + $found['scriptlet'] = true; |
| 729 | + break; |
| 730 | + } |
| 731 | + // HTML |
| 732 | + foreach ( $htmlTags as $tag ) { |
| 733 | + if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) { |
| 734 | + $found['html'] = true; |
| 735 | + } |
| 736 | + } |
| 737 | + // Skip broken check for additional tags (HR etc.) |
| 738 | + |
| 739 | + // CHANNEL replaced by RSS, RDF and FEED in IE 7 |
| 740 | + if ( $version < 'ie07' ) { |
| 741 | + if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) { |
| 742 | + $found['cdf'] = true; |
| 743 | + } |
| 744 | + } else { |
| 745 | + // RSS |
| 746 | + if ( !strncasecmp( $remainder, 'RSS', 3 ) ) { |
| 747 | + $found['rss'] = true; |
| 748 | + break; // return from SampleData |
| 749 | + } |
| 750 | + if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) { |
| 751 | + $found['rdf-tag'] = true; |
| 752 | + // no break |
| 753 | + } |
| 754 | + if ( !strncasecmp( $remainder, 'FEED', 4 ) ) { |
| 755 | + $found['atom'] = true; |
| 756 | + break; |
| 757 | + } |
| 758 | + } |
| 759 | + continue; |
| 760 | + } |
| 761 | + // Skip broken check for --> |
| 762 | + |
| 763 | + // RSS URL checks |
| 764 | + // For some reason both URLs must appear before it is recognised |
| 765 | + $remainder = substr( $chunk, $offset ); |
| 766 | + if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) { |
| 767 | + $found['rdf-url'] = true; |
| 768 | + if ( isset( $found['rdf-tag'] ) |
| 769 | + && isset( $found['rdf-purl'] ) ) // [sic] |
| 770 | + { |
| 771 | + break; |
| 772 | + } |
| 773 | + continue; |
| 774 | + } |
| 775 | + |
| 776 | + if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) { |
| 777 | + if ( isset( $found['rdf-tag'] ) |
| 778 | + && isset( $found['rdf-url'] ) ) // [sic] |
| 779 | + { |
| 780 | + break; |
| 781 | + } |
| 782 | + continue; |
| 783 | + } |
| 784 | + |
| 785 | + // XBM checks |
| 786 | + if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) { |
| 787 | + $found['xbm1'] = true; |
| 788 | + continue; |
| 789 | + } |
| 790 | + if ( $curChar == '_' ) { |
| 791 | + if ( isset( $found['xbm2'] ) ) { |
| 792 | + if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) { |
| 793 | + $found['xbm'] = true; |
| 794 | + break; |
| 795 | + } |
| 796 | + } elseif ( isset( $found['xbm1'] ) ) { |
| 797 | + if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) { |
| 798 | + $found['xbm2'] = true; |
| 799 | + } |
| 800 | + } |
| 801 | + } |
| 802 | + |
| 803 | + // BinHex |
| 804 | + if ( !strncasecmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) { |
| 805 | + $found['binhex'] = true; |
| 806 | + } |
| 807 | + } |
| 808 | + return array( 'found' => $found, 'counters' => $counters ); |
| 809 | + } |
| 810 | + |
/**
 * Classify a proposed MIME type using the type table of one IE version.
 *
 * @param string $version Key into $this->typeTable selecting the table to search
 * @param string|null $type Proposed MIME type. The literal '(null)' and any
 *   value that casts to the empty string are treated as "no type given".
 * @return string 'ambiguous' when no type was given; otherwise the key of the
 *   first list in the selected table that contains $type, or 'unknown' when no
 *   list contains it or the version has no table.
 */
protected function getDataFormat( $version, $type ) {
	// Normalise once so that every comparison below is string-to-string.
	// Loose comparison would let non-string values such as 0 or true match
	// '(null)' or a table entry through PHP's type juggling.
	$type = strval( $type );
	if ( $type === '(null)' || $type === '' ) {
		return 'ambiguous';
	}

	// An unrecognised version has nothing to search; report 'unknown'
	// instead of raising an undefined-index notice and a foreach warning.
	if ( !isset( $this->typeTable[$version] ) ) {
		return 'unknown';
	}

	// Each entry is iterated as format name => list of MIME types.
	foreach ( $this->typeTable[$version] as $format => $list ) {
		if ( in_array( $type, $list, true ) ) {
			return $format;
		}
	}
	return 'unknown';
}
| 823 | +} |
| 824 | + |
Property changes on: trunk/phase3/includes/IEContentAnalyzer.php |
___________________________________________________________________ |
Name: svn:eol-style |
825 | 825 | + native |