Index: trunk/phase3/includes/IEContentAnalyzer.php |
— | — | @@ -1,824 +0,0 @@ |
2 | | -<?php |
3 | | - |
/**
 * This class simulates Microsoft Internet Explorer's terribly broken and
 * insecure MIME type detection algorithm. It can be used to check web uploads
 * with an apparently safe type, to see if IE will reinterpret them to produce
 * something dangerous.
 *
 * It is full of bugs and strange design choices and should not under any
 * circumstances be used to determine a MIME type to present to a user or
 * client. (Apple Safari developers, this means you too.)
 *
 * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
 * attempted to ensure that this code works in exactly the same way as Internet
 * Explorer, it does not share any source code, or creative choices such as
 * variable names, thus I (Tim Starling) claim copyright on it.
 *
 * It may be redistributed without restriction. To aid reuse, this class does
 * not depend on any MediaWiki module.
 */
class IEContentAnalyzer {
	/**
	 * Relevant data taken from the type table in IE 5.
	 *
	 * Maps a data format name ('ambiguous', 'text', 'binary', 'html') to the
	 * list of MIME types belonging to that format. The numbers in the inline
	 * comments are the format identifiers noted alongside each entry.
	 */
	protected $baseTypeTable = array(
		'ambiguous' /*1*/ => array(
			'text/plain',
			'application/octet-stream',
			'application/x-netcdf', // [sic]
		),
		'text' /*3*/ => array(
			'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
			'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
		),
		'binary' /*4*/ => array(
			'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
			'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
			'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
			'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
			'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
			'application/x-msdownload'
		),
		'html' /*5*/ => array( 'text/html' ),
	);

	/**
	 * Changes to the type table in later versions of IE.
	 *
	 * Keyed by version name, then by data format; the listed types are
	 * appended to the base table for that version and every later one.
	 */
	protected $addedTypes = array(
		'ie07' => array(
			'text' => array( 'text/xml', 'application/xml' )
		),
	);

	/**
	 * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
	 * typical Windows installation.
	 *
	 * Used for extension to MIME type mapping if detection fails. Lookups are
	 * done without regard to case, see getTypeFromRegistry().
	 */
	protected $registry = array(
		'.323' => 'text/h323',
		'.3g2' => 'video/3gpp2',
		'.3gp' => 'video/3gpp',
		'.3gp2' => 'video/3gpp2',
		'.3gpp' => 'video/3gpp',
		'.aac' => 'audio/aac',
		'.ac3' => 'audio/ac3',
		'.accda' => 'application/msaccess',
		'.accdb' => 'application/msaccess',
		'.accdc' => 'application/msaccess',
		'.accde' => 'application/msaccess',
		'.accdr' => 'application/msaccess',
		'.accdt' => 'application/msaccess',
		'.ade' => 'application/msaccess',
		'.adp' => 'application/msaccess',
		'.adts' => 'audio/aac',
		'.ai' => 'application/postscript',
		'.aif' => 'audio/aiff',
		'.aifc' => 'audio/aiff',
		'.aiff' => 'audio/aiff',
		'.amc' => 'application/x-mpeg',
		'.application' => 'application/x-ms-application',
		'.asf' => 'video/x-ms-asf',
		'.asx' => 'video/x-ms-asf',
		'.au' => 'audio/basic',
		'.avi' => 'video/avi',
		'.bmp' => 'image/bmp',
		'.caf' => 'audio/x-caf',
		'.cat' => 'application/vnd.ms-pki.seccat',
		'.cbo' => 'application/sha',
		'.cdda' => 'audio/aiff',
		'.cer' => 'application/x-x509-ca-cert',
		'.conf' => 'text/plain',
		'.crl' => 'application/pkix-crl',
		'.crt' => 'application/x-x509-ca-cert',
		'.css' => 'text/css',
		'.csv' => 'application/vnd.ms-excel',
		'.der' => 'application/x-x509-ca-cert',
		'.dib' => 'image/bmp',
		'.dif' => 'video/x-dv',
		'.dll' => 'application/x-msdownload',
		'.doc' => 'application/msword',
		'.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
		'.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
		'.dot' => 'application/msword',
		'.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
		'.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
		'.dv' => 'video/x-dv',
		'.dwfx' => 'model/vnd.dwfx+xps',
		'.edn' => 'application/vnd.adobe.edn',
		'.eml' => 'message/rfc822',
		'.eps' => 'application/postscript',
		'.etd' => 'application/x-ebx',
		'.exe' => 'application/x-msdownload',
		'.fdf' => 'application/vnd.fdf',
		'.fif' => 'application/fractals',
		'.gif' => 'image/gif',
		'.gsm' => 'audio/x-gsm',
		'.hqx' => 'application/mac-binhex40',
		'.hta' => 'application/hta',
		'.htc' => 'text/x-component',
		'.htm' => 'text/html',
		'.html' => 'text/html',
		'.htt' => 'text/webviewhtml',
		'.hxa' => 'application/xml',
		'.hxc' => 'application/xml',
		'.hxd' => 'application/octet-stream',
		'.hxe' => 'application/xml',
		'.hxf' => 'application/xml',
		'.hxh' => 'application/octet-stream',
		'.hxi' => 'application/octet-stream',
		'.hxk' => 'application/xml',
		'.hxq' => 'application/octet-stream',
		'.hxr' => 'application/octet-stream',
		'.hxs' => 'application/octet-stream',
		'.hxt' => 'application/xml',
		'.hxv' => 'application/xml',
		'.hxw' => 'application/octet-stream',
		'.ico' => 'image/x-icon',
		'.iii' => 'application/x-iphone',
		'.ins' => 'application/x-internet-signup',
		'.iqy' => 'text/x-ms-iqy',
		'.isp' => 'application/x-internet-signup',
		'.jfif' => 'image/jpeg',
		'.jnlp' => 'application/x-java-jnlp-file',
		'.jpe' => 'image/jpeg',
		'.jpeg' => 'image/jpeg',
		'.jpg' => 'image/jpeg',
		'.jtx' => 'application/x-jtx+xps',
		'.latex' => 'application/x-latex',
		'.log' => 'text/plain',
		'.m1v' => 'video/mpeg',
		'.m2v' => 'video/mpeg',
		'.m3u' => 'audio/x-mpegurl',
		'.mac' => 'image/x-macpaint',
		'.man' => 'application/x-troff-man',
		'.mda' => 'application/msaccess',
		'.mdb' => 'application/msaccess',
		'.mde' => 'application/msaccess',
		'.mfp' => 'application/x-shockwave-flash',
		'.mht' => 'message/rfc822',
		'.mhtml' => 'message/rfc822',
		'.mid' => 'audio/mid',
		'.midi' => 'audio/mid',
		'.mod' => 'video/mpeg',
		'.mov' => 'video/quicktime',
		'.mp2' => 'video/mpeg',
		'.mp2v' => 'video/mpeg',
		'.mp3' => 'audio/mpeg',
		'.mp4' => 'video/mp4',
		'.mpa' => 'video/mpeg',
		'.mpe' => 'video/mpeg',
		'.mpeg' => 'video/mpeg',
		'.mpf' => 'application/vnd.ms-mediapackage',
		'.mpg' => 'video/mpeg',
		'.mpv2' => 'video/mpeg',
		'.mqv' => 'video/quicktime',
		'.NMW' => 'application/nmwb',
		'.nws' => 'message/rfc822',
		'.odc' => 'text/x-ms-odc',
		'.ols' => 'application/vnd.ms-publisher',
		'.p10' => 'application/pkcs10',
		'.p12' => 'application/x-pkcs12',
		'.p7b' => 'application/x-pkcs7-certificates',
		'.p7c' => 'application/pkcs7-mime',
		'.p7m' => 'application/pkcs7-mime',
		'.p7r' => 'application/x-pkcs7-certreqresp',
		'.p7s' => 'application/pkcs7-signature',
		'.pct' => 'image/pict',
		'.pdf' => 'application/pdf',
		'.pdx' => 'application/vnd.adobe.pdx',
		'.pfx' => 'application/x-pkcs12',
		'.pic' => 'image/pict',
		'.pict' => 'image/pict',
		'.pinstall' => 'application/x-picasa-detect',
		'.pko' => 'application/vnd.ms-pki.pko',
		'.png' => 'image/png',
		'.pnt' => 'image/x-macpaint',
		'.pntg' => 'image/x-macpaint',
		'.pot' => 'application/vnd.ms-powerpoint',
		'.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
		'.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
		'.ppa' => 'application/vnd.ms-powerpoint',
		'.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
		'.pps' => 'application/vnd.ms-powerpoint',
		'.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
		'.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
		'.ppt' => 'application/vnd.ms-powerpoint',
		'.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
		'.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
		'.prf' => 'application/pics-rules',
		'.ps' => 'application/postscript',
		'.pub' => 'application/vnd.ms-publisher',
		'.pwz' => 'application/vnd.ms-powerpoint',
		'.py' => 'text/plain',
		'.pyw' => 'text/plain',
		'.qht' => 'text/x-html-insertion',
		'.qhtm' => 'text/x-html-insertion',
		'.qt' => 'video/quicktime',
		'.qti' => 'image/x-quicktime',
		'.qtif' => 'image/x-quicktime',
		'.qtl' => 'application/x-quicktimeplayer',
		'.rat' => 'application/rat-file',
		'.rmf' => 'application/vnd.adobe.rmf',
		'.rmi' => 'audio/mid',
		'.rqy' => 'text/x-ms-rqy',
		'.rtf' => 'application/msword',
		'.sct' => 'text/scriptlet',
		'.sd2' => 'audio/x-sd2',
		'.sdp' => 'application/sdp',
		'.shtml' => 'text/html',
		'.sit' => 'application/x-stuffit',
		'.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
		'.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
		'.slk' => 'application/vnd.ms-excel',
		'.snd' => 'audio/basic',
		'.so' => 'application/x-apachemodule',
		'.sol' => 'text/plain',
		'.sor' => 'text/plain',
		'.spc' => 'application/x-pkcs7-certificates',
		'.spl' => 'application/futuresplash',
		'.sst' => 'application/vnd.ms-pki.certstore',
		'.stl' => 'application/vnd.ms-pki.stl',
		'.swf' => 'application/x-shockwave-flash',
		'.thmx' => 'application/vnd.ms-officetheme',
		'.tif' => 'image/tiff',
		'.tiff' => 'image/tiff',
		'.txt' => 'text/plain',
		'.uls' => 'text/iuls',
		'.vcf' => 'text/x-vcard',
		'.vdx' => 'application/vnd.ms-visio.viewer',
		'.vsd' => 'application/vnd.ms-visio.viewer',
		'.vss' => 'application/vnd.ms-visio.viewer',
		'.vst' => 'application/vnd.ms-visio.viewer',
		'.vsx' => 'application/vnd.ms-visio.viewer',
		'.vtx' => 'application/vnd.ms-visio.viewer',
		'.wav' => 'audio/wav',
		'.wax' => 'audio/x-ms-wax',
		'.wbk' => 'application/msword',
		'.wdp' => 'image/vnd.ms-photo',
		'.wiz' => 'application/msword',
		'.wm' => 'video/x-ms-wm',
		'.wma' => 'audio/x-ms-wma',
		'.wmd' => 'application/x-ms-wmd',
		'.wmv' => 'video/x-ms-wmv',
		'.wmx' => 'video/x-ms-wmx',
		'.wmz' => 'application/x-ms-wmz',
		'.wpl' => 'application/vnd.ms-wpl',
		'.wsc' => 'text/scriptlet',
		'.wvx' => 'video/x-ms-wvx',
		'.xaml' => 'application/xaml+xml',
		'.xbap' => 'application/x-ms-xbap',
		'.xdp' => 'application/vnd.adobe.xdp+xml',
		'.xfdf' => 'application/vnd.adobe.xfdf',
		'.xht' => 'application/xhtml+xml',
		'.xhtml' => 'application/xhtml+xml',
		'.xla' => 'application/vnd.ms-excel',
		'.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
		'.xlk' => 'application/vnd.ms-excel',
		'.xll' => 'application/vnd.ms-excel',
		'.xlm' => 'application/vnd.ms-excel',
		'.xls' => 'application/vnd.ms-excel',
		'.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
		'.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
		'.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
		'.xlt' => 'application/vnd.ms-excel',
		'.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
		'.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
		'.xlw' => 'application/vnd.ms-excel',
		'.xml' => 'text/xml',
		'.xps' => 'application/vnd.ms-xpsdocument',
		'.xsl' => 'text/xml',
	);

	/**
	 * IE versions which have been analysed to bring you this class, and for
	 * which some substantive difference exists. These will appear as keys
	 * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
	 */
	protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );

	/**
	 * Type table with versions expanded: version name => format => list of MIME types.
	 * Filled in by the constructor.
	 */
	protected $typeTable = array();

	/**
	 * Copy of $registry with all keys folded to lower case, built on first use
	 * by getTypeFromRegistry(). Null until then.
	 */
	private $lowerCaseRegistry = null;

	/**
	 * Constructor. Builds the per-version type tables from the base type
	 * table plus the additions that each version introduced. Additions are
	 * cumulative, so a type added in one version is kept for later versions.
	 */
	public function __construct() {
		$types = $this->baseTypeTable;
		foreach ( $this->versions as $version ) {
			if ( isset( $this->addedTypes[$version] ) ) {
				foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
					$types[$format] = array_merge( $types[$format], $addedTypes );
				}
			}
			$this->typeTable[$version] = $types;
		}
	}

	/**
	 * Get the MIME types from getMimesFromData(), but convert the result from IE's
	 * idiosyncratic private types into something other apps will understand.
	 *
	 * @param $fileName String: the file name; its extension is used as a last
	 *   resort when content detection is inconclusive
	 * @param $chunk String: the first 256 bytes of the file (only the first
	 *   255 bytes are examined)
	 * @param $proposed String: the MIME type proposed by the server
	 *
	 * @return Array: map of IE version to detected mime type
	 */
	public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
		$types = $this->getMimesFromData( $fileName, $chunk, $proposed );
		$types = array_map( array( $this, 'translateMimeType' ), $types );
		return $types;
	}

	/**
	 * Translate a MIME type from IE's idiosyncratic private types into
	 * more commonly understood type strings.
	 *
	 * @param $type String: MIME type as reported by the IE algorithm
	 *
	 * @return String: the translated type, or $type unchanged if there is no
	 *   translation for it
	 */
	public function translateMimeType( $type ) {
		static $table = array(
			'image/pjpeg' => 'image/jpeg',
			'image/x-png' => 'image/png',
			'image/x-wmf' => 'application/x-msmetafile',
			'image/bmp' => 'image/x-bmp',
			'application/x-zip-compressed' => 'application/zip',
			'application/x-compressed' => 'application/x-compress',
			'application/x-gzip-compressed' => 'application/x-gzip',
			'audio/mid' => 'audio/midi',
		);
		if ( isset( $table[$type] ) ) {
			$type = $table[$type];
		}
		return $type;
	}

	/**
	 * Get the untranslated MIME types for all known versions.
	 *
	 * @param $fileName String: the file name; its extension is used as a last
	 *   resort when content detection is inconclusive
	 * @param $chunk String: the first 256 bytes of the file (only the first
	 *   255 bytes are examined)
	 * @param $proposed String: the MIME type proposed by the server
	 *
	 * @return Array: map of IE version to detected mime type
	 */
	public function getMimesFromData( $fileName, $chunk, $proposed ) {
		$types = array();
		foreach ( $this->versions as $version ) {
			$types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
		}
		return $types;
	}

	/**
	 * Get the MIME type for a given named version.
	 *
	 * @param $version String: one of the names in $this->versions
	 * @param $fileName String: the file name, used for the registry fallback
	 * @param $chunk String: the start of the file
	 * @param $proposed String: the MIME type proposed by the server; anything
	 *   from the first semicolon onwards is ignored
	 *
	 * @return String: the MIME type that this version is simulated to choose
	 */
	protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
		// Strip text after a semicolon
		$semiPos = strpos( strval( $proposed ), ';' );
		if ( $semiPos !== false ) {
			$proposed = substr( $proposed, 0, $semiPos );
		}

		// A proposed type that is not in the type table is trusted as-is,
		// except for the two multipart types which are always sniffed.
		$proposedFormat = $this->getDataFormat( $version, $proposed );
		if ( $proposedFormat === 'unknown'
			&& $proposed !== 'multipart/mixed'
			&& $proposed !== 'multipart/x-mixed-replace' )
		{
			return $proposed;
		}
		if ( strval( $chunk ) === '' ) {
			return $proposed;
		}

		// Truncate chunk at 255 bytes
		$chunk = substr( $chunk, 0, 255 );

		// IE does the Check*Headers() calls last, and instead does the following image
		// type checks by directly looking for the magic numbers. What I do here should
		// have the same effect since the magic number checks are identical in both cases.
		$result = $this->sampleData( $version, $chunk );
		$sampleFound = $result['found'];
		$counters = $result['counters'];
		$binaryType = $this->checkBinaryHeaders( $version, $chunk );
		$textType = $this->checkTextHeaders( $version, $chunk );

		if ( $proposed === 'text/html' && isset( $sampleFound['html'] ) ) {
			return 'text/html';
		}
		if ( $proposed === 'image/gif' && $binaryType === 'image/gif' ) {
			return 'image/gif';
		}
		if ( ( $proposed === 'image/pjpeg' || $proposed === 'image/jpeg' )
			&& $binaryType === 'image/pjpeg' )
		{
			return $proposed;
		}
		// PNG check added in IE 7. The version names are chosen so that a
		// plain string comparison orders them correctly.
		if ( $version >= 'ie07'
			&& ( $proposed === 'image/x-png' || $proposed === 'image/png' )
			&& $binaryType === 'image/x-png' )
		{
			return $proposed;
		}

		// CDF was removed in IE 7 so it won't be in $sampleFound for later versions
		if ( isset( $sampleFound['cdf'] ) ) {
			return 'application/x-cdf';
		}

		// RSS and Atom were added in IE 7 so they won't be in $sampleFound for
		// previous versions
		if ( isset( $sampleFound['rss'] ) ) {
			return 'application/rss+xml';
		}
		if ( isset( $sampleFound['rdf-tag'] )
			&& isset( $sampleFound['rdf-url'] )
			&& isset( $sampleFound['rdf-purl'] ) )
		{
			return 'application/rss+xml';
		}
		if ( isset( $sampleFound['atom'] ) ) {
			return 'application/atom+xml';
		}

		if ( isset( $sampleFound['xml'] ) ) {
			// TODO: I'm not sure under what circumstances this flag is enabled
			if ( strpos( $version, 'strict' ) !== false ) {
				if ( $proposed === 'text/html' || $proposed === 'text/xml' ) {
					return 'text/xml';
				}
			} else {
				return 'text/xml';
			}
		}
		if ( isset( $sampleFound['html'] ) ) {
			// TODO: I'm not sure under what circumstances this flag is enabled
			if ( strpos( $version, 'nohtml' ) !== false ) {
				if ( $proposed === 'text/plain' ) {
					return 'text/html';
				}
			} else {
				return 'text/html';
			}
		}
		if ( isset( $sampleFound['xbm'] ) ) {
			return 'image/x-bitmap';
		}
		if ( isset( $sampleFound['binhex'] ) ) {
			return 'application/macbinhex40';
		}
		if ( isset( $sampleFound['scriptlet'] ) ) {
			if ( strpos( $version, 'strict' ) !== false ) {
				if ( $proposed === 'text/plain' || $proposed === 'text/scriptlet' ) {
					return 'text/scriptlet';
				}
			} else {
				return 'text/scriptlet';
			}
		}

		// Freaky heuristics to determine if the data is text or binary
		// The heuristic is of course broken for non-ASCII text
		if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
			< ( $counters['ctrl'] + $counters['high'] ) * 16 )
		{
			$kindOfBinary = true;
			$type = $binaryType ? $binaryType : $textType;
			if ( $type === false ) {
				$type = 'application/octet-stream';
			}
		} else {
			$kindOfBinary = false;
			$type = $textType ? $textType : $binaryType;
			if ( $type === false ) {
				$type = 'text/plain';
			}
		}

		// Check if the output format is ambiguous
		// This generally means that detection failed, real types aren't ambiguous
		$detectedFormat = $this->getDataFormat( $version, $type );
		if ( $detectedFormat !== 'ambiguous' ) {
			return $type;
		}

		if ( $proposedFormat !== 'ambiguous' ) {
			// FormatAgreesWithData()
			if ( $proposedFormat === 'text' && !$kindOfBinary ) {
				return $proposed;
			}
			if ( $proposedFormat === 'binary' && $kindOfBinary ) {
				return $proposed;
			}
			if ( $proposedFormat === 'html' ) {
				return $proposed;
			}
		}

		// Find a MIME type by searching the registry for the file extension.
		$dotPos = strrpos( $fileName, '.' );
		if ( $dotPos === false ) {
			return $type;
		}
		$registryType = $this->getTypeFromRegistry( substr( $fileName, $dotPos ) );
		if ( $registryType !== false ) {
			return $registryType;
		}

		// TODO: If the extension has an application registered to it, IE will return
		// application/octet-stream. We'll skip that, so we could erroneously
		// return text/plain or application/x-netcdf where application/octet-stream
		// would be correct.

		return $type;
	}

	/**
	 * Look up the "Content Type" for a file extension in the simulated registry.
	 *
	 * Windows registry key names are not case sensitive, so "FOO.HTML" has to
	 * resolve exactly like "foo.html". An exact match is tried first, then a
	 * match that ignores case.
	 *
	 * @param $ext String: the extension including the leading dot
	 *
	 * @return String|false: the registered MIME type, or false if there is none
	 */
	private function getTypeFromRegistry( $ext ) {
		if ( isset( $this->registry[$ext] ) ) {
			return $this->registry[$ext];
		}
		if ( $this->lowerCaseRegistry === null ) {
			$this->lowerCaseRegistry = array_change_key_case( $this->registry, CASE_LOWER );
		}
		$lowerExt = strtolower( $ext );
		if ( isset( $this->lowerCaseRegistry[$lowerExt] ) ) {
			return $this->lowerCaseRegistry[$lowerExt];
		}
		return false;
	}

	/**
	 * Check for text headers at the start of the chunk
	 * Confirmed same in 5 and 7.
	 *
	 * @param $version String: version name (not used by the checks)
	 * @param $chunk String: the start of the file
	 *
	 * @return String|false: the detected MIME type, or false if nothing matched
	 */
	private function checkTextHeaders( $version, $chunk ) {
		$chunk2 = substr( $chunk, 0, 2 );
		$chunk4 = substr( $chunk, 0, 4 );
		$chunk5 = substr( $chunk, 0, 5 );
		if ( $chunk4 == '%PDF' ) {
			return 'application/pdf';
		}
		if ( $chunk2 == '%!' ) {
			return 'application/postscript';
		}
		if ( $chunk5 == '{\\rtf' ) {
			return 'text/richtext';
		}
		if ( $chunk5 == 'begin' ) {
			return 'application/base64';
		}
		return false;
	}

	/**
	 * Check for binary headers at the start of the chunk
	 * Confirmed same in 5 and 7.
	 *
	 * The checks are order sensitive: the first matching magic number wins.
	 *
	 * @param $version String: version name (not used by the checks)
	 * @param $chunk String: the start of the file
	 *
	 * @return String|false: the detected MIME type in IE's own vocabulary, or
	 *   false if nothing matched
	 */
	private function checkBinaryHeaders( $version, $chunk ) {
		$chunk2 = substr( $chunk, 0, 2 );
		$chunk3 = substr( $chunk, 0, 3 );
		$chunk4 = substr( $chunk, 0, 4 );
		$chunk5 = substr( $chunk, 0, 5 );
		$chunk5uc = strtoupper( $chunk5 );
		$chunk8 = substr( $chunk, 0, 8 );
		if ( $chunk5uc == 'GIF87' || $chunk5uc == 'GIF89' ) {
			return 'image/gif';
		}
		if ( $chunk2 == "\xff\xd8" ) {
			return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
		}

		if ( $chunk2 == 'BM'
			&& substr( $chunk, 6, 2 ) == "\000\000"
			&& substr( $chunk, 8, 2 ) == "\000\000" )
		{
			return 'image/bmp'; // another non-standard MIME
		}
		if ( $chunk4 == 'RIFF'
			&& substr( $chunk, 8, 4 ) == 'WAVE' )
		{
			return 'audio/wav';
		}
		// These were integer literals in IE
		// Perhaps the author was not sure what the target endianness was
		if ( $chunk4 == ".sd\000"
			|| $chunk4 == ".snd"
			|| $chunk4 == "\000ds."
			|| $chunk4 == "dns." )
		{
			return 'audio/basic';
		}
		if ( $chunk3 == "MM\000" ) {
			return 'image/tiff';
		}
		if ( $chunk2 == 'MZ' ) {
			return 'application/x-msdownload';
		}
		if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
			return 'image/x-png'; // [sic]
		}
		// The length guard keeps the byte offsets below within the string
		if ( strlen( $chunk ) >= 5 ) {
			$byte2 = ord( $chunk[2] );
			$byte4 = ord( $chunk[4] );
			if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
				return 'image/x-jg';
			}
		}
		// More endian confusion?
		if ( $chunk4 == 'MROF' ) {
			return 'audio/x-aiff';
		}
		$chunk4_8 = substr( $chunk, 8, 4 );
		if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
			return 'audio/x-aiff';
		}
		if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
			return 'video/avi';
		}
		if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
			return 'video/mpeg';
		}
		if ( $chunk4 == "\001\000\000\000"
			&& substr( $chunk, 40, 4 ) == ' EMF' )
		{
			return 'image/x-emf';
		}
		if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
			return 'image/x-wmf';
		}
		if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
			return 'application/java';
		}
		if ( $chunk2 == 'PK' ) {
			return 'application/x-zip-compressed';
		}
		if ( $chunk2 == "\x1f\x9d" ) {
			return 'application/x-compressed';
		}
		if ( $chunk2 == "\x1f\x8b" ) {
			return 'application/x-gzip-compressed';
		}
		// Skip redundant check for ZIP
		if ( $chunk5 == "MThd\000" ) {
			return 'audio/mid';
		}
		if ( $chunk4 == '%PDF' ) {
			return 'application/pdf';
		}
		return false;
	}

	/**
	 * Do heuristic checks on the bulk of the data sample.
	 * Search for HTML tags.
	 *
	 * Walks the chunk one byte at a time, counting character classes and
	 * recording which tell-tale markers were seen. Some markers end the scan
	 * early, so the counters may cover only part of the chunk.
	 *
	 * @param $version String: version name; decides between the CDF check
	 *   (before ie07) and the RSS/RDF/Atom checks (ie07 and later)
	 * @param $chunk String: the data to sample
	 *
	 * @return Array with two keys:
	 *   - 'found': map of marker name => true for each marker seen
	 *   - 'counters': map with the keys ctrl, high, low, lf, cr, ff
	 */
	protected function sampleData( $version, $chunk ) {
		$found = array();
		$counters = array(
			'ctrl' => 0,
			'high' => 0,
			'low' => 0,
			'lf' => 0,
			'cr' => 0,
			'ff' => 0
		);
		$htmlTags = array(
			'html',
			'head',
			'title',
			'body',
			'script',
			'a href',
			'pre',
			'img',
			'plaintext',
			'table'
		);
		$rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
		$rdfPurl = 'http://purl.org/rss/1.0/';
		$xbmMagic1 = '#define';
		$xbmMagic2 = '_width';
		$xbmMagic3 = '_bits';
		$binhexMagic = 'converted with BinHex';

		$chunkLength = strlen( $chunk );
		for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
			$curChar = $chunk[$offset];
			if ( $curChar == "\x0a" ) {
				$counters['lf']++;
				continue;
			} elseif ( $curChar == "\x0d" ) {
				$counters['cr']++;
				continue;
			} elseif ( $curChar == "\x0c" ) {
				$counters['ff']++;
				continue;
			} elseif ( $curChar == "\t" ) {
				$counters['low']++;
				continue;
			} elseif ( ord( $curChar ) < 32 ) {
				$counters['ctrl']++;
				continue;
			} elseif ( ord( $curChar ) >= 128 ) {
				$counters['high']++;
				continue;
			}

			$counters['low']++;
			if ( $curChar == '<' ) {
				// XML
				$remainder = substr( $chunk, $offset + 1 );
				if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
					$nextChar = substr( $chunk, $offset + 5, 1 );
					if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
						$found['xml'] = true;
					}
				}
				// Scriptlet
				if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
					$found['scriptlet'] = true;
					break;
				}
				// HTML
				foreach ( $htmlTags as $tag ) {
					if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
						$found['html'] = true;
					}
				}
				// Skip broken check for additional tags (HR etc.)

				// CHANNEL replaced by RSS, RDF and FEED in IE 7
				if ( $version < 'ie07' ) {
					if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
						$found['cdf'] = true;
					}
				} else {
					// RSS
					if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
						$found['rss'] = true;
						break; // return from SampleData
					}
					if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
						$found['rdf-tag'] = true;
						// no break
					}
					if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
						$found['atom'] = true;
						break;
					}
				}
				continue;
			}
			// Skip broken check for -->

			// RSS URL checks
			// For some reason both URLs must appear before it is recognised
			$remainder = substr( $chunk, $offset );
			if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
				$found['rdf-url'] = true;
				if ( isset( $found['rdf-tag'] )
					&& isset( $found['rdf-purl'] ) ) // [sic]
				{
					break;
				}
				continue;
			}

			if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
				// Record the purl.org namespace; without this flag the
				// three-way RDF test in getMimeTypeForVersion() can never pass.
				$found['rdf-purl'] = true;
				if ( isset( $found['rdf-tag'] )
					&& isset( $found['rdf-url'] ) ) // [sic]
				{
					break;
				}
				continue;
			}

			// XBM checks: "#define", then "_width", then "_bits", in that order
			if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
				$found['xbm1'] = true;
				continue;
			}
			if ( $curChar == '_' ) {
				if ( isset( $found['xbm2'] ) ) {
					if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
						$found['xbm'] = true;
						break;
					}
				} elseif ( isset( $found['xbm1'] ) ) {
					if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
						$found['xbm2'] = true;
					}
				}
			}

			// BinHex (this one is matched case-sensitively)
			if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
				$found['binhex'] = true;
			}
		}
		return array( 'found' => $found, 'counters' => $counters );
	}

	/**
	 * Classify a MIME type using the type table of the given version.
	 *
	 * @param $version String: one of the names in $this->versions
	 * @param $type String: the MIME type to classify
	 *
	 * @return String: the format name from the type table ('ambiguous',
	 *   'text', 'binary' or 'html'), or 'unknown' if the type is not listed.
	 *   An empty type and the literal '(null)' count as 'ambiguous'.
	 */
	protected function getDataFormat( $version, $type ) {
		$types = $this->typeTable[$version];
		$type = strval( $type );
		if ( $type === '(null)' || $type === '' ) {
			return 'ambiguous';
		}
		foreach ( $types as $format => $list ) {
			if ( in_array( $type, $list, true ) ) {
				return $format;
			}
		}
		return 'unknown';
	}
}
825 | | - |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -124,7 +124,6 @@ |
125 | 125 | 'HTMLInfoField' => 'includes/HTMLForm.php', |
126 | 126 | 'Http' => 'includes/HttpFunctions.php', |
127 | 127 | 'HttpRequest' => 'includes/HttpFunctions.php', |
128 | | - 'IEContentAnalyzer' => 'includes/IEContentAnalyzer.php', |
129 | 128 | 'ImageGallery' => 'includes/ImageGallery.php', |
130 | 129 | 'ImageHistoryList' => 'includes/ImagePage.php', |
131 | 130 | 'ImageHistoryPseudoPager' => 'includes/ImagePage.php', |
— | — | @@ -454,6 +453,9 @@ |
455 | 454 | 'RefreshLinksJob2' => 'includes/job/RefreshLinksJob.php', |
456 | 455 | 'UploadFromUrlJob' => 'includes/job/UploadFromUrlJob.php', |
457 | 456 | |
| 457 | + # includes/libs |
| 458 | + 'IEContentAnalyzer' => 'includes/libs/IEContentAnalyzer.php', |
| 459 | + |
458 | 460 | # includes/media |
459 | 461 | 'BitmapHandler' => 'includes/media/Bitmap.php', |
460 | 462 | 'BitmapHandler_ClientOnly' => 'includes/media/Bitmap_ClientOnly.php', |
Index: trunk/phase3/includes/libs/IEContentAnalyzer.php |
— | — | @@ -0,0 +1,824 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * This class simulates Microsoft Internet Explorer's terribly broken and |
| 6 | + * insecure MIME type detection algorithm. It can be used to check web uploads |
| 7 | + * with an apparently safe type, to see if IE will reinterpret them to produce |
| 8 | + * something dangerous. |
| 9 | + * |
| 10 | + * It is full of bugs and strange design choices, and should not under any |
| 11 | + * circumstances be used to determine a MIME type to present to a user or |
| 12 | + * client. (Apple Safari developers, this means you too.) |
| 13 | + * |
| 14 | + * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have |
| 15 | + * attempted to ensure that this code works in exactly the same way as Internet |
| 16 | + * Explorer, it does not share any source code, or creative choices such as |
| 17 | + * variable names, thus I (Tim Starling) claim copyright on it. |
| 18 | + * |
| 19 | + * It may be redistributed without restriction. To aid reuse, this class does |
| 20 | + * not depend on any MediaWiki module. |
| 21 | + */ |
| 22 | +class IEContentAnalyzer { |
| 23 | + /** |
| 24 | + * Relevant data taken from the type table in IE 5 |
| 25 | + */ |
| 26 | + protected $baseTypeTable = array( |
| 27 | + 'ambiguous' /*1*/ => array( |
| 28 | + 'text/plain', |
| 29 | + 'application/octet-stream', |
| 30 | + 'application/x-netcdf', // [sic] |
| 31 | + ), |
| 32 | + 'text' /*3*/ => array( |
| 33 | + 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64', |
| 34 | + 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet' |
| 35 | + ), |
| 36 | + 'binary' /*4*/ => array( |
| 37 | + 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif', |
| 38 | + 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', |
| 39 | + 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', |
| 40 | + 'video/x-msvideo', 'video/mpeg', 'application/x-compressed', |
| 41 | + 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java', |
| 42 | + 'application/x-msdownload' |
| 43 | + ), |
| 44 | + 'html' /*5*/ => array( 'text/html' ), |
| 45 | + ); |
| 46 | + |
| 47 | + /** |
| 48 | + * Changes to the type table in later versions of IE |
| 49 | + */ |
| 50 | + protected $addedTypes = array( |
| 51 | + 'ie07' => array( |
| 52 | + 'text' => array( 'text/xml', 'application/xml' ) |
| 53 | + ), |
| 54 | + ); |
| 55 | + |
| 56 | + /** |
| 57 | + * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a |
| 58 | + * typical Windows installation. |
| 59 | + * |
| 60 | + * Used for extension to MIME type mapping if detection fails. |
| 61 | + */ |
| 62 | + protected $registry = array( |
| 63 | + '.323' => 'text/h323', |
| 64 | + '.3g2' => 'video/3gpp2', |
| 65 | + '.3gp' => 'video/3gpp', |
| 66 | + '.3gp2' => 'video/3gpp2', |
| 67 | + '.3gpp' => 'video/3gpp', |
| 68 | + '.aac' => 'audio/aac', |
| 69 | + '.ac3' => 'audio/ac3', |
| 70 | + '.accda' => 'application/msaccess', |
| 71 | + '.accdb' => 'application/msaccess', |
| 72 | + '.accdc' => 'application/msaccess', |
| 73 | + '.accde' => 'application/msaccess', |
| 74 | + '.accdr' => 'application/msaccess', |
| 75 | + '.accdt' => 'application/msaccess', |
| 76 | + '.ade' => 'application/msaccess', |
| 77 | + '.adp' => 'application/msaccess', |
| 78 | + '.adts' => 'audio/aac', |
| 79 | + '.ai' => 'application/postscript', |
| 80 | + '.aif' => 'audio/aiff', |
| 81 | + '.aifc' => 'audio/aiff', |
| 82 | + '.aiff' => 'audio/aiff', |
| 83 | + '.amc' => 'application/x-mpeg', |
| 84 | + '.application' => 'application/x-ms-application', |
| 85 | + '.asf' => 'video/x-ms-asf', |
| 86 | + '.asx' => 'video/x-ms-asf', |
| 87 | + '.au' => 'audio/basic', |
| 88 | + '.avi' => 'video/avi', |
| 89 | + '.bmp' => 'image/bmp', |
| 90 | + '.caf' => 'audio/x-caf', |
| 91 | + '.cat' => 'application/vnd.ms-pki.seccat', |
| 92 | + '.cbo' => 'application/sha', |
| 93 | + '.cdda' => 'audio/aiff', |
| 94 | + '.cer' => 'application/x-x509-ca-cert', |
| 95 | + '.conf' => 'text/plain', |
| 96 | + '.crl' => 'application/pkix-crl', |
| 97 | + '.crt' => 'application/x-x509-ca-cert', |
| 98 | + '.css' => 'text/css', |
| 99 | + '.csv' => 'application/vnd.ms-excel', |
| 100 | + '.der' => 'application/x-x509-ca-cert', |
| 101 | + '.dib' => 'image/bmp', |
| 102 | + '.dif' => 'video/x-dv', |
| 103 | + '.dll' => 'application/x-msdownload', |
| 104 | + '.doc' => 'application/msword', |
| 105 | + '.docm' => 'application/vnd.ms-word.document.macroEnabled.12', |
| 106 | + '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', |
| 107 | + '.dot' => 'application/msword', |
| 108 | + '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12', |
| 109 | + '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template', |
| 110 | + '.dv' => 'video/x-dv', |
| 111 | + '.dwfx' => 'model/vnd.dwfx+xps', |
| 112 | + '.edn' => 'application/vnd.adobe.edn', |
| 113 | + '.eml' => 'message/rfc822', |
| 114 | + '.eps' => 'application/postscript', |
| 115 | + '.etd' => 'application/x-ebx', |
| 116 | + '.exe' => 'application/x-msdownload', |
| 117 | + '.fdf' => 'application/vnd.fdf', |
| 118 | + '.fif' => 'application/fractals', |
| 119 | + '.gif' => 'image/gif', |
| 120 | + '.gsm' => 'audio/x-gsm', |
| 121 | + '.hqx' => 'application/mac-binhex40', |
| 122 | + '.hta' => 'application/hta', |
| 123 | + '.htc' => 'text/x-component', |
| 124 | + '.htm' => 'text/html', |
| 125 | + '.html' => 'text/html', |
| 126 | + '.htt' => 'text/webviewhtml', |
| 127 | + '.hxa' => 'application/xml', |
| 128 | + '.hxc' => 'application/xml', |
| 129 | + '.hxd' => 'application/octet-stream', |
| 130 | + '.hxe' => 'application/xml', |
| 131 | + '.hxf' => 'application/xml', |
| 132 | + '.hxh' => 'application/octet-stream', |
| 133 | + '.hxi' => 'application/octet-stream', |
| 134 | + '.hxk' => 'application/xml', |
| 135 | + '.hxq' => 'application/octet-stream', |
| 136 | + '.hxr' => 'application/octet-stream', |
| 137 | + '.hxs' => 'application/octet-stream', |
| 138 | + '.hxt' => 'application/xml', |
| 139 | + '.hxv' => 'application/xml', |
| 140 | + '.hxw' => 'application/octet-stream', |
| 141 | + '.ico' => 'image/x-icon', |
| 142 | + '.iii' => 'application/x-iphone', |
| 143 | + '.ins' => 'application/x-internet-signup', |
| 144 | + '.iqy' => 'text/x-ms-iqy', |
| 145 | + '.isp' => 'application/x-internet-signup', |
| 146 | + '.jfif' => 'image/jpeg', |
| 147 | + '.jnlp' => 'application/x-java-jnlp-file', |
| 148 | + '.jpe' => 'image/jpeg', |
| 149 | + '.jpeg' => 'image/jpeg', |
| 150 | + '.jpg' => 'image/jpeg', |
| 151 | + '.jtx' => 'application/x-jtx+xps', |
| 152 | + '.latex' => 'application/x-latex', |
| 153 | + '.log' => 'text/plain', |
| 154 | + '.m1v' => 'video/mpeg', |
| 155 | + '.m2v' => 'video/mpeg', |
| 156 | + '.m3u' => 'audio/x-mpegurl', |
| 157 | + '.mac' => 'image/x-macpaint', |
| 158 | + '.man' => 'application/x-troff-man', |
| 159 | + '.mda' => 'application/msaccess', |
| 160 | + '.mdb' => 'application/msaccess', |
| 161 | + '.mde' => 'application/msaccess', |
| 162 | + '.mfp' => 'application/x-shockwave-flash', |
| 163 | + '.mht' => 'message/rfc822', |
| 164 | + '.mhtml' => 'message/rfc822', |
| 165 | + '.mid' => 'audio/mid', |
| 166 | + '.midi' => 'audio/mid', |
| 167 | + '.mod' => 'video/mpeg', |
| 168 | + '.mov' => 'video/quicktime', |
| 169 | + '.mp2' => 'video/mpeg', |
| 170 | + '.mp2v' => 'video/mpeg', |
| 171 | + '.mp3' => 'audio/mpeg', |
| 172 | + '.mp4' => 'video/mp4', |
| 173 | + '.mpa' => 'video/mpeg', |
| 174 | + '.mpe' => 'video/mpeg', |
| 175 | + '.mpeg' => 'video/mpeg', |
| 176 | + '.mpf' => 'application/vnd.ms-mediapackage', |
| 177 | + '.mpg' => 'video/mpeg', |
| 178 | + '.mpv2' => 'video/mpeg', |
| 179 | + '.mqv' => 'video/quicktime', |
| 180 | + '.NMW' => 'application/nmwb', |
| 181 | + '.nws' => 'message/rfc822', |
| 182 | + '.odc' => 'text/x-ms-odc', |
| 183 | + '.ols' => 'application/vnd.ms-publisher', |
| 184 | + '.p10' => 'application/pkcs10', |
| 185 | + '.p12' => 'application/x-pkcs12', |
| 186 | + '.p7b' => 'application/x-pkcs7-certificates', |
| 187 | + '.p7c' => 'application/pkcs7-mime', |
| 188 | + '.p7m' => 'application/pkcs7-mime', |
| 189 | + '.p7r' => 'application/x-pkcs7-certreqresp', |
| 190 | + '.p7s' => 'application/pkcs7-signature', |
| 191 | + '.pct' => 'image/pict', |
| 192 | + '.pdf' => 'application/pdf', |
| 193 | + '.pdx' => 'application/vnd.adobe.pdx', |
| 194 | + '.pfx' => 'application/x-pkcs12', |
| 195 | + '.pic' => 'image/pict', |
| 196 | + '.pict' => 'image/pict', |
| 197 | + '.pinstall' => 'application/x-picasa-detect', |
| 198 | + '.pko' => 'application/vnd.ms-pki.pko', |
| 199 | + '.png' => 'image/png', |
| 200 | + '.pnt' => 'image/x-macpaint', |
| 201 | + '.pntg' => 'image/x-macpaint', |
| 202 | + '.pot' => 'application/vnd.ms-powerpoint', |
| 203 | + '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12', |
| 204 | + '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template', |
| 205 | + '.ppa' => 'application/vnd.ms-powerpoint', |
| 206 | + '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12', |
| 207 | + '.pps' => 'application/vnd.ms-powerpoint', |
| 208 | + '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12', |
| 209 | + '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow', |
| 210 | + '.ppt' => 'application/vnd.ms-powerpoint', |
| 211 | + '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12', |
| 212 | + '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation', |
| 213 | + '.prf' => 'application/pics-rules', |
| 214 | + '.ps' => 'application/postscript', |
| 215 | + '.pub' => 'application/vnd.ms-publisher', |
| 216 | + '.pwz' => 'application/vnd.ms-powerpoint', |
| 217 | + '.py' => 'text/plain', |
| 218 | + '.pyw' => 'text/plain', |
| 219 | + '.qht' => 'text/x-html-insertion', |
| 220 | + '.qhtm' => 'text/x-html-insertion', |
| 221 | + '.qt' => 'video/quicktime', |
| 222 | + '.qti' => 'image/x-quicktime', |
| 223 | + '.qtif' => 'image/x-quicktime', |
| 224 | + '.qtl' => 'application/x-quicktimeplayer', |
| 225 | + '.rat' => 'application/rat-file', |
| 226 | + '.rmf' => 'application/vnd.adobe.rmf', |
| 227 | + '.rmi' => 'audio/mid', |
| 228 | + '.rqy' => 'text/x-ms-rqy', |
| 229 | + '.rtf' => 'application/msword', |
| 230 | + '.sct' => 'text/scriptlet', |
| 231 | + '.sd2' => 'audio/x-sd2', |
| 232 | + '.sdp' => 'application/sdp', |
| 233 | + '.shtml' => 'text/html', |
| 234 | + '.sit' => 'application/x-stuffit', |
| 235 | + '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12', |
| 236 | + '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide', |
| 237 | + '.slk' => 'application/vnd.ms-excel', |
| 238 | + '.snd' => 'audio/basic', |
| 239 | + '.so' => 'application/x-apachemodule', |
| 240 | + '.sol' => 'text/plain', |
| 241 | + '.sor' => 'text/plain', |
| 242 | + '.spc' => 'application/x-pkcs7-certificates', |
| 243 | + '.spl' => 'application/futuresplash', |
| 244 | + '.sst' => 'application/vnd.ms-pki.certstore', |
| 245 | + '.stl' => 'application/vnd.ms-pki.stl', |
| 246 | + '.swf' => 'application/x-shockwave-flash', |
| 247 | + '.thmx' => 'application/vnd.ms-officetheme', |
| 248 | + '.tif' => 'image/tiff', |
| 249 | + '.tiff' => 'image/tiff', |
| 250 | + '.txt' => 'text/plain', |
| 251 | + '.uls' => 'text/iuls', |
| 252 | + '.vcf' => 'text/x-vcard', |
| 253 | + '.vdx' => 'application/vnd.ms-visio.viewer', |
| 254 | + '.vsd' => 'application/vnd.ms-visio.viewer', |
| 255 | + '.vss' => 'application/vnd.ms-visio.viewer', |
| 256 | + '.vst' => 'application/vnd.ms-visio.viewer', |
| 257 | + '.vsx' => 'application/vnd.ms-visio.viewer', |
| 258 | + '.vtx' => 'application/vnd.ms-visio.viewer', |
| 259 | + '.wav' => 'audio/wav', |
| 260 | + '.wax' => 'audio/x-ms-wax', |
| 261 | + '.wbk' => 'application/msword', |
| 262 | + '.wdp' => 'image/vnd.ms-photo', |
| 263 | + '.wiz' => 'application/msword', |
| 264 | + '.wm' => 'video/x-ms-wm', |
| 265 | + '.wma' => 'audio/x-ms-wma', |
| 266 | + '.wmd' => 'application/x-ms-wmd', |
| 267 | + '.wmv' => 'video/x-ms-wmv', |
| 268 | + '.wmx' => 'video/x-ms-wmx', |
| 269 | + '.wmz' => 'application/x-ms-wmz', |
| 270 | + '.wpl' => 'application/vnd.ms-wpl', |
| 271 | + '.wsc' => 'text/scriptlet', |
| 272 | + '.wvx' => 'video/x-ms-wvx', |
| 273 | + '.xaml' => 'application/xaml+xml', |
| 274 | + '.xbap' => 'application/x-ms-xbap', |
| 275 | + '.xdp' => 'application/vnd.adobe.xdp+xml', |
| 276 | + '.xfdf' => 'application/vnd.adobe.xfdf', |
| 277 | + '.xht' => 'application/xhtml+xml', |
| 278 | + '.xhtml' => 'application/xhtml+xml', |
| 279 | + '.xla' => 'application/vnd.ms-excel', |
| 280 | + '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12', |
| 281 | + '.xlk' => 'application/vnd.ms-excel', |
| 282 | + '.xll' => 'application/vnd.ms-excel', |
| 283 | + '.xlm' => 'application/vnd.ms-excel', |
| 284 | + '.xls' => 'application/vnd.ms-excel', |
| 285 | + '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12', |
| 286 | + '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12', |
| 287 | + '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', |
| 288 | + '.xlt' => 'application/vnd.ms-excel', |
| 289 | + '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12', |
| 290 | + '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template', |
| 291 | + '.xlw' => 'application/vnd.ms-excel', |
| 292 | + '.xml' => 'text/xml', |
| 293 | + '.xps' => 'application/vnd.ms-xpsdocument', |
| 294 | + '.xsl' => 'text/xml', |
| 295 | + ); |
| 296 | + |
| 297 | + /** |
| 298 | + * IE versions which have been analysed to bring you this class, and for |
| 299 | + * which some substantive difference exists. These will appear as keys |
| 300 | + * in the return value of getRealMimesFromData(). The names are chosen to sort correctly. |
| 301 | + */ |
| 302 | + protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ); |
| 303 | + |
| 304 | + /** |
| 305 | + * Type table with versions expanded |
| 306 | + */ |
| 307 | + protected $typeTable = array(); |
| 308 | + |
| 309 | + /** constructor */ |
| 310 | + function __construct() { |
| 311 | + // Construct versioned type arrays from the base type array plus additions |
| 312 | + $types = $this->baseTypeTable; |
| 313 | + foreach ( $this->versions as $version ) { |
| 314 | + if ( isset( $this->addedTypes[$version] ) ) { |
| 315 | + foreach ( $this->addedTypes[$version] as $format => $addedTypes ) { |
| 316 | + $types[$format] = array_merge( $types[$format], $addedTypes ); |
| 317 | + } |
| 318 | + } |
| 319 | + $this->typeTable[$version] = $types; |
| 320 | + } |
| 321 | + } |
| 322 | + |
| 323 | + /** |
| 324 | + * Get the MIME types from getMimesFromData(), but convert the result from IE's |
| 325 | + * idiosyncratic private types into something other apps will understand. |
| 326 | + * |
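| 327 | + * @param $fileName String: the file name (its extension is looked up in the registry if content detection is inconclusive) |
| 328 | + * @param $chunk String: the first 256 bytes of the file (only the first 255 bytes are examined) |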
| 329 | + * @param $proposed String: the MIME type proposed by the server |
| 330 | + * |
| 331 | + * @return Array: map of IE version to detected mime type |
| 332 | + */ |
| 333 | + public function getRealMimesFromData( $fileName, $chunk, $proposed ) { |
| 334 | + $types = $this->getMimesFromData( $fileName, $chunk, $proposed ); |
| 335 | + $types = array_map( array( $this, 'translateMimeType' ), $types ); |
| 336 | + return $types; |
| 337 | + } |
| 338 | + |
| 339 | + /** |
| 340 | + * Translate a MIME type from IE's idiosyncratic private types into |
| 341 | + * more commonly understood type strings |
| 342 | + */ |
| 343 | + public function translateMimeType( $type ) { |
| 344 | + static $table = array( |
| 345 | + 'image/pjpeg' => 'image/jpeg', |
| 346 | + 'image/x-png' => 'image/png', |
| 347 | + 'image/x-wmf' => 'application/x-msmetafile', |
| 348 | + 'image/bmp' => 'image/x-bmp', |
| 349 | + 'application/x-zip-compressed' => 'application/zip', |
| 350 | + 'application/x-compressed' => 'application/x-compress', |
| 351 | + 'application/x-gzip-compressed' => 'application/x-gzip', |
| 352 | + 'audio/mid' => 'audio/midi', |
| 353 | + ); |
| 354 | + if ( isset( $table[$type] ) ) { |
| 355 | + $type = $table[$type]; |
| 356 | + } |
| 357 | + return $type; |
| 358 | + } |
| 359 | + |
| 360 | + /** |
| 361 | + * Get the untranslated MIME types for all known versions |
| 362 | + * |
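| 363 | + * @param $fileName String: the file name (its extension is looked up in the registry if content detection is inconclusive) |
| 364 | + * @param $chunk String: the first 256 bytes of the file (only the first 255 bytes are examined) |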
| 365 | + * @param $proposed String: the MIME type proposed by the server |
| 366 | + * |
| 367 | + * @return Array: map of IE version to detected mime type |
| 368 | + */ |
| 369 | + public function getMimesFromData( $fileName, $chunk, $proposed ) { |
| 370 | + $types = array(); |
| 371 | + foreach ( $this->versions as $version ) { |
| 372 | + $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ); |
| 373 | + } |
| 374 | + return $types; |
| 375 | + } |
| 376 | + |
| 377 | + /** |
| 378 | + * Get the MIME type for a given named version |
| 379 | + */ |
| 380 | + protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) { |
| 381 | + // Strip text after a semicolon |
| 382 | + $semiPos = strpos( $proposed, ';' ); |
| 383 | + if ( $semiPos !== false ) { |
| 384 | + $proposed = substr( $proposed, 0, $semiPos ); |
| 385 | + } |
| 386 | + |
| 387 | + $proposedFormat = $this->getDataFormat( $version, $proposed ); |
| 388 | + if ( $proposedFormat == 'unknown' |
| 389 | + && $proposed != 'multipart/mixed' |
| 390 | + && $proposed != 'multipart/x-mixed-replace' ) |
| 391 | + { |
| 392 | + return $proposed; |
| 393 | + } |
| 394 | + if ( strval( $chunk ) === '' ) { |
| 395 | + return $proposed; |
| 396 | + } |
| 397 | + |
| 398 | + // Truncate chunk at 255 bytes |
| 399 | + $chunk = substr( $chunk, 0, 255 ); |
| 400 | + |
| 401 | + // IE does the Check*Headers() calls last, and instead does the following image |
| 402 | + // type checks by directly looking for the magic numbers. What I do here should |
| 403 | + // have the same effect since the magic number checks are identical in both cases. |
| 404 | + $result = $this->sampleData( $version, $chunk ); |
| 405 | + $sampleFound = $result['found']; |
| 406 | + $counters = $result['counters']; |
| 407 | + $binaryType = $this->checkBinaryHeaders( $version, $chunk ); |
| 408 | + $textType = $this->checkTextHeaders( $version, $chunk ); |
| 409 | + |
| 410 | + if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) { |
| 411 | + return 'text/html'; |
| 412 | + } |
| 413 | + if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) { |
| 414 | + return 'image/gif'; |
| 415 | + } |
| 416 | + if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' ) |
| 417 | + && $binaryType == 'image/pjpeg' ) |
| 418 | + { |
| 419 | + return $proposed; |
| 420 | + } |
| 421 | + // PNG check added in IE 7 |
| 422 | + if ( $version >= 'ie07' |
| 423 | + && ( $proposed == 'image/x-png' || $proposed == 'image/png' ) |
| 424 | + && $binaryType == 'image/x-png' ) |
| 425 | + { |
| 426 | + return $proposed; |
| 427 | + } |
| 428 | + |
| 429 | + // CDF was removed in IE 7 so it won't be in $sampleFound for later versions |
| 430 | + if ( isset( $sampleFound['cdf'] ) ) { |
| 431 | + return 'application/x-cdf'; |
| 432 | + } |
| 433 | + |
| 434 | + // RSS and Atom were added in IE 7 so they won't be in $sampleFound for |
| 435 | + // previous versions |
| 436 | + if ( isset( $sampleFound['rss'] ) ) { |
| 437 | + return 'application/rss+xml'; |
| 438 | + } |
| 439 | + if ( isset( $sampleFound['rdf-tag'] ) |
| 440 | + && isset( $sampleFound['rdf-url'] ) |
| 441 | + && isset( $sampleFound['rdf-purl'] ) ) |
| 442 | + { |
| 443 | + return 'application/rss+xml'; |
| 444 | + } |
| 445 | + if ( isset( $sampleFound['atom'] ) ) { |
| 446 | + return 'application/atom+xml'; |
| 447 | + } |
| 448 | + |
| 449 | + if ( isset( $sampleFound['xml'] ) ) { |
| 450 | + // TODO: I'm not sure under what circumstances this flag is enabled |
| 451 | + if ( strpos( $version, 'strict' ) !== false ) { |
| 452 | + if ( $proposed == 'text/html' || $proposed == 'text/xml' ) { |
| 453 | + return 'text/xml'; |
| 454 | + } |
| 455 | + } else { |
| 456 | + return 'text/xml'; |
| 457 | + } |
| 458 | + } |
| 459 | + if ( isset( $sampleFound['html'] ) ) { |
| 460 | + // TODO: I'm not sure under what circumstances this flag is enabled |
| 461 | + if ( strpos( $version, 'nohtml' ) !== false ) { |
| 462 | + if ( $proposed == 'text/plain' ) { |
| 463 | + return 'text/html'; |
| 464 | + } |
| 465 | + } else { |
| 466 | + return 'text/html'; |
| 467 | + } |
| 468 | + } |
| 469 | + if ( isset( $sampleFound['xbm'] ) ) { |
| 470 | + return 'image/x-bitmap'; |
| 471 | + } |
| 472 | + if ( isset( $sampleFound['binhex'] ) ) { |
| 473 | + return 'application/macbinhex40'; |
| 474 | + } |
| 475 | + if ( isset( $sampleFound['scriptlet'] ) ) { |
| 476 | + if ( strpos( $version, 'strict' ) !== false ) { |
| 477 | + if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) { |
| 478 | + return 'text/scriptlet'; |
| 479 | + } |
| 480 | + } else { |
| 481 | + return 'text/scriptlet'; |
| 482 | + } |
| 483 | + } |
| 484 | + |
| 485 | + // Freaky heuristics to determine if the data is text or binary |
| 486 | + // The heuristic is of course broken for non-ASCII text |
| 487 | + if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] ) |
| 488 | + < ( $counters['ctrl'] + $counters['high'] ) * 16 ) |
| 489 | + { |
| 490 | + $kindOfBinary = true; |
| 491 | + $type = $binaryType ? $binaryType : $textType; |
| 492 | + if ( $type === false ) { |
| 493 | + $type = 'application/octet-stream'; |
| 494 | + } |
| 495 | + } else { |
| 496 | + $kindOfBinary = false; |
| 497 | + $type = $textType ? $textType : $binaryType; |
| 498 | + if ( $type === false ) { |
| 499 | + $type = 'text/plain'; |
| 500 | + } |
| 501 | + } |
| 502 | + |
| 503 | + // Check if the output format is ambiguous |
| 504 | + // This generally means that detection failed, real types aren't ambiguous |
| 505 | + $detectedFormat = $this->getDataFormat( $version, $type ); |
| 506 | + if ( $detectedFormat != 'ambiguous' ) { |
| 507 | + return $type; |
| 508 | + } |
| 509 | + |
| 510 | + if ( $proposedFormat != 'ambiguous' ) { |
| 511 | + // FormatAgreesWithData() |
| 512 | + if ( $proposedFormat == 'text' && !$kindOfBinary ) { |
| 513 | + return $proposed; |
| 514 | + } |
| 515 | + if ( $proposedFormat == 'binary' && $kindOfBinary ) { |
| 516 | + return $proposed; |
| 517 | + } |
| 518 | + if ( $proposedFormat == 'html' ) { |
| 519 | + return $proposed; |
| 520 | + } |
| 521 | + } |
| 522 | + |
| 523 | + // Find a MIME type by searching the registry for the file extension. |
| 524 | + $dotPos = strrpos( $fileName, '.' ); |
| 525 | + if ( $dotPos === false ) { |
| 526 | + return $type; |
| 527 | + } |
| 528 | + $ext = substr( $fileName, $dotPos ); |
| 529 | + if ( isset( $this->registry[$ext] ) ) { |
| 530 | + return $this->registry[$ext]; |
| 531 | + } |
| 532 | + |
| 533 | + // TODO: If the extension has an application registered to it, IE will return |
| 534 | + // application/octet-stream. We'll skip that, so we could erroneously |
| 535 | + // return text/plain or application/x-netcdf where application/octet-stream |
| 536 | + // would be correct. |
| 537 | + |
| 538 | + return $type; |
| 539 | + } |
| 540 | + |
| 541 | + /** |
| 542 | + * Check for text headers at the start of the chunk |
| 543 | + * Confirmed same in 5 and 7. |
| 544 | + */ |
| 545 | + private function checkTextHeaders( $version, $chunk ) { |
| 546 | + $chunk2 = substr( $chunk, 0, 2 ); |
| 547 | + $chunk4 = substr( $chunk, 0, 4 ); |
| 548 | + $chunk5 = substr( $chunk, 0, 5 ); |
| 549 | + if ( $chunk4 == '%PDF' ) { |
| 550 | + return 'application/pdf'; |
| 551 | + } |
| 552 | + if ( $chunk2 == '%!' ) { |
| 553 | + return 'application/postscript'; |
| 554 | + } |
| 555 | + if ( $chunk5 == '{\\rtf' ) { |
| 556 | + return 'text/richtext'; |
| 557 | + } |
| 558 | + if ( $chunk5 == 'begin' ) { |
| 559 | + return 'application/base64'; |
| 560 | + } |
| 561 | + return false; |
| 562 | + } |
| 563 | + |
| 564 | + /** |
| 565 | + * Check for binary headers at the start of the chunk |
| 566 | + * Confirmed same in 5 and 7. |
| 567 | + */ |
| 568 | + private function checkBinaryHeaders( $version, $chunk ) { |
| 569 | + $chunk2 = substr( $chunk, 0, 2 ); |
| 570 | + $chunk3 = substr( $chunk, 0, 3 ); |
| 571 | + $chunk4 = substr( $chunk, 0, 4 ); |
| 572 | + $chunk5 = substr( $chunk, 0, 5 ); |
| 573 | + $chunk5uc = strtoupper( $chunk5 ); |
| 574 | + $chunk8 = substr( $chunk, 0, 8 ); |
| 575 | + if ( $chunk5uc == 'GIF87' || $chunk5uc == 'GIF89' ) { |
| 576 | + return 'image/gif'; |
| 577 | + } |
| 578 | + if ( $chunk2 == "\xff\xd8" ) { |
| 579 | + return 'image/pjpeg'; // actually plain JPEG but this is what IE returns |
| 580 | + } |
| 581 | + |
| 582 | + if ( $chunk2 == 'BM' |
| 583 | + && substr( $chunk, 6, 2 ) == "\000\000" |
| 584 | + && substr( $chunk, 8, 2 ) == "\000\000" ) |
| 585 | + { |
| 586 | + return 'image/bmp'; // another non-standard MIME |
| 587 | + } |
| 588 | + if ( $chunk4 == 'RIFF' |
| 589 | + && substr( $chunk, 8, 4 ) == 'WAVE' ) |
| 590 | + { |
| 591 | + return 'audio/wav'; |
| 592 | + } |
| 593 | + // These were integer literals in IE |
| 594 | + // Perhaps the author was not sure what the target endianness was |
| 595 | + if ( $chunk4 == ".sd\000" |
| 596 | + || $chunk4 == ".snd" |
| 597 | + || $chunk4 == "\000ds." |
| 598 | + || $chunk4 == "dns." ) |
| 599 | + { |
| 600 | + return 'audio/basic'; |
| 601 | + } |
| 602 | + if ( $chunk3 == "MM\000" ) { |
| 603 | + return 'image/tiff'; |
| 604 | + } |
| 605 | + if ( $chunk2 == 'MZ' ) { |
| 606 | + return 'application/x-msdownload'; |
| 607 | + } |
| 608 | + if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) { |
| 609 | + return 'image/x-png'; // [sic] |
| 610 | + } |
| 611 | + if ( strlen( $chunk ) >= 5 ) { |
| 612 | + $byte2 = ord( $chunk[2] ); |
| 613 | + $byte4 = ord( $chunk[4] ); |
| 614 | + if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) { |
| 615 | + return 'image/x-jg'; |
| 616 | + } |
| 617 | + } |
| 618 | + // More endian confusion? |
| 619 | + if ( $chunk4 == 'MROF' ) { |
| 620 | + return 'audio/x-aiff'; |
| 621 | + } |
| 622 | + $chunk4_8 = substr( $chunk, 8, 4 ); |
| 623 | + if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) { |
| 624 | + return 'audio/x-aiff'; |
| 625 | + } |
| 626 | + if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) { |
| 627 | + return 'video/avi'; |
| 628 | + } |
| 629 | + if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) { |
| 630 | + return 'video/mpeg'; |
| 631 | + } |
| 632 | + if ( $chunk4 == "\001\000\000\000" |
| 633 | + && substr( $chunk, 40, 4 ) == ' EMF' ) |
| 634 | + { |
| 635 | + return 'image/x-emf'; |
| 636 | + } |
| 637 | + if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) { |
| 638 | + return 'image/x-wmf'; |
| 639 | + } |
| 640 | + if ( $chunk4 == "\xca\xfe\xba\xbe" ) { |
| 641 | + return 'application/java'; |
| 642 | + } |
| 643 | + if ( $chunk2 == 'PK' ) { |
| 644 | + return 'application/x-zip-compressed'; |
| 645 | + } |
| 646 | + if ( $chunk2 == "\x1f\x9d" ) { |
| 647 | + return 'application/x-compressed'; |
| 648 | + } |
| 649 | + if ( $chunk2 == "\x1f\x8b" ) { |
| 650 | + return 'application/x-gzip-compressed'; |
| 651 | + } |
| 652 | + // Skip redundant check for ZIP |
| 653 | + if ( $chunk5 == "MThd\000" ) { |
| 654 | + return 'audio/mid'; |
| 655 | + } |
| 656 | + if ( $chunk4 == '%PDF' ) { |
| 657 | + return 'application/pdf'; |
| 658 | + } |
| 659 | + return false; |
| 660 | + } |
| 661 | + |
/**
 * Do heuristic checks on the bulk of the data sample.
 *
 * Walks the sample one byte at a time. Every byte is tallied into one of
 * six counters, and bytes in the range 32-127 are additionally tested as
 * the possible start of a known marker: HTML tags, an XML declaration,
 * scriptlet/CDF/feed tags, the two RSS 1.0 namespace URLs, and the XBM and
 * BinHex signatures.
 *
 * Some markers end the scan as soon as they are seen. In that case the
 * counters only describe the bytes up to and including that marker's first
 * byte.
 *
 * @param string $version Version key; compared as a string against 'ie07'
 *   to choose between the CHANNEL check and the RSS/RDF/FEED checks
 * @param string $chunk The data sample to scan
 * @return array Two keys: 'found' maps each marker name that was seen to
 *   true ('xml', 'scriptlet', 'html', 'cdf', 'rss', 'rdf-tag', 'atom',
 *   'rdf-url', 'rdf-purl', 'xbm1', 'xbm2', 'xbm', 'binhex'); 'counters'
 *   holds the byte tallies keyed 'ctrl', 'high', 'low', 'lf', 'cr', 'ff'
 */
protected function sampleData( $version, $chunk ) {
	$found = array();
	$counters = array(
		'ctrl' => 0,
		'high' => 0,
		'low' => 0,
		'lf' => 0,
		'cr' => 0,
		'ff' => 0
	);
	$htmlTags = array(
		'html',
		'head',
		'title',
		'body',
		'script',
		'a href',
		'pre',
		'img',
		'plaintext',
		'table'
	);
	$rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
	$rdfPurl = 'http://purl.org/rss/1.0/';
	$xbmMagic1 = '#define';
	$xbmMagic2 = '_width';
	$xbmMagic3 = '_bits';
	$binhexMagic = 'converted with BinHex';

	// Neither value changes during the scan, so evaluate them once
	$chunkLength = strlen( $chunk );
	$beforeIE7 = $version < 'ie07';

	for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
		$curChar = $chunk[$offset];
		$byte = ord( $curChar );

		// Tally the byte. Line breaks, form feeds, tabs, control bytes and
		// high bytes can never start a marker, so they go no further.
		if ( $curChar === "\x0a" ) {
			$counters['lf']++;
			continue;
		} elseif ( $curChar === "\x0d" ) {
			$counters['cr']++;
			continue;
		} elseif ( $curChar === "\x0c" ) {
			$counters['ff']++;
			continue;
		} elseif ( $curChar === "\t" ) {
			// Tab is tallied together with ordinary text
			$counters['low']++;
			continue;
		} elseif ( $byte < 32 ) {
			$counters['ctrl']++;
			continue;
		} elseif ( $byte >= 128 ) {
			$counters['high']++;
			continue;
		}
		$counters['low']++;

		if ( $curChar === '<' ) {
			// Tag names are matched against the text right after the '<'
			$remainder = substr( $chunk, $offset + 1 );

			// XML: "<?xml" must be followed by ':', space or tab
			if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
				$nextChar = substr( $chunk, $offset + 5, 1 );
				if ( in_array( $nextChar, array( ':', ' ', "\t" ), true ) ) {
					$found['xml'] = true;
				}
			}

			// Scriptlet: ends the scan
			if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
				$found['scriptlet'] = true;
				break;
			}

			// HTML
			foreach ( $htmlTags as $tag ) {
				if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
					$found['html'] = true;
					// One hit is enough; this only leaves the tag loop
					break;
				}
			}
			// Skip broken check for additional tags (HR etc.)

			// CHANNEL replaced by RSS, RDF and FEED in IE 7
			if ( $beforeIE7 ) {
				if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
					$found['cdf'] = true;
				}
			} else {
				// RSS: ends the scan
				if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
					$found['rss'] = true;
					break;
				}
				// RDF: keep scanning, the namespace URLs are still needed
				if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
					$found['rdf-tag'] = true;
				}
				// Atom: ends the scan
				if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
					$found['atom'] = true;
					break;
				}
			}
			continue;
		}
		// Skip broken check for -->

		// The remaining markers are matched starting at the current byte
		$remainder = substr( $chunk, $offset );

		// RSS URL checks
		// For some reason both URLs must appear before it is recognised
		if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
			$found['rdf-url'] = true;
			if ( isset( $found['rdf-tag'] )
				&& isset( $found['rdf-purl'] ) ) // [sic]
			{
				break;
			}
			continue;
		}

		if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
			// Record the match. The RDF URL branch above tests this key,
			// and without this assignment it could never be set.
			$found['rdf-purl'] = true;
			if ( isset( $found['rdf-tag'] )
				&& isset( $found['rdf-url'] ) ) // [sic]
			{
				break;
			}
			continue;
		}

		// XBM checks: "#define", then "_width", then "_bits", in that order
		if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
			$found['xbm1'] = true;
			continue;
		}
		if ( $curChar === '_' ) {
			if ( isset( $found['xbm2'] ) ) {
				if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
					// All three parts seen: ends the scan
					$found['xbm'] = true;
					break;
				}
			} elseif ( isset( $found['xbm1'] ) ) {
				if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
					$found['xbm2'] = true;
				}
			}
		}

		// BinHex: the only case-sensitive comparison
		if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
			$found['binhex'] = true;
		}
	}
	return array( 'found' => $found, 'counters' => $counters );
}
| 811 | + |
/**
 * Find which format class of the type table a MIME type is listed under.
 *
 * @param string $version Key into $this->typeTable
 * @param string $type The MIME type to classify
 * @return string 'ambiguous' when $type is empty or the literal '(null)';
 *   otherwise the key of the first list in the version's table that
 *   contains $type, or 'unknown' when no list does or the version has no
 *   table
 */
protected function getDataFormat( $version, $type ) {
	// Work on the string form so every comparison below can be strict
	$type = strval( $type );
	if ( $type === '' || $type === '(null)' ) {
		return 'ambiguous';
	}

	// A version without a table has nothing to match against; checking
	// first avoids an undefined-index notice and a foreach over null.
	if ( !isset( $this->typeTable[$version] ) ) {
		return 'unknown';
	}

	foreach ( $this->typeTable[$version] as $format => $list ) {
		if ( in_array( $type, $list, true ) ) {
			return $format;
		}
	}
	return 'unknown';
}
| 824 | +} |
| 825 | + |
Property changes on: trunk/phase3/includes/libs/IEContentAnalyzer.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 826 | + native |
Index: trunk/phase3/includes/libs/README |
— | — | @@ -0,0 +1,7 @@ |
| 2 | +The classes in this directory (./includes/libs) are considered standalone |
| 3 | +from the remainder of the MediaWiki codebase. They do not call any other |
| 4 | +portion of MediaWiki code, and can be used in other projects without |
| 5 | +dependency issues. |
| 6 | + |
| 7 | +Like the rest of MediaWiki, these are distributed under the GNU General Public |
| 8 | +License, version 2. |