r85290 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r85289‎ \| r85290 \| r85291 >
Date:	22:22, 3 April 2011
Author:	demon
Status:	ok
Tags:
Comment:	Per Roan, pull JavaScriptDistiller from trunk since it won't merge cleanly. This grabs at least r82340, r82344, r82384, r82399
Modified paths:	/branches/REL1_17/phase3/includes/libs/JavaScriptDistiller.php (modified) (history)

Diff [purge]

Index: branches/REL1_17/phase3/includes/libs/JavaScriptDistiller.php
—	—	@@ -1,7 +1,7 @@
2	2	<?php
3	3	/**
4	4	* JavaScript Distiller
5		- *
	5	+ *
6	6	* Author: Dean Edwards, Nicholas Martin, Trevor Parscal
7	7	* License: LGPL
8	8	*/
—	—	@@ -11,56 +11,64 @@
12	12
13	13	/**
14	14	* Removes most of the white-space from JavaScript code.
15		- *
	15	+ *
16	16	* This code came from the first pass of Dean Edwards' JavaScript Packer. Compared to using
17	17	* JSMin::minify, this produces < 1% larger output (after gzip) in approx. 25% of the time.
18		- *
	18	+ *
19	19	* @param $script String: JavaScript code to minify
20	20	* @param $stripVerticalSpace Boolean: Try to remove as much vertical whitespace as possible
21	21	*/
22	22	public static function stripWhiteSpace( $script, $stripVerticalSpace = false ) {
	23	+ // Try to avoid segfaulting
	24	+ // I saw segfaults with a limit of 10000, 1000 seems to work
	25	+ $oldLimit = ini_get( 'pcre.recursion_limit' );
	26	+ if ( intval( $oldLimit ) > 1000 ) {
	27	+ ini_set( 'pcre.recursion_limit', '1000' );
	28	+ }
	29	+
23	30	$script = self::stripHorizontalSpace( $script );
24	31	// If requested, make some vertical whitespace collapsing as well
25	32	if ( $stripVerticalSpace ) {
26	33	$script = self::stripVerticalSpace( $script );
27	34	}
28	35	// Done
	36	+ ini_set( 'pcre.recursion_limit', $oldLimit );
29	37	return $script;
30	38	}
31	39
32		~~- private static function stripHorizontalSpace( $script ) {~~
	40	+ public static function stripHorizontalSpace( $script ) {
33	41	$parser = self::createParser();
34	42	// Collapse horizontal whitespaces between variable names into a single space
35		~~- $parser->add( '/(\\b\|\\$)[ \\t]+(\\b\|\\$)/', '$2 $3' );~~
	43	+ $parser->add( '(\b\|\$) [ \t]+ (\b\|\$)', '$2 $3' );
36	44	// Collapse horizontal whitespaces between unary operators into a single space
37		~~- $parser->add( '/([+\\-])[ \\t]+([+\\-])/', '$2 $3' );~~
	45	+ $parser->add( '([+\-]) [ \t]+ ([+\-])', '$2 $3' );
38	46	// Remove all remaining un-protected horizontal whitespace
39		~~- $parser->add( '/[ \\t]+/');~~
	47	+ $parser->add( '[ \t]+');
40	48	// Collapse multiple vertical whitespaces with some horizontal spaces between them
41		~~- $parser->add( '/[\\r\\n]+[ \\t]*[\\r\\n]+/', "\n" );~~
	49	+ $parser->add( '[\r\n]+ [ \t]* [\r\n]+', "\n" );
42	50	// Execute and return
43	51	return $parser->exec($script);
44	52	}
45	53
46		~~- private static function stripVerticalSpace( $script ) {~~
	54	+ public static function stripVerticalSpace( $script ) {
47	55	$parser = self::createParser();
48	56	// Collapse whitespaces between and after a ){ pair (function definitions)
49		~~- $parser->add( '/\\)\\s+\\{\\s+/', '){' );~~
	57	+ $parser->add( '\) \s+ \{ \s+', '){' );
50	58	// Collapse whitespaces between and after a ({ pair (JSON argument)
51		~~- $parser->add( '/\\(\\s+\\{\\s+/', '({' );~~
	59	+ $parser->add( '\( \s+ \{ \s+', '({' );
52	60	// Collapse whitespaces between a parenthesis and a period (call chaining)
53		~~- $parser->add( '/\\)\\s+\\./', ').');~~
	61	+ $parser->add( '\) \s+ \.', ').');
54	62	// Collapse vertical whitespaces which come directly after a semicolon or a comma
55		~~- $parser->add( '/([;,])\\s+/', '$2' );~~
	88	+ $parser->add( '\/ \* .*? \* \/' );
56	64	// Collapse whitespaces between multiple parenthesis/brackets of similar direction
57		~~- $parser->add( '/([\\)\\}])\\s+([\\)\\}])/', '$2$3' );~~
58		~~- $parser->add( '/([\\(\\{])\\s+([\\(\\{])/', '$2$3' );~~
	65	+ $parser->add( '( [\)\}] ) \s+ ( [\)\}] )', '$2$3' );
	66	+ $parser->add( '( [\(\{] ) \s+ ( [\(\{] )', '$2$3' );
59	67	return $parser->exec( $script );
60	68	}
61	69
62	70	/*
63	71	* Creates an instance of ParseMaster and protects sensitive JavaScript regions.
64		- *
	72	+ *
65	73	* This parser is based on regular expressions, which all get or'd together, so rules take
66	74	* precedence in the order they are added. We can use it to minify by armoring certain regions
67	75	* by matching them and replacing them with the full match, leaving the remaining regions around
—	—	@@ -74,18 +82,43 @@
75	83	// to \s if we use a backslash as the escape character. We work around this by using an
76	84	// obscure escape character that we hope will never appear at the end of a line.
77	85	$parser->escapeChar = chr( 1 );
	86	+
	87	+ // C-style comment: use non-greedy repetition to find the end
	88	+ $parser->add( '\/ \* .? \ \/' );
	89	+
	90	+ // Preserve the newline after a C++-style comment -- bug 27046
	91	+ $parser->add( '\/ \/ [^\r\n]* ( [\r\n] )', '$2' );
	92	+
78	93	// Protect strings. The original code had [^\'\\v] here, but that didn't armor multiline
79	94	// strings correctly. This also armors multiline strings that don't have backslashes at the
80	95	// end of the line (these are invalid), but that's fine because we're just armoring here.
81		~~- $parser->add( '/\'([^\'\\\\]*(\\\\.[^\'\\\\]*)*)\'/', '$1' );~~
82		~~- $parser->add( '/"([^"\\\\]*(\\\\.[^"\\\\]*)*)"/', '$1' );~~
	96	+
	97	+ // Single quotes
	98	+ $parser->add(
	99	+ '\'' . // start quote
	100	+ '[^\'\\\\]*' . // a run of non-special characters
	101	+ '(?:' .
	102	+ '\\\\ .' . // a backslash followed by any character
	103	+ '[^\'\\\\]*' . // a run of non-special characters
	104	+ ')*' . // any number of the above
	105	+ '\'', // end quote
	106	+ '$1' );
	107	+
	108	+ // Double quotes: same as above
	109	+ $parser->add( '" [^"\\\\]* (?: \\\\ . [^"\\\\]* )* "', '$1' );
	110	+
83	111	// Protect regular expressions
84		~~- $parser->add( '/[ \\t]+(\\/[^\\/\\r\\n\\*][^\\/\\r\\n]*\\/g?i?)/', '$2' );~~
85		~~- $parser->add( '/[^\\w\\$\\/\'"*)\\?:]\\/[^\\/\\r\\n\\*][^\\/\\r\\n]*\\/g?i?/', '$1' );~~
86		~~- // Remove comments~~
87		~~- $parser->add( '/\\/\\*(.\|[\\r\\n])*?\\*\\//' );~~
88		~~- // Preserve the newline after a C++-style comment -- bug 27046~~
89		~~- $parser->add( '/\\/\\/[^\\r\\n]*([\\r\\n])/', '$2' );~~
	112	+ $parser->add(
	113	+ '(?<= [ \t] \| [^\w\$\/\'"*)\?:] )' . // assert that whitespace or punctuation precedes
	114	+ '\/' . // start slash
	115	+ '[^\r\n\*]' . // not a comment-start or line ending
	116	+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
	117	+ '(?:' .
	118	+ '\\\\ .' . // a backslash followed by any character
	119	+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
	120	+ ')*' . // any number of the above
	121	+ '\/[ig]*' , // pattern end, optional modifier
	122	+ '$1' );
90	123	return $parser;
91	124	}
92	125	}
—	—	@@ -94,37 +127,36 @@
95	128	* ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards
96	129	* A multi-pattern parser.
97	130	* License: http://creativecommons.org/licenses/LGPL/2.1/
98		- *
	131	+ *
99	132	* This is the PHP version of the ParseMaster component of Dean Edwards' (http://dean.edwards.name/)
100	133	* Packer, which was originally written in JavaScript. It was ported to PHP by Nicolas Martin.
101		- *
	134	+ *
102	135	* Original Source: http://joliclic.free.fr/php/javascript-packer/en/
103		- *
	136	+ *
104	137	* Changes should be pushed back upstream.
105	138	*/
106	139	class ParseMaster {
107	140	public $ignoreCase = false;
108	141	public $escapeChar = '';
109		-
	142	+
110	143	// constants
111	144	const EXPRESSION = 0;
112	145	const REPLACEMENT = 1;
113	146	const LENGTH = 2;
114		-
	147	+
115	148	// used to determine nesting levels
116		~~- private $GROUPS = '/\\(/';//g~~
117		~~- private $SUB_REPLACE = '/\\$\\d/';~~
118		~~- private $INDEXED = '/^\\$\\d+$/';~~
119		~~- private $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';~~
	149	+ private $GROUPS = '/\( (?! \? ) /x';//g
	150	+ private $SUB_REPLACE = '/\$\d/';
	151	+ private $INDEXED = '/^\$\d+$/';
120	152	private $ESCAPE = '/\\\./';//g
121	153	private $QUOTE = '/\'/';
122		~~- private $DELETED = '/\\x01[^\\x01]*\\x01/';//g~~
123		-
	154	+ private $DELETED = '/\x01[^\x01]*\x01/';//g
	155	+
124	156	public function add($expression, $replacement = '') {
125	157	// count the number of sub-expressions
126	158	// - add one because each pattern is itself a sub-expression
127	159	$length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);
128		-
	160	+
129	161	// treat only strings $replacement
130	162	if (is_string($replacement)) {
131	163	// does the pattern deal with sub-expressions?
—	—	@@ -136,7 +168,7 @@
137	169	} else { // a complicated lookup (e.g. "Hello $2 $1")
138	170	// build a function to do the lookup
139	171	$quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
140		~~- ? '"' : "'";~~
	172	+ ? '"' : "'";
141	173	$replacement = array(
142	174	'fn' => '_backReferences',
143	175	'data' => array(
—	—	@@ -152,19 +184,19 @@
153	185	if (!empty($expression)) $this->_add($expression, $replacement, $length);
154	186	else $this->_add('/^$/', $replacement, $length);
155	187	}
156		-
	188	+
157	189	public function exec($string) {
158	190	// execute the global replacement
159	191	$this->_escaped = array();
160		-
	192	+
161	193	// simulate the _patterns.toSTring of Dean
162	194	$regexp = '/';
163	195	foreach ($this->_patterns as $reg) {
164		~~- $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')\|';~~
	196	+ $regexp .= '(' . $reg[self::EXPRESSION] . ")\|\n";
165	197	}
166		~~- $regexp = substr($regexp, 0, -1) . '/';~~
	198	+ $regexp = substr($regexp, 0, -2) . '/Sxs';
167	199	$regexp .= ($this->ignoreCase) ? 'i' : '';
168		-
	200	+
169	201	$string = $this->_escape($string, $this->escapeChar);
170	202	$string = preg_replace_callback(
171	203	$regexp,
—	—	@@ -175,10 +207,10 @@
176	208	$string
177	209	);
178	210	$string = $this->_unescape($string, $this->escapeChar);
179		-
	211	+
180	212	return preg_replace($this->DELETED, '', $string);
181	213	}
182		-
	214	+
183	215	public function reset() {
184	216	// clear the patterns collection so that this object may be re-used
185	217	$this->_patterns = array();
—	—	@@ -187,17 +219,17 @@
188	220	// private
189	221	private $_escaped = array(); // escaped characters
190	222	private $_patterns = array(); // patterns stored by index
191		-
	223	+
192	224	// create and add a new pattern to the patterns collection
193	225	private function _add() {
194	226	$arguments = func_get_args();
195	227	$this->_patterns[] = $arguments;
196	228	}
197		-
	229	+
198	230	// this is the global replace function (it's quite complicated)
199	231	private function _replacement($arguments) {
200	232	if (empty($arguments)) return '';
201		-
	233	+
202	234	$i = 1; $j = 0;
203	235	// loop through the patterns
204	236	while (isset($this->_patterns[$j])) {
—	—	@@ -205,53 +237,53 @@
206	238	// do we have a result?
207	239	if (isset($arguments[$i]) && ($arguments[$i] != '')) {
208	240	$replacement = $pattern[self::REPLACEMENT];
209		-
	241	+
210	242	if (is_array($replacement) && isset($replacement['fn'])) {
211		-
	243	+
212	244	if (isset($replacement['data'])) $this->buffer = $replacement['data'];
213	245	return call_user_func(array(&$this, $replacement['fn']), $arguments, $i);
214		-
	246	+
215	247	} elseif (is_int($replacement)) {
216	248	return $arguments[$replacement + $i];
217		-
	249	+
218	250	}
219	251	$delete = ($this->escapeChar == '' \|\|
220		~~- strpos($arguments[$i], $this->escapeChar) === false)~~
221		~~- ? '' : "\x01" . $arguments[$i] . "\x01";~~
	252	+ strpos($arguments[$i], $this->escapeChar) === false)
	253	+ ? '' : "\x01" . $arguments[$i] . "\x01";
222	254	return $delete . $replacement;
223		-
	255	+
224	256	// skip over references to sub-expressions
225	257	} else {
226	258	$i += $pattern[self::LENGTH];
227	259	}
228	260	}
229	261	}
230		-
	262	+
231	263	private function _backReferences($match, $offset) {
232	264	$replacement = $this->buffer['replacement'];
233		~~- $quote = $this->buffer['quote'];~~
	265	+ //$quote = $this->buffer['quote'];
234	266	$i = $this->buffer['length'];
235	267	while ($i) {
236	268	$replacement = str_replace('$'.$i--, $match[$offset + $i], $replacement);
237	269	}
238	270	return $replacement;
239	271	}
240		-
	272	+
241	273	private function _replace_name($match, $offset){
242	274	$length = strlen($match[$offset + 2]);
243	275	$start = $length - max($length - strlen($match[$offset + 3]), 0);
244	276	return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
245	277	}
246		-
	278	+
247	279	private function _replace_encoded($match, $offset) {
248	280	return $this->buffer[$match[$offset]];
249	281	}
250		-
251		-
	282	+
	283	+
252	284	// php : we cannot pass additional data to preg_replace_callback,
253	285	// and we cannot use &$this in create_function, so let's go to lower level
254	286	private $buffer;
255		-
	287	+
256	288	// encode escaped characters
257	289	private function _escape($string, $escapeChar) {
258	290	if ($escapeChar) {
—	—	@@ -261,7 +293,7 @@
262	294	array(&$this, '_escapeBis'),
263	295	$string
264	296	);
265		-
	297	+
266	298	} else {
267	299	return $string;
268	300	}
—	—	@@ -270,7 +302,7 @@
271	303	$this->_escaped[] = $match[1];
272	304	return $this->buffer;
273	305	}
274		-
	306	+
275	307	// decode escaped characters
276	308	private function _unescape($string, $escapeChar) {
277	309	if ($escapeChar) {
—	—	@@ -282,7 +314,7 @@
283	315	array(&$this, '_unescapeBis'),
284	316	$string
285	317	);
286		-
	318	+
287	319	} else {
288	320	return $string;
289	321	}
—	—	@@ -298,7 +330,7 @@
299	331	$this->buffer['i']++;
300	332	return $this->buffer['escapeChar'] . $temp;
301	333	}
302		-
	334	+
303	335	private function _internalEscape($string) {
304	336	return preg_replace($this->ESCAPE, '', $string);
305	337	}

Past revisions this follows-up on

Revision	Commit summary	Author	Date
r82340	(bug 27492) Fix regexes for matching JS regexes which could get messed up by ...	catrope	17:08, 17 February 2011
r82344	Revert r82340, caused something like bug 27481 where spaces were being wrong...	catrope	19:19, 17 February 2011
r82384	* For readability, use /x in regexes. Removed the start and end slashes from ...	tstarling	07:36, 18 February 2011
r82399	JavaScriptDistiller fixes:...	tstarling	14:09, 18 February 2011

Status & tagging log

05:28, 20 April 2011 Reedy (talk | contribs) changed the status of r85290 [removed: new added: ok]