r85290 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r85289‎ | r85290 | r85291 >
Date:22:22, 3 April 2011
Author:demon
Status:ok
Tags:
Comment:
Per Roan, pull JavaScriptDistiller from trunk since it won't merge cleanly. This grabs at least r82340, r82344, r82384, r82399
Modified paths:
  • /branches/REL1_17/phase3/includes/libs/JavaScriptDistiller.php (modified) (history)

Diff [purge]

Index: branches/REL1_17/phase3/includes/libs/JavaScriptDistiller.php
@@ -1,7 +1,7 @@
22 <?php
33 /**
44 * JavaScript Distiller
5 - *
 5+ *
66 * Author: Dean Edwards, Nicholas Martin, Trevor Parscal
77 * License: LGPL
88 */
@@ -11,56 +11,64 @@
1212
1313 /**
1414 * Removes most of the white-space from JavaScript code.
15 - *
 15+ *
1616 * This code came from the first pass of Dean Edwards' JavaScript Packer. Compared to using
1717 * JSMin::minify, this produces < 1% larger output (after gzip) in approx. 25% of the time.
18 - *
 18+ *
1919 * @param $script String: JavaScript code to minify
2020 * @param $stripVerticalSpace Boolean: Try to remove as much vertical whitespace as possible
2121 */
2222 public static function stripWhiteSpace( $script, $stripVerticalSpace = false ) {
 23+ // Try to avoid segfaulting
 24+ // I saw segfaults with a limit of 10000, 1000 seems to work
 25+ $oldLimit = ini_get( 'pcre.recursion_limit' );
 26+ if ( intval( $oldLimit ) > 1000 ) {
 27+ ini_set( 'pcre.recursion_limit', '1000' );
 28+ }
 29+
2330 $script = self::stripHorizontalSpace( $script );
2431 // If requested, make some vertical whitespace collapsing as well
2532 if ( $stripVerticalSpace ) {
2633 $script = self::stripVerticalSpace( $script );
2734 }
2835 // Done
 36+ ini_set( 'pcre.recursion_limit', $oldLimit );
2937 return $script;
3038 }
3139
32 - private static function stripHorizontalSpace( $script ) {
 40+ public static function stripHorizontalSpace( $script ) {
3341 $parser = self::createParser();
3442 // Collapse horizontal whitespaces between variable names into a single space
35 - $parser->add( '/(\\b|\\$)[ \\t]+(\\b|\\$)/', '$2 $3' );
 43+ $parser->add( '(\b|\$) [ \t]+ (\b|\$)', '$2 $3' );
3644 // Collapse horizontal whitespaces between unary operators into a single space
37 - $parser->add( '/([+\\-])[ \\t]+([+\\-])/', '$2 $3' );
 45+ $parser->add( '([+\-]) [ \t]+ ([+\-])', '$2 $3' );
3846 // Remove all remaining un-protected horizontal whitespace
39 - $parser->add( '/[ \\t]+/');
 47+ $parser->add( '[ \t]+');
4048 // Collapse multiple vertical whitespaces with some horizontal spaces between them
41 - $parser->add( '/[\\r\\n]+[ \\t]*[\\r\\n]+/', "\n" );
 49+ $parser->add( '[\r\n]+ [ \t]* [\r\n]+', "\n" );
4250 // Execute and return
4351 return $parser->exec($script);
4452 }
4553
46 - private static function stripVerticalSpace( $script ) {
 54+ public static function stripVerticalSpace( $script ) {
4755 $parser = self::createParser();
4856 // Collapse whitespaces between and after a ){ pair (function definitions)
49 - $parser->add( '/\\)\\s+\\{\\s+/', '){' );
 57+ $parser->add( '\) \s+ \{ \s+', '){' );
5058 // Collapse whitespaces between and after a ({ pair (JSON argument)
51 - $parser->add( '/\\(\\s+\\{\\s+/', '({' );
 59+ $parser->add( '\( \s+ \{ \s+', '({' );
5260 // Collapse whitespaces between a parenthesis and a period (call chaining)
53 - $parser->add( '/\\)\\s+\\./', ').');
 61+ $parser->add( '\) \s+ \.', ').');
5462 // Collapse vertical whitespaces which come directly after a semicolon or a comma
55 - $parser->add( '/([;,])\\s+/', '$2' );
 63+ $parser->add( '( [;,] ) \s+', '$2' );
5664 // Collapse whitespaces between multiple parenthesis/brackets of similar direction
57 - $parser->add( '/([\\)\\}])\\s+([\\)\\}])/', '$2$3' );
58 - $parser->add( '/([\\(\\{])\\s+([\\(\\{])/', '$2$3' );
 65+ $parser->add( '( [\)\}] ) \s+ ( [\)\}] )', '$2$3' );
 66+ $parser->add( '( [\(\{] ) \s+ ( [\(\{] )', '$2$3' );
5967 return $parser->exec( $script );
6068 }
6169
6270 /*
6371 * Creates an instance of ParseMaster and protects sensitive JavaScript regions.
64 - *
 72+ *
6573 * This parser is based on regular expressions, which all get or'd together, so rules take
6674 * precedence in the order they are added. We can use it to minify by armoring certain regions
6775 * by matching them and replacing them with the full match, leaving the remaining regions around
@@ -74,18 +82,43 @@
7583 // to \s if we use a backslash as the escape character. We work around this by using an
7684 // obscure escape character that we hope will never appear at the end of a line.
7785 $parser->escapeChar = chr( 1 );
 86+
 87+ // C-style comment: use non-greedy repetition to find the end
 88+ $parser->add( '\/ \* .*? \* \/' );
 89+
 90+ // Preserve the newline after a C++-style comment -- bug 27046
 91+ $parser->add( '\/ \/ [^\r\n]* ( [\r\n] )', '$2' );
 92+
7893 // Protect strings. The original code had [^\'\\v] here, but that didn't armor multiline
7994 // strings correctly. This also armors multiline strings that don't have backslashes at the
8095 // end of the line (these are invalid), but that's fine because we're just armoring here.
81 - $parser->add( '/\'([^\'\\\\]*(\\\\.[^\'\\\\]*)*)\'/', '$1' );
82 - $parser->add( '/"([^"\\\\]*(\\\\.[^"\\\\]*)*)"/', '$1' );
 96+
 97+ // Single quotes
 98+ $parser->add(
 99+ '\'' . // start quote
 100+ '[^\'\\\\]*' . // a run of non-special characters
 101+ '(?:' .
 102+ '\\\\ .' . // a backslash followed by any character
 103+ '[^\'\\\\]*' . // a run of non-special characters
 104+ ')*' . // any number of the above
 105+ '\'', // end quote
 106+ '$1' );
 107+
 108+ // Double quotes: same as above
 109+ $parser->add( '" [^"\\\\]* (?: \\\\ . [^"\\\\]* )* "', '$1' );
 110+
83111 // Protect regular expressions
84 - $parser->add( '/[ \\t]+(\\/[^\\/\\r\\n\\*][^\\/\\r\\n]*\\/g?i?)/', '$2' );
85 - $parser->add( '/[^\\w\\$\\/\'"*)\\?:]\\/[^\\/\\r\\n\\*][^\\/\\r\\n]*\\/g?i?/', '$1' );
86 - // Remove comments
87 - $parser->add( '/\\/\\*(.|[\\r\\n])*?\\*\\//' );
88 - // Preserve the newline after a C++-style comment -- bug 27046
89 - $parser->add( '/\\/\\/[^\\r\\n]*([\\r\\n])/', '$2' );
 112+ $parser->add(
 113+ '(?<= [ \t] | [^\w\$\/\'"*)\?:] )' . // assert that whitespace or punctuation precedes
 114+ '\/' . // start slash
 115+ '[^\r\n\*]' . // not a comment-start or line ending
 116+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
 117+ '(?:' .
 118+ '\\\\ .' . // a backslash followed by any character
 119+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
 120+ ')*' . // any number of the above
 121+ '\/[ig]*' , // pattern end, optional modifier
 122+ '$1' );
90123 return $parser;
91124 }
92125 }
@@ -94,37 +127,36 @@
95128 * ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards
96129 * A multi-pattern parser.
97130 * License: http://creativecommons.org/licenses/LGPL/2.1/
98 - *
 131+ *
99132 * This is the PHP version of the ParseMaster component of Dean Edwards' (http://dean.edwards.name/)
100133 * Packer, which was originally written in JavaScript. It was ported to PHP by Nicolas Martin.
101 - *
 134+ *
102135 * Original Source: http://joliclic.free.fr/php/javascript-packer/en/
103 - *
 136+ *
104137 * Changes should be pushed back upstream.
105138 */
106139 class ParseMaster {
107140 public $ignoreCase = false;
108141 public $escapeChar = '';
109 -
 142+
110143 // constants
111144 const EXPRESSION = 0;
112145 const REPLACEMENT = 1;
113146 const LENGTH = 2;
114 -
 147+
115148 // used to determine nesting levels
116 - private $GROUPS = '/\\(/';//g
117 - private $SUB_REPLACE = '/\\$\\d/';
118 - private $INDEXED = '/^\\$\\d+$/';
119 - private $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';
 149+ private $GROUPS = '/\( (?! \? ) /x';//g
 150+ private $SUB_REPLACE = '/\$\d/';
 151+ private $INDEXED = '/^\$\d+$/';
120152 private $ESCAPE = '/\\\./';//g
121153 private $QUOTE = '/\'/';
122 - private $DELETED = '/\\x01[^\\x01]*\\x01/';//g
123 -
 154+ private $DELETED = '/\x01[^\x01]*\x01/';//g
 155+
124156 public function add($expression, $replacement = '') {
125157 // count the number of sub-expressions
126158 // - add one because each pattern is itself a sub-expression
127159 $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);
128 -
 160+
129161 // treat only strings $replacement
130162 if (is_string($replacement)) {
131163 // does the pattern deal with sub-expressions?
@@ -136,7 +168,7 @@
137169 } else { // a complicated lookup (e.g. "Hello $2 $1")
138170 // build a function to do the lookup
139171 $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
140 - ? '"' : "'";
 172+ ? '"' : "'";
141173 $replacement = array(
142174 'fn' => '_backReferences',
143175 'data' => array(
@@ -152,19 +184,19 @@
153185 if (!empty($expression)) $this->_add($expression, $replacement, $length);
154186 else $this->_add('/^$/', $replacement, $length);
155187 }
156 -
 188+
157189 public function exec($string) {
158190 // execute the global replacement
159191 $this->_escaped = array();
160 -
 192+
161193 // simulate the _patterns.toSTring of Dean
162194 $regexp = '/';
163195 foreach ($this->_patterns as $reg) {
164 - $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')|';
 196+ $regexp .= '(' . $reg[self::EXPRESSION] . ")|\n";
165197 }
166 - $regexp = substr($regexp, 0, -1) . '/';
 198+ $regexp = substr($regexp, 0, -2) . '/Sxs';
167199 $regexp .= ($this->ignoreCase) ? 'i' : '';
168 -
 200+
169201 $string = $this->_escape($string, $this->escapeChar);
170202 $string = preg_replace_callback(
171203 $regexp,
@@ -175,10 +207,10 @@
176208 $string
177209 );
178210 $string = $this->_unescape($string, $this->escapeChar);
179 -
 211+
180212 return preg_replace($this->DELETED, '', $string);
181213 }
182 -
 214+
183215 public function reset() {
184216 // clear the patterns collection so that this object may be re-used
185217 $this->_patterns = array();
@@ -187,17 +219,17 @@
188220 // private
189221 private $_escaped = array(); // escaped characters
190222 private $_patterns = array(); // patterns stored by index
191 -
 223+
192224 // create and add a new pattern to the patterns collection
193225 private function _add() {
194226 $arguments = func_get_args();
195227 $this->_patterns[] = $arguments;
196228 }
197 -
 229+
198230 // this is the global replace function (it's quite complicated)
199231 private function _replacement($arguments) {
200232 if (empty($arguments)) return '';
201 -
 233+
202234 $i = 1; $j = 0;
203235 // loop through the patterns
204236 while (isset($this->_patterns[$j])) {
@@ -205,53 +237,53 @@
206238 // do we have a result?
207239 if (isset($arguments[$i]) && ($arguments[$i] != '')) {
208240 $replacement = $pattern[self::REPLACEMENT];
209 -
 241+
210242 if (is_array($replacement) && isset($replacement['fn'])) {
211 -
 243+
212244 if (isset($replacement['data'])) $this->buffer = $replacement['data'];
213245 return call_user_func(array(&$this, $replacement['fn']), $arguments, $i);
214 -
 246+
215247 } elseif (is_int($replacement)) {
216248 return $arguments[$replacement + $i];
217 -
 249+
218250 }
219251 $delete = ($this->escapeChar == '' ||
220 - strpos($arguments[$i], $this->escapeChar) === false)
221 - ? '' : "\x01" . $arguments[$i] . "\x01";
 252+ strpos($arguments[$i], $this->escapeChar) === false)
 253+ ? '' : "\x01" . $arguments[$i] . "\x01";
222254 return $delete . $replacement;
223 -
 255+
224256 // skip over references to sub-expressions
225257 } else {
226258 $i += $pattern[self::LENGTH];
227259 }
228260 }
229261 }
230 -
 262+
231263 private function _backReferences($match, $offset) {
232264 $replacement = $this->buffer['replacement'];
233 - $quote = $this->buffer['quote'];
 265+ //$quote = $this->buffer['quote'];
234266 $i = $this->buffer['length'];
235267 while ($i) {
236268 $replacement = str_replace('$'.$i--, $match[$offset + $i], $replacement);
237269 }
238270 return $replacement;
239271 }
240 -
 272+
241273 private function _replace_name($match, $offset){
242274 $length = strlen($match[$offset + 2]);
243275 $start = $length - max($length - strlen($match[$offset + 3]), 0);
244276 return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
245277 }
246 -
 278+
247279 private function _replace_encoded($match, $offset) {
248280 return $this->buffer[$match[$offset]];
249281 }
250 -
251 -
 282+
 283+
252284 // php : we cannot pass additional data to preg_replace_callback,
253285 // and we cannot use &$this in create_function, so let's go to lower level
254286 private $buffer;
255 -
 287+
256288 // encode escaped characters
257289 private function _escape($string, $escapeChar) {
258290 if ($escapeChar) {
@@ -261,7 +293,7 @@
262294 array(&$this, '_escapeBis'),
263295 $string
264296 );
265 -
 297+
266298 } else {
267299 return $string;
268300 }
@@ -270,7 +302,7 @@
271303 $this->_escaped[] = $match[1];
272304 return $this->buffer;
273305 }
274 -
 306+
275307 // decode escaped characters
276308 private function _unescape($string, $escapeChar) {
277309 if ($escapeChar) {
@@ -282,7 +314,7 @@
283315 array(&$this, '_unescapeBis'),
284316 $string
285317 );
286 -
 318+
287319 } else {
288320 return $string;
289321 }
@@ -298,7 +330,7 @@
299331 $this->buffer['i']++;
300332 return $this->buffer['escapeChar'] . $temp;
301333 }
302 -
 334+
303335 private function _internalEscape($string) {
304336 return preg_replace($this->ESCAPE, '', $string);
305337 }

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r82340 | (bug 27492) Fix regexes for matching JS regexes which could get messed up by ... | catrope | 17:08, 17 February 2011
r82344 | Revert r82340, caused something linke bug 27481 where spaces were being wrong... | catrope | 19:19, 17 February 2011
r82384 | * For readability, use /x in regexes. Removed the start and end slashes from ... | tstarling | 07:36, 18 February 2011
r82399 | JavaScriptDistiller fixes:... | tstarling | 14:09, 18 February 2011

Status & tagging log