r82384 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r82383‎ | r82384 | r82385 >
Date:07:36, 18 February 2011
Author:tstarling
Status:ok
Tags:
Comment:
* For readability, use /x in regexes. Removed the start and end slashes from the input to ParseMaster::add(), to avoid confusion over the lack of a /x modifier in that input.
* Broke up several regexes and added comments, so that humans might be able to understand them.
* Tested md5(minify(jquery.js)) as before. Checked speed, it's the same.
Modified paths:
  • /trunk/phase3/includes/libs/JavaScriptDistiller.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/libs/JavaScriptDistiller.php
@@ -31,13 +31,13 @@
3232 public static function stripHorizontalSpace( $script ) {
3333 $parser = self::createParser();
3434 // Collapse horizontal whitespaces between variable names into a single space
35 - $parser->add( '/(\b|\$)[ \t]+(\b|\$)/', '$2 $3' );
 35+ $parser->add( '(\b|\$) [ \t]+ (\b|\$)', '$2 $3' );
3636 // Collapse horizontal whitespaces between unary operators into a single space
37 - $parser->add( '/([+\-])[ \t]+([+\-])/', '$2 $3' );
 37+ $parser->add( '([+\-]) [ \t]+ ([+\-])', '$2 $3' );
3838 // Remove all remaining un-protected horizontal whitespace
39 - $parser->add( '/[ \t]+/');
 39+ $parser->add( '[ \t]+');
4040 // Collapse multiple vertical whitespaces with some horizontal spaces between them
41 - $parser->add( '/[\r\n]+[ \t]*[\r\n]+/', "\n" );
 41+ $parser->add( '[\r\n]+ [ \t]* [\r\n]+', "\n" );
4242 // Execute and return
4343 return $parser->exec($script);
4444 }
@@ -45,16 +45,16 @@
4646 public static function stripVerticalSpace( $script ) {
4747 $parser = self::createParser();
4848 // Collapse whitespaces between and after a ){ pair (function definitions)
49 - $parser->add( '/\)\s+\{\s+/', '){' );
 49+ $parser->add( '\) \s+ \{ \s+', '){' );
5050 // Collapse whitespaces between and after a ({ pair (JSON argument)
51 - $parser->add( '/\(\s+\{\s+/', '({' );
 51+ $parser->add( '\( \s+ \{ \s+', '({' );
5252 // Collapse whitespaces between a parenthesis and a period (call chaining)
53 - $parser->add( '/\)\s+\./', ').');
 53+ $parser->add( '\) \s+ \.', ').');
5454 // Collapse vertical whitespaces which come directly after a semicolon or a comma
55 - $parser->add( '/([;,])\s+/', '$2' );
 55+ $parser->add( '( [;,] ) \s+', '$2' );
5656 // Collapse whitespaces between multiple parenthesis/brackets of similar direction
57 - $parser->add( '/([\)\}])\s+([\)\}])/', '$2$3' );
58 - $parser->add( '/([\(\{])\\s+([\(\{])/', '$2$3' );
 57+ $parser->add( '( [\)\}] ) \s+ ( [\)\}] )', '$2$3' );
 58+ $parser->add( '( [\(\{] ) \s+ ( [\(\{] )', '$2$3' );
5959 return $parser->exec( $script );
6060 }
6161
@@ -77,15 +77,52 @@
7878 // Protect strings. The original code had [^\'\\v] here, but that didn't armor multiline
7979 // strings correctly. This also armors multiline strings that don't have backslashes at the
8080 // end of the line (these are invalid), but that's fine because we're just armoring here.
81 - $parser->add( '/\'([^\'\\\\]*(\\\\(.|[\r\n])[^\'\\\\]*)*)\'/', '$1' );
82 - $parser->add( '/"([^"\\\\]*(\\\\(.|[\r\n])[^"\\\\]*)*)"/', '$1' );
 81+
 82+ // Single quotes
 83+ $parser->add(
 84+ '\' (' . // start quote
 85+ '[^\'\\\\]*' . // a run of non-special characters
 86+ '(' .
 87+ '\\\\ ( . | [\r\n] )' . // a backslash followed by a character or line ending
 88+ '[^\'\\\\]*' . // a run of non-special characters
 89+ ')*' . // any number of the above
 90+ ') \'', // end quote
 91+ '$1' );
 92+
 93+ // Double quotes: same as above
 94+ $parser->add( '" ( [^"\\\\]* ( \\\\ ( . | [\r\n] ) [^"\\\\]* )* ) "', '$1' );
 95+
8396 // Protect regular expressions
84 - $parser->add( '/[ \t]+((\/[^\r\n\*][^\/\r\n\\\\]*(\\\\.[^\/\r\n\\\\]*)*\/(i|g)*))/', '$1' );
85 - $parser->add( '/([^\w\$\/\'"*)\?:](\/[^\r\n\*][^\/\r\n\\\\]*(\\\\.[^\/\r\n\\\\]*)*\/(i|g)*))/', '$1' );
86 - // Remove comments
87 - $parser->add( '/\/\*(.|[\r\n])*?\*\//' );
 97+ // Regular expression with whitespace before it
 98+ $parser->add(
 99+ '[ \t]+ ( ( \/' . // whitespace then start slash
 100+ '[^\r\n\*]' . // not a comment-start or line ending
 101+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
 102+ '(' .
 103+ '\\\\.' . // an escaped dot
 104+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
 105+ ')*' . // any number of the above
 106+ '\/(i|g)*' . // pattern end, optional modifier
 107+ ') )',
 108+ '$1' );
 109+ // Regular expression with an operator before it
 110+ $parser->add(
 111+ '( [^\w\$\/\'"*)\?:] (\/' . // certain kinds of punctuation and then start slash
 112+ '[^\r\n\*]' . // not a comment-start or line ending
 113+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
 114+ '(' .
 115+ '\\\\.' . // an escaped dot
 116+ '[^\/\r\n\\\\]*' . // a sequence of non-special characters
 117+ ')*' . // any number of the above
 118+ '\/(i|g)*)' . // pattern end, optional modifier
 119+ ')',
 120+ '$1' );
 121+
 122+ // C-style comment: use non-greedy repetition to find the end
 123+ $parser->add( '\/ \* ( . | [\r\n] )*? \* \/' );
 124+
88125 // Preserve the newline after a C++-style comment -- bug 27046
89 - $parser->add( '/\/\/[^\r\n]*([\r\n])/', '$2' );
 126+ $parser->add( '\/ \/ [^\r\n]* ( [\r\n] )', '$2' );
90127 return $parser;
91128 }
92129 }
@@ -160,9 +197,9 @@
161198 // simulate the _patterns.toSTring of Dean
162199 $regexp = '/';
163200 foreach ($this->_patterns as $reg) {
164 - $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')|';
 201+ $regexp .= '(' . $reg[self::EXPRESSION] . ')|';
165202 }
166 - $regexp = substr($regexp, 0, -1) . '/S';
 203+ $regexp = substr($regexp, 0, -1) . '/Sx';
167204 $regexp .= ($this->ignoreCase) ? 'i' : '';
168205
169206 $string = $this->_escape($string, $this->escapeChar);

Follow-up revisions

RevisionCommit summaryAuthorDate
r824021.17wmf1: MFT r82380, r82384, r82399catrope14:28, 18 February 2011
r85290Per Roan, pull JavaScriptDistiller from trunk since it won't merge cleanly. T...demon22:22, 3 April 2011

Status & tagging log