r83885 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r83884‎ | r83885 | r83886 >
Date:11:44, 14 March 2011
Author:catrope
Status:ok (Comments)
Tags:
Comment:
(bug 27528) Incorporate Paul Copperman's minifier
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/libs/JavaScriptMinifier.php (added) (history)
  • /trunk/phase3/includes/resourceloader/ResourceLoader.php (modified) (history)
  • /trunk/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php (added) (history)

Diff [purge]

Index: trunk/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php
@@ -0,0 +1,70 @@
 2+<?php
 3+
/**
 * Data-driven tests for JavaScriptMinifier::minify().
 *
 * Every case is a pair: the JavaScript source handed to the minifier and the
 * exact minified output expected back.
 */
class JavaScriptMinifierTest extends MediaWikiTestCase {

	/**
	 * Data provider for testJavaScriptMinifierOutput().
	 *
	 * @return Array of array( $code, $expectedOutput ) pairs
	 */
	public static function provideCases() {
		return array(
			// Basic tokens
			array( "\r\t\f \v\n\r", "" ),
			array( "/* Foo *\n*bar\n*/", "" ),
			array( "' Foo \\' bar \\\n baz \\' quox ' .", "' Foo \\' bar \\\n baz \\' quox '." ),
			// Must be a double-quoted PHP string: inside single quotes \" and \n are not
			// interpreted, so the input would not be a JavaScript double-quoted string
			// with escaped quotes and a backslash-newline continuation.
			array( "\" Foo \\\" bar \\\n baz \\\" quox \" .", "\" Foo \\\" bar \\\n baz \\\" quox \"." ),
			array( "// Foo b/ar baz", "" ),
			array( "/ Foo \\/ bar [ / \\] / ] baz / .", "/ Foo \\/ bar [ / \\] / ] baz /." ),
			// Numeric literal with a signed exponent
			array( "a = 1.234e-5;", "a=1.234e-5;" ),
			// HTML comments
			array( "<!-- Foo bar", "" ),
			array( "<!-- Foo --> bar", "" ),
			array( "--> Foo", "" ),
			array( "x --> y", "x-->y" ),
			// Semicolon insertion
			array( "return\nx;", "return\nx;" ),
			array( "throw\nx;", "throw\nx;" ),
			array( "continue\nx;", "continue\nx;" ),
			array( "break\nx;", "break\nx;" ),
			array( "var\nx;", "var x;" ),
			array( "x\ny;", "x\ny;" ),
			array( "x\n++y;", "x\n++y;" ),
			array( "x\n!y;", "x\n!y;" ),
			array( "x\n{y}", "x\n{y}" ),
			array( "x\n+y;", "x+y;" ),
			array( "x\n(y);", "x(y);" ),
			array( "5.\nx;", "5.\nx;" ),
			array( "0xFF.\nx;", "0xFF.x;" ),
			array( "5.3.\nx;", "5.3.x;" ),
			// Token separation
			array( "x in y", "x in y" ),
			array( "/x/g in y", "/x/g in y" ),
			array( "x in 30", "x in 30" ),
			array( "x + ++ y", "x+ ++y" ),
			array( "x / /y/.exec(z)", "x/ /y/.exec(z)" ),
			// State machine
			array( "/ x/g", "/ x/g" ),
			array( "return/ x/g", "return/ x/g" ),
			array( "+/ x/g", "+/ x/g" ),
			array( "++/ x/g", "++/ x/g" ),
			array( "x/ x/g", "x/x/g" ),
			array( "(/ x/g)", "(/ x/g)" ),
			array( "if(/ x/g);", "if(/ x/g);" ),
			array( "(x/ x/g)", "(x/x/g)" ),
			array( "([/ x/g])", "([/ x/g])" ),
			array( "+x/ x/g", "+x/x/g" ),
			array( "{}/ x/g", "{}/ x/g" ),
			array( "+{}/ x/g", "+{}/x/g" ),
			array( "(x)/ x/g", "(x)/x/g" ),
			array( "if(x)/ x/g", "if(x)/ x/g" ),
			array( "for(x;x;{}/ x/g);", "for(x;x;{}/x/g);" ),
			array( "x;x;{}/ x/g", "x;x;{}/ x/g" ),
			array( "x:{}/ x/g", "x:{}/ x/g" ),
			array( "switch(x){case y?z:{}/ x/g:{}/ x/g;}", "switch(x){case y?z:{}/x/g:{}/ x/g;}" ),
			array( "function x(){}/ x/g", "function x(){}/ x/g" ),
			array( "+function x(){}/ x/g", "+function x(){}/x/g" )
		);
	}

	/**
	 * Minifies $code and checks that the result matches $expectedOutput exactly.
	 *
	 * @dataProvider provideCases
	 * @param $code String JavaScript source to minify
	 * @param $expectedOutput String Expected minified output
	 */
	public function testJavaScriptMinifierOutput( $code, $expectedOutput ) {
		$this->assertEquals( $expectedOutput, JavaScriptMinifier::minify( $code ) );
	}
}
Property changes on: trunk/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php
___________________________________________________________________
Added: svn:eol-style
172 + native
Index: trunk/phase3/includes/resourceloader/ResourceLoader.php
@@ -29,7 +29,7 @@
3030 class ResourceLoader {
3131
3232 /* Protected Static Members */
33 - protected static $filterCacheVersion = 1;
 33+ protected static $filterCacheVersion = 2;
3434
3535 /** Array: List of module name/ResourceLoaderModule object pairs */
3636 protected $modules = array();
@@ -110,7 +110,7 @@
111111 * Runs JavaScript or CSS data through a filter, caching the filtered result for future calls.
112112 *
113113 * Available filters are:
114 - * - minify-js \see JavaScriptDistiller::stripWhiteSpace
 114+ * - minify-js \see JavaScriptMinifier::minify
115115 * - minify-css \see CSSMin::minify
116116 *
117117 * If $data is empty, only contains whitespace or the filter was unknown,
@@ -121,8 +121,6 @@
122122 * @return String: Filtered data, or a comment containing an error message
123123 */
124124 protected function filter( $filter, $data ) {
125 - global $wgResourceLoaderMinifyJSVerticalSpace;
126 -
127125 wfProfileIn( __METHOD__ );
128126
129127 // For empty/whitespace-only data or for unknown filters, don't perform
@@ -149,9 +147,7 @@
150148 try {
151149 switch ( $filter ) {
152150 case 'minify-js':
153 - $result = JavaScriptDistiller::stripWhiteSpace(
154 - $data, $wgResourceLoaderMinifyJSVerticalSpace
155 - );
 151+ $result = JavaScriptMinifier::minify( $data );
156152 $result .= "\n\n/* cache key: $key */\n";
157153 break;
158154 case 'minify-css':
Index: trunk/phase3/includes/AutoLoader.php
@@ -135,6 +135,7 @@
136136 'Interwiki' => 'includes/Interwiki.php',
137137 'IP' => 'includes/IP.php',
138138 'JavaScriptDistiller' => 'includes/libs/JavaScriptDistiller.php',
 139+ 'JavaScriptMinifier' => 'includes/libs/JavaScriptMinifier.php',
139140 'LCStore_DB' => 'includes/LocalisationCache.php',
140141 'LCStore_CDB' => 'includes/LocalisationCache.php',
141142 'LCStore_Null' => 'includes/LocalisationCache.php',
Index: trunk/phase3/includes/libs/JavaScriptMinifier.php
@@ -0,0 +1,530 @@
 2+<?php
 3+/**
 4+ * JavaScript Minifier
 5+ *
 6+ * This class is meant to safely minify javascript code, while leaving syntactically correct
 7+ * programs intact. Other libraries, such as JSMin require a certain coding style to work
 8+ * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 9+ * slow, because they construct a complete parse tree before outputting the code minified.
 10+ * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 11+ * fast enough to be used for on-the-fly minifying.
 12+ *
 13+ * Author: Paul Copperman <paul.copperman@gmail.com>
 14+ * License: choose any of Apache, MIT, GPL, LGPL
 15+ */
 16+
class JavaScriptMinifier {

	/* Class constants */
	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT                = 0;
	const CONDITION                = 1;
	const PROPERTY_ASSIGNMENT      = 2;
	const EXPRESSION               = 3;
	const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP            = 5;
	const EXPRESSION_FUNC          = 6;
	const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP    = 8;
	const EXPRESSION_TERNARY_FUNC  = 9;
	const PAREN_EXPRESSION         = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP      = 11;
	const PAREN_EXPRESSION_FUNC    = 12;
	const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP   = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP       = 1; // unary operators
	const TYPE_INCR_OP     = 2; // ++ and --
	const TYPE_BIN_OP      = 3; // binary operators
	const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK        = 5; // ?
	const TYPE_COLON       = 6; // :
	const TYPE_COMMA       = 7; // ,
	const TYPE_SEMICOLON   = 8; // ;
	const TYPE_BRACE_OPEN  = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN  = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
	const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC        = 16; // keywords: function
	const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/* Static functions */

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is tokenized in a single pass. Whitespace and comments are dropped; a single
	 * space is re-inserted only where two adjacent tokens would otherwise merge, and a newline
	 * is kept where removing it would suppress JavaScript's automatic semicolon insertion.
	 * No errors are reported: an unterminated string or regexp literal is emitted verbatim
	 * up to the end of the input.
	 *
	 * @param $s String JavaScript code to minify
	 * @return String Minified code
	 */
	public static function minify( $s ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = array(
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		);

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = array(
			'!'          => self::TYPE_UN_OP,
			'~'          => self::TYPE_UN_OP,
			'delete'     => self::TYPE_UN_OP,
			'new'        => self::TYPE_UN_OP,
			'typeof'     => self::TYPE_UN_OP,
			'void'       => self::TYPE_UN_OP,
			'++'         => self::TYPE_INCR_OP,
			'--'         => self::TYPE_INCR_OP,
			'!='         => self::TYPE_BIN_OP,
			'!=='        => self::TYPE_BIN_OP,
			'%'          => self::TYPE_BIN_OP,
			'%='         => self::TYPE_BIN_OP,
			'&'          => self::TYPE_BIN_OP,
			'&&'         => self::TYPE_BIN_OP,
			'&='         => self::TYPE_BIN_OP,
			'*'          => self::TYPE_BIN_OP,
			'*='         => self::TYPE_BIN_OP,
			'+='         => self::TYPE_BIN_OP,
			'-='         => self::TYPE_BIN_OP,
			'.'          => self::TYPE_BIN_OP,
			'/'          => self::TYPE_BIN_OP,
			'/='         => self::TYPE_BIN_OP,
			'<'          => self::TYPE_BIN_OP,
			'<<'         => self::TYPE_BIN_OP,
			'<<='        => self::TYPE_BIN_OP,
			'<='         => self::TYPE_BIN_OP,
			'='          => self::TYPE_BIN_OP,
			'=='         => self::TYPE_BIN_OP,
			'==='        => self::TYPE_BIN_OP,
			'>'          => self::TYPE_BIN_OP,
			'>='         => self::TYPE_BIN_OP,
			'>>'         => self::TYPE_BIN_OP,
			'>>='        => self::TYPE_BIN_OP,
			'>>>'        => self::TYPE_BIN_OP,
			'>>>='       => self::TYPE_BIN_OP,
			'^'          => self::TYPE_BIN_OP,
			'^='         => self::TYPE_BIN_OP,
			'|'          => self::TYPE_BIN_OP,
			'|='         => self::TYPE_BIN_OP,
			'||'         => self::TYPE_BIN_OP,
			'in'         => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+'          => self::TYPE_ADD_OP,
			'-'          => self::TYPE_ADD_OP,
			'?'          => self::TYPE_HOOK,
			':'          => self::TYPE_COLON,
			','          => self::TYPE_COMMA,
			';'          => self::TYPE_SEMICOLON,
			'{'          => self::TYPE_BRACE_OPEN,
			'}'          => self::TYPE_BRACE_CLOSE,
			'('          => self::TYPE_PAREN_OPEN,
			'['          => self::TYPE_PAREN_OPEN,
			')'          => self::TYPE_PAREN_CLOSE,
			']'          => self::TYPE_PAREN_CLOSE,
			'break'      => self::TYPE_RETURN,
			'continue'   => self::TYPE_RETURN,
			'return'     => self::TYPE_RETURN,
			'throw'      => self::TYPE_RETURN,
			'catch'      => self::TYPE_IF,
			'for'        => self::TYPE_IF,
			'if'         => self::TYPE_IF,
			'switch'     => self::TYPE_IF,
			'while'      => self::TYPE_IF,
			'with'       => self::TYPE_IF,
			'case'       => self::TYPE_DO,
			'do'         => self::TYPE_DO,
			'else'       => self::TYPE_DO,
			'finally'    => self::TYPE_DO,
			'try'        => self::TYPE_DO,
			'var'        => self::TYPE_DO,
			'function'   => self::TYPE_FUNC
		);

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = array(
			self::STATEMENT => array(
				self::TYPE_UN_OP      => self::EXPRESSION,
				self::TYPE_INCR_OP    => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
				self::TYPE_IF         => self::CONDITION,
				self::TYPE_FUNC       => self::CONDITION,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COLON      => self::STATEMENT,
				self::TYPE_COMMA      => self::EXPRESSION,
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
				self::TYPE_HOOK       => self::PAREN_EXPRESSION,
				self::TYPE_COLON      => self::PAREN_EXPRESSION,
				self::TYPE_COMMA      => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			)
		);

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::STATEMENT
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			),
			self::EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			)
		);

		// $pop : Rules for when to pop a state from the stack
		$pop = array(
			self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
			self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
			self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
			self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
		);

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = array(
			self::EXPRESSION_NO_NL => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_ADD_OP     => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			),
			self::EXPRESSION_OP => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			)
		);

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = array(
			self::EXPRESSION_OP          => true,
			self::EXPRESSION_TERNARY_OP  => true,
			self::PAREN_EXPRESSION_OP    => true,
			self::PROPERTY_EXPRESSION_OP => true
		);

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions according to the rules defined above.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = array();
		$last = ';'; // Pretend that we have already seen a semicolon
		while( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\v\f", $pos );
			if( !$skip ) {
				$ch = $s[$pos];
				if( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling. $end will point past the end of it
			// and never past the end of the input.
			$end = $pos + 1;
			// Handle string literals
			if( $ch === "'" || $ch === '"' ) {
				// Search to the closing quote, skipping over backslash escapes. An
				// unterminated literal extends to the end of the input.
				$end = self::findUnescaped( $s, $end, $ch );
				if( $end < $length ) {
					$end++; // include the closing quote
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal, search to the end, skipping over backslash escapes and
				// character classes
				for( ; ; ) {
					$end = self::findUnescaped( $s, $end, '/[' );
					if( $end >= $length ) {
						// Unterminated regexp: take everything up to the end of the input
						break;
					}
					if( $s[$end] === '/' ) {
						$end++; // include the closing slash
						break;
					}
					// We stopped at '[': skip the character class, in which an unescaped
					// slash does not terminate the literal
					$end = self::findUnescaped( $s, $end + 1, ']' );
					if( $end >= $length ) {
						break;
					}
					$end++; // step past the closing bracket
				}
				// Search past the regexp modifiers (gi)
				while( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif(
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				// Numeric literal. Search for the end of it.
				$end += strspn( $s, '0123456789ABCDEFabcdefXx.', $end );
				while( $s[$end - 1] === '.' ) {
					// Special case: When a numeric ends with a dot, we have to check the
					// literal for proper syntax: the dot only belongs to the literal if
					// everything before it consists of decimal digits.
					$decimal = strspn( $s, '0123456789', $pos, $end - $pos - 1 );
					if( $decimal === $end - $pos - 1 ) {
						break;
					} else {
						$end--;
					}
				}
				// Keep a signed exponent ("1.234e-5") inside the literal instead of splitting
				// it into "1.234e", "-" and "5". This only applies to decimal literals, i.e.
				// when everything before the trailing e/E consists of digits and dots.
				if(
					( $s[$end - 1] === 'e' || $s[$end - 1] === 'E' )
					&& strspn( $s, '0123456789.', $pos, $end - $pos - 1 ) === $end - $pos - 1
					&& $end + 1 < $length
					&& ( $s[$end] === '+' || $s[$end] === '-' )
					&& ctype_digit( $s[$end + 1] )
				) {
					$end += 1 + strspn( $s, '0123456789', $end + 1 );
				}
			} elseif( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while(
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\f\v\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos );
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
			// Don't accidentally create ++, -- or // tokens
			} elseif( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
			}

			$out .= $token;
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			if( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}
		}
		return $out;
	}

	/**
	 * Finds the first character from $chars that is not preceded by a backslash escape.
	 *
	 * A backslash always hides the character following it, so the search resumes two
	 * characters after every backslash encountered.
	 *
	 * @param $s String Text to search in
	 * @param $offset Integer Position to start searching at; must not exceed strlen( $s )
	 * @param $chars String Characters to stop at; must not contain a backslash
	 * @return Integer Position of the first match, or strlen( $s ) if there is none
	 */
	private static function findUnescaped( $s, $offset, $chars ) {
		$length = strlen( $s );
		$stopSet = $chars . '\\';
		while( $offset < $length ) {
			$offset += strcspn( $s, $stopSet, $offset );
			if( $offset >= $length || $s[$offset] !== '\\' ) {
				break;
			}
			// Skip the backslash and the character it escapes
			$offset += 2;
		}
		// A trailing backslash can push $offset one past the end
		return $offset < $length ? $offset : $length;
	}
}
Property changes on: trunk/phase3/includes/libs/JavaScriptMinifier.php
___________________________________________________________________
Added: svn:eol-style
1532 + native

Follow-up revisions

RevisionCommit summaryAuthorDate
r83891Followup r83885: implement maximum line length and statement termination (eac...catrope13:24, 14 March 2011
r846131.17wmf1: MFT r81692, r82468, r83814, r83885, r83891, r83897, r83902, r83903,...catrope17:42, 23 March 2011
r85434MFT: r83885, r83891, r83897, r83902, r83903, r83934, r83965, r83979, r83988, ...demon13:38, 5 April 2011
r91591Followup r83885: add JSMin+ 1.3 to use its parser to verify output of JavaScr...brion20:02, 6 July 2011
r103846Add PHPUnit tests for the minification failure case in bug 32548....brion22:20, 21 November 2011
r103865* (bug 32548) fix minification bug when numeric literal with exponent was spl...brion23:16, 21 November 2011

Comments

#Comment by Brion VIBBER (talk | contribs)   22:23, 21 November 2011

The numeric literal tokenization is incorrect; combined with the maximum line length (r83891) this can cause a fatal parse error in output when the beginning of an exponented number ("1.234e") gets split from the signed exponent part ("-5") -- causes bug 32548.

PHPUnit test case added in r103846.

#Comment by Brion VIBBER (talk | contribs)   23:17, 21 November 2011

Fixed in r103865 -- I wouldn't mind some more test cases though, I find the tokenizer code pretty skeezy as it doesn't report errors. :P

Status & tagging log