r98303 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r98302 | r98303 | r98304 >
Date:	11:58, 28 September 2011
Author:	tstarling
Status:	ok
Tags:
Comment:	MFT r98281
Modified paths:	/branches/wmf/1.18wmf1/includes/libs/jsminplus.php (modified) (history) /branches/wmf/1.18wmf1/tests/phpunit/includes/libs/JavaScriptMinifierTest.php (modified) (history) /branches/wmf/1.18wmf1/tests/qunit/index.html (modified) (history) /branches/wmf/1.18wmf1/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js (added) (history)

Diff [purge]

Index: branches/wmf/1.18wmf1/tests/qunit/index.html
—	—	@@ -59,6 +59,7 @@
60	60	<!-- QUnit: Load test suites (maintain the same order as above please) -->
61	61	<script src="suites/resources/mediawiki/mediawiki.js"></script>
62	62	<script src="suites/resources/mediawiki/mediawiki.user.js"></script>
	63	+ <script src="suites/resources/mediawiki/mediawiki.jscompat.test.js"></script>
63	64
64	65	<script src="suites/resources/jquery/jquery.client.js"></script>
65	66	<script src="suites/resources/jquery/jquery.mwPrototypes.js"></script>
Property changes on: branches/wmf/1.18wmf1/tests/qunit/index.html
___________________________________________________________________
Modified: svn:mergeinfo
66	67	Merged /trunk/phase3/tests/qunit/index.html:r98281
Index: branches/wmf/1.18wmf1/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js
—	—	@@ -0,0 +1,35 @@
	2	+/* Some misc JavaScript compatibility tests, just to make sure the environments we run in are consistent */
	3	+
	4	+module( 'mediawiki.jscompat' );
	5	+
	6	+test( 'Variable with Unicode letter in name', function() {
	7	+ expect(3);
	8	+ var orig = "some token";
	9	+ var ŝablono = orig;
	10	+ deepEqual( ŝablono, orig, 'ŝablono' );
	11	+ deepEqual( \u015dablono, orig, '\\u015dablono' );
	12	+ deepEqual( \u015Dablono, orig, '\\u015Dablono' );
	13	+});
	14	+
	15	+/*
	16	+// Not that we need this. ;)
	17	+// This fails on IE 6-8
	18	+// Works on IE 9, Firefox 6, Chrome 14
	19	+test( 'Keyword workaround: "if" as variable name using Unicode escapes', function() {
	20	+ var orig = "another token";
	21	+ \u0069\u0066 = orig;
	22	+ deepEqual( \u0069\u0066, orig, '\\u0069\\u0066' );
	23	+});
	24	+*/
	25	+
	26	+/*
	27	+// Not that we need this. ;)
	28	+// This fails on IE 6-9
	29	+// Works on Firefox 6, Chrome 14
	30	+test( 'Keyword workaround: "if" as member variable name using Unicode escapes', function() {
	31	+ var orig = "another token";
	32	+ var foo = {};
	33	+ foo.\u0069\u0066 = orig;
	34	+ deepEqual( foo.\u0069\u0066, orig, 'foo.\\u0069\\u0066' );
	35	+});
	36	+*/
Property changes on: branches/wmf/1.18wmf1/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js
___________________________________________________________________
Added: svn:mergeinfo
1	37	Merged /branches/new-installer/phase3/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js:r43664-66004
2	38	Merged /branches/REL1_15/phase3/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js:r51646
3	39	Merged /branches/REL1_17/phase3/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js:r81445,81448
4	40	Merged /branches/sqlite/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js:r58211-58321
Added: svn:eol-style
5	41	+ native
Index: branches/wmf/1.18wmf1/tests/phpunit/includes/libs/JavaScriptMinifierTest.php
—	—	@@ -78,6 +78,12 @@
79	79
80	80	// newline insertion after 1000 chars: break after the "++", not before
81	81	array( str_repeat( ';', 996 ) . "if(x++);", str_repeat( ';', 996 ) . "if(x++\n);" ),
	82	+
	83	+ // Unicode letter characters should pass through ok in identifiers (bug 31187)
	84	+ array( "var KaŝSkatolVal = {}", 'var KaŝSkatolVal={}'),
	85	+ // And also per spec unicode char escape values should work in identifiers,
	86	+ // as long as it's a valid char. In future it might get normalized.
	87	+ array( "var Ka\\u015dSkatolVal = {}", 'var Ka\\u015dSkatolVal={}'),
82	88	);
83	89	}
84	90
Property changes on: branches/wmf/1.18wmf1/tests/phpunit/includes/libs/JavaScriptMinifierTest.php
___________________________________________________________________
Added: svn:mergeinfo
85	91	Merged /branches/new-installer/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php:r43664-66004
86	92	Merged /branches/REL1_15/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php:r51646
87	93	Merged /branches/REL1_17/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php:r81445,81448
88	94	Merged /branches/sqlite/tests/phpunit/includes/libs/JavaScriptMinifierTest.php:r58211-58321
89	95	Merged /trunk/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php:r92580,92634,92713,92762,92765,92791,92854,92884,92886-92887,92894,92898,92907,92932,92958,93141,93149,93151,93233-93234,93258,93266,93303,93516-93518,93520,93818-93822,93847,93858,93891,93935-93936,94058,94062,94068,94107,94155,94235,94277,94346,94372,94422,94425,94444,94448,94456,94498,94517,94601,94630,94728,94738,94825,94862,94995-94997,95023,95042,95072-95073,95155,95327,95332,95410,95422,95426,95442,95468,95601,95812,98281
Index: branches/wmf/1.18wmf1/includes/libs/jsminplus.php
—	—	@@ -1989,13 +1989,55 @@
1990	1990	break;
1991	1991
1992	1992	default:
1993		~~- // FIXME: add support for unicode and unicode escape sequence \uHHHH~~
1994		~~- if (preg_match('/^[$\w]+/', $input, $match))~~
	1993	+ // Fast path for identifiers: word chars followed by whitespace or various other tokens.
	1994	+ // Note we don't need to exclude digits in the first char, as they've already been found
	1995	+ // above.
	1996	+ if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}@])/', $input, $match))
1995	1997	{
1996		~~- $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;~~
	1998	+ // Character classes per ECMA-262 edition 5.1 section 7.6
	1999	+ // Per spec, must accept Unicode 3.0, may accept later versions.
	2000	+ // We'll take whatever PCRE understands, which should be more recent.
	2001	+ $identifierStartChars = "\\p{L}\\p{Nl}" . # UnicodeLetter
	2002	+ "\$" .
	2003	+ "_";
	2004	+ $identifierPartChars = $identifierStartChars .
	2005	+ "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
	2006	+ "\\p{Nd}" . # UnicodeDigit
	2007	+ "\\p{Pc}"; # UnicodeConnectorPunctuation
	2008	+ $unicodeEscape = "\\\\u[0-9A-F-a-f]{4}";
	2009	+ $identifierRegex = "/^" .
	2010	+ "(?:[$identifierStartChars]|$unicodeEscape)" .
	2011	+ "(?:[$identifierPartChars]|$unicodeEscape)*" .
	2012	+ "/uS";
	2013	+ if (preg_match($identifierRegex, $input, $match))
	2014	+ {
	2015	+ if (strpos($match[0], '\\') !== false) {
	2016	+ // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
	2017	+ // the original chars, but only within the boundaries of the identifier.
	2018	+ $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
	2019	+ array(__CLASS__, 'unicodeEscapeCallback'),
	2020	+ $match[0]);
	2021	+
	2022	+ // Since our original regex didn't de-escape the originals, we need to check for validity again.
	2023	+ // No need to worry about token boundaries, as anything outside the identifier is illegal!
	2024	+ if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
	2025	+ throw $this->newSyntaxError('Illegal token');
	2026	+ }
	2027	+
	2028	+ // Per spec it _ought_ to work to use these escapes for keywords words as well...
	2029	+ // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
	2030	+ // that don't match the keyword.
	2031	+ if (in_array($decoded, $this->keywords)) {
	2032	+ throw $this->newSyntaxError('Illegal token');
	2033	+ }
	2034	+
	2035	+ // TODO: save the decoded form for output?
	2036	+ }
	2037	+ }
	2038	+ else
	2039	+ throw $this->newSyntaxError('Illegal token');
1997	2040	}
1998		~~- else~~
1999		~~- throw $this->newSyntaxError('Illegal token');~~
	2041	+ $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
2000	2042	}
2001	2043	}
2002	2044
—	—	@@ -2033,6 +2075,11 @@
2034	2076	{
2035	2077	return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
2036	2078	}
	2079	+
	2080	+ public static function unicodeEscapeCallback($m)
	2081	+ {
	2082	+ return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8');
	2083	+ }
2037	2084	}
2038	2085
2039	2086	class JSToken
Property changes on: branches/wmf/1.18wmf1/includes/libs/jsminplus.php
___________________________________________________________________
Added: svn:mergeinfo
2040	2087	Merged /branches/new-installer/phase3/includes/libs/jsminplus.php:r43664-66004
2041	2088	Merged /branches/wmf-deployment/includes/libs/jsminplus.php:r53381
2042	2089	Merged /branches/REL1_15/phase3/includes/libs/jsminplus.php:r51646
2043	2090	Merged /branches/sqlite/includes/libs/jsminplus.php:r58211-58321
2044	2091	Merged /trunk/phase3/includes/libs/jsminplus.php:r92580,92634,92713,92762,92765,92791,92854,92884,92886-92887,92894,92898,92907,92932,92958,93141,93149,93151,93233-93234,93258,93266,93303,93516-93518,93818-93822,93847,93858,93891,93935-93936,94058,94062,94068,94107,94155,94235,94277,94346,94372,94422,94425,94444,94448,94456,94498,94517,94601,94630,94728,94738,94825,94862,94995-94997,95023,95042,95072-95073,95155,95327,95332,95410,95422,95426,95442,95468,95601,95812,98281

Past revisions this follows-up on

Revision	Commit summary	Author	Date
r98281	* (bug 31187) Fix for user JavaScript validation to allow identifiers with va...	brion	22:51, 27 September 2011

Status & tagging log

12:16, 28 September 2011 😂 (talk | contribs) changed the status of r98303 [removed: new added: ok]