r98285 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r98284 | r98285 | r98286 >
Date: 23:15, 27 September 2011
Author: brion
Status: ok
Tags:
Comment:
MFT r98281: (bug 31187) patch JSMin+ to support valid Unicode characters in JavaScript identifiers
MFT r94614: force charset for qunit tests needed for the above
Modified paths:
  • /branches/REL1_18/phase3/includes/libs/jsminplus.php (modified) (history)
  • /branches/REL1_18/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php (modified) (history)
  • /branches/REL1_18/phase3/tests/qunit/index.html (modified) (history)
  • /branches/REL1_18/phase3/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js (added) (history)

Diff [purge]

Index: branches/REL1_18/phase3/tests/qunit/index.html
@@ -2,7 +2,7 @@
33 <html>
44 <head>
55 <title>MediaWiki JavaScript Test Suite</title>
6 -
 6+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
77 <!-- MediaWiki Modules -->
88
99 <!-- MW: startup -->
@@ -57,6 +57,7 @@
5858 <script src="data/testrunner.js"></script>
5959
6060 <!-- QUnit: Load test suites (maintain the same order as above please) -->
 61+ <script src="suites/resources/mediawiki/mediawiki.jscompat.test.js"></script>
6162 <script src="suites/resources/mediawiki/mediawiki.js"></script>
6263 <script src="suites/resources/mediawiki/mediawiki.user.js"></script>
6364
Index: branches/REL1_18/phase3/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js
@@ -0,0 +1,35 @@
 2+/* Some misc JavaScript compatibility tests, just to make sure the environments we run in are consistent */
 3+
 4+module( 'mediawiki.jscompat' );
 5+
 6+test( 'Variable with Unicode letter in name', function() {
 7+ expect(3);
 8+ var orig = "some token";
 9+ var ŝablono = orig;
 10+ deepEqual( ŝablono, orig, 'ŝablono' );
 11+ deepEqual( \u015dablono, orig, '\\u015dablono' );
 12+ deepEqual( \u015Dablono, orig, '\\u015Dablono' );
 13+});
 14+
 15+/*
 16+// Not that we need this. ;)
 17+// This fails on IE 6-8
 18+// Works on IE 9, Firefox 6, Chrome 14
 19+test( 'Keyword workaround: "if" as variable name using Unicode escapes', function() {
 20+ var orig = "another token";
 21+ \u0069\u0066 = orig;
 22+ deepEqual( \u0069\u0066, orig, '\\u0069\\u0066' );
 23+});
 24+*/
 25+
 26+/*
 27+// Not that we need this. ;)
 28+// This fails on IE 6-9
 29+// Works on Firefox 6, Chrome 14
 30+test( 'Keyword workaround: "if" as member variable name using Unicode escapes', function() {
 31+ var orig = "another token";
 32+ var foo = {};
 33+ foo.\u0069\u0066 = orig;
 34+ deepEqual( foo.\u0069\u0066, orig, 'foo.\\u0069\\u0066' );
 35+});
 36+*/
Property changes on: branches/REL1_18/phase3/tests/qunit/suites/resources/mediawiki/mediawiki.jscompat.test.js
___________________________________________________________________
Added: svn:eol-style
137 + native
Index: branches/REL1_18/phase3/tests/phpunit/includes/libs/JavaScriptMinifierTest.php
@@ -78,6 +78,12 @@
7979
8080 // newline insertion after 1000 chars: break after the "++", not before
8181 array( str_repeat( ';', 996 ) . "if(x++);", str_repeat( ';', 996 ) . "if(x++\n);" ),
 82+
 83+ // Unicode letter characters should pass through ok in identifiers (bug 31187)
 84+ array( "var KaŝSkatolVal = {}", 'var KaŝSkatolVal={}'),
 85+ // And also per spec unicode char escape values should work in identifiers,
 86+ // as long as it's a valid char. In future it might get normalized.
 87+ array( "var Ka\\u015dSkatolVal = {}", 'var Ka\\u015dSkatolVal={}'),
8288 );
8389 }
8490
Index: branches/REL1_18/phase3/includes/libs/jsminplus.php
@@ -1989,13 +1989,55 @@
19901990 break;
19911991
19921992 default:
1993 - // FIXME: add support for unicode and unicode escape sequence \uHHHH
1994 - if (preg_match('/^[$\w]+/', $input, $match))
 1993+ // Fast path for identifiers: word chars followed by whitespace or various other tokens.
 1994+ // Note we don't need to exclude digits in the first char, as they've already been found
 1995+ // above.
 1996+ if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match))
19951997 {
1996 - $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
 1998+ // Character classes per ECMA-262 edition 5.1 section 7.6
 1999+ // Per spec, must accept Unicode 3.0, *may* accept later versions.
 2000+ // We'll take whatever PCRE understands, which should be more recent.
 2001+ $identifierStartChars = "\\p{L}\\p{Nl}" . # UnicodeLetter
 2002+ "\$" .
 2003+ "_";
 2004+ $identifierPartChars = $identifierStartChars .
 2005+ "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
 2006+ "\\p{Nd}" . # UnicodeDigit
 2007+ "\\p{Pc}"; # UnicodeConnectorPunctuation
 2008+ $unicodeEscape = "\\\\u[0-9A-F-a-f]{4}";
 2009+ $identifierRegex = "/^" .
 2010+ "(?:[$identifierStartChars]|$unicodeEscape)" .
 2011+ "(?:[$identifierPartChars]|$unicodeEscape)*" .
 2012+ "/uS";
 2013+ if (preg_match($identifierRegex, $input, $match))
 2014+ {
 2015+ if (strpos($match[0], '\\') !== false) {
 2016+ // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
 2017+ // the original chars, but only within the boundaries of the identifier.
 2018+ $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
 2019+ array(__CLASS__, 'unicodeEscapeCallback'),
 2020+ $match[0]);
 2021+
 2022+ // Since our original regex didn't de-escape the originals, we need to check for validity again.
 2023+ // No need to worry about token boundaries, as anything outside the identifier is illegal!
 2024+ if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
 2025+ throw $this->newSyntaxError('Illegal token');
 2026+ }
 2027+
 2028+ // Per spec it _ought_ to work to use these escapes for keywords as well...
 2029+ // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
 2030+ // that don't match the keyword.
 2031+ if (in_array($decoded, $this->keywords)) {
 2032+ throw $this->newSyntaxError('Illegal token');
 2033+ }
 2034+
 2035+ // TODO: save the decoded form for output?
 2036+ }
 2037+ }
 2038+ else
 2039+ throw $this->newSyntaxError('Illegal token');
19972040 }
1998 - else
1999 - throw $this->newSyntaxError('Illegal token');
 2041+ $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
20002042 }
20012043 }
20022044
@@ -2033,6 +2075,11 @@
20342076 {
20352077 return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
20362078 }
 2079+
 2080+ public static function unicodeEscapeCallback($m)
 2081+ {
 2082+ return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8');
 2083+ }
20372084 }
20382085
20392086 class JSToken

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r94614 | force QUnit charset to UTF-8 | hashar | 10:28, 16 August 2011
r98281 | * (bug 31187) Fix for user JavaScript validation to allow identifiers with va... | brion | 22:51, 27 September 2011

Status & tagging log