Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -62,6 +62,8 @@ |
63 | 63 | themselves unless they are given the 'unblockself' permission. |
64 | 64 | * (bug 22876) Avoid possible PHP Notice if $wgDefaultUserOptions is not |
65 | 65 | correctly set |
| 66 | +* (bug 14952) Page titles are renormalized after HTML entities are decoded so that |
| 67 | + links with non-NFC character references work correctly. |
66 | 68 | |
67 | 69 | == API changes in 1.17 == |
68 | 70 | * (bug 22738) Allow filtering by action type on query=logevent |
Index: trunk/phase3/maintenance/parserTests.txt |
— | — | @@ -4114,7 +4114,32 @@ |
4115 | 4115 | </p> |
4116 | 4116 | !!end |
4117 | 4117 | |
| 4118 | +!! article |
| 4119 | +אַ |
| 4120 | +!! text |
| 4121 | +Test for unicode normalization |
| 4122 | + |
| 4123 | +The page's name is U+05d0 U+05b7, with non-canonical form U+FB2E |
| 4124 | +!! endarticle |
| 4125 | + |
4118 | 4126 | !! test |
| 4127 | +(bug 19451) Links should refer to the normalized form. |
| 4128 | +!! input |
| 4129 | +[[אַ]] |
| 4130 | +[[אַ]] |
| 4131 | +[[אַ]] |
| 4132 | +[[אַ]] |
| 4133 | +[[אַ]] |
| 4134 | +!! result |
| 4135 | +<p><a href="https://www.mediawiki.org/wiki/%D7%90%D6%B7" title="אַ">אַ</a> |
| 4136 | +<a href="https://www.mediawiki.org/wiki/%D7%90%D6%B7" title="אַ">אַ</a> |
| 4137 | +<a href="https://www.mediawiki.org/wiki/%D7%90%D6%B7" title="אַ">אַ</a> |
| 4138 | +<a href="https://www.mediawiki.org/wiki/%D7%90%D6%B7" title="אַ">אַ</a> |
| 4139 | +<a href="https://www.mediawiki.org/wiki/%D7%90%D6%B7" title="אַ">אַ</a> |
| 4140 | +</p> |
| 4141 | +!! end |
| 4142 | + |
| 4143 | +!! test |
4119 | 4144 | Empty attribute crash test (bug 2067) |
4120 | 4145 | !! input |
4121 | 4146 | <font color="">foo</font> |
Index: trunk/phase3/includes/Title.php |
— | — | @@ -127,9 +127,9 @@ |
128 | 128 | } |
129 | 129 | |
130 | 130 | /** |
131 | | - * Convert things like é ā or 〗 into real text... |
| 131 | + * Convert things like é ā or 〗 into normalized (bug 14952) text |
132 | 132 | */ |
133 | | - $filteredText = Sanitizer::decodeCharReferences( $text ); |
| 133 | + $filteredText = Sanitizer::decodeCharReferencesAndNormalize( $text ); |
134 | 134 | |
135 | 135 | $t = new Title(); |
136 | 136 | $t->mDbkeyform = str_replace( ' ', '_', $filteredText ); |
Index: trunk/phase3/includes/Sanitizer.php |
— | — | @@ -1177,6 +1177,30 @@ |
1178 | 1178 | } |
1179 | 1179 | |
1180 | 1180 | /** |
| 1181 | + * Decode any character references, numeric or named entities, |
| 1182 | + * in the text and normalize the resulting string. (bug 14952) |
| 1183 | + * |
| 1184 | + * This is useful for page titles, not for text to be displayed; |
| 1185 | + * MediaWiki allows HTML entities to escape normalization as a feature. |
| 1186 | + * |
| 1187 | + * @param $text String (already normalized, containing entities) |
| 1188 | + * @return String (still normalized, without entities) |
| 1189 | + */ |
| 1190 | + public static function decodeCharReferencesAndNormalize( $text ) { |
| 1191 | + global $wgContLang; |
| 1192 | + $text = preg_replace_callback( |
| 1193 | + MW_CHAR_REFS_REGEX, |
| 1194 | + array( 'Sanitizer', 'decodeCharReferencesCallback' ), |
| 1195 | + $text, /* limit */ -1, $count ); |
| 1196 | + |
| 1197 | + if ( $count ) { |
| 1198 | + return $wgContLang->normalize( $text ); |
| 1199 | + } else { |
| 1200 | + return $text; |
| 1201 | + } |
| 1202 | + } |
| 1203 | + |
| 1204 | + /** |
1181 | 1205 | * @param $matches String |
1182 | 1206 | * @return String |
1183 | 1207 | */ |