Index: trunk/phase3/includes/EditPage.php |
— | — | @@ -211,8 +211,8 @@ |
212 | 212 | # These fields need to be checked for encoding. |
213 | 213 | # Also remove trailing whitespace, but don't remove _initial_ |
214 | 214 | # whitespace from the text boxes. This may be significant formatting. |
215 | | - $this->textbox1 = rtrim( $request->getText( 'wpTextbox1' ) ); |
216 | | - $this->textbox2 = rtrim( $request->getText( 'wpTextbox2' ) ); |
| 215 | + $this->textbox1 = $this->safeUnicodeInput( $request, 'wpTextbox1' ); |
| 216 | + $this->textbox2 = $this->safeUnicodeInput( $request, 'wpTextbox2' ); |
217 | 217 | $this->mMetaData = rtrim( $request->getText( 'metadata' ) ); |
218 | 218 | $this->summary = $request->getText( 'wpSummary' ); |
219 | 219 | |
— | — | @@ -699,6 +699,9 @@ |
700 | 700 | } |
701 | 701 | else $metadata = "" ; |
702 | 702 | |
| 703 | + $safemodehtml = $this->checkUnicodeCompliantBrowser() |
| 704 | + ? "" |
| 705 | + : "<input type='hidden' name=\"safemode\" value='1' />\n"; |
703 | 706 | |
704 | 707 | $wgOut->addHTML( <<<END |
705 | 708 | {$toolbar} |
— | — | @@ -708,12 +711,13 @@ |
709 | 712 | <textarea tabindex='1' accesskey="," name="wpTextbox1" rows='{$rows}' |
710 | 713 | cols='{$cols}'{$ew}> |
711 | 714 | END |
712 | | -. htmlspecialchars( $wgContLang->recodeForEdit( $this->textbox1 ) ) . |
| 715 | +. htmlspecialchars( $this->safeUnicodeOutput( $this->textbox1 ) ) . |
713 | 716 | " |
714 | 717 | </textarea> |
715 | 718 | {$metadata} |
716 | 719 | <br />{$editsummary} |
717 | 720 | {$checkboxhtml} |
| 721 | +{$safemodehtml} |
718 | 722 | <input tabindex='5' id='wpSave' type='submit' value=\"{$save}\" name=\"wpSave\" accesskey=\"".wfMsg('accesskey-save')."\"". |
719 | 723 | " title=\"".wfMsg('tooltip-save')."\"/> |
720 | 724 | <input tabindex='6' id='wpPreview' type='submit' $liveOnclick value=\"{$prev}\" name=\"wpPreview\" accesskey=\"".wfMsg('accesskey-preview')."\"". |
— | — | @@ -749,7 +753,7 @@ |
750 | 754 | |
751 | 755 | $wgOut->addWikiText( '==' . wfMsg( "yourtext" ) . '==' ); |
752 | 756 | $wgOut->addHTML( "<textarea tabindex=6 id='wpTextbox2' name=\"wpTextbox2\" rows='{$rows}' cols='{$cols}' wrap='virtual'>" |
753 | | -. htmlspecialchars( $wgContLang->recodeForEdit( $this->textbox2 ) ) . |
| 757 | +. htmlspecialchars( $this->safeUnicodeOutput( $this->textbox2 ) ) . |
754 | 758 | " |
755 | 759 | </textarea>" ); |
756 | 760 | } |
— | — | @@ -1162,6 +1166,122 @@ |
1163 | 1167 | return '<div id="wikiDiff">' . $difftext . '</div>'; |
1164 | 1168 | } |
1165 | 1169 | |
/**
 * Fetch a text field from the request with trailing whitespace removed,
 * reversing the Unicode armoring when the submitted form carried the
 * 'safemode' flag (the hidden field emitted by the edit form for browsers
 * that fail checkUnicodeCompliantBrowser()).
 *
 * Leading whitespace is left alone; only the right-hand side is trimmed.
 *
 * @param WebRequest $request
 * @param string $field  Name of the form field to read
 * @return string        The field text, de-armored if 'safemode' was set
 * @access private
 */
function safeUnicodeInput( $request, $field ) {
	$trimmed = rtrim( $request->getText( $field ) );
	if( !$request->getBool( 'safemode' ) ) {
		// Normal browser: the text was never armored, pass it through.
		return $trimmed;
	}
	return $this->unmakesafe( $trimmed );
}
| 1185 | + |
/**
 * Prepare text for display in an edit box. The text is always passed
 * through $wgContLang->recodeForEdit(); if the requesting browser fails
 * checkUnicodeCompliantBrowser() the recoded text is additionally armored
 * with makesafe() so that non-ASCII characters survive the round trip.
 *
 * @param string $text
 * @return string  Recoded text, armored when the browser is not compliant
 * @access private
 */
function safeUnicodeOutput( $text ) {
	global $wgContLang;
	$recoded = $wgContLang->recodeForEdit( $text );
	if( $this->checkUnicodeCompliantBrowser() ) {
		return $recoded;
	}
	return $this->makesafe( $recoded );
}
| 1201 | + |
/**
 * A number of web browsers are known to corrupt non-ASCII characters
 * in a UTF-8 text editing environment. To protect against this,
 * detected browsers will be served an armored version of the text,
 * with non-ASCII chars converted to numeric HTML character references
 * of the form "&#xHEX;" (uppercase hex, no leading zeros).
 *
 * Preexisting such character references will have a 0 added to them
 * ("&#x41;" becomes "&#x041;") to ensure that round-trips do not alter
 * the original data; unmakesafe() relies on that leading 0 to tell the
 * two kinds apart.
 *
 * @param string $invalue  UTF-8 text
 * @return string          ASCII-only armored text
 * @access private
 */
function makesafe( $invalue ) {
	// Armor existing references for reversibility.
	$invalue = strtr( $invalue, array( "&#x" => "&#x0" ) );

	$bytesleft = 0;
	$result = "";
	$working = 0;
	$length = strlen( $invalue );
	for( $i = 0; $i < $length; $i++ ) {
		$bytevalue = ord( $invalue[$i] );
		if( $bytevalue <= 0x7F ) { //0xxx xxxx
			// Plain ASCII is passed through untouched.
			$result .= $invalue[$i];
			$bytesleft = 0;
		} elseif( $bytevalue <= 0xBF ) { //10xx xxxx
			// Continuation byte: shift in six more bits of the code point.
			$working = $working << 6;
			$working += ($bytevalue & 0x3F);
			$bytesleft--;
			if( $bytesleft <= 0 ) {
				// Sequence complete: emit the accumulated code point.
				$result .= "&#x" . strtoupper( dechex( $working ) ) . ";";
			}
		} elseif( $bytevalue <= 0xDF ) { //110x xxxx
			$working = $bytevalue & 0x1F;
			$bytesleft = 1;
		} elseif( $bytevalue <= 0xEF ) { //1110 xxxx
			$working = $bytevalue & 0x0F;
			$bytesleft = 2;
		} else { //1111 0xxx
			$working = $bytevalue & 0x07;
			$bytesleft = 3;
		}
	}
	return $result;
}

/**
 * Reverse the previously applied transliteration of non-ASCII characters
 * back to UTF-8. Used to protect data from corruption by broken web browsers
 * as listed in $wgBrowserBlackList.
 *
 * References of the form "&#xHEX;" whose first digit is not 0 are decoded
 * to UTF-8 via codepointToUtf8(). References starting with "&#x0" are the
 * armored form of references that were already in the text; they are not
 * decoded, only stripped of the extra 0. Malformed references (no hex
 * digits, missing ";", more than 6 digits, or a value above U+10FFFF) are
 * left in the text unchanged.
 *
 * @param string $invalue  Armored text as produced by makesafe()
 * @return string          UTF-8 text
 * @access private
 */
function unmakesafe( $invalue ) {
	$result = "";
	$length = strlen( $invalue );
	for( $i = 0; $i < $length; $i++ ) {
		$startsReference = ( $i + 3 < $length )
			&& ( substr( $invalue, $i, 3 ) === "&#x" )
			&& ( $invalue[$i + 3] !== '0' );
		if( !$startsReference ) {
			$result .= $invalue[$i];
			continue;
		}

		// Collect the run of hex digits following "&#x".
		$i += 3;
		$hexstring = "";
		while( $i < $length && ctype_xdigit( $invalue[$i] ) ) {
			$hexstring .= $invalue[$i];
			$i++;
		}

		// Do some sanity checks. These aren't needed for reversibility,
		// but should help keep the breakage down if the editor
		// breaks one of the entities whilst editing.
		$digits = strlen( $hexstring );
		$terminated = ( $i < $length ) && ( $invalue[$i] === ";" );
		$codepoint = ( $digits >= 1 && $digits <= 6 ) ? hexdec( $hexstring ) : -1;
		if( $terminated && $codepoint >= 0 && $codepoint <= 0x10FFFF ) {
			// $i now rests on the ";", which the loop increment skips.
			$result .= codepointToUtf8( $codepoint );
		} else {
			// Not a usable reference: keep what was consumed verbatim and
			// step back so the character that ended the digit run is
			// examined normally (it may itself begin another reference).
			$result .= "&#x" . $hexstring;
			$i--;
		}
	}
	// reverse the transform that we made for reversibility reasons.
	return strtr( $result, array( "&#x0" => "&#x" ) );
}
| 1284 | + |
| 1285 | + |
1166 | 1286 | } |
1167 | 1287 | |
1168 | 1288 | ?> |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -614,6 +614,8 @@ |
615 | 615 | * Fix typo in undefined array index access prevention |
616 | 616 | * (bug 2947) Update namespaces for sr localization |
617 | 617 | * (bug 2952) Added Asturian language file with translated namespaces |
| 618 | +* (bug 2676) Apply a protective transformation on editing input/output |
| 619 | + for browsers that hit the Unicode blacklist. Patch by plugwash. |
618 | 620 | |
619 | 621 | |
620 | 622 | === Caveats === |
Index: trunk/phase3/languages/Language.php |
— | — | @@ -730,7 +730,7 @@ |
731 | 731 | press \"Save page\".<br />", |
732 | 732 | 'yourtext' => 'Your text', |
733 | 733 | 'storedversion' => 'Stored version', |
734 | | -'nonunicodebrowser' => "<strong>WARNING: Your browser is not unicode compliant, please change it before editing an article.</strong>", |
| 734 | +'nonunicodebrowser' => "<strong>WARNING: Your browser is not unicode compliant. A workaround is in place to allow you to safely edit articles: non-ASCII characters will appear in the edit box as hexadecimal codes.</strong>", |
735 | 735 | 'editingold' => "<strong>WARNING: You are editing an out-of-date |
736 | 736 | revision of this page. |
737 | 737 | If you save it, any changes made since this revision will be lost.</strong>", |