r10313 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r10312‎ | r10313 | r10314 >
Date:10:08, 29 July 2005
Author:vibber
Status:old
Tags:
Comment:
* (bug 2676) Apply a protective transformation on editing input/output
for browsers that hit the Unicode blacklist. Patch by plugwash.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/EditPage.php (modified) (history)
  • /trunk/phase3/languages/Language.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/EditPage.php
@@ -211,8 +211,8 @@
212212 # These fields need to be checked for encoding.
213213 # Also remove trailing whitespace, but don't remove _initial_
214214 # whitespace from the text boxes. This may be significant formatting.
215 - $this->textbox1 = rtrim( $request->getText( 'wpTextbox1' ) );
216 - $this->textbox2 = rtrim( $request->getText( 'wpTextbox2' ) );
 215+ $this->textbox1 = $this->safeUnicodeInput( $request, 'wpTextbox1' );
 216+ $this->textbox2 = $this->safeUnicodeInput( $request, 'wpTextbox2' );
217217 $this->mMetaData = rtrim( $request->getText( 'metadata' ) );
218218 $this->summary = $request->getText( 'wpSummary' );
219219
@@ -699,6 +699,9 @@
700700 }
701701 else $metadata = "" ;
702702
 703+ $safemodehtml = $this->checkUnicodeCompliantBrowser()
 704+ ? ""
 705+ : "<input type='hidden' name=\"safemode\" value='1' />\n";
703706
704707 $wgOut->addHTML( <<<END
705708 {$toolbar}
@@ -708,12 +711,13 @@
709712 <textarea tabindex='1' accesskey="," name="wpTextbox1" rows='{$rows}'
710713 cols='{$cols}'{$ew}>
711714 END
712 -. htmlspecialchars( $wgContLang->recodeForEdit( $this->textbox1 ) ) .
 715+. htmlspecialchars( $this->safeUnicodeOutput( $this->textbox1 ) ) .
713716 "
714717 </textarea>
715718 {$metadata}
716719 <br />{$editsummary}
717720 {$checkboxhtml}
 721+{$safemodehtml}
718722 <input tabindex='5' id='wpSave' type='submit' value=\"{$save}\" name=\"wpSave\" accesskey=\"".wfMsg('accesskey-save')."\"".
719723 " title=\"".wfMsg('tooltip-save')."\"/>
720724 <input tabindex='6' id='wpPreview' type='submit' $liveOnclick value=\"{$prev}\" name=\"wpPreview\" accesskey=\"".wfMsg('accesskey-preview')."\"".
@@ -749,7 +753,7 @@
750754
751755 $wgOut->addWikiText( '==' . wfMsg( "yourtext" ) . '==' );
752756 $wgOut->addHTML( "<textarea tabindex=6 id='wpTextbox2' name=\"wpTextbox2\" rows='{$rows}' cols='{$cols}' wrap='virtual'>"
753 -. htmlspecialchars( $wgContLang->recodeForEdit( $this->textbox2 ) ) .
 757+. htmlspecialchars( $this->safeUnicodeOutput( $this->textbox2 ) ) .
754758 "
755759 </textarea>" );
756760 }
@@ -1162,6 +1166,122 @@
11631167 return '<div id="wikiDiff">' . $difftext . '</div>';
11641168 }
11651169
 1170+ /**
 1171+ * Filter an input field through a Unicode de-armoring process if it
 1172+ * came from an old browser with known broken Unicode editing issues.
 1173+ *
 1174+ * @param WebRequest $request
 1175+ * @param string $field
 1176+ * @return string
 1177+ * @access private
 1178+ */
 1179+ function safeUnicodeInput( $request, $field ) {
 1180+ $text = rtrim( $request->getText( $field ) );
 1181+ return $request->getBool( 'safemode' )
 1182+ ? $this->unmakesafe( $text )
 1183+ : $text;
 1184+ }
 1185+
 1186+ /**
 1187+ * Filter an output field through a Unicode armoring process if it
 1188+ * is being sent to an old browser with known broken Unicode editing issues.
 1189+ *
 1190+ * @param string $text
 1191+ * @return string
 1192+ * @access private
 1193+ */
 1194+ function safeUnicodeOutput( $text ) {
 1195+ global $wgContLang;
 1196+ $codedText = $wgContLang->recodeForEdit( $text );
 1197+ return $this->checkUnicodeCompliantBrowser()
 1198+ ? $codedText
 1199+ : $this->makesafe( $codedText );
 1200+ }
 1201+
 1202+ /**
 1203+ * A number of web browsers are known to corrupt non-ASCII characters
 1204+ * in a UTF-8 text editing environment. To protect against this,
 1205+ * detected browsers will be served an armored version of the text,
 1206+ * with non-ASCII chars converted to numeric HTML character references.
 1207+ *
 1208+ * Preexisting hexadecimal character references (&#x...) will have a
 1209+ * leading 0 added to them to ensure that round-trips do not alter the original data.
 1210+ *
 1211+ * @param string $invalue
 1212+ * @return string
 1213+ * @access private
 1214+ */
 1215+ function makesafe( $invalue ) {
 1216+ // Armor existing references for reversibility.
 1217+ $invalue = strtr( $invalue, array( "&#x" => "&#x0" ) );
 1218+
 1219+ $bytesleft = 0;
 1220+ $result = "";
 1221+ $working = 0;
 1222+ for( $i = 0; $i < strlen( $invalue ); $i++ ) {
 1223+ $bytevalue = ord( $invalue{$i} );
 1224+ if( $bytevalue <= 0x7F ) { //0xxx xxxx
 1225+ $result .= chr( $bytevalue );
 1226+ $bytesleft = 0;
 1227+ } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
 1228+ $working = $working << 6;
 1229+ $working += ($bytevalue & 0x3F);
 1230+ $bytesleft--;
 1231+ if( $bytesleft <= 0 ) {
 1232+ $result .= "&#x" . strtoupper( dechex( $working ) ) . ";";
 1233+ }
 1234+ } elseif( $bytevalue <= 0xDF ) { //110x xxxx
 1235+ $working = $bytevalue & 0x1F;
 1236+ $bytesleft = 1;
 1237+ } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
 1238+ $working = $bytevalue & 0x0F;
 1239+ $bytesleft = 2;
 1240+ } else { //1111 0xxx
 1241+ $working = $bytevalue & 0x07;
 1242+ $bytesleft = 3;
 1243+ }
 1244+ }
 1245+ return $result;
 1246+ }
 1247+
 1248+ /**
 1249+ * Reverse the previously applied transliteration of non-ASCII characters
 1250+ * back to UTF-8. Used to protect data from corruption by broken web browsers
 1251+ * as listed in $wgBrowserBlackList.
 1252+ *
 1253+ * @param string $invalue
 1254+ * @return string
 1255+ * @access private
 1256+ */
 1257+ function unmakesafe( $invalue ) {
 1258+ $result = "";
 1259+ for( $i = 0; $i < strlen( $invalue ); $i++ ) {
 1260+ if( ( substr( $invalue, $i, 3 ) == "&#x" ) && ( $invalue{$i+3} != '0' ) ) {
 1261+ $i += 3;
 1262+ $hexstring = "";
 1263+ do {
 1264+ $hexstring .= $invalue{$i};
 1265+ $i++;
 1266+ } while( ctype_xdigit( $invalue{$i} ) && ( $i < strlen( $invalue ) ) );
 1267+
 1268+ // Do some sanity checks. These aren't needed for reversibility,
 1269+ // but should help keep the breakage down if the editor
 1270+ // breaks one of the entities whilst editing.
 1271+ if ((substr($invalue,$i,1)==";") and (strlen($hexstring) <= 6)) {
 1272+ $codepoint = hexdec($hexstring);
 1273+ $result .= codepointToUtf8( $codepoint );
 1274+ } else {
 1275+ $result .= "&#x" . $hexstring . substr( $invalue, $i, 1 );
 1276+ }
 1277+ } else {
 1278+ $result .= substr( $invalue, $i, 1 );
 1279+ }
 1280+ }
 1281+ // reverse the transform that we made for reversibility reasons.
 1282+ return strtr( $result, array( "&#x0" => "&#x" ) );
 1283+ }
 1284+
 1285+
11661286 }
11671287
11681288 ?>
Index: trunk/phase3/RELEASE-NOTES
@@ -614,6 +614,8 @@
615615 * Fix typo in undefined array index access prevention
616616 * (bug 2947) Update namespaces for sr localization
617617 * (bug 2952) Added Asturian language file with translated namespaces
 618+* (bug 2676) Apply a protective transformation on editing input/output
 619+ for browsers that hit the Unicode blacklist. Patch by plugwash.
618620
619621
620622 === Caveats ===
Index: trunk/phase3/languages/Language.php
@@ -730,7 +730,7 @@
731731 press \"Save page\".<br />",
732732 'yourtext' => 'Your text',
733733 'storedversion' => 'Stored version',
734 -'nonunicodebrowser' => "<strong>WARNING: Your browser is not unicode compliant, please change it before editing an article.</strong>",
 734+'nonunicodebrowser' => "<strong>WARNING: Your browser is not unicode compliant. A workaround is in place to allow you to safely edit articles: non-ASCII characters will appear in the edit box as hexadecimal codes.</strong>",
735735 'editingold' => "<strong>WARNING: You are editing an out-of-date
736736 revision of this page.
737737 If you save it, any changes made since this revision will be lost.</strong>",

Status & tagging log