r81583 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r81582‎ | r81583 | r81584 >
Date:01:38, 6 February 2011
Author:dantman
Status:resolved (Comments)
Tags:needs-php-test 
Comment:
Switch <editsection> to <mw:editsection> and start hiding these editsection tags from Tidy so it doesn't break them.
Modified paths:
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)
  • /trunk/phase3/includes/parser/ParserOutput.php (modified) (history)
  • /trunk/phase3/includes/parser/Tidy.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/Tidy.php
@@ -6,6 +6,54 @@
77 */
88
99 /**
 10+ * Class used to hide mw:editsection tokens from Tidy so that it doesn't break them
 11+ * or break on them. This is a bit of a hack for now, but hopefully in the future
 12+ * we may create a real postprocessor or something that will replace this.
 13+ * It's called wrapper because for now it basically takes over MWTidy::tidy's task
 14+ * of wrapping the text in a xhtml block
 15+ *
 16+ * This re-uses some of the parser's UNIQ tricks, though some of it is private so it's
 17+ * duplicated. Perhaps we should create an abstract marker hiding class.
 18+ */
 19+class MWTidyWrapper {
 20+
 21+ protected $mTokens, $mUniqPrefix;
 22+
 23+ public function __construct() {
 24+ $this->mTokens = null;
 25+ $this->mUniqPrefix = null;
 26+ }
 27+
 28+ public function getWrapped( $text ) {
 29+ $this->mTokens = new ReplacementArray;
 30+ $this->mUniqPrefix = "\x7fUNIQ" . dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
 31+ $this->mMarkerIndex = 0;
 32+ $wrappedtext = preg_replace_callback( '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</(?:mw:)?editsection>))#', array( &$this, 'replaceEditSectionLinksCallback' ), $text );
 33+
 34+ $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 35+ ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 36+ '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>';
 37+
 38+ return $wrappedtext;
 39+ }
 40+
 41+ /**
 42+ * @private
 43+ */
 44+ function replaceEditSectionLinksCallback( $m ) {
 45+ $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
 46+ $this->mMarkerIndex++;
 47+ $this->mTokens->setPair( $marker, $m[0] );
 48+ return $marker;
 49+ }
 50+
 51+ public function postprocess( $text ) {
 52+ return $this->mTokens->replace( $text );
 53+ }
 54+
 55+}
 56+
 57+/**
1058 * Class to interact with HTML tidy
1159 *
1260 * Either the external tidy program or the in-process tidy extension
@@ -27,9 +75,8 @@
2876 public static function tidy( $text ) {
2977 global $wgTidyInternal;
3078
31 - $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
32 -' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
33 -'<head><title>test</title></head><body>'.$text.'</body></html>';
 79+ $wrapper = new MWTidyWrapper;
 80+ $wrappedtext = $wrapper->getWrapped( $text );
3481
3582 if( $wgTidyInternal ) {
3683 $correctedtext = self::execInternalTidy( $wrappedtext );
@@ -41,8 +88,14 @@
4289 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
4390 }
4491
 92+ $correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens
 93+
4594 return $correctedtext;
4695 }
 96+
 97+ function replaceEditSectionLinksCallback( $m ) {
 98+
 99+ }
47100
48101 /**
49102 * Check HTML for errors, used if $wgValidateAllHtml = true.
Index: trunk/phase3/includes/parser/Parser.php
@@ -3948,10 +3948,10 @@
39493949 // We use a page and section attribute to stop the language converter from converting these important bits
39503950 // of data, but put the headline hint inside a content block because the language converter is supposed to
39513951 // be able to convert that piece of data.
3952 - $editlink = '<editsection page="' . htmlspecialchars($editlinkArgs[0]);
 3952+ $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
39533953 $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
39543954 if ( isset($editlinkArgs[2]) ) {
3955 - $editlink .= '>' . $editlinkArgs[2] . '</editsection>';
 3955+ $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
39563956 } else {
39573957 $editlink .= '/>';
39583958 }
Index: trunk/phase3/includes/parser/ParserOutput.php
@@ -137,7 +137,7 @@
138138
139139 function getText() {
140140 if ( $this->mEditSectionTokens ) {
141 - return preg_replace_callback( '#<editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</editsection>))#', array( &$this, 'replaceEditSectionLinksCallback' ), $this->mText );
 141+ return preg_replace_callback( '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</(?:mw:)?editsection>))#', array( &$this, 'replaceEditSectionLinksCallback' ), $this->mText );
142142 }
143143 return $this->mText;
144144 }

Follow-up revisions

RevisionCommit summaryAuthorDate
r81678Followup r81583, break some of the long lines and centralize the regex.dantman01:31, 8 February 2011
r92304Removed unused functionaaron20:47, 15 July 2011

Comments

#Comment by Nikerabbit (talk | contribs)   15:49, 7 February 2011

Could you break the very long lines in ParserOutput.php and Tidy.php?

#Comment by Aaron Schulz (talk | contribs)   21:30, 13 July 2011

What is the empty replaceEditSectionLinksCallback() there for?

#Comment by Dantman (talk | contribs)   20:46, 15 July 2011

Whoops, the replace code used to be part of the Tidy class until I decided it would be better to split it out. Feel free to cut that out.

#Comment by 😂 (talk | contribs)   17:36, 5 August 2011
  • Definitely needs some unit tests
  • If we're going to have a class for Tidy-related stuff, it might make sense to move more of Parser's tidy code into it

Status & tagging log