r111973 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r111972‎ | r111973 | r111974 >
Date:22:32, 20 February 2012
Author:dantman
Status:ok (Comments)
Tags:
Comment:
Revert Microdata improvements in r111891, r111898, r111899, r111901, r111903, and r111906 till after the git migration.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES-1.20 (modified) (history)
  • /trunk/phase3/includes/Sanitizer.php (modified) (history)
  • /trunk/phase3/includes/parser/Tidy.php (modified) (history)
  • /trunk/phase3/includes/tidy.conf (modified) (history)
  • /trunk/phase3/tests/parser/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/tests/parser/parserTests.txt
@@ -5420,30 +5420,6 @@
54215421 !! end
54225422
54235423 !! test
5424 -Sanitizer: Validating that <meta> and <link> work, but only for Microdata
5425 -!! input
5426 -<div itemscope>
5427 - <meta itemprop="hello" content="world">
5428 - <meta http-equiv="refresh" content="5">
5429 - <meta itemprop="hello" http-equiv="refresh" content="5">
5430 - <link itemprop="hello" href="{{SERVER}}">
5431 - <link rel="stylesheet" href="{{SERVER}}">
5432 - <link rel="stylesheet" itemprop="hello" href="{{SERVER}}">
5433 -</div>
5434 -!! result
5435 -<div itemscope="itemscope">
5436 -<p> <meta itemprop="hello" content="world" />
5437 - &lt;meta http-equiv="refresh" content="5"&gt;
5438 - <meta itemprop="hello" content="5" />
5439 -</p>
5440 - <link itemprop="hello" href="http&#58;//Britney-Spears" />
5441 - &lt;link rel="stylesheet" href="<a rel="nofollow" class="external free" href="http://Britney-Spears">http://Britney-Spears</a>"&gt;
5442 - <link itemprop="hello" href="http&#58;//Britney-Spears" />
5443 -</div>
5444 -
5445 -!! end
5446 -
5447 -!! test
54485424 Language converter: output gets cut off unexpectedly (bug 5757)
54495425 !! options
54505426 language=zh
Index: trunk/phase3/includes/parser/Tidy.php
@@ -41,15 +41,9 @@
4242 dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
4343 $this->mMarkerIndex = 0;
4444
45 - // Replace <mw:editsection> elements with placeholders
4645 $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
4746 array( &$this, 'replaceEditSectionLinksCallback' ), $text );
4847
49 - // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
50 - // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
51 - $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );
52 -
53 - // Wrap the whole thing in a doctype and body for Tidy.
5448 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
5549 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
5650 '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>';
@@ -74,13 +68,7 @@
7569 * @return string
7670 */
7771 public function postprocess( $text ) {
78 - // Revert <html-{link,meta}> back to <{link,meta}>
79 - $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );
80 -
81 - // Restore the contents of placeholder tokens
82 - $text = $this->mTokens->replace( $text );
83 -
84 - return $text;
 72+ return $this->mTokens->replace( $text );
8573 }
8674
8775 }
Index: trunk/phase3/includes/Sanitizer.php
@@ -364,17 +364,14 @@
365365 * @return string
366366 */
367367 static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) {
368 - global $wgUseTidy, $wgHtml5, $wgAllowMicrodataAttributes, $wgAllowImageTag;
 368+ global $wgUseTidy;
369369
370370 static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
371371 $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
372372
373373 wfProfileIn( __METHOD__ );
374374
375 - // Base our staticInitialised variable off of the global config state so that if the globals
376 - // are changed (like in the secrewed up test system) we will re-initialise the settings.
377 - $globalContext = implode( '-', compact( 'wgHtml5', 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
378 - if ( !$staticInitialised || $staticInitialised != $globalContext ) {
 375+ if ( !$staticInitialised ) {
379376
380377 $htmlpairsStatic = array( # Tags that must be closed
381378 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
@@ -384,19 +381,12 @@
385382 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn',
386383 'kbd', 'samp'
387384 );
388 - if ( $wgHtml5 ) {
389 - $htmlpairsStatic = array_merge( $htmlpairsStatic, array( 'data', 'time' ) );
390 - }
391385 $htmlsingle = array(
392386 'br', 'hr', 'li', 'dt', 'dd'
393387 );
394388 $htmlsingleonly = array( # Elements that cannot have close tags
395389 'br', 'hr'
396390 );
397 - if ( $wgHtml5 && $wgAllowMicrodataAttributes ) {
398 - $htmlsingle[] = $htmlsingleonly[] = 'meta';
399 - $htmlsingle[] = $htmlsingleonly[] = 'link';
400 - }
401391 $htmlnest = array( # Tags that can be nested--??
402392 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
403393 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
@@ -411,6 +401,7 @@
412402 'li',
413403 );
414404
 405+ global $wgAllowImageTag;
415406 if ( $wgAllowImageTag ) {
416407 $htmlsingle[] = 'img';
417408 $htmlsingleonly[] = 'img';
@@ -425,7 +416,7 @@
426417 foreach ( $vars as $var ) {
427418 $$var = array_flip( $$var );
428419 }
429 - $staticInitialised = $globalContext;
 420+ $staticInitialised = true;
430421 }
431422 # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
432423 $extratags = array_flip( $extratags );
@@ -537,10 +528,6 @@
538529 call_user_func_array( $processCallback, array( &$params, $args ) );
539530 }
540531
541 - if ( !Sanitizer::validateTag( $params, $t ) ) {
542 - $badtag = true;
543 - }
544 -
545532 # Strip non-approved attributes from the tag
546533 $newparams = Sanitizer::fixTagAttributes( $params, $t );
547534 }
@@ -564,24 +551,16 @@
565552 preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
566553 $x, $regs );
567554 @list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
568 - $badtag = false;
569555 if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
570556 if( is_callable( $processCallback ) ) {
571557 call_user_func_array( $processCallback, array( &$params, $args ) );
572558 }
573 -
574 - if ( !Sanitizer::validateTag( $params, $t ) ) {
575 - $badtag = true;
576 - }
577 -
578559 $newparams = Sanitizer::fixTagAttributes( $params, $t );
579 - if ( !$badtag ) {
580 - $rest = str_replace( '>', '&gt;', $rest );
581 - $text .= "<$slash$t$newparams$brace$rest";
582 - continue;
583 - }
 560+ $rest = str_replace( '>', '&gt;', $rest );
 561+ $text .= "<$slash$t$newparams$brace$rest";
 562+ } else {
 563+ $text .= '&lt;' . str_replace( '>', '&gt;', $x);
584564 }
585 - $text .= '&lt;' . str_replace( '>', '&gt;', $x);
586565 }
587566 }
588567 wfProfileOut( __METHOD__ );
@@ -730,37 +709,6 @@
731710 }
732711
733712 /**
734 - * Takes attribute names and values for a tag and the tah name and
735 - * validates that the tag is allowed to be present.
736 - * This DOES NOT validate the attributes, nor does it validate the
737 - * tags themselves. This method only handles the special circumstances
738 - * where we may want to allow a tag within content but ONLY when it has
739 - * specific attributes set.
740 - *
741 - * @param $
742 - */
743 - static function validateTag( $params, $element ) {
744 - $params = Sanitizer::decodeTagAttributes( $params );
745 -
746 - if ( $element == 'meta' || $element == 'link' ) {
747 - if ( !isset( $params['itemprop'] ) ) {
748 - // <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
749 - return false;
750 - }
751 - if ( $element == 'meta' && !isset( $params['content'] ) ) {
752 - // <meta> must have a content="" for the itemprop
753 - return false;
754 - }
755 - if ( $element == 'link' && !isset( $params['href'] ) ) {
756 - // <link> must have an associated href=""
757 - return false;
758 - }
759 - }
760 -
761 - return true;
762 - }
763 -
764 - /**
765713 * Take an array of attribute names and values and normalize or discard
766714 * illegal values for the given element type.
767715 *
@@ -861,7 +809,7 @@
862810 unset( $out['itemid'] );
863811 unset( $out['itemref'] );
864812 }
865 - # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
 813+ # TODO: Strip itemprop if we aren't descendants of an itemscope.
866814 }
867815 return $out;
868816 }
@@ -1486,7 +1434,10 @@
14871435 * @return Array
14881436 */
14891437 static function attributeWhitelist( $element ) {
1490 - $list = Sanitizer::setupAttributeWhitelist();
 1438+ static $list;
 1439+ if( !isset( $list ) ) {
 1440+ $list = Sanitizer::setupAttributeWhitelist();
 1441+ }
14911442 return isset( $list[$element] )
14921443 ? $list[$element]
14931444 : array();
@@ -1500,13 +1451,6 @@
15011452 static function setupAttributeWhitelist() {
15021453 global $wgAllowRdfaAttributes, $wgHtml5, $wgAllowMicrodataAttributes;
15031454
1504 - static $whitelist, $staticInitialised;
1505 - $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgHtml5', 'wgAllowMicrodataAttributes' ) );
1506 -
1507 - if ( isset( $whitelist ) && $staticInitialised == $globalContext ) {
1508 - return $whitelist;
1509 - }
1510 -
15111455 $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
15121456
15131457 if ( $wgAllowRdfaAttributes ) {
@@ -1539,7 +1483,7 @@
15401484
15411485 # Numbers refer to sections in HTML 4.01 standard describing the element.
15421486 # See: http://www.w3.org/TR/html4/
1543 - $whitelist = array(
 1487+ $whitelist = array (
15441488 # 7.5.4
15451489 'div' => $block,
15461490 'center' => $common, # deprecated
@@ -1667,26 +1611,7 @@
16681612 # 'title' may not be 100% valid here; it's XHTML
16691613 # http://www.w3.org/TR/REC-MathML/
16701614 'math' => array( 'class', 'style', 'id', 'title' ),
1671 - );
1672 -
1673 - if ( $wgHtml5 ) {
1674 - # HTML5 elements, defined by:
1675 - # http://www.whatwg.org/specs/web-apps/current-work/multipage/
1676 - $whitelist += array(
1677 - 'data' => array_merge( $common, array( 'value' ) ),
1678 - 'time' => array_merge( $common, array( 'datetime' ) ),
1679 -
1680 - // meta and link are only present when Microdata is allowed anyways
1681 - // so we don't bother adding another condition here
1682 - // meta and link are only valid for use as Microdata so we do not
1683 - // allow the common attributes here.
1684 - 'meta' => array( 'itemprop', 'content' ),
1685 - 'link' => array( 'itemprop', 'href' ),
16861615 );
1687 - }
1688 -
1689 - $staticInitialised = $globalContext;
1690 -
16911616 return $whitelist;
16921617 }
16931618
Index: trunk/phase3/includes/tidy.conf
@@ -16,8 +16,3 @@
1717 quote-nbsp: yes
1818 fix-backslash: no
1919 fix-uri: no
20 -
21 -# Don't strip html5 elements we support
22 -# html-{meta,link} is a hack we use to prevent Tidy from stripping <meta> and <link> used in the body for Microdata
23 -new-empty-tags: html-meta, html-link
24 -new-inline-tags: data, time
Index: trunk/phase3/RELEASE-NOTES-1.20
@@ -22,8 +22,6 @@
2323 * (bug 34475) Add support for IP/CIDR notation to tablesorter
2424 * (bug 27619) Remove preference option to display broken links as link?
2525 * (bug 15404) Add support for sorting fractions in jquery.tablesorter
26 -* The <data>, <time>, <meta>, and <link> elements are allowed within WikiText for use
27 - with Microdata.
2826
2927 === Bug fixes in 1.20 ===
3028 * (bug 30245) Use the correct way to construct a log page title.

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r111891Handle one part of bug 32545 while improving MediaWiki's support for Microdat...dantman21:43, 19 February 2012
r111898Followup r111891; Base the $staticInitialised variable off of the global setu...dantman23:40, 19 February 2012
r111899Followup r111898; Do the same for the attribute whitelist.dantman00:03, 20 February 2012
r111901Followup r111891; Update the test to also make sure things like http-equiv an...dantman00:42, 20 February 2012
r111903Followup r111891; Update tidy.conf so that it doesn't strip out <data> and <t...dantman01:01, 20 February 2012
r111906Followup r111903; Introduce a new Tidy hack to prevent tidy from stripping th...dantman02:12, 20 February 2012

Comments

#Comment by RobLa-WMF (talk | contribs)   22:53, 20 February 2012

Thanks for helping out. Feel free to recommit if you line up a reviewer for this change.

Status & tagging log