r14530 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r14529 | r14530 | r14531 >
Date:	19:38, 1 June 2006
Author:	brion
Status:	old
Tags:
Comment:	Fix regressions in parser with incomplete tag stripping, plus some old bugs: * (bug 885) Pre-save transform no longer silently appends close tags * Pre-save transform no longer changes the case of close tags
Modified paths:	/trunk/phase3/RELEASE-NOTES (modified) (history) /trunk/phase3/includes/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/Parser.php
—	—	@@ -311,20 +311,20 @@
312	312	function getOptions() { return $this->mOptions; }
313	313
314	314	/**
315		~~- * Replaces all occurrences of <$tag>content</$tag> in the text~~
316		~~- * with a random marker and returns the new text. the output parameter~~
317		~~- * $content will be an associative array filled with data on the form~~
318		~~- * $unique_marker => content.~~
	315	+ * Replaces all occurrences of HTML-style comments and the given tags
	316	+ * in the text with a random marker and returns the new text. The output
	317	+ * parameter $matches will be an associative array filled with data in
	318	+ * the form:
	319	+ * 'UNIQ-xxxxx' => array(
	320	+ * 'element',
	321	+ * 'tag content',
	322	+ * array( 'param' => 'x' ),
	323	+ * '<element param="x">tag content</element>' ) )
319	324	*
320		~~- * If $content is already set, the additional entries will be appended~~
321		~~- * If $tag is set to STRIP_COMMENTS, the function will extract~~
322		~~- * <!-- HTML comments -->~~
	325	+ * @param $elements list of element names. Comments are always extracted.
	326	+ * @param $text Source text string.
	327	+ * @param $uniq_prefix
323	328	*
324		~~- * $output: array( 'UNIQ-xxxxx' => array(~~
325		~~- * 'element',~~
326		~~- * 'tag content',~~
327		~~- * array( 'param' => 'x' ),~~
328		~~- * '<element param="x">' ) )~~
329	329	* @private
330	330	* @static
331	331	*/
—	—	@@ -334,58 +334,59 @@
335	335	$stripped = '';
336	336	$matches = array();
337	337
338		~~- if( $elements == STRIP_COMMENTS ) {~~
339		~~- $start = '/<!--()()/';~~
340		~~- } else {~~
341		~~- $taglist = implode( '|', $elements );~~
342		~~- $start = "/<($taglist)(\\s+[^>]*|\\s*\/?)>/i";~~
343		~~- }~~
	338	+ $taglist = implode( '|', $elements );
	339	+ $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
344	340
345	341	while ( '' != $text ) {
346	342	$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
347	343	$stripped .= $p[0];
348		~~- if( count( $p ) < 4 ) {~~
	344	+ if( count( $p ) < 5 ) {
349	345	break;
350	346	}
351		~~- $element = $p[1];~~
352		~~- $attributes = $p[2];~~
353		~~- $inside = $p[3];~~
354		-
355		~~- // If $attributes ends with '/', we have an empty element tag, <tag />~~
356		~~- if( $element != '' && substr( $attributes, -1 ) == '/' ) {~~
357		~~- $attributes = substr( $attributes, 0, -1);~~
358		~~- $empty = '/';~~
	347	+ if( count( $p ) > 5 ) {
	348	+ // comment
	349	+ $element = $p[4];
	350	+ $attributes = '';
	351	+ $close = '';
	352	+ $inside = $p[5];
359	353	} else {
360		~~- $empty = '';~~
	354	+ // tag
	355	+ $element = $p[1];
	356	+ $attributes = $p[2];
	357	+ $close = $p[3];
	358	+ $inside = $p[4];
361	359	}
362	360
363	361	$marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++);
364	362	$stripped .= $marker;
365	363
366		~~- if ( $empty === '/' ) {~~
	364	+ if ( $close === '/>' ) {
367	365	// Empty element tag, <tag />
368	366	$content = null;
369	367	$text = $inside;
	368	+ $tail = null;
370	369	} else {
371		~~- if( $element ) {~~
372		~~- $end = "/<\\/$element\\s*>/i";~~
	370	+ if( $element == '!--' ) {
	371	+ $end = '/(-->)/';
373	372	} else {
374		~~- $end = '/-->/';~~
	373	+ $end = "/(<\\/$element\\s*>)/i";
375	374	}
376		~~- $q = preg_split( $end, $inside, 2 );~~
	375	+ $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
377	376	$content = $q[0];
378		~~- if( count( $q ) < 2 ) {~~
	377	+ if( count( $q ) < 3 ) {
379	378	# No end tag -- let it run out to the end of the text.
	379	+ $tail = '';
380	380	$text = '';
381	381	} else {
382		~~- $text = $q[1];~~
	382	+ $tail = $q[1];
	383	+ $text = $q[2];
383	384	}
384	385	}
385	386
386	387	$matches[$marker] = array( $element,
387	388	$content,
388	389	Sanitizer::decodeTagAttributes( $attributes ),
389		~~- "<$element$attributes$empty>" );~~
	390	+ "<$element$attributes$close$content$tail" );
390	391	}
391	392	return $stripped;
392	393	}
—	—	@@ -409,6 +410,7 @@
410	411	# Replace any instances of the placeholders
411	412	$uniq_prefix = $this->mUniqPrefix;
412	413	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
	414	+ $commentState = array();
413	415
414	416	$elements = array_merge(
415	417	array( 'nowiki', 'pre', 'gallery' ),
—	—	@@ -422,27 +424,24 @@
423	425	}
424	426
425	427
426		~~- // Strip comments in a first pass.~~
427		~~- // This saves us from needlessly rendering extensions in comment text~~
428		~~- $text = Parser::extractTagsAndParams(STRIP_COMMENTS, $text, $comment_matches, $uniq_prefix);~~
429		~~- $commentState = array();~~
430		~~- foreach( $comment_matches as $marker => $data ){~~
431		~~- list( $element, $content, $params, $tag ) = $data;~~
432		~~- $commentState[$marker] = '<!--' . $content . '-->';~~
433		~~- }~~
434		-
435	428	$matches = array();
436	429	$text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
437	430
438	431	foreach( $matches as $marker => $data ) {
439	432	list( $element, $content, $params, $tag ) = $data;
440		~~- // Restore any comments; the extension can deal with them.~~
441		~~- if( $content !== null) {~~
442		~~- $content = strtr( $content, $commentState );~~
443		~~- }~~
444	433	if( $render ) {
445	434	$tagName = strtolower( $element );
446	435	switch( $tagName ) {
	436	+ case '!--':
	437	+ // Comment
	438	+ if( substr( $tag, -3 ) == '-->' ) {
	439	+ $output = $tag;
	440	+ } else {
	441	+ // Unclosed comment in input.
	442	+ // Close it so later stripping can remove it
	443	+ $output = "$tag-->";
	444	+ }
	445	+ break;
447	446	case 'html':
448	447	if( $wgRawHtml ) {
449	448	$output = $content;
—	—	@@ -473,25 +472,20 @@
474	473	}
475	474	} else {
476	475	// Just stripping tags; keep the source
477		~~- if( $content === null ) {~~
478		~~- $output = $tag;~~
479		~~- } else {~~
480		~~- $output = "$tag$content</$element>";~~
481		~~- }~~
	476	+ $output = $tag;
482	477	}
483		~~- $state[$element][$marker] = $output;~~
	478	+ if( !$stripcomments && $element == '!--' ) {
	479	+ $commentState[$marker] = $output;
	480	+ } else {
	481	+ $state[$element][$marker] = $output;
	482	+ }
484	483	}
485	484
486	485	# Unstrip comments unless explicitly told otherwise.
487	486	# (The comments are always stripped prior to this point, so as to
488	487	# not invoke any extension tags / parser hooks contained within
489	488	# a comment.)
490		~~- if ( $stripcomments ) {~~
491		~~- // Add remaining comments to the state array~~
492		~~- foreach( $commentState as $marker => $content ) {~~
493		~~- $state['comment'][$marker] = $content;~~
494		~~- }~~
495		~~- } else {~~
	489	+ if ( !$stripcomments ) {
496	490	// Put them all back and forget them
497	491	$text = strtr( $text, $commentState );
498	492	}
Index: trunk/phase3/RELEASE-NOTES
—	—	@@ -405,6 +405,8 @@
406	406	further parsing (<ref>-style). There should no longer be surprise
407	407	expansion of foreign extensions inside HTML output, or differences
408	408	in behavior based on the order tags are loaded.
	409	+* (bug 885) Pre-save transform no longer silently appends close tags
	410	+* Pre-save transform no longer changes the case of close tags
409	411
410	412
411	413	== Compatibility ==

Follow-up revisions

Revision	Commit summary	Author	Date
r14586	Backport fixes and bump to 1.6.7...	brion	06:27, 6 June 2006

Status & tagging log

01:58, 13 October 2010 😂 (talk | contribs) changed the status of r14530 [removed: new added: old]