r14511 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r14510 | r14511 | r14512 >
Date:	06:16, 1 June 2006
Author:	brion
Status:	old
Tags:
Comment:	* (bug 5384) Fix <!-- comments --> in <ref> extension * Nesting of different tag extensions and comments should now work more consistently and more safely. A cleaner, one-pass tag strip lets the 'outer' tag either take source (<nowiki>-style) or pass it down to further parsing (<ref>-style). There should no longer be surprise expansion of foreign extensions inside HTML output, or differences in behavior based on the order tags are loaded.
Modified paths:	/trunk/phase3/RELEASE-NOTES (modified) (history) /trunk/phase3/includes/Parser.php (modified) (history) /trunk/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.txt
—	—	@@ -301,6 +301,7 @@
302	302	<cite>
303	303	<em>
304	304	</pre>
	305	+
305	306	!! end
306	307
307	308	###
Index: trunk/phase3/includes/Parser.php
—	—	@@ -9,6 +9,7 @@
10	10	/** */
11	11	require_once( 'Sanitizer.php' );
12	12	require_once( 'HttpFunctions.php' );
	13	+require_once( 'ImageGallery.php' );
13	14
14	15	/**
15	16	* Update this version number when the ParserOutput format
—	—	@@ -319,63 +320,60 @@
320	321	* If $tag is set to STRIP_COMMENTS, the function will extract
321	322	* <!-- HTML comments -->
322	323	*
	324	+ * $output: array( 'UNIQ-xxxxx' => array(
	325	+ * 'element',
	326	+ * 'tag content',
	327	+ * array( 'param' => 'x' ),
	328	+ * '<element param="x">' ) )
323	329	* @private
324	330	* @static
325	331	*/
326		~~- function extractTagsAndParams($tag, $text, &$content, &$tags, &$params, $uniq_prefix = ''){~~
327		~~- $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();~~
328		~~- if ( !$content ) {~~
329		~~- $content = array( );~~
330		~~- }~~
	332	+ function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
	333	+ $rand = Parser::getRandomString();
331	334	$n = 1;
332	335	$stripped = '';
	336	+ $matches = array();
333	337
334		~~- if ( !$tags ) {~~
335		~~- $tags = array( );~~
336		~~- }~~
337		-
338		~~- if ( !$params ) {~~
339		~~- $params = array( );~~
340		~~- }~~
341		-
342		~~- if( $tag == STRIP_COMMENTS ) {~~
343		~~- $start = '/<!--()/';~~
344		~~- $end = '/-->/';~~
	338	+ if( $elements == STRIP_COMMENTS ) {
	339	+ $start = '/<!--()()/';
345	340	} else {
346		~~- $start = "/<$tag(\\s+[^>]*|\\s*\/?)>/i";~~
347		~~- $end = "/<\\/$tag\\s*>/i";~~
	341	+ $taglist = implode( '|', $elements );
	342	+ $start = "/<($taglist)(\\s+[^>]*|\\s*\/?)>/i";
348	343	}
349	344
350	345	while ( '' != $text ) {
351	346	$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
352	347	$stripped .= $p[0];
353		~~- if( count( $p ) < 3 ) {~~
	348	+ if( count( $p ) < 4 ) {
354	349	break;
355	350	}
356		~~- $attributes = $p[1];~~
357		~~- $inside = $p[2];~~
	351	+ $element = $p[1];
	352	+ $attributes = $p[2];
	353	+ $inside = $p[3];
358	354
359	355	// If $attributes ends with '/', we have an empty element tag, <tag />
360		~~- if( $tag != STRIP_COMMENTS && substr( $attributes, -1 ) == '/' ) {~~
	356	+ if( $element != '' && substr( $attributes, -1 ) == '/' ) {
361	357	$attributes = substr( $attributes, 0, -1);
362	358	$empty = '/';
363	359	} else {
364	360	$empty = '';
365	361	}
366	362
367		~~- $marker = $rnd . sprintf('%08X', $n++);~~
	363	+ $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++);
368	364	$stripped .= $marker;
369	365
370		~~- $tags[$marker] = "<$tag$attributes$empty>";~~
371		~~- $params[$marker] = Sanitizer::decodeTagAttributes( $attributes );~~
372		-
373	366	if ( $empty === '/' ) {
374	367	// Empty element tag, <tag />
375		~~- $content[$marker] = null;~~
	368	+ $content = null;
376	369	$text = $inside;
377	370	} else {
	371	+ if( $element ) {
	372	+ $end = "/<\\/$element\\s*>/i";
	373	+ } else {
	374	+ $end = '/-->/';
	375	+ }
378	376	$q = preg_split( $end, $inside, 2 );
379		~~- $content[$marker] = $q[0];~~
	377	+ $content = $q[0];
380	378	if( count( $q ) < 2 ) {
381	379	# No end tag -- let it run out to the end of the text.
382	380	break;
—	—	@@ -383,27 +381,16 @@
384	382	$text = $q[1];
385	383	}
386	384	}
	385	+
	386	+ $matches[$marker] = array( $element,
	387	+ $content,
	388	+ Sanitizer::decodeTagAttributes( $attributes ),
	389	+ "<$element$attributes$empty>" );
387	390	}
388	391	return $stripped;
389	392	}
390	393
391	394	/**
392		~~- * Wrapper function for extractTagsAndParams~~
393		~~- * for cases where $tags and $params isn't needed~~
394		~~- * i.e. where tags will never have params, like <nowiki>~~
395		- *
396		~~- * @private~~
397		~~- * @static~~
398		~~- */~~
399		~~- function extractTags( $tag, $text, &$content, $uniq_prefix = '' ) {~~
400		~~- $dummy_tags = array();~~
401		~~- $dummy_params = array();~~
402		-
403		~~- return Parser::extractTagsAndParams( $tag, $text, $content,~~
404		~~- $dummy_tags, $dummy_params, $uniq_prefix );~~
405		~~- }~~
406		-
407		- /**
408	395	* Strips and renders nowiki, pre, math, hiero
409	396	* If $render is set, performs necessary rendering operations on plugins
410	397	* Returns the text, and fills an array with data needed in unstrip()
—	—	@@ -418,124 +405,102 @@
419	406	*/
420	407	function strip( $text, &$state, $stripcomments = false ) {
421	408	$render = ($this->mOutputType == OT_HTML);
422		~~- $html_content = array();~~
423		~~- $nowiki_content = array();~~
424		~~- $math_content = array();~~
425		~~- $pre_content = array();~~
426		~~- $comment_content = array();~~
427		~~- $ext_content = array();~~
428		~~- $ext_tags = array();~~
429		~~- $ext_params = array();~~
430		~~- $gallery_content = array();~~
431	409
432	410	# Replace any instances of the placeholders
433	411	$uniq_prefix = $this->mUniqPrefix;
434	412	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
435		-
436		~~- # html~~
	413	+
	414	+ $elements = array_merge(
	415	+ array( 'nowiki', 'pre', 'gallery' ),
	416	+ array_keys( $this->mTagHooks ) );
437	417	global $wgRawHtml;
438	418	if( $wgRawHtml ) {
439		~~- $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);~~
440		~~- foreach( $html_content as $marker => $content ) {~~
441		~~- if ($render ) {~~
442		~~- # Raw and unchecked for validity.~~
443		~~- $state['html'][$marker] = $content;~~
444		~~- } else {~~
445		~~- $state['html'][$marker] = '<html>'.$content.'</html>';~~
446		~~- }~~
447		~~- }~~
	419	+ $elements[] = 'html';
448	420	}
449		-
450		~~- # nowiki~~
451		~~- $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);~~
452		~~- foreach( $nowiki_content as $marker => $content ) {~~
453		~~- if( $render ){~~
454		~~- $state['nowiki'][$marker] = wfEscapeHTMLTagsOnly( $content );~~
455		~~- } else {~~
456		~~- $state['nowiki'][$marker] = '<nowiki>'.$content.'</nowiki>';~~
457		~~- }~~
458		~~- }~~
459		-
460		~~- # math~~
461	421	if( $this->mOptions->getUseTeX() ) {
462		~~- $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);~~
463		~~- foreach( $math_content as $marker => $content ){~~
464		~~- if( $render ) {~~
465		~~- $state['math'][$marker] = renderMath( $content );~~
466		~~- } else {~~
467		~~- $state['math'][$marker] = '<math>'.$content.'</math>';~~
468		~~- }~~
469		~~- }~~
	422	+ $elements[] = 'math';
470	423	}
	424	+
471	425
472		~~- # pre~~
473		~~- $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);~~
474		~~- foreach( $pre_content as $marker => $content ){~~
475		~~- if( $render ){~~
476		~~- $state['pre'][$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';~~
477		~~- } else {~~
478		~~- $state['pre'][$marker] = '<pre>'.$content.'</pre>';~~
479		~~- }~~
	426	+ // Strip comments in a first pass.
	427	+ // This saves us from needlessly rendering extensions in comment text
	428	+ $text = Parser::extractTagsAndParams(STRIP_COMMENTS, $text, $comment_matches, $uniq_prefix);
	429	+ $commentState = array();
	430	+ foreach( $comment_matches as $marker => $data ){
	431	+ list( $element, $content, $params, $tag ) = $data;
	432	+ $commentState[$marker] = '<!--' . $content . '-->';
480	433	}
481		-
482		~~- # gallery~~
483		~~- $text = Parser::extractTags('gallery', $text, $gallery_content, $uniq_prefix);~~
484		~~- foreach( $gallery_content as $marker => $content ) {~~
485		~~- require_once( 'ImageGallery.php' );~~
486		~~- if ( $render ) {~~
487		~~- $state['gallery'][$marker] = $this->renderImageGallery( $content );~~
488		~~- } else {~~
489		~~- $state['gallery'][$marker] = '<gallery>'.$content.'</gallery>';~~
	434	+
	435	+ $matches = array();
	436	+ $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
	437	+
	438	+ foreach( $matches as $marker => $data ) {
	439	+ list( $element, $content, $params, $tag ) = $data;
	440	+ // Restore any comments; the extension can deal with them.
	441	+ if( $content !== null) {
	442	+ $content = strtr( $content, $commentState );
490	443	}
491		~~- }~~
492		-
493		~~- # Comments~~
494		~~- $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);~~
495		~~- foreach( $comment_content as $marker => $content ){~~
496		~~- $comment_content[$marker] = '<!--'.$content.'-->';~~
497		~~- }~~
498		-
499		~~- # Extensions~~
500		~~- foreach ( $this->mTagHooks as $tag => $callback ) {~~
501		~~- $ext_content[$tag] = array();~~
502		~~- $text = Parser::extractTagsAndParams( $tag, $text, $ext_content[$tag],~~
503		~~- $ext_tags[$tag], $ext_params[$tag], $uniq_prefix );~~
504		~~- foreach( $ext_content[$tag] as $marker => $content ) {~~
505		~~- $full_tag = $ext_tags[$tag][$marker];~~
506		~~- $params = $ext_params[$tag][$marker];~~
507		~~- if ( $render )~~
508		~~- $state[$tag][$marker] = call_user_func_array( $callback, array( $content, $params, $this ) );~~
509		~~- else {~~
510		~~- if ( is_null( $content ) ) {~~
511		~~- // Empty element tag~~
512		~~- $state[$tag][$marker] = $full_tag;~~
	444	+ if( $render ) {
	445	+ switch( $element ) {
	446	+ case 'html':
	447	+ if( $wgRawHtml ) {
	448	+ $output = $content;
	449	+ break;
	450	+ }
	451	+ // Shouldn't happen otherwise. :)
	452	+ case 'nowiki':
	453	+ $output = wfEscapeHTMLTagsOnly( $content );
	454	+ break;
	455	+ case 'math':
	456	+ $output = renderMath( $content );
	457	+ break;
	458	+ case 'pre':
	459	+ // Backwards-compatibility hack
	460	+ $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $content );
	461	+ $output = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
	462	+ break;
	463	+ case 'gallery':
	464	+ $output = $this->renderImageGallery( $content );
	465	+ break;
	466	+ default:
	467	+ $tagName = strtolower( $element );
	468	+ if( isset( $this->mTagHooks[$tagName] ) ) {
	469	+ $output = call_user_func_array( $this->mTagHooks[$tagName],
	470	+ array( $content, $params, $this ) );
513	471	} else {
514		~~- $state[$tag][$marker] = "$full_tag$content</$tag>";~~
	472	+ wfDebugDieBacktrace( "Invalid call hook $element" );
515	473	}
516	474	}
	475	+ } else {
	476	+ // Just stripping tags; keep the source
	477	+ if( $content === null ) {
	478	+ $output = $tag;
	479	+ } else {
	480	+ $output = "$tag$content</$element>";
	481	+ }
517	482	}
	483	+ $state[$element][$marker] = $output;
518	484	}
519	485
520	486	# Unstrip comments unless explicitly told otherwise.
521	487	# (The comments are always stripped prior to this point, so as to
522	488	# not invoke any extension tags / parser hooks contained within
523	489	# a comment.)
524		~~- if ( !$stripcomments ) {~~
525		~~- $tempstate = array( 'comment' => $comment_content );~~
526		~~- $text = $this->unstrip( $text, $tempstate );~~
527		~~- $comment_content = array();~~
528		~~- } else {~~
529		~~- if( !isset( $state['comment'] ) ) {~~
530		~~- $state['comment'] = array();~~
	490	+ if ( $stripcomments ) {
	491	+ // Add remaining comments to the state array
	492	+ foreach( $commentState as $marker => $content ) {
	493	+ $state['comment'][$marker] = $content;
531	494	}
532		~~- $state['comment'] += $comment_content;~~
	495	+ } else {
	496	+ // Put them all back and forget them
	497	+ $text = strtr( $text, $commentState );
533	498	}
534	499
535	500	return $text;
536	501	}
537	502
538	503	/**
539		~~- * restores pre, math, and hiero removed by strip()~~
	504	+ * Restores pre, math, and other extensions removed by strip()
540	505	*
541	506	* always call unstripNoWiki() after this one
542	507	* @private
—	—	@@ -545,20 +510,21 @@
546	511	return $text;
547	512	}
548	513
549		~~- # Must expand in reverse order, otherwise nested tags will be corrupted~~
550		~~- foreach( array_reverse( $state, true ) as $tag => $contentDict ) {~~
	514	+ $replacements = array();
	515	+ foreach( $state as $tag => $contentDict ) {
551	516	if( $tag != 'nowiki' && $tag != 'html' ) {
552		~~- foreach( array_reverse( $contentDict, true ) as $uniq => $content ) {~~
553		~~- $text = str_replace( $uniq, $content, $text );~~
	517	+ foreach( $contentDict as $uniq => $content ) {
	518	+ $replacements[$uniq] = $content;
554	519	}
555	520	}
556	521	}
	522	+ $text = strtr( $text, $replacements );
557	523
558	524	return $text;
559	525	}
560	526
561	527	/**
562		~~- * always call this after unstrip() to preserve the order~~
	528	+ * Always call this after unstrip() to preserve the order
563	529	*
564	530	* @private
565	531	*/
—	—	@@ -567,18 +533,15 @@
568	534	return $text;
569	535	}
570	536
571		~~- # Must expand in reverse order, otherwise nested tags will be corrupted~~
572		~~- if( isset( $state['nowiki'] ) )~~
573		~~- foreach( array_reverse( $state['nowiki'], true ) as $uniq => $content ) {~~
574		~~- $text = str_replace( $uniq, $content, $text );~~
	537	+ $replacements = array();
	538	+ foreach( $state as $tag => $contentDict ) {
	539	+ if( $tag == 'nowiki' || $tag == 'html' ) {
	540	+ foreach( $contentDict as $uniq => $content ) {
	541	+ $replacements[$uniq] = $content;
	542	+ }
575	543	}
576		-
577		~~- global $wgRawHtml;~~
578		~~- if ($wgRawHtml && isset( $state['html'] ) ) {~~
579		~~- foreach( array_reverse( $state['html'], true ) as $uniq => $content ) {~~
580		~~- $text = str_replace( $uniq, $content, $text );~~
581		~~- }~~
582	544	}
	545	+ $text = strtr( $text, $replacements );
583	546
584	547	return $text;
585	548	}
—	—	@@ -593,14 +556,7 @@
594	557	function insertStripItem( $text, &$state ) {
595	558	$rnd = $this->mUniqPrefix . '-item' . Parser::getRandomString();
596	559	if ( !$state ) {
597		~~- $state = array(~~
598		~~- 'html' => array(),~~
599		~~- 'nowiki' => array(),~~
600		~~- 'math' => array(),~~
601		~~- 'pre' => array(),~~
602		~~- 'comment' => array(),~~
603		~~- 'gallery' => array(),~~
604		~~- );~~
	560	+ $state = array();
605	561	}
606	562	$state['item'][$rnd] = $text;
607	563	return $rnd;
Index: trunk/phase3/RELEASE-NOTES
—	—	@@ -398,6 +398,13 @@
399	399	* parserTests.php accepts a --file parameter to run an alternate test sutie
400	400	* parser tests can now test extensions using !!hooks sections
401	401	* Fix oddity with open tag parameters getting stuck on </li>
	402	+* (bug 5384) Fix <!-- comments --> in <ref> extension
	403	+* Nesting of different tag extensions and comments should now work more
	404	+ consistently and more safely. A cleaner, one-pass tag strip lets the
	405	+ 'outer' tag either take source (<nowiki>-style) or pass it down to
	406	+ further parsing (<ref>-style). There should no longer be surprise
	407	+ expansion of foreign extensions inside HTML output, or differences
	408	+ in behavior based on the order tags are loaded.
402	409
403	410
404	411	== Compatibility ==

Follow-up revisions

Revision	Commit summary	Author	Date
r14586	Backport fixes and bump to 1.6.7...	brion	06:27, 6 June 2006

Status & tagging log

01:58, 13 October 2010 😂 (talk | contribs) changed the status of r14511 [removed: new added: old]