Index: trunk/phase3/maintenance/parserTests.txt |
— | — | @@ -301,6 +301,7 @@ |
302 | 302 | <cite> |
303 | 303 | <em> |
304 | 304 | </pre> |
| 305 | + |
305 | 306 | !! end |
306 | 307 | |
307 | 308 | ### |
Index: trunk/phase3/includes/Parser.php |
— | — | @@ -9,6 +9,7 @@ |
10 | 10 | /** */ |
11 | 11 | require_once( 'Sanitizer.php' ); |
12 | 12 | require_once( 'HttpFunctions.php' ); |
| 13 | +require_once( 'ImageGallery.php' ); |
13 | 14 | |
14 | 15 | /** |
15 | 16 | * Update this version number when the ParserOutput format |
— | — | @@ -319,63 +320,60 @@ |
320 | 321 | * If $tag is set to STRIP_COMMENTS, the function will extract |
321 | 322 | * <!-- HTML comments --> |
322 | 323 | * |
| 324 | + * $output: array( 'UNIQ-xxxxx' => array( |
| 325 | + * 'element', |
| 326 | + * 'tag content', |
| 327 | + * array( 'param' => 'x' ), |
| 328 | + * '<element param="x">' ) ) |
323 | 329 | * @private |
324 | 330 | * @static |
325 | 331 | */ |
326 | | - function extractTagsAndParams($tag, $text, &$content, &$tags, &$params, $uniq_prefix = ''){ |
327 | | - $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString(); |
328 | | - if ( !$content ) { |
329 | | - $content = array( ); |
330 | | - } |
| 332 | + function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){ |
| 333 | + $rand = Parser::getRandomString(); |
331 | 334 | $n = 1; |
332 | 335 | $stripped = ''; |
| 336 | + $matches = array(); |
333 | 337 | |
334 | | - if ( !$tags ) { |
335 | | - $tags = array( ); |
336 | | - } |
337 | | - |
338 | | - if ( !$params ) { |
339 | | - $params = array( ); |
340 | | - } |
341 | | - |
342 | | - if( $tag == STRIP_COMMENTS ) { |
343 | | - $start = '/<!--()/'; |
344 | | - $end = '/-->/'; |
| 338 | + if( $elements == STRIP_COMMENTS ) { |
| 339 | + $start = '/<!--()()/'; |
345 | 340 | } else { |
346 | | - $start = "/<$tag(\\s+[^>]*|\\s*\/?)>/i"; |
347 | | - $end = "/<\\/$tag\\s*>/i"; |
| 341 | + $taglist = implode( '|', $elements ); |
| 342 | + $start = "/<($taglist)(\\s+[^>]*|\\s*\/?)>/i"; |
348 | 343 | } |
349 | 344 | |
350 | 345 | while ( '' != $text ) { |
351 | 346 | $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); |
352 | 347 | $stripped .= $p[0]; |
353 | | - if( count( $p ) < 3 ) { |
| 348 | + if( count( $p ) < 4 ) { |
354 | 349 | break; |
355 | 350 | } |
356 | | - $attributes = $p[1]; |
357 | | - $inside = $p[2]; |
| 351 | + $element = $p[1]; |
| 352 | + $attributes = $p[2]; |
| 353 | + $inside = $p[3]; |
358 | 354 | |
359 | 355 | // If $attributes ends with '/', we have an empty element tag, <tag /> |
360 | | - if( $tag != STRIP_COMMENTS && substr( $attributes, -1 ) == '/' ) { |
| 356 | + if( $element != '' && substr( $attributes, -1 ) == '/' ) { |
361 | 357 | $attributes = substr( $attributes, 0, -1); |
362 | 358 | $empty = '/'; |
363 | 359 | } else { |
364 | 360 | $empty = ''; |
365 | 361 | } |
366 | 362 | |
367 | | - $marker = $rnd . sprintf('%08X', $n++); |
| 363 | + $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++); |
368 | 364 | $stripped .= $marker; |
369 | 365 | |
370 | | - $tags[$marker] = "<$tag$attributes$empty>"; |
371 | | - $params[$marker] = Sanitizer::decodeTagAttributes( $attributes ); |
372 | | - |
373 | 366 | if ( $empty === '/' ) { |
374 | 367 | // Empty element tag, <tag /> |
375 | | - $content[$marker] = null; |
| 368 | + $content = null; |
376 | 369 | $text = $inside; |
377 | 370 | } else { |
| 371 | + if( $element ) { |
| 372 | + $end = "/<\\/$element\\s*>/i"; |
| 373 | + } else { |
| 374 | + $end = '/-->/'; |
| 375 | + } |
378 | 376 | $q = preg_split( $end, $inside, 2 ); |
379 | | - $content[$marker] = $q[0]; |
| 377 | + $content = $q[0]; |
380 | 378 | if( count( $q ) < 2 ) { |
381 | 379 | # No end tag -- let it run out to the end of the text. |
382 | 380 | break; |
— | — | @@ -383,27 +381,16 @@ |
384 | 382 | $text = $q[1]; |
385 | 383 | } |
386 | 384 | } |
| 385 | + |
| 386 | + $matches[$marker] = array( $element, |
| 387 | + $content, |
| 388 | + Sanitizer::decodeTagAttributes( $attributes ), |
| 389 | + "<$element$attributes$empty>" ); |
387 | 390 | } |
388 | 391 | return $stripped; |
389 | 392 | } |
390 | 393 | |
391 | 394 | /** |
392 | | - * Wrapper function for extractTagsAndParams |
393 | | - * for cases where $tags and $params isn't needed |
394 | | - * i.e. where tags will never have params, like <nowiki> |
395 | | - * |
396 | | - * @private |
397 | | - * @static |
398 | | - */ |
399 | | - function extractTags( $tag, $text, &$content, $uniq_prefix = '' ) { |
400 | | - $dummy_tags = array(); |
401 | | - $dummy_params = array(); |
402 | | - |
403 | | - return Parser::extractTagsAndParams( $tag, $text, $content, |
404 | | - $dummy_tags, $dummy_params, $uniq_prefix ); |
405 | | - } |
406 | | - |
407 | | - /** |
408 | 395 | * Strips and renders nowiki, pre, math, hiero |
409 | 396 | * If $render is set, performs necessary rendering operations on plugins |
410 | 397 | * Returns the text, and fills an array with data needed in unstrip() |
— | — | @@ -418,124 +405,102 @@ |
419 | 406 | */ |
420 | 407 | function strip( $text, &$state, $stripcomments = false ) { |
421 | 408 | $render = ($this->mOutputType == OT_HTML); |
422 | | - $html_content = array(); |
423 | | - $nowiki_content = array(); |
424 | | - $math_content = array(); |
425 | | - $pre_content = array(); |
426 | | - $comment_content = array(); |
427 | | - $ext_content = array(); |
428 | | - $ext_tags = array(); |
429 | | - $ext_params = array(); |
430 | | - $gallery_content = array(); |
431 | 409 | |
432 | 410 | # Replace any instances of the placeholders |
433 | 411 | $uniq_prefix = $this->mUniqPrefix; |
434 | 412 | #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); |
435 | | - |
436 | | - # html |
| 413 | + |
| 414 | + $elements = array_merge( |
| 415 | + array( 'nowiki', 'pre', 'gallery' ), |
| 416 | + array_keys( $this->mTagHooks ) ); |
437 | 417 | global $wgRawHtml; |
438 | 418 | if( $wgRawHtml ) { |
439 | | - $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix); |
440 | | - foreach( $html_content as $marker => $content ) { |
441 | | - if ($render ) { |
442 | | - # Raw and unchecked for validity. |
443 | | - $state['html'][$marker] = $content; |
444 | | - } else { |
445 | | - $state['html'][$marker] = '<html>'.$content.'</html>'; |
446 | | - } |
447 | | - } |
| 419 | + $elements[] = 'html'; |
448 | 420 | } |
449 | | - |
450 | | - # nowiki |
451 | | - $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix); |
452 | | - foreach( $nowiki_content as $marker => $content ) { |
453 | | - if( $render ){ |
454 | | - $state['nowiki'][$marker] = wfEscapeHTMLTagsOnly( $content ); |
455 | | - } else { |
456 | | - $state['nowiki'][$marker] = '<nowiki>'.$content.'</nowiki>'; |
457 | | - } |
458 | | - } |
459 | | - |
460 | | - # math |
461 | 421 | if( $this->mOptions->getUseTeX() ) { |
462 | | - $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix); |
463 | | - foreach( $math_content as $marker => $content ){ |
464 | | - if( $render ) { |
465 | | - $state['math'][$marker] = renderMath( $content ); |
466 | | - } else { |
467 | | - $state['math'][$marker] = '<math>'.$content.'</math>'; |
468 | | - } |
469 | | - } |
| 422 | + $elements[] = 'math'; |
470 | 423 | } |
| 424 | + |
471 | 425 | |
472 | | - # pre |
473 | | - $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix); |
474 | | - foreach( $pre_content as $marker => $content ){ |
475 | | - if( $render ){ |
476 | | - $state['pre'][$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>'; |
477 | | - } else { |
478 | | - $state['pre'][$marker] = '<pre>'.$content.'</pre>'; |
479 | | - } |
| 426 | + // Strip comments in a first pass. |
| 427 | + // This saves us from needlessly rendering extensions in comment text |
| 428 | + $text = Parser::extractTagsAndParams(STRIP_COMMENTS, $text, $comment_matches, $uniq_prefix); |
| 429 | + $commentState = array(); |
| 430 | + foreach( $comment_matches as $marker => $data ){ |
| 431 | + list( $element, $content, $params, $tag ) = $data; |
| 432 | + $commentState[$marker] = '<!--' . $content . '-->'; |
480 | 433 | } |
481 | | - |
482 | | - # gallery |
483 | | - $text = Parser::extractTags('gallery', $text, $gallery_content, $uniq_prefix); |
484 | | - foreach( $gallery_content as $marker => $content ) { |
485 | | - require_once( 'ImageGallery.php' ); |
486 | | - if ( $render ) { |
487 | | - $state['gallery'][$marker] = $this->renderImageGallery( $content ); |
488 | | - } else { |
489 | | - $state['gallery'][$marker] = '<gallery>'.$content.'</gallery>'; |
| 434 | + |
| 435 | + $matches = array(); |
| 436 | + $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); |
| 437 | + |
| 438 | + foreach( $matches as $marker => $data ) { |
| 439 | + list( $element, $content, $params, $tag ) = $data; |
| 440 | + // Restore any comments; the extension can deal with them. |
| 441 | + if( $content !== null) { |
| 442 | + $content = strtr( $content, $commentState ); |
490 | 443 | } |
491 | | - } |
492 | | - |
493 | | - # Comments |
494 | | - $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix); |
495 | | - foreach( $comment_content as $marker => $content ){ |
496 | | - $comment_content[$marker] = '<!--'.$content.'-->'; |
497 | | - } |
498 | | - |
499 | | - # Extensions |
500 | | - foreach ( $this->mTagHooks as $tag => $callback ) { |
501 | | - $ext_content[$tag] = array(); |
502 | | - $text = Parser::extractTagsAndParams( $tag, $text, $ext_content[$tag], |
503 | | - $ext_tags[$tag], $ext_params[$tag], $uniq_prefix ); |
504 | | - foreach( $ext_content[$tag] as $marker => $content ) { |
505 | | - $full_tag = $ext_tags[$tag][$marker]; |
506 | | - $params = $ext_params[$tag][$marker]; |
507 | | - if ( $render ) |
508 | | - $state[$tag][$marker] = call_user_func_array( $callback, array( $content, $params, $this ) ); |
509 | | - else { |
510 | | - if ( is_null( $content ) ) { |
511 | | - // Empty element tag |
512 | | - $state[$tag][$marker] = $full_tag; |
| 444 | + if( $render ) { |
| 445 | + switch( $element ) { |
| 446 | + case 'html': |
| 447 | + if( $wgRawHtml ) { |
| 448 | + $output = $content; |
| 449 | + break; |
| 450 | + } |
| 451 | + // Shouldn't happen otherwise. :) |
| 452 | + case 'nowiki': |
| 453 | + $output = wfEscapeHTMLTagsOnly( $content ); |
| 454 | + break; |
| 455 | + case 'math': |
| 456 | + $output = renderMath( $content ); |
| 457 | + break; |
| 458 | + case 'pre': |
| 459 | + // Backwards-compatibility hack |
| 460 | + $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $content ); |
| 461 | + $output = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>'; |
| 462 | + break; |
| 463 | + case 'gallery': |
| 464 | + $output = $this->renderImageGallery( $content ); |
| 465 | + break; |
| 466 | + default: |
| 467 | + $tagName = strtolower( $element ); |
| 468 | + if( isset( $this->mTagHooks[$tagName] ) ) { |
| 469 | + $output = call_user_func_array( $this->mTagHooks[$tagName], |
| 470 | + array( $content, $params, $this ) ); |
513 | 471 | } else { |
514 | | - $state[$tag][$marker] = "$full_tag$content</$tag>"; |
| 472 | + wfDebugDieBacktrace( "Invalid call hook $element" ); |
515 | 473 | } |
516 | 474 | } |
| 475 | + } else { |
| 476 | + // Just stripping tags; keep the source |
| 477 | + if( $content === null ) { |
| 478 | + $output = $tag; |
| 479 | + } else { |
| 480 | + $output = "$tag$content</$element>"; |
| 481 | + } |
517 | 482 | } |
| 483 | + $state[$element][$marker] = $output; |
518 | 484 | } |
519 | 485 | |
520 | 486 | # Unstrip comments unless explicitly told otherwise. |
521 | 487 | # (The comments are always stripped prior to this point, so as to |
522 | 488 | # not invoke any extension tags / parser hooks contained within |
523 | 489 | # a comment.) |
524 | | - if ( !$stripcomments ) { |
525 | | - $tempstate = array( 'comment' => $comment_content ); |
526 | | - $text = $this->unstrip( $text, $tempstate ); |
527 | | - $comment_content = array(); |
528 | | - } else { |
529 | | - if( !isset( $state['comment'] ) ) { |
530 | | - $state['comment'] = array(); |
| 490 | + if ( $stripcomments ) { |
| 491 | + // Add remaining comments to the state array |
| 492 | + foreach( $commentState as $marker => $content ) { |
| 493 | + $state['comment'][$marker] = $content; |
531 | 494 | } |
532 | | - $state['comment'] += $comment_content; |
| 495 | + } else { |
| 496 | + // Put them all back and forget them |
| 497 | + $text = strtr( $text, $commentState ); |
533 | 498 | } |
534 | 499 | |
535 | 500 | return $text; |
536 | 501 | } |
537 | 502 | |
538 | 503 | /** |
539 | | - * restores pre, math, and hiero removed by strip() |
| 504 | + * Restores pre, math, and other extensions removed by strip() |
540 | 505 | * |
541 | 506 | * always call unstripNoWiki() after this one |
542 | 507 | * @private |
— | — | @@ -545,20 +510,21 @@ |
546 | 511 | return $text; |
547 | 512 | } |
548 | 513 | |
549 | | - # Must expand in reverse order, otherwise nested tags will be corrupted |
550 | | - foreach( array_reverse( $state, true ) as $tag => $contentDict ) { |
| 514 | + $replacements = array(); |
| 515 | + foreach( $state as $tag => $contentDict ) { |
551 | 516 | if( $tag != 'nowiki' && $tag != 'html' ) { |
552 | | - foreach( array_reverse( $contentDict, true ) as $uniq => $content ) { |
553 | | - $text = str_replace( $uniq, $content, $text ); |
| 517 | + foreach( $contentDict as $uniq => $content ) { |
| 518 | + $replacements[$uniq] = $content; |
554 | 519 | } |
555 | 520 | } |
556 | 521 | } |
| 522 | + $text = strtr( $text, $replacements ); |
557 | 523 | |
558 | 524 | return $text; |
559 | 525 | } |
560 | 526 | |
561 | 527 | /** |
562 | | - * always call this after unstrip() to preserve the order |
| 528 | + * Always call this after unstrip() to preserve the order |
563 | 529 | * |
564 | 530 | * @private |
565 | 531 | */ |
— | — | @@ -567,18 +533,15 @@ |
568 | 534 | return $text; |
569 | 535 | } |
570 | 536 | |
571 | | - # Must expand in reverse order, otherwise nested tags will be corrupted |
572 | | - if( isset( $state['nowiki'] ) ) |
573 | | - foreach( array_reverse( $state['nowiki'], true ) as $uniq => $content ) { |
574 | | - $text = str_replace( $uniq, $content, $text ); |
| 537 | + $replacements = array(); |
| 538 | + foreach( $state as $tag => $contentDict ) { |
| 539 | + if( $tag == 'nowiki' || $tag == 'html' ) { |
| 540 | + foreach( $contentDict as $uniq => $content ) { |
| 541 | + $replacements[$uniq] = $content; |
| 542 | + } |
575 | 543 | } |
576 | | - |
577 | | - global $wgRawHtml; |
578 | | - if ($wgRawHtml && isset( $state['html'] ) ) { |
579 | | - foreach( array_reverse( $state['html'], true ) as $uniq => $content ) { |
580 | | - $text = str_replace( $uniq, $content, $text ); |
581 | | - } |
582 | 544 | } |
| 545 | + $text = strtr( $text, $replacements ); |
583 | 546 | |
584 | 547 | return $text; |
585 | 548 | } |
— | — | @@ -593,14 +556,7 @@ |
594 | 557 | function insertStripItem( $text, &$state ) { |
595 | 558 | $rnd = $this->mUniqPrefix . '-item' . Parser::getRandomString(); |
596 | 559 | if ( !$state ) { |
597 | | - $state = array( |
598 | | - 'html' => array(), |
599 | | - 'nowiki' => array(), |
600 | | - 'math' => array(), |
601 | | - 'pre' => array(), |
602 | | - 'comment' => array(), |
603 | | - 'gallery' => array(), |
604 | | - ); |
| 560 | + $state = array(); |
605 | 561 | } |
606 | 562 | $state['item'][$rnd] = $text; |
607 | 563 | return $rnd; |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -398,6 +398,13 @@ |
399 | 399 | * parserTests.php accepts a --file parameter to run an alternate test sutie |
400 | 400 | * parser tests can now test extensions using !!hooks sections |
401 | 401 | * Fix oddity with open tag parameters getting stuck on </li> |
| 402 | +* (bug 5384) Fix <!-- comments --> in <ref> extension |
| 403 | +* Nesting of different tag extensions and comments should now work more |
| 404 | + consistently and more safely. A cleaner, one-pass tag strip lets the |
| 405 | + 'outer' tag either take source (<nowiki>-style) or pass it down to |
| 406 | + further parsing (<ref>-style). There should no longer be surprise |
| 407 | + expansion of foreign extensions inside HTML output, or differences |
| 408 | + in behavior based on the order tags are loaded. |
402 | 409 | |
403 | 410 | |
404 | 411 | == Compatibility == |