Index: trunk/phase3/includes/Parser.php |
— | — | @@ -311,20 +311,20 @@ |
312 | 312 | function getOptions() { return $this->mOptions; } |
313 | 313 | |
314 | 314 | /** |
315 | | - * Replaces all occurrences of <$tag>content</$tag> in the text |
316 | | - * with a random marker and returns the new text. the output parameter |
317 | | - * $content will be an associative array filled with data on the form |
318 | | - * $unique_marker => content. |
| 315 | + * Replaces all occurrences of HTML-style comments and the given tags |
| 316 | + * in the text with a random marker and returns the new text. The output |
| 317 | + * parameter $matches will be an associative array filled with data in |
| 318 | + * the form: |
| 319 | + * 'UNIQ-xxxxx' => array( |
| 320 | + * 'element', |
| 321 | + * 'tag content', |
| 322 | + * array( 'param' => 'x' ), |
| 323 | + * '<element param="x">tag content</element>' ) |
319 | 324 | * |
320 | | - * If $content is already set, the additional entries will be appended |
321 | | - * If $tag is set to STRIP_COMMENTS, the function will extract |
322 | | - * <!-- HTML comments --> |
| 325 | + * @param $elements list of element names. Comments are always extracted. |
| 326 | + * @param $text Source text string. |
| 327 | + * @param $uniq_prefix Prefix used to build each unique marker string. |
323 | 328 | * |
324 | | - * $output: array( 'UNIQ-xxxxx' => array( |
325 | | - * 'element', |
326 | | - * 'tag content', |
327 | | - * array( 'param' => 'x' ), |
328 | | - * '<element param="x">' ) ) |
329 | 329 | * @private |
330 | 330 | * @static |
331 | 331 | */ |
— | — | @@ -334,58 +334,59 @@ |
335 | 335 | $stripped = ''; |
336 | 336 | $matches = array(); |
337 | 337 | |
338 | | - if( $elements == STRIP_COMMENTS ) { |
339 | | - $start = '/<!--()()/'; |
340 | | - } else { |
341 | | - $taglist = implode( '|', $elements ); |
342 | | - $start = "/<($taglist)(\\s+[^>]*|\\s*\/?)>/i"; |
343 | | - } |
| 338 | + $taglist = implode( '|', $elements ); |
| 339 | + $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i"; |
344 | 340 | |
345 | 341 | while ( '' != $text ) { |
346 | 342 | $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); |
347 | 343 | $stripped .= $p[0]; |
348 | | - if( count( $p ) < 4 ) { |
| 344 | + if( count( $p ) < 5 ) { |
349 | 345 | break; |
350 | 346 | } |
351 | | - $element = $p[1]; |
352 | | - $attributes = $p[2]; |
353 | | - $inside = $p[3]; |
354 | | - |
355 | | - // If $attributes ends with '/', we have an empty element tag, <tag /> |
356 | | - if( $element != '' && substr( $attributes, -1 ) == '/' ) { |
357 | | - $attributes = substr( $attributes, 0, -1); |
358 | | - $empty = '/'; |
| 347 | + if( count( $p ) > 5 ) { |
| 348 | + // comment |
| 349 | + $element = $p[4]; |
| 350 | + $attributes = ''; |
| 351 | + $close = ''; |
| 352 | + $inside = $p[5]; |
359 | 353 | } else { |
360 | | - $empty = ''; |
| 354 | + // tag |
| 355 | + $element = $p[1]; |
| 356 | + $attributes = $p[2]; |
| 357 | + $close = $p[3]; |
| 358 | + $inside = $p[4]; |
361 | 359 | } |
362 | 360 | |
363 | 361 | $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++); |
364 | 362 | $stripped .= $marker; |
365 | 363 | |
366 | | - if ( $empty === '/' ) { |
| 364 | + if ( $close === '/>' ) { |
367 | 365 | // Empty element tag, <tag /> |
368 | 366 | $content = null; |
369 | 367 | $text = $inside; |
| 368 | + $tail = null; |
370 | 369 | } else { |
371 | | - if( $element ) { |
372 | | - $end = "/<\\/$element\\s*>/i"; |
| 370 | + if( $element == '!--' ) { |
| 371 | + $end = '/(-->)/'; |
373 | 372 | } else { |
374 | | - $end = '/-->/'; |
| 373 | + $end = "/(<\\/$element\\s*>)/i"; |
375 | 374 | } |
376 | | - $q = preg_split( $end, $inside, 2 ); |
| 375 | + $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE ); |
377 | 376 | $content = $q[0]; |
378 | | - if( count( $q ) < 2 ) { |
| 377 | + if( count( $q ) < 3 ) { |
379 | 378 | # No end tag -- let it run out to the end of the text. |
| 379 | + $tail = ''; |
380 | 380 | $text = ''; |
381 | 381 | } else { |
382 | | - $text = $q[1]; |
| 382 | + $tail = $q[1]; |
| 383 | + $text = $q[2]; |
383 | 384 | } |
384 | 385 | } |
385 | 386 | |
386 | 387 | $matches[$marker] = array( $element, |
387 | 388 | $content, |
388 | 389 | Sanitizer::decodeTagAttributes( $attributes ), |
389 | | - "<$element$attributes$empty>" ); |
| 390 | + "<$element$attributes$close$content$tail" ); |
390 | 391 | } |
391 | 392 | return $stripped; |
392 | 393 | } |
— | — | @@ -409,6 +410,7 @@ |
410 | 411 | # Replace any instances of the placeholders |
411 | 412 | $uniq_prefix = $this->mUniqPrefix; |
412 | 413 | #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); |
| 414 | + $commentState = array(); |
413 | 415 | |
414 | 416 | $elements = array_merge( |
415 | 417 | array( 'nowiki', 'pre', 'gallery' ), |
— | — | @@ -422,27 +424,24 @@ |
423 | 425 | } |
424 | 426 | |
425 | 427 | |
426 | | - // Strip comments in a first pass. |
427 | | - // This saves us from needlessly rendering extensions in comment text |
428 | | - $text = Parser::extractTagsAndParams(STRIP_COMMENTS, $text, $comment_matches, $uniq_prefix); |
429 | | - $commentState = array(); |
430 | | - foreach( $comment_matches as $marker => $data ){ |
431 | | - list( $element, $content, $params, $tag ) = $data; |
432 | | - $commentState[$marker] = '<!--' . $content . '-->'; |
433 | | - } |
434 | | - |
435 | 428 | $matches = array(); |
436 | 429 | $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix ); |
437 | 430 | |
438 | 431 | foreach( $matches as $marker => $data ) { |
439 | 432 | list( $element, $content, $params, $tag ) = $data; |
440 | | - // Restore any comments; the extension can deal with them. |
441 | | - if( $content !== null) { |
442 | | - $content = strtr( $content, $commentState ); |
443 | | - } |
444 | 433 | if( $render ) { |
445 | 434 | $tagName = strtolower( $element ); |
446 | 435 | switch( $tagName ) { |
| 436 | + case '!--': |
| 437 | + // Comment |
| 438 | + if( substr( $tag, -3 ) == '-->' ) { |
| 439 | + $output = $tag; |
| 440 | + } else { |
| 441 | + // Unclosed comment in input. |
| 442 | + // Close it so later stripping can remove it |
| 443 | + $output = "$tag-->"; |
| 444 | + } |
| 445 | + break; |
447 | 446 | case 'html': |
448 | 447 | if( $wgRawHtml ) { |
449 | 448 | $output = $content; |
— | — | @@ -473,25 +472,20 @@ |
474 | 473 | } |
475 | 474 | } else { |
476 | 475 | // Just stripping tags; keep the source |
477 | | - if( $content === null ) { |
478 | | - $output = $tag; |
479 | | - } else { |
480 | | - $output = "$tag$content</$element>"; |
481 | | - } |
| 476 | + $output = $tag; |
482 | 477 | } |
483 | | - $state[$element][$marker] = $output; |
| 478 | + if( !$stripcomments && $element == '!--' ) { |
| 479 | + $commentState[$marker] = $output; |
| 480 | + } else { |
| 481 | + $state[$element][$marker] = $output; |
| 482 | + } |
484 | 483 | } |
485 | 484 | |
486 | 485 | # Unstrip comments unless explicitly told otherwise. |
487 | 486 | # (The comments are always stripped prior to this point, so as to |
488 | 487 | # not invoke any extension tags / parser hooks contained within |
489 | 488 | # a comment.) |
490 | | - if ( $stripcomments ) { |
491 | | - // Add remaining comments to the state array |
492 | | - foreach( $commentState as $marker => $content ) { |
493 | | - $state['comment'][$marker] = $content; |
494 | | - } |
495 | | - } else { |
| 489 | + if ( !$stripcomments ) { |
496 | 490 | // Put them all back and forget them |
497 | 491 | $text = strtr( $text, $commentState ); |
498 | 492 | } |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -405,6 +405,8 @@ |
406 | 406 | further parsing (<ref>-style). There should no longer be surprise |
407 | 407 | expansion of foreign extensions inside HTML output, or differences |
408 | 408 | in behavior based on the order tags are loaded. |
| 409 | +* (bug 885) Pre-save transform no longer silently appends close tags |
| 410 | +* Pre-save transform no longer changes the case of close tags |
409 | 411 | |
410 | 412 | |
411 | 413 | == Compatibility == |