Index: trunk/extensions/RegexFun/RELEASE-NOTES |
— | — | @@ -1,5 +1,12 @@ |
2 | 2 | Changelog: |
3 | 3 | ========== |
| 4 | + |
| 5 | + * (trunk) -- Version 1.1 |
| 6 | + - '#regex' in replace mode with 'e' flag no longer requires an escaped replacement string. |
| 7 | + Instead of something like "{{((}}#if:$2{{!}}$2{{!}}$1{{))}}", "{{#if:$2|$2|$1}}" can be used. |
| 8 | + - '#regex' with 'e' flag now first escapes matches before putting them into the back-references |
| 9 | + and expanding them. Therefore a new configuration variable '$egRegexFunExpansionEscapeTemplates' |
| 10 | + has been introduced which serves as a workaround for MW bug #32829. |
4 | 11 | |
5 | 12 | * December 5, 2011 -- Version 1.0.2 |
6 | 13 | - Limit won't be exceeded early when 'e' flag with many backrefs in replacement is used extensively. |
Index: trunk/extensions/RegexFun/RegexFun.php |
— | — | @@ -8,7 +8,7 @@ |
9 | 9 | * Support: http://www.mediawiki.org/wiki/Extension_talk:Regex_Fun |
10 | 10 | * Source code: http://svn.wikimedia.org/viewvc/mediawiki/trunk/extensions/RegexFun |
11 | 11 | * |
12 | | - * @version: 1.0.2 |
| 12 | + * @version: 1.1 |
13 | 13 | * @license: ISC license |
14 | 14 | * @author: Daniel Werner < danweetz@web.de > |
15 | 15 | * |
— | — | @@ -57,29 +57,28 @@ |
58 | 58 | * |
59 | 59 | * @var string |
60 | 60 | */ |
61 | | - const VERSION = '1.0.2'; |
| 61 | + const VERSION = '1.1'; |
62 | 62 | |
63 | 63 | /** |
64 | 64 | * Sets up parser functions |
65 | 65 | */ |
66 | 66 | public static function init( Parser &$parser ) { |
67 | | - self::initFunction( $parser, 'regex' ); |
| 67 | + self::initFunction( $parser, 'regex', SFH_OBJECT_ARGS ); |
68 | 68 | self::initFunction( $parser, 'regex_var' ); |
69 | 69 | self::initFunction( $parser, 'regexquote' ); |
70 | 70 | self::initFunction( $parser, 'regexall' ); |
71 | 71 | |
72 | 72 | return true; |
73 | 73 | } |
74 | | - private static function initFunction( Parser &$parser, $name, $functionCallback = null ) { |
75 | | - if( $functionCallback === null ) { |
76 | | - $functionCallback = array( __CLASS__, "pf_{$name}" ); |
77 | | - } |
78 | | - |
| 74 | + private static function initFunction( Parser &$parser, $name, $flags = 0 ) { |
79 | 75 | global $egRegexFunDisabledFunctions; |
80 | 76 | |
81 | 77 | // only register function if not disabled by configuration |
82 | 78 | if( ! in_array( $name, $egRegexFunDisabledFunctions ) ) { |
83 | | - $parser->setFunctionHook( $name, $functionCallback ); |
| 79 | + // callback method is named after the function, prefixed with 'pfObj_' (SFH_OBJECT_ARGS) or 'pf_': |
| 80 | + $prefix = ( $flags & SFH_OBJECT_ARGS ) ? 'pfObj_' : 'pf_'; |
| 81 | + $functionCallback = array( __CLASS__, $prefix . $name ); |
| 82 | + $parser->setFunctionHook( $name, $functionCallback, $flags ); |
84 | 83 | } |
85 | 84 | } |
86 | 85 | |
— | — | @@ -246,15 +245,24 @@ |
247 | 246 | |
248 | 247 | /** |
249 | 248 | * Performs a regular expression search or replacement |
| 249 | + * Syntax: |
| 250 | + * {{#regex: subject |pattern |replacement |limit }} |
250 | 251 | * |
251 | | - * @param $parser Parser instance of running Parse |
252 | | - * @param $subject String input string to evaluate |
253 | | - * @param $pattern String regular expression pattern - must use /, | or % delimiter |
254 | | - * @param $replacement String regular expression replacement |
| 252 | + * subject: input string to evaluate |
| 253 | + * pattern: regular expression pattern - can use any valid preg delimiter, e.g. /, | or % and various modifiers |
| 254 | + * replacement: regular expression replacement (optional) |
| 255 | + * limit: maximum number of matches to replace (optional) |
255 | 256 | * |
256 | 257 | * @return String Result of replacing pattern with replacement in string, or matching text if replacement was omitted |
257 | 258 | */ |
258 | | - public static function pf_regex( Parser &$parser, $subject = '', $pattern = '', $replacement = null, $limit = -1 ) { |
| 259 | + //public static function pf_regex( Parser &$parser, $subject = '', $pattern = '', $replacement = null, $limit = -1 ) { |
| 260 | + public static function pfObj_regex( Parser &$parser, PPFrame $frame, array $args ) { |
| 261 | + // Get Parameters |
| 262 | + $subject = isset( $args[0] ) ? trim( $frame->expand( $args[0] ) ) : ''; |
| 263 | + $pattern = isset( $args[1] ) ? trim( $frame->expand( $args[1] ) ) : ''; |
| 264 | + $replacement = isset( $args[2] ) ? $args[2] : null; // unexpanded replacement in case 'e' flag is used |
| 265 | + $limit = isset( $args[3] ) ? trim( $frame->expand( $args[3] ) ) : -1; |
| 266 | + |
259 | 267 | // check whether limit exceeded: |
260 | 268 | if( self::limitExceeded( $parser ) ) { |
261 | 269 | return self::msgLimitExceeded(); |
— | — | @@ -282,7 +290,7 @@ |
283 | 291 | self::setLastMatches( $parser, false ); |
284 | 292 | |
285 | 293 | // do the regex plus all handling of special flags and validation |
286 | | - $output = self::doPregReplace( $pattern, $replacement, $subject, $limit, $parser ); |
| 294 | + $output = self::doPregReplace( $pattern, $replacement, $subject, $limit, $parser, $frame ); |
287 | 295 | |
288 | 296 | if( $output === false ) { |
289 | 297 | // invalid regex, don't store any info for '#regex_var' |
— | — | @@ -301,12 +309,16 @@ |
302 | 310 | /** |
303 | 311 | * 'preg_replace'-like function but can handle special modifiers 'e' and 'r'. |
304 | 312 | * |
305 | | - * @param string &$pattern |
306 | | - * @param string $replacement |
307 | | - * @param string $subject |
308 | | - * @param int $limit |
309 | | - * @param Parser &$parser if 'e' flag should be allowed, a parser object for parsing is required. |
310 | | - * @param array $allowedSpecialFlags all special flags that should be handled, by default 'e' and 'r'. |
| 313 | + * @since 1.1 |
| 314 | + * |
| 315 | + * @param string &$pattern |
| 316 | + * @param PPNode|string $replacement should be a PPNode in case 'e' flag might be used since in that |
| 317 | + * case the string will be expanded after back-refs are inserted. Otherwise string is ok. |
| 318 | + * @param string $subject |
| 319 | + * @param int $limit |
| 320 | + * @param Parser &$parser if 'e' flag should be allowed, a parser object for parsing is required. |
| 321 | + * @param PPFrame $frame which keeps template parameters which should be used in case 'e' flag is set. |
| 322 | + * @param array $allowedSpecialFlags all special flags that should be handled, by default 'e' and 'r'. |
311 | 323 | */ |
312 | 324 | public static function doPregReplace( |
313 | 325 | $pattern, // not by value in here! |
— | — | @@ -314,10 +326,11 @@ |
315 | 327 | $subject, |
316 | 328 | $limit = -1, |
317 | 329 | &$parser = null, |
| 330 | + $frame = null, |
318 | 331 | array $allowedSpecialFlags = array( |
319 | 332 | self::FLAG_REPLACEMENT_PARSE, |
320 | 333 | self::FLAG_NO_REPLACE_NO_OUT, |
321 | | - ) |
| 334 | + ) |
322 | 335 | ) { |
323 | 336 | static $lastPattern = null; |
324 | 337 | static $activePattern = null; |
— | — | @@ -352,6 +365,11 @@ |
353 | 366 | $pattern = $activePattern; |
354 | 367 | } |
355 | 368 | |
| 369 | + // make sure we have a frame to expand the $replacement (necessary for 'e' flag support!) |
| 370 | + if( $frame === null ) { |
| 371 | + // new frame without template parameters then |
| 372 | + $frame = $parser->getPreprocessor()->newCustomFrame( array() ); |
| 373 | + } |
356 | 374 | |
357 | 375 | // FLAG 'e' (parse replace after match) handling: |
358 | 376 | if( ! empty( $specialFlags[ self::FLAG_REPLACEMENT_PARSE ] ) ) { |
— | — | @@ -366,12 +384,15 @@ |
367 | 385 | self::$tmpRegexCB = array( |
368 | 386 | 'replacement' => $replacement, |
369 | 387 | 'parser' => &$parser, |
| 388 | + 'frame' => $frame, |
370 | 389 | 'internal' => isset( $parser->mExtRegexFun['lastMatches'] ) && $parser->mExtRegexFun['lastMatches'] === false |
371 | 390 | ); |
372 | 391 | |
| 392 | + // do the actual replacement with special 'e' flag handling |
373 | 393 | $output = preg_replace_callback( $pattern, array( __CLASS__, 'doPregReplace_eFlag_callback' ), $subject, $limit, $count ); |
374 | 394 | } |
375 | 395 | else { |
| 396 | + $replacement = trim( $frame->expand( $replacement ) ); |
376 | 397 | // no 'e' flag, we can perform the standard function |
377 | 398 | $output = preg_replace( $pattern, $replacement, $subject, $limit, $count ); |
378 | 399 | } |
— | — | @@ -397,6 +418,7 @@ |
398 | 419 | |
399 | 420 | $replace = self::$tmpRegexCB['replacement']; |
400 | 421 | $parser = self::$tmpRegexCB['parser']; |
| 422 | + $frame = self::$tmpRegexCB['frame']; |
401 | 423 | $internal = self::$tmpRegexCB['internal']; // whether doPregReplace() is called as part of a parser function |
402 | 424 | |
403 | 425 | /* |
— | — | @@ -408,12 +430,11 @@ |
409 | 431 | self::setLastMatches( $parser, $matches ); |
410 | 432 | } |
411 | 433 | // replace backrefs with their actual values: |
412 | | - $replace = self::regexVarReplace( $replace, $matches ); |
| 434 | + $replace = trim( $frame->expand( $replace, PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES ) ); |
| 435 | + $replace = self::regexVarReplace( $replace, $matches, true ); |
413 | 436 | |
414 | 437 | // parse the replacement after matches are inserted |
415 | | - // use a new frame, no need for SFH_OBJECT_ARGS style parser functions |
416 | | - $frame = $parser->getPreprocessor()->newCustomFrame( $parser ); |
417 | | - $replace = $parser->preprocessToDom( $replace ); |
| 438 | + $replace = $parser->preprocessToDom( $replace, $frame->isTemplate() ? Parser::PTD_FOR_INCLUSION : 0 ); |
418 | 439 | $replace = trim( $frame->expand( $replace ) ); |
419 | 440 | |
420 | 441 | return $replace; |
— | — | @@ -503,15 +524,19 @@ |
504 | 525 | self::increaseRegexCount( $parser ); |
505 | 526 | |
506 | 527 | // do the actual transformation: |
507 | | - return self::regexVarReplace( $index, $lastMatches ); |
| 528 | + return self::regexVarReplace( $index, $lastMatches, false ); |
508 | 529 | } |
509 | 530 | } |
510 | 531 | |
511 | 532 | /** |
512 | 533 | * Replaces all backref variables within a replacement string with the backrefs actual |
513 | 534 | * values just like preg_replace would do it. |
| 535 | + * |
| 536 | + * @param string $replacement |
| 537 | + * @param array $matches |
| 538 | + * @param bool $forExpansion has to be set in case the 'e' flag should be handled |
514 | 539 | */ |
515 | | - private static function regexVarReplace( $replacement, $matches ) { |
| 540 | + private static function regexVarReplace( $replacement, array $matches, $forExpansion = false ) { |
516 | 541 | /* |
517 | 542 | * replace all back-references with their number increased by 1! |
518 | 543 | * this way we can also handle $0 in the right way! |
— | — | @@ -520,15 +545,23 @@ |
521 | 546 | '%(?<!\\\)(?:\$(?:(\d+)|\{(\d+)\})|\\\(\d+))%', |
522 | 547 | array( __CLASS__, 'regexVarReplace_increaseBackrefs_callback' ), |
523 | 548 | $replacement |
524 | | - ); |
| 549 | + ); |
525 | 550 | /* |
526 | 551 | * build a helper regex matching all the last matches to use preg_replace |
527 | 552 | * which will handle all the replace-escaping handling correct |
528 | 553 | */ |
529 | 554 | $regEx = ''; |
530 | | - foreach( $matches as $match ) { |
| 555 | + foreach( $matches as &$match ) { |
| 556 | + if( $forExpansion ) { |
| 557 | + /* |
| 558 | + * if this is for 'e' flag, we have to escape the matches so they won't break wiki markup |
| 559 | + * within the replacement in case they contain characters like '|'. |
| 560 | + */ |
| 561 | + $match = self::escapeForExpansion( $match ); |
| 562 | + } |
531 | 563 | $regEx .= '(' . preg_quote( $match, '/' ) . ')'; |
532 | 564 | } |
| 565 | + |
533 | 566 | $regEx = "/^{$regEx}$/"; |
534 | 567 | |
535 | 568 | return preg_replace( $regEx, $replacement, implode( '', $matches ) ); |
— | — | @@ -543,7 +576,11 @@ |
544 | 577 | $full = $matches[0]; |
545 | 578 | for( $i = 1; $index === false || $index === '' ; $i++ ) { |
546 | 579 | // $index can be false (shouldn't happen), '' or any number (including 0 !) |
547 | | - $index = @$matches[ $i ]; |
| 580 | + if( array_key_exists( $i, $matches ) ) { |
| 581 | + $index = $matches[ $i ]; |
| 582 | + } else { |
| 583 | + $index = false; |
| 584 | + } |
548 | 585 | } |
549 | 586 | return preg_replace( '%\d+%', (int)$index + 1, $full ); |
550 | 587 | } |
— | — | @@ -599,6 +636,34 @@ |
600 | 637 | } |
601 | 638 | |
602 | 639 | |
| 640 | + /** |
| 641 | + * Escapes a string so it can be used within PPFrame::expand() expansion without actually being |
| 642 | + * changed because of special characters. |
| 643 | + * Respects the configuration variable '$egRegexFunExpansionEscapeTemplates'. |
| 644 | + * |
| 645 | + * This is a workaround for bug #32829 |
| 646 | + * |
| 647 | + * @since 1.1 |
| 648 | + * |
| 649 | + * @param string $string |
| 650 | + * @return string |
| 651 | + */ |
| 652 | + public static function escapeForExpansion( $string ) { |
| 653 | + global $egRegexFunExpansionEscapeTemplates; |
| 654 | + |
| 655 | + if( $egRegexFunExpansionEscapeTemplates === null ) { |
| 656 | + return $string; |
| 657 | + } |
| 658 | + |
| 659 | + $string = strtr( |
| 660 | + $string, |
| 661 | + $egRegexFunExpansionEscapeTemplates |
| 662 | + ); |
| 663 | + |
| 664 | + return $string; |
| 665 | + } |
| 666 | + |
| 667 | + |
603 | 668 | /*********************************** |
604 | 669 | **** HELPER - For store of **** |
605 | 670 | **** regex stuff within Parser **** |
Index: trunk/extensions/RegexFun/RegexFun_Settings.php |
— | — | @@ -42,3 +42,22 @@ |
43 | 43 | * @var integer |
44 | 44 | */ |
45 | 45 | $egRegexFunMaxRegexPerParse = -1; |
| 46 | + |
| 47 | +/** |
| 48 | + * Maps special characters to the template or parser function call that should replace them within |
| 49 | + * back-reference values when '#regex' is used with the 'e' flag. Because these special characters are |
| 50 | + * replaced before the back-reference values are inserted into the replacement string, they |
| 51 | + * can't modify wiki syntax within the replacement code. |
| 52 | + * |
| 53 | + * If this is set to null, back-reference values are not escaped (the behavior before version 1.1). |
| 54 | + * |
| 55 | + * @since 1.1 |
| 56 | + * @var array|null |
| 57 | + */ |
| 58 | +$egRegexFunExpansionEscapeTemplates = array( |
| 59 | + '=' => '{{=}}', |
| 60 | + '|' => '{{!}}', |
| 61 | + '{{' => '{{((}}', |
| 62 | + '}}' => '{{))}}' |
| 63 | +); |
| 64 | +$egRegexFunExpansionEscapeTemplates = null; |
\ No newline at end of file |