r105437 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r105436‎ | r105437 | r105438 >
Date:16:45, 7 December 2011
Author:danwe
Status:deferred
Tags:
Comment:
Version increased to 1.1. '#regex' in replace mode with 'e' flag doesn't require an escaped replacement string anymore. Introduced the 'egRegexFunExpansionEscapeTemplates' config var; '#regex' with 'e' flag will now first escape back-references before expanding.
Modified paths:
  • /trunk/extensions/RegexFun/RELEASE-NOTES (modified) (history)
  • /trunk/extensions/RegexFun/RegexFun.php (modified) (history)
  • /trunk/extensions/RegexFun/RegexFun_Settings.php (modified) (history)

Diff [purge]

Index: trunk/extensions/RegexFun/RELEASE-NOTES
@@ -1,5 +1,12 @@
22 Changelog:
33 ==========
 4+
 5+ * (trunk) -- Version 1.1
 6+ - '#regex' in replace mode with 'e' flag doesn't require escaped replacement string anymore.
 7+ Instead of something like "{{((}}#if:$2{{!}}$2{{!}}$1{{))}}", "{{#if:$2|$2|$1}}" can be used.
 8+ - '#regex' with 'e' flag now first escapes matches before putting them into the back-references
 9+ and expanding them. Therefore a new configuration variable '$egRegexFunExpansionEscapeTemplates'
 10+ has been introduced which serves as a workaround for MW bug #32829.
411
512 * December 5, 2011 -- Version 1.0.2
613 - Limit won't be exceeded early when 'e' flag with many backrefs in replacement is used extensively.
Index: trunk/extensions/RegexFun/RegexFun.php
@@ -8,7 +8,7 @@
99 * Support: http://www.mediawiki.org/wiki/Extension_talk:Regex_Fun
1010 * Source code: http://svn.wikimedia.org/viewvc/mediawiki/trunk/extensions/RegexFun
1111 *
12 - * @version: 1.0.2
 12+ * @version: 1.1
1313 * @license: ISC license
1414 * @author: Daniel Werner < danweetz@web.de >
1515 *
@@ -57,29 +57,28 @@
5858 *
5959 * @var string
6060 */
61 - const VERSION = '1.0.2';
 61+ const VERSION = '1.1';
6262
6363 /**
6464 * Sets up parser functions
6565 */
6666 public static function init( Parser &$parser ) {
67 - self::initFunction( $parser, 'regex' );
 67+ self::initFunction( $parser, 'regex', SFH_OBJECT_ARGS );
6868 self::initFunction( $parser, 'regex_var' );
6969 self::initFunction( $parser, 'regexquote' );
7070 self::initFunction( $parser, 'regexall' );
7171
7272 return true;
7373 }
74 - private static function initFunction( Parser &$parser, $name, $functionCallback = null ) {
75 - if( $functionCallback === null ) {
76 - $functionCallback = array( __CLASS__, "pf_{$name}" );
77 - }
78 -
 74+ private static function initFunction( Parser &$parser, $name, $flags = 0 ) {
7975 global $egRegexFunDisabledFunctions;
8076
8177 // only register function if not disabled by configuration
8278 if( ! in_array( $name, $egRegexFunDisabledFunctions ) ) {
83 - $parser->setFunctionHook( $name, $functionCallback );
 79+ // all parser functions with prefix:
 80+ $prefix = ( $flags & SFH_OBJECT_ARGS ) ? 'pfObj_' : 'pf_';
 81+ $functionCallback = array( __CLASS__, $prefix . $name );
 82+ $parser->setFunctionHook( $name, $functionCallback, $flags );
8483 }
8584 }
8685
@@ -246,15 +245,24 @@
247246
248247 /**
249248 * Performs a regular expression search or replacement
 249+ * Syntax:
 250+ * {{#regex: subject |pattern |replacement |limit }}
250251 *
251 - * @param $parser Parser instance of running Parse
252 - * @param $subject String input string to evaluate
253 - * @param $pattern String regular expression pattern - must use /, | or % delimiter
254 - * @param $replacement String regular expression replacement
 252+ * subject: input string to evaluate
 253+ * pattern: regular expression pattern - can use any valid preg delimiter, e.g. /, | or % and various modifiers
 254+ * replacement: regular expression replacement (optional)
 255+ * limit: maximum number of matches that should be replaced (optional)
255256 *
256257 * @return String Result of replacing pattern with replacement in string, or matching text if replacement was omitted
257258 */
258 - public static function pf_regex( Parser &$parser, $subject = '', $pattern = '', $replacement = null, $limit = -1 ) {
 259+ //public static function pf_regex( Parser &$parser, $subject = '', $pattern = '', $replacement = null, $limit = -1 ) {
 260+ public static function pfObj_regex( Parser &$parser, PPFrame $frame, array $args ) {
 261+ // Get Parameters
 262+ $subject = isset( $args[0] ) ? trim( $frame->expand( $args[0] ) ) : '';
 263+ $pattern = isset( $args[1] ) ? trim( $frame->expand( $args[1] ) ) : '';
 264+ $replacement = isset( $args[2] ) ? $args[2] : null; // unexpanded replacement in case 'e' flag is used
 265+ $limit = isset( $args[3] ) ? trim( $frame->expand( $args[3] ) ) : -1;
 266+
259267 // check whether limit exceeded:
260268 if( self::limitExceeded( $parser ) ) {
261269 return self::msgLimitExceeded();
@@ -282,7 +290,7 @@
283291 self::setLastMatches( $parser, false );
284292
285293 // do the regex plus all handling of special flags and validation
286 - $output = self::doPregReplace( $pattern, $replacement, $subject, $limit, $parser );
 294+ $output = self::doPregReplace( $pattern, $replacement, $subject, $limit, $parser, $frame );
287295
288296 if( $output === false ) {
289297 // invalid regex, don't store any info for '#regex_var'
@@ -301,12 +309,16 @@
302310 /**
303311 * 'preg_replace'-like function but can handle special modifiers 'e' and 'r'.
304312 *
305 - * @param string &$pattern
306 - * @param string $replacement
307 - * @param string $subject
308 - * @param int $limit
309 - * @param Parser &$parser if 'e' flag should be allowed, a parser object for parsing is required.
310 - * @param array $allowedSpecialFlags all special flags that should be handled, by default 'e' and 'r'.
 313+ * @since 1.1
 314+ *
 315+ * @param string &$pattern
 316+ * @param PPNode|string $replacement should be a PPNode in case 'e' flag might be used since in that
 317+ * case the string will be expanded after back-refs are inserted. Otherwise string is ok.
 318+ * @param string $subject
 319+ * @param int $limit
 320+ * @param Parser &$parser if 'e' flag should be allowed, a parser object for parsing is required.
 321+ * @param PPFrame $frame which keeps template parameters which should be used in case 'e' flag is set.
 322+ * @param array $allowedSpecialFlags all special flags that should be handled, by default 'e' and 'r'.
311323 */
312324 public static function doPregReplace(
313325 $pattern, // not by value in here!
@@ -314,10 +326,11 @@
315327 $subject,
316328 $limit = -1,
317329 &$parser = null,
 330+ $frame = null,
318331 array $allowedSpecialFlags = array(
319332 self::FLAG_REPLACEMENT_PARSE,
320333 self::FLAG_NO_REPLACE_NO_OUT,
321 - )
 334+ )
322335 ) {
323336 static $lastPattern = null;
324337 static $activePattern = null;
@@ -352,6 +365,11 @@
353366 $pattern = $activePattern;
354367 }
355368
 369+ // make sure we have a frame to expand the $replacement (necessary for 'e' flag support!)
 370+ if( $frame === null ) {
 371+ // new frame without template parameters then
 372+ $frame = $parser->getPreprocessor()->newCustomFrame( array() );
 373+ }
356374
357375 // FLAG 'e' (parse replace after match) handling:
358376 if( ! empty( $specialFlags[ self::FLAG_REPLACEMENT_PARSE ] ) ) {
@@ -366,12 +384,15 @@
367385 self::$tmpRegexCB = array(
368386 'replacement' => $replacement,
369387 'parser' => &$parser,
 388+ 'frame' => $frame,
370389 'internal' => isset( $parser->mExtRegexFun['lastMatches'] ) && $parser->mExtRegexFun['lastMatches'] === false
371390 );
372391
 392+ // do the actual replacement with special 'e' flag handling
373393 $output = preg_replace_callback( $pattern, array( __CLASS__, 'doPregReplace_eFlag_callback' ), $subject, $limit, $count );
374394 }
375395 else {
 396+ $replacement = trim( $frame->expand( $replacement ) );
376397 // no 'e' flag, we can perform the standard function
377398 $output = preg_replace( $pattern, $replacement, $subject, $limit, $count );
378399 }
@@ -397,6 +418,7 @@
398419
399420 $replace = self::$tmpRegexCB['replacement'];
400421 $parser = self::$tmpRegexCB['parser'];
 422+ $frame = self::$tmpRegexCB['frame'];
401423 $internal = self::$tmpRegexCB['internal']; // whether doPregReplace() is called as part of a parser function
402424
403425 /*
@@ -408,12 +430,11 @@
409431 self::setLastMatches( $parser, $matches );
410432 }
411433 // replace backrefs with their actual values:
412 - $replace = self::regexVarReplace( $replace, $matches );
 434+ $replace = trim( $frame->expand( $replace, PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES ) );
 435+ $replace = self::regexVarReplace( $replace, $matches, true );
413436
414437 // parse the replacement after matches are inserted
415 - // use a new frame, no need for SFH_OBJECT_ARGS style parser functions
416 - $frame = $parser->getPreprocessor()->newCustomFrame( $parser );
417 - $replace = $parser->preprocessToDom( $replace );
 438+ $replace = $parser->preprocessToDom( $replace, $frame->isTemplate() ? Parser::PTD_FOR_INCLUSION : 0 );
418439 $replace = trim( $frame->expand( $replace ) );
419440
420441 return $replace;
@@ -503,15 +524,19 @@
504525 self::increaseRegexCount( $parser );
505526
506527 // do the actual transformation:
507 - return self::regexVarReplace( $index, $lastMatches );
 528+ return self::regexVarReplace( $index, $lastMatches, false );
508529 }
509530 }
510531
511532 /**
512533 * Replaces all backref variables within a replacement string with the backrefs actual
513534 * values just like preg_replace would do it.
 535+ *
 536+ * @param string $replacement
 537+ * @param array $matches
 538+ * @param bool $forExpansion has to be set in case the 'e' flag should be handled
514539 */
515 - private static function regexVarReplace( $replacement, $matches ) {
 540+ private static function regexVarReplace( $replacement, array $matches, $forExpansion = false ) {
516541 /*
517542 * replace all back-references with their number increased by 1!
518543 * this way we can also handle $0 in the right way!
@@ -520,15 +545,23 @@
521546 '%(?<!\\\)(?:\$(?:(\d+)|\{(\d+)\})|\\\(\d+))%',
522547 array( __CLASS__, 'regexVarReplace_increaseBackrefs_callback' ),
523548 $replacement
524 - );
 549+ );
525550 /*
526551 * build a helper regex matching all the last matches to use preg_replace
527552 * which will handle all the replace-escaping handling correct
528553 */
529554 $regEx = '';
530 - foreach( $matches as $match ) {
 555+ foreach( $matches as &$match ) {
 556+ if( $forExpansion ) {
 557+ /*
 558+ * if this is for 'e' flag, we have to escape the matches so they won't break wiki markup
 559+ * within the replacement in case they contain characters like '|'.
 560+ */
 561+ $match = self::escapeForExpansion( $match );
 562+ }
531563 $regEx .= '(' . preg_quote( $match, '/' ) . ')';
532564 }
 565+
533566 $regEx = "/^{$regEx}$/";
534567
535568 return preg_replace( $regEx, $replacement, implode( '', $matches ) );
@@ -543,7 +576,11 @@
544577 $full = $matches[0];
545578 for( $i = 1; $index === false || $index === '' ; $i++ ) {
546579 // $index can be false (shouldn't happen), '' or any number (including 0 !)
547 - $index = @$matches[ $i ];
 580+ if( array_key_exists( $i, $matches ) ) {
 581+ $index = $matches[ $i ];
 582+ } else {
 583+ $index = false;
 584+ }
548585 }
549586 return preg_replace( '%\d+%', (int)$index + 1, $full );
550587 }
@@ -599,6 +636,34 @@
600637 }
601638
602639
 640+ /**
 641+ * Escapes a string so it can be used within PPFrame::expand() expansion without actually being
 642+ * changed because of special characters.
 643+ * Respects the configuration variable '$egRegexFunExpansionEscapeTemplates'.
 644+ *
 645+ * This is a workaround for bug #32829
 646+ *
 647+ * @since 1.1
 648+ *
 649+ * @param string $string
 650+ * @return string
 651+ */
 652+ public static function escapeForExpansion( $string ) {
 653+ global $egRegexFunExpansionEscapeTemplates;
 654+
 655+ if( $egRegexFunExpansionEscapeTemplates === null ) {
 656+ return $string;
 657+ }
 658+
 659+ $string = strtr(
 660+ $string,
 661+ $egRegexFunExpansionEscapeTemplates
 662+ );
 663+
 664+ return $string;
 665+ }
 666+
 667+
603668 /***********************************
604669 **** HELPER - For store of ****
605670 **** regex stuff within Parser ****
Index: trunk/extensions/RegexFun/RegexFun_Settings.php
@@ -42,3 +42,22 @@
4343 * @var integer
4444 */
4545 $egRegexFunMaxRegexPerParse = -1;
 46+
 47+/**
 48+ * Contains a key-value pair list of characters that should be replaced by a template or parser function
 49+ * call within matching back-reference values by '#regex' with 'e' flags in use. By replacing these special
 50+ * characters before including the back-references values into the replacement string, these special
 51+ * characters can't modify wiki syntax within the replacement code.
 52+ *
 53+ * If this is set to null, the old behavior will be active.
 54+ *
 55+ * @since 1.1
 56+ * @var array|null
 57+ */
 58+$egRegexFunExpansionEscapeTemplates = array(
 59+ '=' => '{{=}}',
 60+ '|' => '{{!}}',
 61+ '{{' => '{{((}}',
 62+ '}}' => '{{))}}'
 63+);
 64+$egRegexFunExpansionEscapeTemplates = null;
\ No newline at end of file

Status & tagging log