r104075 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r104074‎ | r104075 | r104076 >
Date:19:50, 23 November 2011
Author:danwe
Status:deferred
Tags:
Comment:
Functions for better interaction with other extensions, version number increased to 1.0.2
Modified paths:
  • /trunk/extensions/RegexFun/RELEASE-NOTES (modified) (history)
  • /trunk/extensions/RegexFun/RegexFun.php (modified) (history)

Diff [purge]

Index: trunk/extensions/RegexFun/RELEASE-NOTES
@@ -2,6 +2,9 @@
33 ==========
44
55 * (trunk) -- Version 1.0.2 alpha
 6+ - Limit won't be exceeded early when the 'e' flag with many backrefs in the replacement is used extensively.
 7+ - It's possible to use the 'Regex Fun' regex system with advanced flags within other extensions.
 8+ - Performance increased for executing huge numbers of the same regex on different strings.
69 - Internal representative functions for parser functions now have a 'pf_' prefix.
710
811 * November 6, 2011 -- Version 1.0.1
Index: trunk/extensions/RegexFun/RegexFun.php
@@ -57,7 +57,7 @@
5858 *
5959 * @var string
6060 */
61 - const VERSION = '1.0.2 alpha';
 61+ const VERSION = '1.0.2';
6262
6363 /**
6464 * Sets up parser functions
@@ -238,6 +238,7 @@
239239 return false;
240240 }
241241 if( $resetLastRegex ) {
 242+ // store infos for this regex for '#regex_var'
242243 self::initLastRegex( $parser, $pattern, $subject );
243244 }
244245 return true;
@@ -249,25 +250,26 @@
250251 * @param $parser Parser instance of running Parse
251252 * @param $subject String input string to evaluate
252253 * @param $pattern String regular expression pattern - must use /, | or % delimiter
253 - * @param $replace String regular expression replacement
 254+ * @param $replacement String regular expression replacement
254255 *
255256 * @return String Result of replacing pattern with replacement in string, or matching text if replacement was omitted
256257 */
257 - public static function pf_regex( Parser &$parser, $subject = '', $pattern = '', $replace = null, $limit = -1 ) {
 258+ public static function pf_regex( Parser &$parser, $subject = '', $pattern = '', $replacement = null, $limit = -1 ) {
258259 // check whether limit exceeded:
259260 if( self::limitExceeded( $parser ) ) {
260261 return self::msgLimitExceeded();
261262 }
262263 self::increaseRegexCount( $parser );
263264
264 - // validate, initialise and check for wrong input:
265 - $continue = self::validateRegexCall( $parser, $subject, $pattern, $specialFlags, true );
266 - if( ! $continue ) {
267 - return self::msgInvalidRegex( $pattern );
268 - }
269 -
270 - if( $replace === null ) {
 265+ if( $replacement === null ) {
271266 // search mode:
 267+
 268+ // validate, initialise and check for wrong input:
 269+ $continue = self::validateRegexCall( $parser, $subject, $pattern, $specialFlags, true );
 270+ if( ! $continue ) {
 271+ return self::msgInvalidRegex( $pattern );
 272+ }
 273+
272274 $lastMatches = self::getLastMatches( $parser );
273275 $output = ( preg_match( $pattern, $subject, $lastMatches ) ? $lastMatches[0] : '' );
274276 self::setLastMatches( $parser, $lastMatches );
@@ -275,49 +277,132 @@
276278 // replace mode:
277279 $limit = (int)$limit;
278280
279 - // set last matches to 'false' and get them on demand instead since preg_replace won't communicate them
280 - self::setLastMatches( $parser , false );
281 -
282 - // FLAG 'e' (parse replace after match) handling:
283 - if( ! empty( $specialFlags[ self::FLAG_REPLACEMENT_PARSE ] ) ) {
284 - // if 'e' flag is set, each replacement has to be parsed after matches are inserted but before replacing!
285 - self::$tmpRegexCB = array(
286 - 'replacement' => $replace,
287 - 'parser' => &$parser,
288 - );
289 - $output = preg_replace_callback( $pattern, array( __CLASS__, 'regex_eFlag_callback' ), $subject, $limit, $count );
 281+ // set last matches to 'false' and get them on demand instead since preg_replace won't communicate them
 282+ self::setLastMatches( $parser, false );
 283+
 284+ // do the regex plus all handling of special flags and validation
 285+ $output = self::doPregReplace( $pattern, $replacement, $subject, $limit, $parser );
 286+
 287+ if( $output === false ) {
 288+ // invalid regex, don't store any info for '#regex_var'
 289+ self::setLastMatches( $parser , null );
 290+ return self::msgInvalidRegex( $pattern );
290291 }
291 - else {
292 - $output = preg_replace( $pattern, $replace, $subject, $limit, $count );
293 - }
294292
295 - // FLAG 'r' (no replacement - no output) handling:
296 - if( ! empty( $specialFlags[ self::FLAG_NO_REPLACE_NO_OUT ] ) ) {
297 - /*
298 - * only output replacement result if there actually was a match and therewith a replacement happened
299 - * (otherwise the input string would be returned)
300 - */
301 - if( $count < 1 ) {
302 - return '';
303 - }
304 - }
 293+ // set these infos after pattern validation/correction
 294+ self::setLastPattern( $parser, $pattern );
 295+ self::setLastSubject( $parser, $subject );
305296 }
 297+
306298 return $output;
307299 }
308300
309 - private static function regex_eFlag_callback( $matches ) {
 301+ /**
 302+ * 'preg_replace' like function but can handle special modifiers 'e' and 'r'.
 303+ *
 304+ * @param string &$pattern
 305+ * @param string $replacement
 306+ * @param string $subject
 307+ * @param int $limit
 308+ * @param Parser &$parser if 'e' flag should be allowed, a parser object for parsing is required.
 309+ * @param array $allowedSpecialFlags all special flags that should be handled, by default 'e' and 'r'.
 310+ */
 311+ public static function doPregReplace(
 312+ &$pattern,
 313+ $replacement,
 314+ $subject,
 315+ $limit = -1,
 316+ &$parser = null,
 317+ array $allowedSpecialFlags = array(
 318+ self::FLAG_REPLACEMENT_PARSE,
 319+ self::FLAG_NO_REPLACE_NO_OUT,
 320+ )
 321+ ) {
 322+ static $lastPattern = null;
 323+ static $lastFlags = null;
 324+ static $specialFlags = null;
310325
 326+ /*
 327+ * cache validated pattern and use it as long as nothing has changed, this makes things
 328+ * faster in case we do a lot of stuff with the same regex.
 329+ */
 330+ if( $lastPattern === null || $lastPattern !== $pattern
 331+ || $lastFlags !== implode( ',', $allowedSpecialFlags )
 332+ ) {
 333+ // if allowed special flags change, we have to validate again^^
 334+ $lastFlags = implode( ',', $allowedSpecialFlags );
 335+
 336+ // validate regex and get special flags 'e' and 'r' if given:
 337+ if( ! self::validateRegex( $pattern, $specialFlags ) ) {
 338+ // invalid regex!
 339+ return false;
 340+ }
 341+
 342+ // filter unwanted special flags:
 343+ $allowedSpecialFlags = array_flip( $allowedSpecialFlags );
 344+ $specialFlags = array_intersect_key( $specialFlags, $allowedSpecialFlags );
 345+
 346+ $lastPattern = $pattern;
 347+ }
 348+
 349+
 350+ // FLAG 'e' (parse replace after match) handling:
 351+ if( ! empty( $specialFlags[ self::FLAG_REPLACEMENT_PARSE ] ) ) {
 352+
 353+ // 'e' requires a Parser for parsing!
 354+ if( ! ( $parser instanceof Parser ) ) {
 355+ // no valid Parser object, without, we can't parse anything!
 356+ throw new MWException( "Regex Fun 'e' flag discovered but no Parser object given!" );
 357+ }
 358+
 359+ // if 'e' flag is set, each replacement has to be parsed after matches are inserted but before replacing!
 360+ self::$tmpRegexCB = array(
 361+ 'replacement' => $replacement,
 362+ 'parser' => &$parser,
 363+ 'internal' => isset( $parser->mExtRegexFun['lastMatches'] ) && $parser->mExtRegexFun['lastMatches'] === false
 364+ );
 365+
 366+ $output = preg_replace_callback( $pattern, array( __CLASS__, 'doPregReplace_eFlag_callback' ), $subject, $limit, $count );
 367+ }
 368+ else {
 369+ // no 'e' flag, we can perform the standard function
 370+ $output = preg_replace( $pattern, $replacement, $subject, $limit, $count );
 371+ }
 372+
 373+
 374+ // FLAG 'r' (no replacement - no output) handling:
 375+ if( ! empty( $specialFlags[ self::FLAG_NO_REPLACE_NO_OUT ] ) ) {
 376+ /*
 377+ * only output replacement result if there actually was a match and therewith a replacement happened
 378+ * (otherwise the input string would be returned)
 379+ */
 380+ if( $count < 1 ) {
 381+ return '';
 382+ }
 383+ }
 384+
 385+ return $output;
 386+ }
 387+
 388+ private static function doPregReplace_eFlag_callback( $matches ) {
 389+
311390 /** Don't cache this since it could contain dynamic content like #var which should be parsed */
312391
313 - $replace = self::$tmpRegexCB['replacement'];
314 - $parser = self::$tmpRegexCB['parser'];
 392+ $replace = self::$tmpRegexCB['replacement'];
 393+ $parser = self::$tmpRegexCB['parser'];
 394+ $internal = self::$tmpRegexCB['internal']; // whether doPregReplace() is called as part of a parser function
315395
316 - // last matches in #regex replace mode were set to false before, set them now:
317 - self::setLastMatches( $parser, $matches );
 396+ /*
 397+ * only do this if set to false before, internally, so we won't destroy things if
 398+ * doPregReplace() was called from outside 'Regex Fun'
 399+ */
 400+ if( $internal ) {
 401+ // last matches in #regex replace mode were set to false before, set them now:
 402+ self::setLastMatches( $parser, $matches );
 403+ }
 404+ // replace backrefs with their actual values:
 405+ $replace = self::regexVarReplace( $replace, $matches );
318406
319 - // use #regex_var for transforming replacement string with matches:
320 - $replace = self::pf_regex_var( $parser, $replace );
321 -
322407 // parse the replacement after matches are inserted
323408 // use a new frame, no need for SFH_OBJECT_ARGS style parser functions
324409 $frame = $parser->getPreprocessor()->newCustomFrame( $parser );
@@ -410,33 +495,42 @@
411496 }
412497 self::increaseRegexCount( $parser );
413498
414 - /*
415 - * replace all back-references with their number increased by 1!
416 - * this way we can also handle $0 in the right way!
417 - */
418 - $index = preg_replace_callback(
419 - '%(?<!\\\)(?:\$(?:(\d+)|\{(\d+)\})|\\\(\d+))%',
420 - array( __CLASS__, 'regexVarIncreaseBackref' ),
421 - $index
422 - );
423 - /*
424 - * build a helper regex matching all the last matches to use preg_replace
425 - * which will handle all the replace-escaping handling correct
426 - */
427 - $regEx = '';
428 - foreach( $lastMatches as $match ) {
429 - $regEx .= '(' . preg_quote( $match, '/' ) . ')';
430 - }
431 - $regEx = "/^{$regEx}$/";
432 - $output = preg_replace( $regEx, $index, implode( '', $lastMatches ) );
433 -
434 - return $output;
 499+ // do the actual transformation:
 500+ return self::regexVarReplace( $index, $lastMatches );
435501 }
436502 }
 503+
437504 /**
438 - * only used by 'preg_replace_callback' in 'regex_var'
 505+ * Replaces all backref variables within a replacement string with the backrefs' actual
 506+ * values just like preg_replace would do it.
439507 */
440 - private static function regexVarIncreaseBackref( $matches ) {
 508+ private static function regexVarReplace( $replacement, $matches ) {
 509+ /*
 510+ * replace all back-references with their number increased by 1!
 511+ * this way we can also handle $0 in the right way!
 512+ */
 513+ $replacement = preg_replace_callback(
 514+ '%(?<!\\\)(?:\$(?:(\d+)|\{(\d+)\})|\\\(\d+))%',
 515+ array( __CLASS__, 'regexVarReplace_increaseBackrefs_callback' ),
 516+ $replacement
 517+ );
 518+ /*
 519+ * build a helper regex matching all the last matches to use preg_replace
 520+ * which will handle all the replace-escaping handling correct
 521+ */
 522+ $regEx = '';
 523+ foreach( $matches as $match ) {
 524+ $regEx .= '(' . preg_quote( $match, '/' ) . ')';
 525+ }
 526+ $regEx = "/^{$regEx}$/";
 527+
 528+ return preg_replace( $regEx, $replacement, implode( '', $matches ) );
 529+ }
 530+
 531+ /**
 532+ * only used by 'preg_replace_callback' in 'regexVarReplace'
 533+ */
 534+ private static function regexVarReplace_increaseBackrefs_callback( $matches ) {
441535 // find index:
442536 $index = false;
443537 $full = $matches[0];
@@ -565,7 +659,11 @@
566660
567661 // last matches are set to false in case last regex was in replace mode! Get them on demand:
568662 if( $parser->mExtRegexFun['lastMatches'] === false ) {
569 - preg_match( self::getLastPattern( $parser ), self::getLastSubject( $parser ), $parser->mExtRegexFun['lastMatches'] );
 663+ preg_match(
 664+ self::getLastPattern( $parser ),
 665+ self::getLastSubject( $parser ),
 666+ $parser->mExtRegexFun['lastMatches']
 667+ );
570668 }
571669 return $parser->mExtRegexFun['lastMatches'];
572670 }

Status & tagging log