r102061 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r102060‎ | r102061 | r102062 >
Date:22:33, 4 November 2011
Author:danwe
Status:deferred
Tags:
Comment:
Version 1.0. Rewritten lots of stuff for having own regular expression flags with their own meaning in mediawiki context. 'e' flag => '#regex' replacement text will be parsed after reference inclusion before final replacement. 'r' flag replaces '#regexsearch'. Put under ISC License.
Modified paths:
  • /trunk/extensions/RegexFun/README (deleted) (history)
  • /trunk/extensions/RegexFun/RELEASE-NOTES (deleted) (history)
  • /trunk/extensions/RegexFun/RegexFun.i18n.magic.php (modified) (history)
  • /trunk/extensions/RegexFun/RegexFun.i18n.php (modified) (history)
  • /trunk/extensions/RegexFun/RegexFun.php (modified) (history)

Diff [purge]

Index: trunk/extensions/RegexFun/RELEASE-NOTES
@@ -1,9 +0,0 @@
2 - Changelog:
3 - ==========
4 - * Nov. xx, 2011 -- Version 1.0 (initial release).
5 - Introduces the following parser functions defined within 'ExtRegexFun' class:
6 - - #regex
7 - - #regexsearch
8 - - #regexall
9 - - #regex_var
10 - - #regexquote
\ No newline at end of file
Index: trunk/extensions/RegexFun/README
@@ -1,40 +0,0 @@
2 -== About ==
3 -
4 -''Regex Fun'' is a MediaWiki extension by Daniel Werner which adds parser functions for performing regular expression
5 -searches and replacements.
6 -The '#regex' parser function is inspired by RegexParserFunctions from Jim R. Wilson and mostly compatible with it.
7 -RegexParserFunctions has some design flaws and missing functionality that should be filled by this extension.
8 -
9 -''Regex Fun'' defines the following parser functions within your wiki:
10 -
11 - - #regex: Search or replace with help of php preg regular expression. Returns first match in search mode.
12 - Use of the 'e' modifier behind the expression will be filtered and eliminated from the pattern!
13 - - #regexsearch: Same as 'regex' with the difference that in replacement mode if no match could be found, an empty
14 - string '' will be returned instead of the unaltered original.
15 - - #regexall: Searches the whole string for as many matches as possible and returns them separated by a separator.
16 - - #regex_var: Allows to access references of the last used 'regex' or 'regexsearch' function.
17 - - #regexquote: Runs php function 'preg_quote' on a string to use user-input safely in regex functions. In case the
18 - first character is a character with special meaning in MW, it will be replaced with its hexadecimal
19 - notation e.g. '\x23' instead of '#'. This will prevent things from going terribly wrong when using
20 - user input within a regular expression.
21 -
22 -* Website: http://www.mediawiki.org/wiki/Extension:Regex_Fun
23 -* Author: Daniel Werner < danweetz@web.de >
24 -
25 -
26 -== Installation ==
27 -
28 -Once you have downloaded the code, place the 'RegexFun' directory within your
29 -MediaWiki 'extensions' directory. Then add the following code to your
30 -[[Manual:LocalSettings.php|LocalSettings.php]] file:
31 -
32 - # RegexFun
33 - require_once( "$IP/extensions/RegexFun/RegexFun.php" );
34 -
35 -
36 -== Contributing ==
37 -
38 -If you have bug reports or requests, please add them to the ''Regex Fun'' Talk page [0].
39 -You can also send them to Daniel Werner < danweetz@web.de >
40 -
41 -[0] http://www.mediawiki.org/w/index.php?title=Extension_talk:Regex_Fun
\ No newline at end of file
Index: trunk/extensions/RegexFun/RegexFun.i18n.magic.php
@@ -15,7 +15,6 @@
1616
1717 $magicWords['en'] = array(
1818 'regex' => array( 0, 'regex' ),
19 - 'regexsearch' => array( 0, 'regexsearch' ),
2019 'regex_var' => array( 0, 'regex_var' ),
2120 'regexall' => array( 0, 'regexall' ),
2221 'regexquote' => array( 0, 'regexquote' ),
Index: trunk/extensions/RegexFun/RegexFun.i18n.php
@@ -17,7 +17,7 @@
1818 * @author Daniel Werner
1919 */
2020 $messages['en'] = array(
21 - 'regexfun-desc' => 'Adds parser functions allowing the use of regular expressions within the wiki.',
 21+ 'regexfun-desc' => 'Adds parser functions allowing the use of regular expressions within wiki articles.',
2222 'regexfun-invalid' => 'The regular expression "$1" is invalid.',
2323 );
2424
@@ -25,6 +25,6 @@
2626 * @author Daniel Werner
2727 */
2828 $messages['de'] = array(
29 - 'regexfun-desc' => 'Fügt Parser-Funktionen hinzu um reguläre Ausdrücke im Wiki verwenden zu können.',
 29+ 'regexfun-desc' => 'Fügt Parser-Funktionen hinzu um reguläre Ausdrücke auf Wiki-Seiten verwenden zu können.',
3030 'regexfun-invalid' => '„$1“ ist kein gültiger regulärer Ausdruck.',
3131 );
\ No newline at end of file
Index: trunk/extensions/RegexFun/RegexFun.php
@@ -1,12 +1,13 @@
22 <?php
33
44 /**
5 - * This is a MediaWiki extension which adds parser functions for performing regular
 5+ * 'Regex Fun' is a MediaWiki extension which adds parser functions for performing regular
66 * expression searches and replacements.
77 *
88 * Info on mediawiki.org: http://www.mediawiki.org/wiki/Extension:Regex_Fun
99 *
10 - * @version: 1.0 alpha
 10+ * @version: 1.0
 11+ * @license: ISC license
1112 * @author: Daniel Werner < danweetz@web.de >
1213 *
1314 * Documentation: http://www.mediawiki.org/wiki/Extension:Regex_Fun
@@ -30,13 +31,9 @@
3132 'url' => 'http://www.mediawiki.org/wiki/Extension:Regex_Fun',
3233 );
3334
34 -$dir = dirname( __FILE__ );
 35+$wgExtensionMessagesFiles['RegexFun' ] = ExtRegexFun::getDir() . '/RegexFun.i18n.php';
 36+$wgExtensionMessagesFiles['RegexFunMagic'] = ExtRegexFun::getDir() . '/RegexFun.i18n.magic.php';
3537
36 -$wgExtensionMessagesFiles['RegexFun' ] = $dir . '/RegexFun.i18n.php';
37 -$wgExtensionMessagesFiles['RegexFunMagic'] = $dir . '/RegexFun.i18n.magic.php';
38 -
39 -unset( $dir );
40 -
4138 $wgHooks['ParserFirstCallInit'][] = 'ExtRegexFun::init';
4239 $wgHooks['ParserClearState' ][] = 'ExtRegexFun::onParserClearState';
4340
@@ -55,67 +52,131 @@
5653 *
5754 * @var string
5855 */
59 - const VERSION = '1.0 alpha';
 56+ const VERSION = '1.0';
6057
61 - protected static $lastMatches = null;
62 - protected static $lastPattern = '';
63 - protected static $lastSubject = '';
64 -
6558 /**
6659 * Sets up parser functions
6760 */
6861 public static function init( &$parser ) {
6962 $parser->setFunctionHook( 'regex', array( __CLASS__, 'regex' ) );
70 - $parser->setFunctionHook( 'regexsearch', array( __CLASS__, 'regexsearch' ) );
7163 $parser->setFunctionHook( 'regex_var', array( __CLASS__, 'regex_var' ) );
7264 $parser->setFunctionHook( 'regexall', array( __CLASS__, 'regexall' ) );
7365 $parser->setFunctionHook( 'regexquote', array( __CLASS__, 'regexquote' ) );
7466 $parser->setFunctionHook( 'regexascii', array( __CLASS__, 'regexascii' ) );
7567
76 - return true;
 68+ return true;
7769 }
7870
7971 /**
 72+ * Returns the extension's base installation directory.
 73+ *
 74+ * @since 1.0
 75+ *
 76+ * @return string
 77+ */
 78+ public static function getDir() {
 79+ static $dir = null;
 80+
 81+ if( $dir === null ) {
 82+ $dir = dirname( __FILE__ );
 83+ }
 84+ return $dir;
 85+ }
 86+
 87+
 88+ const FLAG_NO_REPLACE_NO_OUT = 'r';
 89+ const FLAG_REPLACEMENT_PARSE = 'e'; // overwrites php 'e' flag
 90+
 91+ /**
 92+ * helper store for transmitting some values to a preg_replace_callback function
 93+ *
 94+ * @var array
 95+ */
 96+ private static $tmpRegexCB;
 97+
 98+ /**
8099 * Checks whether the given regular expression is valid or would cause an error.
81 - * Also alters the pattern in case it would be a security risk
 100+ * Also alters the pattern in case it would be a security risk and communicates
 101+ * about special flags which have no or different meaning in PHP. These will be
 102+ * removed from the original regex string but put into the &$specialFlags array.
82103 *
 104+ * @since 1.0
 105+ *
83106 * @param &$pattern String
 107+ * @param &$specialFlags array will contain all special flags the $pattern contains
 108+ *
84109 * @return Boolean
85110 */
86 - public static function isValidRegex( &$pattern ) {
87 - //return (bool)preg_match( '/^([\\/\\|%]).*\\1[imsSuUx]*$/', $pattern );
 111+ public static function validateRegex( &$pattern, &$specialFlags = array() ) {
88112
89 - // replace all eventual 'e' pattern modifiers since it's a huge security risk!
90 - $origPattern = $pattern;
91 - $delimiter = preg_quote( substr( $pattern, 0, 1 ), '/' );
92 - // from last delimiter (regex end) to end (only flags), replace all 'e':
93 - $pattern = preg_replace_callback(
94 - '/(?<=' . $delimiter . ')[^' . $delimiter . ']*?$/i',
95 - array( __CLASS__, 'validRegexHelper' ),
96 - $pattern
97 - );
 113+ $specialFlags = array();
98114
 115+ if( strlen( $pattern ) < 2 ) {
 116+ return false;
 117+ }
 118+
 119+ $delimiter = substr( trim( $pattern ), 0, 1 );
 120+ $delimiterQuoted = preg_quote( $delimiter, '/' );
 121+
 122+ // two parts, split by the last delimiter
 123+ $parts = preg_split( "/{$delimiterQuoted}(?=[^{$delimiterQuoted}]*$)/", $pattern, 2 );
 124+
 125+ $mainPart = $parts[0] . $delimiter; // delimiter to delimiter without flags
 126+ $flagsPart = $parts[1];
 127+
 128+ // remove 'e' modifier from final regex since it's a huge security risk with user input!
 129+ self::regexSpecialFlagsHandler( $flagsPart, self::FLAG_REPLACEMENT_PARSE, $specialFlags );
 130+
 131+ // marks #regex with replacement will output '' in case of no replacement
 132+ self::regexSpecialFlagsHandler( $flagsPart, self::FLAG_NO_REPLACE_NO_OUT, $specialFlags );
 133+
 134+ // put purified regex back together:
 135+ $newPattern = $mainPart . $flagsPart;
 136+
 137+ if( ! self::isValidRegex( $newPattern ) ) {
 138+ // no modification to $pattern done!
 139+ $specialFlags = array();
 140+ return false;
 141+ }
 142+ $pattern = $newPattern; // remember reference!
 143+ return true;
 144+ }
 145+
 146+ /**
 147+ * Returns whether the regular expression would be a valid one or not.
 148+ *
 149+ * @since 1.0
 150+ *
 151+ * @param $pattern string
 152+ *
 153+ * @return boolean
 154+ */
 155+ public static function isValidRegex( $pattern ) {
 156+ //return (bool)preg_match( '/^([\\/\\|%]).*\\1[imsSuUx]*$/', $pattern );
99157 /*
100 - * this takes care of all invalid regular expression use and the php notices
101 - * many regular expression extensions won't supress
 158+ * Testing of the pattern in a very simple way:
 159+ * This takes care of all invalid regular expression use and the ugly php notices
 160+ * which some other regex extensions for MW won't handle right.
102161 */
103162 wfSuppressWarnings(); // instead of using the evil @ operator!
104 - $validRegex = preg_match( $pattern, ' ' );
 163+ $isValid = false !== preg_match( $pattern, ' ' ); // preg_match returns false on error
105164 wfRestoreWarnings();
106165
107 - if( $validRegex === false ) {
108 - // set pattern back since the whole thing is invalid anyway:
109 - $pattern = $origPattern;
110 - return false;
111 - }
112 - return true;
113 - }
 166+ return $isValid;
 167+ }
 168+
114169 /**
115 - * only used by 'preg_replace_callback' in 'isValidRegex'
 170+ * Helper function to check a string of flags for a certain flag and set it as an array key
 171+ * in a special flags collecting array.
116172 */
117 - private static function validRegexHelper( $matches ) {
118 - // there is no big 'E' modifier so it won't hurt to replace it as well:
119 - return preg_replace( '/[e\s]/i', '', $matches[0] );
 173+ private static function regexSpecialFlagsHandler( &$modifiers, $flag, &$specialFlags ) {
 174+ $count = 0;
 175+ $modifiers = preg_replace( "/{$flag}/", '', $modifiers, -1, $count );
 176+ if( $count > 0 ) {
 177+ $specialFlags[ $flag ] = true;
 178+ return true;
 179+ }
 180+ return false;
120181 }
121182
122183 /**
@@ -124,43 +185,29 @@
125186 * which can be recognized by #iferror
126187 *
127188 * @param $pattern String the invalid regular expression
 189+ *
128190 * @return Array
129191 */
130192 public static function invalidRegexParsingOutput( $pattern ) {
131193 $msg = '<span class="error">' . wfMsgExt( 'regexfun-invalid', array( 'content' ), "<tt><nowiki>$pattern</nowiki></tt>" ). '</span>';
132 - return array( $msg, 'noparse' => false, 'isHTML' => false ); // isHTML must be false for #iferror!
 194+ return array( $msg, 'noparse' => true, 'isHTML' => false ); // isHTML must be false for #iferror!
133195 }
134196
135 - public static function onParserClearState( &$parser ) {
136 - //cleanup to avoid conflicts with job queue or Special:Import
137 - self::$lastMatches = null;
138 - self::$lastPattern = '';
139 - self::$lastSubject = '';
140 -
141 - return true;
142 - }
143 -
144 - protected static function initLastRegex( $pattern, $subject ) {
145 - self::$lastMatches = array();
146 - self::$lastPattern = $pattern;
147 - self::$lastSubject = $subject;
148 - }
149 -
150197 /**
151 - * also takes care of security risks in pattern which is why
152 - * the pattern is given by reference!
 198+ * Helper function. Validates regex and takes care of security risks in pattern which is why
 199+ * the pattern is taken by reference!
153200 */
154 - protected static function validateRegexCall( $subject, &$pattern, $resetLastRegex = false ) {
155 - self::$lastMatches = null; //reset last matches for the case anything goes wrong
156 - if( $subject === null || $pattern === null ) {
157 - return '';
 201+ protected static function validateRegexCall( Parser &$parser, $subject, &$pattern, &$specialFlags, $resetLastRegex = false ) {
 202+ if( $resetLastRegex ) {
 203+ //reset last matches for the case anything goes wrong
 204+ self::setLastMatches( $parser , null );
 205+ }
 206+ if( ! self::validateRegex( $pattern, $specialFlags ) ) {
 207+ return false;
158208 }
159 - if( ! self::isValidRegex( $pattern ) ) {
160 - return self::invalidRegexParsingOutput( $pattern );
 209+ if( $resetLastRegex ) {
 210+ self::initLastRegex( $parser, $pattern, $subject );
161211 }
162 - if( $resetLastRegex ) {
163 - self::initLastRegex( $pattern, $subject );
164 - }
165212 return true;
166213 }
167214
@@ -171,86 +218,105 @@
172219 * @param $subject String input string to evaluate
173220 * @param $pattern String regular expression pattern - must use /, | or % delimiter
174221 * @param $replace String regular expression replacement
 222+ *
175223 * @return String Result of replacing pattern with replacement in string, or matching text if replacement was omitted
176224 */
177 - public static function regex( &$parser, $subject = null, $pattern = null, $replace = null, $limit = null ) {
 225+ public static function regex( Parser &$parser, $subject = '', $pattern = '', $replace = null, $limit = -1 ) {
 226+
178227 // validate, initialise and check for wrong input:
179 - $continue = self::validateRegexCall( $subject, $pattern, true );
180 - if( $continue !== true ) {
181 - return $continue;
182 - }
 228+ $continue = self::validateRegexCall( $parser, $subject, $pattern, $specialFlags, true );
 229+ if( ! $continue ) {
 230+ return self::invalidRegexParsingOutput( $pattern );;
 231+ }
183232
184233 if( $replace === null ) {
185234 // search mode:
186 - $output = ( preg_match( $pattern, $subject, self::$lastMatches ) ? self::$lastMatches[0] : '' );
 235+ $lastMatches = self::getLastMatches( $parser );
 236+ $output = ( preg_match( $pattern, $subject, $lastMatches ) ? $lastMatches[0] : '' );
 237+ self::setLastMatches( $parser, $lastMatches );
187238 } else {
188 - // replace mode:
189 - if( $limit === null ) {
190 - $limit = -1;
191 - }
 239+ // replace mode:
192240 $limit = (int)$limit;
193241
194 - self::$lastMatches = false;
195 - $output = preg_replace( $pattern, $replace, $subject, $limit );
196 - }
197 - return array( $output, 'noparse' => true, 'isHTML' => false );
198 - }
199 -
200 - /**
201 - * Same as #regex but returns nothing if the pattern doesn't match and a replacement is given.
202 - */
203 - public static function regexsearch( &$parser, $subject = null, $pattern = null, $replace = null, $limit = null ) {
204 - // validate, initialise and check for wrong input:
205 - $continue = self::validateRegexCall( $subject, $pattern, true );
206 - if( $continue !== true ) {
207 - return $continue;
208 - }
209 -
210 - if( $replace === null ) {
211 - // search mode:
212 - $output = ( preg_match( $pattern, $subject, self::$lastMatches ) ? self::$lastMatches[0] : '' );
213 - } else {
214 - // replace mode:
215 - if( $limit === null ) {
216 - $limit = -1;
 242+ // set last matches to 'false' and get them on demand instead since preg_replace won't communicate them
 243+ self::setLastMatches( $parser , false );
 244+
 245+ // FLAG 'e' (parse replace after match) handling:
 246+ if( ! empty( $specialFlags[ self::FLAG_REPLACEMENT_PARSE ] ) ) {
 247+ // if 'e' flag is set, each replacement has to be parsed after matches are inserted but before replacing!
 248+ self::$tmpRegexCB = array(
 249+ 'replacement' => $replace,
 250+ 'parser' => &$parser,
 251+ );
 252+ $output = preg_replace_callback( $pattern, array( __CLASS__, 'regex_eFlag_callback' ), $subject, $limit, $count );
217253 }
218 - $limit = (int)$limit;
 254+ else {
 255+ $output = preg_replace( $pattern, $replace, $subject, $limit, $count );
 256+ }
219257
220 - self::$lastMatches = false;
221 - $output = preg_replace( $pattern, $replace, $subject, $limit, $count );
222 - if( $count < 1 ) {
223 - return '';
 258+ // FLAG 'r' (no replacement - no output) handling:
 259+ if( ! empty( $specialFlags[ self::FLAG_NO_REPLACE_NO_OUT ] ) ) {
 260+ /*
 261+ * only output replacement result if there actually was a match and therewith a replacement happened
 262+ * (otherwise the input string would be returned)
 263+ */
 264+ if( $count < 1 ) {
 265+ return '';
 266+ }
224267 }
225268 }
226 - return array( $output, 'noparse' => false, 'isHTML' => false );
 269+ return $output;
227270 }
228271
 272+ private static function regex_eFlag_callback( $matches ) {
 273+
 274+ /** Don't cache this since it could contain dynamic content like #var which should be parsed */
 275+
 276+ $replace = self::$tmpRegexCB['replacement'];
 277+ $parser = self::$tmpRegexCB['parser'];
 278+
 279+ // last matches in #regex replace mode were set to false before, set them now:
 280+ self::setLastMatches( $parser, $matches );
 281+
 282+ // use #regex_var for transforming replacement string with matches:
 283+ $replace = self::regex_var( $parser, $replace );
 284+
 285+ // parse the replacement after matches are inserted
 286+ // use a new frame, no need for SFH_OBJECT_ARGS style parser functions
 287+ $frame = $parser->getPreprocessor()->newCustomFrame( $parser );
 288+ $replace = $parser->preprocessToDom( $replace );
 289+ $replace = trim( $frame->expand( $replace ) );
 290+
 291+ return $replace;
 292+ }
 293+
229294 /**
230 - * Performs regular expression search and returns ALL matches separated
 295+ * Performs regular expression searches and returns ALL matches separated
231296 *
232297 * @param $parser Parser instance of running Parser
233298 * @param $subject String input string to evaluate
234299 * @param $pattern String regular expression pattern - must use /, | or % as delimiter
235300 * @param $separator String to separate all the matches
236301 * @param $offset Integer first match to print out. Negative values possible: -1 means last match.
237 - * @param $length Integer maximum matches for print out
 302+ * @param $limit Integer maximum matches for print out
 303+ *
238304 * @return String result of all matching text parts separated by a string
239305 */
240 - public static function regexall( &$parser , $subject = null , $pattern = null , $separator = ', ' , $offset = 0 , $length = null ) {
 306+ public static function regexall( &$parser , $subject = '' , $pattern = '' , $separator = ', ' , $offset = 0 , $limit = null ) {
241307 // validate and check for wrong input:
242 - $continue = self::validateRegexCall( $subject, $pattern, false );
243 - if( $continue !== true ) {
244 - return $continue;
 308+ $continue = self::validateRegexCall( $parser, $subject, $pattern, $specialFlags, false );
 309+ if( ! $continue ) {
 310+ return self::invalidRegexParsingOutput( $pattern );;
245311 }
246312 // adjust default values:
247313 $offset = (int)$offset;
248 - if( $length !== null ) {
249 - $length = (int)$length;
 314+ if( $limit !== null ) {
 315+ $limit = (int)$limit;
250316 }
251317
252318 if( preg_match_all( $pattern, $subject, $matches, PREG_SET_ORDER ) ) {
253319
254 - $matches = array_slice( $matches, $offset, $length );
 320+ $matches = array_slice( $matches, $offset, $limit );
255321 $output = ''; //$end = ($end or ($end >= count($matches)) ? $end : count($matches) );
256322
257323 for( $count = 0; $count < count( $matches ); $count++ ) {
@@ -259,7 +325,7 @@
260326 }
261327 $output .= trim( $matches[ $count ][0] );
262328 }
263 - return array( $output, 'noparse' => false, 'isHTML' => false );
 329+ return $output;
264330 }
265331 return '';
266332 }
@@ -272,27 +338,24 @@
273339 * @param $defaultVal Integer default value which will be returned when the result with the given index doesn't exist or is a void string
274340 */
275341 public static function regex_var( &$parser, $index = 0, $defaultVal = '' ) {
276 - if( self::$lastMatches === null ) { // last regex was invalid or none executed yet
 342+ // get matches from last #regex
 343+ $lastMatches = self::getLastMatches( $parser );
 344+
 345+ if( $lastMatches === null ) { // last regex was invalid or none executed yet
277346 return '';
278347 }
279 - // last matches are set to false in case last regex was in replace mode!
280 - if( self::$lastMatches === false ) {
281 - // execute last regular expression again, but this time not as replace
282 - preg_match( self::$lastPattern, self::$lastSubject, self::$lastMatches );
283 - }
284 -
 348+
285349 // if requested index is numerical:
286350 if (preg_match( '/^\d+$/', $index ) ) {
287351 // if requested index is in matches and isn't '':
288 - if( array_key_exists( $index, self::$lastMatches ) && self::$lastMatches[$index] !== '' )
289 - return self::$lastMatches[ $index ];
 352+ if( array_key_exists( $index, $lastMatches ) && $lastMatches[$index] !== '' )
 353+ return $lastMatches[ $index ];
290354 else {
291355 // no match! Return just the default value:
292356 return $defaultVal;
293357 }
294358 } else {
295 - // complex string is given, something like "$1, $2 and $3":
296 -
 359+ // complex string is given, something like "$1, $2 and $3":
297360 /*
298361 * replace all back-references with their number increased by 1!
299362 * this way we can also handle $0 in the right way!
@@ -307,13 +370,13 @@
308371 * which will handle all the replace-escaping handling correct
309372 */
310373 $regEx = '';
311 - foreach( self::$lastMatches as $match ) {
 374+ foreach( $lastMatches as $match ) {
312375 $regEx .= '(' . preg_quote( $match, '/' ) . ')';
313376 }
314377 $regEx = "/^{$regEx}$/";
315 - $output = preg_replace( $regEx, $index, implode( '', self::$lastMatches ) );
 378+ $output = preg_replace( $regEx, $index, implode( '', $lastMatches ) );
316379
317 - return array( $output, 'noparse' => false, 'isHTML' => false );
 380+ return $output;
318381 }
319382 }
320383 /**
@@ -336,6 +399,7 @@
337400 * @param $parser Parser instance of running Parser
338401 * @param $str String input string to change
339402 * @param $delimiter String delimiter which also will be escaped within $str (default is set to '/')
 403+ *
340404 * @return String Returns the quoted string
341405 */
342406 public static function regexquote( &$parser, $str = null, $delimiter = '/' ) {
@@ -365,27 +429,74 @@
366430 }
367431
368432
369 - /**
370 - * DEPRECATED: Functionality is included in 'regexquote' now for the first character
371 - * in case its a trouble-maker.
 433+ /*********************************
 434+ **** HELPER - For Store of ****
 435+ **** regex_var within Parser ****
 436+ *********************************
 437+ ****
 438+ **
 439+ * Adding the info to each Parser object makes it invulnerable to new Parser objects being created
 440+ * and destroyed throughout main parsing process. Only the one parser, 'ParserClearState' is called
 441+ * on will lose its data since the parsing process has been declared finished and the data won't be
 442+ * needed anymore.
 443+ **
 444+ ***/
 445+
 446+ protected static function initLastRegex( Parser &$parser, $pattern, $subject ) {
 447+ self::setLastMatches( $parser, array() );
 448+ self::setLastPattern( $parser, $pattern );
 449+ self::setLastSubject( $parser, $subject );
 450+ }
 451+
 452+ public static function onParserClearState( &$parser ) {
 453+ //cleanup to avoid conflicts with job queue or Special:Import
 454+ self::setLastMatches( $parser, null );
 455+ self::setLastPattern( $parser, '' );
 456+ self::setLastSubject( $parser, '' );
 457+
 458+ return true;
 459+ }
 460+
 461+ /**
 462+ * Returns the last regex matches done by #regex in the context of the same parser object.
372463 *
373 - * Converts all chars in a pattern for regex to escaped hex ascii syntax '=' => '\x3D'
374 - * This function is good for escaping characters which can cause problems in MW like
375 - * ';' as a first character of a string or '|' characters
376 - *
377 - * @param $parser ParseriInstance of running Parser
378 - * @param $str String input string to change
379 - * @return String result of the input with escaped characters in hex ascii syntax
380 - */
381 - /*
382 - public static function regexascii( &$parser, $str = null ) {
383 - if( $str === null ) return '';
 464+ * @param Parser $parser
 465+ * @return array|null
 466+ */
 467+ public static function getLastMatches( Parser &$parser ) {
 468+
 469+ if( isset( $parser->mExtRegexFun['lastMatches'] ) ) {
 470+
 471+ // last matches are set to false in case last regex was in replace mode! Get them on demand:
 472+ if( $parser->mExtRegexFun['lastMatches'] === false ) {
 473+ preg_match( self::getLastPattern( $parser ), self::getLastSubject( $parser ), $parser->mExtRegexFun['lastMatches'] );
 474+ }
 475+ return $parser->mExtRegexFun['lastMatches'];
 476+ }
 477+ return null;
 478+ }
 479+ protected static function setLastMatches( Parser &$parser, $value ) {
 480+ $parser->mExtRegexFun['lastMatches'] = $value;
 481+ }
384482
385 - $pattern = '/(.)/e';
386 - $replace = "'\\x' . dechex( ord( '$1' ) )";
 483+ public static function getLastPattern( Parser &$parser ) {
 484+ if( isset( $parser->mExtRegexFun['lastPattern'] ) ) {
 485+ return $parser->mExtRegexFun['lastPattern'];
 486+ }
 487+ return '';
 488+ }
 489+ protected static function setLastPattern( Parser &$parser, $value ) {
 490+ $parser->mExtRegexFun['lastPattern'] = $value;
 491+ }
387492
388 - $output = preg_replace( $pattern, $replace, $str );
389 - return array($output, 'noparse' => true);
390 - }
391 - */
 493+ public static function getLastSubject( Parser &$parser ) {
 494+ if( isset( $parser->mExtRegexFun['lastSubject'] ) ) {
 495+ return $parser->mExtRegexFun['lastSubject'];
 496+ }
 497+ return '';
 498+ }
 499+ protected static function setLastSubject( Parser &$parser, $value ) {
 500+ $parser->mExtRegexFun['lastSubject'] = $value;
 501+ }
 502+
392503 }
\ No newline at end of file

Follow-up revisions

RevisionCommit summaryAuthorDate
r102066r102061: Consistency tweals in preparation for adding extension to translatew...raymond23:07, 4 November 2011
r102067r102061: Adding extension to translatewiki.netraymond23:07, 4 November 2011

Status & tagging log