Index: trunk/extensions/RegexFun/RELEASE-NOTES |
— | — | @@ -1,9 +0,0 @@ |
2 | | - Changelog: |
3 | | - ========== |
4 | | - * Nov. xx, 2011 -- Version 1.0 (initial release). |
5 | | - Introduces the following parser functions defined within 'ExtRegexFun' class: |
6 | | - - #regex |
7 | | - - #regexsearch |
8 | | - - #regexall |
9 | | - - #regex_var |
10 | | - - #regexquote |
\ No newline at end of file |
Index: trunk/extensions/RegexFun/README |
— | — | @@ -1,40 +0,0 @@ |
2 | | -== About == |
3 | | - |
4 | | -''Regex Fun'' is a MediaWiki extension by Daniel Werner which adds parser functions for performing regular expression |
5 | | -searches and replacements. |
6 | | -The '#regex' parser function is inspired by RegexParserFunctions from Jim R. Wilson and mostly compatible with it. |
7 | | -RegexParserFunctions has some design flaws and missing functionality that should be filled by this extension. |
8 | | - |
9 | | -''Regex Fun'' defines the following parser functions within your wiki: |
10 | | - |
11 | | - - #regex: Search or replace with help of php preg regular expression. Returns first match in search mode. |
12 | | - Use of the 'e' modifier behind the expression will be filtered and eliminated from the pattern! |
13 | | - - #regexsearch: Same as 'regex' with the difference that in replacement mode if no match could be found, an empty |
14 | | - string '' will be returned instead of the unaltered original. |
15 | | - - #regexall: Searches the whole string for as many matches as possible and returns them separated by a separator. |
16 | | - - #regex_var: Allows access to references of the last used 'regex' or 'regexsearch' function. |
17 | | - - #regexquote: Runs the PHP function 'preg_quote' on a string to use user input safely in regex functions. In case the |
18 | | - first character is a character with special meaning in MW, it will be replaced with its hexadecimal |
19 | | - notation, e.g. '\x23' instead of '#'. This prevents things from going terribly wrong when using |
20 | | - user input within a regular expression. |
21 | | - |
22 | | -* Website: http://www.mediawiki.org/wiki/Extension:Regex_Fun |
23 | | -* Author: Daniel Werner < danweetz@web.de > |
24 | | - |
25 | | - |
26 | | -== Installation == |
27 | | - |
28 | | -Once you have downloaded the code, place the 'RegexFun' directory within your |
29 | | -MediaWiki 'extensions' directory. Then add the following code to your |
30 | | -[[Manual:LocalSettings.php|LocalSettings.php]] file: |
31 | | - |
32 | | - # RegexFun |
33 | | - require_once( "$IP/extensions/RegexFun/RegexFun.php" ); |
34 | | - |
35 | | - |
36 | | -== Contributing == |
37 | | - |
38 | | -If you have bug reports or requests, please add them to the ''Regex Fun'' Talk page [0]. |
39 | | -You can also send them to Daniel Werner < danweetz@web.de > |
40 | | - |
41 | | -[0] http://www.mediawiki.org/w/index.php?title=Extension_talk:Regex_Fun |
\ No newline at end of file |
Index: trunk/extensions/RegexFun/RegexFun.i18n.magic.php |
— | — | @@ -15,7 +15,6 @@ |
16 | 16 | |
17 | 17 | $magicWords['en'] = array( |
18 | 18 | 'regex' => array( 0, 'regex' ), |
19 | | - 'regexsearch' => array( 0, 'regexsearch' ), |
20 | 19 | 'regex_var' => array( 0, 'regex_var' ), |
21 | 20 | 'regexall' => array( 0, 'regexall' ), |
22 | 21 | 'regexquote' => array( 0, 'regexquote' ), |
Index: trunk/extensions/RegexFun/RegexFun.i18n.php |
— | — | @@ -17,7 +17,7 @@ |
18 | 18 | * @author Daniel Werner |
19 | 19 | */ |
20 | 20 | $messages['en'] = array( |
21 | | - 'regexfun-desc' => 'Adds parser functions allowing the use of regular expressions within the wiki.', |
| 21 | + 'regexfun-desc' => 'Adds parser functions allowing the use of regular expressions within wiki articles.', |
22 | 22 | 'regexfun-invalid' => 'The regular expression "$1" is invalid.', |
23 | 23 | ); |
24 | 24 | |
— | — | @@ -25,6 +25,6 @@ |
26 | 26 | * @author Daniel Werner |
27 | 27 | */ |
28 | 28 | $messages['de'] = array( |
29 | | - 'regexfun-desc' => 'Fügt Parser-Funktionen hinzu um reguläre Ausdrücke im Wiki verwenden zu können.', |
| 29 | + 'regexfun-desc' => 'Fügt Parser-Funktionen hinzu um reguläre Ausdrücke auf Wiki-Seiten verwenden zu können.', |
30 | 30 | 'regexfun-invalid' => '„$1“ ist kein gültiger regulärer Ausdruck.', |
31 | 31 | ); |
\ No newline at end of file |
Index: trunk/extensions/RegexFun/RegexFun.php |
— | — | @@ -1,12 +1,13 @@ |
2 | 2 | <?php |
3 | 3 | |
4 | 4 | /** |
5 | | - * This is a MediaWiki extension which adds parser functions for performing regular |
| 5 | + * 'Regex Fun' is a MediaWiki extension which adds parser functions for performing regular |
6 | 6 | * expression searches and replacements. |
7 | 7 | * |
8 | 8 | * Info on mediawiki.org: http://www.mediawiki.org/wiki/Extension:Regex_Fun |
9 | 9 | * |
10 | | - * @version: 1.0 alpha |
| 10 | + * @version: 1.0 |
| 11 | + * @license: ISC license |
11 | 12 | * @author: Daniel Werner < danweetz@web.de > |
12 | 13 | * |
13 | 14 | * Documentation: http://www.mediawiki.org/wiki/Extension:Regex_Fun |
— | — | @@ -30,13 +31,9 @@ |
31 | 32 | 'url' => 'http://www.mediawiki.org/wiki/Extension:Regex_Fun', |
32 | 33 | ); |
33 | 34 | |
34 | | -$dir = dirname( __FILE__ ); |
| 35 | +$wgExtensionMessagesFiles['RegexFun' ] = ExtRegexFun::getDir() . '/RegexFun.i18n.php'; |
| 36 | +$wgExtensionMessagesFiles['RegexFunMagic'] = ExtRegexFun::getDir() . '/RegexFun.i18n.magic.php'; |
35 | 37 | |
36 | | -$wgExtensionMessagesFiles['RegexFun' ] = $dir . '/RegexFun.i18n.php'; |
37 | | -$wgExtensionMessagesFiles['RegexFunMagic'] = $dir . '/RegexFun.i18n.magic.php'; |
38 | | - |
39 | | -unset( $dir ); |
40 | | - |
41 | 38 | $wgHooks['ParserFirstCallInit'][] = 'ExtRegexFun::init'; |
42 | 39 | $wgHooks['ParserClearState' ][] = 'ExtRegexFun::onParserClearState'; |
43 | 40 | |
— | — | @@ -55,67 +52,131 @@ |
56 | 53 | * |
57 | 54 | * @var string |
58 | 55 | */ |
59 | | - const VERSION = '1.0 alpha'; |
| 56 | + const VERSION = '1.0'; |
60 | 57 | |
61 | | - protected static $lastMatches = null; |
62 | | - protected static $lastPattern = ''; |
63 | | - protected static $lastSubject = ''; |
64 | | - |
65 | 58 | /** |
66 | 59 | * Sets up parser functions |
67 | 60 | */ |
68 | 61 | public static function init( &$parser ) { |
69 | 62 | $parser->setFunctionHook( 'regex', array( __CLASS__, 'regex' ) ); |
70 | | - $parser->setFunctionHook( 'regexsearch', array( __CLASS__, 'regexsearch' ) ); |
71 | 63 | $parser->setFunctionHook( 'regex_var', array( __CLASS__, 'regex_var' ) ); |
72 | 64 | $parser->setFunctionHook( 'regexall', array( __CLASS__, 'regexall' ) ); |
73 | 65 | $parser->setFunctionHook( 'regexquote', array( __CLASS__, 'regexquote' ) ); |
74 | 66 | $parser->setFunctionHook( 'regexascii', array( __CLASS__, 'regexascii' ) ); |
75 | 67 | |
76 | | - return true; |
| 68 | + return true; |
77 | 69 | } |
78 | 70 | |
79 | 71 | /** |
| 72 | + * Returns the extension's base installation directory. |
| 73 | + * |
| 74 | + * @since 1.0 |
| 75 | + * |
| 76 | + * @return string |
| 77 | + */ |
| 78 | + public static function getDir() { |
| 79 | + static $dir = null; |
| 80 | + |
| 81 | + if( $dir === null ) { |
| 82 | + $dir = dirname( __FILE__ ); |
| 83 | + } |
| 84 | + return $dir; |
| 85 | + } |
| 86 | + |
| 87 | + |
| 88 | + const FLAG_NO_REPLACE_NO_OUT = 'r'; |
| 89 | + const FLAG_REPLACEMENT_PARSE = 'e'; // overwrites php 'e' flag |
| 90 | + |
| 91 | + /** |
| 92 | + * helper store for transmitting some values to a preg_replace_callback function |
| 93 | + * |
| 94 | + * @var array |
| 95 | + */ |
| 96 | + private static $tmpRegexCB; |
| 97 | + |
| 98 | + /** |
80 | 99 | * Checks whether the given regular expression is valid or would cause an error. |
81 | | - * Also alters the pattern in case it would be a security risk |
| 100 | + * Also alters the pattern in case it would be a security risk and communicates |
| 101 | + * about special flags which have no or different meaning in PHP. These will be |
| 102 | + * removed from the original regex string but put into the &$specialFlags array. |
82 | 103 | * |
| 104 | + * @since 1.0 |
| 105 | + * |
83 | 106 | * @param &$pattern String |
| 107 | + * @param &$specialFlags array will contain all special flags the $pattern contains |
| 108 | + * |
84 | 109 | * @return Boolean |
85 | 110 | */ |
86 | | - public static function isValidRegex( &$pattern ) { |
87 | | - //return (bool)preg_match( '/^([\\/\\|%]).*\\1[imsSuUx]*$/', $pattern ); |
| 111 | + public static function validateRegex( &$pattern, &$specialFlags = array() ) { |
88 | 112 | |
89 | | - // replace all eventual 'e' pattern modifiers since it's a huge security risk! |
90 | | - $origPattern = $pattern; |
91 | | - $delimiter = preg_quote( substr( $pattern, 0, 1 ), '/' ); |
92 | | - // from last delimiter (regex end) to end (only flags), replace all 'e': |
93 | | - $pattern = preg_replace_callback( |
94 | | - '/(?<=' . $delimiter . ')[^' . $delimiter . ']*?$/i', |
95 | | - array( __CLASS__, 'validRegexHelper' ), |
96 | | - $pattern |
97 | | - ); |
| 113 | + $specialFlags = array(); |
98 | 114 | |
| 115 | + if( strlen( $pattern ) < 2 ) { |
| 116 | + return false; |
| 117 | + } |
| 118 | + |
| 119 | + $delimiter = substr( trim( $pattern ), 0, 1 ); |
| 120 | + $delimiterQuoted = preg_quote( $delimiter, '/' ); |
| 121 | + |
| 122 | + // two parts, split by the last delimiter |
| 123 | + $parts = preg_split( "/{$delimiterQuoted}(?=[^{$delimiterQuoted}]*$)/", $pattern, 2 ); |
| 124 | + |
| 125 | + $mainPart = $parts[0] . $delimiter; // delimiter to delimiter without flags |
| 126 | + $flagsPart = $parts[1]; |
| 127 | + |
| 128 | + // remove 'e' modifier from final regex since it's a huge security risk with user input! |
| 129 | + self::regexSpecialFlagsHandler( $flagsPart, self::FLAG_REPLACEMENT_PARSE, $specialFlags ); |
| 130 | + |
| 131 | + // marks #regex with replacement will output '' in case of no replacement |
| 132 | + self::regexSpecialFlagsHandler( $flagsPart, self::FLAG_NO_REPLACE_NO_OUT, $specialFlags ); |
| 133 | + |
| 134 | + // put purified regex back together: |
| 135 | + $newPattern = $mainPart . $flagsPart; |
| 136 | + |
| 137 | + if( ! self::isValidRegex( $newPattern ) ) { |
| 138 | + // no modification to $pattern done! |
| 139 | + $specialFlags = array(); |
| 140 | + return false; |
| 141 | + } |
| 142 | + $pattern = $newPattern; // remember reference! |
| 143 | + return true; |
| 144 | + } |
| 145 | + |
| 146 | + /** |
| 147 | + * Returns whether the regular expression would be a valid one or not. |
| 148 | + * |
| 149 | + * @since 1.0 |
| 150 | + * |
| 151 | + * @param $pattern string |
| 152 | + * |
| 153 | + * @return boolean |
| 154 | + */ |
| 155 | + public static function isValidRegex( $pattern ) { |
| 156 | + //return (bool)preg_match( '/^([\\/\\|%]).*\\1[imsSuUx]*$/', $pattern ); |
99 | 157 | /* |
100 | | - * this takes care of all invalid regular expression use and the php notices |
101 | | - * many regular expression extensions won't supress |
| 158 | + * Testing of the pattern in a very simple way: |
| 159 | + * This takes care of all invalid regular expression use and the ugly php notices |
| 160 | + * which some other regex extensions for MW won't handle right. |
102 | 161 | */ |
103 | 162 | wfSuppressWarnings(); // instead of using the evil @ operator! |
104 | | - $validRegex = preg_match( $pattern, ' ' ); |
| 163 | + $isValid = false !== preg_match( $pattern, ' ' ); // preg_match returns false on error |
105 | 164 | wfRestoreWarnings(); |
106 | 165 | |
107 | | - if( $validRegex === false ) { |
108 | | - // set pattern back since the whole thing is invalid anyway: |
109 | | - $pattern = $origPattern; |
110 | | - return false; |
111 | | - } |
112 | | - return true; |
113 | | - } |
| 166 | + return $isValid; |
| 167 | + } |
| 168 | + |
114 | 169 | /** |
115 | | - * only used by 'preg_replace_callback' in 'isValidRegex' |
| 170 | + * Helper function to check a string of flags for a certain flag and set it as an array key |
| 171 | + * in a special flags collecting array. |
116 | 172 | */ |
117 | | - private static function validRegexHelper( $matches ) { |
118 | | - // there is no big 'E' modifier so it won't hurt to replace it as well: |
119 | | - return preg_replace( '/[e\s]/i', '', $matches[0] ); |
| 173 | + private static function regexSpecialFlagsHandler( &$modifiers, $flag, &$specialFlags ) { |
| 174 | + $count = 0; |
| 175 | + $modifiers = preg_replace( "/{$flag}/", '', $modifiers, -1, $count ); |
| 176 | + if( $count > 0 ) { |
| 177 | + $specialFlags[ $flag ] = true; |
| 178 | + return true; |
| 179 | + } |
| 180 | + return false; |
120 | 181 | } |
121 | 182 | |
122 | 183 | /** |
— | — | @@ -124,43 +185,29 @@ |
125 | 186 | * which can be recognized by #iferror |
126 | 187 | * |
127 | 188 | * @param $pattern String the invalid regular expression |
| 189 | + * |
128 | 190 | * @return Array |
129 | 191 | */ |
130 | 192 | public static function invalidRegexParsingOutput( $pattern ) { |
131 | 193 | $msg = '<span class="error">' . wfMsgExt( 'regexfun-invalid', array( 'content' ), "<tt><nowiki>$pattern</nowiki></tt>" ). '</span>'; |
132 | | - return array( $msg, 'noparse' => false, 'isHTML' => false ); // isHTML must be false for #iferror! |
| 194 | + return array( $msg, 'noparse' => true, 'isHTML' => false ); // isHTML must be false for #iferror! |
133 | 195 | } |
134 | 196 | |
135 | | - public static function onParserClearState( &$parser ) { |
136 | | - //cleanup to avoid conflicts with job queue or Special:Import |
137 | | - self::$lastMatches = null; |
138 | | - self::$lastPattern = ''; |
139 | | - self::$lastSubject = ''; |
140 | | - |
141 | | - return true; |
142 | | - } |
143 | | - |
144 | | - protected static function initLastRegex( $pattern, $subject ) { |
145 | | - self::$lastMatches = array(); |
146 | | - self::$lastPattern = $pattern; |
147 | | - self::$lastSubject = $subject; |
148 | | - } |
149 | | - |
150 | 197 | /** |
151 | | - * also takes care of security risks in pattern which is why |
152 | | - * the pattern is given by reference! |
| 198 | + * Helper function. Validates regex and takes care of security risks in pattern which is why |
| 199 | + * the pattern is taken by reference! |
153 | 200 | */ |
154 | | - protected static function validateRegexCall( $subject, &$pattern, $resetLastRegex = false ) { |
155 | | - self::$lastMatches = null; //reset last matches for the case anything goes wrong |
156 | | - if( $subject === null || $pattern === null ) { |
157 | | - return ''; |
| 201 | + protected static function validateRegexCall( Parser &$parser, $subject, &$pattern, &$specialFlags, $resetLastRegex = false ) { |
| 202 | + if( $resetLastRegex ) { |
| 203 | + //reset last matches for the case anything goes wrong |
| 204 | + self::setLastMatches( $parser , null ); |
| 205 | + } |
| 206 | + if( ! self::validateRegex( $pattern, $specialFlags ) ) { |
| 207 | + return false; |
158 | 208 | } |
159 | | - if( ! self::isValidRegex( $pattern ) ) { |
160 | | - return self::invalidRegexParsingOutput( $pattern ); |
| 209 | + if( $resetLastRegex ) { |
| 210 | + self::initLastRegex( $parser, $pattern, $subject ); |
161 | 211 | } |
162 | | - if( $resetLastRegex ) { |
163 | | - self::initLastRegex( $pattern, $subject ); |
164 | | - } |
165 | 212 | return true; |
166 | 213 | } |
167 | 214 | |
— | — | @@ -171,86 +218,105 @@ |
172 | 219 | * @param $subject String input string to evaluate |
173 | 220 | * @param $pattern String regular expression pattern - must use /, | or % delimiter |
174 | 221 | * @param $replace String regular expression replacement |
| 222 | + * |
175 | 223 | * @return String Result of replacing pattern with replacement in string, or matching text if replacement was omitted |
176 | 224 | */ |
177 | | - public static function regex( &$parser, $subject = null, $pattern = null, $replace = null, $limit = null ) { |
| 225 | + public static function regex( Parser &$parser, $subject = '', $pattern = '', $replace = null, $limit = -1 ) { |
| 226 | + |
178 | 227 | // validate, initialise and check for wrong input: |
179 | | - $continue = self::validateRegexCall( $subject, $pattern, true ); |
180 | | - if( $continue !== true ) { |
181 | | - return $continue; |
182 | | - } |
| 228 | + $continue = self::validateRegexCall( $parser, $subject, $pattern, $specialFlags, true ); |
| 229 | + if( ! $continue ) { |
| 230 | + return self::invalidRegexParsingOutput( $pattern );; |
| 231 | + } |
183 | 232 | |
184 | 233 | if( $replace === null ) { |
185 | 234 | // search mode: |
186 | | - $output = ( preg_match( $pattern, $subject, self::$lastMatches ) ? self::$lastMatches[0] : '' ); |
| 235 | + $lastMatches = self::getLastMatches( $parser ); |
| 236 | + $output = ( preg_match( $pattern, $subject, $lastMatches ) ? $lastMatches[0] : '' ); |
| 237 | + self::setLastMatches( $parser, $lastMatches ); |
187 | 238 | } else { |
188 | | - // replace mode: |
189 | | - if( $limit === null ) { |
190 | | - $limit = -1; |
191 | | - } |
| 239 | + // replace mode: |
192 | 240 | $limit = (int)$limit; |
193 | 241 | |
194 | | - self::$lastMatches = false; |
195 | | - $output = preg_replace( $pattern, $replace, $subject, $limit ); |
196 | | - } |
197 | | - return array( $output, 'noparse' => true, 'isHTML' => false ); |
198 | | - } |
199 | | - |
200 | | - /** |
201 | | - * Same as #regex but returns nothing if the pattern doesn't match and a replacement is given. |
202 | | - */ |
203 | | - public static function regexsearch( &$parser, $subject = null, $pattern = null, $replace = null, $limit = null ) { |
204 | | - // validate, initialise and check for wrong input: |
205 | | - $continue = self::validateRegexCall( $subject, $pattern, true ); |
206 | | - if( $continue !== true ) { |
207 | | - return $continue; |
208 | | - } |
209 | | - |
210 | | - if( $replace === null ) { |
211 | | - // search mode: |
212 | | - $output = ( preg_match( $pattern, $subject, self::$lastMatches ) ? self::$lastMatches[0] : '' ); |
213 | | - } else { |
214 | | - // replace mode: |
215 | | - if( $limit === null ) { |
216 | | - $limit = -1; |
| 242 | + // set last matches to 'false' and get them on demand instead since preg_replace won't communicate them |
| 243 | + self::setLastMatches( $parser , false ); |
| 244 | + |
| 245 | + // FLAG 'e' (parse replace after match) handling: |
| 246 | + if( ! empty( $specialFlags[ self::FLAG_REPLACEMENT_PARSE ] ) ) { |
| 247 | + // if 'e' flag is set, each replacement has to be parsed after matches are inserted but before replacing! |
| 248 | + self::$tmpRegexCB = array( |
| 249 | + 'replacement' => $replace, |
| 250 | + 'parser' => &$parser, |
| 251 | + ); |
| 252 | + $output = preg_replace_callback( $pattern, array( __CLASS__, 'regex_eFlag_callback' ), $subject, $limit, $count ); |
217 | 253 | } |
218 | | - $limit = (int)$limit; |
| 254 | + else { |
| 255 | + $output = preg_replace( $pattern, $replace, $subject, $limit, $count ); |
| 256 | + } |
219 | 257 | |
220 | | - self::$lastMatches = false; |
221 | | - $output = preg_replace( $pattern, $replace, $subject, $limit, $count ); |
222 | | - if( $count < 1 ) { |
223 | | - return ''; |
| 258 | + // FLAG 'r' (no replacement - no output) handling: |
| 259 | + if( ! empty( $specialFlags[ self::FLAG_NO_REPLACE_NO_OUT ] ) ) { |
| 260 | + /* |
| 261 | + * only output replacement result if there actually was a match and therefore a replacement happened |
| 262 | + * (otherwise the input string would be returned) |
| 263 | + */ |
| 264 | + if( $count < 1 ) { |
| 265 | + return ''; |
| 266 | + } |
224 | 267 | } |
225 | 268 | } |
226 | | - return array( $output, 'noparse' => false, 'isHTML' => false ); |
| 269 | + return $output; |
227 | 270 | } |
228 | 271 | |
| 272 | + private static function regex_eFlag_callback( $matches ) { |
| 273 | + |
| 274 | + /** Don't cache this since it could contain dynamic content like #var which should be parsed */ |
| 275 | + |
| 276 | + $replace = self::$tmpRegexCB['replacement']; |
| 277 | + $parser = self::$tmpRegexCB['parser']; |
| 278 | + |
| 279 | + // last matches in #regex replace mode were set to false before, set them now: |
| 280 | + self::setLastMatches( $parser, $matches ); |
| 281 | + |
| 282 | + // use #regex_var for transforming replacement string with matches: |
| 283 | + $replace = self::regex_var( $parser, $replace ); |
| 284 | + |
| 285 | + // parse the replacement after matches are inserted |
| 286 | + // use a new frame, no need for SFH_OBJECT_ARGS style parser functions |
| 287 | + $frame = $parser->getPreprocessor()->newCustomFrame( $parser ); |
| 288 | + $replace = $parser->preprocessToDom( $replace ); |
| 289 | + $replace = trim( $frame->expand( $replace ) ); |
| 290 | + |
| 291 | + return $replace; |
| 292 | + } |
| 293 | + |
229 | 294 | /** |
230 | | - * Performs regular expression search and returns ALL matches separated |
| 295 | + * Performs regular expression searches and returns ALL matches separated |
231 | 296 | * |
232 | 297 | * @param $parser Parser instance of running Parser |
233 | 298 | * @param $subject String input string to evaluate |
234 | 299 | * @param $pattern String regular expression pattern - must use /, | or % as delimiter |
235 | 300 | * @param $separator String to separate all the matches |
236 | 301 | * @param $offset Integer first match to print out. Negative values possible: -1 means last match. |
237 | | - * @param $length Integer maximum matches for print out |
| 302 | + * @param $limit Integer maximum matches for print out |
| 303 | + * |
238 | 304 | * @return String result of all matching text parts separated by a string |
239 | 305 | */ |
240 | | - public static function regexall( &$parser , $subject = null , $pattern = null , $separator = ', ' , $offset = 0 , $length = null ) { |
| 306 | + public static function regexall( &$parser , $subject = '' , $pattern = '' , $separator = ', ' , $offset = 0 , $limit = null ) { |
241 | 307 | // validate and check for wrong input: |
242 | | - $continue = self::validateRegexCall( $subject, $pattern, false ); |
243 | | - if( $continue !== true ) { |
244 | | - return $continue; |
| 308 | + $continue = self::validateRegexCall( $parser, $subject, $pattern, $specialFlags, false ); |
| 309 | + if( ! $continue ) { |
| 310 | + return self::invalidRegexParsingOutput( $pattern );; |
245 | 311 | } |
246 | 312 | // adjust default values: |
247 | 313 | $offset = (int)$offset; |
248 | | - if( $length !== null ) { |
249 | | - $length = (int)$length; |
| 314 | + if( $limit !== null ) { |
| 315 | + $limit = (int)$limit; |
250 | 316 | } |
251 | 317 | |
252 | 318 | if( preg_match_all( $pattern, $subject, $matches, PREG_SET_ORDER ) ) { |
253 | 319 | |
254 | | - $matches = array_slice( $matches, $offset, $length ); |
| 320 | + $matches = array_slice( $matches, $offset, $limit ); |
255 | 321 | $output = ''; //$end = ($end or ($end >= count($matches)) ? $end : count($matches) ); |
256 | 322 | |
257 | 323 | for( $count = 0; $count < count( $matches ); $count++ ) { |
— | — | @@ -259,7 +325,7 @@ |
260 | 326 | } |
261 | 327 | $output .= trim( $matches[ $count ][0] ); |
262 | 328 | } |
263 | | - return array( $output, 'noparse' => false, 'isHTML' => false ); |
| 329 | + return $output; |
264 | 330 | } |
265 | 331 | return ''; |
266 | 332 | } |
— | — | @@ -272,27 +338,24 @@ |
273 | 339 | * @param $defaultVal Integer default value which will be returned when the result with the given index doesn't exist or is a void string |
274 | 340 | */ |
275 | 341 | public static function regex_var( &$parser, $index = 0, $defaultVal = '' ) { |
276 | | - if( self::$lastMatches === null ) { // last regex was invalid or none executed yet |
| 342 | + // get matches from last #regex |
| 343 | + $lastMatches = self::getLastMatches( $parser ); |
| 344 | + |
| 345 | + if( $lastMatches === null ) { // last regex was invalid or none executed yet |
277 | 346 | return ''; |
278 | 347 | } |
279 | | - // last matches are set to false in case last regex was in replace mode! |
280 | | - if( self::$lastMatches === false ) { |
281 | | - // execute last regular expression again, but this time not as replace |
282 | | - preg_match( self::$lastPattern, self::$lastSubject, self::$lastMatches ); |
283 | | - } |
284 | | - |
| 348 | + |
285 | 349 | // if requested index is numerical: |
286 | 350 | if (preg_match( '/^\d+$/', $index ) ) { |
287 | 351 | // if requested index is in matches and isn't '': |
288 | | - if( array_key_exists( $index, self::$lastMatches ) && self::$lastMatches[$index] !== '' ) |
289 | | - return self::$lastMatches[ $index ]; |
| 352 | + if( array_key_exists( $index, $lastMatches ) && $lastMatches[$index] !== '' ) |
| 353 | + return $lastMatches[ $index ]; |
290 | 354 | else { |
291 | 355 | // no match! Return just the default value: |
292 | 356 | return $defaultVal; |
293 | 357 | } |
294 | 358 | } else { |
295 | | - // complex string is given, something like "$1, $2 and $3": |
296 | | - |
| 359 | + // complex string is given, something like "$1, $2 and $3": |
297 | 360 | /* |
298 | 361 | * replace all back-references with their number increased by 1! |
299 | 362 | * this way we can also handle $0 in the right way! |
— | — | @@ -307,13 +370,13 @@ |
308 | 371 | * which will handle all the replace-escaping handling correct |
309 | 372 | */ |
310 | 373 | $regEx = ''; |
311 | | - foreach( self::$lastMatches as $match ) { |
| 374 | + foreach( $lastMatches as $match ) { |
312 | 375 | $regEx .= '(' . preg_quote( $match, '/' ) . ')'; |
313 | 376 | } |
314 | 377 | $regEx = "/^{$regEx}$/"; |
315 | | - $output = preg_replace( $regEx, $index, implode( '', self::$lastMatches ) ); |
| 378 | + $output = preg_replace( $regEx, $index, implode( '', $lastMatches ) ); |
316 | 379 | |
317 | | - return array( $output, 'noparse' => false, 'isHTML' => false ); |
| 380 | + return $output; |
318 | 381 | } |
319 | 382 | } |
320 | 383 | /** |
— | — | @@ -336,6 +399,7 @@ |
337 | 400 | * @param $parser Parser instance of running Parser |
338 | 401 | * @param $str String input string to change |
339 | 402 | * @param $delimiter String delimiter which also will be escaped within $str (default is set to '/') |
| 403 | + * |
340 | 404 | * @return String Returns the quoted string |
341 | 405 | */ |
342 | 406 | public static function regexquote( &$parser, $str = null, $delimiter = '/' ) { |
— | — | @@ -365,27 +429,74 @@ |
366 | 430 | } |
367 | 431 | |
368 | 432 | |
369 | | - /** |
370 | | - * DEPRECATED: Functionality is included in 'regexquote' now for the first character |
371 | | - * in case its a trouble-maker. |
| 433 | + /********************************* |
| 434 | + **** HELPER - For Store of **** |
| 435 | + **** regex_var within Parser **** |
| 436 | + ********************************* |
| 437 | + **** |
| 438 | + ** |
| 439 | + * Adding the info to each Parser object makes it invulnerable to new Parser objects being created |
| 440 | + * and destroyed throughout the main parsing process. Only the parser on which 'ParserClearState' is |
| 441 | + * called will lose its data since the parsing process has been declared finished and the data won't be |
| 442 | + * needed anymore. |
| 443 | + ** |
| 444 | + ***/ |
| 445 | + |
| 446 | + protected static function initLastRegex( Parser &$parser, $pattern, $subject ) { |
| 447 | + self::setLastMatches( $parser, array() ); |
| 448 | + self::setLastPattern( $parser, $pattern ); |
| 449 | + self::setLastSubject( $parser, $subject ); |
| 450 | + } |
| 451 | + |
| 452 | + public static function onParserClearState( &$parser ) { |
| 453 | + //cleanup to avoid conflicts with job queue or Special:Import |
| 454 | + self::setLastMatches( $parser, null ); |
| 455 | + self::setLastPattern( $parser, '' ); |
| 456 | + self::setLastSubject( $parser, '' ); |
| 457 | + |
| 458 | + return true; |
| 459 | + } |
| 460 | + |
| 461 | + /** |
| 462 | + * Returns the last regex matches done by #regex in the context of the same parser object. |
372 | 463 | * |
373 | | - * Converts all chars in a pattern for regex to escaped hex ascii syntax '=' => '\x3D' |
374 | | - * This function is good for escaping characters which can cause problems in MW like |
375 | | - * ';' as a first character of a string or '|' characters |
376 | | - * |
377 | | - * @param $parser ParseriInstance of running Parser |
378 | | - * @param $str String input string to change |
379 | | - * @return String result of the input with escaped characters in hex ascii syntax |
380 | | - */ |
381 | | - /* |
382 | | - public static function regexascii( &$parser, $str = null ) { |
383 | | - if( $str === null ) return ''; |
| 464 | + * @param Parser $parser |
| 465 | + * @return array|null |
| 466 | + */ |
| 467 | + public static function getLastMatches( Parser &$parser ) { |
| 468 | + |
| 469 | + if( isset( $parser->mExtRegexFun['lastMatches'] ) ) { |
| 470 | + |
| 471 | + // last matches are set to false in case last regex was in replace mode! Get them on demand: |
| 472 | + if( $parser->mExtRegexFun['lastMatches'] === false ) { |
| 473 | + preg_match( self::getLastPattern( $parser ), self::getLastSubject( $parser ), $parser->mExtRegexFun['lastMatches'] ); |
| 474 | + } |
| 475 | + return $parser->mExtRegexFun['lastMatches']; |
| 476 | + } |
| 477 | + return null; |
| 478 | + } |
| 479 | + protected static function setLastMatches( Parser &$parser, $value ) { |
| 480 | + $parser->mExtRegexFun['lastMatches'] = $value; |
| 481 | + } |
384 | 482 | |
385 | | - $pattern = '/(.)/e'; |
386 | | - $replace = "'\\x' . dechex( ord( '$1' ) )"; |
| 483 | + public static function getLastPattern( Parser &$parser ) { |
| 484 | + if( isset( $parser->mExtRegexFun['lastPattern'] ) ) { |
| 485 | + return $parser->mExtRegexFun['lastPattern']; |
| 486 | + } |
| 487 | + return ''; |
| 488 | + } |
| 489 | + protected static function setLastPattern( Parser &$parser, $value ) { |
| 490 | + $parser->mExtRegexFun['lastPattern'] = $value; |
| 491 | + } |
387 | 492 | |
388 | | - $output = preg_replace( $pattern, $replace, $str ); |
389 | | - return array($output, 'noparse' => true); |
390 | | - } |
391 | | - */ |
| 493 | + public static function getLastSubject( Parser &$parser ) { |
| 494 | + if( isset( $parser->mExtRegexFun['lastSubject'] ) ) { |
| 495 | + return $parser->mExtRegexFun['lastSubject']; |
| 496 | + } |
| 497 | + return ''; |
| 498 | + } |
| 499 | + protected static function setLastSubject( Parser &$parser, $value ) { |
| 500 | + $parser->mExtRegexFun['lastSubject'] = $value; |
| 501 | + } |
| 502 | + |
392 | 503 | } |
\ No newline at end of file |