Index: trunk/extensions/RegexFunctions/RegexFunctions.php |
— | — | @@ -16,7 +16,7 @@ |
17 | 17 | 'name' => 'RegexFunctions', |
18 | 18 | 'author' => 'Ryan Schmidt', |
19 | 19 | 'url' => 'http://www.mediawiki.org/wiki/Extension:RegexFunctions', |
20 | | - 'version' => '1.1', |
| 20 | + 'version' => '1.2', |
21 | 21 | 'description' => 'Regular Expression parser functions', |
22 | 22 | 'descriptionmsg' => 'regexfunctions-desc', |
23 | 23 | ); |
— | — | @@ -27,12 +27,14 @@ |
28 | 28 | //default globals |
29 | 29 | //how many functions are allowed in a single page? Keep this at least above 3 for usability |
30 | 30 | $wgRegexFunctionsPerPage = 10; |
31 | | -//should we allow modifiers in the functions, e.g. the /g and /i modifiers for global and case-insensitive? |
32 | | -//This does NOT enable the 'e' modifier for preg_replace, see the next variable for that |
| 31 | +//should we allow modifiers in the functions, e.g. the /i modifier for case-insensitive? |
| 32 | +//This does NOT enable the /e modifier for preg_replace, see the next variable for that |
33 | 33 | $wgRegexFunctionsAllowModifiers = true; |
34 | | -//should we allow the 'e' modifier in preg_replace? Requires AllowModifiers to be true. |
35 | | -//Don't enable this unless you trust every single editor on your wiki, as it opens up a potential XSS vector |
| 34 | +//should we allow the /e modifier in preg_replace? Requires AllowModifiers to be true. |
| 35 | +//Don't enable this unless you trust every single editor on your wiki, as it may open up potential XSS vectors |
36 | 36 | $wgRegexFunctionsAllowE = false; |
| 37 | +//should we allow internal options to be set (e.g. (?opts) or (?opts:some regex)) |
| 38 | +$wgRegexFunctionsAllowOptions = true; |
37 | 39 | //limit for rsplit and rreplace functions. -1 is unlimited |
38 | 40 | $wgRegexFunctionsLimit = -1; |
39 | 41 | //array of functions to disable, aka these functions cannot be used :) |
— | — | @@ -59,7 +61,9 @@ |
60 | 62 | |
61 | 63 | class ExtRegexFunctions { |
62 | 64 | var $num = 0; |
63 | | - |
| 65 | + var $modifiers = array('i', 'm', 's', 'x', 'A', 'D', 'S', 'U', 'X', 'J', 'u', 'e'); |
| 66 | + var $options = array('i', 'm', 's', 'x', 'U', 'X', 'J'); |
| 67 | + |
64 | 68 | function rmatch ( &$parser, $string = '', &$pattern = '', &$return = '', $notfound = '', $offset = 0 ) { |
65 | 69 | global $wgRegexFunctionsPerPage, $wgRegexFunctionsAllowModifiers, $wgRegexFunctionsDisable; |
66 | 70 | if(in_array('rmatch', $wgRegexFunctionsDisable)) |
— | — | @@ -67,21 +71,19 @@ |
68 | 72 | $this->num++; |
69 | 73 | if($this->num > $wgRegexFunctionsPerPage) |
70 | 74 | return; |
71 | | - if(!$wgRegexFunctionsAllowModifiers) |
72 | | - $pattern = str_replace('/', '\/', $pattern); |
73 | | - $num = preg_match( $pattern, $string, $matches, PREG_OFFSET_CAPTURE, $offset ); |
| 75 | + $pattern = $this->sanitize($pattern, $wgRegexFunctionsAllowModifiers, false); |
| 76 | + $num = preg_match( $pattern, $string, $matches, PREG_OFFSET_CAPTURE, (int) $offset ); |
74 | 77 | if($num === false) |
75 | 78 | return; |
76 | 79 | if($num === 0) |
77 | 80 | return $notfound; |
78 | | - $mn = 0; |
79 | | - foreach($matches as $match) { |
80 | | - if($mn > 9) |
81 | | - break; |
82 | | - $return = str_replace('$'.$mn, $matches[$mn][0], $return); |
83 | | - $return = str_replace('\\\\'.$mn, $matches[$mn][1], $return); |
84 | | - $mn++; |
85 | | - } |
| 81 | + //temporarily change all backslashes to the placeholder %$ (changed back after the substitutions below) |
| 82 | + $return = str_replace('\\', '%$', $return); |
| 83 | + $return = preg_replace('/%?\$%?\$([0-9]+)/e', 'array_key_exists($1, $matches) ? $matches[$1][1] : \'\'', $return); |
| 84 | + $return = preg_replace('/%?\$%?\$\{([0-9]+)\}/e', 'array_key_exists($1, $matches) ? $matches[$1][1] : \'\'', $return); |
| 85 | + $return = preg_replace('/%?\$([0-9]+)/e', 'array_key_exists($1, $matches) ? $matches[$1][0] : \'\'', $return); |
| 86 | + $return = preg_replace('/%?\$\{([0-9]+)\}/e', 'array_key_exists($1, $matches) ? $matches[$1][0] : \'\'', $return); |
| 87 | + $return = str_replace('%$', '\\', $return); |
86 | 88 | return $return; |
87 | 89 | } |
88 | 90 | |
— | — | @@ -92,10 +94,18 @@ |
93 | 95 | $this->num++; |
94 | 96 | if($this->num > $wgRegexFunctionsPerPage) |
95 | 97 | return; |
96 | | - if(!$wgRegexFunctionsAllowModifiers) |
97 | | - $pattern = str_replace('/', '\/', $pattern); |
| 98 | + $pattern = $this->sanitize($pattern, $wgRegexFunctionsAllowModifiers, false); |
98 | 99 | $res = preg_split( $pattern, $string, $wgRegexFunctionsLimit ); |
99 | | - return $res[$piece]; |
| 100 | + $p = (int) $piece; |
| 101 | + //allow negative pieces to work from the end of the array |
| 102 | + if($p < 0) |
| 103 | + $p = $p + count($res); |
| 104 | + //clamp pieces that don't exist to the first or last element |
| 105 | + if($p < 0) |
| 106 | + $p = 0; |
| 107 | + if($p >= count($res)) |
| 108 | + $p = count($res) - 1; |
| 109 | + return $res[$p]; |
100 | 110 | } |
101 | 111 | |
102 | 112 | function rreplace ( &$parser, $string = '', &$pattern = '', &$replace = '' ) { |
— | — | @@ -105,11 +115,44 @@ |
106 | 116 | $this->num++; |
107 | 117 | if($this->num > $wgRegexFunctionsPerPage) |
108 | 118 | return; |
109 | | - if(!$wgRegexFunctionsAllowModifiers) |
110 | | - $pattern = str_replace('/', '\/', $pattern); |
111 | | - elseif(!$wgRegexFunctionsAllowE) |
112 | | - $pattern = preg_replace('/(\/.*?)e(.*?)$/i', '$1$2', $pattern); |
| 119 | + $pattern = $this->sanitize($pattern, $wgRegexFunctionsAllowModifiers, $wgRegexFunctionsAllowE); |
113 | 120 | $res = preg_replace($pattern, $replace, $string, $wgRegexFunctionsLimit); |
114 | 121 | return $res; |
115 | 122 | } |
| 123 | + |
//Sanitizes a user-supplied regex pattern before it is handed to a preg_* function.
//The result is always delimited by forward slashes.
// $pattern - raw pattern, either "/body/modifiers" or a bare body without delimiters
// $m       - when true, modifiers after the closing delimiter are kept if listed in $this->modifiers
// $e       - when true (and $m is true) the e modifier is kept too; otherwise it is always dropped
//Inside the body, inline option groups such as (?i) or (?i-m:...) have their option letters
//filtered through cleanupInternal(), and every forward slash that is not already escaped gets
//a backslash, so body text can never end the delimiter early and be parsed as modifiers.
function sanitize($pattern, $m = false, $e = false) {
	//a pattern counts as delimited when it starts with "/" and has a later "/" that is not
	//directly preceded by a backslash; the last such slash closes the body
	if(preg_match('/^\/(.*)([^\\\\])\/(.*?)$/', $pattern, $matches)) {
		$body = $matches[1] . $matches[2];
		$requested = $matches[3];
	} else {
		$body = $pattern;
		$requested = '';
	}
	//single pass over the body: escaped pairs are skipped, option groups are filtered and
	//bare slashes are escaped (a callback is used instead of the /e modifier, which PHP 7 removed)
	$body = preg_replace_callback('!(\\\\.)|\(\?([A-Za-z\-]*)([:)])|/!s', array($this, 'sanitizeToken'), $body);
	$ret = '/' . $body . '/';
	if($m) {
		//whitelist: only modifiers known to $this->modifiers survive, in that list's order
		foreach($this->modifiers as $val) {
			if($val === 'e' && !$e)
				continue;
			if(strpos($requested, $val) !== false)
				$ret .= $val;
		}
	}
	return $ret;
}

//preg_replace_callback() helper for sanitize(): rewrites one token of the pattern body.
// $found - match array; [0] is the whole token, [2] the option letters and [3] the ":" or ")"
//          terminator when the token is an option group
function sanitizeToken($found) {
	//bare delimiter inside the body
	if($found[0] === '/')
		return '\\/';
	//backslash-escaped pair: group 3 is absent, keep the token verbatim
	if(!isset($found[3]))
		return $found[0];
	//(?R) is whole-pattern recursion, not an option setting
	if($found[2] === 'R')
		return $found[0];
	return '(?' . $this->cleanupInternal($found[2]) . $found[3];
}
| 146 | + |
//Cleans up the option letters of an inline option group, keeping only letters listed in
//$this->options.
// $str - text found between "(?" and the end of the option list, e.g. "i", "im-sx" or "-i"
//Returns the filtered options in the order of $this->options. Letters after a hyphen are
//options being switched off and stay behind the hyphen, so "(?-i)" is not turned into "(?i)".
//Returns an empty string when $wgRegexFunctionsAllowOptions is false.
function cleanupInternal($str) {
	global $wgRegexFunctionsAllowOptions;
	if(!$wgRegexFunctionsAllowOptions)
		return '';
	$str = (string) $str;
	//anything from a colon onward is subpattern text, not option letters
	$colon = strpos($str, ':');
	if($colon !== false)
		$str = substr($str, 0, $colon);
	//letters before the first hyphen are switched on, letters after it are switched off
	$halves = explode('-', $str, 2);
	$on = '';
	$off = '';
	foreach($this->options as $opt) {
		if(strpos($halves[0], $opt) !== false)
			$on .= $opt;
		if(isset($halves[1]) && strpos($halves[1], $opt) !== false)
			$off .= $opt;
	}
	if($off === '')
		return $on;
	return $on . '-' . $off;
}
116 | 159 | } |