r105629 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r105628‎ | r105629 | r105630 >
Date:00:28, 9 December 2011
Author:dantman
Status:ok
Tags:
Comment:
Followup r104676, r104688:
- Update our woefully out of date doc comment for WebRequest::getPathInfo (we haven't simply been extracting a PATH_INFO for ages)
- Make PathRouter::makeWeight protected
- Add more comments to the PathRouter code
- Add two more edge case tests to the PathRouter tests.
Modified paths:
  • /trunk/phase3/includes/PathRouter.php (modified) (history)
  • /trunk/phase3/includes/WebRequest.php (modified) (history)
  • /trunk/phase3/tests/phpunit/includes/PathRouterTest.php (modified) (history)

Diff [purge]

Index: trunk/phase3/tests/phpunit/includes/PathRouterTest.php
@@ -202,4 +202,25 @@
203203 $this->assertEquals( $matches, array( 'title' => "Lorem_ipsum_dolor_sit_amet,_consectetur_adipisicing_elit,_sed_do_eiusmod_tempor_incididunt_ut_labore_et_dolore_magna_aliqua._Ut_enim_ad_minim_veniam,_quis_nostrud_exercitation_ullamco_laboris_nisi_ut_aliquip_ex_ea_commodo_consequat._Duis_aute_irure_dolor_in_reprehenderit_in_voluptate_velit_esse_cillum_dolore_eu_fugiat_nulla_pariatur._Excepteur_sint_occaecat_cupidatat_non_proident,_sunt_in_culpa_qui_officia_deserunt_mollit_anim_id_est_laborum." ) );
204204 }
205205
 206+
 207+ /**
 208+ * Ensure that the php passed site of parameter values are not urldecoded
 209+ */
 210+ public function testPatternUrlencoding() {
 211+ $router = new PathRouter;
 212+ $router->add( "/wiki/$1", array( 'title' => '%20:$1' ) );
 213+ $matches = $router->parse( "/wiki/Foo" );
 214+ $this->assertEquals( $matches, array( 'title' => '%20:Foo' ) );
 215+ }
 216+
 217+ /**
 218+ * Ensure that raw parameter values do not have any variable replacements or urldecoding
 219+ */
 220+ public function testRawParamValue() {
 221+ $router = new PathRouter;
 222+ $router->add( "/wiki/$1", array( 'title' => array( 'value' => 'bar%20$1' ) ) );
 223+ $matches = $router->parse( "/wiki/Foo" );
 224+ $this->assertEquals( $matches, array( 'title' => 'bar%20$1' ) );
 225+ }
 226+
206227 }
Index: trunk/phase3/includes/WebRequest.php
@@ -62,15 +62,19 @@
6363 }
6464
6565 /**
66 - * Extract the PATH_INFO variable even when it isn't a reasonable
67 - * value. On some large webhosts, PATH_INFO includes the script
68 - * path as well as everything after it.
 66+ * Extract relevant query arguments from the http request uri's path
 67+ * to be merged with the normal php provided query arguments.
 68+ * Tries to use the REQUEST_URI data if available and parses it
 69+ * according to the wiki's configuration looking for any known pattern.
6970 *
 71+ * If the REQUEST_URI is not provided we'll fall back on the PATH_INFO
 72+ * provided by the server if any and use that to set a 'title' parameter.
 73+ *
7074 * @param $want string: If this is not 'all', then the function
7175 * will return an empty array if it determines that the URL is
7276 * inside a rewrite path.
7377 *
74 - * @return Array: 'title' key is the title of the article.
 78+ * @return Array: Any query arguments found in path matches.
7579 */
7680 static public function getPathInfo( $want = 'all' ) {
7781 // PATH_INFO is mangled due to http://bugs.php.net/bug.php?id=31892
@@ -120,13 +124,6 @@
121125
122126 global $wgVariantArticlePath, $wgContLang;
123127 if( $wgVariantArticlePath ) {
124 - /*$variantPaths = array();
125 - foreach( $wgContLang->getVariants() as $variant ) {
126 - $variantPaths[$variant] =
127 - str_replace( '$2', $variant, $wgVariantArticlePath );
128 - }
129 - $router->add( $variantPaths, array( 'parameter' => 'variant' ) );*/
130 - // Maybe actually this?
131128 $router->add( $wgVariantArticlePath,
132129 array( 'variant' => '$2'),
133130 array( '$2' => $wgContLang->getVariants() )
Index: trunk/phase3/includes/PathRouter.php
@@ -50,7 +50,15 @@
5151 * @author Daniel Friesen
5252 */
5353 class PathRouter {
 54+
 55+ /**
 56+ * Protected helper to do the actual bulk work of adding a single pattern.
 57+ * This is in a separate method so that add() can handle the difference between
 58+ * a single string $path and an array() $path that contains multiple path
 59+ * patterns each with an associated $key to pass on.
 60+ */
5461 protected function doAdd( $path, $params, $options, $key = null ) {
 62+ // Make sure all paths start with a /
5563 if ( $path[0] !== '/' ) {
5664 $path = '/' . $path;
5765 }
@@ -65,13 +73,18 @@
6674 }
6775 }
6876
 77+ // If 'title' is not specified and our path pattern contains a $1
 78+ // Add a default 'title' => '$1' rule to the parameters.
6979 if ( !isset( $params['title'] ) && strpos( $path, '$1' ) !== false ) {
7080 $params['title'] = '$1';
7181 }
 82+ // If the user explicitly marked 'title' as false then omit it from the matches
7283 if ( isset( $params['title'] ) && $params['title'] === false ) {
7384 unset( $params['title'] );
7485 }
7586
 87+ // Loop over our parameters and convert basic key => string
 88+ // patterns into fully descriptive array form
7689 foreach ( $params as $paramName => $paramData ) {
7790 if ( is_string( $paramData ) ) {
7891 if ( preg_match( '/\$(\d+|key)/u', $paramData ) ) {
@@ -87,6 +100,8 @@
88101 }
89102 }
90103
 104+ // Loop over our options and convert any single value $# restrictions
 105+ // into an array so we only have to do in_array tests.
91106 foreach ( $options as $optionName => $optionData ) {
92107 if ( preg_match( '/^\$\d+$/u', $optionName ) ) {
93108 if ( !is_array( $optionData ) ) {
@@ -131,6 +146,10 @@
132147 $this->add( $path, $params, $options );
133148 }
134149
 150+ /**
 151+ * Protected helper to re-sort our patterns so that the most specific
 152+ * (most heavily weighted) patterns are at the start of the array.
 153+ */
135154 protected function sortByWeight() {
136155 $weights = array();
137156 foreach( $this->patterns as $key => $pattern ) {
@@ -139,7 +158,7 @@
140159 array_multisort( $weights, SORT_DESC, SORT_NUMERIC, $this->patterns );
141160 }
142161
143 - public static function makeWeight( $pattern ) {
 162+ protected static function makeWeight( $pattern ) {
144163 # Start with a weight of 0
145164 $weight = 0;
146165
@@ -180,6 +199,8 @@
181200 * @return Array The array of matches for the path
182201 */
183202 public function parse( $path ) {
 203+ // Make sure our patterns are sorted by weight so the most specific
 204+ // matches are tested first
184205 $this->sortByWeight();
185206
186207 $matches = null;
@@ -191,41 +212,58 @@
192213 }
193214 }
194215
 216+ // We know the difference between null (no matches) and
 217+ // array() (a match with no data) but our WebRequest caller
 218+ // expects array() even when we have no matches so return
 219+ // a array() when we have null
195220 return is_null( $matches ) ? array() : $matches;
196221 }
197222
198223 protected static function extractTitle( $path, $pattern ) {
 224+ // Convert the path pattern into a regexp we can match with
199225 $regexp = preg_quote( $pattern->path, '#' );
 226+ // .* for the $1
200227 $regexp = preg_replace( '#\\\\\$1#u', '(?P<par1>.*)', $regexp );
 228+ // .+ for the rest of the parameter numbers
201229 $regexp = preg_replace( '#\\\\\$(\d+)#u', '(?P<par$1>.+?)', $regexp );
202230 $regexp = "#^{$regexp}$#";
203231
204232 $matches = array();
205233 $data = array();
206234
 235+ // Try to match the path we were asked to parse with our regexp
207236 if ( preg_match( $regexp, $path, $m ) ) {
 237+ // Ensure that any $# restriction we have set in our {$option}s
 238+ // matches properly here.
208239 foreach ( $pattern->options as $key => $option ) {
209240 if ( preg_match( '/^\$\d+$/u', $key ) ) {
210241 $n = intval( substr( $key, 1 ) );
211242 $value = rawurldecode( $m["par{$n}"] );
212243 if ( !in_array( $value, $option ) ) {
 244+ // If any restriction does not match return null
 245+ // to signify that this rule did not match.
213246 return null;
214247 }
215248 }
216249 }
217250
 251+ // Give our $data array a copy of every $# that was matched
218252 foreach ( $m as $matchKey => $matchValue ) {
219253 if ( preg_match( '/^par\d+$/u', $matchKey ) ) {
220254 $n = intval( substr( $matchKey, 3 ) );
221255 $data['$'.$n] = rawurldecode( $matchValue );
222256 }
223257 }
 258+ // If present give our $data array a $key as well
224259 if ( isset( $pattern->key ) ) {
225260 $data['$key'] = $pattern->key;
226261 }
227262
 263+ // Go through our parameters for this match and add data to our matches and data arrays
228264 foreach ( $pattern->params as $paramName => $paramData ) {
229265 $value = null;
 266+ // Differentiate data: from normal parameters and keep the correct
 267+ // array key around (ie: foo for data:foo)
230268 if ( preg_match( '/^data:/u', $paramName ) ) {
231269 $isData = true;
232270 $key = substr( $paramName, 5 );
@@ -235,15 +273,19 @@
236274 }
237275
238276 if ( isset( $paramData['value'] ) ) {
 277+ // For basic values just set the raw data as the value
239278 $value = $paramData['value'];
240279 } elseif ( isset( $paramData['pattern'] ) ) {
 280+ // For patterns we have to make value replacements on the string
241281 $value = $paramData['pattern'];
 282+ // For each $# match replace any $# within the value
242283 foreach ( $m as $matchKey => $matchValue ) {
243284 if ( preg_match( '/^par\d+$/u', $matchKey ) ) {
244285 $n = intval( substr( $matchKey, 3 ) );
245286 $value = str_replace( '$' . $n, rawurldecode( $matchValue ), $value );
246287 }
247288 }
 289+ // If a key was set replace any $key within the value
248290 if ( isset( $pattern->key ) ) {
249291 $value = str_replace( '$key', $pattern->key, $value );
250292 }
@@ -254,6 +296,7 @@
255297 }
256298 }
257299
 300+ // Send things that start with data: to $data, the rest to $matches
258301 if ( $isData ) {
259302 $data[$key] = $value;
260303 } else {
@@ -261,12 +304,15 @@
262305 }
263306 }
264307
 308+ // If this match includes a callback, execute it
265309 if ( isset( $pattern->options['callback'] ) ) {
266310 call_user_func_array( $pattern->options['callback'], array( &$matches, $data ) );
267311 }
268312 } else {
 313+ // Our regexp didn't match, return null to signify no match.
269314 return null;
270315 }
 316+ // Fall through, everything went ok, return our matches array
271317 return $matches;
272318 }
273319

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r104676Recommit PathRouter code from r104274, r104284, r104285 without the WebReques...dantman13:37, 30 November 2011
r104688Followup r104274, r104676. Fix the bug that broke fr. Forgot to rawurldecode ...dantman15:09, 30 November 2011

Status & tagging log