r104688 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r104687‎ | r104688 | r104689 >
Date:15:09, 30 November 2011
Author:dantman
Status:ok (Comments)
Tags:
Comment:
Followup r104274, r104676. Fix the bug that broke fr. Forgot to rawurldecode path contents.
Also add /u just for sanity sake.
Add new tests for url encoding, unicode, and length edge cases.
Modified paths:
  • /trunk/phase3/includes/PathRouter.php (modified) (history)
  • /trunk/phase3/tests/phpunit/includes/PathRouterTest.php (modified) (history)

Diff [purge]

Index: trunk/phase3/tests/phpunit/includes/PathRouterTest.php
@@ -5,13 +5,17 @@
66
77 class PathRouterTest extends MediaWikiTestCase {
88
 9+ public function setUp() {
 10+ $router = new PathRouter;
 11+ $router->add("/wiki/$1");
 12+ $this->basicRouter = $router;
 13+ }
 14+
915 /**
1016 * Test basic path parsing
1117 */
1218 public function testBasic() {
13 - $router = new PathRouter;
14 - $router->add("/$1");
15 - $matches = $router->parse( "/Foo" );
 19+ $matches = $this->basicRouter->parse( "/wiki/Foo" );
1620 $this->assertEquals( $matches, array( 'title' => "Foo" ) );
1721 }
1822
@@ -111,7 +115,7 @@
112116 array( 'a' => 'b', 'data:foo' => 'bar' ),
113117 array( 'callback' => array( $this, 'callbackForTest' ) )
114118 );
115 - $matches = $router->parse( '/Foo');
 119+ $matches = $router->parse( '/Foo' );
116120 $this->assertEquals( $matches, array(
117121 'title' => "Foo",
118122 'x' => 'Foo',
@@ -152,4 +156,50 @@
153157 }
154158 }
155159
 160+ /**
 161+ * Make sure the router handles titles like Special:Recentchanges correctly
 162+ */
 163+ public function testSpecial() {
 164+ $matches = $this->basicRouter->parse( "/wiki/Special:Recentchanges" );
 165+ $this->assertEquals( $matches, array( 'title' => "Special:Recentchanges" ) );
 166+ }
 167+
 168+ /**
 169+ * Make sure the router decodes urlencoding properly
 170+ */
 171+ public function testUrlencoding() {
 172+ $matches = $this->basicRouter->parse( "/wiki/Title_With%20Space" );
 173+ $this->assertEquals( $matches, array( 'title' => "Title_With Space" ) );
 174+ }
 175+
 176+ /**
 177+ * Make sure the router handles characters like +&() properly
 178+ */
 179+ public function testCharacters() {
 180+ $matches = $this->basicRouter->parse( "/wiki/Plus+And&Stuff()" );
 181+ $this->assertEquals( $matches, array( 'title' => "Plus+And&Stuff()" ) );
 182+ }
 183+
 184+ /**
 185+ * Make sure the router handles unicode characters correctly
 186+ * @depends testSpecial
 187+ * @depends testUrlencoding
 188+ * @depends testCharacters
 189+ */
 190+ public function testUnicode() {
 191+ $matches = $this->basicRouter->parse( "/wiki/Spécial:Modifications_récentes" );
 192+ $this->assertEquals( $matches, array( 'title' => "Spécial:Modifications_récentes" ) );
 193+
 194+ $matches = $this->basicRouter->parse( "/wiki/Sp%C3%A9cial:Modifications_r%C3%A9centes" );
 195+ $this->assertEquals( $matches, array( 'title' => "Spécial:Modifications_récentes" ) );
 196+ }
 197+
 198+ /**
 199+ * Ensure the router doesn't choke on long paths.
 200+ */
 201+ public function testLength() {
 202+ $matches = $this->basicRouter->parse( "/wiki/Lorem_ipsum_dolor_sit_amet,_consectetur_adipisicing_elit,_sed_do_eiusmod_tempor_incididunt_ut_labore_et_dolore_magna_aliqua._Ut_enim_ad_minim_veniam,_quis_nostrud_exercitation_ullamco_laboris_nisi_ut_aliquip_ex_ea_commodo_consequat._Duis_aute_irure_dolor_in_reprehenderit_in_voluptate_velit_esse_cillum_dolore_eu_fugiat_nulla_pariatur._Excepteur_sint_occaecat_cupidatat_non_proident,_sunt_in_culpa_qui_officia_deserunt_mollit_anim_id_est_laborum." );
 203+ $this->assertEquals( $matches, array( 'title' => "Lorem_ipsum_dolor_sit_amet,_consectetur_adipisicing_elit,_sed_do_eiusmod_tempor_incididunt_ut_labore_et_dolore_magna_aliqua._Ut_enim_ad_minim_veniam,_quis_nostrud_exercitation_ullamco_laboris_nisi_ut_aliquip_ex_ea_commodo_consequat._Duis_aute_irure_dolor_in_reprehenderit_in_voluptate_velit_esse_cillum_dolore_eu_fugiat_nulla_pariatur._Excepteur_sint_occaecat_cupidatat_non_proident,_sunt_in_culpa_qui_officia_deserunt_mollit_anim_id_est_laborum." ) );
 204+ }
 205+
156206 }
Index: trunk/phase3/includes/PathRouter.php
@@ -73,7 +73,7 @@
7474
7575 foreach ( $params as $paramName => $paramData ) {
7676 if ( is_string( $paramData ) ) {
77 - if ( preg_match( '/\$(\d+|key)/', $paramData ) ) {
 77+ if ( preg_match( '/\$(\d+|key)/u', $paramData ) ) {
7878 $paramArrKey = 'pattern';
7979 } else {
8080 // If there's no replacement use a value instead
@@ -87,7 +87,7 @@
8888 }
8989
9090 foreach ( $options as $optionName => $optionData ) {
91 - if ( preg_match( '/^\$\d+$/', $optionName ) ) {
 91+ if ( preg_match( '/^\$\d+$/u', $optionName ) ) {
9292 if ( !is_array( $optionData ) ) {
9393 $options[$optionName] = array( $optionData );
9494 }
@@ -147,10 +147,10 @@
148148
149149 # For each level of the path
150150 foreach( $path as $piece ) {
151 - if ( preg_match( '/^\$(\d+|key)$/', $piece ) ) {
 151+ if ( preg_match( '/^\$(\d+|key)$/u', $piece ) ) {
152152 # For a piece that is only a $1 variable add 1 points of weight
153153 $weight += 1;
154 - } elseif ( preg_match( '/\$(\d+|key)/', $piece ) ) {
 154+ } elseif ( preg_match( '/\$(\d+|key)/u', $piece ) ) {
155155 # For a piece that simply contains a $1 variable add 2 points of weight
156156 $weight += 2;
157157 } else {
@@ -160,7 +160,7 @@
161161 }
162162
163163 foreach ( $pattern->options as $key => $option ) {
164 - if ( preg_match( '/^\$\d+$/', $key ) ) {
 164+ if ( preg_match( '/^\$\d+$/u', $key ) ) {
165165 # Add 0.5 for restrictions to values
166166 # This way given two separate "/$2/$1" patterns the
167167 # one with a limited set of $2 values will dominate
@@ -195,8 +195,8 @@
196196
197197 protected static function extractTitle( $path, $pattern ) {
198198 $regexp = preg_quote( $pattern->path, '#' );
199 - $regexp = preg_replace( '#\\\\\$1#', '(?P<par1>.*)', $regexp );
200 - $regexp = preg_replace( '#\\\\\$(\d+)#', '(?P<par$1>.+?)', $regexp );
 199+ $regexp = preg_replace( '#\\\\\$1#u', '(?P<par1>.*)', $regexp );
 200+ $regexp = preg_replace( '#\\\\\$(\d+)#u', '(?P<par$1>.+?)', $regexp );
201201 $regexp = "#^{$regexp}$#";
202202
203203 $matches = array();
@@ -204,9 +204,9 @@
205205
206206 if ( preg_match( $regexp, $path, $m ) ) {
207207 foreach ( $pattern->options as $key => $option ) {
208 - if ( preg_match( '/^\$\d+$/', $key ) ) {
 208+ if ( preg_match( '/^\$\d+$/u', $key ) ) {
209209 $n = intval( substr( $key, 1 ) );
210 - $value = $m["par{$n}"];
 210+ $value = rawurldecode( $m["par{$n}"] );
211211 if ( !in_array( $value, $option ) ) {
212212 return null;
213213 }
@@ -214,9 +214,9 @@
215215 }
216216
217217 foreach ( $m as $matchKey => $matchValue ) {
218 - if ( preg_match( '/^par\d+$/', $matchKey ) ) {
 218+ if ( preg_match( '/^par\d+$/u', $matchKey ) ) {
219219 $n = intval( substr( $matchKey, 3 ) );
220 - $data['$'.$n] = $matchValue;
 220+ $data['$'.$n] = rawurldecode( $matchValue );
221221 }
222222 }
223223 if ( isset( $pattern->key ) ) {
@@ -225,7 +225,7 @@
226226
227227 foreach ( $pattern->params as $paramName => $paramData ) {
228228 $value = null;
229 - if ( preg_match( '/^data:/', $paramName ) ) {
 229+ if ( preg_match( '/^data:/u', $paramName ) ) {
230230 $isData = true;
231231 $key = substr( $paramName, 5 );
232232 } else {
@@ -238,15 +238,15 @@
239239 } elseif ( isset( $paramData['pattern'] ) ) {
240240 $value = $paramData['pattern'];
241241 foreach ( $m as $matchKey => $matchValue ) {
242 - if ( preg_match( '/^par\d+$/', $matchKey ) ) {
 242+ if ( preg_match( '/^par\d+$/u', $matchKey ) ) {
243243 $n = intval( substr( $matchKey, 3 ) );
244 - $value = str_replace( '$' . $n, $matchValue, $value );
 244+ $value = str_replace( '$' . $n, rawurldecode( $matchValue ), $value );
245245 }
246246 }
247247 if ( isset( $pattern->key ) ) {
248248 $value = str_replace( '$key', $pattern->key, $value );
249249 }
250 - if ( preg_match( '/\$(\d+|key)/', $value ) ) {
 250+ if ( preg_match( '/\$(\d+|key)/u', $value ) ) {
251251 // Still contains $# or $key patterns after replacement
252252 // Seams like we don't have all the data, abort
253253 return null;

Follow-up revisions

RevisionCommit summaryAuthorDate
r104689Followup r104688, reintroduce the full PathRouter code now that the bug with ...dantman15:12, 30 November 2011
r105510FU r104688:...aaron02:02, 8 December 2011
r105629Followup r104676, r104688:...dantman00:28, 9 December 2011

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r104274Implement path routing code....dantman16:29, 26 November 2011
r104676Recommit PathRouter code from r104274, r104284, r104285 without the WebReques...dantman13:37, 30 November 2011

Comments

#Comment by Aaron Schulz (talk | contribs)   02:03, 8 December 2011
$1 will match 0 or more while the rest will match 1 or more
In a pattern $1, $2, etc... will be replaced with the relevant contents'.

So $1 will be replaced with a string and $2 (and beyond) with arrays? This could probably use a bit more documentation, including protected functions.

The idea of this looks good. I'll need more time reviewing this and figuring it out. Are there are tests that can be added?

#Comment by Dantman (talk | contribs)   03:49, 8 December 2011

Oh, whoops maybe I should have had a bit more clarity with that phrase.

What I meant was $1 is a /.*/ while the others are /.+/.

#Comment by Aaron Schulz (talk | contribs)   21:51, 8 December 2011

Is WebRequest::getPathInfo() expected to at least set the 'title' key? What if we have a path rule with either a) not 'title' and no $1 or b) 'title' set to false? If that rule is matched, then the result will not have 'title' in the array.

#Comment by Dantman (talk | contribs)   22:03, 8 December 2011

Not really. You can have a plain /index.php that doesn't match anything and get back nothing. The behavior in that case is to redirect to the mainpage. (Though in the future I'd like to differentiate between something like / /index.php and /wiki/ which will go to the Main Page and something like /nothinghere that'll be sent to a real 404 page)

'title' isn't actually the only thing we rely on. There are some things like 'oldid' which can work on their own.

So this theoretically should work as well (as long as we ask WebRequest for the info early enough):

$router->add( '/permalink/$1', array( 'title' => false, 'oldid' => '$1' ) );

IMHO the only thing 'wrong' in this situation is we may want to eliminate the default 'title' if $1 is used somewhere else. ie: So array( 'oldid' => '$1' ) alone will work the same as that snippet. But that's an api enhancement, not really a bugfix or fixme.

As for a) if you're talking about $router->add( '/$2/$1', array( 'action' => '$2' ) ); unless you explicitly set 'title' to something the default 'title' => '$1' is added to the array. That's why I added the ability to explicitly set it to false.

#Comment by Aaron Schulz (talk | contribs)   22:08, 8 December 2011

The code has:

if ( !isset( $params['title'] ) && strpos( $path, '$1' ) !== false ) {

So there has to be a $1 in there first. Though $1 will get added before that usually, unless 'strict' is set.

I just want to make sure nothing is expecting 'title' to be set (even if just an empty string/false) for getPathInfo(), as the comment for that function implies that it is set.

Also, why is makeWeight() public?

#Comment by Dantman (talk | contribs)   22:19, 8 December 2011

The $1 is there since there is no value in having a 'title' => '$1' when the path doesn't contain $1.

In fact, doing so (adding a $1 when the path has no $1) would cause the entire rule to be ignored because $1 has no matched data, even though the user didn't ask for the $1 to be added to the paramters. As a result if that $1 check wasn't there $router->addStrict( '/somepath/Bar', array( 'curid' => 45875 ); would have a 'title' => '$1' added to it, and the author would be left in blind confusion as their entire rule is completely ignored by the routing code because it will never have a value for $1 to put into 'title'.

makeWeight could probably be fixed to be protected. I might have been using it outside of the class when actually implementing it to test it and make sure it was working right. Or considered using it directly in tests to make sure it always weighted things correctly.

#Comment by Aaron Schulz (talk | contribs)   22:21, 8 December 2011

I'm not the check shouldn't be there. I'm trying to get at the expected value of WebRequest::getPathInfo() and it's documentation, which definitely needs to be less misleading.

#Comment by Aaron Schulz (talk | contribs)   22:22, 8 December 2011

Should be "I'm not saying" of course.

Status & tagging log