r69723 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r69722‎ | r69723 | r69724 >
Date:09:48, 22 July 2010
Author:peter17
Status:resolved (Comments)
Tags:
Comment:
iwtransclusion code moved from Parser.php to Interwiki.php; now able to retrieve templates from DB
Modified paths:
  • /branches/iwtransclusion/phase3/includes/Interwiki.php (modified) (history)
  • /branches/iwtransclusion/phase3/includes/parser/Parser.php (modified) (history)

Diff [purge]

Index: branches/iwtransclusion/phase3/includes/Interwiki.php
@@ -170,14 +170,14 @@
171171 * @return Boolean: whether everything was there
172172 */
173173 protected static function loadFromArray( $mc ) {
174 - if( isset( $mc['iw_url'] ) && isset( $mc['iw_api'] ) && isset( $mc['iw_wikiid'] )
175 - && isset( $mc['iw_local'] ) && isset( $mc['iw_trans'] ) ) {
 174+ if( isset( $mc['iw_url'] ) && isset( $mc['iw_local'] ) && isset( $mc['iw_trans'] ) ) {
176175 $iw = new Interwiki();
177176 $iw->mURL = $mc['iw_url'];
178 - $iw->mAPI = $mc['iw_api'];
179 - $iw->mWikiID = $mc['iw_wikiid'];
180177 $iw->mLocal = $mc['iw_local'];
181178 $iw->mTrans = $mc['iw_trans'];
 179+ $iw->mAPI = isset( $mc['iw_api'] ) ? $mc['iw_api'] : '';
 180+ $iw->mWikiID = isset( $mc['iw_wikiid'] ) ? $mc['iw_wikiid'] : '';
 181+
182182 return $iw;
183183 }
184184 return false;
@@ -256,4 +256,187 @@
257257 $msg = wfMsgForContent( $key );
258258 return wfEmptyMsg( $key, $msg ) ? '' : $msg;
259259 }
 260+
 261+
 262+
 263+ /**
 264+ * Transclude an interwiki link.
 265+ * TODO: separate in interwikiTranscludeFromDB & interwikiTranscludeFromAPI according to the iw type
 266+ */
 267+ public static function interwikiTransclude( $title ) {
 268+
 269+ global $wgEnableScaryTranscluding;
 270+
 271+ if ( !$wgEnableScaryTranscluding ) {
 272+ return wfMsg('scarytranscludedisabled');
 273+ }
 274+
 275+ // If we have a wikiID, we will use it to get an access to the remote database
 276+ // if not, we will use the API URL to retrieve the data through a HTTP Get
 277+
 278+ $wikiID = $title->getTransWikiID( );
 279+ $transAPI = $title->getTransAPI( );
 280+
 281+ if ( $wikiID !== '') {
 282+
 283+ $finalText = self::fetchTemplateFromDB( $wikiID, $title->getNamespace(), $title->getDBkey());
 284+ $subTemplates = self::fetchSubTemplatesListFromDB( $wikiID, $title->getNamespace(), $title->getDBkey());
 285+
 286+ foreach ($subTemplates as $template) {
 287+ $listSubTemplates.=$template['namespace'].':'.$template['title']."\n";
 288+ $list2.="<h2>".$template['title']."</h2>\n<pre>".self::fetchTemplateFromDB( $wikiID, $template['namespace'], $template['title'])."</pre>";
 289+ }
 290+
 291+ } else if( $transAPI !== '' ) {
 292+
 293+ $url1 = $transAPI."?action=query&prop=revisions&titles=$fullTitle&rvprop=content&format=json";
 294+
 295+ if ( strlen( $url1 ) > 255 ) {
 296+ return wfMsg( 'Interwiki-transclusion-url-too-long' );
 297+ }
 298+
 299+ $text = self::fetchTemplateHTTPMaybeFromCache( $url1 );
 300+
 301+ $fullTitle = $title->getNsText().':'.$title->getText();
 302+
 303+ $url2 = $transAPI."?action=parse&text={{".$fullTitle."}}&prop=templates&format=json";
 304+
 305+ $get = Http::get( $url2 );
 306+ $myArray = FormatJson::decode($get, true);
 307+
 308+ if ( ! empty( $myArray['parse'] )) {
 309+ $templates = $myArray['parse']['templates'];
 310+ }
 311+
 312+
 313+ // TODO: The templates are retrieved one by one.
 314+ // We should split the templates in two groups: up-to-date and out-of-date
 315+ // Only the second group would be retrieved through the API or DB request
 316+ for ($i = 0 ; $i < count( $templates ) ; $i++) {
 317+ $newTitle = $templates[$i]['*'];
 318+
 319+ $url = $transAPI."?action=query&prop=revisions&titles=$newTitle&rvprop=content&format=json";
 320+
 321+ $listSubTemplates.= $newTitle."\n";
 322+ $list2.="<h2>".$newTitle."</h2>\n<pre>".self::fetchTemplateHTTPMaybeFromCache( $url )."</pre>";
 323+
 324+ }
 325+
 326+ $finalText = "$url1\n$url2\n$text";
 327+
 328+ } else {
 329+ return wfMsg( 'Interwiki-transclusion-failed' );
 330+ }
 331+
 332+ return "<h2>$fullTitle</h2><pre>$finalText</pre> List of templates: <pre>".$listSubTemplates.'</pre>' . $list2;
 333+ }
 334+
 335+ public static function fetchTemplateFromDB ( $wikiID, $namespace, $DBkey ) {
 336+
 337+ try {
 338+ $dbr = wfGetDb( DB_SLAVE, array(), $wikiID );
 339+ } catch (Exception $e) {
 340+ return wfMsg( 'Failed-to-connect-the-distant-DB' );
 341+ }
 342+
 343+ $fields = array('old_text', 'page_id');
 344+ $res = $dbr->select(
 345+ array( 'page', 'revision', 'text' ),
 346+ $fields,
 347+ array( 'rev_id=page_latest',
 348+ 'page_namespace' => $namespace,
 349+ 'page_title' => $DBkey,
 350+ 'page_id=rev_page',
 351+ 'rev_text_id=old_id'),
 352+ null,
 353+ array( 'LIMIT' => 1 )
 354+ );
 355+
 356+ $obj = $dbr->resultObject( $res );
 357+
 358+ if ( $obj ) {
 359+ $row = $obj->fetchObject();
 360+ $obj->free();
 361+
 362+ if( $row ) {
 363+ $res = new Revision( $row );
 364+ $articleID = $res->mTextRow->page_id;
 365+ $text = $articleID."\n".$res->mTextRow->old_text;
 366+ }
 367+ }
 368+
 369+ return $text;
 370+ }
 371+
 372+ public static function fetchSubTemplatesListFromDB ( $wikiID, $namespace, $DBkey ) {
 373+
 374+ try {
 375+ $dbr = wfGetDb( DB_SLAVE, array(), $wikiID );
 376+ } catch (Exception $e) {
 377+ return wfMsg( 'Failed-to-connect-the-distant-DB' );
 378+ }
 379+
 380+ $fields = array('tl_namespace', 'tl_title');
 381+ $res = $dbr->select(
 382+ array( 'page', 'templatelinks' ),
 383+ $fields,
 384+ array( 'page_namespace' => $namespace,
 385+ 'page_title' => $DBkey,
 386+ 'tl_from=page_id' )
 387+ );
 388+
 389+ $obj = $dbr->resultObject( $res );
 390+
 391+ if ( $obj ) {
 392+
 393+ $listTemplates = array();
 394+
 395+ while ($row = $obj->fetchObject() ) {
 396+ $listTemplates[] = array( 'namespace' => $row->tl_namespace, 'title' => $row->tl_title );
 397+ }
 398+ $obj->free();
 399+ }
 400+
 401+ return $listTemplates;
 402+ }
 403+
 404+ public static function fetchTemplateHTTPMaybeFromCache( $url ) {
 405+ global $wgTranscludeCacheExpiry;
 406+ $dbr = wfGetDB( DB_SLAVE );
 407+ $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
 408+ $obj = $dbr->selectRow( 'transcache', array('tc_time', 'tc_contents' ),
 409+ array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) );
 410+
 411+ if ( $obj ) {
 412+ return $obj->tc_contents;
 413+ }
 414+
 415+ $get = Http::get( $url );
 416+
 417+ $content = FormatJson::decode( $get, true );
 418+
 419+ if ( ! empty($content['query']['pages']) ) {
 420+
 421+ $page = array_pop( $content['query']['pages'] );
 422+ $text = $page['revisions'][0]['*'];
 423+
 424+ } else {
 425+
 426+ return wfMsg( 'scarytranscludefailed', $url );
 427+
 428+ }
 429+
 430+ $dbw = wfGetDB( DB_MASTER );
 431+ $dbw->replace( 'transcache', array('tc_url'), array(
 432+ 'tc_url' => $url,
 433+ 'tc_time' => $dbw->timestamp( time() ),
 434+ 'tc_contents' => $text)
 435+ );
 436+
 437+ return $text;
 438+ }
 439+
 440+
 441+
 442+
260443 }
Index: branches/iwtransclusion/phase3/includes/parser/Parser.php
@@ -3166,16 +3166,12 @@
31673167 }
31683168 } elseif ( $title->isTrans() ) {
31693169 // TODO: Work by Peter17 in progress
3170 - # Interwiki transclusion
3171 - //if ( $this->ot['html'] && !$forceRawInterwiki ) {
3172 - // $text = $this->interwikiTransclude( $title, 'render' );
3173 - // $isHTML = true;
3174 - //} else {
3175 - $text = $this->interwikiTransclude( $title );
 3170+
 3171+ $text = Interwiki::interwikiTransclude( $title );
31763172 # Preprocess it like a template
31773173 $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
31783174 $isChildObj = true;
3179 - //}
 3175+
31803176 $found = true;
31813177 }
31823178
@@ -3386,116 +3382,6 @@
33873383
33883384
33893385 /**
3390 - * Transclude an interwiki link.
3391 - * TODO: separate in interwikiTranscludeFromDB & interwikiTranscludeFromAPI according to the iw type
3392 - */
3393 - function interwikiTransclude( $title ) {
3394 -
3395 - global $wgEnableScaryTranscluding;
3396 -
3397 - if ( !$wgEnableScaryTranscluding ) {
3398 - return wfMsg('scarytranscludedisabled');
3399 - }
3400 -
3401 - $fullTitle = $title->getNsText().':'.$title->getText();
3402 -
3403 - $url1 = $title->getTransAPI( )."?action=query&prop=revisions&titles=$fullTitle&rvprop=content&format=json";
3404 -
3405 - if ( strlen( $url1 ) > 255 ) {
3406 - return wfMsg( 'scarytranscludetoolong' );
3407 - }
3408 -
3409 - $text = $this->fetchTemplateMaybeFromCache( $url1 );
3410 -
3411 - $url2 = $title->getTransAPI( )."?action=parse&text={{".$fullTitle."}}&prop=templates&format=json";
3412 -
3413 - $get = Http::get( $url2 );
3414 - $myArray = FormatJson::decode($get, true);
3415 -
3416 - if ( ! empty( $myArray['parse'] )) {
3417 - $templates = $myArray['parse']['templates'];
3418 - }
3419 -
3420 -
3421 - // TODO: The templates are retrieved one by one.
3422 - // We should split the templates in two groups: up-to-date and out-of-date
3423 - // Only the second group would be retrieved through the API or DB request
3424 - for ($i = 0 ; $i < count( $templates ) ; $i++) {
3425 - $newTitle = $templates[$i]['*'];
3426 -
3427 - $url = $title->getTransAPI( )."?action=query&prop=revisions&titles=$newTitle&rvprop=content&format=json";
3428 -
3429 - $listSubTemplates.= $newTitle."\n";
3430 - $list2.="<h2>".$newTitle."</h2>\n<pre>".$this->fetchTemplateMaybeFromCache( $url )."</pre>";
3431 -
3432 - }
3433 -
3434 - return "<h2>$fullTitle</h2><pre>$url1\n$url2\n$text</pre> List of templates: <pre>".$listSubTemplates.'</pre>' . $list2;
3435 - }
3436 -
3437 -
3438 - function fetchTemplateMaybeFromCache( $url ) {
3439 - global $wgTranscludeCacheExpiry;
3440 - $dbr = wfGetDB( DB_SLAVE );
3441 - $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
3442 - $obj = $dbr->selectRow( 'transcache', array('tc_time', 'tc_contents' ),
3443 - array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) );
3444 -
3445 - if ( $obj ) {
3446 - return $obj->tc_contents;
3447 - }
3448 -
3449 - $get = Http::get( $url );
3450 -
3451 - $content = FormatJson::decode( $get, true );
3452 -
3453 - if ( ! empty($content['query']['pages']) ) {
3454 -
3455 - $page = array_pop( $content['query']['pages'] );
3456 - $text = $page['revisions'][0]['*'];
3457 -
3458 - } else {
3459 -
3460 - return wfMsg( 'scarytranscludefailed', $url );
3461 -
3462 - }
3463 -
3464 - $dbw = wfGetDB( DB_MASTER );
3465 - $dbw->replace( 'transcache', array('tc_url'), array(
3466 - 'tc_url' => $url,
3467 - 'tc_time' => $dbw->timestamp( time() ),
3468 - 'tc_contents' => $text)
3469 - );
3470 -
3471 - return $text;
3472 - }
3473 -
3474 - function fetchScaryTemplateMaybeFromCache( $url ) {
3475 - global $wgTranscludeCacheExpiry;
3476 - $dbr = wfGetDB( DB_SLAVE );
3477 - $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
3478 - $obj = $dbr->selectRow( 'transcache', array('tc_time', 'tc_contents' ),
3479 - array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) );
3480 - if ( $obj ) {
3481 - return $obj->tc_contents;
3482 - }
3483 -
3484 - $text = Http::get( $url );
3485 - if ( !$text ) {
3486 - return wfMsg( 'scarytranscludefailed', $url );
3487 - }
3488 -
3489 - $dbw = wfGetDB( DB_MASTER );
3490 - $dbw->replace( 'transcache', array('tc_url'), array(
3491 - 'tc_url' => $url,
3492 - 'tc_time' => $dbw->timestamp( time() ),
3493 - 'tc_contents' => $text)
3494 - );
3495 - return $text;
3496 - }
3497 -
3498 -
3499 - /**
35003386 * Triple brace replacement -- used for template arguments
35013387 * @private
35023388 */

Follow-up revisions

RevisionCommit summaryAuthorDate
r69730iwtransclusion code updated according to the remarks about r69723, except abo...peter1714:14, 22 July 2010
r69745Trying to solve a possible issue in caching foreign revision (described in r6...peter1719:42, 22 July 2010
r87105Merge r69541, r69723, r69730reedy00:25, 29 April 2011
r92986Merge r87105, Which is a merge of r69541, r69723, r69730reedy17:23, 24 July 2011

Comments

#Comment by Catrope (talk | contribs)   11:04, 22 July 2010
+		global $wgEnableScaryTranscluding;
+
+		if ( !$wgEnableScaryTranscluding ) {
+			return wfMsg('scarytranscludedisabled');
+		}
[...]
+			if ( strlen( $url1 ) > 255 ) {
+				return wfMsg( 'Interwiki-transclusion-url-too-long' );
+			}
[...]
+		return "<h2>$fullTitle</h2>< pre>$finalText</ pre> List of templates: < pre>".$listSubTemplates.'</ pre>' . $list2;

Please maintain a clean separation between backend and frontend. In practical terms, this means the logic for fetching things should live in Interwiki and the logic for presenting/using them should live in Parser. Interwiki.php should contain code that fetches a template and simply returns that template; if it fails, it should return some kind of error code, not an error message in the user's language, and if it succeeds it should return the text it fetched, not some HTML.

Furthermore, I don't think you need to bother with querying templatelinks to find dependent templates as the parser will find out about them soon enough when it parses them. It may make sense to preload them and cache them in a variable in the API case, so the request can be batched.

+		try {
+			$dbr = wfGetDb( DB_SLAVE, array(), $wikiID );
+		} catch (Exception $e) {
+			return wfMsg( 'Failed-to-connect-the-distant-DB' );
+		}

I don't think this is needed: I've seen plenty of places where DB connections are obtained and just used without error checking. Quick testing shows that MediaWiki dies with an error upon connection failure, so wfGetDb() will either give you a connection or kill the process.

+		$fields = array('old_text', 'page_id');
+		$res = $dbr->select(
+			array( 'page', 'revision', 'text' ),
+			$fields,
+			array( 'rev_id=page_latest',
+			       'page_namespace' => $namespace,
+			       'page_title'     => $DBkey,
+			       'page_id=rev_page',
+			       'rev_text_id=old_id'),
+			null,
+			array( 'LIMIT' => 1 )
+			);
[...]
+			if( $row ) {
+				$res = new Revision( $row );
+				$articleID = $res->mTextRow->page_id;
+				$text = $articleID."\n".$res->mTextRow->old_text;
+			}

Instead of rolling your own query and using ->mTextRow, you should use Revision::loadFromTitle( $db, $title ) (notice the $db parameter) and use ->getText() and ->getTitle()->getArticleID() to get the text and the page ID.

You also don't need to use resultObject() and fetchObject(), because you can simply do:

$res = $dbr->select( ... ); // Select multiple rows
foreach ( $res as $row ) { do stuff }

$row = $dbr->selectRow( ... ); // Select single row
$val = $dbr->selectField( ... ); // Select value of a single field of a single row
+				$listTemplates[] = array( 'namespace' => $row->tl_namespace, 'title' => $row->tl_title );

Generally, titles are represented as Title objects rather than array( 'namespace' => ns, 'title' => title ). You can obtain a Title object from a DB row with Title::makeTitle( namespace, title ) (or Title::newFromRow( $row ) if the row contains fields from the page table, but that's not the case in this example).

The caching for API requests is still URL-based, which I don't like (I said before that I'd like it to be interwiki+title-based instead) and currently doesn't have any code to store things in the cache.

#Comment by Peter17 (talk | contribs)   12:03, 22 July 2010

> Instead of rolling your own query and using ->mTextRow, you should use Revision::loadFromTitle( $db, $title ) (notice the $db parameter) and use ->getText().

My problem is that ->getText will call ->getRawText and then ->loadText() which will retrieve the old_text from the *local* DB! I need to have the Revision instance store the $db, so that it can ->loadText( $db ) from the *distant* DB...

> The caching for API requests is still URL-based, which I don't like (I said before that I'd like it to be interwiki+title-based instead)

I will change this, but I need to change the structure of the 'transcache' table. Do you allow me to do that?

> and currently doesn't have any code to store things in the cache.

Can/should I use something like the following?

$textid = $wikiID.':'.$title->$title->getNsText().':'.$title->getText();
$key = wfMemcKey( 'iwtransclustiontext', 'textid', $textId );
$text = $wgMemc->get( $key );
#Comment by Catrope (talk | contribs)   12:16, 22 July 2010

I missed that, good point. You'll need Revision to remember which DB it came from, falling back to the local DB if the DB field wasn't stored. That should be a fairly minor tweak to the Revision class.

Rather than changing the structure of transcache, create a new table. That's much less of a hassle, and it's a pattern that has been used in the past (e.g. with restructuring the cur and old tables a long, long time ago).

Your memcached code looks good, but note that:

  • wfMemcKey() takes a variable number arguments and joins them with colons, so you don't need to do that yourself. Just call wfMemcKey( 'iwtransclusiontext', 'tetxid', $wikiID, $title->getNsText(), $title->getText() ) or something along those lines
  • Passing in the namespace number (getNamespace()) is nicer than passing in the namespace text
  • Interwiki titles always have namespace 0 and the namespace prefix in the text (so for en:Talk:Foo, getText() returns 'Talk:Foo'), so passing in the namespace is pointless anyway

Also, by "store things in the cache" I really meant the transcache table or whatever is gonna be used in its stead, not memcached: what I was trying to say is that while your code reads API requests from transcache before making an HTTP request, it doesn't write the results back to transcache. Upon re-reading the code, I see that I was wrong: it does write back to cache.

Minor nitpick:

+			'tc_time' => $dbw->timestamp( time() ),

Don't use time() in MediaWiki. If you want the current time, just call $dbw->timestamp() or wfTimestamp(TS_FORMAT_NAME) (whichever is appropriate) without the time argument, and it'll default to the current time.

#Comment by Catrope (talk | contribs)   12:21, 22 July 2010

On second thought, per the discussion on wikitech-l, using memcached is probably a better idea than using some reworked version of the transcache table. Sorry for being so confusing.

#Comment by Peter17 (talk | contribs)   13:03, 22 July 2010

I still have a problem in storing the DB field in Revision.php:

  • if I store the $db of loadFromTitle( $db, $title ), I don't store enough information to create a connexion to MASTER_DB (needed in loadText() and getTimestampFromId()) because I don't have the wikiID.
  • if I store the wikiID, I need to pass it at the beginning, so, I cannot use loadFromTitle( $db, $title ) anymore.

What I propose is to create a new function, loadFromTitleForeignWiki( $wikiID, $title ), store the wikiID in mWikiID and create the $db for loadFromTitle(). This way, I can create a slave or master connexion any time it's needed. What do you think?

About the caching, if I understand well, I forget about all the DB caching and just use memcached in the case of the API request, right?

#Comment by Catrope (talk | contribs)   13:21, 22 July 2010

Yup, that's all good.

I just found out it gets a little bit more painful than that, even. loadText() tries the revision cache in memcached keyed by textid (and implicitly by wiki ID because it uses wfMemcKey()), so you'd have to write a counterpart to wfMemcKey() that'll generate a key for a different wiki ID (wfForeignMemcKey()?) and use that. If you don't do this, it might come up with a totally different revision from the local wiki because they happen to have the same text ID.

This is getting into the territory where I wanna run this by Tim to make sure he doesn't totally hate what you're doing, but it all sounds like small tweaks for now.

Status & tagging log