r70802 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r70801‎ | r70802 | r70803 >
Date:09:11, 10 August 2010
Author:peter17
Status:resolved (Comments)
Tags:
Comment:
Fix remarks about r70764; invalidate the cache of the distant pages when the local templates are edited
Modified paths:
  • /branches/iwtransclusion/phase3/includes/Article.php (modified) (history)
  • /branches/iwtransclusion/phase3/includes/BacklinkCache.php (modified) (history)
  • /branches/iwtransclusion/phase3/includes/DefaultSettings.php (modified) (history)
  • /branches/iwtransclusion/phase3/includes/HTMLCacheUpdate.php (modified) (history)
  • /branches/iwtransclusion/phase3/includes/Interwiki.php (modified) (history)
  • /branches/iwtransclusion/phase3/includes/Linker.php (modified) (history)

Diff [purge]

Index: branches/iwtransclusion/phase3/includes/Interwiki.php
@@ -9,6 +9,7 @@
1010 * All information is loaded on creation when called by Interwiki::fetch( $prefix ).
1111 * All work is done on slave, because this should *never* change (except during
1212 * schema updates etc, which aren't wiki-related)
 13+ * This class also contains the functions that allow interwiki templates transclusion.
1314 */
1415 class Interwiki {
1516
Index: branches/iwtransclusion/phase3/includes/Article.php
@@ -4158,6 +4158,10 @@
41594159
41604160 // Invalidate caches of articles which include this page
41614161 $wgDeferredUpdateList[] = new HTMLCacheUpdate( $title, 'templatelinks' );
 4162+
 4163+ // Invalidate caches of distant articles which transclude this page
 4164+ $wgDeferredUpdateList[] = new HTMLCacheUpdate( $title, 'globaltemplatelinks' );
 4165+ wfDoUpdates();
41624166
41634167 // Invalidate the caches of all pages which redirect here
41644168 $wgDeferredUpdateList[] = new HTMLCacheUpdate( $title, 'redirect' );
@@ -4330,7 +4334,7 @@
43314335 }
43324336
43334337 $dbr = wfGetDB( DB_SLAVE, array(), $wgGlobalDatabase );
4334 - $res = $dbr->select( array( 'globaltemplatelinks' ),
 4338+ $res = $dbr->select( 'globaltemplatelinks',
43354339 array( 'gtl_to_prefix', 'gtl_to_namespace', 'gtl_to_title' ),
43364340 array( 'gtl_from_wiki' => wfWikiID( ), 'gtl_from_page' => $id ),
43374341 __METHOD__ );
@@ -4340,10 +4344,8 @@
43414345 $result[] = Title::makeTitle( $row->gtl_to_namespace, $row->gtl_to_title, null, $row->gtl_to_prefix );
43424346 }
43434347 }
4344 -
4345 - $dbr->freeResult( $res );
43464348 }
4347 -
 4349+
43484350 return $result;
43494351 }
43504352
Index: branches/iwtransclusion/phase3/includes/Linker.php
@@ -1588,12 +1588,6 @@
15891589
15901590 $outText = '';
15911591 if ( count( $templates ) > 0 ) {
1592 - # Do a batch existence check
1593 - $batch = new LinkBatch;
1594 - foreach( $templates as $title ) {
1595 - $batch->addObj( $title );
1596 - }
1597 - $batch->execute();
15981592
15991593 # Construct the HTML
16001594 $outText = '<div class="mw-templatesUsedExplanation">';
Index: branches/iwtransclusion/phase3/includes/BacklinkCache.php
@@ -108,6 +108,27 @@
109109 }
110110
111111 /**
 112+ * Get the distant backtemplatelinks for the table globaltemplatelinks. Cached in process memory only.
 113+ * @param $table String
 114+ * @param $startId Integer or false
 115+ * @param $endId Integer or false
 116+ * @return TitleArray
 117+ */
 118+ public function getDistantTemplateLinks( ) {
 119+ global $wgGlobalDatabase, $wgLocalInterwiki;
 120+
 121+ $dbr = $dbr = wfGetDB( DB_SLAVE, array(), $wgGlobalDatabase );
 122+ $res = $dbr->select(
 123+ array( 'globaltemplatelinks' ),
 124+ array( 'gtl_from_wiki', 'gtl_from_page' ),
 125+ array( 'gtl_to_prefix' => $wgLocalInterwiki, 'gtl_to_title' => $this->title->getDBkey( ) ),
 126+ __METHOD__,
 127+ 'GROUP BY gtl_from_wiki'
 128+ );
 129+ return $res;
 130+ }
 131+
 132+ /**
112133 * Get the field name prefix for a given table
113134 */
114135 protected function getPrefix( $table ) {
@@ -117,6 +138,7 @@
118139 'categorylinks' => 'cl',
119140 'templatelinks' => 'tl',
120141 'redirect' => 'rd',
 142+ 'globaltemplatelinks' => 'gtl',
121143 );
122144
123145 if ( isset( $prefixes[$table] ) ) {
Index: branches/iwtransclusion/phase3/includes/HTMLCacheUpdate.php
@@ -46,6 +46,16 @@
4747 return;
4848 }
4949
 50+ if ( $this->mTable === 'globaltemplatelinks' ) {
 51+ global $wgEnableInterwikiTemplatesTracking;
 52+
 53+ if ( $wgEnableInterwikiTemplatesTracking ) {
 54+ $distantPageArray = $this->mCache->getDistantTemplateLinks( 'globaltemplatelinks' );
 55+ $this->invalidateDistantTitles( $distantPageArray );
 56+ }
 57+ return;
 58+ }
 59+
5060 # Get an estimate of the number of rows from the BacklinkCache
5161 $numRows = $this->mCache->getNumLinks( $this->mTable );
5262 if ( $numRows > $this->mRowsPerJob * 2 ) {
@@ -63,6 +73,7 @@
6474 $this->invalidateTitles( $titleArray );
6575 }
6676 }
 77+
6778 wfRunHooks( 'HTMLCacheUpdate::doUpdate', array($this->mTitle) );
6879 }
6980
@@ -200,7 +211,35 @@
201212 }
202213 }
203214 }
204 -
 215+
 216+ /**
 217+ * Invalidate an array of distant pages, given the wiki ID and page ID of those pages
 218+ */
 219+ protected function invalidateDistantTitles( $distantPageArray ) {
 220+ global $wgUseFileCache, $wgUseSquid, $wgLocalInterwiki;
 221+
 222+ $pagesByWiki = array();
 223+ foreach ( $distantPageArray as $row ) {
 224+ $wikiid = $row->gtl_from_wiki;
 225+ if( !isset( $pagesByWiki[$wikiid] ) ) {
 226+ $pagesByWiki[$wikiid] = array();
 227+ }
 228+ $pagesByWiki[$wikiid][] = $row->gtl_from_page;
 229+ }
 230+
 231+ foreach ( $pagesByWiki as $wikiid => $pages ) {
 232+ $dbw = wfGetDB( DB_MASTER, array( ), $wikiid );
 233+ $timestamp = $dbw->timestamp();
 234+ $batches = array_chunk( $pages, $this->mRowsPerQuery );
 235+ foreach ( $batches as $batch ) {
 236+ $dbw->update( 'page',
 237+ array( 'page_touched' => $timestamp ),
 238+ array( 'page_id IN (' . $dbw->makeList( $batch ) . ')' ),
 239+ __METHOD__
 240+ );
 241+ }
 242+ }
 243+ }
205244 }
206245
207246 /**
Index: branches/iwtransclusion/phase3/includes/DefaultSettings.php
@@ -2687,13 +2687,35 @@
26882688 $wgPreprocessorCacheThreshold = 1000;
26892689
26902690 /**
2691 - * Enable interwiki transcluding. Only when iw_trans=1.
 2691+ * Enable interwiki transcluding. Only when iw_trans=1 in the interwiki table.
 2692+ * If the interwiki prefix is associated with a wiki ID in the interwiki table,
 2693+ * then the distant templates will be retrieved in the distant DB. If there is
 2694+ * no wiki ID but a API URL for that prefix, the distant templates will be
 2695+ * retrieved using the API and cached in memcached.
26922696 */
2693 -$wgEnableScaryTranscluding = false;
 2697+$wgEnableInterwikiTranscluding = false;
26942698
26952699 /**
2696 - * Expiry time for interwiki transclusion
 2700+ * If $wgEnableInterwikiTranscluding is set to true and if an interwiki prefix
 2701+ * is associated with a wiki ID, then, this option should be set to true to
 2702+ * enable the cache invalidation of the distant pages when the local templates
 2703+ * are edited and also to display the list of the distant templates used by
 2704+ * the local pages. Enabling this requires to set up a global shared database
 2705+ * (see next option $wgGlobalDatabase).
26972706 */
 2707+$wgEnableInterwikiTemplatesTracking = false;
 2708+
 2709+/**
 2710+ * If $wgEnableInterwikiTemplatesTracking is set to true, this option should
 2711+ * contain the wiki ID of the database that hosts the globaltemplatelinks table.
 2712+ */
 2713+$wgGlobalDatabase = '';
 2714+
 2715+/**
 2716+ * If $wgEnableInterwikiTranscluding is set to true and if an interwiki
 2717+ * prefix is associated with an API URL and no wiki ID, this will be
 2718+ * the expiry time for the transcluded templates cached in memcached.
 2719+ */
26982720 $wgTranscludeCacheExpiry = 3600;
26992721
27002722 /** @} */ # end of parser settings }

Follow-up revisions

RevisionCommit summaryAuthorDate
r70803Fix remarks about r70802peter1709:30, 10 August 2010
r87109Merge r80802reedy00:46, 29 April 2011
r92994Merge r87109, merge of r70802reedy18:18, 24 July 2011

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r70764Fix remarks about r70576; display the distant templates below the edit textareapeter1714:46, 9 August 2010

Comments

#Comment by Catrope (talk | contribs)   09:22, 10 August 2010
+		wfDoUpdates();

I'm fairly sure that's unnecessary. There's a reason the variable is called $wgDeferredUpdateList: the updates should be run when wfDoUpdates() is called at the end of the request.

+	 * @param $table String
+	 * @param $startId Integer or false
+	 * @param $endId Integer or false
+	 * @return TitleArray

This documentation is wrong: the function has no parameters and returns a ResultWrapper rather than a TitleArray.

+			'GROUP BY gtl_from_wiki'

This also looks wrong; why are you doing this?

#Comment by Peter17 (talk | contribs)   09:25, 10 August 2010

Nb. 1 & 3: were used for some tests I made. I forgot to remove them. Tiredness... really sorry... X-( Nb. 2: sure, I'll correct that

#Comment by Platonides (talk | contribs)   11:52, 10 August 2010

The wfDoUpdates(); and GROUP BY have been fixed in r70803

Instead of having an extra global $wgGlobalDatabase (which is additionally an ambiguous name) I think you should use Manual:$wgSharedTables

Your HTMLCacheUpdate won't be updating the HTMLCache. You are skipping the whole SquidUpdate pieces. You will probably have issues with Titles that can't represent DistantTitle. It may be worth making a DistantTitle class (parent/child of the Title one?) so that you don't need duplicating code for both types.

#Comment by Peter17 (talk | contribs)   12:35, 10 August 2010

> Instead of having an extra global $wgGlobalDatabase (which is additionally an ambiguous name) I think you should use Manual:$wgSharedTables

Hum... I'm not so sure... My mentor, Catrope, said "You probably shouldn't be using this mechanism. Instead, look at what CentralAuth does. IIRC they let you set $wgCentralAuthDB or some such and feed that to wfGetDB(). [...] To make it easier for other users to set up, it'd be nicer to use CentralAuth's way."

We think we could reuse this global database for a built-in GlobalUsage.

Also, I would like to create a GlobalNamespaces table to store the namespace text of the distant wikis instead of storing this in my globaltemplatelinks table...

> Your HTMLCacheUpdate won't be updating the HTMLCache...

We think that purging another wiki's HTML cache would be too hard and we are already updating page_touched (so, the pages will be regenerated when a logged user visits them)... This can still be discussed.

Catrope is looking at the squid stuff.

> You will probably have issues with Titles that can't represent DistantTitle

Can you tell me more about this (example)? Is this related with Squid or...?

Thanks in advance

#Comment by Platonides (talk | contribs)   19:49, 10 August 2010

CentralAuth is an extension. Since this is going to be in core, I think that fitting into $wgSharedTables would be appropiate. If you want to allow using shared databases, and a different database for globaltemplatelinks (a strange use case), perhaps the db name could be into an array parameter. Or extend $wgSharedDB to allow each table on a different database.

> We think that purging another wiki's HTML cache would be too hard and we are already updating page_touched (so, the pages will be regenerated when a logged user visits them)... This can still be discussed. So, it is on purpose. That would be hard for a generic use case. A farm like Wikimedia would like to purge it directly IMHO.


> Can you tell me more about this (example)? Is this related with Squid or...?

Yes. This was assuming you wanted to change that code to purge it. You would have problems to represent the titles on different wikis with Titles. Thus I suggested creating a class with an interface similar to Title, able to do that.

#Comment by Catrope (talk | contribs)   07:06, 11 August 2010

About $wgSharedTables: the use case I was worried about was not about wanting to use distinct shared databases (which is strange indeed), but about not wanting to also automatically share the user table. The default value of $wgSharedTables is array( 'user' ), so we can't detect whether the wiki admin really intended to share their user table or just wanted to enable interwiki transclusion. This would mean that a simple $wgEnable... = true setting would not be enough to set up this feature: instead, it would require the strangest of setup instructions.

If you feel this is acceptable (maybe this should be discussed more widely), it would be no problem at all to use $wgSharedTables and the associated counter-intuitive setup instructions. Alternatively, we could aim to fix this ridiculous system.

#Comment by Platonides (talk | contribs)   21:30, 11 August 2010

Under which circumstances would the globaltemplatelinks table be used?

If it is used for shared wikis, they should be sharing the user table anyway so providing it by default in $wgSharedTables seems sensible. They can override it if they want to.

#Comment by Catrope (talk | contribs)   21:49, 11 August 2010

I'm not sure that that assertion ("shared wikis wanting to use globaltemplatelinks will be sharing the user table anyway") is valid, and what I was trying to avoid is having different setup instructions depending on whether the user table is shared or not.

I think this topic should be brought up on wikitech-l for broader input.

#Comment by Catrope (talk | contribs)   07:11, 11 August 2010

(Sorry to split this up into two posts, didn't mean to.)

About the caching thing: you don't need a new class to represent interwiki titles, Title can handle them for the most part, although some parts of it used by SquidUpdate don't seem to be interwiki-aware. I don't understand that code much anyway, so I'll talk to Tim about it.

HTML cache (means caching page HTML in files in the cache/ directory) is something that isn't used on any serious wiki farms, let alone on WMF, so that's why I thought it would be acceptable to simply declare interwiki transclusion incompatible with it.

#Comment by Platonides (talk | contribs)   21:07, 11 August 2010

Oh, right. Title::getFullURL() already has a branch for interwikis. Nice.

I think that HTML cache checks the mtime with the page_touched so may even work with it.

#Comment by Catrope (talk | contribs)   21:49, 11 August 2010

Would be nice; the presence of HTMLCache purges in addition to page_touched updates makes me doubt that though.

Status & tagging log