r71412 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r71411 | r71412 | r71413 >
Date: 16:41, 21 August 2010
Author: nikerabbit
Status: ok (Comments)
Tags:
Comment:
Added $wgAdaptiveMessageCache to avoid caching huge pile of never used messages at twn.
Modified paths:
  • /trunk/phase3/includes/DefaultSettings.php (modified) (history)
  • /trunk/phase3/includes/MessageCache.php (modified) (history)
  • /trunk/phase3/includes/Wiki.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/MessageCache.php
@@ -42,6 +42,26 @@
4343 /// Variable for tracking which variables are already loaded
4444 protected $mLoadedLanguages = array();
4545
 46+ /**
 47+ * Used for automatic detection of most used messages.
 48+ */
 49+ protected $mRequestedMessages = array();
 50+
 51+ /**
 52+ * How long the message request counts are stored. Longer period gives
 53+ * better sample, but also takes longer to adapt changes. The counts
 54+ * are aggregrated per day, regardless of the value of this variable.
 55+ */
 56+ protected static $mAdaptiveDataAge = 604800;
 57+
 58+ /**
 59+ * Filter the tail of less used messages that are requested more seldom
 60+ * than this factor times the number of request of most requested message.
 61+ * These messages are not loaded in the default set, but are still cached
 62+ * individually on demand with the normal cache expiry time.
 63+ */
 64+ protected static $mAdaptiveInclusionThreshold = 0.05;
 65+
4666 function __construct( $memCached, $useDB, $expiry ) {
4767 if ( !$memCached ) {
4868 $memCached = wfGetCache( CACHE_NONE );
@@ -309,12 +329,12 @@
310330 * $wgMaxMsgCacheEntrySize are assigned a special value, and are loaded
311331 * on-demand from the database later.
312332 *
313 - * @param $code Optional language code, see documenation of load().
314 - * @return Array: Loaded messages for storing in caches.
 333+ * @param $code \string Language code.
 334+ * @return \array Loaded messages for storing in caches.
315335 */
316 - function loadFromDB( $code = false ) {
 336+ function loadFromDB( $code ) {
317337 wfProfileIn( __METHOD__ );
318 - global $wgMaxMsgCacheEntrySize, $wgContLanguageCode;
 338+ global $wgMaxMsgCacheEntrySize, $wgContLanguageCode, $wgAdaptiveMessageCache;
319339 $dbr = wfGetDB( DB_SLAVE );
320340 $cache = array();
321341
@@ -324,19 +344,24 @@
325345 'page_namespace' => NS_MEDIAWIKI,
326346 );
327347
328 - if ( $code ) {
329 - # Is this fast enough. Should not matter if the filtering is done in the
330 - # database or in code.
 348+ $mostused = array();
 349+ if ( $wgAdaptiveMessageCache ) {
 350+ $mostused = $this->getMostUsedMessages();
331351 if ( $code !== $wgContLanguageCode ) {
332 - # Messages for particular language
333 - $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), "/$code" );
334 - } else {
335 - # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses
336 - # other than language code.
337 - $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() );
 352+ foreach ( $mostused as $key => $value ) $mostused[$key] = "$value/$code";
338353 }
339354 }
340355
 356+ if ( count( $mostused ) ) {
 357+ $conds['page_title'] = $mostused;
 358+ } elseif ( $code !== $wgContLanguageCode ) {
 359+ $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), "/$code" );
 360+ } else {
 361+ # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses
 362+ # other than language code.
 363+ $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() );
 364+ }
 365+
341366 # Conditions to fetch oversized pages to ignore them
342367 $bigConds = $conds;
343368 $bigConds[] = 'page_len > ' . intval( $wgMaxMsgCacheEntrySize );
@@ -361,6 +386,12 @@
362387 $cache[$row->page_title] = ' ' . Revision::getRevisionText( $row );
363388 }
364389
 390+ foreach ( $mostused as $key ) {
 391+ if ( !isset( $cache[$key] ) ) {
 392+ $cache[$key] = '!NONEXISTENT';
 393+ }
 394+ }
 395+
365396 $cache['VERSION'] = MSG_CACHE_VERSION;
366397 wfProfileOut( __METHOD__ );
367398 return $cache;
@@ -525,6 +556,11 @@
526557 $uckey = $wgContLang->ucfirst( $lckey );
527558 }
528559
 560+ /* Record each message request, but only once per request.
 561+ * This information is not used unless $wgAdaptiveMessageCache
 562+ * is enabled. */
 563+ $this->mRequestedMessages[$uckey] = true;
 564+
529565 # Try the MediaWiki namespace
530566 if( !$this->mDisable && $useDB ) {
531567 $title = $uckey;
@@ -589,8 +625,8 @@
590626 * @param $code String: code denoting the language to try.
591627 */
592628 function getMsgFromNamespace( $title, $code ) {
593 - $type = false;
594 - $message = false;
 629+ global $wgAdaptiveMessageCache;
 630+ $big = false;
595631
596632 $this->load( $code );
597633 if ( isset( $this->mCache[$code][$title] ) ) {
@@ -599,15 +635,28 @@
600636 return substr( $entry, 1 );
601637 } elseif ( $entry === '!NONEXISTENT' ) {
602638 return false;
 639+ } elseif( $entry === '!TOO BIG' ) {
 640+ // Fall through and try invididual message cache below
 641+
 642+ } else {
 643+ // XXX: This is not cached in process cache, should it?
 644+ $message = false;
 645+ wfRunHooks('MessagesPreLoad', array( $title, &$message ) );
 646+ if ( $message !== false ) {
 647+ return $message;
 648+ }
 649+
 650+ /* If message cache is in normal mode, it is guaranteed
 651+ * (except bugs) that there is always entry (or placeholder)
 652+ * in the cache if message exists. Thus we can do minor
 653+ * performance improvement and return false early.
 654+ */
 655+ if ( !$wgAdaptiveMessageCache ) {
 656+ return false;
 657+ }
603658 }
604659 }
605660
606 - # Call message hooks, in case they are defined
607 - wfRunHooks('MessagesPreLoad', array( $title, &$message ) );
608 - if ( $message !== false ) {
609 - return $message;
610 - }
611 -
612661 # Try the individual message cache
613662 $titleKey = wfMemcKey( 'messages', 'individual', $title );
614663 $entry = $this->mMemc->get( $titleKey );
@@ -631,6 +680,7 @@
632681 $this->mCache[$code][$title] = ' ' . $message;
633682 $this->mMemc->set( $titleKey, ' ' . $message, $this->mExpiry );
634683 } else {
 684+ $message = false;
635685 $this->mCache[$code][$title] = '!NONEXISTENT';
636686 $this->mMemc->set( $titleKey, '!NONEXISTENT', $this->mExpiry );
637687 }
@@ -771,4 +821,60 @@
772822 return array( $message, $lang );
773823 }
774824
 825+ public static function logMessages() {
 826+ global $wgMessageCache, $wgAdaptiveMessageCache;
 827+ if ( !$wgAdaptiveMessageCache || !$wgMessageCache instanceof MessageCache ) {
 828+ return;
 829+ }
 830+
 831+ $cachekey = wfMemckey( 'message-profiling' );
 832+ $cache = wfGetCache( CACHE_DB );
 833+ $data = $cache->get( $cachekey );
 834+
 835+ if ( !$data ) $data = array();
 836+
 837+ $age = self::$mAdaptiveDataAge;
 838+ $filterDate = substr( wfTimestamp( TS_MW, time()-$age ), 0, 8 );
 839+ foreach ( array_keys( $data ) as $key ) {
 840+ if ( $key < $filterDate ) unset( $data[$key] );
 841+ }
 842+
 843+ $index = substr( wfTimestampNow(), 0, 8 );
 844+ if ( !isset( $data[$index] ) ) $data[$index] = array();
 845+
 846+ foreach ( $wgMessageCache->mRequestedMessages as $message => $_ ) {
 847+ if ( !isset( $data[$index][$message] ) ) $data[$index][$message] = 0;
 848+ $data[$index][$message]++;
 849+ }
 850+
 851+ $cache->set( $cachekey, $data );
 852+ }
 853+
 854+ public function getMostUsedMessages() {
 855+ global $wgContLang;
 856+ $cachekey = wfMemckey( 'message-profiling' );
 857+ $cache = wfGetCache( CACHE_DB );
 858+ $data = $cache->get( $cachekey );
 859+ if ( !$data ) return array();
 860+
 861+ $list = array();
 862+
 863+ foreach( $data as $date => $messages ) {
 864+ foreach( $messages as $message => $count ) {
 865+ $key = $message;
 866+ if ( !isset( $list[$key] ) ) $list[$key] = 0;
 867+ $list[$key] += $count;
 868+ }
 869+ }
 870+
 871+ $max = max( $list );
 872+ foreach ( $list as $message => $count ) {
 873+ if ( $count < intval( $max * self::$mAdaptiveInclusionThreshold ) ) {
 874+ unset( $list[$message] );
 875+ }
 876+ }
 877+
 878+ return array_keys( $list );
 879+ }
 880+
775881 }
Index: trunk/phase3/includes/Wiki.php
@@ -449,6 +449,7 @@
450450 * Ends this task peacefully
451451 */
452452 function restInPeace() {
 453+ MessageCache::logMessages();
453454 wfLogProfilingData();
454455 // Commit and close up!
455456 $factory = wfGetLBFactory();
Index: trunk/phase3/includes/DefaultSettings.php
@@ -1509,6 +1509,13 @@
15101510 $wgLocalMessageCacheSerialized = true;
15111511
15121512 /**
 1513+ * Instead of caching everything, keep track which messages are requested and
 1514+ * load only most used messages. This only makes sense if there is lots of
 1515+ * interface messages customised in the wiki (like hundreds in many languages).
 1516+ */
 1517+$wgAdaptiveMessageCache = false;
 1518+
 1519+/**
15131520 * Localisation cache configuration. Associative array with keys:
15141521 * class: The class to use. May be overridden by extensions.
15151522 *

Follow-up revisions

Revision | Commit summary | Author | Date
r71418 | Follow up r71412. $wgContLang is not used. | platonides | 20:47, 21 August 2010
r74177 | Use 7*24*3600 instead of 604800.... | siebrand | 23:23, 2 October 2010
r74179 | Fix broken r74177 because it needs a constant and address CR comment on r7141... | siebrand | 23:34, 2 October 2010
r81444 | 1.17: Back out r71412 (adaptive message cache) and its followups r71418, r741... | catrope | 11:58, 3 February 2011
r92549 | 1.18: Back out adaptive message cache (r71412, r71418, r74177, r74179) by mer... | catrope | 18:35, 19 July 2011

Comments

#Comment by Simetrical (talk | contribs)   21:37, 26 August 2010

Style nitpick: use 7*86400 or 7*24*3600 instead of 604800. It's reasonable to expect people to remember 3600 seconds per hour, and I think 86400 seconds per day is okay too (although some people don't), but 604800 seconds in a week is a little too much.

#Comment by Siebrand (talk | contribs)   23:19, 2 October 2010

r71886 is mentioned here because of a merge command, not because this rev was actually merged into 1.16wmf4.

#Comment by Trevor Parscal (WMF) (talk | contribs)   01:03, 8 February 2011

Appears to be reverted in r81444.

#Comment by 😂 (talk | contribs)   01:04, 8 February 2011

Just in 1.17, this is still in trunk.

#Comment by Platonides (talk | contribs)   09:50, 24 February 2011

Why wfGetCache( CACHE_DB ); ?

I think those should be wfGetCache( CACHE_ANYTHING );

#Comment by Nikerabbit (talk | contribs)   10:08, 24 February 2011

This feature will be reverted once twn no longer needs it, I hope soon. Basically:

  • CACHE_MEMCACHED uses too much memory
  • CACHE_DB is very slow
  • CACHE_ACCEL drives APC crazy

#Comment by Platonides (talk | contribs)   18:45, 16 June 2011

Does TWN still need it?

#Comment by Nikerabbit (talk | contribs)   19:47, 16 June 2011

Yes, we haven't moved translations away from NS_MEDIAWIKI yet.

#Comment by Catrope (talk | contribs)   21:40, 18 July 2011

Does this still need to be reverted in 1.18 now that 1.18 has been rebranched?

#Comment by Nikerabbit (talk | contribs)   05:58, 19 July 2011

Yes please. I will try to make this code useless for twn asap.

Status & tagging log