Index: branches/REL1_18/phase3/includes/Wiki.php |
— | — | @@ -422,6 +422,7 @@ |
423 | 423 | * Ends this task peacefully |
424 | 424 | */ |
425 | 425 | public function restInPeace() { |
| 426 | + MessageCache::logMessages(); |
426 | 427 | wfLogProfilingData(); |
427 | 428 | // Commit and close up! |
428 | 429 | $factory = wfGetLBFactory(); |
Index: branches/REL1_18/phase3/includes/cache/MessageCache.php |
— | — | @@ -43,6 +43,26 @@ |
44 | 44 | protected $mLoadedLanguages = array(); |
45 | 45 | |
46 | 46 | /** |
| 47 | + * Used for automatic detection of most used messages. |
| 48 | + */ |
| 49 | + protected $mRequestedMessages = array(); |
| 50 | + |
| 51 | + /** |
| 52 | + * How long the message request counts are stored. A longer period gives |
| 53 | + * a better sample, but also takes longer to adapt to changes. The counts |
| 54 | + * are aggregated per day, regardless of the value of this variable. |
| 55 | + */ |
| 56 | + protected static $mAdaptiveDataAge = 604800; // Is 7*24*3600 |
| 57 | + |
| 58 | + /** |
| 59 | + * Filter out the tail of less used messages that are requested less often |
| 60 | + * than this factor times the number of requests of the most requested message. |
| 61 | + * These messages are not loaded in the default set, but are still cached |
| 62 | + * individually on demand with the normal cache expiry time. |
| 63 | + */ |
| 64 | + protected static $mAdaptiveInclusionThreshold = 0.05; |
| 65 | + |
| 66 | + /** |
47 | 67 | * Singleton instance |
48 | 68 | * |
49 | 69 | * @var MessageCache |
— | — | @@ -349,12 +369,12 @@ |
350 | 370 | * $wgMaxMsgCacheEntrySize are assigned a special value, and are loaded |
351 | 371 | * on-demand from the database later. |
352 | 372 | * |
353 | | - * @param $code Optional language code, see documenation of load(). |
354 | | - * @return Array: Loaded messages for storing in caches. |
| 373 | + * @param $code String: language code. |
| 374 | + * @return Array: loaded messages for storing in caches. |
355 | 375 | */ |
356 | | - function loadFromDB( $code = false ) { |
| 376 | + function loadFromDB( $code ) { |
357 | 377 | wfProfileIn( __METHOD__ ); |
358 | | - global $wgMaxMsgCacheEntrySize, $wgLanguageCode; |
| 378 | + global $wgMaxMsgCacheEntrySize, $wgLanguageCode, $wgAdaptiveMessageCache; |
359 | 379 | $dbr = wfGetDB( DB_SLAVE ); |
360 | 380 | $cache = array(); |
361 | 381 | |
— | — | @@ -364,19 +384,26 @@ |
365 | 385 | 'page_namespace' => NS_MEDIAWIKI, |
366 | 386 | ); |
367 | 387 | |
368 | | - if ( $code ) { |
369 | | - # Is this fast enough. Should not matter if the filtering is done in the |
370 | | - # database or in code. |
| 388 | + $mostused = array(); |
| 389 | + if ( $wgAdaptiveMessageCache ) { |
| 390 | + $mostused = $this->getMostUsedMessages(); |
371 | 391 | if ( $code !== $wgLanguageCode ) { |
372 | | - # Messages for particular language |
373 | | - $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), "/$code" ); |
374 | | - } else { |
375 | | - # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses |
376 | | - # other than language code. |
377 | | - $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ); |
| 392 | + foreach ( $mostused as $key => $value ) { |
| 393 | + $mostused[$key] = "$value/$code"; |
| 394 | + } |
378 | 395 | } |
379 | 396 | } |
380 | 397 | |
| 398 | + if ( count( $mostused ) ) { |
| 399 | + $conds['page_title'] = $mostused; |
| 400 | + } elseif ( $code !== $wgLanguageCode ) { |
| 401 | + $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), "/$code" ); |
| 402 | + } else { |
| 403 | + # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses |
| 404 | + # other than language code. |
| 405 | + $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ); |
| 406 | + } |
| 407 | + |
381 | 408 | # Conditions to fetch oversized pages to ignore them |
382 | 409 | $bigConds = $conds; |
383 | 410 | $bigConds[] = 'page_len > ' . intval( $wgMaxMsgCacheEntrySize ); |
— | — | @@ -413,6 +440,12 @@ |
414 | 441 | $cache[$row->page_title] = $entry; |
415 | 442 | } |
416 | 443 | |
| 444 | + foreach ( $mostused as $key ) { |
| 445 | + if ( !isset( $cache[$key] ) ) { |
| 446 | + $cache[$key] = '!NONEXISTENT'; |
| 447 | + } |
| 448 | + } |
| 449 | + |
417 | 450 | $cache['VERSION'] = MSG_CACHE_VERSION; |
418 | 451 | wfProfileOut( __METHOD__ ); |
419 | 452 | return $cache; |
— | — | @@ -589,6 +622,13 @@ |
590 | 623 | $uckey = $wgContLang->ucfirst( $lckey ); |
591 | 624 | } |
592 | 625 | |
| 626 | + /** |
| 627 | + * Record each message request, but only once per request. |
| 628 | + * This information is not used unless $wgAdaptiveMessageCache |
| 629 | + * is enabled. |
| 630 | + */ |
| 631 | + $this->mRequestedMessages[$uckey] = true; |
| 632 | + |
593 | 633 | # Try the MediaWiki namespace |
594 | 634 | if( !$this->mDisable && $useDB ) { |
595 | 635 | $title = $uckey; |
— | — | @@ -653,8 +693,7 @@ |
654 | 694 | * @param $code String: code denoting the language to try. |
655 | 695 | */ |
656 | 696 | function getMsgFromNamespace( $title, $code ) { |
657 | | - $type = false; |
658 | | - $message = false; |
| 697 | + global $wgAdaptiveMessageCache; |
659 | 698 | |
660 | 699 | $this->load( $code ); |
661 | 700 | if ( isset( $this->mCache[$code][$title] ) ) { |
— | — | @@ -663,13 +702,26 @@ |
664 | 703 | return substr( $entry, 1 ); |
665 | 704 | } elseif ( $entry === '!NONEXISTENT' ) { |
666 | 705 | return false; |
| 706 | + } elseif( $entry === '!TOO BIG' ) { |
| 707 | + // Fall through and try individual message cache below |
667 | 708 | } |
668 | | - } |
| 709 | + } else { |
| 710 | + // XXX: This is not cached in process cache, should it be? |
| 711 | + $message = false; |
| 712 | + wfRunHooks( 'MessagesPreLoad', array( $title, &$message ) ); |
| 713 | + if ( $message !== false ) { |
| 714 | + return $message; |
| 715 | + } |
669 | 716 | |
670 | | - # Call message hooks, in case they are defined |
671 | | - wfRunHooks('MessagesPreLoad', array( $title, &$message ) ); |
672 | | - if ( $message !== false ) { |
673 | | - return $message; |
| 717 | + /** |
| 718 | + * If the message cache is in normal mode, it is guaranteed |
| 719 | + * (barring bugs) that there is always an entry (or placeholder) |
| 720 | + * in the cache if the message exists. Thus we can make a minor |
| 721 | + * performance improvement and return false early. |
| 722 | + */ |
| 723 | + if ( !$wgAdaptiveMessageCache ) { |
| 724 | + return false; |
| 725 | + } |
674 | 726 | } |
675 | 727 | |
676 | 728 | # Try the individual message cache |
— | — | @@ -700,6 +752,7 @@ |
701 | 753 | $this->mMemc->set( $titleKey, ' ' . $message, $this->mExpiry ); |
702 | 754 | } |
703 | 755 | } else { |
| 756 | + $message = false; |
704 | 757 | $this->mCache[$code][$title] = '!NONEXISTENT'; |
705 | 758 | $this->mMemc->set( $titleKey, '!NONEXISTENT', $this->mExpiry ); |
706 | 759 | } |
— | — | @@ -842,4 +895,77 @@ |
843 | 896 | return array( $message, $lang ); |
844 | 897 | } |
845 | 898 | |
/**
 * Add the messages requested during this web request to the per-day
 * usage counters stored under the 'message-profiling' key of the
 * CACHE_DB cache.
 *
 * Does nothing unless $wgAdaptiveMessageCache is enabled and the
 * MessageCache singleton has already been instantiated. Each message
 * counts at most once per request, because mRequestedMessages is keyed
 * by message name. Day buckets older than self::$mAdaptiveDataAge
 * seconds are dropped before the data is written back.
 *
 * NOTE(review): this is an unlocked read-modify-write, so concurrent
 * requests can presumably overwrite each other's increments. That is
 * tolerable for sampling purposes, but confirm if exact counts matter.
 */
public static function logMessages() {
	global $wgAdaptiveMessageCache;

	wfProfileIn( __METHOD__ );
	if ( !$wgAdaptiveMessageCache || !self::$instance instanceof MessageCache ) {
		wfProfileOut( __METHOD__ );
		return;
	}

	$requested = array_keys( self::$instance->mRequestedMessages );
	if ( !$requested ) {
		// Nothing was looked up in this request: skip the cache round
		// trip and avoid leaving an empty day bucket behind.
		wfProfileOut( __METHOD__ );
		return;
	}

	$cachekey = wfMemcKey( 'message-profiling' );
	$cache = wfGetCache( CACHE_DB );
	$data = $cache->get( $cachekey );

	// A cache miss returns a non-array; also protects against a
	// corrupted or foreign value stored under the same key.
	if ( !is_array( $data ) ) {
		$data = array();
	}

	// Buckets are keyed by YYYYMMDD; drop those outside the window.
	$age = self::$mAdaptiveDataAge;
	$filterDate = substr( wfTimestamp( TS_MW, time() - $age ), 0, 8 );
	foreach ( array_keys( $data ) as $key ) {
		if ( $key < $filterDate ) {
			unset( $data[$key] );
		}
	}

	// Today's bucket
	$index = substr( wfTimestampNow(), 0, 8 );
	if ( !isset( $data[$index] ) || !is_array( $data[$index] ) ) {
		$data[$index] = array();
	}

	foreach ( $requested as $message ) {
		if ( !isset( $data[$index][$message] ) ) {
			$data[$index][$message] = 0;
		}
		$data[$index][$message]++;
	}

	$cache->set( $cachekey, $data );
	wfProfileOut( __METHOD__ );
}
| 938 | + |
/**
 * Get the names of the most requested messages, based on the per-day
 * request counts stored under the 'message-profiling' key of the
 * CACHE_DB cache.
 *
 * The counts of all stored days are summed per message. Messages whose
 * total is below intval( highest total * self::$mAdaptiveInclusionThreshold )
 * are left out.
 *
 * @return Array: list of message names (array keys of the stored
 *  counters), or an empty array if no usage data is available.
 */
public function getMostUsedMessages() {
	wfProfileIn( __METHOD__ );
	$cachekey = wfMemcKey( 'message-profiling' );
	$cache = wfGetCache( CACHE_DB );
	$data = $cache->get( $cachekey );
	if ( !is_array( $data ) || !$data ) {
		wfProfileOut( __METHOD__ );
		return array();
	}

	// Sum the daily counters into one total per message
	$totals = array();
	foreach ( $data as $messages ) {
		if ( !is_array( $messages ) ) {
			continue;
		}
		foreach ( $messages as $message => $count ) {
			if ( !isset( $totals[$message] ) ) {
				$totals[$message] = 0;
			}
			$totals[$message] += $count;
		}
	}

	if ( !$totals ) {
		// Only empty day buckets were stored. max() must not be called
		// on an empty array (warning and false result, or an exception
		// on newer PHP versions).
		wfProfileOut( __METHOD__ );
		return array();
	}

	// The cutoff does not depend on the loop variable, compute it once
	$cutoff = intval( max( $totals ) * self::$mAdaptiveInclusionThreshold );
	$mostUsed = array();
	foreach ( $totals as $message => $count ) {
		if ( $count >= $cutoff ) {
			$mostUsed[] = $message;
		}
	}

	wfProfileOut( __METHOD__ );
	return $mostUsed;
}
| 971 | + |
846 | 972 | } |
Index: branches/REL1_18/phase3/includes/DefaultSettings.php |
— | — | @@ -1579,6 +1579,13 @@ |
1580 | 1580 | $wgLocalMessageCacheSerialized = true; |
1581 | 1581 | |
1582 | 1582 | /** |
| 1583 | + * Instead of caching everything, keep track of which messages are requested and |
| 1584 | + * load only the most used messages. This only makes sense if there are lots of |
| 1585 | + * interface messages customised in the wiki (like hundreds in many languages). |
| 1586 | + */ |
| 1587 | +$wgAdaptiveMessageCache = false; |
| 1588 | + |
| 1589 | +/** |
1583 | 1590 | * Localisation cache configuration. Associative array with keys: |
1584 | 1591 | * class: The class to use. May be overridden by extensions. |
1585 | 1592 | * |