r96837 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r96836‎ | r96837 | r96838 >
Date:13:38, 12 September 2011
Author:catrope
Status:deferred (Comments)
Tags:
Comment:
RL2: Reorganize caching in LocalGadgetRepo so we no longer load the entire gadgets table from the DB on every page view. There is now one memcached entry per gadget, as well as an entry for the list of names.

* Rename loadData() to loadIDs() and add loadDataFor(). These functions pull info from 1) $this->data, 2) memcached, 3) DB and propagate back up the chain when misses occur
* Add getMemcKey() à la ForeignDBRepo (in phase3/includes/filerepo) and finally uncomment hasSharedCache. This means we now take advantage of cross-wiki memcached access for WMF-like configurations
* Put in a dirty hack to prevent cache pollution of the names list between LocalGadgetRepo and ForeignDBGadgetRepo. Will fix this properly later.
Modified paths:
  • /branches/RL2/extensions/Gadgets/Gadgets.php (modified) (history)
  • /branches/RL2/extensions/Gadgets/backend/ForeignDBGadgetRepo.php (modified) (history)
  • /branches/RL2/extensions/Gadgets/backend/LocalGadgetRepo.php (modified) (history)

Diff [purge]

Index: branches/RL2/extensions/Gadgets/Gadgets.php
@@ -47,7 +47,7 @@
4848 * // TODO: Make this the default?
4949 * 'dbFlags' => ( $wgDebugDumpSql ? DBO_DEBUG : 0 ) | DBO_DEFAULT // Use this value unless you know what you're doing
5050 * 'tablePrefix' => 'mw_', // Table prefix for the foreign wiki's database, or '' if no prefix
51 - //* 'hasSharedCache' => true, // Whether the foreign wiki's cache is accessible through $wgMemc // TODO: needed?
 51+ * 'hasSharedCache' => true, // Whether the foreign wiki's cache is accessible through $wgMemc
5252 * );
5353 *
5454 * For foreign API-based gadget repositories, use:
Index: branches/RL2/extensions/Gadgets/backend/LocalGadgetRepo.php
@@ -1,9 +1,12 @@
22 <?php
 3+// TODO: Make LocalGadgetRepo a singleton
 4+
35 /**
46 * Gadget repository that gets its gadgets from the local database.
57 */
68 class LocalGadgetRepo extends GadgetRepo {
7 - protected $data = null;
 9+ protected $data = array();
 10+ protected $namesLoaded = false;
811
912 /*** Public methods inherited from GadgetRepo ***/
1013
@@ -19,16 +22,16 @@
2023 }
2124
2225 public function getGadgetIds() {
23 - $this->loadData();
 26+ $this->loadIDs();
2427 return array_keys( $this->data );
2528 }
2629
2730 public function getGadget( $id ) {
28 - $this->loadData();
29 - if ( !isset( $this->data[$id] ) ) {
 31+ $data = $this->loadDataFor( $id );
 32+ if ( !$data ) {
3033 return null;
3134 }
32 - return new Gadget( $id, $this, $this->data[$id]['json'], $this->data[$id]['timestamp'] );
 35+ return new Gadget( $id, $this, $data['json'], $data['timestamp'] );
3336 }
3437
3538 public function getSource() {
@@ -44,12 +47,12 @@
4548 }
4649
4750 public function modifyGadget( Gadget $gadget, $timestamp = null ) {
 51+ global $wgMemc;
4852 if ( !$this->isWriteable() ) {
4953 return Status::newFatal( 'gadget-manager-readonly-repository' );
5054 }
5155
5256 $dbw = $this->getMasterDB();
53 - $this->loadData();
5457 $id = $gadget->getId();
5558 $json = $gadget->getJSON();
5659 $ts = $dbw->timestamp( $gadget->getTimestamp() );
@@ -73,12 +76,14 @@
7477 'gd_timestamp <= ' . $dbw->addQuotes( $ts ) // for conflict detection
7578 ), __METHOD__
7679 );
 80+ $created = $dbw->affectedRows();
7781 }
7882 $dbw->commit();
7983
8084 // Detect conflicts
81 - if ( $dbw->affectedRows() === 0 ) {
 85+ if ( !$created ) {
8286 // Some conflict occurred
 87+ wfDebug( __METHOD__ . ": conflict detected\n" );
8388 return Status::newFatal( 'gadgets-manager-modify-conflict', $id, $ts );
8489 }
8590
@@ -88,24 +93,45 @@
8994 // a clone. If it returned a reference to a cached object, the caller could change
9095 // that object and cause weird things to happen.
9196 $this->data[$id] = array( 'json' => $json, 'timestamp' => $newTs );
 97+ // Write to memc too
 98+ $key = $this->getMemcKey( 'gadgets', 'localrepodata', $id );
 99+ if ( $key !== false ) {
 100+ $wgMemc->set( $key, $this->data[$id] );
 101+ }
 102+ // Clear the gadget names array in memc
 103+ $namesKey = $this->getMemcKey( 'gadgets', 'localreponames' );
 104+ if ( $namesKey !== false ) {
 105+ $wgMemc->delete( $namesKey );
 106+ }
92107
93108 return Status::newGood();
94109 }
95110
96111 public function deleteGadget( $id ) {
 112+ global $wgMemc;
97113 if ( !$this->isWriteable() ) {
98114 return Status::newFatal( 'gadget-manager-readonly-repository' );
99115 }
100116
101 - $this->loadData();
102 - if ( !isset( $this->data[$id] ) ) {
103 - return Status::newFatal( 'gadgets-manager-nosuchgadget', $id );
 117+ // Remove gadget from database
 118+ $dbw = $this->getMasterDB();
 119+ $dbw->delete( 'gadgets', array( 'gd_id' => $id ), __METHOD__ );
 120+ $affectedRows = $dbw->affectedRows();
 121+
 122+ // Remove gadget from in-object cache
 123+ unset( $this->data[$id] );
 124+ // Remove from memc too
 125+ $key = $this->getMemcKey( 'gadgets', 'localrepodata', $id );
 126+ if ( $key !== false ) {
 127+ $wgMemc->delete( $key );
104128 }
 129+ // Clear the gadget names array in memc
 130+ $namesKey = $this->getMemcKey( 'gadgets', 'localreponames' );
 131+ if ( $namesKey !== false ) {
 132+ $wgMemc->delete( $namesKey );
 133+ }
105134
106 - unset( $this->data[$id] );
107 - $dbw = $this->getMasterDB();
108 - $dbw->delete( 'gadgets', array( 'gd_id' => $id ), __METHOD__ );
109 - if ( $dbw->affectedRows() === 0 ) {
 135+ if ( $affectedRows === 0 ) {
110136 return Status::newFatal( 'gadgets-manager-nosuchgadget', $id );
111137 }
112138 return Status::newGood();
@@ -129,36 +155,105 @@
130156 return wfGetDB( DB_SLAVE );
131157 }
132158
 159+
133160 /*** Protected methods ***/
134161
135162 /**
136 - * Populate $this->data from the DB, if that hasn't happened yet. All methods using
137 - * $this->data must call this before accessing $this->data .
 163+ * Get a memcached key. Subclasses can override this to use a foreign memc
 164+ * @return string|bool Cache key, or false if this repo has no shared memc
138165 */
139 - protected function loadData() {
140 - // FIXME: Make the cache shared somehow, it's getting repopulated for every instance now
141 - // FIXME: Reconsider the query-everything behavior; maybe use memc?
142 - if ( is_array( $this->data ) ) {
 166+ protected function getMemcKey( /* ... */ ) {
 167+ $args = func_get_args();
 168+ return call_user_func_array( 'wfMemcKey', $args );
 169+ }
 170+
 171+ /**
 172+ * Populate the keys in $this->data. Values are only populated when loading from the DB;
 173+ * when loading from memc, all values are set to null and are lazy-loaded in loadDataFor().
 174+ * @return array Array of gadget IDs
 175+ */
 176+ protected function loadIDs() {
 177+ global $wgMemc;
 178+ if ( $this->namesLoaded ) {
143179 // Already loaded
144 - return;
 180+ return array_keys( $this->data );
145181 }
146 - $this->data = array();
147182
148 - $query = $this->getLoadDataQuery();
 183+ // Try memc
 184+ $key = $this->getMemcKey( 'gadgets', 'localreponames' );
 185+ $cached = $key !== false ? $wgMemc->get( $key ) : false;
 186+ if ( is_array( $cached ) ) {
 187+ // Yay, data is in cache
 188+ // Add to $this->data , but let things already in $this->data take precedence
 189+ $this->data += $cached;
 190+ $this->namesLoaded = true;
 191+ return array_keys( $this->data );
 192+ }
 193+
 194+ // Get from DB
 195+ $query = $this->getLoadIDsQuery();
149196 $dbr = $this->getDB();
150197 $res = $dbr->select( $query['tables'], $query['fields'], $query['conds'], __METHOD__,
151198 $query['options'], $query['join_conds'] );
152199
 200+ $toCache = array();
153201 foreach ( $res as $row ) {
154202 $this->data[$row->gd_id] = array( 'json' => $row->gd_blob, 'timestamp' => $row->gd_timestamp );
 203+ $toCache[$row->gd_id] = null;
155204 }
 205+ // Write to memc
 206+ $wgMemc->set( $key, $toCache );
 207+ $this->namesLoaded = true;
 208+ return array_keys( $this->data );
156209 }
157210
158211 /**
159 - * Get the DB query to use in loadData(). Subclasses can override this to tweak the query.
 212+ * Populate a given Gadget's data in $this->data . Tries memc first, then falls back to a DB query.
 213+ * @param $id string Gadget ID
 214+ * @return array( 'json' => JSON string, 'timestamp' => timestamp ) or empty array if the gadget doesn't exist.
 215+ */
 216+ protected function loadDataFor( $id ) {
 217+ global $wgMemc;
 218+ if ( isset( $this->data[$id] ) && is_array( $this->data[$id] ) ) {
 219+ // Already loaded, nothing to do here.
 220+ return $this->data[$id];
 221+ }
 222+
 223+ // Try cache
 224+ $key = $this->getMemcKey( 'gadgets', 'localrepodata', $id );
 225+ $cached = $key !== false ? $wgMemc->get( $key ) : false;
 226+ if ( is_array( $cached ) ) {
 227+ // Yay, data is in cache
 228+ $this->data[$id] = $cached;
 229+ return $cached;
 230+ }
 231+
 232+ // Get from database
 233+ $query = $this->getLoadDataForQuery( $id );
 234+ $dbr = $this->getDB();
 235+ $row = $dbr->selectRow( $query['tables'], $query['fields'], $query['conds'], __METHOD__,
 236+ $query['options'], $query['join_conds']
 237+ );
 238+ if ( !$row ) {
 239+ // Gadget doesn't exist
 240+ // Use empty array to prevent confusion with $wgMemc->get() return values for missing keys
 241+ $data = array();
 242+ } else {
 243+ $data = array( 'json' => $row->gd_blob, 'timestamp' => $row->gd_timestamp );
 244+ }
 245+ // Save to object cache
 246+ $this->data[$id] = $data;
 247+ // Save to memc
 248+ $wgMemc->set( $key, $data );
 249+
 250+ return $data;
 251+ }
 252+
 253+ /**
 254+ * Get the DB query to use in getLoadIDs(). Subclasses can override this to tweak the query.
160255 * @return Array
161256 */
162 - protected function getLoadDataQuery() {
 257+ protected function getLoadIDsQuery() {
163258 return array(
164259 'tables' => 'gadgets',
165260 'fields' => array( 'gd_id', 'gd_blob', 'gd_timestamp' ),
@@ -167,4 +262,19 @@
168263 'join_conds' => array(),
169264 );
170265 }
 266+
 267+ /**
 268+ * Get the DB query to use in loadDataFor(). Subclasses can override this to tweak the query.
 269+ * @param $id string Gadget ID
 270+ * @return Array
 271+ */
 272+ protected function getLoadDataForQuery( $id ) {
 273+ return array(
 274+ 'tables' => 'gadgets',
 275+ 'fields' => array( 'gd_blob', 'gd_timestamp' ),
 276+ 'conds' => array( 'gd_id' => $id ),
 277+ 'options' => array(),
 278+ 'join_conds' => array(),
 279+ );
 280+ }
171281 }
Index: branches/RL2/extensions/Gadgets/backend/ForeignDBGadgetRepo.php
@@ -11,7 +11,7 @@
1212 * 'dbName': Database name
1313 * 'dbFlags': Bitmap of the DBO_* flags. Recommended value is ( $wgDebugDumpSql ? DBO_DEBUG : 0 ) | DBO_DEFAULT
1414 * 'tablePrefix': Table prefix
15 - //* 'hasSharedCache': Whether the foreign wiki's cache is accessible through $wgMemc // TODO: needed?
 15+ * 'hasSharedCache': Whether the foreign wiki's cache is accessible through $wgMemc
1616 */
1717 class ForeignDBGadgetRepo extends LocalGadgetRepo {
1818 protected $db = null;
@@ -26,7 +26,7 @@
2727 parent::__construct( $options );
2828
2929 $optionKeys = array( 'source', 'dbType', 'dbServer', 'dbUser', 'dbPassword', 'dbName',
30 - 'dbFlags', 'tablePrefix'/*, 'hasSharedCache'*/ );
 30+ 'dbFlags', 'tablePrefix', 'hasSharedCache' );
3131 foreach ( $optionKeys as $optionKey ) {
3232 $this->{$optionKey} = $options[$optionKey];
3333 }
@@ -61,9 +61,30 @@
6262 return $this->db;
6363 }
6464
65 - protected function getLoadDataQuery() {
66 - $query = parent::getLoadDataQuery();
 65+ protected function getMemcKey( /* ... */ ) {
 66+ if ( $this->hasSharedCache ) {
 67+ $args = func_get_args();
 68+ // FIXME: This is a dirty hack. Need to cache localrepo and foreignrepo name lists separately
 69+ // because one includes non-shared gadgets and the other doesn't
 70+ if ( $args[1] === 'localreponames' ) {
 71+ $args[1] = 'foreignreponames';
 72+ }
 73+ array_unshift( $args, $this->dbName, $this->tablePrefix );
 74+ return call_user_func_array( 'wfForeignMemcKey', $args );
 75+ } else {
 76+ return false;
 77+ }
 78+ }
 79+
 80+ protected function getLoadIDsQuery() {
 81+ $query = parent::getLoadIDsQuery();
6782 $query['conds']['gd_shared'] = 1;
6883 return $query;
6984 }
 85+
 86+ protected function getLoadDataForQuery( $id ) {
 87+ $query = parent::getLoadDataForQuery( $id );
 88+ $query['conds']['gd_shared'] = 1;
 89+ return $query;
 90+ }
7091 }

Follow-up revisions

RevisionCommit summaryAuthorDate
r96852RL2: Followup r96837: fix ugliness in dealing with ForeignDBRepo names cachin...catrope15:17, 12 September 2011
r98878[RL2] Followup r96837, prevent requests for nonexistent modules from pollutin...catrope18:00, 4 October 2011

Comments

#Comment by Krinkle (talk | contribs)   01:00, 20 September 2011

If I go to Special:Gadgets/babla (doesn't exist) on my local wiki I get this error:

Fatal error: Call to a member function isEnabledByDefault() on a non-object in /Users/krinkle/Sites/mediawiki/branches/RL2/extensions/Gadgets/GadgetHooks.php on line 200

Caused by the fact that getting "all gadgets" from the repo includes the null-reponse that was cached in LocalGadgetRepo->loadDataFor()


+		if ( !$row ) {
+			// Gadget doesn't exist
+			// Use empty array to prevent confusion with $wgMemc->get() return values for missing keys
+			$data = array();
+		} else {
+			$data = array( 'json' => $row->gd_blob, 'timestamp' => $row->gd_timestamp );
+		}
+		// Save to object cache
+		$this->data[$id] = $data;
+		// Save to memc
+		$wgMemc->set( $key, $data );
+		
+		return $data;
#Comment by Catrope (talk | contribs)   15:03, 4 October 2011

I didn't understand the bug description at first, but after reproducing the bug myself, examining the stack trace and var_dump()ing something, I now understand what's happening:

  • Someone calls getGadgetIds(), which calls loadIDs(), which populates $this->data and sets $this->idsLoaded = true;
  • Someone calls getGadget( 'babla' ), which calls loadDataFor( 'babla' ), which queries the DB, finds it missing, and sets $this->data['babla'] = array();
  • Someone calls getGadgetIds() again, which sees $this->idsLoaded = true; and returns array_keys( $this->data ), which includes 'babla'
  • The caller assumes that nothing returned by getGadgetIds() will ever return null when fed into getGadget(), which should be a safe assumption, but explodes here.

I'm on it.

Status & tagging log