r21364 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r21363 | r21364 | r21365 >
Date: 10:11, 19 April 2007
Author: river
Status: old
Tags:
Comment:
make checkpoints explicit
still need to batch inserts to prevent exhausting lock objects
Modified paths:
  • /trunk/sixdeg/libsdstore/bdb_adjacency_store.cc (modified) (history)
  • /trunk/sixdeg/libsdstore/bdb_adjacency_store.h (modified) (history)
  • /trunk/sixdeg/mkcache/mkcache.cc (modified) (history)

Diff [purge]

Index: trunk/sixdeg/mkcache/mkcache.cc
@@ -116,6 +116,11 @@
117117 }
118118 }
119119
 120+struct page_entry {
 121+ text_id_t text;
 122+ std::string name;
 123+ std::set<page_id_t> adj;
 124+};
120125
121126 void
122127 build_for(MYSQL &mysql, std::string const &db)
@@ -124,8 +129,8 @@
125130 return;
126131
127132 if (!do_mysql_query(&mysql,
128 - "SELECT page_id, pl_from FROM pagelinks,page "
129 - "WHERE pl_title=page_title and pl_namespace=page_namespace and page_namespace=0"))
 133+ "SELECT p2.page_id, pl_from, p1.page_title, p1.page_latest FROM pagelinks,page p1,page p2 "
 134+ "WHERE pl_from=p1.page_id AND pl_title=p2.page_title and pl_namespace=p2.page_namespace and p2.page_namespace=0"))
130135 return;
131136
132137 MYSQL_RES *res = mysql_use_result(&mysql);
@@ -134,7 +139,7 @@
135140 * First we cache the data for this wiki in RAM, then commit all once.
136141 * This avoids constantly (over)writing in the database.
137142 */
138 - std::map<page_id_t, std::set<page_id_t> > cache;
 143+ std::map<page_id_t, page_entry> cache;
139144 MYSQL_ROW arow;
140145 int i = 0;
141146 std::cout << db << ": 0" << std::flush;
@@ -144,28 +149,54 @@
145150
146151 page_id_t from = boost::lexical_cast<page_id_t>(arow[1]);
147152 page_id_t to = boost::lexical_cast<page_id_t>(arow[0]);
148 - cache[from].insert(to);
 153+ std::map<page_id_t, page_entry>::iterator it = cache.find(from);
 154+ if (it == cache.end()) {
 155+ page_entry e;
 156+ e.name = arow[2];
 157+ e.text = boost::lexical_cast<text_id_t>(arow[3]);
 158+ it = cache.insert(std::make_pair(from, e)).first;
 159+ }
 160+
 161+ it->second.adj.insert(to);
149162 }
150163 mysql_free_result(res);
151164
152 - bdb_adjacency_transaction trans(store);
 165+ bdb_adjacency_transaction *trans = new bdb_adjacency_transaction(store);
153166 std::cout << " flush to storage... " << std::flush;
154 - for (std::map<page_id_t, std::set<page_id_t> >::iterator
 167+ i = 0;
 168+ for (std::map<page_id_t, page_entry>::iterator
155169 it = cache.begin(), end = cache.end();
156170 it != end; ++it) {
157 - trans.set_adjacencies(db, it->first, it->second);
 171+ if (++i == 10000) {
 172+ trans->commit();
 173+ delete trans;
 174+ trans = new bdb_adjacency_transaction(store);
 175+ std::cout << '.' << std::flush;
 176+ i = 0;
 177+ }
 178+
 179+ trans->add_title(db, it->first, it->second.name, it->second.text);
 180+ trans->set_adjacencies(db, it->first, it->second.adj);
158181 }
159 - trans.commit();
 182+ trans->commit();
 183+ delete trans;
 184+
 185+ std::cout << " checkpoint..." << std::flush;
 186+ store.checkpoint();
 187+
160188 std::cout << "\n";
161189
 190+#if 0
162191 if (!do_mysql_query(&mysql, "SELECT page_title,page_id,page_latest FROM page WHERE page_namespace=0"))
163192 return;
164193
165194 std::cout << db << ": titles: 0" << std::flush;
166195 res = mysql_use_result(&mysql);
 196+ bdb_adjacency_transaction ttrans(store);
167197 while ((arow = mysql_fetch_row(res)) != NULL) {
168198 if ((i++ % 10000) == 0)
169199 std::cout << "\r" << db << ": titles: " << (i - 1) << "..." << std::flush;
 200+#if 0
170201 if ((i % 10000) == 0)
171202 flush_titles();
172203
@@ -175,8 +206,15 @@
176207 t.name = arow[0];
177208 t.text = boost::lexical_cast<text_id_t>(arow[2]);
178209 pending_titles.push_back(t);
 210+#endif
 211+ page_id_t page = boost::lexical_cast<page_id_t>(arow[1]);
 212+ text_id_t text = boost::lexical_cast<text_id_t>(arow[2]);
 213+ ttrans.add_title(db, page, arow[0], text);
179214 }
 215+ std::cout << " flush to storage..." << std::flush;
 216+ ttrans.commit();
180217 std::cout << '\n';
181218 flush_titles();
182219 mysql_free_result(res);
 220+#endif
183221 }
Index: trunk/sixdeg/libsdstore/bdb_adjacency_store.cc
@@ -23,16 +23,13 @@
2424 #include "bdb_adjacency_store.h"
2525 #include "log.h"
2626
27 -#define DB_TYPE DB_HASH
 27+//#define DB_TYPE DB_HASH
 28+#define DB_TYPE DB_BTREE
2829
29 -void *chkp(void *arg)
 30+void
 31+bdb_adjacency_store::checkpoint(void)
3032 {
31 - DB_ENV *env = static_cast<DB_ENV *>(arg);
32 - for (;;) {
33 - sleep(40);
34 - env->txn_checkpoint(env, 0, 0, 0);
35 - }
36 - return 0;
 33+ env->txn_checkpoint(env, 0, 0, 0);
3734 }
3835
3936 bdb_adjacency_store::bdb_adjacency_store(void)
@@ -185,9 +182,6 @@
186183 return;
187184 }
188185 txn->commit(txn, 0);
189 -
190 - pthread_t tid;
191 - pthread_create(&tid, NULL, chkp, env);
192186 }
193187
194188 void
Index: trunk/sixdeg/libsdstore/bdb_adjacency_store.h
@@ -26,6 +26,7 @@
2727 bdb_adjacency_store(void);
2828 void open(std::string const &, open_mode);
2929 void close(void);
 30+ void checkpoint(void);
3031
3132 int error(void) const;
3233 std::string strerror(void) const;