Index: trunk/sixdeg/mkcache/mkcache.cc |
— | — | @@ -116,6 +116,11 @@ |
117 | 117 | } |
118 | 118 | } |
119 | 119 | |
| 120 | +struct page_entry { /* Per-source-page record that build_for() accumulates in its in-memory cache, keyed by the linking page's id. */ |
| 121 | + text_id_t text; /* parsed from the page_latest column of the linking page (p1) */ |
| 122 | + std::string name; /* page_title of the linking page (p1) */ |
| 123 | + std::set<page_id_t> adj; /* ids of the namespace-0 pages (p2) that this page links to */ |
| 124 | +}; |
120 | 125 | |
121 | 126 | void |
122 | 127 | build_for(MYSQL &mysql, std::string const &db) |
— | — | @@ -124,8 +129,8 @@ |
125 | 130 | return; |
126 | 131 | |
127 | 132 | if (!do_mysql_query(&mysql, |
128 | | - "SELECT page_id, pl_from FROM pagelinks,page " |
129 | | - "WHERE pl_title=page_title and pl_namespace=page_namespace and page_namespace=0")) |
| 133 | + "SELECT p2.page_id, pl_from, p1.page_title, p1.page_latest FROM pagelinks,page p1,page p2 " |
| 134 | + "WHERE pl_from=p1.page_id AND pl_title=p2.page_title and pl_namespace=p2.page_namespace and p2.page_namespace=0")) |
130 | 135 | return; |
131 | 136 | |
132 | 137 | MYSQL_RES *res = mysql_use_result(&mysql); |
— | — | @@ -134,7 +139,7 @@ |
135 | 140 | * First we cache the data for this wiki in RAM, then commit it all at once. |
136 | 141 | * This avoids constantly (over)writing in the database. |
137 | 142 | */ |
138 | | - std::map<page_id_t, std::set<page_id_t> > cache; |
| 143 | + std::map<page_id_t, page_entry> cache; |
139 | 144 | MYSQL_ROW arow; |
140 | 145 | int i = 0; |
141 | 146 | std::cout << db << ": 0" << std::flush; |
— | — | @@ -144,28 +149,54 @@ |
145 | 150 | |
146 | 151 | page_id_t from = boost::lexical_cast<page_id_t>(arow[1]); |
147 | 152 | page_id_t to = boost::lexical_cast<page_id_t>(arow[0]); |
148 | | - cache[from].insert(to); |
| 153 | + std::map<page_id_t, page_entry>::iterator it = cache.find(from); |
| 154 | + if (it == cache.end()) { |
| 155 | + page_entry e; |
| 156 | + e.name = arow[2]; |
| 157 | + e.text = boost::lexical_cast<text_id_t>(arow[3]); |
| 158 | + it = cache.insert(std::make_pair(from, e)).first; |
| 159 | + } |
| 160 | + |
| 161 | + it->second.adj.insert(to); |
149 | 162 | } |
150 | 163 | mysql_free_result(res); |
151 | 164 | |
152 | | - bdb_adjacency_transaction trans(store); |
| 165 | + bdb_adjacency_transaction *trans = new bdb_adjacency_transaction(store); |
153 | 166 | std::cout << " flush to storage... " << std::flush; |
154 | | - for (std::map<page_id_t, std::set<page_id_t> >::iterator |
| 167 | + i = 0; |
| 168 | + for (std::map<page_id_t, page_entry>::iterator |
155 | 169 | it = cache.begin(), end = cache.end(); |
156 | 170 | it != end; ++it) { |
157 | | - trans.set_adjacencies(db, it->first, it->second); |
| 171 | + if (++i == 10000) { |
| 172 | + trans->commit(); |
| 173 | + delete trans; |
| 174 | + trans = new bdb_adjacency_transaction(store); |
| 175 | + std::cout << '.' << std::flush; |
| 176 | + i = 0; |
| 177 | + } |
| 178 | + |
| 179 | + trans->add_title(db, it->first, it->second.name, it->second.text); |
| 180 | + trans->set_adjacencies(db, it->first, it->second.adj); |
158 | 181 | } |
159 | | - trans.commit(); |
| 182 | + trans->commit(); |
| 183 | + delete trans; |
| 184 | + |
| 185 | + std::cout << " checkpoint..." << std::flush; |
| 186 | + store.checkpoint(); |
| 187 | + |
160 | 188 | std::cout << "\n"; |
161 | 189 | |
| 190 | +#if 0 |
162 | 191 | if (!do_mysql_query(&mysql, "SELECT page_title,page_id,page_latest FROM page WHERE page_namespace=0")) |
163 | 192 | return; |
164 | 193 | |
165 | 194 | std::cout << db << ": titles: 0" << std::flush; |
166 | 195 | res = mysql_use_result(&mysql); |
| 196 | + bdb_adjacency_transaction ttrans(store); |
167 | 197 | while ((arow = mysql_fetch_row(res)) != NULL) { |
168 | 198 | if ((i++ % 10000) == 0) |
169 | 199 | std::cout << "\r" << db << ": titles: " << (i - 1) << "..." << std::flush; |
| 200 | +#if 0 |
170 | 201 | if ((i % 10000) == 0) |
171 | 202 | flush_titles(); |
172 | 203 | |
— | — | @@ -175,8 +206,15 @@ |
176 | 207 | t.name = arow[0]; |
177 | 208 | t.text = boost::lexical_cast<text_id_t>(arow[2]); |
178 | 209 | pending_titles.push_back(t); |
| 210 | +#endif |
| 211 | + page_id_t page = boost::lexical_cast<page_id_t>(arow[1]); |
| 212 | + text_id_t text = boost::lexical_cast<text_id_t>(arow[2]); |
| 213 | + ttrans.add_title(db, page, arow[0], text); |
179 | 214 | } |
| 215 | + std::cout << " flush to storage..." << std::flush; |
| 216 | + ttrans.commit(); |
180 | 217 | std::cout << '\n'; |
181 | 218 | flush_titles(); |
182 | 219 | mysql_free_result(res); |
| 220 | +#endif |
183 | 221 | } |
Index: trunk/sixdeg/libsdstore/bdb_adjacency_store.cc |
— | — | @@ -23,16 +23,13 @@ |
24 | 24 | #include "bdb_adjacency_store.h" |
25 | 25 | #include "log.h" |
26 | 26 | |
27 | | -#define DB_TYPE DB_HASH |
| 27 | +//#define DB_TYPE DB_HASH |
| 28 | +#define DB_TYPE DB_BTREE |
28 | 29 | |
29 | | -void *chkp(void *arg) |
| 30 | +void |
| 31 | +bdb_adjacency_store::checkpoint(void) /* Issues a single txn_checkpoint call on the store's env handle. Replaces the removed chkp() thread body, which made the same call every 40 seconds in an endless loop. The three zero arguments are presumably Berkeley DB's kbyte, min and flags -- TODO confirm against the BDB version in use. The call's return value is ignored. */ |
30 | 32 | { |
31 | | - DB_ENV *env = static_cast<DB_ENV *>(arg); |
32 | | - for (;;) { |
33 | | - sleep(40); |
34 | | - env->txn_checkpoint(env, 0, 0, 0); |
35 | | - } |
36 | | - return 0; |
| 33 | + env->txn_checkpoint(env, 0, 0, 0); |
37 | 34 | } |
38 | 35 | |
39 | 36 | bdb_adjacency_store::bdb_adjacency_store(void) |
— | — | @@ -185,9 +182,6 @@ |
186 | 183 | return; |
187 | 184 | } |
188 | 185 | txn->commit(txn, 0); |
189 | | - |
190 | | - pthread_t tid; |
191 | | - pthread_create(&tid, NULL, chkp, env); |
192 | 186 | } |
193 | 187 | |
194 | 188 | void |
Index: trunk/sixdeg/libsdstore/bdb_adjacency_store.h |
— | — | @@ -26,6 +26,7 @@ |
27 | 27 | bdb_adjacency_store(void); |
28 | 28 | void open(std::string const &, open_mode); |
29 | 29 | void close(void); |
| 30 | + void checkpoint(void); |
30 | 31 | |
31 | 32 | int error(void) const; |
32 | 33 | std::string strerror(void) const; |