r62852 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r62851‎ | r62852 | r62853 >
Date:22:17, 22 February 2010
Author:daniel
Status:deferred
Tags:
Comment:
concept info cache queries
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/php/build-concept-info.sh (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/php/concept-info-local.sql (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/php/concept-info.sql (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/php/wwthesaurus.php (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/php/concept-info.sql
@@ -1,6 +1,6 @@
22 SET SESSION group_concat_max_len = 262144; -- 1024*256
33
4 -create table full_all_local_concept_info (
 4+create table if not exists {collection}_{thesaurus}_concept_info (
55 concept int(11) NOT NULL,
66 `lang` varbinary(10) NOT NULL,
77 `name` varbinary(255) NOT NULL,
@@ -13,57 +13,8 @@
1414 PRIMARY KEY ( concept, lang )
1515 ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
1616
17 -insert into full_all_local_concept_info ( concept, lang, name )
 17+insert into {collection}_{thesaurus}_concept_info ( concept, lang, name )
1818 select global_concept, lang, local_concept_name
19 -from full_all_origin;
 19+from {collection}_{thesaurus}_origin;
2020
21 -update full_all_local_concept_info as I
22 -join full_all_origin as O on I.lang = O.lang and I.concept = O.global_concept
23 -join full_{lang}_definition as D on O.local_concept = D.concept and O.lang = "{lang}"
24 -set I.definition = D.definition
25 -where I.lang = "{lang}";
26 -
27 -update full_all_local_concept_info as I
28 -join ( select O.global_concept as concept, O.lang as lang,
29 - group_concat( concat(R.type, ":", R.name) separator "|" ) as pages
30 - from full_all_origin as O
31 - join full_{lang}_about as A on A.concept = O.local_concept and O.lang = "{lang}"
32 - join full_{lang}_resource as R on R.id = A.resource
33 - where O.lang = "{lang}" and R.type IN (10, 50)
34 - group by O.global_concept, O.lang
35 - ) as X
36 -on I.concept = X.concept and I.lang = X.lang
37 -set I.pages = X.pages
38 -where I.lang = "{lang}";
39 -
40 -
41 -update full_all_local_concept_info as I
42 -join ( select narrow as concept, group_concat(concat(broad, ":", local_concept_name) separator "|") as broader from full_all_broader
43 - join full_all_origin as O on O.global_concept = broad and O.lang = "{lang}"
44 - group by narrow ) as X
45 -on X.concept = I.concept, I.lang = "{lang}"
46 -set I.broader = X.broader;
47 -
48 -update full_all_local_concept_info as I
49 -join ( select broad as concept, group_concat(concat(narrow, ":", local_concept_name) separator "|") as narrower from full_all_broader
50 - join full_all_origin as O on O.global_concept = narrow and O.lang = "{lang}"
51 - group by broad ) as X
52 -on X.concept = I.concept, I.lang = "{lang}"
53 -set I.narrower = X.narrower;
54 -
55 -update full_all_local_concept_info as I
56 -join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as similar from full_all_relation
57 - join full_all_origin as O on O.global_concept = concept2 and O.lang = "{lang}"
58 - where langmatch >= 1 or langref >= 1
59 - group by concept1 ) as X
60 -on X.concept = I.concept, I.lang = "{lang}"
61 -set I.similar = X.similar;
62 -
63 -update full_all_local_concept_info as I
64 -join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as related from full_all_relation
65 - join full_all_origin as O on O.global_concept = concept2 and O.lang = "{lang}"
66 - where bilink >= 1
67 - group by concept1 ) as X
68 -on X.concept = I.concept, I.lang = "{lang}"
69 -set I.related = X.related;
70 -
 21+truncate {collection}_{thesaurus}_concept_info;
\ No newline at end of file
Index: trunk/WikiWord/WikiWord/src/main/php/wwthesaurus.php
@@ -219,111 +219,142 @@
220220 return $names;
221221 }
222222
 223+ function splitPages( $s ) {
 224+ $pp = explode("|", $s);
 225+
 226+ $pages = array();
 227+ foreach ($pp as $p) {
 228+ ($t, $n) = explode(":", $p, 2);
 229+ $pages[$n] = (int)$t;
 230+ }
 231+
 232+ return $pages;
 233+ }
 234+
 235+ function splitConcepts( $s ) {
 236+ $ss = explode("|", $s);
 237+
 238+ $concepts = array();
 239+ foreach ($ss as $p) {
 240+ ($id, $n) = explode(":", $p, 2);
 241+ $id = (int)$id;
 242+ $concepts[$id] = $n;
 243+ }
 244+
 245+ return $concepts;
 246+ }
 247+
223248 /////////////////////////////////////////////////////////
224 - function getConcept( $id, $lang = null, $limit = 100 ) {
225 - global $wwTablePrefix, $wwThesaurusDataset;
 249+ function getConceptInfo( $id, $lang = null, $fields = null ) {
 250+ global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages;
226251
227 - if ($lang) $sql = "SELECT C.*, O.* FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept as C "
228 - . " LEFT JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O "
229 - . " ON C.id = O.global_concept " . " AND O.lang = \"".mysql_real_escape_string($lang)."\""
230 - . " WHERE C.id = ".(int)$id ;
231 - else $sql = "SELECT C.* FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept as C "
232 - . " WHERE C.id = ".(int)$id;
 252+ if ( $fields && is_array($fields)) $fields = implode(", ", $fields);
 253+ if ( !$fields ) $fields = "*";
233254
 255+ #TODO: scores, concept-type, ...
 256+
 257+ $sql = "SELECT $fields FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept_info "
 258+ . " WHERE C.id = ".(int)$id;
 259+
 260+ if ($lang) {
 261+ if ( is_array($lang) ) $sql .= " AND lang IN " . $this->quoteSet($lang);
 262+ else $sql .= " AND lang = " . $this->quote($lang);
 263+ }
 264+
234265 $r = $this->getRows($sql);
235266
236 - if ( !$r ) return false;
237 - else return $r[0];
238 - }
 267+ $rs = $this->query($sql);
 268+ if (!$rs) return false;
239269
240 - function getRelatedForConcept( $id, $lang = null, $limit = 100 ) {
241 - global $wwTablePrefix, $wwThesaurusDataset;
 270+ $concept = array();
 271+ $concept["id"] = $id;
 272+ $concept["languages"] = array();
242273
243 - $sql = "SELECT " . ($lang ? " C.*, O.* " : " C.* " ) . " FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept as C "
244 - . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_relation as R ON R.concept2 = C.id ";
 274+ while ($row = mysql_fetch_assoc($rs)) {
 275+ $lang = $row["lang"];
 276+ $concept["languages"][] = $lang;
245277
246 - if ( $lang ) $sql .= " LEFT JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O ON O.lang = \"" . mysql_real_escape_string($lang) . "\" AND C.id = O.global_concept ";
 278+ #TODO: scores, concept-type, ...
247279
248 - $sql .= " WHERE R.concept1 = ".(int)$id
249 - . " AND ( R.bilink > 0 OR R.langref > 0 OR R.langmatch > 0 )"
250 - . " GROUP BY C.id "
251 - . " LIMIT " . (int)$limit;
 280+ if (@$row["name"] !== null) $concept["name"][$lang] = $row["name"];
 281+ if (@$row["definition"] !== null) $concept["definition"][$lang] = $row["definition"];
 282+ if (@$row["pages"] !== null) $concept["pages"][$lang] = $this->splitPages($row["pages"]);
252283
253 - return $this->getRows($sql);
254 - }
 284+ if (@$row["broader"] !== null) $concept["broader"][$lang] = $this->splitConcepts($row["broader"]);
 285+ if (@$row["narrower"] !== null) $concept["narrower"][$lang] = $this->splitConcepts($row["narrower"]);
 286+ if (@$row["similar"] !== null) $concept["similar"][$lang] = $this->splitConcepts($row["similar"]);
 287+ if (@$row["related"] !== null) $concept["related"][$lang] = $this->splitConcepts($row["related"]);
255288
256 - function getBroaderForConcept( $id, $lang = null, $limit = 100 ) {
257 - global $wwTablePrefix, $wwThesaurusDataset;
 289+ if (isset($concept["broader"][$lang])) $concept["broader"]["*"] += array_keys($concept["broader"][$lang]);
 290+ if (isset($concept["narrower"][$lang])) $concept["narrower"]["*"] += array_keys($concept["narrower"][$lang]);
 291+ if (isset($concept["similar"][$lang])) $concept["similar"]["*"] += array_keys($concept["similar"][$lang]);
 292+ if (isset($concept["related"][$lang])) $concept["related"]["*"] += array_keys($concept["related"][$lang]);
 293+ }
258294
259 - $sql = "SELECT " . ($lang ? " C.*, O.* " : " C.* " ) . " FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept as C "
260 - . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_broader as R ON R.broad = C.id ";
 295+ if (isset($concept["broader"]["*"])) $concept["broader"]["*"] = array_unique($concept["broader"]["*"]);
 296+ if (isset($concept["narrower"]["*"])) $concept["narrower"]["*"] = array_unique($concept["narrower"]["*"]);
 297+ if (isset($concept["similar"]["*"])) $concept["similar"]["*"] = array_unique($concept["similar"]["*"]);
 298+ if (isset($concept["broader"]["*"])) $concept["related"]["*"] = array_unique($concept["related"]["*"]);
261299
262 - if ( $lang ) $sql .= " LEFT JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O ON O.lang = \"" . mysql_real_escape_string($lang) . "\" AND C.id = O.global_concept ";
 300+ mysql_free_result($rs);
263301
264 - $sql .= " WHERE R.narrow = ".(int)$id
265 - . " GROUP BY C.id "
266 - . " LIMIT " . (int)$limit;
 302+ return $concept;
 303+ }
267304
268 - return $this->getRows($sql);
 305+ function getConcept( $id, $lang = null, $limit = 100 ) {
 306+ return $this->getConceptInfo($id, $lang);
269307 }
270308
271 - function getNarrowerForConcept( $id, $lang = null, $limit = 100 ) {
272 - global $wwTablePrefix, $wwThesaurusDataset;
 309+ function getRelatedForConcept( $id, $lang = null, $limit = 100 ) {
 310+ $concept = $this->getConceptInfo($id, $lang, "lang, related");
 311+ if (!$concept) return false;
 312+ else if ($lang) return $concept["related"][$lang];
 313+ else return $concept["related"];
 314+ }
273315
274 - $sql = "SELECT " . ($lang ? " C.*, O.* " : " C.* " ) . " FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept as C "
275 - . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_broader as R ON R.narrow = C.id ";
 316+ function getBroaderForConcept( $id, $lang = null, $limit = 100 ) {
 317+ $concept = $this->getConceptInfo($id, $lang, "lang, broader");
 318+ if (!$concept) return false;
 319+ else if ($lang) return $concept["broader"][$lang];
 320+ else return $concept["broader"];
 321+ }
276322
277 - if ( $lang ) $sql .= " LEFT JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O ON O.lang = \"" . mysql_real_escape_string($lang) . "\" AND C.id = O.global_concept ";
 323+ function getNarrowerForConcept( $id, $lang = null, $limit = 100 ) {
 324+ $concept = $this->getConceptInfo($id, $lang, "lang, narrower");
 325+ if (!$concept) return false;
 326+ else if ($lang) return $concept["narrower"][$lang];
 327+ else return $concept["narrower"];
 328+ }
278329
279 - $sql .= " WHERE R.broad = ".(int)$id
280 - . " GROUP BY C.id "
281 - . " LIMIT " . (int)$limit;
 330+ function getPagesForConcept( $id, $lang = null, $limit = 100 ) {
 331+ if (!$lang) return false;
282332
283 - return $this->getRows($sql);
 333+ $concept = $this->getConceptInfo($id, $lang, "lang, pages");
 334+ if (!$concept) return false;
 335+ else if ($lang) return array_keys( $concept["pages"][$lang] );
284336 }
285337
286 - function getPagesForConcept( $id, $lang = null, $limit = 100 ) {
287 - global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages;
 338+ function getNamesForConcept( $id, $lang = null ) {
 339+ $concept = $this->getConceptInfo($id, $lang, "lang, name");
 340+ if (!$concept) return false;
288341
289 - if ( !$lang ) $lang = array_keys( $wwLanguages );
290 - if ( !is_array($lang) ) $lang = preg_split('![\\s,;|/:]\\s*!', $lang);
291342 $result = array();
292 -
293 - foreach ($lang as $ll) {
294 - $sql = "SELECT R.name FROM {$wwTablePrefix}_{$ll}_resource as R "
295 - . " JOIN {$wwTablePrefix}_{$ll}_about as A ON A.resource = R.id "
296 - . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O ON O.lang = \"" . mysql_real_escape_string($ll) . "\" AND A.concept = O.local_concept "
297 - . " WHERE O.global_concept = " . (int)$id
298 - . " WHERE R.type IN ( " . WW_RC_TYPE_ARTICLE . ", " . WW_RC_TYPE_CATEGORY . " ) "
299 - . " LIMIT " . (int)$limit;
300 -
301 - $pages = $this->getList($sql, "name");
302 - if ( $pages === false || $pages === null ) return false;
303 - if ( !$pages ) continue;
304 -
305 - $result[$ll] = $pages;
 343+ foreach ($concept["languages"] as $ll) {
 344+ if (@$concept["name@$ll"])
 345+ $result[$ll] = $concept["name@$ll"];
306346 }
307347
308348 return $result;
309349 }
310350
311 - function getNamesForConcept( $id, $lang = null ) {
312 - global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages;
 351+ function getDefinitionForConcept( $id, $lang = null, $limit = 100 ) {
 352+ $concept = $this->getConceptInfo($id, $lang, "lang, definition");
 353+ if (!$concept) return false;
313354
314 - if ( !$lang ) $lang = array_keys( $wwLanguages );
315 - if ( !is_array($lang) ) $lang = preg_split('![\\s,;|/:]\\s*!', $lang);
316355 $result = array();
317 -
318 - foreach ($lang as $ll) {
319 - $sql = "SELECT O.local_name FROM {$wwTablePrefix}_{$ll}_resource as O ";
320 - $sql .= " WHERE O.global_concept = " . (int)$id;
321 - $sql .= " AND O.lang = " . (int)$ll;
322 -
323 - $pages = $this->getList($sql, "name");
324 - if ( $pages === false || $pages === null ) return false;
325 - if ( !$pages ) continue;
326 -
327 - $result[$ll] = $pages;
 356+ foreach ($concept["languages"] as $ll) {
 357+ if (@$concept["definition@$ll"])
 358+ $result[$ll] = $concept["definition@$ll"];
328359 }
329360
330361 return $result;
@@ -353,29 +384,6 @@
354385 return $result;
355386 }
356387
357 - function getDefinitionForConcept( $id, $lang = null, $limit = 100 ) {
358 - global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages;
359 -
360 - if ( !$lang ) $lang = array_keys( $wwLanguages );
361 - if ( !is_array($lang) ) $lang = preg_split('![\\s,;|/:]\\s*!', $lang);
362 - $result = array();
363 -
364 - foreach ($lang as $ll) {
365 - $sql = "SELECT D.definition FROM {$wwTablePrefix}_{$ll}_definition as D"
366 - . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O ON O.lang = \"" . mysql_real_escape_string($ll) . "\" AND D.concept = O.local_concept "
367 - . " WHERE O.global_concept = " . (int)$id
368 - . " LIMIT " . (int)$limit;
369 -
370 - $definitions = $this->getList($sql, "definition");
371 - if ( $definitions === false || $definitions === null ) return false;
372 - if ( !$definitions ) continue;
373 -
374 - $result[$ll] = $definitions[0];
375 - }
376 -
377 - return $result;
378 - }
379 -
380388 function getLinksForConcept( $id, $lang = null, $limit = 100 ) {
381389 global $wwTablePrefix, $wwThesaurusDataset;
382390
Index: trunk/WikiWord/WikiWord/src/main/php/build-concept-info.sh
@@ -0,0 +1,15 @@
 2+#!/bin/bash
 3+set -e
 4+
 5+db="$1"
 6+collection="$2"
 7+thesaurus="$3"
 8+languages="en de fr nl it es pt pl"
 9+
 10+echo "preparing concept info"
 11+replace '{collection}' "$collection" '{thesaurus}' "$thesaurus" < concept-info.sql | mysql "$db"
 12+
 13+for n in $languages; do
 14+ echo "collection concept info: $n"
 15+ replace '{collection}' "$collection" '{thesaurus}' "$thesaurus" '{lang}' "$n" < concept-info-local.sql | mysql "$db"
 16+done
Index: trunk/WikiWord/WikiWord/src/main/php/concept-info-local.sql
@@ -0,0 +1,57 @@
 2+SET SESSION group_concat_max_len = 262144; -- 1024*256
 3+
 4+-- collect definitions
 5+update {collection}_{thesaurus}_concept_info as I
 6+join {collection}_{thesaurus}_origin as O on I.lang = O.lang and I.concept = O.global_concept
 7+join {collection}_{lang}_definition as D on O.local_concept = D.concept and O.lang = "{lang}"
 8+set I.definition = D.definition
 9+where I.lang = "{lang}";
 10+
 11+-- collect wiki pages
 12+update {collection}_{thesaurus}_concept_info as I
 13+join ( select O.global_concept as concept, O.lang as lang,
 14+ group_concat( concat(R.type, ":", R.name) separator "|" ) as pages
 15+ from {collection}_{thesaurus}_origin as O
 16+ join {collection}_{lang}_about as A on A.concept = O.local_concept and O.lang = "{lang}"
 17+ join {collection}_{lang}_resource as R on R.id = A.resource
 18+ where O.lang = "{lang}" and R.type IN (10, 50)
 19+ group by O.global_concept, O.lang
 20+ ) as X
 21+on I.concept = X.concept and I.lang = X.lang
 22+set I.pages = X.pages
 23+where I.lang = "{lang}";
 24+
 25+-- collect broader concepts
 26+update {collection}_{thesaurus}_concept_info as I
 27+join ( select narrow as concept, group_concat(concat(broad, ":", local_concept_name) separator "|") as broader from {collection}_{thesaurus}_broader
 28+ join {collection}_{thesaurus}_origin as O on O.global_concept = broad and O.lang = "{lang}"
 29+ group by narrow ) as X
 30+on X.concept = I.concept and I.lang = "{lang}"
 31+set I.broader = X.broader;
 32+
 33+-- collect narrower concepts
 34+update {collection}_{thesaurus}_concept_info as I
 35+join ( select broad as concept, group_concat(concat(narrow, ":", local_concept_name) separator "|") as narrower from {collection}_{thesaurus}_broader
 36+ join {collection}_{thesaurus}_origin as O on O.global_concept = narrow and O.lang = "{lang}"
 37+ group by broad ) as X
 38+on X.concept = I.concept and I.lang = "{lang}"
 39+set I.narrower = X.narrower;
 40+
 41+-- collect similar concepts
 42+update {collection}_{thesaurus}_concept_info as I
 43+join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as similar from {collection}_{thesaurus}_relation
 44+ join {collection}_{thesaurus}_origin as O on O.global_concept = concept2 and O.lang = "{lang}"
 45+ where langmatch >= 1 or langref >= 1
 46+ group by concept1 ) as X
 47+on X.concept = I.concept and I.lang = "{lang}"
 48+set I.similar = X.similar;
 49+
 50+-- collect related concepts
 51+update {collection}_{thesaurus}_concept_info as I
 52+join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as related from {collection}_{thesaurus}_relation
 53+ join {collection}_{thesaurus}_origin as O on O.global_concept = concept2 and O.lang = "{lang}"
 54+ where bilink >= 1
 55+ group by concept1 ) as X
 56+on X.concept = I.concept and I.lang = "{lang}"
 57+set I.related = X.related;
 58+
Property changes on: trunk/WikiWord/WikiWord/src/main/php/concept-info-local.sql
___________________________________________________________________
Added: svn:mergeinfo

Status & tagging log