Index: trunk/WikiWord/WikiWord/src/main/php/concept-info.sql |
— | — | @@ -0,0 +1,69 @@ |
| 2 | +SET SESSION group_concat_max_len = 262144; -- 1024*256, upper bound for the GROUP_CONCAT results built below |
| 3 | + |
| 4 | +CREATE TABLE full_all_local_concept_info ( |
| 5 | + `concept` INT(11) NOT NULL, |
| 6 | + `lang` VARBINARY(10) NOT NULL, |
| 7 | + `name` VARBINARY(255) NOT NULL, |
| 8 | + `pages` MEDIUMBLOB DEFAULT NULL, -- "type:name" entries joined by "|" |
| 9 | + `definition` MEDIUMBLOB DEFAULT NULL, |
| 10 | + `broader` MEDIUMBLOB DEFAULT NULL, -- "concept:name" entries joined by "|" |
| 11 | + `narrower` MEDIUMBLOB DEFAULT NULL, -- "concept:name" entries joined by "|" |
| 12 | + `similar` MEDIUMBLOB DEFAULT NULL, -- "concept:name" entries joined by "|" |
| 13 | + `related` MEDIUMBLOB DEFAULT NULL, -- "concept:name" entries joined by "|" |
| 14 | + PRIMARY KEY ( `concept`, `lang` ) |
| 15 | + ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; |
| 16 | + |
| 17 | +INSERT INTO full_all_local_concept_info ( concept, lang, name ) -- one row per entry of full_all_origin |
| 18 | +SELECT origin.global_concept, origin.lang, origin.local_concept_name |
| 19 | +FROM full_all_origin AS origin; |
| 20 | + |
| 21 | +UPDATE full_all_local_concept_info AS info -- copy the per-language definition onto the matching info rows |
| 22 | +INNER JOIN full_all_origin AS origin ON info.lang = origin.lang AND info.concept = origin.global_concept |
| 23 | +INNER JOIN full_{lang}_definition AS defn ON origin.local_concept = defn.concept AND origin.lang = "{lang}" |
| 24 | +SET info.definition = defn.definition |
| 25 | +WHERE info.lang = "{lang}"; |
| 26 | + |
| 27 | +UPDATE full_all_local_concept_info AS info -- store each concept's resources of type 10 or 50 as "type:name" entries |
| 28 | +INNER JOIN ( SELECT origin.global_concept AS concept, origin.lang AS lang, |
| 29 | + GROUP_CONCAT( CONCAT(res.type, ":", res.name) SEPARATOR "|" ) AS pages |
| 30 | + FROM full_all_origin AS origin |
| 31 | + INNER JOIN full_{lang}_about AS about_link ON about_link.concept = origin.local_concept AND origin.lang = "{lang}" |
| 32 | + INNER JOIN full_{lang}_resource AS res ON res.id = about_link.resource |
| 33 | + WHERE origin.lang = "{lang}" AND res.type IN (10, 50) |
| 34 | + GROUP BY origin.global_concept, origin.lang |
| 35 | + ) AS agg |
| 36 | +ON info.concept = agg.concept AND info.lang = agg.lang |
| 37 | +SET info.pages = agg.pages |
| 38 | +WHERE info.lang = "{lang}"; |
| 39 | + |
| 40 | + |
| 41 | +update full_all_local_concept_info as I -- fill I.broader with "concept:name" entries of the broader concepts, named in the current language |
| 42 | +join ( select narrow as concept, group_concat(concat(broad, ":", local_concept_name) separator "|") as broader from full_all_broader |
| 43 | + join full_all_origin as O on O.global_concept = broad and O.lang = "{lang}" |
| 44 | + group by narrow ) as X |
| 45 | +on X.concept = I.concept and I.lang = "{lang}" -- the language test is part of the join condition, so only rows of this language are updated |
| 46 | +set I.broader = X.broader; |
| 47 | + |
| 48 | +update full_all_local_concept_info as I -- fill I.narrower with "concept:name" entries of the narrower concepts, named in the current language |
| 49 | +join ( select broad as concept, group_concat(concat(narrow, ":", local_concept_name) separator "|") as narrower from full_all_broader |
| 50 | + join full_all_origin as O on O.global_concept = narrow and O.lang = "{lang}" |
| 51 | + group by broad ) as X |
| 52 | +on X.concept = I.concept and I.lang = "{lang}" -- the language test is part of the join condition, so only rows of this language are updated |
| 53 | +set I.narrower = X.narrower; |
| 54 | + |
| 55 | +update full_all_local_concept_info as I -- fill I.similar from relations that have langmatch or langref set |
| 56 | +join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as similar from full_all_relation |
| 57 | + join full_all_origin as O on O.global_concept = concept2 and O.lang = "{lang}" |
| 58 | + where langmatch >= 1 or langref >= 1 |
| 59 | + group by concept1 ) as X |
| 60 | +on X.concept = I.concept and I.lang = "{lang}" -- the language test is part of the join condition, so only rows of this language are updated |
| 61 | +set I.similar = X.similar; |
| 62 | + |
| 63 | +update full_all_local_concept_info as I -- fill I.related from relations that have bilink set |
| 64 | +join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as related from full_all_relation |
| 65 | + join full_all_origin as O on O.global_concept = concept2 and O.lang = "{lang}" |
| 66 | + where bilink >= 1 |
| 67 | + group by concept1 ) as X |
| 68 | +on X.concept = I.concept and I.lang = "{lang}" -- the language test is part of the join condition, so only rows of this language are updated |
| 69 | +set I.related = X.related; |
| 70 | + |
Index: trunk/WikiWord/WikiWord/src/main/php/wwutils.php |
— | — | @@ -55,7 +55,9 @@ |
56 | 56 | |
57 | 57 | foreach ($a as $x) { |
58 | 58 | if ($s) $s.= ", "; |
59 | | - $s .= $this->quote($x); |
| 59 | + |
| 60 | + if (is_string($x)) $s .= $this->quote($x); |
| 61 | + else $s .= $x; |
60 | 62 | } |
61 | 63 | |
62 | 64 | return '(' . $s . ')'; |
Index: trunk/WikiWord/WikiWord/src/main/php/wwthesaurus.php |
— | — | @@ -1,6 +1,65 @@ |
2 | 2 | <?php |
3 | 3 | require_once(dirname(__FILE__)."/wwutils.php"); |
4 | 4 | |
| 5 | + /** Unknown type; SHOULD NOT occur in final data. MAY be used for |
| 6 | + * resources that are referenced but were not available for analysis, |
| 7 | + * or have not yet been analyzed. |
| 8 | + **/ |
| 9 | + define('WW_RC_TYPE_UNKNOWN', 0); |
| 10 | + |
| 11 | + /** |
| 12 | + * A "real" page, describing a concept. |
| 13 | + */ |
| 14 | + define('WW_RC_TYPE_ARTICLE', 10); |
| 15 | + |
| 16 | + /** |
| 17 | + * A page that is a supplemental part of an article, typically a transcluded |
| 18 | + * subpage or similar. |
| 19 | + */ |
| 20 | + define('WW_RC_TYPE_SUPPLEMENT', 15); |
| 21 | + |
| 22 | + |
| 23 | + /** |
| 24 | + * A page solely defining a redirect/alias for another page. |
| 25 | + */ |
| 26 | + define('WW_RC_TYPE_REDIRECT', 20); |
| 27 | + |
| 28 | + /** |
| 29 | + * A disambiguation page, listing different meanings for the page title, |
| 30 | + * each linking to an article page. |
| 31 | + */ |
| 32 | + define('WW_RC_TYPE_DISAMBIG', 30); |
| 33 | + |
| 34 | + /** |
| 35 | + * A page that contains a list of concepts that share some common property or quality, |
| 36 | + * usually each linking to a page describing that concept. |
| 37 | + */ |
| 38 | + define('WW_RC_TYPE_LIST', 40); |
| 39 | + |
| 40 | + /** |
| 41 | + * A category page. |
| 42 | + */ |
| 43 | + define('WW_RC_TYPE_CATEGORY', 50); |
| 44 | + |
| 45 | + /** |
| 46 | + * A page that does not contain relevant information for WikiWord. |
| 47 | + */ |
| 48 | + define('WW_RC_TYPE_OTHER', 99); |
| 49 | + |
| 50 | + /** |
| 51 | + * A page that is broken in some way, or was marked as bad or disputed. Such pages |
| 52 | + * SHOULD generally be treated as if they didn't exist. |
| 53 | + */ |
| 54 | + define('WW_RC_TYPE_BAD', 100); |
| 55 | + |
| 56 | + /** |
| 57 | + * A resource that is not a page by itself, but merely a section of a page. Sections |
| 58 | + * SHOULD always be part of a page of type ARTICLE, and are expected to describe |
| 59 | + * a narrower concept than the "parent" page. |
| 60 | + */ |
| 61 | + define('WW_RC_TYPE_SECTION', 200); |
| 62 | + |
| 63 | + |
5 | 64 | class WWThesaurus extends WWUTils { |
6 | 65 | |
7 | 66 | function queryConceptsForTerm($lang, $term, $limit = 100) { |
— | — | @@ -235,6 +294,7 @@ |
236 | 295 | . " JOIN {$wwTablePrefix}_{$ll}_about as A ON A.resource = R.id " |
237 | 296 | . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O ON O.lang = \"" . mysql_real_escape_string($ll) . "\" AND A.concept = O.local_concept " |
238 | 297 | . " WHERE O.global_concept = " . (int)$id |
| 298 | + . " AND R.type IN ( " . WW_RC_TYPE_ARTICLE . ", " . WW_RC_TYPE_CATEGORY . " ) " // extends the WHERE above: only article and category resources |
239 | 299 | . " LIMIT " . (int)$limit; |
240 | 300 | |
241 | 301 | $pages = $this->getList($sql, "name"); |
— | — | @@ -247,6 +307,28 @@ |
248 | 308 | return $result; |
249 | 309 | } |
250 | 310 | |
| 311 | + function getNamesForConcept( $id, $lang = null ) { |
| 312 | + global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages; |
| 313 | + // Returns array( language => names of global concept $id in that language ); returns false as soon as getList() yields false or null. |
| 314 | + if ( !$lang ) $lang = array_keys( $wwLanguages ); |
| 315 | + if ( !is_array($lang) ) $lang = preg_split('![\\s,;|/:]\\s*!', $lang); |
| 316 | + $result = array(); |
| 317 | + |
| 318 | + foreach ($lang as $ll) { |
| 319 | + $sql = "SELECT O.local_concept_name AS name FROM {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O "; |
| 320 | + $sql .= " WHERE O.global_concept = " . (int)$id; |
| 321 | + $sql .= " AND O.lang = \"" . mysql_real_escape_string($ll) . "\""; |
| 322 | + // The language code is a string, so it is escaped and quoted; the alias presumably matches the "name" column getList() extracts - verify against getList(). |
| 323 | + $names = $this->getList($sql, "name"); |
| 324 | + if ( $names === false || $names === null ) return false; |
| 325 | + if ( !$names ) continue; |
| 326 | + |
| 327 | + $result[$ll] = $names; |
| 328 | + } |
| 329 | + |
| 330 | + return $result; |
| 331 | + } |
| 332 | + |
251 | 333 | function getTermsForConcept( $id, $lang = null, $limit = 100 ) { |
252 | 334 | global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages; |
253 | 335 | |