Index: trunk/WikiWord/WikiWordWeb/src/main/maintenance/search-index-local.sql |
— | — | @@ -1,16 +0,0 @@ |
2 | | -insert into {collection}_{thesaurus}_search_index ( |
3 | | - concept, concept_name, `type`, |
4 | | - `lang`, `term`, `score`, `norm` ) |
5 | | -select O.global_concept, M.concept_name, C.type, "{lang}", |
6 | | - REPLACE( LCASE( CAST(M.term_text as CHAR CHARACTER SET utf8) COLLATE utf8_general_ci ), "-", "" ), |
7 | | - M.rule * M.freq, 1 |
8 | | -from {collection}_{lang}_meaning as M |
9 | | -join {collection}_{thesaurus}_origin as O on O.lang = "{lang}" and O.local_concept = M.concept |
10 | | -join {collection}_{thesaurus}_concept as C on C.id = O.global_concept |
11 | | -where (M.rule not in (10, 30) OR M.freq > 1) and C.type > 0 |
12 | | -on duplicate key update |
13 | | - score = if (score > values(score), score, values(score)), |
14 | | - norm = if (norm < values(norm), score, values(norm)); |
15 | | - |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWordWeb/src/main/maintenance/build-search-index.sh |
— | — | @@ -4,12 +4,9 @@ |
5 | 5 | db="$1" |
6 | 6 | collection="$2" |
7 | 7 | thesaurus="$3" |
8 | | -languages="en de fr nl it es pt pl" |
9 | 8 | |
10 | | -echo "preparing search index" |
| 9 | +echo "building search index" |
11 | 10 | replace '{collection}' "$collection" '{thesaurus}' "$thesaurus" < search-index.sql | mysql "$db" |
12 | 11 | |
13 | | -for n in $languages; do |
14 | | - echo "collection search index: $n" |
15 | | - replace '{collection}' "$collection" '{thesaurus}' "$thesaurus" '{lang}' "$n" < search-index-local.sql | mysql "$db" |
16 | | -done |
| 12 | +echo "building resource index" |
| 13 | +replace '{collection}' "$collection" '{thesaurus}' "$thesaurus" < resource-index.sql | mysql "$db" |
Index: trunk/WikiWord/WikiWordWeb/src/main/maintenance/search-index.sql |
— | — | @@ -11,3 +11,19 @@ |
12 | 12 | ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; |
13 | 13 | |
14 | 14 | truncate {collection}_{thesaurus}_search_index; |
| 15 | + |
| 16 | +-- collect definitions |
| 17 | +insert into {collection}_{thesaurus}_search_index ( |
| 18 | + concept, concept_name, `type`, `lang`, `term`, `score`, `norm` ) |
| 19 | +select C.id, O.local_concept_name, C.type, M.lang, |
| 20 | + REPLACE( LCASE( CAST(M.term_text as CHAR CHARACTER SET utf8) COLLATE utf8_general_ci ), "-", "" ), |
| 21 | + M.rule * M.freq, 1 |
| 22 | +from {collection}_{thesaurus}_meaning as M |
| 23 | +join {collection}_{thesaurus}_concept as C on C.id = M.concept |
| 24 | +join {collection}_{thesaurus}_origin as O on O.global_concept = M.concept and O.lang = M.lang -- FIXME: remove this once the global menaing table contains the local concept name |
| 25 | +where (M.rule not in (10, 30) OR M.freq > 1) and C.type > 0 |
| 26 | +on duplicate key update |
| 27 | + score = if (score > values(score), score, values(score)), |
| 28 | + norm = if (norm < values(norm), score, values(norm)); |
| 29 | + |
| 30 | +-- FIXME: normalization levels! 0=none, 1=case-and-dash (+translit?), 2=whitespace-and-punctuation, 4=soundex |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWordWeb/src/main/maintenance/resource-index.sql |
— | — | @@ -0,0 +1,14 @@ |
| 2 | +create table if not exists {collection}_{thesaurus}_resource_index ( |
| 3 | + concept int(11) NOT NULL, |
| 4 | + resources MEDIUMBLOB int(11) NOT NULL, |
| 5 | + PRIMARY KEY ( concept ), |
| 6 | + ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; |
| 7 | + |
| 8 | +truncate {collection}_{thesaurus}_resource_index; |
| 9 | + |
| 10 | +-- collect ressources in all languages |
| 11 | +insert into {collection}_{thesaurus}_resource_index ( |
| 12 | + concept, resources |
| 13 | +select concept, group_concat(distinct concat(type, ":", lang, ":", local_resource_name) separator "|" ) as resources |
| 14 | +from {collection}_{thesaurus}_about as A |
| 15 | +where type > 0; |
Index: trunk/WikiWord/WikiWordWeb/src/main/www/wikiword/api.php |
— | — | @@ -10,16 +10,19 @@ |
11 | 11 | if ( $query ) { |
12 | 12 | $lang = @$_REQUEST['lang']; |
13 | 13 | $qlang = @$_REQUEST['qlang']; |
| 14 | + $rclang = @$_REQUEST['rclang']; |
14 | 15 | $format = @$_REQUEST['format']; |
15 | 16 | if ( !$format ) $format = 'phps'; |
16 | 17 | |
17 | 18 | if ($lang) $lang = preg_replace('[^\\w\\d_]', '', $lang); |
18 | 19 | if ($qlang) $qlang = preg_replace('[^\\w\\d_]', '', $qlang); |
| 20 | + |
| 21 | + if ($rclang) $rclang = preg_replace('[^\\w\\d_]', '', $rclang); |
19 | 22 | |
20 | 23 | if ($lang) { |
21 | | - $lang = preg_split('![\\s,;|/:]\\s*!', $lang); |
22 | | - if ( !$qlang ) $qlang = $lang[0]; |
23 | | - if (count($lang) == 1) $lang = $lang[0]; |
| 24 | + $lang = preg_split('![\\s,;|/:]\\s*!', $lang); |
| 25 | + if ( !$qlang ) $qlang = $lang[0]; |
| 26 | + if (count($lang) == 1) $lang = $lang[0]; |
24 | 27 | } |
25 | 28 | |
26 | 29 | $result = array( 'query' => $query ); |
— | — | @@ -40,29 +43,30 @@ |
41 | 44 | |
42 | 45 | if ( $qlang === null ) $result['error'] = array('code' => 150, 'message' => "missing parameter qlang"); |
43 | 46 | else if ( $term !== null ) { |
44 | | - $result['concepts'] = $thesaurus->getConceptsForTerm($qlang, $term, $lang, $norm); #TODO: limit! |
45 | | - if ( $result['concepts'] === false || $result['concepts'] === null ) { |
46 | | - $result['error'] = array('code' => 210, 'message' => "failed to retrieve concepts for term $langt:$term"); |
47 | | - } |
| 47 | + $result['concepts'] = $thesaurus->getConceptsForTerm($qlang, $term, $lang, $norm, $rclang); #TODO: limit! |
| 48 | + if ( $result['concepts'] === false || $result['concepts'] === null ) { |
| 49 | + $result['error'] = array('code' => 210, 'message' => "failed to retrieve concepts for term $langt:$term"); |
| 50 | + } |
48 | 51 | } /*else if ( $page !== null ) { |
49 | 52 | $result['concepts'] = $thesaurus->getConceptsForPage($lang, $page); |
50 | 53 | if ( $result['concepts'] === false || $result['concepts'] === null ) { |
51 | 54 | $result['error'] = array('code' => 250, 'message' => "failed to retrieve concepts for page $langt:$page"); |
52 | 55 | } |
53 | 56 | } */else { |
54 | | - $result['error'] = array('code' => 110, 'message' => "missing parameter term"); |
| 57 | + $result['error'] = array('code' => 110, 'message' => "missing parameter term"); |
55 | 58 | } |
56 | 59 | } else if ($query == 'concept' || $query == 'info') { |
57 | 60 | $gcid = @$_REQUEST['gcid']; |
58 | 61 | if (!$gcid) $gcid = @$_REQUEST['id']; |
59 | 62 | |
60 | 63 | if ( $gcid === null ) { |
61 | | - $result['error'] = array('code' => 120, 'message' => "missing parameter gcid"); |
| 64 | + $result['error'] = array('code' => 120, 'message' => "missing parameter gcid"); |
62 | 65 | } else { |
63 | | - $result['concept'] = $thesaurus->getConceptInfo($gcid, $lang); #TODO: limit! |
64 | | - if ( $result['concept'] === false || $result['concept'] === null ) { |
65 | | - $result['error'] = array('code' => 210, 'message' => "concept not found: $gcid"); |
66 | | - } |
| 66 | + $fields = null; |
| 67 | + $result['concept'] = $thesaurus->getConceptInfo($gcid, $lang, $fields, $rclang); #TODO: limit! |
| 68 | + if ( $result['concept'] === false || $result['concept'] === null ) { |
| 69 | + $result['error'] = array('code' => 210, 'message' => "concept not found: $gcid"); |
| 70 | + } |
67 | 71 | } |
68 | 72 | } else if ($query == 'properties') { |
69 | 73 | $gcid = @$_REQUEST['gcid']; |
Index: trunk/WikiWord/WikiWordWeb/src/main/www/wikipics/search.php |
— | — | @@ -200,7 +200,7 @@ |
201 | 201 | $mode = NULL; |
202 | 202 | $result = NULL; |
203 | 203 | |
204 | | -$fallback_languages = array( "en", "commons" ); #TODO: make the user define this list |
| 204 | +$fallback_languages = array( "en" ); #TODO: make the user define this list |
205 | 205 | |
206 | 206 | if ( $lang ) { |
207 | 207 | $languages = explode( '|', $lang ); |
— | — | @@ -222,11 +222,11 @@ |
223 | 223 | try { |
224 | 224 | if ($lang && $conceptId) { |
225 | 225 | $mode = "concept"; |
226 | | - $result = $thesaurus->getConceptInfo($conceptId, $allLanguages); |
| 226 | + $result = $thesaurus->getConceptInfo($conceptId, $lang, null, $allLanguages); |
227 | 227 | if ( $result ) $result = array( $result ); //hack |
228 | 228 | } else if ($lang && $term) { |
229 | 229 | $mode = "term"; |
230 | | - $result = $thesaurus->getConceptsForTerm($lang, $term, $allLanguages, $norm, $limit); |
| 230 | + $result = $thesaurus->getConceptsForTerm($lang, $term, $languages, $norm, $allLanguages, $limit); |
231 | 231 | } |
232 | 232 | } catch (Exception $e) { |
233 | 233 | $error = $e->getMessage(); |
— | — | @@ -237,7 +237,8 @@ |
238 | 238 | if (!isset($scriptPath)) $scriptPath = "./"; |
239 | 239 | if (!isset($skinPath)) $skinPath = "$scriptPath/../skin/"; |
240 | 240 | |
241 | | -if ( $format == "atom" || $format == "xml" || $format == "opensearch" ) include("response.atom.php"); |
242 | | -else include("response.html.php"); |
| 241 | +/*if ( $format == "atom" || $format == "xml" || $format == "opensearch" ) include("response.atom.php"); |
| 242 | +else*/ |
| 243 | +include("response.html.php"); |
243 | 244 | |
244 | 245 | $utils->close(); |
Index: trunk/WikiWord/WikiWordWeb/src/main/www/common/wwclient.php |
— | — | @@ -94,11 +94,13 @@ |
95 | 95 | return $p['scores']; |
96 | 96 | }*/ |
97 | 97 | |
98 | | - function getConceptInfo( $id, $lang = null ) { |
| 98 | + function getConceptInfo( $id, $lang = null, $fields = null, $rclang = null ) { |
99 | 99 | $param = array( |
100 | 100 | 'query' => 'info', |
101 | 101 | 'gcid' => $id, |
102 | | - 'lang' => $lang |
| 102 | + 'lang' => $lang, |
| 103 | + 'fields' => fields, |
| 104 | + 'rclang' => $rclang |
103 | 105 | ); |
104 | 106 | |
105 | 107 | $rs = $this->query( $param ); |
— | — | @@ -132,12 +134,13 @@ |
133 | 135 | return $rs; |
134 | 136 | }*/ |
135 | 137 | |
136 | | - function getConceptsForTerm( $qlang, $term, $languages, $norm = 1, $limit = 100 ) { |
| 138 | + function getConceptsForTerm( $qlang, $term, $languages, $norm = 1, $rclang = null, $limit = 100 ) { |
137 | 139 | if ( is_array( $languages ) ) $languages = implode('|', $languages); |
138 | 140 | |
139 | 141 | $param = array( |
140 | 142 | 'query' => 'concepts', |
141 | 143 | 'qlang' => $qlang, |
| 144 | + 'rclang' => $rclang, |
142 | 145 | 'lang' => $languages, |
143 | 146 | 'norm' => $norm, |
144 | 147 | 'term' => $term, |
Index: trunk/WikiWord/WikiWordWeb/src/main/www/common/wwthesaurus.php |
— | — | @@ -72,20 +72,26 @@ |
73 | 73 | return $s; |
74 | 74 | } |
75 | 75 | |
76 | | - function queryConceptsForTerm($qlang, $term, $languages, $norm = 1, $limit = 100) { |
| 76 | + function queryConceptsForTerm($qlang, $term, $languages, $norm = 1, $rclang = null, $limit = 100) { |
77 | 77 | global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages; |
78 | 78 | |
79 | 79 | if ( !$languages ) $languages = array_keys( $wwLanguages ); |
80 | 80 | |
81 | 81 | $term = $this->normalizeSearchString($term, $norm); |
82 | 82 | |
83 | | - $sql = "SELECT I.*, S.score FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept_info as I" |
84 | | - . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_search_index as S ON I.concept = S.concept " |
85 | | - . " WHERE term = " . $this->quote($term) |
| 83 | + $sql = "SELECT I.*, S.score "; |
| 84 | + if ( $rclang ) $sql .= ", R.resources " |
| 85 | + |
| 86 | + $sql .= " FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept_info as I "; |
| 87 | + $sql .= " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_search_index as S ON I.concept = S.concept "; |
| 88 | + if ( $rclang ) $sql .= " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_resource_index as R ON R.concept = I.concept " |
| 89 | + |
| 90 | + $sql .= " WHERE term = " . $this->quote($term) |
86 | 91 | . " AND I.lang IN " . $this->quoteSet($languages) |
87 | 92 | . " AND S.lang = " . $this->quote($qlang) |
88 | | - . " AND S.norm <= " . (int)$norm |
89 | | - . " ORDER BY S.score DESC, S.concept " |
| 93 | + . " AND S.norm <= " . (int)$norm; |
| 94 | + |
| 95 | + $sql .= " ORDER BY S.score DESC, S.concept " |
90 | 96 | . " LIMIT " . (int)$limit; |
91 | 97 | |
92 | 98 | #FIXME: query-lang vs. output-languages! |
— | — | @@ -93,8 +99,8 @@ |
94 | 100 | return $this->query($sql); |
95 | 101 | } |
96 | 102 | |
97 | | - function getConceptsForTerm($qlang, $term, $languages, $norm = 1, $limit = 100) { |
98 | | - $rs = $this->queryConceptsForTerm($qlang, $term, $languages); |
| 103 | + function getConceptsForTerm($qlang, $term, $languages, $norm = 1, $rclang = null, $limit = 100) { |
| 104 | + $rs = $this->queryConceptsForTerm($qlang, $term, $languages, $norm, $rclang, $limit); |
99 | 105 | $list = WWUtils::slurpRows($rs); |
100 | 106 | mysql_free_result($rs); |
101 | 107 | return $this->buildConcepts($list); |
— | — | @@ -243,6 +249,23 @@ |
244 | 250 | } |
245 | 251 | */ |
246 | 252 | |
| 253 | + function spliceResources( $rc, &$into ) { |
| 254 | + if (!$rc) return; |
| 255 | + |
| 256 | + if (is_string($rc)) { |
| 257 | + $rr = explode("|", $rc); |
| 258 | + |
| 259 | + $rc = array(); |
| 260 | + foreach ($rr as $r) { |
| 261 | + list($t, $lang, $n) = explode(":", $p, 3); |
| 262 | + $rc[$lang][$n] = (int)$t; |
| 263 | + } |
| 264 | + } |
| 265 | + |
| 266 | + if (!$into) $into = $rc; |
| 267 | + else $into = array_merge( $into, $rc ); |
| 268 | + } |
| 269 | + |
247 | 270 | function splitPages( $s ) { |
248 | 271 | $pp = explode("|", $s); |
249 | 272 | |
— | — | @@ -269,7 +292,7 @@ |
270 | 293 | } |
271 | 294 | |
272 | 295 | ///////////////////////////////////////////////////////// |
273 | | - function getConceptInfo( $id, $lang = null, $fields = null ) { |
| 296 | + function getConceptInfo( $id, $lang = null, $fields = null, $rclang = null ) { |
274 | 297 | global $wwTablePrefix, $wwThesaurusDataset; |
275 | 298 | |
276 | 299 | #TODO: concept cache! |
— | — | @@ -279,9 +302,14 @@ |
280 | 303 | |
281 | 304 | #TODO: scores, concept-type, ... |
282 | 305 | |
283 | | - $sql = "SELECT $fields FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept_info " |
284 | | - . " WHERE concept = ".(int)$id; |
| 306 | + $sql = "SELECT $fields "; |
| 307 | + if ( $rclang ) $sql .= ", R.resources "; |
| 308 | + |
| 309 | + $sql .= " FROM {$wwTablePrefix}_{$wwThesaurusDataset}_concept_info as I "; |
| 310 | + if ( $rclang ) $sql .= " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_resource_index as R ON R.concept = I.concept "; |
285 | 311 | |
| 312 | + $sql .= " WHERE concept = ".(int)$id; |
| 313 | + |
286 | 314 | if ($lang) { |
287 | 315 | if ( is_array($lang) ) $sql .= " AND lang IN " . $this->quoteSet($lang); |
288 | 316 | else $sql .= " AND lang = " . $this->quote($lang); |
— | — | @@ -352,8 +380,10 @@ |
353 | 381 | |
354 | 382 | if (@$row["name"] !== null) $concept["name"][$lang] = $row["name"]; |
355 | 383 | if (@$row["definition"] !== null) $concept["definition"][$lang] = $row["definition"]; |
356 | | - if (@$row["pages"] !== null) $concept["pages"][$lang] = $this->splitPages($row["pages"]); |
357 | 384 | |
| 385 | + if (@$row["resources"]) $this->spliceResources($row["resources"], &$concept["pages"]) ); |
| 386 | + else if (@$row["pages"]) $concept["pages"][$lang] = $this->splitPages($row["pages"]); |
| 387 | + |
358 | 388 | if (@$row["broader"] !== null) $broader[$lang] = $this->splitConcepts($row["broader"]); |
359 | 389 | if (@$row["narrower"] !== null) $narrower[$lang] = $this->splitConcepts($row["narrower"]); |
360 | 390 | if (@$row["similar"] !== null) $similar[$lang] = $this->splitConcepts($row["similar"]); |