r19776 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r19775‎ | r19776 | r19777 >
Date:00:40, 5 February 2007
Author:erik
Status:old
Tags:
Comment:
more renaming
Modified paths:
  • /trunk/extensions/Wikidata/perl-tools/Import+OmegaWiki.pl (added) (history)
  • /trunk/extensions/Wikidata/perl-tools/Import+OmegaWiki.pl (added) (history)
  • /trunk/extensions/Wikidata/perl-tools/Import+WiktionaryZ.pl (deleted) (history)
  • /trunk/extensions/Wikidata/perl-tools/OmegaWiki.pm (added) (history)
  • /trunk/extensions/Wikidata/perl-tools/OmegaWiki.pm (added) (history)
  • /trunk/extensions/Wikidata/perl-tools/WiktionaryZ.pm (deleted) (history)

Diff [purge]

Index: trunk/extensions/Wikidata/perl-tools/Import WiktionaryZ.pl
@@ -1,55 +0,0 @@
2 -use OmegaWiki;
3 -use POSIX qw(strftime);
4 -
5 -my $startTime = time;
6 -
7 -# Example usage to import UMLS completely into an existing OmegaWiki database:
8 -# my $importer=new OmegaWiki('wikidatadb','root','MyPass');
9 -# $importer->setSourceDB('umls');
10 -# $importer->initialize;
11 -# $importer->importCompleteUMLS();
12 -
13 -# Example usage to import a part of UMLS into an existing OmegaWiki database:
14 -# my $importer=new OmegaWiki('wikidatadb','root','MyPass');
15 -# $importer->setSourceDB('umls');
16 -# $importer->initialize;
17 -# my %sourceAbbreviations = $importer->loadSourceAbbreviations();
18 -# delete($sourceAbbreviations{"MSH"});
19 -# $importer->importUMLS(\%sourceAbbreviations);
20 -
21 -my $importer=new OmegaWiki('wikidata_icpc','root','');
22 -$importer->setSourceDB('umls');
23 -#$importer->setSourceDB('swissprot');
24 -$importer->initialize;
25 -#$importer->importCompleteUMLS();
26 -
27 -#read the source abbreviations and remove those you do not wish to import
28 -my %sourceAbbreviations = $importer->loadSourceAbbreviations();
29 -my @deleteList;
30 -while (($key, $val) = each(%sourceAbbreviations)) {
31 -
32 -#remove all that contains "MSH":
33 -# if (index($key,"MSH") >= 0) {
34 -# push(@deleteList, $key);
35 -# }
36 -
37 -#remove all that does not contain "ICPC":
38 - if (index($key,"ICPC") < 0) {
39 - push(@deleteList, $key);
40 - }
41 -}
42 -
43 -foreach $sab (@deleteList){
44 - delete($sourceAbbreviations{$sab});
45 -}
46 -
47 -$importer->importUMLS(\%sourceAbbreviations);
48 -
49 -
50 -my $endTime = time;
51 -print "\n";
52 -print "Import started at: " . (strftime "%H:%M:%S", localtime($startTime)) . "\n";
53 -print "Import ended at: " . (strftime "%H:%M:%S", localtime($endTime)) . "\n";
54 -print "Elapsed time: " . (strftime "%H:%M:%S", gmtime($endTime - $startTime)) . "\n";
55 -
56 -exit 0;
\ No newline at end of file
Index: trunk/extensions/Wikidata/perl-tools/WiktionaryZ.pm
@@ -1,1063 +0,0 @@
2 -# Example usage to import UMLS into an existing OmegaWiki database:
3 -# use OmegaWiki;
4 -# my $importer=new OmegaWiki('wikidatadb','root','MyPass');
5 -# $importer->setSourceDB('umls');
6 -# $importer->initialize;
7 -# $importer->importCompleteUMLS();
8 -#
9 -# NOTE: When importing UMLS, we expect the presence of the semantic network data
10 -# in the tables SRDEF and the manually created tables SEMRELHIER and SEMTYPEHIER.
11 -# SEMRELHIER and SEMTYPEHIER contain information about the relations between
12 -# semantic types and relation types, using RB as the code for "broader than"
13 -# and RN for "narrower than".
14 -#
15 -# Todo for UMLS:
16 -# SyntransCollection
17 -# RelationCollection
18 -# Fully deal with alternative definitions referring to the same concept
19 -# Deal with preferred lexical expressions, primary concepts (general weighting mechanism?)
20 -
21 -package OmegaWiki;
22 -use DBI;
23 -use Encode;
24 -use POSIX qw(strftime);
25 -
26 -sub new {
27 - my $type=shift;
28 - my $self={};
29 - $self->{targetdb}=shift;
30 - $self->{targetuser}=shift;
31 - $self->{targetpass}=shift;
32 - $self->{targethost}=shift || 'localhost';
33 - $self->{targetport}=shift || '3306';
34 - $self->{targetdriver}=shift || 'mysql';
35 - bless($self, $type);
36 - return($self);
37 -}
38 -
39 -sub setSourceDB {
40 - my $self=shift;
41 - $self->{sourcedb}=shift;
42 - $self->{sourceuser}=shift || $self->{targetuser};
43 - $self->{sourcepass}=shift || $self->{targetpass};
44 - $self->{sourcehost}=shift || $self->{targethost};
45 - $self->{sourceport}=shift || $self->{targetport};
46 - $self->{sourcedriver}=shift || $self->{targetdriver};
47 -}
48 -
49 -sub connectSourceDB {
50 - my $self=shift;
51 - my $dsn = 'dbi:'.$self->{sourcedriver}.':'.$self->{sourcedb}.':'.$self->{sourcehost}.':'.$self->{sourceport};
52 - $self->{dbs}=DBI->connect($dsn,$self->{sourceuser},$self->{sourcepass});
53 -}
54 -
55 -sub connectTargetDB {
56 - my $self=shift;
57 - my $dsn = 'dbi:'.$self->{targetdriver}.':'.$self->{targetdb}.':'.$self->{targethost}.':'.$self->{targetport};
58 - $self->{dbt}=DBI->connect($dsn,$self->{targetuser},$self->{targetpass});
59 -}
60 -
61 -sub connectDBs() {
62 - my $self = shift;
63 -
64 - $self->connectSourceDB();
65 - $self->connectTargetDB();
66 -}
67 -
68 -sub loadLanguages() {
69 - my $self = shift;
70 -
71 - my %la=$self->loadLangs();
72 - $self->{la}=\%la;
73 - my %la_iso=$self->loadLangsIso();
74 - $self->{la_iso}=\%la_iso;
75 -}
76 -
77 -sub initialize {
78 - my $self=shift;
79 - $self->connectDBs();
80 - $self->loadLanguages();
81 -}
82 -
83 -sub importCompleteUMLS {
84 - my $self=shift;
85 - my $level=shift || 0; # 0= complete; 1=reltypes+; 2=rel+
86 -
87 - my %sourceAbbreviations = $self->loadSourceAbbreviations();
88 - $self->importUMLS(\%sourceAbbreviations, $level);
89 -}
90 -
91 -sub importUMLS() {
92 - my $self=shift;
93 - my $sourceAbbreviationsReference = shift;
94 - my $level=shift || 0; # 0= complete; 1=reltypes+; 2=rel+
95 -
96 - my %sourceAbbreviations = %{$sourceAbbreviationsReference};
97 - my %cid=$self->getOrCreateCollections(\%sourceAbbreviations);
98 - $self->{cid}=\%cid;
99 -
100 - if ($level<1){
101 - while (($sourceAbbreviation, $collectionId) = each %cid) {
102 - print "Import UMLS terms for $sourceAbbreviation\n";
103 - $self->importUMLSterms($sourceAbbreviation, $collectionId);
104 - }
105 - }
106 -
107 - if($level<2) {
108 - print "Import UMLS relation types 'REL'\n";
109 - $self->importUMLSrelationtypes('REL');
110 - print "Import UMLS relation types 'RELA'\n";
111 - $self->importUMLSrelationtypes('RELA');
112 - }
113 -
114 - if($level<3) {
115 - while (($sourceAbbreviation, $collectionId) = each %cid) {
116 - my %rt=$self->loadReltypes();
117 - $self->{reltypes}=\%rt;
118 - print "Import UMLS relations 'REL' for $sourceAbbreviation\n";
119 - $self->importUMLSrelations('REL',$sourceAbbreviation);
120 - print "Import UMLS relations 'RELA' for $sourceAbbreviation\n";
121 - $self->importUMLSrelations('RELA',$sourceAbbreviation);
122 - }
123 - }
124 -
125 - if($level<4) {
126 - print "Import SN types 'STY'\n";
127 - $self->importSNtypes('STY');
128 - print "Import SN types 'RL'\n";
129 - $self->importSNtypes('RL');
130 - print "Import ST relations 'STY'\n";
131 - $self->importSTrelations('STY');
132 - print "Import ST relations 'RL'\n";
133 - $self->importSTrelations('RL');
134 - # $self->importSTrelations2();
135 - }
136 -
137 - if($level<5) {
138 - while (($sourceAbbreviation, $collectionId) = each %cid) {
139 - my %attribs=$self->loadAttributes();
140 - $self->{attribs}=\%attribs;
141 - print "Import UMLS types for $sourceAbbreviation\n";
142 - $self->importUMLSstypes($sourceAbbreviation);
143 - }
144 - }
145 -}
146 -
147 -sub importGEMET {
148 - my $self=shift;
149 - $self->connectSourceDB();
150 - $self->connectTargetDB();
151 - my %la=$self->loadLangs();
152 - $self->{la}=\%la;
153 - my %cid=$self->bootstrapGemetCollection();
154 - $self->{cid}=\%cid;
155 - $self->initRel($self->{cid}{'GEMETREL'});
156 - my %rt=$self->loadReltypes();
157 - $self->{reltypes}=\%rt;
158 - $self->importGemetTerms();
159 - $self->importGemetRelations();
160 - $self->importGemetThemes();
161 -}
162 -
163 -sub importUMLSstypes {
164 - my $self=shift;
165 - my $sab=shift;
166 -
167 - my $getassocs=$self->{dbs}->prepare("select MRSTY.CUI, MRSTY.STY from MRCONSO,MRSTY where MRCONSO.SAB like ? and MRCONSO.CUI=MRSTY.CUI");
168 - $getassocs->execute($sab);
169 - while(my $row=$getassocs->fetchrow_hashref()) {
170 - my %rv=$self->getMidForMember($row->{CUI});
171 - my $att=$self->{attribs}{$row->{STY}};
172 - #print "$rv{mid} is a $row->{STY} ($att)\n";
173 - $self->addRelation($rv{rid},0,$rv{mid},$att, my $checkfordupes=1);
174 - }
175 -}
176 -
177 -sub getCollections(){
178 - my $self=shift;
179 - my %cid;
180 - $cid{'CRISP'}=$self->findCollection($self->findMeaning($self->findItem('CRISP Thesaurus, 2005',$self->{la}{'en'})));
181 - $cid{'STY'}=$self->findCollection($self->findMeaning($self->findItem('Semantic Network 2005AC Semantic Types',$self->{la}{'en'})));
182 - $cid{'RL'}=$self->findCollection($self->findMeaning($self->findItem('Semantic Network 2005AC Relation Types',$self->{la}{'en'})));
183 - $cid{'REL'}=$self->findCollection($self->findMeaning($self->findItem('UMLS Relation Types 2005',$self->{la}{'en'})));
184 - $cid{'RELA'}=$self->findCollection($self->findMeaning($self->findItem('UMLS Relation Attributes 2005',$self->{la}{'en'})));
185 - $cid{'ICPC'}=$self->findCollection($self->findMeaning($self->findItem('The International Classification of Primary Care (ICPC), 1993',$self->{la}{'en'})));
186 - $cid{'MESH'}=$self->findCollection($self->findMeaning($self->findItem('Medical Subject Headings (MeSH), 2005',$self->{la}{'en'})));
187 -# $cid{'SP'}=$self->findCollection($self->findMeaning($self->findItem('Swiss-Prot',$self->{la}{'en'})));
188 - return %cid;
189 -}
190 -
191 -sub findCollection() {
192 - my $self=shift;
193 - my $mid=shift;
194 - my $findcoll=$self->{dbt}->prepare("select collection_id from uw_collection_ns where collection_mid=? and is_latest=1");
195 - $findcoll->execute($mid);
196 - my $row=$findcoll->fetchrow_hashref();
197 - return $row->{collection_id};
198 -}
199 -
200 -sub getCollection {
201 - my $self = shift;
202 - my $expression = shift;
203 -
204 - return $self->findCollection($self->findMeaning($self->findExpressionId($expression,$self->{la}{'en'})));
205 -}
206 -
207 -sub bootstrapCollection {
208 - my $self = shift;
209 - my $expression = shift;
210 - my $collectionType = shift;
211 -
212 - %rv=$self->addExpression($expression,$self->{la}{'en'});
213 - return $self->addCollection($rv{mid},$collectionType);
214 -}
215 -
216 -sub getOrCreateCollection {
217 - my $self = shift;
218 - my $expression = shift;
219 -
220 - my $result = $self->getCollection($expression);
221 -
222 - if (!$result) {
223 - $result = $self->bootstrapCollection($expression, '');
224 - }
225 -
226 - return $result;
227 -}
228 -
229 -sub getOrCreateCollections {
230 - my $self = shift;
231 - my $sourceAbbreviationsReference = shift;
232 - my %sourceAbbreviations = %{$sourceAbbreviationsReference};
233 - my %cid;
234 -
235 - while (($key, $value) = each %sourceAbbreviations) {
236 - $cid{$key} = $self->getOrCreateCollection($value);
237 - }
238 -
239 - return %cid;
240 -}
241 -
242 -sub loadSourceAbbreviations {
243 - my $self = shift;
244 - my %sab;
245 -
246 - my $dataset = $self->{dbs}->prepare("select * from mrsab");
247 - $dataset->execute();
248 - while (my $row = $dataset->fetchrow_hashref()) {
249 - $sab{$row->{RSAB}} = $row->{SON};
250 - }
251 - return %sab;
252 -}
253 -
254 -# SEMTYPEHIER and SEMRELHIER contain only the is_a relationships, whereas
255 -# srstr contains all others
256 -# FIXME: only use SRSTR
257 -sub importSTrelations2 {
258 - my $self=shift;
259 - my $getrels=$self->{dbs}->prepare("select * from srstr where rel!='isa'");
260 - $getrels->execute();
261 - while(my $row=$getrels->fetchrow_hashref()) {
262 - my %rv1=$self->getMidForMember($row->{TYPE1},$self->{cid}{'STY'});
263 - my %rv2=$self->getMidForMember($row->{TYPE2},$self->{cid}{'STY'});
264 - my $rtmid=$self->{reltypes}{$row->{REL}};
265 - #print "Adding relation $row->{REL} ($rtmid) between $row->{TYPE1} and $row->{TYPE2}\n";
266 - $self->addRelation($rv1{rid},$rtmid,$rv1{mid},$rv2{mid},my $checkfordupes=1);
267 - }
268 -}
269 -
270 -
271 -sub importSTrelations {
272 - my $self=shift;
273 - my $which=shift;
274 - my $table;
275 - my $field1;
276 - my $field2;
277 - if($which eq 'STY') {
278 - $table='semtypehier';
279 - $field1='SEMTYPE1';
280 - $field2='SEMTYPE2';
281 - } elsif($which eq 'RL') {
282 - $table='semrelhier';
283 - $field1='RELTYPE1';
284 - $field2='RELTYPE2';
285 - }
286 -
287 - my $gettypehier=$self->{dbs}->prepare("select * from $table");
288 - $gettypehier->execute();
289 - while(my $typehier=$gettypehier->fetchrow_hashref()) {
290 - my %rv1=$self->getMidForMember($typehier->{$field1},$self->{cid}{$which});
291 - my %rv2=$self->getMidForMember($typehier->{$field2},$self->{cid}{$which});
292 - my $rtmid=$self->{reltypes}{$typehier->{RELATION}};
293 - print "Adding relation $typehier->{RELATION} ($rtmid) between $typehier->{$field1} and $typehier->{$field2}\n";
294 - $self->addRelation($rv1{rid},$rtmid,$rv1{mid},$rv2{mid},my $checkfordupes=1);
295 - }
296 -}
297 -
298 -# $member_id - the collection-internal identifier for this member
299 -# $cid The collection in which to search for this member (optional)
300 -# Returns the DefinedMeaningID and the revision id
301 -sub getMidForMember {
302 - my $self=shift;
303 - my $member_id=shift;
304 - my $cid=shift;
305 - my %rv;
306 - my $getmid;
307 - if($cid) {
308 - $getmid=$self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where collection_id=? and internal_member_id=? and is_latest_set=1 limit 1");
309 - $getmid->execute($cid,$member_id);
310 - } else {
311 - $getmid=$self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1");
312 - $getmid->execute($member_id);
313 - }
314 - my $member_mid=$getmid->fetchrow_hashref();
315 - $rv{mid}=$member_mid->{member_mid};
316 - $rv{rid}=$member_mid->{revision_id};
317 - return %rv;
318 -
319 -}
320 -
321 -sub loadReltypes {
322 - my $self=shift;
323 - my %reltypes;
324 - # Get the relation type
325 - $getreltype=$self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='RELT' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
326 - $getreltype->execute();
327 - while (my $reltype=$getreltype->fetchrow_hashref()) {
328 - $reltypes{$reltype->{internal_member_id}}=$reltype->{member_mid};
329 - }
330 - return %reltypes;
331 -}
332 -
333 -sub loadAttributes {
334 - my $self=shift;
335 - my %attributes;
336 - $getatt=$self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='ATTR' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
337 - $getatt->execute();
338 - while (my $att=$getatt->fetchrow_hashref()) {
339 - $attributes{$att->{internal_member_id}}=$att->{member_mid};
340 - }
341 - return %attributes;
342 -}
343 -
344 -
345 -# Get all SRDEF attributes
346 -# Get relations between SRDEF
347 -sub importSNtypes {
348 - my $self=shift;
349 - my $type=shift;
350 - $getsemtypes=$self->{dbs}->prepare("select semtypeab,type,definition from srdef where type=?");
351 - $getsemtypes->execute($type);
352 - while (my $semtype=$getsemtypes->fetchrow_hashref()) {
353 - my $type_expression=$semtype->{semtypeab};
354 - my $type_code=$type_expression;
355 - $type_expression=~s/_/ /g;
356 - $type_expression=lc($type_expression);
357 - my %rv=$self->addExpression($type_expression,$self->{la}{'en'},0,$self->{cid}{$type},$type_code);
358 - $self->addMeaningText($rv{'rid'},$rv{'mid'},$semtype->{definition},undef,$self->{la}{'en'});
359 - #print $type_expression." - $self->{cid}{$type} - $type_code\n";
360 - }
361 -}
362 -
363 -sub importUMLSrelations {
364 - my $self=shift;
365 - my $which=shift; # REL or RELA
366 - my $source=shift; # SAB as MySQL LIKE string
367 - my $getrels;
368 -
369 - if($which eq 'REL') {
370 - $getrels=$self->{dbs}->prepare("select cui1,cui2,rel from MRREL where sab like ?");
371 - } elsif($which eq 'RELA') {
372 - $getrels=$self->{dbs}->prepare("select cui1,cui2,rela from MRREL where sab like ? and rela!=''");
373 - }
374 - $getrels->execute($source);
375 - while(my $rel=$getrels->fetchrow_hashref()) {
376 - my $relid=$rel->{lc($which)};
377 - # These mean the same thing
378 - if($relid eq 'CHD') {
379 - $relid='RN';
380 - } elsif($relid eq 'PAR') {
381 - $relid='RB';
382 - }
383 - $getmid=$self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1");
384 - # Note that the direction in UMLS is opposite to ours
385 - $getmid->execute($rel->{cui2});
386 - my $mid1=$getmid->fetchrow_hashref();
387 - $getmid->execute($rel->{cui1});
388 - my $mid2=$getmid->fetchrow_hashref();
389 - # FIXME: We are ignoring term relations for now!
390 - if(($mid1->{member_mid} && $mid2->{member_mid}) && ($mid1->{member_mid} != $mid2->{member_mid}) && $self->{reltypes}{$relid}) {
391 - # Add the relation
392 - #print "Found relation ".$relid." (".$self->{reltypes}{$relid}.") between ".$mid1->{member_mid}." and ".$mid2->{member_mid}.".\n";
393 - $self->addRelation($mid1->{revision_id},$self->{reltypes}{$relid},$mid1->{member_mid},$mid2->{member_mid},my $checkfordupes=1);
394 - } else {
395 - if(!$mid1->{member_mid} && $mid2->{member_mid}) {
396 - print "Did not find MID for ".$rel->{cui1}."!\n";
397 - } elsif($mid1->{member_mid} && !$mid2->{member_mid}) {
398 - print "Did not find MID for ".$rel->{cui2}."!\n";
399 - } elsif(!$mid1->{member_mid} && !$mid2->{member_mid}) {
400 - print "Did not find MIDs for ".$rel->{cui1}." and ".$rel->{cui2}."!\n";
401 - }
402 - }
403 - }
404 -
405 -}
406 -
407 -
408 -sub bootstrapGemetCollection {
409 - my $self=shift;
410 - my %cid;
411 - %rv=$self->addExpression('GEMET Environmental Thesaurus Relation Types',$self->{la}{'en'});
412 - $cid{'GEMETREL'}=$self->addCollection($rv{mid},'RELT');
413 - %rv=$self->addExpression('GEMET Environmental Thesaurus Relation Types',$self->{la}{'en'});
414 - $cid{'GEMET'}=$self->addCollection($rv{mid},'');
415 - return %cid;
416 -}
417 -
418 -
419 -sub bootstrapCollections {
420 - my $self=shift;
421 - my %cid;
422 - my %rv;
423 -
424 - %rv=$self->addExpression('CRISP Thesaurus, 2005',$self->{la}{'en'});
425 - $cid{'CRISP'}=$self->addCollection($rv{mid},'');
426 - %rv=$self->addExpression('Semantic Network 2005AC Semantic Types',$self->{la}{'en'});
427 - $cid{'STY'}=$self->addCollection($rv{mid},'ATTR');
428 - %rv=$self->addExpression('Semantic Network 2005AC Relation Types',$self->{la}{'en'});
429 - $cid{'RL'}=$self->addCollection($rv{mid},'RELT');
430 - %rv=$self->addExpression('UMLS Relation Types 2005',$self->{la}{'en'});
431 - $cid{'REL'}=$self->addCollection($rv{mid},'RELT');
432 - %rv=$self->addExpression('UMLS Relation Attributes 2005',$self->{la}{'en'});
433 - $cid{'RELA'}=$self->addCollection($rv{mid},'RELT');
434 - %rv=$self->addExpression('The International Classification of Primary Care (ICPC), 1993',$self->{la}{'en'});
435 - $cid{'ICPC'}=$self->addCollection($rv{mid},'');
436 - %rv=$self->addExpression('Medical Subject Headings (MeSH), 2005',$self->{la}{'en'});
437 - $cid{'MESH'}=$self->addCollection($rv{mid},'');
438 -# %rv=$self->addExpression('Swiss-Prot',$self->{la}{'en'});
439 -# $cid{'SP'}=$self->addCollection($rv{mid},'');
440 - return %cid;
441 -}
442 -
443 -sub addCollection {
444 - my $self=shift;
445 - my $mid=shift;
446 - my $collection_type=shift;
447 - my $addcollection=$self->{dbt}->prepare('INSERT INTO uw_collection_ns(collection_mid,is_latest,collection_type) values(?,1,?)');
448 - $addcollection->execute($mid,$collection_type);
449 - my $cid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
450 - my $updatefirstver=$self->{dbt}->prepare('UPDATE uw_collection_ns set first_ver=? where collection_id=?');
451 - $updatefirstver->execute($cid,$cid);
452 - return $cid;
453 -}
454 -
455 -sub importUMLSrelationtypes {
456 - my $self=shift;
457 - my $which=shift;
458 - my $getreltypes;
459 - if($which eq 'REL') {
460 - # CHD and PAR are to be interpreted as RN and RB, SUBX is not used
461 - $getreltypes=$self->{dbs}->prepare("select * from rel where ABBREV!='CHD' and ABBREV!='PAR' and ABBREV!='SUBX'");
462 - } elsif($which eq 'RELA') {
463 - $getreltypes=$self->{dbs}->prepare("select * from rela");
464 - }
465 - $getreltypes->execute();
466 - while(my $reltype=$getreltypes->fetchrow_hashref()) {
467 - my %rv=$self->addExpression($reltype->{FULL},$self->{la}{'en'},0,$self->{cid}{$which},$reltype->{ABBREV});
468 - }
469 -}
470 -
471 -sub importUMLSterms {
472 - my $self=shift;
473 - my $sab=shift; # the source abbreviation which to import
474 - my $cid=shift; # which collection to associate the defined meanings with
475 -
476 - $getterm=$self->{dbs}->prepare("select str,cui,lat from MRCONSO where sab like ?");
477 - $getterm->execute($sab);
478 - my %textmid;
479 - while(my $r=$getterm->fetchrow_hashref()) {
480 - my %rv;
481 - my $dupe=0;
482 - my %cuimid=$self->getMidForMember($r->{cui});
483 -
484 - # Create new expression / Defined Meaning
485 - if(!$cuimid{mid}) {
486 - %rv=$self->addExpression($r->{str},$self->{la_iso}{lc($r->{lat})},0,$cid,$r->{cui});
487 - # If this is the first time we encounter this CUI, import the definitions
488 - # Note that we'll take any definitions, regardless of the SABs specified!
489 - if($rv{mid}!=-1) {
490 - $getdefs=$self->{dbs}->prepare("select def from MRDEF where cui=?");
491 - $getdefs->execute($r->{cui});
492 - while(my $d=$getdefs->fetchrow_hashref()) {
493 - # UMLS only has English definitions
494 - $self->addMeaningText($rv{rid},$rv{mid},$d->{def},0,$self->{la}{'en'});
495 - }
496 - $textmid{$rv{mid}}=1;
497 - }
498 - # Add as SynTrans to existing Defined Meaning
499 - } else {
500 - %rv=$self->addExpression($r->{str},$self->{la_iso}{lc($r->{lat})},$cuimid{mid});
501 - }
502 - }
503 -}
504 -
505 -
506 -sub importGemetTerms {
507 - my $self=shift;
508 - my $cid=shift;
509 - # Get all English terms as base
510 - $getterm=$self->{dbs}->prepare("select * from term where langcode=?");
511 - $getterm->execute('en');
512 - while($r=$getterm->fetchrow_hashref()) {
513 - # Add English term as defined meaning
514 - my %rv=$self->addExpression($r->{name},$self->{la}{'en'},0,);
515 -
516 - # All translations
517 - $gettrans=$self->{dbs}->prepare("select name,langcode from term where id_concept=? and langcode!='en'");
518 - $gettrans->execute($r->{id_concept});
519 - # Add them with the same meaning ID
520 - while($t=$gettrans->fetchrow_hashref()) {
521 - print "Language: $t->{langcode}\n";
522 - %tv=$self->addExpression($t->{name},$self->{la}{$t->{langcode}},$rv{mid});
523 - }
524 - # All definitions
525 - $getdef=$self->{dbs}->prepare("select definition,langcode from scope where id_concept=?");
526 - $getdef->execute($r->{id_concept});
527 - my $tcid=0;
528 - while($d=$getdef->fetchrow_hashref()) {
529 - if(!$tcid) {
530 - my %mv=$self->addMeaningText($rv{rid},$rv{mid},$d->{definition},0,$self->{la}{$d->{langcode}});
531 - $tcid=$mv{tcid};
532 -
533 - } else {
534 - $self->addMeaningText($rv{rid},$rv{mid},$d->{definition},$tcid,$self->{la}{$d->{langcode}});
535 -
536 - }
537 - }
538 - }
539 -}
540 -
541 -
542 -sub importGemetRelations {
543 - my $self=shift;
544 - # Import GEMET relations
545 - my $getrels=$self->{dbs}->prepare("select * from relation");
546 - $getrels->execute();
547 - while(my $rrow=$getrels->fetchrow_hashref()) {
548 - %rv_A=$self->findGemetItem($rrow->{id_concept});
549 - %rv_B=$self->findGemetItem($rrow->{id_relation});
550 - if($rv_A{mid} && $rv_B{mid}) {
551 - $self->addRelation($rv_A{rid},$self->{reltypes}{$rrow->{id_type}},$rv_A{mid},$rv_B{mid});
552 - }
553 - }
554 -}
555 -
556 -sub importGemetThemes {
557 - my $self=shift;
558 - # Get all themes
559 - my $getthemes=$self->{dbs}->prepare("select * from theme");
560 - my $gettheme_set=$self->{dbs}->prepare("select * from theme where id_theme=?");
561 - $getthemes->execute();
562 - while(my $theme_row=$getthemes->fetchrow_hashref()) {
563 - my $theme=$theme_row->{description};
564 - my @themes=split(/[,;]( ){0,1}/,$theme);
565 - foreach(@themes) {
566 - $_=~s/^ *$//i;
567 - if($_) {
568 - # Does this theme have a expression?
569 - my $t=$_;
570 - my %it=$self->findLatestRevision($t,$self->{la}{$theme_row->{langcode}});
571 - if($it{liid}) {
572 - # Get the meaning
573 - print "NEW THEME: $t - retrieving existing MID for LIID... ".$it{liid};
574 - $it{mid}=$self->findMeaning($rv{liid});
575 - print $it{mid}."\n";
576 - #print $t. " is a dupe! - $dupes\n";
577 - #$dupes++;
578 - } else {
579 - # Do we have any of its translations?
580 - # We can only add those if the theme does
581 - # not contain a , - otherwise we can't match!
582 - my $tra_mid=0;
583 - if(!($theme_row->{description}=~m/[,;]/i)) {
584 - print "NEW THEME: $t - no record, looking for its known translations in GEMET\n";
585 - #print "Checking for translations of ".$theme_row->{description}."\n";
586 - $gettheme_set->execute($theme_row->{id_theme});
587 - while((my $tra_row=$gettheme_set->fetchrow_hashref()) && !$tra_mid) {
588 - if($tra_lid=$self->findExpressionId($tra_row->{description},$self->{la}{$tra_row->{langcode}})) {
589 - $tra_mid=$self->findMeaning($tra_lid);
590 -
591 - }
592 - }
593 - } else {
594 - print "NEW THEME: $t - split from the original GEMET data\n";
595 - }
596 - # Let's make one
597 - if($tra_mid) {
598 - print "Adding new term as translation of $tra_mid\n";
599 - %it = $self->addExpression($t,$self->{la}{$theme_row->{langcode}},$tra_mid);
600 - } else {
601 - print "Adding new term independently, we do not know its translations.\n";
602 - %it = $self->addExpression($t,$self->{la}{$theme_row->{langcode}});
603 - }
604 -
605 -
606 - }
607 -
608 - if(!$have_rel{$theme_row->{id_theme}}) {
609 - # Get all items which have this relation
610 - my $getconcepts=$self->{dbs}->prepare('select id_concept from concept_theme where id_theme=?');
611 - $getconcepts->execute($theme_row->{id_theme});
612 - while(my $concrow=$getconcepts->fetchrow_hashref()) {
613 - # Get LIID,RID->meaning for the item
614 - my %tr=$self->findGemetItem($concrow->{id_concept});
615 - if($tr{rid}) {
616 - $self->addRelation($tr{rid},$self->{reltypes}{it},$tr{mid},$it{mid});
617 - print "Tied up a relation..";
618 - } else {
619 - print "Missing record to tie the relation to..";
620 - }
621 - }
622 - print "\n";
623 - $have_rel{$theme_row->{id_theme}}=1;
624 - }
625 -
626 - }
627 - }
628 - }
629 - #Split theme into parts
630 -}
631 -
632 -sub findGemetItem {
633 - my $self=shift;
634 - my $concept_id=shift;
635 - # get a word, language
636 - my $getword=$self->{dbs}->prepare("select langcode,name from term where id_concept=? LIMIT 1");
637 - $getword->execute($concept_id);
638 - my $wordrow=$getword->fetchrow_hashref();
639 -
640 - # find an expression + meaning
641 - my %rv=$self->findLatestRevision($wordrow->{name},$self->{la}{$wordrow->{langcode}});
642 - $rv{mid}=$self->findMeaning($rv{liid});
643 - return %rv;
644 -}
645 -
646 -sub addRelation {
647 - my $self=shift;
648 - my $revid=shift;
649 - my $rtid=shift;
650 - my $mid_A=shift;
651 - my $mid_B=shift;
652 - my $checkfordupes=shift;
653 -
654 - if($checkfordupes) {
655 - my $checkRelationDuplicates=$self->{dbt}->prepare('select 1 as one from uw_meaning_relations where meaning1_mid=? and meaning2_mid=? and relationtype_mid=? and is_latest_set=1 limit 1');
656 - $checkRelationDuplicates->execute($mid_A,$mid_B,$rtid);
657 - #print "Checking dupe $mid_A, $mid_B, relation type $rtid\n";
658 - my $dupecheck=$checkRelationDuplicates->fetchrow_hashref();
659 - if($dupecheck->{one}) {
660 - print "Duplicate relation, not adding.\n";
661 - return false;
662 - }
663 - }
664 -
665 - my $newkey= $self->getSetIdWhere('uw_meaning_relations','meaning1_mid',$mid_A) || $self->getMaxId('set_id','uw_meaning_relations');
666 - my $addrel=$self->{dbt}->prepare('insert into uw_meaning_relations(set_id,meaning1_mid,meaning2_mid,relationtype_mid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?,?)');
667 - $addrel->execute($newkey,$mid_A,$mid_B,$rtid,1,$newkey,$revid);
668 -
669 - print "newkey: $newkey\n";
670 - print "mid_A: $mid_A\n";
671 - print "mid_B: $mid_B\n";
672 - print "rtid: $rtid\n";
673 - print "revid: $revid\n";
674 -}
675 -
676 -
677 -sub findMeaning {
678 - my $self=shift;
679 - my $liid=shift;
680 - # Search syntrans table
681 - my $getsyn=$self->{dbt}->prepare("select defined_meaning_id from uw_syntrans where expression_id=?");
682 - $getsyn->execute($liid);
683 - my $syn_row=$getsyn->fetchrow_hashref();
684 - if($syn_row->{defined_meaning_id}) {
685 - return $syn_row->{defined_meaning_id};
686 - }
687 - my $getdm=$self->{dbt}->prepare("select defined_meaning_id from uw_defined_meaning where expression_id=? limit 1");
688 - $getdm->execute($liid);
689 - my $dm_row=$getdm->fetchrow_hashref();
690 - if($dm_row->{defined_meaning_id}) {
691 - return $dm_row->{defined_meaning_id};
692 - }
693 - return 0;
694 -}
695 -
696 -# If there already is a meaning text for this DefinedMeaning, it will add the MeaningText as an alternative definition
697 -sub addMeaningText {
698 - my $self=shift;
699 - my $rid=shift;
700 - my $mid=shift;
701 - my $meaningtext=shift; # optional
702 - my $meaningtext_set=shift; # optional TCID set to join with
703 - my $lid=shift; # ID, not code
704 - my %rv;
705 -
706 - # Add text row entry
707 - my $maketext=$self->{dbt}->prepare('insert into text(old_text) values(?)');
708 - $maketext->execute($meaningtext);
709 - # Get text row ID
710 - $tid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
711 - # Get new or existing translated content set ID
712 - $tcid=$meaningtext_set || $self->getMaxId('set_id','translated_content');
713 - # Create new translated content set
714 - my $maketc=$self->{dbt}->prepare('insert into translated_content(set_id,language_id,text_id,first_set,revision_id) values(?,?,?,?,?)');
715 - $maketc->execute($tcid,$lid,$tid,$tcid,$rid);
716 - $rv{tcid}=$tcid;
717 -
718 - # THIS DOESN'T WORK FOR DEFINITIONS IN MULTIPLE LANGUAGES
719 - # Check if a meaning text has already been set
720 - my $lookformeaning=$self->{dbt}->prepare('select meaning_text_tcid from uw_defined_meaning where defined_meaning_id=? and is_latest_ver=1');
721 - $lookformeaning->execute($mid);
722 - my $mrow=$lookformeaning->fetchrow_hashref();
723 - if($mrow->{meaning_text_tcid}) {
724 - # There is a meaning text - the new one is only an alternative
725 - my $altset=$self->getSetIdWhere('uw_alt_meaningtexts','meaning_mid',$mid) || $self->getMaxId('set_id','uw_alt_meaningtexts');
726 - my $addaltmeaning=$self->{dbt}->prepare('insert into uw_alt_meaningtexts(set_id,meaning_mid,meaning_text_tcid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?)');
727 - $addaltmeaning->execute($altset,$mid,$tcid,1,$altset,$rid)
728 - } else {
729 - my $updatemeaning=$self->{dbt}->prepare('update uw_defined_meaning set meaning_text_tcid=? where defined_meaning_id=?');
730 - $updatemeaning->execute($tcid,$mid);
731 - }
732 - return %rv;
733 -}
734 -
735 -
736 -# If the expression already exists, add a new DefinedMeaning - unless this is a translation or synonym; if a record already exists in SynTrans with this expression _and_ $translation_of as a DefinedMeaning, do not do anything
737 -sub addExpression {
738 - my $self=shift;
739 - # return MID, RID, LID, TCID!
740 - my $expression=shift;
741 - my $lid=shift; # ID, not code
742 - my $translation_of=shift; # 0 or MID (!), optional
743 - my $collection_id=shift; # optional
744 - my $collection_internal_member_id=shift; # what does the collection use to refer to this member?
745 - my %rv;
746 - my $isdupe=0;
747 - my %firv=$self->findLatestRevision($expression,$lid);
748 - if($firv{liid}) { $isdupe=1; }
749 -
750 - if(!$isdupe) {
751 -
752 - #create page
753 - my $pt=$self->canonize($expression);
754 - $makepage=$self->{dbt}->prepare('insert into page(page_namespace,page_title,page_is_new,page_title_language_id,page_touched) values(?,?,?,?,?)');
755 - $makepage->execute(16,$pt,1,$lid,$self->mwtimestamp());
756 - $pid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
757 - print "PID: $pid\n";
758 -
759 - $rv{pid}=$pid;
760 -
761 - #create revision
762 - $makerev=$self->{dbt}->prepare('insert into revision(rev_page,rev_comment,rev_user,rev_user_text,rev_timestamp) values(?,?,?,?,?)');
763 - $makerev->execute($pid,'Initial import',2,'GEMET',$self->mwtimestamp());
764 -
765 - #get revision_id
766 - $rid=$self->getId('select rev_id from revision where rev_page=?',$pid);
767 - $rv{rid}=$rid;
768 -
769 - #update page to link to revision
770 - $updatepage=$self->{dbt}->prepare('update page set page_latest=? where page_id=?');
771 - $updatepage->execute($rid,$pid);
772 -
773 - #create expression
774 - $makeitem=$self->{dbt}->prepare('insert into uw_expression_ns(spelling,language_id,is_latest) values(?,?,1)');
775 - $makeitem->execute($expression,$lid);
776 - $liid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
777 - $rv{liid}=$liid;
778 -
779 - # update firstver
780 - $updateitem=$self->{dbt}->prepare('update uw_expression_ns set first_ver=? where expression_id=?');
781 - $updateitem->execute($liid,$liid);
782 -
783 - #update revision to link to expression
784 - $updaterev=$self->{dbt}->prepare('update revision set rev_data_id=? where rev_id=?');
785 - $updaterev->execute($liid,$rid);
786 -
787 - } else {
788 -
789 - $rid=$firv{rid};
790 - $liid=$firv{liid};
791 - $rv{rid}=$rid;
792 - $rv{liid}=$liid;
793 -
794 - }
795 -
796 - #create definedmeaning
797 - if(!$translation_of) {
798 - $makemean=$self->{dbt}->prepare('insert into uw_defined_meaning(expression_id,revision_id) values(?,?)');
799 - $makemean->execute($liid,$rid);
800 - # We always want a syntrans record, so in this case it links to its own
801 - # def. meaning
802 - $translation_of=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
803 - $mid=$translation_of;
804 - $rv{mid}=$mid;
805 - $updatemeaningver=$self->{dbt}->prepare('update uw_defined_meaning set first_ver=? where defined_meaning_id=?');
806 - $updatemeaningver->execute($mid,$mid);
807 - if($collection_id) {
808 - $addtocoll=$self->{dbt}->prepare('insert into uw_collection_contents(set_id, collection_id, member_mid, is_latest_set, first_Set, revision_id, internal_member_id) values(?,?,?,?,?,?,?)');
809 - #fixme set association
810 - $addtocoll->execute(1,$collection_id,$mid,1,1,$rid,$collection_internal_member_id);
811 - }
812 - }
813 -
814 - # Check if we already have this specific record
815 - $checkdupes=$self->{dbt}->prepare('select set_id from uw_syntrans where defined_meaning_id=? and expression_id=?');
816 - $checkdupes->execute($translation_of,$liid);
817 - my $duperow=$checkdupes->fetchrow_hashref();
818 - my $dupeid=$duperow->{set_id};
819 - if(!$dupeid) {
820 -
821 - # Check if this is part of a set
822 - $getset=$self->{dbt}->prepare('select set_id from uw_syntrans where defined_meaning_id=? and is_latest_set=1');
823 - $getset->execute($mid);
824 - $row=$getset->fetchrow_hashref();
825 - my $setid=$row->{set_id} || $self->getMaxId('set_id','uw_syntrans');
826 - # Add syntrans record
827 - $maketrans=$self->{dbt}->prepare('insert into uw_syntrans(set_id,defined_meaning_id,expression_id,first_set,revision_id,is_latest_set,endemic_meaning) values(?,?,?,?,?,1,1)');
828 - $maketrans->execute($setid,$translation_of,$liid,$setid,$rid);
829 - $rv{setid}=$setid;
830 - $rv{mid}=$translation_of;
831 - } else{
832 - $rv{setid}=$dupeid; # Dupe
833 - $rv{mid}=-1; # Dupe
834 - }
835 - return %rv;
836 -
837 -}
838 -
839 -sub findLatestRevision {
840 - my $self = shift;
841 - my $expressionSpelling = shift;
842 - my $languageId = shift;
843 -
844 - my $expressionId = $self->findExpressionId($expressionSpelling, $languageId);
845 - if ($expressionId != 0) {
846 - my $getRevisionId = $self->{dbt}->prepare('select rev_id from revision where rev_data_id=?');
847 - $getRevisionId->execute($expressionId);
848 - my %revision;
849 - $revision{liid} = $expressionId;
850 - $revision{rid} = $getRevisionId->fetchrow_hashref->{rev_id};
851 - return %revision;
852 - } else {
853 - return 0;
854 - }
855 -}
856 -
857 -sub findExpressionId {
858 - my $self = shift;
859 - my $expressionSpelling = shift;
860 - my $languageId = shift;
861 -
862 - my $getItem = $self->{dbt}->prepare("select expression_id from uw_expression_ns where spelling=binary ? and language_id=? and is_latest=1");
863 - $getItem->execute($expressionSpelling, $languageId);
864 - my $itemRow = $getItem->fetchrow_hashref();
865 - if ($itemRow) {
866 - return $itemRow->{expression_id};
867 - } else {
868 - return 0;
869 - }
870 -}
871 -
872 -sub getMaxId {
873 - my $self=shift;
874 - my $field=shift;
875 - my $table=shift;
876 - $getmax=$self->{dbt}->prepare("select max($field) as maxset from $table");
877 - $getmax->execute();
878 - my $row=$getmax->fetchrow_hashref();
879 - return $row->{maxset}+1;
880 -}
881 -
882 -sub getSetIdWhere {
883 - my $self=shift;
884 - my $table=shift;
885 - my $wherefield=shift;
886 - my $wherekey=shift;
887 - $getmax=$self->{dbt}->prepare("select set_id from $table WHERE $wherefield=? AND is_latest_set=1 limit 1");
888 - $getmax->execute($wherekey);
889 - my $row=$getmax->fetchrow_hashref();
890 - return $row->{set_id};
891 -}
892 -
893 -
894 -sub getId {
895 - my $self=shift;
896 - my $prep=shift;
897 - $prep=~m/select (.*?) from/i;
898 - my $field=$1;
899 - my $getlang=$self->{dbt}->prepare($prep);
900 - $getlang->execute(@_);
901 - my $row=$getlang->fetchrow_hashref();
902 - my $id=$row->{$field};
903 - return $id;
904 -}
905 -
906 -sub mwtimestamp {
907 - my $self=shift;
908 - use POSIX qw(strftime);
909 - return(strftime "%Y%m%d%H%M%S", localtime);
910 -}
911 -
912 -
913 -sub canonize {
914 - my $self=shift;
915 - my $title=shift;
916 - #$title=ucfirst($title);
917 - $title=~s/ /_/ig;
918 - return $title;
919 -}
920 -
921 -sub initlangs {
922 - my $self=shift;
923 - %langs=(
924 - en_en=>'English',
925 - en_de=>'Englisch',
926 - 'en-US_de'=>'Englisch (USA)',
927 - 'en-US_en'=>'English (United States)',
928 - bg_en=>'Bulgarian',
929 - bg_de=>'Bulgarisch',
930 - cs_en=>'Czech',
931 - cs_de=>'Tschechisch',
932 - da_en=>'Dansk',
933 - da_de=>'D?isch',
934 - de_en=>'German',
935 - de_de=>'Deutsch',
936 - es_en=>'Spanish',
937 - es_de=>'Spanisch',
938 - et_en=>'Estonian',
939 - et_de=>'Estnisch',
940 - eu_en=>'Basque',
941 - eu_de=>'Baskisch',
942 - fi_en=>'Finnish',
943 - fi_de=>'Finnisch',
944 - fr_en=>'French',
945 - fr_de=>'Franz?isch',
946 - hu_en=>'Hungarian',
947 - hu_de=>'Ungarisch',
948 - it_en=>'Italian',
949 - it_de=>'Italienisch',
950 - nl_en=>'Dutch',
951 - nl_de=>'Niederl?disch',
952 - no_en=>'Norwegian',
953 - no_de=>'Norwegisch',
954 - pl_en=>'Polish',
955 - pl_de=>'Polnisch',
956 - pt_en=>'Portuguese',
957 - pt_de=>'Portugiesisch',
958 - ru_en=>'Russian',
959 - ru_de=>'Russisch',
960 - sk_en=>'Slovak',
961 - sk_de=>'Slowakische Sprache',
962 - sl_en=>'Slovenian',
963 - sl_de=>'Slowenisch',
964 - el_en=>'Greek',
965 - el_de=>'Griechisch',
966 - sv_en=>'Swedish',
967 - sv_de=>'Schwedisch');
968 - foreach(keys(%langs)) {
969 - $key=$_;
970 - $key=~m/(.*?)_(.*)/i;
971 - $lang=$1;
972 - #print "Lang: $lang\n";
973 - $wordlang=$2;
974 - if($wordlang eq 'en') {
975 - $addwm=$self->{dbt}->prepare("insert into language(wikimedia_key) values(?)");
976 - $addwm->execute($lang);
977 - }
978 - }
979 - foreach(keys(%langs)) {
980 - $key=$_;
981 - $key=~m/(.*?)_(.*)/i;
982 - $lang=$1;
983 - #print "Lang: $lang\n";
984 - $wordlang=$2;
985 - $langword_u=$langs{$key};
986 - $langword=encode("utf8",$langword_u);
987 - $newwm=$self->{dbt}->prepare("select language_id from language where wikimedia_key=?");
988 - $newwm->execute($lang);
989 - my $row=$newwm->fetchrow_hashref();
990 - $newwm->execute('en');
991 - my $en_row=$newwm->fetchrow_hashref();
992 - $newwm->execute('de');
993 - my $de_row=$newwm->fetchrow_hashref();
994 - $newword=$self->{dbt}->prepare("insert into language_names values (?,?,?)");
995 - if($wordlang eq 'en') {
996 - $newword->execute($row->{language_id},$en_row->{language_id},$langword);
997 - } elsif($wordlang eq 'de') {
998 - $newword->execute($row->{language_id},$de_row->{language_id},$langword);
999 - }
1000 - }
1001 -}
1002 -
1003 -sub initRel {
1004 - my $self=shift;
1005 - my $cid=shift;
1006 - %rel_types=(
1007 - bt_en=>'broader terms',
1008 - bt_de=>'breitere Begriffe',
1009 - nt_en=>'narrower terms',
1010 - nt_de=>'engere Begriffe',
1011 - rt_en=>'related terms',
1012 - rt_de=>'verwandte Begriffe',
1013 - it_en=>'is part of theme',
1014 - it_de=>'ist Themenbestandteil von'
1015 - );
1016 -
1017 - %rel_definitions=(
1018 - bt_en=>'Those terms in a thesaurus which are broader than others',
1019 - bt_de=>'Die Begriffe in einem Thesaurus, die breiter sind als andere',
1020 - nt_en=>'Those terms in a thesaurus which are narrower than others',
1021 - nt_de=>'Die Begriffe in einem Thesaurus, die enger sind als andere',
1022 - rt_en=>'Those terms in a thesaurus which are related to others',
1023 - rt_de=>'Die Begriffe in einem Thesaurus, die mit anderen verwandt sind',
1024 - it_en=>'Those terms in a thesaurus or dictionary which are associated with a topic',
1025 - it_de=>'Die Begriffe in einem Thesaurus oder Woerterbuch, die mit einem Thema assoziiert sind');
1026 -
1027 - foreach(keys(%rel_types)) {
1028 - $key=$_;
1029 - $key=~m/(..)_(..)/i;
1030 - $ident=$1;
1031 - $lang=$2;
1032 - if($lang eq 'de') {
1033 - $en_key="$ident\_en";
1034 - my %rv=$self->addExpression($rel_types{$en_key},$self->{la}{'en'},0,$cid,$ident);
1035 - $self->addMeaningText($rv{rid},$rv{mid},$rel_definitions{$en_key},0,$self->{la}{'en'});
1036 - my %dv=$self->addExpression($rel_types{$key},$self->{la}{'de'},$rv{'mid'});
1037 - $self->addMeaningText($dv{rid},$dv{mid},$rel_definitions{$key},$rv{'tcid'},$self->{la}{'de'});
1038 - }
1039 - }
1040 -}
1041 -
1042 -sub loadLangs {
1043 - my $self=shift;
1044 - my %la;
1045 - $getlangs=$self->{dbt}->prepare('select language_id,wikimedia_key from language');
1046 - $getlangs->execute();
1047 - while($langrow=$getlangs->fetchrow_hashref()) {
1048 - $la{$langrow->{wikimedia_key}}=$langrow->{language_id};
1049 - }
1050 - return %la;
1051 -}
1052 -
1053 -sub loadLangsIso {
1054 - my $self=shift;
1055 - my %la_iso;
1056 - $getlangs=$self->{dbt}->prepare('select language_id,iso639_2 from language');
1057 - $getlangs->execute();
1058 - while($langrow=$getlangs->fetchrow_hashref()) {
1059 - $la_iso{$langrow->{iso639_2}}=$langrow->{language_id};
1060 - }
1061 - return %la_iso;
1062 -}
1063 -
1064 -return(1);
Index: trunk/extensions/Wikidata/perl-tools/Import OmegaWiki.pl
@@ -0,0 +1,55 @@
 2+use OmegaWiki;
 3+use POSIX qw(strftime);
 4+
 5+my $startTime = time;
 6+
 7+# Example usage to import UMLS completely into an existing OmegaWiki database:
 8+# my $importer=new OmegaWiki('wikidatadb','root','MyPass');
 9+# $importer->setSourceDB('umls');
 10+# $importer->initialize;
 11+# $importer->importCompleteUMLS();
 12+
 13+# Example usage to import a part of UMLS into an existing OmegaWiki database:
 14+# my $importer=new OmegaWiki('wikidatadb','root','MyPass');
 15+# $importer->setSourceDB('umls');
 16+# $importer->initialize;
 17+# my %sourceAbbreviations = $importer->loadSourceAbbreviations();
 18+# delete($sourceAbbreviations{"MSH"});
 19+# $importer->importUMLS(\%sourceAbbreviations);
 20+
 21+my $importer=new OmegaWiki('wikidata_icpc','root','');
 22+$importer->setSourceDB('umls');
 23+#$importer->setSourceDB('swissprot');
 24+$importer->initialize;
 25+#$importer->importCompleteUMLS();
 26+
 27+#read the source abbreviations and remove those you do not wish to import
 28+my %sourceAbbreviations = $importer->loadSourceAbbreviations();
 29+my @deleteList;
 30+while (($key, $val) = each(%sourceAbbreviations)) {
 31+
 32+#remove all that contains "MSH":
 33+# if (index($key,"MSH") >= 0) {
 34+# push(@deleteList, $key);
 35+# }
 36+
 37+#remove all that does not contain "ICPC":
 38+ if (index($key,"ICPC") < 0) {
 39+ push(@deleteList, $key);
 40+ }
 41+}
 42+
 43+foreach $sab (@deleteList){
 44+ delete($sourceAbbreviations{$sab});
 45+}
 46+
 47+$importer->importUMLS(\%sourceAbbreviations);
 48+
 49+
 50+my $endTime = time;
 51+print "\n";
 52+print "Import started at: " . (strftime "%H:%M:%S", localtime($startTime)) . "\n";
 53+print "Import ended at: " . (strftime "%H:%M:%S", localtime($endTime)) . "\n";
 54+print "Elapsed time: " . (strftime "%H:%M:%S", gmtime($endTime - $startTime)) . "\n";
 55+
 56+exit 0;
\ No newline at end of file
Index: trunk/extensions/Wikidata/perl-tools/OmegaWiki.pm
@@ -0,0 +1,1063 @@
 2+# Example usage to import UMLS into an existing OmegaWiki database:
 3+# use OmegaWiki;
 4+# my $importer=new OmegaWiki('wikidatadb','root','MyPass');
 5+# $importer->setSourceDB('umls');
 6+# $importer->initialize;
 7+# $importer->importCompleteUMLS();
 8+#
 9+# NOTE: When importing UMLS, we expect the presence of the semantic network data
 10+# in the tables SRDEF and the manually created tables SEMRELHIER and SEMTYPEHIER.
 11+# SEMRELHIER and SEMTYPEHIER contain information about the relations between
 12+# semantic types and relation types, using RB as the code for "broader than"
 13+# and RN for "narrower than".
 14+#
 15+# Todo for UMLS:
 16+# SyntransCollection
 17+# RelationCollection
 18+# Fully deal with alternative definitions referring to the same concept
 19+# Deal with preferred lexical expressions, primary concepts (general weighting mechanism?)
 20+
 21+package OmegaWiki;
 22+use DBI;
 23+use Encode;
 24+use POSIX qw(strftime);
 25+
 26+sub new {
 27+ my $type=shift;
 28+ my $self={};
 29+ $self->{targetdb}=shift;
 30+ $self->{targetuser}=shift;
 31+ $self->{targetpass}=shift;
 32+ $self->{targethost}=shift || 'localhost';
 33+ $self->{targetport}=shift || '3306';
 34+ $self->{targetdriver}=shift || 'mysql';
 35+ bless($self, $type);
 36+ return($self);
 37+}
 38+
 39+sub setSourceDB {
 40+ my $self=shift;
 41+ $self->{sourcedb}=shift;
 42+ $self->{sourceuser}=shift || $self->{targetuser};
 43+ $self->{sourcepass}=shift || $self->{targetpass};
 44+ $self->{sourcehost}=shift || $self->{targethost};
 45+ $self->{sourceport}=shift || $self->{targetport};
 46+ $self->{sourcedriver}=shift || $self->{targetdriver};
 47+}
 48+
 49+sub connectSourceDB {
 50+ my $self=shift;
 51+ my $dsn = 'dbi:'.$self->{sourcedriver}.':'.$self->{sourcedb}.':'.$self->{sourcehost}.':'.$self->{sourceport};
 52+ $self->{dbs}=DBI->connect($dsn,$self->{sourceuser},$self->{sourcepass});
 53+}
 54+
 55+sub connectTargetDB {
 56+ my $self=shift;
 57+ my $dsn = 'dbi:'.$self->{targetdriver}.':'.$self->{targetdb}.':'.$self->{targethost}.':'.$self->{targetport};
 58+ $self->{dbt}=DBI->connect($dsn,$self->{targetuser},$self->{targetpass});
 59+}
 60+
 61+sub connectDBs() {
 62+ my $self = shift;
 63+
 64+ $self->connectSourceDB();
 65+ $self->connectTargetDB();
 66+}
 67+
 68+sub loadLanguages() {
 69+ my $self = shift;
 70+
 71+ my %la=$self->loadLangs();
 72+ $self->{la}=\%la;
 73+ my %la_iso=$self->loadLangsIso();
 74+ $self->{la_iso}=\%la_iso;
 75+}
 76+
 77+sub initialize {
 78+ my $self=shift;
 79+ $self->connectDBs();
 80+ $self->loadLanguages();
 81+}
 82+
 83+sub importCompleteUMLS {
 84+ my $self=shift;
 85+ my $level=shift || 0; # 0= complete; 1=reltypes+; 2=rel+
 86+
 87+ my %sourceAbbreviations = $self->loadSourceAbbreviations();
 88+ $self->importUMLS(\%sourceAbbreviations, $level);
 89+}
 90+
 91+sub importUMLS() {
 92+ my $self=shift;
 93+ my $sourceAbbreviationsReference = shift;
 94+ my $level=shift || 0; # 0= complete; 1=reltypes+; 2=rel+
 95+
 96+ my %sourceAbbreviations = %{$sourceAbbreviationsReference};
 97+ my %cid=$self->getOrCreateCollections(\%sourceAbbreviations);
 98+ $self->{cid}=\%cid;
 99+
 100+ if ($level<1){
 101+ while (($sourceAbbreviation, $collectionId) = each %cid) {
 102+ print "Import UMLS terms for $sourceAbbreviation\n";
 103+ $self->importUMLSterms($sourceAbbreviation, $collectionId);
 104+ }
 105+ }
 106+
 107+ if($level<2) {
 108+ print "Import UMLS relation types 'REL'\n";
 109+ $self->importUMLSrelationtypes('REL');
 110+ print "Import UMLS relation types 'RELA'\n";
 111+ $self->importUMLSrelationtypes('RELA');
 112+ }
 113+
 114+ if($level<3) {
 115+ while (($sourceAbbreviation, $collectionId) = each %cid) {
 116+ my %rt=$self->loadReltypes();
 117+ $self->{reltypes}=\%rt;
 118+ print "Import UMLS relations 'REL' for $sourceAbbreviation\n";
 119+ $self->importUMLSrelations('REL',$sourceAbbreviation);
 120+ print "Import UMLS relations 'RELA' for $sourceAbbreviation\n";
 121+ $self->importUMLSrelations('RELA',$sourceAbbreviation);
 122+ }
 123+ }
 124+
 125+ if($level<4) {
 126+ print "Import SN types 'STY'\n";
 127+ $self->importSNtypes('STY');
 128+ print "Import SN types 'RL'\n";
 129+ $self->importSNtypes('RL');
 130+ print "Import ST relations 'STY'\n";
 131+ $self->importSTrelations('STY');
 132+ print "Import ST relations 'RL'\n";
 133+ $self->importSTrelations('RL');
 134+ # $self->importSTrelations2();
 135+ }
 136+
 137+ if($level<5) {
 138+ while (($sourceAbbreviation, $collectionId) = each %cid) {
 139+ my %attribs=$self->loadAttributes();
 140+ $self->{attribs}=\%attribs;
 141+ print "Import UMLS types for $sourceAbbreviation\n";
 142+ $self->importUMLSstypes($sourceAbbreviation);
 143+ }
 144+ }
 145+}
 146+
 147+sub importGEMET {
 148+ my $self=shift;
 149+ $self->connectSourceDB();
 150+ $self->connectTargetDB();
 151+ my %la=$self->loadLangs();
 152+ $self->{la}=\%la;
 153+ my %cid=$self->bootstrapGemetCollection();
 154+ $self->{cid}=\%cid;
 155+ $self->initRel($self->{cid}{'GEMETREL'});
 156+ my %rt=$self->loadReltypes();
 157+ $self->{reltypes}=\%rt;
 158+ $self->importGemetTerms();
 159+ $self->importGemetRelations();
 160+ $self->importGemetThemes();
 161+}
 162+
 163+sub importUMLSstypes {
 164+ my $self=shift;
 165+ my $sab=shift;
 166+
 167+ my $getassocs=$self->{dbs}->prepare("select MRSTY.CUI, MRSTY.STY from MRCONSO,MRSTY where MRCONSO.SAB like ? and MRCONSO.CUI=MRSTY.CUI");
 168+ $getassocs->execute($sab);
 169+ while(my $row=$getassocs->fetchrow_hashref()) {
 170+ my %rv=$self->getMidForMember($row->{CUI});
 171+ my $att=$self->{attribs}{$row->{STY}};
 172+ #print "$rv{mid} is a $row->{STY} ($att)\n";
 173+ $self->addRelation($rv{rid},0,$rv{mid},$att, my $checkfordupes=1);
 174+ }
 175+}
 176+
 177+sub getCollections(){
 178+ my $self=shift;
 179+ my %cid;
 180+ $cid{'CRISP'}=$self->findCollection($self->findMeaning($self->findItem('CRISP Thesaurus, 2005',$self->{la}{'en'})));
 181+ $cid{'STY'}=$self->findCollection($self->findMeaning($self->findItem('Semantic Network 2005AC Semantic Types',$self->{la}{'en'})));
 182+ $cid{'RL'}=$self->findCollection($self->findMeaning($self->findItem('Semantic Network 2005AC Relation Types',$self->{la}{'en'})));
 183+ $cid{'REL'}=$self->findCollection($self->findMeaning($self->findItem('UMLS Relation Types 2005',$self->{la}{'en'})));
 184+ $cid{'RELA'}=$self->findCollection($self->findMeaning($self->findItem('UMLS Relation Attributes 2005',$self->{la}{'en'})));
 185+ $cid{'ICPC'}=$self->findCollection($self->findMeaning($self->findItem('The International Classification of Primary Care (ICPC), 1993',$self->{la}{'en'})));
 186+ $cid{'MESH'}=$self->findCollection($self->findMeaning($self->findItem('Medical Subject Headings (MeSH), 2005',$self->{la}{'en'})));
 187+# $cid{'SP'}=$self->findCollection($self->findMeaning($self->findItem('Swiss-Prot',$self->{la}{'en'})));
 188+ return %cid;
 189+}
 190+
 191+sub findCollection() {
 192+ my $self=shift;
 193+ my $mid=shift;
 194+ my $findcoll=$self->{dbt}->prepare("select collection_id from uw_collection_ns where collection_mid=? and is_latest=1");
 195+ $findcoll->execute($mid);
 196+ my $row=$findcoll->fetchrow_hashref();
 197+ return $row->{collection_id};
 198+}
 199+
 200+sub getCollection {
 201+ my $self = shift;
 202+ my $expression = shift;
 203+
 204+ return $self->findCollection($self->findMeaning($self->findExpressionId($expression,$self->{la}{'en'})));
 205+}
 206+
 207+sub bootstrapCollection {
 208+ my $self = shift;
 209+ my $expression = shift;
 210+ my $collectionType = shift;
 211+
 212+ %rv=$self->addExpression($expression,$self->{la}{'en'});
 213+ return $self->addCollection($rv{mid},$collectionType);
 214+}
 215+
 216+sub getOrCreateCollection {
 217+ my $self = shift;
 218+ my $expression = shift;
 219+
 220+ my $result = $self->getCollection($expression);
 221+
 222+ if (!$result) {
 223+ $result = $self->bootstrapCollection($expression, '');
 224+ }
 225+
 226+ return $result;
 227+}
 228+
 229+sub getOrCreateCollections {
 230+ my $self = shift;
 231+ my $sourceAbbreviationsReference = shift;
 232+ my %sourceAbbreviations = %{$sourceAbbreviationsReference};
 233+ my %cid;
 234+
 235+ while (($key, $value) = each %sourceAbbreviations) {
 236+ $cid{$key} = $self->getOrCreateCollection($value);
 237+ }
 238+
 239+ return %cid;
 240+}
 241+
 242+sub loadSourceAbbreviations {
 243+ my $self = shift;
 244+ my %sab;
 245+
 246+ my $dataset = $self->{dbs}->prepare("select * from mrsab");
 247+ $dataset->execute();
 248+ while (my $row = $dataset->fetchrow_hashref()) {
 249+ $sab{$row->{RSAB}} = $row->{SON};
 250+ }
 251+ return %sab;
 252+}
 253+
 254+# SEMTYPEHIER and SEMRELHIER contain only the is_a relationships, whereas
 255+# srstr contains all others
 256+# FIXME: only use SRSTR
 257+sub importSTrelations2 {
 258+ my $self=shift;
 259+ my $getrels=$self->{dbs}->prepare("select * from srstr where rel!='isa'");
 260+ $getrels->execute();
 261+ while(my $row=$getrels->fetchrow_hashref()) {
 262+ my %rv1=$self->getMidForMember($row->{TYPE1},$self->{cid}{'STY'});
 263+ my %rv2=$self->getMidForMember($row->{TYPE2},$self->{cid}{'STY'});
 264+ my $rtmid=$self->{reltypes}{$row->{REL}};
 265+ #print "Adding relation $row->{REL} ($rtmid) between $row->{TYPE1} and $row->{TYPE2}\n";
 266+ $self->addRelation($rv1{rid},$rtmid,$rv1{mid},$rv2{mid},my $checkfordupes=1);
 267+ }
 268+}
 269+
 270+
 271+sub importSTrelations {
 272+ my $self=shift;
 273+ my $which=shift;
 274+ my $table;
 275+ my $field1;
 276+ my $field2;
 277+ if($which eq 'STY') {
 278+ $table='semtypehier';
 279+ $field1='SEMTYPE1';
 280+ $field2='SEMTYPE2';
 281+ } elsif($which eq 'RL') {
 282+ $table='semrelhier';
 283+ $field1='RELTYPE1';
 284+ $field2='RELTYPE2';
 285+ }
 286+
 287+ my $gettypehier=$self->{dbs}->prepare("select * from $table");
 288+ $gettypehier->execute();
 289+ while(my $typehier=$gettypehier->fetchrow_hashref()) {
 290+ my %rv1=$self->getMidForMember($typehier->{$field1},$self->{cid}{$which});
 291+ my %rv2=$self->getMidForMember($typehier->{$field2},$self->{cid}{$which});
 292+ my $rtmid=$self->{reltypes}{$typehier->{RELATION}};
 293+ print "Adding relation $typehier->{RELATION} ($rtmid) between $typehier->{$field1} and $typehier->{$field2}\n";
 294+ $self->addRelation($rv1{rid},$rtmid,$rv1{mid},$rv2{mid},my $checkfordupes=1);
 295+ }
 296+}
 297+
 298+# $member_id - the collection-internal identifier for this member
 299+# $cid The collection in which to search for this member (optional)
 300+# Returns the DefinedMeaningID and the revision id
 301+sub getMidForMember {
 302+ my $self=shift;
 303+ my $member_id=shift;
 304+ my $cid=shift;
 305+ my %rv;
 306+ my $getmid;
 307+ if($cid) {
 308+ $getmid=$self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where collection_id=? and internal_member_id=? and is_latest_set=1 limit 1");
 309+ $getmid->execute($cid,$member_id);
 310+ } else {
 311+ $getmid=$self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1");
 312+ $getmid->execute($member_id);
 313+ }
 314+ my $member_mid=$getmid->fetchrow_hashref();
 315+ $rv{mid}=$member_mid->{member_mid};
 316+ $rv{rid}=$member_mid->{revision_id};
 317+ return %rv;
 318+
 319+}
 320+
 321+sub loadReltypes {
 322+ my $self=shift;
 323+ my %reltypes;
 324+ # Get the relation type
 325+ $getreltype=$self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='RELT' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
 326+ $getreltype->execute();
 327+ while (my $reltype=$getreltype->fetchrow_hashref()) {
 328+ $reltypes{$reltype->{internal_member_id}}=$reltype->{member_mid};
 329+ }
 330+ return %reltypes;
 331+}
 332+
 333+sub loadAttributes {
 334+ my $self=shift;
 335+ my %attributes;
 336+ $getatt=$self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='ATTR' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
 337+ $getatt->execute();
 338+ while (my $att=$getatt->fetchrow_hashref()) {
 339+ $attributes{$att->{internal_member_id}}=$att->{member_mid};
 340+ }
 341+ return %attributes;
 342+}
 343+
 344+
 345+# Get all SRDEF attributes
 346+# Get relations between SRDEF
 347+sub importSNtypes {
 348+ my $self=shift;
 349+ my $type=shift;
 350+ $getsemtypes=$self->{dbs}->prepare("select semtypeab,type,definition from srdef where type=?");
 351+ $getsemtypes->execute($type);
 352+ while (my $semtype=$getsemtypes->fetchrow_hashref()) {
 353+ my $type_expression=$semtype->{semtypeab};
 354+ my $type_code=$type_expression;
 355+ $type_expression=~s/_/ /g;
 356+ $type_expression=lc($type_expression);
 357+ my %rv=$self->addExpression($type_expression,$self->{la}{'en'},0,$self->{cid}{$type},$type_code);
 358+ $self->addMeaningText($rv{'rid'},$rv{'mid'},$semtype->{definition},undef,$self->{la}{'en'});
 359+ #print $type_expression." - $self->{cid}{$type} - $type_code\n";
 360+ }
 361+}
 362+
 363+sub importUMLSrelations {
 364+ my $self=shift;
 365+ my $which=shift; # REL or RELA
 366+ my $source=shift; # SAB as MySQL LIKE string
 367+ my $getrels;
 368+
 369+ if($which eq 'REL') {
 370+ $getrels=$self->{dbs}->prepare("select cui1,cui2,rel from MRREL where sab like ?");
 371+ } elsif($which eq 'RELA') {
 372+ $getrels=$self->{dbs}->prepare("select cui1,cui2,rela from MRREL where sab like ? and rela!=''");
 373+ }
 374+ $getrels->execute($source);
 375+ while(my $rel=$getrels->fetchrow_hashref()) {
 376+ my $relid=$rel->{lc($which)};
 377+ # These mean the same thing
 378+ if($relid eq 'CHD') {
 379+ $relid='RN';
 380+ } elsif($relid eq 'PAR') {
 381+ $relid='RB';
 382+ }
 383+ $getmid=$self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1");
 384+ # Note that the direction in UMLS is opposite to ours
 385+ $getmid->execute($rel->{cui2});
 386+ my $mid1=$getmid->fetchrow_hashref();
 387+ $getmid->execute($rel->{cui1});
 388+ my $mid2=$getmid->fetchrow_hashref();
 389+ # FIXME: We are ignoring term relations for now!
 390+ if(($mid1->{member_mid} && $mid2->{member_mid}) && ($mid1->{member_mid} != $mid2->{member_mid}) && $self->{reltypes}{$relid}) {
 391+ # Add the relation
 392+ #print "Found relation ".$relid." (".$self->{reltypes}{$relid}.") between ".$mid1->{member_mid}." and ".$mid2->{member_mid}.".\n";
 393+ $self->addRelation($mid1->{revision_id},$self->{reltypes}{$relid},$mid1->{member_mid},$mid2->{member_mid},my $checkfordupes=1);
 394+ } else {
 395+ if(!$mid1->{member_mid} && $mid2->{member_mid}) {
 396+ print "Did not find MID for ".$rel->{cui1}."!\n";
 397+ } elsif($mid1->{member_mid} && !$mid2->{member_mid}) {
 398+ print "Did not find MID for ".$rel->{cui2}."!\n";
 399+ } elsif(!$mid1->{member_mid} && !$mid2->{member_mid}) {
 400+ print "Did not find MIDs for ".$rel->{cui1}." and ".$rel->{cui2}."!\n";
 401+ }
 402+ }
 403+ }
 404+
 405+}
 406+
 407+
 408+sub bootstrapGemetCollection {
 409+ my $self=shift;
 410+ my %cid;
 411+ %rv=$self->addExpression('GEMET Environmental Thesaurus Relation Types',$self->{la}{'en'});
 412+ $cid{'GEMETREL'}=$self->addCollection($rv{mid},'RELT');
 413+ %rv=$self->addExpression('GEMET Environmental Thesaurus Relation Types',$self->{la}{'en'});
 414+ $cid{'GEMET'}=$self->addCollection($rv{mid},'');
 415+ return %cid;
 416+}
 417+
 418+
 419+sub bootstrapCollections {
 420+ my $self=shift;
 421+ my %cid;
 422+ my %rv;
 423+
 424+ %rv=$self->addExpression('CRISP Thesaurus, 2005',$self->{la}{'en'});
 425+ $cid{'CRISP'}=$self->addCollection($rv{mid},'');
 426+ %rv=$self->addExpression('Semantic Network 2005AC Semantic Types',$self->{la}{'en'});
 427+ $cid{'STY'}=$self->addCollection($rv{mid},'ATTR');
 428+ %rv=$self->addExpression('Semantic Network 2005AC Relation Types',$self->{la}{'en'});
 429+ $cid{'RL'}=$self->addCollection($rv{mid},'RELT');
 430+ %rv=$self->addExpression('UMLS Relation Types 2005',$self->{la}{'en'});
 431+ $cid{'REL'}=$self->addCollection($rv{mid},'RELT');
 432+ %rv=$self->addExpression('UMLS Relation Attributes 2005',$self->{la}{'en'});
 433+ $cid{'RELA'}=$self->addCollection($rv{mid},'RELT');
 434+ %rv=$self->addExpression('The International Classification of Primary Care (ICPC), 1993',$self->{la}{'en'});
 435+ $cid{'ICPC'}=$self->addCollection($rv{mid},'');
 436+ %rv=$self->addExpression('Medical Subject Headings (MeSH), 2005',$self->{la}{'en'});
 437+ $cid{'MESH'}=$self->addCollection($rv{mid},'');
 438+# %rv=$self->addExpression('Swiss-Prot',$self->{la}{'en'});
 439+# $cid{'SP'}=$self->addCollection($rv{mid},'');
 440+ return %cid;
 441+}
 442+
 443+sub addCollection {
 444+ my $self=shift;
 445+ my $mid=shift;
 446+ my $collection_type=shift;
 447+ my $addcollection=$self->{dbt}->prepare('INSERT INTO uw_collection_ns(collection_mid,is_latest,collection_type) values(?,1,?)');
 448+ $addcollection->execute($mid,$collection_type);
 449+ my $cid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
 450+ my $updatefirstver=$self->{dbt}->prepare('UPDATE uw_collection_ns set first_ver=? where collection_id=?');
 451+ $updatefirstver->execute($cid,$cid);
 452+ return $cid;
 453+}
 454+
 455+sub importUMLSrelationtypes {
 456+ my $self=shift;
 457+ my $which=shift;
 458+ my $getreltypes;
 459+ if($which eq 'REL') {
 460+ # CHD and PAR are to be interpreted as RN and RB, SUBX is not used
 461+ $getreltypes=$self->{dbs}->prepare("select * from rel where ABBREV!='CHD' and ABBREV!='PAR' and ABBREV!='SUBX'");
 462+ } elsif($which eq 'RELA') {
 463+ $getreltypes=$self->{dbs}->prepare("select * from rela");
 464+ }
 465+ $getreltypes->execute();
 466+ while(my $reltype=$getreltypes->fetchrow_hashref()) {
 467+ my %rv=$self->addExpression($reltype->{FULL},$self->{la}{'en'},0,$self->{cid}{$which},$reltype->{ABBREV});
 468+ }
 469+}
 470+
 471+sub importUMLSterms {
 472+ my $self=shift;
 473+ my $sab=shift; # the source abbreviation which to import
 474+ my $cid=shift; # which collection to associate the defined meanings with
 475+
 476+ $getterm=$self->{dbs}->prepare("select str,cui,lat from MRCONSO where sab like ?");
 477+ $getterm->execute($sab);
 478+ my %textmid;
 479+ while(my $r=$getterm->fetchrow_hashref()) {
 480+ my %rv;
 481+ my $dupe=0;
 482+ my %cuimid=$self->getMidForMember($r->{cui});
 483+
 484+ # Create new expression / Defined Meaning
 485+ if(!$cuimid{mid}) {
 486+ %rv=$self->addExpression($r->{str},$self->{la_iso}{lc($r->{lat})},0,$cid,$r->{cui});
 487+ # If this is the first time we encounter this CUI, import the definitions
 488+ # Note that we'll take any definitions, regardless of the SABs specified!
 489+ if($rv{mid}!=-1) {
 490+ $getdefs=$self->{dbs}->prepare("select def from MRDEF where cui=?");
 491+ $getdefs->execute($r->{cui});
 492+ while(my $d=$getdefs->fetchrow_hashref()) {
 493+ # UMLS only has English definitions
 494+ $self->addMeaningText($rv{rid},$rv{mid},$d->{def},0,$self->{la}{'en'});
 495+ }
 496+ $textmid{$rv{mid}}=1;
 497+ }
 498+ # Add as SynTrans to existing Defined Meaning
 499+ } else {
 500+ %rv=$self->addExpression($r->{str},$self->{la_iso}{lc($r->{lat})},$cuimid{mid});
 501+ }
 502+ }
 503+}
 504+
 505+
 506+sub importGemetTerms {
 507+ my $self=shift;
 508+ my $cid=shift;
 509+ # Get all English terms as base
 510+ $getterm=$self->{dbs}->prepare("select * from term where langcode=?");
 511+ $getterm->execute('en');
 512+ while($r=$getterm->fetchrow_hashref()) {
 513+ # Add English term as defined meaning
 514+ my %rv=$self->addExpression($r->{name},$self->{la}{'en'},0,);
 515+
 516+ # All translations
 517+ $gettrans=$self->{dbs}->prepare("select name,langcode from term where id_concept=? and langcode!='en'");
 518+ $gettrans->execute($r->{id_concept});
 519+ # Add them with the same meaning ID
 520+ while($t=$gettrans->fetchrow_hashref()) {
 521+ print "Language: $t->{langcode}\n";
 522+ %tv=$self->addExpression($t->{name},$self->{la}{$t->{langcode}},$rv{mid});
 523+ }
 524+ # All definitions
 525+ $getdef=$self->{dbs}->prepare("select definition,langcode from scope where id_concept=?");
 526+ $getdef->execute($r->{id_concept});
 527+ my $tcid=0;
 528+ while($d=$getdef->fetchrow_hashref()) {
 529+ if(!$tcid) {
 530+ my %mv=$self->addMeaningText($rv{rid},$rv{mid},$d->{definition},0,$self->{la}{$d->{langcode}});
 531+ $tcid=$mv{tcid};
 532+
 533+ } else {
 534+ $self->addMeaningText($rv{rid},$rv{mid},$d->{definition},$tcid,$self->{la}{$d->{langcode}});
 535+
 536+ }
 537+ }
 538+ }
 539+}
 540+
 541+
 542+sub importGemetRelations {
 543+ my $self=shift;
 544+ # Import GEMET relations
 545+ my $getrels=$self->{dbs}->prepare("select * from relation");
 546+ $getrels->execute();
 547+ while(my $rrow=$getrels->fetchrow_hashref()) {
 548+ %rv_A=$self->findGemetItem($rrow->{id_concept});
 549+ %rv_B=$self->findGemetItem($rrow->{id_relation});
 550+ if($rv_A{mid} && $rv_B{mid}) {
 551+ $self->addRelation($rv_A{rid},$self->{reltypes}{$rrow->{id_type}},$rv_A{mid},$rv_B{mid});
 552+ }
 553+ }
 554+}
 555+
 556+sub importGemetThemes {
 557+ my $self=shift;
 558+ # Get all themes
 559+ my $getthemes=$self->{dbs}->prepare("select * from theme");
 560+ my $gettheme_set=$self->{dbs}->prepare("select * from theme where id_theme=?");
 561+ $getthemes->execute();
 562+ while(my $theme_row=$getthemes->fetchrow_hashref()) {
 563+ my $theme=$theme_row->{description};
 564+ my @themes=split(/[,;]( ){0,1}/,$theme);
 565+ foreach(@themes) {
 566+ $_=~s/^ *$//i;
 567+ if($_) {
 568+ # Does this theme have a expression?
 569+ my $t=$_;
 570+ my %it=$self->findLatestRevision($t,$self->{la}{$theme_row->{langcode}});
 571+ if($it{liid}) {
 572+ # Get the meaning
 573+ print "NEW THEME: $t - retrieving existing MID for LIID... ".$it{liid};
 574+ $it{mid}=$self->findMeaning($rv{liid});
 575+ print $it{mid}."\n";
 576+ #print $t. " is a dupe! - $dupes\n";
 577+ #$dupes++;
 578+ } else {
 579+ # Do we have any of its translations?
 580+ # We can only add those if the theme does
 581+ # not contain a , - otherwise we can't match!
 582+ my $tra_mid=0;
 583+ if(!($theme_row->{description}=~m/[,;]/i)) {
 584+ print "NEW THEME: $t - no record, looking for its known translations in GEMET\n";
 585+ #print "Checking for translations of ".$theme_row->{description}."\n";
 586+ $gettheme_set->execute($theme_row->{id_theme});
 587+ while((my $tra_row=$gettheme_set->fetchrow_hashref()) && !$tra_mid) {
 588+ if($tra_lid=$self->findExpressionId($tra_row->{description},$self->{la}{$tra_row->{langcode}})) {
 589+ $tra_mid=$self->findMeaning($tra_lid);
 590+
 591+ }
 592+ }
 593+ } else {
 594+ print "NEW THEME: $t - split from the original GEMET data\n";
 595+ }
 596+ # Let's make one
 597+ if($tra_mid) {
 598+ print "Adding new term as translation of $tra_mid\n";
 599+ %it = $self->addExpression($t,$self->{la}{$theme_row->{langcode}},$tra_mid);
 600+ } else {
 601+ print "Adding new term independently, we do not know its translations.\n";
 602+ %it = $self->addExpression($t,$self->{la}{$theme_row->{langcode}});
 603+ }
 604+
 605+
 606+ }
 607+
 608+ if(!$have_rel{$theme_row->{id_theme}}) {
 609+ # Get all items which have this relation
 610+ my $getconcepts=$self->{dbs}->prepare('select id_concept from concept_theme where id_theme=?');
 611+ $getconcepts->execute($theme_row->{id_theme});
 612+ while(my $concrow=$getconcepts->fetchrow_hashref()) {
 613+ # Get LIID,RID->meaning for the item
 614+ my %tr=$self->findGemetItem($concrow->{id_concept});
 615+ if($tr{rid}) {
 616+ $self->addRelation($tr{rid},$self->{reltypes}{it},$tr{mid},$it{mid});
 617+ print "Tied up a relation..";
 618+ } else {
 619+ print "Missing record to tie the relation to..";
 620+ }
 621+ }
 622+ print "\n";
 623+ $have_rel{$theme_row->{id_theme}}=1;
 624+ }
 625+
 626+ }
 627+ }
 628+ }
 629+ #Split theme into parts
 630+}
 631+
 632+sub findGemetItem {
 633+ my $self=shift;
 634+ my $concept_id=shift;
 635+ # get a word, language
 636+ my $getword=$self->{dbs}->prepare("select langcode,name from term where id_concept=? LIMIT 1");
 637+ $getword->execute($concept_id);
 638+ my $wordrow=$getword->fetchrow_hashref();
 639+
 640+ # find an expression + meaning
 641+ my %rv=$self->findLatestRevision($wordrow->{name},$self->{la}{$wordrow->{langcode}});
 642+ $rv{mid}=$self->findMeaning($rv{liid});
 643+ return %rv;
 644+}
 645+
 646+sub addRelation {
 647+ my $self=shift;
 648+ my $revid=shift;
 649+ my $rtid=shift;
 650+ my $mid_A=shift;
 651+ my $mid_B=shift;
 652+ my $checkfordupes=shift;
 653+
 654+ if($checkfordupes) {
 655+ my $checkRelationDuplicates=$self->{dbt}->prepare('select 1 as one from uw_meaning_relations where meaning1_mid=? and meaning2_mid=? and relationtype_mid=? and is_latest_set=1 limit 1');
 656+ $checkRelationDuplicates->execute($mid_A,$mid_B,$rtid);
 657+ #print "Checking dupe $mid_A, $mid_B, relation type $rtid\n";
 658+ my $dupecheck=$checkRelationDuplicates->fetchrow_hashref();
 659+ if($dupecheck->{one}) {
 660+ print "Duplicate relation, not adding.\n";
 661+ return false;
 662+ }
 663+ }
 664+
 665+ my $newkey= $self->getSetIdWhere('uw_meaning_relations','meaning1_mid',$mid_A) || $self->getMaxId('set_id','uw_meaning_relations');
 666+ my $addrel=$self->{dbt}->prepare('insert into uw_meaning_relations(set_id,meaning1_mid,meaning2_mid,relationtype_mid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?,?)');
 667+ $addrel->execute($newkey,$mid_A,$mid_B,$rtid,1,$newkey,$revid);
 668+
 669+ print "newkey: $newkey\n";
 670+ print "mid_A: $mid_A\n";
 671+ print "mid_B: $mid_B\n";
 672+ print "rtid: $rtid\n";
 673+ print "revid: $revid\n";
 674+}
 675+
 676+
 677+sub findMeaning {
 678+ my $self=shift;
 679+ my $liid=shift;
 680+ # Search syntrans table
 681+ my $getsyn=$self->{dbt}->prepare("select defined_meaning_id from uw_syntrans where expression_id=?");
 682+ $getsyn->execute($liid);
 683+ my $syn_row=$getsyn->fetchrow_hashref();
 684+ if($syn_row->{defined_meaning_id}) {
 685+ return $syn_row->{defined_meaning_id};
 686+ }
 687+ my $getdm=$self->{dbt}->prepare("select defined_meaning_id from uw_defined_meaning where expression_id=? limit 1");
 688+ $getdm->execute($liid);
 689+ my $dm_row=$getdm->fetchrow_hashref();
 690+ if($dm_row->{defined_meaning_id}) {
 691+ return $dm_row->{defined_meaning_id};
 692+ }
 693+ return 0;
 694+}
 695+
 696+# If there already is a meaning text for this DefinedMeaning, it will add the MeaningText as an alternative definition
 697+sub addMeaningText {
 698+ my $self=shift;
 699+ my $rid=shift;
 700+ my $mid=shift;
 701+ my $meaningtext=shift; # optional
 702+ my $meaningtext_set=shift; # optional TCID set to join with
 703+ my $lid=shift; # ID, not code
 704+ my %rv;
 705+
 706+ # Add text row entry
 707+ my $maketext=$self->{dbt}->prepare('insert into text(old_text) values(?)');
 708+ $maketext->execute($meaningtext);
 709+ # Get text row ID
 710+ $tid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
 711+ # Get new or existing translated content set ID
 712+ $tcid=$meaningtext_set || $self->getMaxId('set_id','translated_content');
 713+ # Create new translated content set
 714+ my $maketc=$self->{dbt}->prepare('insert into translated_content(set_id,language_id,text_id,first_set,revision_id) values(?,?,?,?,?)');
 715+ $maketc->execute($tcid,$lid,$tid,$tcid,$rid);
 716+ $rv{tcid}=$tcid;
 717+
 718+ # THIS DOESN'T WORK FOR DEFINITIONS IN MULTIPLE LANGUAGES
 719+ # Check if a meaning text has already been set
 720+ my $lookformeaning=$self->{dbt}->prepare('select meaning_text_tcid from uw_defined_meaning where defined_meaning_id=? and is_latest_ver=1');
 721+ $lookformeaning->execute($mid);
 722+ my $mrow=$lookformeaning->fetchrow_hashref();
 723+ if($mrow->{meaning_text_tcid}) {
 724+ # There is a meaning text - the new one is only an alternative
 725+ my $altset=$self->getSetIdWhere('uw_alt_meaningtexts','meaning_mid',$mid) || $self->getMaxId('set_id','uw_alt_meaningtexts');
 726+ my $addaltmeaning=$self->{dbt}->prepare('insert into uw_alt_meaningtexts(set_id,meaning_mid,meaning_text_tcid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?)');
 727+ $addaltmeaning->execute($altset,$mid,$tcid,1,$altset,$rid)
 728+ } else {
 729+ my $updatemeaning=$self->{dbt}->prepare('update uw_defined_meaning set meaning_text_tcid=? where defined_meaning_id=?');
 730+ $updatemeaning->execute($tcid,$mid);
 731+ }
 732+ return %rv;
 733+}
 734+
 735+
 736+# If the expression already exists, add a new DefinedMeaning - unless this is a translation or synonym; if a record already exists in SynTrans with this expression _and_ $translation_of as a DefinedMeaning, do not do anything
 737+sub addExpression {
 738+ my $self=shift;
 739+ # return MID, RID, LID, TCID!
 740+ my $expression=shift;
 741+ my $lid=shift; # ID, not code
 742+ my $translation_of=shift; # 0 or MID (!), optional
 743+ my $collection_id=shift; # optional
 744+ my $collection_internal_member_id=shift; # what does the collection use to refer to this member?
 745+ my %rv;
 746+ my $isdupe=0;
 747+ my %firv=$self->findLatestRevision($expression,$lid);
 748+ if($firv{liid}) { $isdupe=1; }
 749+
 750+ if(!$isdupe) {
 751+
 752+ #create page
 753+ my $pt=$self->canonize($expression);
 754+ $makepage=$self->{dbt}->prepare('insert into page(page_namespace,page_title,page_is_new,page_title_language_id,page_touched) values(?,?,?,?,?)');
 755+ $makepage->execute(16,$pt,1,$lid,$self->mwtimestamp());
 756+ $pid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
 757+ print "PID: $pid\n";
 758+
 759+ $rv{pid}=$pid;
 760+
 761+ #create revision
 762+ $makerev=$self->{dbt}->prepare('insert into revision(rev_page,rev_comment,rev_user,rev_user_text,rev_timestamp) values(?,?,?,?,?)');
 763+ $makerev->execute($pid,'Initial import',2,'GEMET',$self->mwtimestamp());
 764+
 765+ #get revision_id
 766+ $rid=$self->getId('select rev_id from revision where rev_page=?',$pid);
 767+ $rv{rid}=$rid;
 768+
 769+ #update page to link to revision
 770+ $updatepage=$self->{dbt}->prepare('update page set page_latest=? where page_id=?');
 771+ $updatepage->execute($rid,$pid);
 772+
 773+ #create expression
 774+ $makeitem=$self->{dbt}->prepare('insert into uw_expression_ns(spelling,language_id,is_latest) values(?,?,1)');
 775+ $makeitem->execute($expression,$lid);
 776+ $liid=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
 777+ $rv{liid}=$liid;
 778+
 779+ # update firstver
 780+ $updateitem=$self->{dbt}->prepare('update uw_expression_ns set first_ver=? where expression_id=?');
 781+ $updateitem->execute($liid,$liid);
 782+
 783+ #update revision to link to expression
 784+ $updaterev=$self->{dbt}->prepare('update revision set rev_data_id=? where rev_id=?');
 785+ $updaterev->execute($liid,$rid);
 786+
 787+ } else {
 788+
 789+ $rid=$firv{rid};
 790+ $liid=$firv{liid};
 791+ $rv{rid}=$rid;
 792+ $rv{liid}=$liid;
 793+
 794+ }
 795+
 796+ #create definedmeaning
 797+ if(!$translation_of) {
 798+ $makemean=$self->{dbt}->prepare('insert into uw_defined_meaning(expression_id,revision_id) values(?,?)');
 799+ $makemean->execute($liid,$rid);
 800+ # We always want a syntrans record, so in this case it links to its own
 801+ # def. meaning
 802+ $translation_of=$self->{dbt}->last_insert_id(undef,undef,undef,undef);
 803+ $mid=$translation_of;
 804+ $rv{mid}=$mid;
 805+ $updatemeaningver=$self->{dbt}->prepare('update uw_defined_meaning set first_ver=? where defined_meaning_id=?');
 806+ $updatemeaningver->execute($mid,$mid);
 807+ if($collection_id) {
 808+ $addtocoll=$self->{dbt}->prepare('insert into uw_collection_contents(set_id, collection_id, member_mid, is_latest_set, first_Set, revision_id, internal_member_id) values(?,?,?,?,?,?,?)');
 809+ #fixme set association
 810+ $addtocoll->execute(1,$collection_id,$mid,1,1,$rid,$collection_internal_member_id);
 811+ }
 812+ }
 813+
 814+ # Check if we already have this specific record
 815+ $checkdupes=$self->{dbt}->prepare('select set_id from uw_syntrans where defined_meaning_id=? and expression_id=?');
 816+ $checkdupes->execute($translation_of,$liid);
 817+ my $duperow=$checkdupes->fetchrow_hashref();
 818+ my $dupeid=$duperow->{set_id};
 819+ if(!$dupeid) {
 820+
 821+ # Check if this is part of a set
 822+ $getset=$self->{dbt}->prepare('select set_id from uw_syntrans where defined_meaning_id=? and is_latest_set=1');
 823+ $getset->execute($mid);
 824+ $row=$getset->fetchrow_hashref();
 825+ my $setid=$row->{set_id} || $self->getMaxId('set_id','uw_syntrans');
 826+ # Add syntrans record
 827+ $maketrans=$self->{dbt}->prepare('insert into uw_syntrans(set_id,defined_meaning_id,expression_id,first_set,revision_id,is_latest_set,endemic_meaning) values(?,?,?,?,?,1,1)');
 828+ $maketrans->execute($setid,$translation_of,$liid,$setid,$rid);
 829+ $rv{setid}=$setid;
 830+ $rv{mid}=$translation_of;
 831+ } else{
 832+ $rv{setid}=$dupeid; # Dupe
 833+ $rv{mid}=-1; # Dupe
 834+ }
 835+ return %rv;
 836+
 837+}
 838+
 839+sub findLatestRevision {
 840+ my $self = shift;
 841+ my $expressionSpelling = shift;
 842+ my $languageId = shift;
 843+
 844+ my $expressionId = $self->findExpressionId($expressionSpelling, $languageId);
 845+ if ($expressionId != 0) {
 846+ my $getRevisionId = $self->{dbt}->prepare('select rev_id from revision where rev_data_id=?');
 847+ $getRevisionId->execute($expressionId);
 848+ my %revision;
 849+ $revision{liid} = $expressionId;
 850+ $revision{rid} = $getRevisionId->fetchrow_hashref->{rev_id};
 851+ return %revision;
 852+ } else {
 853+ return 0;
 854+ }
 855+}
 856+
 857+sub findExpressionId {
 858+ my $self = shift;
 859+ my $expressionSpelling = shift;
 860+ my $languageId = shift;
 861+
 862+ my $getItem = $self->{dbt}->prepare("select expression_id from uw_expression_ns where spelling=binary ? and language_id=? and is_latest=1");
 863+ $getItem->execute($expressionSpelling, $languageId);
 864+ my $itemRow = $getItem->fetchrow_hashref();
 865+ if ($itemRow) {
 866+ return $itemRow->{expression_id};
 867+ } else {
 868+ return 0;
 869+ }
 870+}
 871+
 872+sub getMaxId {
 873+ my $self=shift;
 874+ my $field=shift;
 875+ my $table=shift;
 876+ $getmax=$self->{dbt}->prepare("select max($field) as maxset from $table");
 877+ $getmax->execute();
 878+ my $row=$getmax->fetchrow_hashref();
 879+ return $row->{maxset}+1;
 880+}
 881+
 882+sub getSetIdWhere {
 883+ my $self=shift;
 884+ my $table=shift;
 885+ my $wherefield=shift;
 886+ my $wherekey=shift;
 887+ $getmax=$self->{dbt}->prepare("select set_id from $table WHERE $wherefield=? AND is_latest_set=1 limit 1");
 888+ $getmax->execute($wherekey);
 889+ my $row=$getmax->fetchrow_hashref();
 890+ return $row->{set_id};
 891+}
 892+
 893+
 894+sub getId {
 895+ my $self=shift;
 896+ my $prep=shift;
 897+ $prep=~m/select (.*?) from/i;
 898+ my $field=$1;
 899+ my $getlang=$self->{dbt}->prepare($prep);
 900+ $getlang->execute(@_);
 901+ my $row=$getlang->fetchrow_hashref();
 902+ my $id=$row->{$field};
 903+ return $id;
 904+}
 905+
 906+sub mwtimestamp {
 907+ my $self=shift;
 908+ use POSIX qw(strftime);
 909+ return(strftime "%Y%m%d%H%M%S", localtime);
 910+}
 911+
 912+
 913+sub canonize {
 914+ my $self=shift;
 915+ my $title=shift;
 916+ #$title=ucfirst($title);
 917+ $title=~s/ /_/ig;
 918+ return $title;
 919+}
 920+
 921+sub initlangs {
 922+ my $self=shift;
 923+ %langs=(
 924+ en_en=>'English',
 925+ en_de=>'Englisch',
 926+ 'en-US_de'=>'Englisch (USA)',
 927+ 'en-US_en'=>'English (United States)',
 928+ bg_en=>'Bulgarian',
 929+ bg_de=>'Bulgarisch',
 930+ cs_en=>'Czech',
 931+ cs_de=>'Tschechisch',
 932+ da_en=>'Dansk',
 933+ da_de=>'D?isch',
 934+ de_en=>'German',
 935+ de_de=>'Deutsch',
 936+ es_en=>'Spanish',
 937+ es_de=>'Spanisch',
 938+ et_en=>'Estonian',
 939+ et_de=>'Estnisch',
 940+ eu_en=>'Basque',
 941+ eu_de=>'Baskisch',
 942+ fi_en=>'Finnish',
 943+ fi_de=>'Finnisch',
 944+ fr_en=>'French',
 945+ fr_de=>'Franz?isch',
 946+ hu_en=>'Hungarian',
 947+ hu_de=>'Ungarisch',
 948+ it_en=>'Italian',
 949+ it_de=>'Italienisch',
 950+ nl_en=>'Dutch',
 951+ nl_de=>'Niederl?disch',
 952+ no_en=>'Norwegian',
 953+ no_de=>'Norwegisch',
 954+ pl_en=>'Polish',
 955+ pl_de=>'Polnisch',
 956+ pt_en=>'Portuguese',
 957+ pt_de=>'Portugiesisch',
 958+ ru_en=>'Russian',
 959+ ru_de=>'Russisch',
 960+ sk_en=>'Slovak',
 961+ sk_de=>'Slowakische Sprache',
 962+ sl_en=>'Slovenian',
 963+ sl_de=>'Slowenisch',
 964+ el_en=>'Greek',
 965+ el_de=>'Griechisch',
 966+ sv_en=>'Swedish',
 967+ sv_de=>'Schwedisch');
 968+ foreach(keys(%langs)) {
 969+ $key=$_;
 970+ $key=~m/(.*?)_(.*)/i;
 971+ $lang=$1;
 972+ #print "Lang: $lang\n";
 973+ $wordlang=$2;
 974+ if($wordlang eq 'en') {
 975+ $addwm=$self->{dbt}->prepare("insert into language(wikimedia_key) values(?)");
 976+ $addwm->execute($lang);
 977+ }
 978+ }
 979+ foreach(keys(%langs)) {
 980+ $key=$_;
 981+ $key=~m/(.*?)_(.*)/i;
 982+ $lang=$1;
 983+ #print "Lang: $lang\n";
 984+ $wordlang=$2;
 985+ $langword_u=$langs{$key};
 986+ $langword=encode("utf8",$langword_u);
 987+ $newwm=$self->{dbt}->prepare("select language_id from language where wikimedia_key=?");
 988+ $newwm->execute($lang);
 989+ my $row=$newwm->fetchrow_hashref();
 990+ $newwm->execute('en');
 991+ my $en_row=$newwm->fetchrow_hashref();
 992+ $newwm->execute('de');
 993+ my $de_row=$newwm->fetchrow_hashref();
 994+ $newword=$self->{dbt}->prepare("insert into language_names values (?,?,?)");
 995+ if($wordlang eq 'en') {
 996+ $newword->execute($row->{language_id},$en_row->{language_id},$langword);
 997+ } elsif($wordlang eq 'de') {
 998+ $newword->execute($row->{language_id},$de_row->{language_id},$langword);
 999+ }
 1000+ }
 1001+}
 1002+
 1003+sub initRel {
 1004+ my $self=shift;
 1005+ my $cid=shift;
 1006+ %rel_types=(
 1007+ bt_en=>'broader terms',
 1008+ bt_de=>'breitere Begriffe',
 1009+ nt_en=>'narrower terms',
 1010+ nt_de=>'engere Begriffe',
 1011+ rt_en=>'related terms',
 1012+ rt_de=>'verwandte Begriffe',
 1013+ it_en=>'is part of theme',
 1014+ it_de=>'ist Themenbestandteil von'
 1015+ );
 1016+
 1017+ %rel_definitions=(
 1018+ bt_en=>'Those terms in a thesaurus which are broader than others',
 1019+ bt_de=>'Die Begriffe in einem Thesaurus, die breiter sind als andere',
 1020+ nt_en=>'Those terms in a thesaurus which are narrower than others',
 1021+ nt_de=>'Die Begriffe in einem Thesaurus, die enger sind als andere',
 1022+ rt_en=>'Those terms in a thesaurus which are related to others',
 1023+ rt_de=>'Die Begriffe in einem Thesaurus, die mit anderen verwandt sind',
 1024+ it_en=>'Those terms in a thesaurus or dictionary which are associated with a topic',
 1025+ it_de=>'Die Begriffe in einem Thesaurus oder Woerterbuch, die mit einem Thema assoziiert sind');
 1026+
 1027+ foreach(keys(%rel_types)) {
 1028+ $key=$_;
 1029+ $key=~m/(..)_(..)/i;
 1030+ $ident=$1;
 1031+ $lang=$2;
 1032+ if($lang eq 'de') {
 1033+ $en_key="$ident\_en";
 1034+ my %rv=$self->addExpression($rel_types{$en_key},$self->{la}{'en'},0,$cid,$ident);
 1035+ $self->addMeaningText($rv{rid},$rv{mid},$rel_definitions{$en_key},0,$self->{la}{'en'});
 1036+ my %dv=$self->addExpression($rel_types{$key},$self->{la}{'de'},$rv{'mid'});
 1037+ $self->addMeaningText($dv{rid},$dv{mid},$rel_definitions{$key},$rv{'tcid'},$self->{la}{'de'});
 1038+ }
 1039+ }
 1040+}
 1041+
 1042+sub loadLangs {
 1043+ my $self=shift;
 1044+ my %la;
 1045+ $getlangs=$self->{dbt}->prepare('select language_id,wikimedia_key from language');
 1046+ $getlangs->execute();
 1047+ while($langrow=$getlangs->fetchrow_hashref()) {
 1048+ $la{$langrow->{wikimedia_key}}=$langrow->{language_id};
 1049+ }
 1050+ return %la;
 1051+}
 1052+
 1053+sub loadLangsIso {
 1054+ my $self=shift;
 1055+ my %la_iso;
 1056+ $getlangs=$self->{dbt}->prepare('select language_id,iso639_2 from language');
 1057+ $getlangs->execute();
 1058+ while($langrow=$getlangs->fetchrow_hashref()) {
 1059+ $la_iso{$langrow->{iso639_2}}=$langrow->{language_id};
 1060+ }
 1061+ return %la_iso;
 1062+}
 1063+
 1064+return(1);