Index: trunk/extensions/Wikidata/WiktionaryZ/WiktionaryZ.php |
— | — | @@ -0,0 +1,622 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +require_once('Expression.php'); |
| 5 | + |
| 6 | +/** |
| 7 | + * Renders a content page from WiktionaryZ based on the GEMET database. |
| 8 | + * @package MediaWiki |
| 9 | + */ |
| 10 | +class WiktionaryZ { |
| 11 | + /* TODOs: |
| 12 | + use $dbr->select() instead of $dbr->query() wherever possible; it lets MediaWiki handle additional |
| 13 | + table prefixes and such. |
| 14 | + */ |
| 15 | + protected $sectionToEdit = 0; |
| 16 | + protected $currentSection = 0; |
| 17 | + protected $inSection = false; |
| 18 | + protected $inSectionLevel = 0; |
| 19 | + |
/**
 * Resets the section-tracking state before the sections of a page are walked.
 *
 * @param int $sectionToEdit Number of the section to restrict processing to;
 *                           0 means no restriction, so every section counts
 *                           as being "in section".
 */
function initializeSections($sectionToEdit) {
    $this->sectionToEdit = $sectionToEdit;
    $this->currentSection = 0;
    $this->inSectionLevel = 0;
    # Without a specific section requested, everything is inside the section.
    $this->inSection = ($sectionToEdit == 0);
}
| 26 | + |
/**
 * Registers the start of a section and reports whether it should be processed.
 *
 * When no specific section was requested (sectionToEdit is 0) the flag set by
 * initializeSections() is returned unchanged. Otherwise the section counter is
 * advanced: reaching the requested section switches the "in section" flag on,
 * and the next section at the same or a shallower level switches it off again,
 * so only the requested section and the sections nested below it are included.
 *
 * @param int $level Nesting level of the section being started (smaller is shallower)
 * @return bool Whether the section just started lies inside the section being edited
 */
function addSection($level) {
    if ($this->sectionToEdit != 0) {
        $this->currentSection++;

        if ($this->currentSection == $this->sectionToEdit) {
            $this->inSection = true;
            $this->inSectionLevel = $level;
        }
        # "<=" rather than "==": a shallower section following the edited one
        # must end it too, otherwise its heading is treated as part of the edit.
        else if ($this->inSection && $level <= $this->inSectionLevel) {
            $this->inSection = false;
        }
    }

    return $this->inSection;
}
| 41 | + |
/**
 * Renders the read-only view of the current title and adds it to the output as wikitext.
 *
 * For every expression whose spelling equals the page title (binary comparison)
 * the defined meanings are collected; for each meaning the definition texts,
 * the synonyms/translations, the relations and the attributes are emitted.
 * Missing database rows and unknown language/relation/attribute names are
 * rendered as empty text or skipped instead of raising PHP notices.
 */
function view() {
    global $wgOut, $wgTitle, $wgUser;

    $userlang = $wgUser->getOption('language');

    # $w is the variable used to store generated wikitext
    $w = "Your user interface language preference: '''".$userlang."''' - [[Special:Preferences|set your preferences]]";

    # Get language names, preferably in UI language
    $langdefs = $this->getLangNames($userlang);

    # NOTE(review): all reads below go to DB_MASTER while the read helpers of
    # this class use DB_SLAVE; left unchanged - confirm whether this is intended.
    $dbr =& wfGetDB( DB_MASTER );

    # Relation type and attribute names do not depend on the expression row,
    # so they are loaded once, and only if there is at least one row.
    $typenames = null;
    $attnames = null;

    # Get entry record from GEMET namespace
    $res = $dbr->query("SELECT * from uw_expression_ns WHERE spelling=BINARY ".$dbr->addQuotes($wgTitle->getText()));

    while ($row = $dbr->fetchObject($res)) {
        $dms = array();
        $syntrans = array();
        $tcids = array();
        $translmid = array();
        $meaning_rels = array();
        $attrib_rels = array();

        $languageName = isset($langdefs[$row->language_id]) ? $langdefs[$row->language_id] : '';
        $w .= "\n== ''Spelling: ''" . $row->spelling . " - ''Language:'' ".$languageName." ==\n";

        # Get meanings via Expression ID
        $st_res = $dbr->query("SELECT defined_meaning_id from uw_syntrans WHERE expression_id=".$row->expression_id);
        while ($st_row = $dbr->fetchObject($st_res)) {
            $dms[] = $st_row->defined_meaning_id;
            # Get synonyms and translations for each
            $li_res = $dbr->query("SELECT expression_id from uw_syntrans where defined_meaning_id=".$st_row->defined_meaning_id." and expression_id!=".$row->expression_id);
            while ($li_row = $dbr->fetchObject($li_res)) {
                $syntrans[$st_row->defined_meaning_id][] = $li_row->expression_id;
            }
        }

        foreach ($dms as $mid) {
            # Get meaning text IDs; make sure the key exists even without rows
            if (!isset($tcids[$mid])) {
                $tcids[$mid] = array();
            }
            $dm_res = $dbr->query("SELECT meaning_text_tcid from uw_defined_meaning WHERE defined_meaning_id=".$mid." and is_latest_ver=1");
            while ($dm_row = $dbr->fetchObject($dm_res)) {
                $tcids[$mid][] = $dm_row->meaning_text_tcid;
            }
            $alt_res = $dbr->query("select meaning_text_tcid from uw_alt_meaningtexts where meaning_mid=".$mid." and is_latest_set=1");
            while ($alt_row = $dbr->fetchObject($alt_res)) {
                $tcids[$mid][] = $alt_row->meaning_text_tcid;
            }

            # Spellings of synonyms and translations, grouped by language
            $transl = array();
            if (array_key_exists($mid, $syntrans)) {
                foreach ($syntrans[$mid] as $liid) {
                    $sp_res = $dbr->query("SELECT * from uw_expression_ns WHERE expression_id=".$liid);
                    while ($sp_row = $dbr->fetchObject($sp_res)) {
                        $transl[$sp_row->language_id][] = $sp_row->spelling;
                    }
                }
            }
            $translmid[$mid] = $transl;

            # Get relations (relation type 0 is handled as "attribute" below)
            $rels = array();
            $rt_res = $dbr->query("SELECT * from uw_meaning_relations where meaning1_mid=".$mid." and relationtype_mid!=0 and is_latest_set=1");
            while ($rt_row = $dbr->fetchObject($rt_res)) {
                $rels[$rt_row->relationtype_mid][] = $rt_row->meaning2_mid;
            }
            $meaning_rels[$mid] = $rels;

            # Get attributes
            $atts = array();
            $att_res = $dbr->query("SELECT * from uw_meaning_relations where meaning1_mid=".$mid." and relationtype_mid=0 and is_latest_set=1");
            while ($att_row = $dbr->fetchObject($att_res)) {
                $atts[] = $att_row->meaning2_mid;
            }
            $attrib_rels[$mid] = $atts;
        }

        if ($typenames === null) {
            $typenames = $this->getRelationTypes();
            $attnames = $this->getAttributeValues();
        }

        foreach ($dms as $mid) {
            $oids = array();
            $w .= "\n\n===Definition===\n";
            foreach ($tcids[$mid] as $tc) {
                $tc_res = $dbr->query("SELECT * from translated_content where set_id=".$tc);
                while ($tc_row = $dbr->fetchObject($tc_res)) {
                    $oids[$tc_row->language_id][] = $tc_row->text_id;
                }
            }

            foreach ($oids as $lang => $oid) {
                $langLabel = isset($langdefs[$lang]) ? $langdefs[$lang] : '';
                foreach ($oid as $oid_d) {
                    $w .= "\n\n'''''".$langLabel."'''''\n\n";
                    $t_res = $dbr->query("SELECT * from text where old_id=".$oid_d);
                    while ($t_row = $dbr->fetchObject($t_res)) {
                        $w .= $t_row->old_text;
                    }
                }
            }

            # Get spellings of translations and synonyms
            $w .= "<table border='0' cellpadding='5'><tr valign='top'><td width='20%'>\n'''Translations and Synonyms'''\n";
            foreach ($translmid[$mid] as $lang => $splist) {
                $langLabel = isset($langdefs[$lang]) ? $langdefs[$lang] : '';
                foreach ($splist as $spl) {
                    if (!empty($spl)) {
                        $w .= "* ''".$langLabel."'': [[WiktionaryZ:$spl|$spl]]\n";
                    }
                }
            }

            # Relations
            $w .= "</td><td>";

            $w .= "\n\n'''Relations:'''\n";
            foreach ($meaning_rels[$mid] as $type => $rellist) {
                $typeName = isset($typenames[$type]) ? $typenames[$type] : '';
                $w .= "\n".$typeName.":\n";
                foreach ($rellist as $rel) {
                    $rs_res = $dbr->query("SELECT expression_id from uw_defined_meaning where defined_meaning_id=".$rel." LIMIT 1");
                    $rs_row = $dbr->fetchObject($rs_res);
                    # The related meaning may have no row or no defining expression
                    if (!$rs_row || !$rs_row->expression_id) {
                        continue;
                    }
                    $li_res = $dbr->query("SELECT spelling from uw_expression_ns where expression_id=".$rs_row->expression_id);
                    $li_row = $dbr->fetchObject($li_res);
                    if (!$li_row) {
                        continue;
                    }
                    $w .= "* [[WiktionaryZ:".$li_row->spelling."|".$li_row->spelling."]]\n";
                }
            }

            $w .= "\n\n'''Attributes:'''\n";
            foreach ($attrib_rels[$mid] as $att) {
                $attName = isset($attnames[$att]) ? $attnames[$att] : '';
                $w .= "* [[WiktionaryZ:".$attName."|".$attName."]]\n";
            }

            $w .= "</td></tr></table>";
        }
    }

    $wgOut->addWikiText($w);
    # We may later want to disable the regular page component
    # $wgOut->setPageTitleArray($this->mTitle->getTitleArray());
}
| 192 | + |
/**
 * Returns the language names (language id => name) written in the language
 * identified by $code.
 *
 * Falls back to English ('en') when the code is unknown, and again when no
 * language name translations are available for the chosen language.
 *
 * @param string $code Language code as stored in language.wikimedia_key
 * @return array Map of language id to language name
 */
function getLangNames($code) {
    $languageId = $this->getLanguageIdForCode($code);
    if (!$languageId) {
        $languageId = $this->getLanguageIdForCode('en');
    }

    $languageNames = $this->getLanguageNamesForId($languageId);
    if (!empty($languageNames)) {
        return $languageNames;
    }

    # Nothing available in the chosen language: use the English names instead.
    return $this->getLanguageNamesForId($this->getLanguageIdForCode('en'));
}
| 204 | + |
/**
 * Looks up the numeric language id belonging to a language code.
 *
 * The code is quoted by the database layer before it is put into the query;
 * callers in this class pass the user's language preference, which must not
 * be trusted.
 *
 * @param string $code Language code matched against language.wikimedia_key
 * @return mixed The language_id of the first matching row, or null when the code is unknown
 */
function getLanguageIdForCode($code) {
    $dbr =& wfGetDB( DB_SLAVE );
    $id_res = $dbr->query("select language_id from language where wikimedia_key=".$dbr->addQuotes($code));
    $id_row = $dbr->fetchObject($id_res);

    # fetchObject() yields a false value when there is no row
    return $id_row ? $id_row->language_id : null;
}
| 211 | + |
/**
 * Returns the names of all languages, written in the language with the given id.
 *
 * @param mixed $id Id of the language the names are written in; cast to an
 *                  integer so that a missing id yields an empty result instead
 *                  of a malformed query
 * @return array Map of language_id to language_name (empty when nothing is found)
 */
function getLanguageNamesForId($id) {
    $dbr =& wfGetDB( DB_SLAVE );
    $nameLanguageId = (int)$id;
    $langs = array();

    $lang_res = $dbr->query("select language_names.language_id,language_names.language_name,language.wikimedia_key from language,language_names where language_names.name_language_id=".$nameLanguageId." and language.language_id=language_names.name_language_id");
    while ($lang_row = $dbr->fetchObject($lang_res)) {
        $langs[$lang_row->language_id] = $lang_row->language_name;
    }

    return $langs;
}
| 221 | + |
/**
 * Builds a map of relation type names.
 *
 * Every member of every relation-type collection (see getReltypeCollections())
 * is looked up and paired with its expression in language id 85.
 *
 * @return array Map of member meaning id to relation type name
 */
function getRelationTypes() {
    $dbr =& wfGetDB( DB_SLAVE );
    $namesByMeaningId = array();

    foreach ($this->getReltypeCollections() as $collectionName => $collectionId) {
        $members = $dbr->query("select member_mid from uw_collection_contents where collection_id=$collectionId and is_latest_set=1");
        while ($member = $dbr->fetchObject($members)) {
            # fixme hardcoded English
            $namesByMeaningId[$member->member_mid] = $this->getExpressionForMeaningId($member->member_mid, 85);
        }
    }

    return $namesByMeaningId;
}
| 236 | + |
/**
 * Lists the latest collections of type 'RELT'.
 *
 * @return array Map of collection name (expression of the collection's meaning
 *               in language id 85) to collection_id
 */
function getReltypeCollections() {
    $dbr =& wfGetDB ( DB_SLAVE );
    $collectionIdsByName = array();

    $collections = $dbr->query("select collection_id,collection_mid from uw_collection_ns where collection_type='RELT' and is_latest=1");
    while ($collection = $dbr->fetchObject($collections)) {
        # fixme hardcoded English
        $name = $this->getExpressionForMeaningId($collection->collection_mid, 85);
        $collectionIdsByName[$name] = $collection->collection_id;
    }

    return $collectionIdsByName;
}
| 249 | + |
/**
 * Returns one spelling that expresses a defined meaning in a given language.
 *
 * @param mixed $mid      Defined meaning id
 * @param mixed $langcode Despite its name this is compared against
 *                        uw_expression_ns.language_id, i.e. a language id
 * @return mixed The spelling of the first matching expression, or null when there is none
 */
function getExpressionForMeaningId($mid, $langcode) {
    $dbr =& wfGetDB(DB_SLAVE);
    $sql = "SELECT spelling from uw_syntrans,uw_expression_ns where defined_meaning_id=".$mid." and uw_expression_ns.expression_id=uw_syntrans.expression_id and uw_expression_ns.language_id=".$langcode." limit 1";
    $sp_res = $dbr->query($sql);
    $sp_row = $dbr->fetchObject($sp_res);

    # No expression in that language: report null instead of reading a property of a non-object
    return $sp_row ? $sp_row->spelling : null;
}
| 257 | + |
/**
 * Validates the submitted edit form.
 *
 * No validation is implemented; every form is reported as valid.
 *
 * @return bool Always true
 */
function checkForm() {
    $formIsValid = true;

    return $formIsValid;
}
| 261 | + |
/**
 * Stores the data posted by the edit form.
 *
 * Walks the expressions matching the page title and their defined meanings in
 * the same order as edit(), so the section numbering matches. For each defined
 * meaning inside the section being edited it
 *  - overwrites every existing definition text for which a non-empty
 *    "definition-<textId>" value was posted,
 *  - adds a newly translated definition, if one was posted, and
 *  - adds the posted synonyms/translations.
 * Finally the page is touched.
 *
 * NOTE(review): no edit token is checked before writing - confirm whether the
 * caller takes care of that.
 */
function saveForm() {
    global $wgTitle, $wgRequest;

    $this->initializeSections($wgRequest->getInt('section'));
    $dbr =& wfGetDB( DB_MASTER );

    # Get entry record from GEMET namespace
    $res = $dbr->query("SELECT * from uw_expression_ns WHERE spelling=BINARY ".$dbr->addQuotes($wgTitle->getText()));

    while ($row = $dbr->fetchObject($res)) {
        # Level 1 sections must be counted even though their state is not used here
        $this->addSection(1);

        $expressionId = $row->expression_id;
        $definedMeaningIds = $this->getDefinedMeaningsForExpression($expressionId);
        $definedMeaningTexts = $this->getDefinedMeaningTexts($definedMeaningIds);
        $definedMeaningRelations = $this->getDefinedMeaningRelations($definedMeaningIds);

        foreach ($definedMeaningRelations as $definedMeaningId => $relations) {
            if (!$this->addSection(2)) {
                continue;
            }

            # A meaning without a latest definition has no text set to update
            if (isset($definedMeaningTexts[$definedMeaningId])) {
                $setId = $definedMeaningTexts[$definedMeaningId];
                $translatedContents = $this->getTranslatedContents($setId);

                foreach ($translatedContents as $languageId => $textId) {
                    $definition = $wgRequest->getText('definition-'.$textId);

                    if ($definition != '')
                        $this->setText($textId, $definition);
                }

                $this->addTranslatedDefinitionFromRequest($definedMeaningId, $setId, getRevisionForExpressionId($expressionId), array_keys($translatedContents));
            }

            $this->addSynonymsOrTranslationsFromRequest($definedMeaningId);
        }
    }

    Title::touchArray(array($wgTitle));
}
| 304 | + |
/**
 * Shows the edit form for the current title, saving posted data first.
 *
 * When the request carries a non-empty "save" value, saveForm() is run before
 * the form is rebuilt. The form lists, for each defined meaning inside the
 * section being edited, the existing synonyms/translations, one textarea per
 * existing definition, inputs for a newly translated definition and the fields
 * for adding a synonym/translation.
 *
 * Values taken from the database are HTML-escaped before they are written
 * with addHTML().
 */
function edit() {
    global $wgOut, $wgTitle, $wgUser, $wgRequest;

    if ($wgRequest->getText('save') != '')
        $this->saveForm();

    $this->initializeSections($wgRequest->getInt('section'));

    $userlang = $wgUser->getOption('language');

    $wgOut->addWikiText("Your user interface language preference: '''$userlang''' - [[Special:Preferences|set your preferences]]");
    $wgOut->addHTML('<form method="post">');

    # Get language names, preferably in UI language
    $langdefs = $this->getLangNames($userlang);

    $dbr =& wfGetDB(DB_MASTER);

    # Get entry record from GEMET namespace
    $queryResult = $dbr->query("SELECT * from uw_expression_ns WHERE spelling=BINARY ".$dbr->addQuotes($wgTitle->getText()));

    while ($row = $dbr->fetchObject($queryResult)) {
        $expressionLanguage = isset($langdefs[$row->language_id]) ? $langdefs[$row->language_id] : '';

        if ($this->addSection(1))
            $wgOut->addHTML("<h2> <i>Spelling: </i>" . htmlspecialchars($row->spelling) . " - <i>Language:</i> ".htmlspecialchars($expressionLanguage)." </h2>");

        $expressionId = $row->expression_id;
        $definedMeaningIds = $this->getDefinedMeaningsForExpression($expressionId);
        $synonymsAndTranslationIds = $this->getSynonymAndTranslationIds($definedMeaningIds, $expressionId);
        $spellingsPerDefinedMeaningAndLanguage = $this->getSpellingsPerDefinedMeaningAndLanguage($definedMeaningIds, $synonymsAndTranslationIds);
        $definedMeaningTexts = $this->getDefinedMeaningTexts($definedMeaningIds);
        $definedMeaningRelations = $this->getDefinedMeaningRelations($definedMeaningIds);

        foreach ($definedMeaningRelations as $definedMeaningId => $relations) {
            if (!$this->addSection(2)) {
                continue;
            }

            $wgOut->addHTML('<table border="0" cellpadding="5"><tr valign="top"><td width="20%">');
            $wgOut->addHTML('<b>Translations and synonyms</b>');

            foreach ($spellingsPerDefinedMeaningAndLanguage[$definedMeaningId] as $languageId => $spellings) {
                $languageName = isset($langdefs[$languageId]) ? $langdefs[$languageId] : '';

                foreach ($spellings as $spelling)
                    if (!empty($spelling))
                        $wgOut->addWikiText("* ''$languageName'': [[WiktionaryZ:$spelling|$spelling]]\n");
            }

            $wgOut->addHTML('</td><td>');

            $wgOut->addHTML("<div><b>Definition</b></div>");

            # A meaning without a latest definition has no translated contents yet
            $translatedContents = array();
            if (isset($definedMeaningTexts[$definedMeaningId]))
                $translatedContents = $this->getTranslatedContents($definedMeaningTexts[$definedMeaningId]);

            foreach ($translatedContents as $languageId => $textId) {
                $languageName = isset($langdefs[$languageId]) ? $langdefs[$languageId] : '';

                $wgOut->addHTML("<div><i>".htmlspecialchars($languageName)."</i></div>".
                    '<textarea name="definition-'. $textId .'" rows="5">'.htmlspecialchars($this->getText($textId)).'</textarea>');
            }

            $wgOut->addHTML('<div><i>Translate into</i>: <select name="translated-definition-language-'. $definedMeaningId .'">'. $this->getLanguageOptions(array_keys($translatedContents)) .'</select></div>'.
                '<textarea name="translated-definition-'.$definedMeaningId.'" rows="5"></textarea>');

            $wgOut->addHTML('</td></tr></table>'. $this->getAddTranslationsAndSynonymsFormFields($definedMeaningId));
        }
    }

    $wgOut->addHTML('<input type="submit" name="save" value="Save"/>');
    $wgOut->addHTML('</form>');
}
| 373 | + |
/**
 * Collects the defined meanings an expression is bound to.
 *
 * @param mixed $expressionId Expression id; inserted into the query as-is
 * @return array List of defined_meaning_id values taken from uw_syntrans
 */
function getDefinedMeaningsForExpression($expressionId) {
    $dbr =& wfGetDB(DB_SLAVE);
    $sql = "SELECT defined_meaning_id from uw_syntrans WHERE expression_id=$expressionId";
    $rows = $dbr->query($sql);

    $meaningIds = array();
    while ($syntrans = $dbr->fetchObject($rows)) {
        array_push($meaningIds, $syntrans->defined_meaning_id);
    }

    return $meaningIds;
}
| 384 | + |
/**
 * Looks up the definition text set of each defined meaning.
 *
 * Only rows flagged is_latest_ver=1 are read; a meaning without such a row
 * gets no entry in the result. If several rows match, the last one wins.
 *
 * @param array $definedMeaningIds List of defined meaning ids
 * @return array Map of defined meaning id to meaning_text_tcid
 */
function getDefinedMeaningTexts($definedMeaningIds) {
    $dbr =& wfGetDB(DB_SLAVE);
    $textSetIds = array();

    foreach ($definedMeaningIds as $meaningId) {
        $rows = $dbr->query("SELECT meaning_text_tcid from uw_defined_meaning WHERE defined_meaning_id=$meaningId and is_latest_ver=1");

        while ($row = $dbr->fetchObject($rows)) {
            $textSetIds[$meaningId] = $row->meaning_text_tcid;
        }
    }

    return $textSetIds;
}
| 398 | + |
/**
 * Resolves synonym/translation expression ids to spellings, grouped per
 * defined meaning and language.
 *
 * @param array $definedMeaningIds         List of defined meaning ids
 * @param array $synonymsAndTranslationIds Map of defined meaning id to a list
 *                                         of expression ids; meanings may be absent
 * @return array Map of defined meaning id => (language id => list of spellings);
 *               every id in $definedMeaningIds gets an entry, possibly empty
 */
function getSpellingsPerDefinedMeaningAndLanguage($definedMeaningIds, $synonymsAndTranslationIds) {
    $dbr =& wfGetDB(DB_SLAVE);
    $result = array();

    foreach ($definedMeaningIds as $meaningId) {
        $byLanguage = array();
        $expressionIds = array_key_exists($meaningId, $synonymsAndTranslationIds)
            ? $synonymsAndTranslationIds[$meaningId]
            : array();

        foreach ($expressionIds as $expressionId) {
            $rows = $dbr->query("SELECT * from uw_expression_ns WHERE expression_id=$expressionId");

            while ($row = $dbr->fetchObject($rows)) {
                $byLanguage[$row->language_id][] = $row->spelling;
            }
        }

        $result[$meaningId] = $byLanguage;
    }

    return $result;
}
| 419 | + |
/**
 * Finds, for each defined meaning, the other expressions bound to it.
 *
 * @param array $definedMeaningIds   List of defined meaning ids
 * @param mixed $skippedExpressionId Expression id to leave out of the result
 * @return array Map of defined meaning id to a list of expression ids;
 *               meanings without other expressions have no entry
 */
function getSynonymAndTranslationIds($definedMeaningIds, $skippedExpressionId) {
    $dbr =& wfGetDB(DB_SLAVE);
    $idsPerMeaning = array();

    foreach ($definedMeaningIds as $meaningId) {
        $rows = $dbr->query("SELECT expression_id from uw_syntrans where defined_meaning_id=$meaningId and expression_id!=$skippedExpressionId");

        while ($row = $dbr->fetchObject($rows)) {
            $idsPerMeaning[$meaningId][] = $row->expression_id;
        }
    }

    return $idsPerMeaning;
}
| 433 | + |
/**
 * Loads the latest relations that start at each of the given defined meanings.
 *
 * @param array $definedMeaningIds List of defined meaning ids
 * @return array Map of defined meaning id => (relationtype_mid => list of
 *               meaning2_mid); every given id gets an entry, possibly empty
 */
function getDefinedMeaningRelations($definedMeaningIds) {
    $dbr =& wfGetDB(DB_SLAVE);
    $relationsPerMeaning = array();

    foreach ($definedMeaningIds as $meaningId) {
        $rows = $dbr->query("SELECT * from uw_meaning_relations where meaning1_mid=$meaningId and is_latest_set=1");

        $targetsByType = array();
        while ($row = $dbr->fetchObject($rows)) {
            $targetsByType[$row->relationtype_mid][] = $row->meaning2_mid;
        }

        $relationsPerMeaning[$meaningId] = $targetsByType;
    }

    return $relationsPerMeaning;
}
| 450 | + |
/**
 * Returns the texts that belong to a translated content set.
 *
 * If the set holds several rows for one language, the last one read wins.
 *
 * @param mixed $setId Set id; inserted into the query as-is
 * @return array Map of language_id to text_id
 */
function getTranslatedContents($setId) {
    $dbr =& wfGetDB(DB_SLAVE);
    $rows = $dbr->query("SELECT * from translated_content where set_id=$setId");

    $textIdsByLanguage = array();
    while ($row = $dbr->fetchObject($rows)) {
        $textIdsByLanguage[$row->language_id] = $row->text_id;
    }

    return $textIdsByLanguage;
}
| 461 | + |
/**
 * Builds the HTML for the "add translation/synonym" inputs of one defined meaning.
 *
 * The field names carry the defined meaning id as suffix ("language-<id>",
 * "spelling-<id>", "endemic-meaning-<id>"), which is what
 * addSynonymOrTranslationFromRequest() reads back.
 *
 * @param mixed $definedMeaningId Defined meaning id used as suffix of the field names
 * @return string HTML fragment
 */
function getAddTranslationsAndSynonymsFormFields($definedMeaningId) {
    # The checkbox cell used to lack its closing </td>
    return '<div><b>Add translation/synonym</b></div>
        <table>
        <tr><th>Language</th><th>Spelling</th><th>Identical meaning?</th><th>Input rows</th></tr>
        <tr id="add-translation-synonym-'. $definedMeaningId .'" class="repeat">
        <td><select name="language-'. $definedMeaningId .'">'. $this->getLanguageOptions() .'</select></td>
        <td><input type="text" name="spelling-'. $definedMeaningId .'" maxlength="255"/></td>
        <td><input type="checkbox" name="endemic-meaning-'. $definedMeaningId .'" checked="checked"/></td>
        <td></td>
        </tr>
        </table>';
}
| 474 | + |
/**
 * Returns the ids of all expressions bound to a defined meaning.
 *
 * NOTE(review): the previous body queried the text table with an undefined
 * variable, ignored $definedMeaningId and returned nothing. The query below is
 * inferred from the method name and from getSynonymAndTranslationIds() -
 * confirm that this is the intended result.
 *
 * @param mixed $definedMeaningId Defined meaning id; inserted into the query as-is
 * @return array List of expression_id values from uw_syntrans
 */
function getTranslationIdsForDefinedMeaning($definedMeaningId) {
    $dbr =& wfGetDB(DB_SLAVE);
    $queryResult = $dbr->query("SELECT expression_id from uw_syntrans where defined_meaning_id=$definedMeaningId");

    $expressionIds = array();
    while ($row = $dbr->fetchObject($queryResult)) {
        $expressionIds[] = $row->expression_id;
    }

    return $expressionIds;
}
| 479 | + |
/**
 * Builds the <option> elements of a language selector.
 *
 * The languages are listed by name (in the user's interface language, see
 * getLangNames()) in ascending order; the user's own language is preselected.
 * Language names are HTML-escaped.
 *
 * @param array $languageIdsToExclude Language ids that must not be offered
 * @return string Concatenated <option> elements
 */
function getLanguageOptions($languageIdsToExclude = array()) {
    global $wgUser;

    $userLanguage = $wgUser->getOption('language');
    $userLanguageId = $this->getLanguageIdForCode($userLanguage);
    $idNameIndex = $this->getLangNames($userLanguage);
    # asort() keeps the id => name association while sorting by name
    asort($idNameIndex);

    $result = '';

    foreach ($idNameIndex as $id => $name) {
        if (in_array($id, $languageIdsToExclude)) {
            continue;
        }

        $selected = ($id == $userLanguageId) ? ' selected="selected"' : '';
        $result .= '<option value="'. $id .'"'. $selected .'>'. htmlspecialchars($name) . '</option>';
    }

    return $result;
}
| 504 | + |
/**
 * Reads the content of a row of the text table.
 *
 * @param mixed $textId Value of text.old_id; inserted into the query as-is
 * @return string The old_text of the row, or "" when there is no such row
 */
function getText($textId) {
    $dbr =& wfGetDB(DB_SLAVE);
    $rows = $dbr->query("SELECT * from text where old_id=$textId");
    $record = $dbr->fetchObject($rows);

    if (!$record) {
        return "";
    }

    return $record->old_text;
}
| 514 | + |
/**
 * Overwrites the content of an existing row of the text table.
 *
 * @param mixed  $textId Value of text.old_id; inserted into the query as-is
 * @param string $text   New content; quoted by the database layer
 */
function setText($textId, $text) {
    $dbw = &wfGetDB(DB_MASTER);
    $quotedText = $dbw->addQuotes($text);

    $dbw->query("UPDATE text SET old_text=$quotedText WHERE old_id=$textId");
}
| 521 | + |
/**
 * Inserts a new row into the text table.
 *
 * @param string $text Content of the new row; quoted by the database layer
 * @return mixed The insert id reported by the database for the new row
 */
function createText($text) {
    $dbw = &wfGetDB(DB_MASTER);
    $quotedText = $dbw->addQuotes($text);

    $dbw->query("insert into text(old_text) values($quotedText)");

    return $dbw->insertId();
}
| 530 | + |
/**
 * Makes sure an expression exists for the spelling/language pair and that it
 * is bound to the given defined meaning.
 *
 * @param string $spelling         Spelling of the expression
 * @param mixed  $languageId       Language id of the expression
 * @param mixed  $definedMeaningId Defined meaning to bind the expression to
 * @param mixed  $endemicMeaning   Passed through to assureIsBoundToDefinedMeaning()
 */
function addSynonymOrTranslation($spelling, $languageId, $definedMeaningId, $endemicMeaning) {
    findOrCreateExpression($spelling, $languageId)
        ->assureIsBoundToDefinedMeaning($definedMeaningId, $endemicMeaning);
}
| 535 | + |
/**
 * Adds one synonym/translation from the posted form fields carrying $postFix.
 *
 * Nothing happens unless a "language-<postFix>" field was posted and the
 * "spelling-<postFix>" field is non-empty.
 *
 * @param mixed  $definedMeaningId Defined meaning to bind the expression to
 * @param string $postFix          Suffix of the form field names to read
 */
function addSynonymOrTranslationFromRequest($definedMeaningId, $postFix) {
    global $wgRequest;

    if (!array_key_exists('language-'. $postFix, $_POST)) {
        return;
    }

    $languageId = $wgRequest->getInt('language-'. $postFix);
    $spelling = $wgRequest->getText('spelling-'. $postFix);
    $endemicMeaning = $wgRequest->getCheck('endemic-meaning-'.$postFix);

    if ($spelling == '') {
        return;
    }

    $this->addSynonymOrTranslation($spelling, $languageId, $definedMeaningId, $endemicMeaning);
}
| 549 | + |
/**
 * Adds all synonyms/translations posted for one defined meaning.
 *
 * The first input row uses the plain defined meaning id as field suffix; rows
 * 2..N use "<id>-<row>", where N is read from the posted
 * "add-translation-synonym-<id>-RC" value.
 *
 * @param mixed $definedMeaningId Defined meaning the expressions are bound to
 */
function addSynonymsOrTranslationsFromRequest($definedMeaningId) {
    global $wgRequest;

    $this->addSynonymOrTranslationFromRequest($definedMeaningId, $definedMeaningId);

    $rowCount = $wgRequest->getInt('add-translation-synonym-'. $definedMeaningId . '-RC');
    for ($rowNumber = 2; $rowNumber <= $rowCount; $rowNumber++) {
        $this->addSynonymOrTranslationFromRequest($definedMeaningId, $definedMeaningId . '-' . $rowNumber);
    }
}
| 559 | + |
/**
 * Inserts a row into translated_content that links a text to a set and language.
 *
 * The first_set column receives the same value as set_id. All arguments are
 * inserted into the query as-is.
 *
 * @param mixed $setId      Translated content set id
 * @param mixed $languageId Language of the text
 * @param mixed $textId     Id of the text row
 * @param mixed $revisionId Revision the row is attributed to
 * @return mixed The insert id reported by the database
 */
function createTranslatedContent($setId, $languageId, $textId, $revisionId) {
    $dbw = &wfGetDB(DB_MASTER);

    $dbw->query("insert into translated_content(set_id,language_id,text_id,first_set,revision_id) values($setId, $languageId, $textId, $setId, $revisionId)");

    return $dbw->insertId();
}
| 567 | + |
/**
 * Stores a definition text and attaches it to a translated content set.
 *
 * @param mixed  $setId      Translated content set id
 * @param mixed  $languageId Language of the definition
 * @param string $definition Definition text
 * @param mixed  $revisionId Revision the new content is attributed to
 */
function addTranslatedDefinition($setId, $languageId, $definition, $revisionId) {
    $this->createTranslatedContent($setId, $languageId, $this->createText($definition), $revisionId);
}
| 572 | + |
/**
 * Adds the newly translated definition posted for a defined meaning, if any.
 *
 * Reads "translated-definition-language-<id>" and "translated-definition-<id>"
 * from the request. Nothing is stored when the definition is empty or the
 * chosen language is one of the excluded ones.
 *
 * @param mixed $definedMeaningId     Defined meaning id used as field name suffix
 * @param mixed $setId                Translated content set to add the definition to
 * @param mixed $revisionId           Revision the new content is attributed to
 * @param array $languageIdsToExclude Language ids for which no definition may be added
 */
function addTranslatedDefinitionFromRequest($definedMeaningId, $setId, $revisionId, $languageIdsToExclude) {
    global $wgRequest;

    $languageId = $wgRequest->getInt('translated-definition-language-'.$definedMeaningId);
    $definition = $wgRequest->getText('translated-definition-'.$definedMeaningId);

    if ($definition == '' || in_array($languageId, $languageIdsToExclude)) {
        return;
    }

    $this->addTranslatedDefinition($setId, $languageId, $definition, $revisionId);
}
| 583 | + |
/**
 * Builds a map of attribute names.
 *
 * Every member of every collection of type 'ATTR' is looked up and paired
 * with its expression in language id 85.
 *
 * @return array Map of member meaning id to attribute name
 */
function getAttributeValues(){
    $dbr =& wfGetDB( DB_SLAVE );
    $namesByMeaningId = array();

    foreach ($this->getCollectionsByType('ATTR') as $collectionName => $collectionId) {
        $members = $dbr->query("select member_mid from uw_collection_contents where collection_id=$collectionId and is_latest_set=1");
        while ($member = $dbr->fetchObject($members)) {
            # fixme hardcoded English
            $namesByMeaningId[$member->member_mid] = $this->getExpressionForMid($member->member_mid, 85);
        }
    }

    return $namesByMeaningId;
}
| 598 | + |
/**
 * Lists the latest collections of a given type.
 *
 * @param string $type Value matched against uw_collection_ns.collection_type;
 *                     quoted by the database layer
 * @return array Map of collection name (expression of the collection's meaning
 *               in language id 85) to collection_id
 */
function getCollectionsByType($type) {
    $dbr =& wfGetDB ( DB_SLAVE );
    $collectionIdsByName = array();

    $collections = $dbr->query("select collection_id,collection_mid from uw_collection_ns where collection_type=".$dbr->addQuotes($type)." and is_latest=1");
    while ($collection = $dbr->fetchObject($collections)) {
        # fixme hardcoded English
        $name = $this->getExpressionForMid($collection->collection_mid, 85);
        $collectionIdsByName[$name] = $collection->collection_id;
    }

    return $collectionIdsByName;
}
| 611 | + |
/**
 * Returns one spelling that expresses a defined meaning in a given language.
 *
 * @param mixed $mid      Defined meaning id
 * @param mixed $langcode Despite its name this is compared against
 *                        uw_expression_ns.language_id, i.e. a language id
 * @return mixed The spelling of the first matching expression, or null when there is none
 */
function getExpressionForMid($mid,$langcode) {
    $dbr =& wfGetDB(DB_SLAVE);
    $sql = "SELECT spelling from uw_syntrans,uw_expression_ns where defined_meaning_id=".$mid." and uw_expression_ns.expression_id=uw_syntrans.expression_id and uw_expression_ns.language_id=".$langcode." limit 1";
    $sp_res = $dbr->query($sql);
    $sp_row = $dbr->fetchObject($sp_res);

    # No expression in that language: report null instead of reading a property of a non-object
    return $sp_row ? $sp_row->spelling : null;
}
| 620 | + |
| 621 | +} |
| 622 | + |
| 623 | +?> |
\ No newline at end of file |
Index: trunk/extensions/Wikidata/WiktionaryZ/Expression.php |
— | — | @@ -0,0 +1,204 @@ |
| 2 | +<?php
|
/**
 * An expression: a spelling in a particular language, together with the page
 * and revision that represent it.
 */
class Expression {
    # Id of the expression row
    public $id;
    # Spelling of the expression
    public $spelling;
    # Id of the language the spelling is in
    public $languageId;
    # Id of the page created for the expression; only set by createNewInDatabase()
    public $pageId;
    # Id of the revision linked to the expression; set by updateFromDatabase()
    # or createNewInDatabase()
    public $revisionId;

    /**
     * @param mixed  $id         Expression id
     * @param string $spelling   Spelling of the expression
     * @param mixed  $languageId Language id of the expression
     */
    function __construct($id, $spelling, $languageId) {
        $this->languageId = $languageId;
        $this->spelling = $spelling;
        $this->id = $id;
    }

    /**
     * @return string The spelling with every space replaced by an underscore
     */
    function getPageTitle() {
        return strtr($this->spelling, ' ', '_');
    }

    /**
     * Loads the revision id linked to this expression (null when there is none).
     */
    function updateFromDatabase() {
        $revisionId = getRevisionForExpressionId($this->id);
        $this->revisionId = $revisionId;
    }

    /**
     * Creates the page and its initial revision for this expression and links
     * the expression to that revision.
     */
    function createNewInDatabase() {
        $pageId = $this->createPage();
        $this->pageId = $pageId;

        $revisionId = createInitialRevisionForPage($pageId, 'Created by adding expression');
        $this->revisionId = $revisionId;

        linkExpressionToRevision($this->id, $revisionId);
    }

    /**
     * Creates the page for this expression in namespace 16
     * (presumably the WiktionaryZ namespace - TODO confirm).
     *
     * @return mixed Id of the new page
     */
    function createPage() {
        $title = $this->getPageTitle();

        return createPage(16, $title, $this->languageId);
    }

    /**
     * @param mixed $definedMeaningId Defined meaning id
     * @return mixed Result of getSetIdForDefinedMeaningAndExpression(); callers
     *               in this class only test it for truthiness
     */
    function isBoundToDefinedMeaning($definedMeaningId) {
        $setId = getSetIdForDefinedMeaningAndExpression($definedMeaningId, $this->id);

        return $setId;
    }

    /**
     * Binds this expression to a defined meaning as synonym or translation.
     *
     * @param mixed $definedMeaningId Defined meaning id
     * @param mixed $endemicMeaning   Passed through to createSynonymOrTranslation()
     */
    function bindToDefinedMeaning($definedMeaningId, $endemicMeaning) {
        createSynonymOrTranslation(
            determineSetIdForDefinedMeaning($definedMeaningId),
            $definedMeaningId,
            $this->id,
            $this->revisionId,
            $endemicMeaning
        );
    }

    /**
     * Binds this expression to a defined meaning unless it is bound already.
     *
     * @param mixed $definedMeaningId Defined meaning id
     * @param mixed $endemicMeaning   Passed through to bindToDefinedMeaning()
     */
    function assureIsBoundToDefinedMeaning($definedMeaningId, $endemicMeaning) {
        if ($this->isBoundToDefinedMeaning($definedMeaningId)) {
            return;
        }

        $this->bindToDefinedMeaning($definedMeaningId, $endemicMeaning);
    }
}
|
| 49 | +
|
/**
 * Looks up the id of the latest expression with the given spelling and language.
 *
 * The spelling is compared binary (case-sensitively) and quoted by the
 * database layer; the language id is forced to an integer before it is put
 * into the query.
 *
 * @param string $spelling   Spelling to look for
 * @param mixed  $languageId Language id
 * @return mixed The expression_id of the first matching row, or null when none exists
 */
function getExpressionId($spelling, $languageId) {
    $dbr = &wfGetDB(DB_SLAVE);
    $sql = 'select expression_id from uw_expression_ns where spelling=binary '. $dbr->addQuotes($spelling) . ' and language_id=' . (int)$languageId . ' and is_latest=1';
    $queryResult = $dbr->query($sql);
    $expression = $dbr->fetchObject($queryResult);

    # findExpression() relies on a false-like result for "not found"
    return $expression ? $expression->expression_id : null;
}
|
| 57 | +
|
/**
 * Sets the first_ver column of an expression row.
 *
 * @param mixed $expressionId   Expression to update; inserted into the query as-is
 * @param mixed $firstVersionId Value for first_ver; inserted into the query as-is
 */
function setFirstVersion($expressionId, $firstVersionId) {
    $dbw = &wfGetDB(DB_MASTER);

    $dbw->query("update uw_expression_ns set first_ver=$firstVersionId where expression_id=$expressionId");
}
|
| 63 | +
|
/**
 * Inserts a new expression row flagged as latest and marks it as its own
 * first version.
 *
 * @param string $spelling   Spelling; quoted by the database layer
 * @param mixed  $languageId Language id; inserted into the query as-is
 * @return mixed The insert id of the new expression row
 */
function createExpressionId($spelling, $languageId) {
    $dbw = &wfGetDB(DB_MASTER);
    $quotedSpelling = $dbw->addQuotes($spelling);

    $dbw->query("insert into uw_expression_ns(spelling,language_id,is_latest) values($quotedSpelling, $languageId, 1)");
    $newExpressionId = $dbw->insertId();

    # A new expression is its own first version
    setFirstVersion($newExpressionId, $newExpressionId);

    return $newExpressionId;
}
|
| 75 | +
|
/**
 * Finds the revision linked to an expression through revision.rev_data_id.
 *
 * @param mixed $expressionId Expression id; inserted into the query as-is
 * @return mixed The rev_id of the first matching revision, or null when there is none
 */
function getRevisionForExpressionId($expressionId) {
    $dbr = &wfGetDB(DB_SLAVE);
    $rows = $dbr->query("select rev_id from revision where rev_data_id=$expressionId");
    $revision = $dbr->fetchObject($rows);

    if (!$revision) {
        return null;
    }

    return $revision->rev_id;
}
|
| 86 | +
|
/**
 * Inserts a new row into the page table, flagged as new and touched now.
 *
 * @param mixed  $namespace  Value for page_namespace; inserted into the query as-is
 * @param string $title      Value for page_title; quoted by the database layer
 * @param mixed  $languageId Value for page_title_language_id; inserted as-is
 * @return mixed The insert id of the new page row
 */
function createPage($namespace, $title, $languageId) {
    $dbw = &wfGetDB(DB_MASTER);
    $quotedTitle = $dbw->addQuotes($title);
    $now = $dbw->timestamp();

    $dbw->query(
        "insert into page(page_namespace,page_title,page_is_new,page_title_language_id,page_touched) ".
        "values($namespace, $quotedTitle, 1, $languageId, $now)"
    );

    return $dbw->insertId();
}
|
| 98 | +
|
/**
 * Sets page_latest of the page row identified by $pageId.
 *
 * @param $pageId         page_id of the row to update (interpolated unquoted)
 * @param $latestRevision value written to page_latest (interpolated unquoted)
 */
function setPageLatestRevision($pageId, $latestRevision) {
    $dbw =& wfGetDB(DB_MASTER);
    $dbw->query("update page set page_latest=$latestRevision where page_id=$pageId");
}
|
| 104 | +
|
/**
 * Inserts a revision row for the given page, attributed to the global $wgUser,
 * and makes it the page's latest revision via setPageLatestRevision().
 *
 * The comment and the user name are escaped with addQuotes(); the page id,
 * the user id and the value of $dbr->timestamp() are interpolated unquoted.
 *
 * @param $pageId  page the revision belongs to
 * @param $comment revision comment
 * @return the insert id of the new revision row
 */
function createInitialRevisionForPage($pageId, $comment) {
    global $wgUser;

    $dbw =& wfGetDB(DB_MASTER);
    $userId = $wgUser->getID();
    $quotedUserName = $dbw->addQuotes($wgUser->getName());
    $quotedComment = $dbw->addQuotes($comment);
    $now = $dbw->timestamp();

    $dbw->query(
        "insert into revision(rev_page,rev_comment,rev_user,rev_user_text,rev_timestamp) ".
        "values($pageId, $quotedComment, $userId, $quotedUserName, $now)"
    );

    $revisionId = $dbw->insertId();
    setPageLatestRevision($pageId, $revisionId);

    return $revisionId;
}
|
| 124 | +
|
/**
 * Writes $expressionId into rev_data_id of the revision row $revisionId.
 *
 * @param $expressionId value written to rev_data_id (interpolated unquoted)
 * @param $revisionId   rev_id of the row to update (interpolated unquoted)
 */
function linkExpressionToRevision($expressionId, $revisionId) {
    $dbw =& wfGetDB(DB_MASTER);
    $dbw->query("update revision set rev_data_id=$expressionId where rev_id=$revisionId");
}
|
| 130 | +
|
/**
 * Finds an existing expression by spelling and language.
 *
 * @param $spelling   spelling to look for
 * @param $languageId language id to look for
 * @return an Expression that has had updateFromDatabase() called on it, or
 *         null when getExpressionId() yields a falsy id
 */
function findExpression($spelling, $languageId) {
    $expressionId = getExpressionId($spelling, $languageId);

    if (!$expressionId)
        return null;

    $found = new Expression($expressionId, $spelling, $languageId);
    $found->updateFromDatabase();

    return $found;
}
|
| 140 | +
|
/**
 * Creates a new expression: allocates an id with createExpressionId(), wraps
 * it in an Expression object and calls createNewInDatabase() on it.
 *
 * @param $spelling   spelling of the new expression
 * @param $languageId language id of the new expression
 * @return the new Expression object
 */
function createExpression($spelling, $languageId) {
    $newId = createExpressionId($spelling, $languageId);
    $created = new Expression($newId, $spelling, $languageId);
    $created->createNewInDatabase();

    return $created;
}
|
| 146 | +
|
/**
 * Returns the expression found by findExpression(), or creates one with
 * createExpression() when nothing is found.
 *
 * @param $spelling   spelling of the expression
 * @param $languageId language id of the expression
 * @return an Expression object
 */
function findOrCreateExpression($spelling, $languageId) {
    $existing = findExpression($spelling, $languageId);

    if (!$existing)
        return createExpression($spelling, $languageId);

    return $existing;
}
|
| 153 | +
|
/**
 * Reads the set_id of the uw_syntrans row that links the given defined
 * meaning and expression.
 *
 * @param $definedMeaningId defined_meaning_id to match (interpolated unquoted)
 * @param $expressionId     expression_id to match (interpolated unquoted)
 * @return set_id of the first matching row, or 0 when no row matches
 */
function getSetIdForDefinedMeaningAndExpression($definedMeaningId, $expressionId) {
    $dbr =& wfGetDB(DB_SLAVE);
    $res = $dbr->query(
        "select set_id from uw_syntrans where defined_meaning_id=$definedMeaningId and expression_id=$expressionId"
    );
    $row = $dbr->fetchObject($res);

    if (!$row)
        return 0;

    return $row->set_id;
}
|
| 164 | +
|
/**
 * Reads the set_id of a uw_syntrans row for the given defined meaning that is
 * flagged is_latest_set=1.
 *
 * @param $definedMeaningId defined_meaning_id to match (interpolated unquoted)
 * @return set_id of the first matching row, or 0 when no row matches
 */
function getLatestSetIdForDefinedMeaning($definedMeaningId) {
    $dbr =& wfGetDB(DB_SLAVE);
    $res = $dbr->query(
        "select set_id from uw_syntrans where defined_meaning_id=$definedMeaningId and is_latest_set=1"
    );
    $row = $dbr->fetchObject($res);

    if (!$row)
        return 0;

    return $row->set_id;
}
|
| 175 | +
|
/**
 * Chooses the set id to use for a defined meaning's synonyms/translations.
 *
 * Uses the id reported by getLatestSetIdForDefinedMeaning(); when that is 0,
 * falls back to one more than the current maximum set_id in uw_syntrans.
 *
 * @param $definedMeaningId defined meaning to look up
 * @return the set id to use
 */
function determineSetIdForDefinedMeaning($definedMeaningId) {
    $setId = getLatestSetIdForDefinedMeaning($definedMeaningId);

    if ($setId == 0) {
        // No latest set recorded for this meaning: allocate the next id.
        $setId = getMaximum('set_id', 'uw_syntrans') + 1;
    }

    return $setId;
}
|
| 184 | +
|
| 185 | +
|
/**
 * Inserts a uw_syntrans row tying an expression to a defined meaning.
 *
 * The row is written with first_set equal to $setId and is_latest_set=1.
 * $endemicMeaning is cast to an integer before being stored; all other values
 * are interpolated unquoted.
 *
 * @param $setId            value for set_id and first_set
 * @param $definedMeaningId value for defined_meaning_id
 * @param $expressionId     value for expression_id
 * @param $revisionId       value for revision_id
 * @param $endemicMeaning   flag stored (as an int) in endemic_meaning
 */
function createSynonymOrTranslation($setId, $definedMeaningId, $expressionId, $revisionId, $endemicMeaning) {
    $dbw =& wfGetDB(DB_MASTER);
    $endemicMeaningInteger = (int) $endemicMeaning;

    $dbw->query(
        "insert into uw_syntrans(set_id,defined_meaning_id,expression_id,first_set,revision_id,endemic_meaning,is_latest_set) ".
        "values($setId, $definedMeaningId, $expressionId, $setId, $revisionId, $endemicMeaningInteger, 1)"
    );
}
|
| 193 | +
|
/**
 * Returns the maximum value of a column.
 *
 * $field and $table are placed into the SQL text verbatim, so they must be
 * trusted identifiers, never user input.
 *
 * @param $field column name
 * @param $table table name
 * @return the value of max($field), or 0 when the table has no rows (or no
 *         non-NULL values in that column)
 */
function getMaximum($field, $table) {
    $dbr =& wfGetDB(DB_SLAVE);
    $queryResult = $dbr->query("select max($field) as maximum from $table");
    $row = $dbr->fetchObject($queryResult);

    // An aggregate over an empty table still yields one row, holding NULL;
    // report that case as 0 like the "no row" case instead of leaking NULL.
    if ($row && !is_null($row->maximum))
        return $row->maximum;

    return 0;
}
|
| 204 | +
|
| 205 | +?>
|
Index: trunk/extensions/Wikidata/WiktionaryZ/WiktionaryZ.pm |
— | — | @@ -0,0 +1,966 @@ |
| 2 | +# Example usage to import UMLS into an existing WiktionaryZ database: |
| 3 | +# use WiktionaryZ; |
| 4 | +# my $importer=new WiktionaryZ('wikidatadb','root','MyPass'); |
| 5 | +# $importer->setSourceDB('umls'); |
| 6 | +# $importer->importUMLS(); |
| 7 | +# |
| 8 | +# NOTE: When importing UMLS, we expect the presence of the semantic network data |
| 9 | +# in the tables SRDEF and the manually created tables SEMRELHIER and SEMTYPEHIER. |
| 10 | +# SEMRELHIER and SEMTYPEHIER contain information about the relations between |
| 11 | +# semantic types and relation types, using RB as the code for "broader than" |
| 12 | +# and RN for "narrower than". |
| 13 | + |
| 14 | +# Todo for GEMET: |
| 15 | +# - deal with homonyms (esp. when merging data), avoid duplicate page titles in the same language (use addMeaning) |
| 16 | +# - fix mixup bug caused by usage of "+1" with $self->getMaxId |
| 17 | +# |
| 18 | +# Todo for UMLS: |
| 19 | +# SyntransCollection |
| 20 | +# RelationCollection |
| 21 | +# Fully deal with alternative definitions referring to the same concept |
| 22 | +# Deal with preferred lexical expressions, primary concepts (general weighting mechanism?) |
| 23 | + |
| 24 | +package WiktionaryZ; |
| 25 | +use DBI; |
| 26 | +use Encode; |
| 27 | +use POSIX qw(strftime); |
| 28 | + |
# Constructor. Records the connection settings of the target database on the
# new object; no connection is opened here.
# Arguments after the class name, in order: database name, user, password,
# then optionally host (default 'localhost'), port (default '3306') and DBI
# driver (default 'mysql').
# Returns the blessed object.
sub new {
    my ($class, $db, $user, $pass, $host, $port, $driver) = @_;
    my $self = {
        targetdb     => $db,
        targetuser   => $user,
        targetpass   => $pass,
        targethost   => $host   || 'localhost',
        targetport   => $port   || '3306',
        targetdriver => $driver || 'mysql',
    };
    return bless($self, $class);
}
| 41 | + |
# Records the connection settings of the source database (the one data is
# imported from). Only the database name is required: user, password, host,
# port and driver each fall back to the corresponding target* setting when
# omitted or false.
sub setSourceDB {
    my ($self, $db, $user, $pass, $host, $port, $driver) = @_;
    $self->{sourcedb}     = $db;
    $self->{sourceuser}   = $user   || $self->{targetuser};
    $self->{sourcepass}   = $pass   || $self->{targetpass};
    $self->{sourcehost}   = $host   || $self->{targethost};
    $self->{sourceport}   = $port   || $self->{targetport};
    $self->{sourcedriver} = $driver || $self->{targetdriver};
}
| 51 | + |
# Opens the DBI connection to the source database using the source* settings
# and stores the handle in $self->{dbs}.
# Dies with the DBI error text when the connection cannot be established,
# because every importer would otherwise fail later with an unhelpful
# "can't call method on undefined value" error.
# Returns the database handle.
sub connectSourceDB {
    my $self=shift;
    my $dsn=join(':', 'dbi', $self->{sourcedriver}, $self->{sourcedb}, $self->{sourcehost}, $self->{sourceport});
    $self->{dbs}=DBI->connect($dsn,$self->{sourceuser},$self->{sourcepass})
        or die "Cannot connect to source database ($dsn): $DBI::errstr\n";
    return $self->{dbs};
}
| 57 | + |
# Opens the DBI connection to the target database using the target* settings
# and stores the handle in $self->{dbt}.
# Dies with the DBI error text when the connection cannot be established,
# because every importer would otherwise fail later with an unhelpful
# "can't call method on undefined value" error.
# Returns the database handle.
sub connectTargetDB {
    my $self=shift;
    my $dsn=join(':', 'dbi', $self->{targetdriver}, $self->{targetdb}, $self->{targethost}, $self->{targetport});
    $self->{dbt}=DBI->connect($dsn,$self->{targetuser},$self->{targetpass})
        or die "Cannot connect to target database ($dsn): $DBI::errstr\n";
    return $self->{dbt};
}
| 63 | + |
| 64 | + |
# Runs the UMLS import.
# The optional $level (default 0) selects where to start: each stage below is
# skipped when $level is at or above its threshold, so 0 runs everything,
# 1 starts at the relation types, 2 at the relations, 3 at the semantic
# network types and 4 at the semantic type assignments.
# At level 0 the collections are created (bootstrapCollections); at any other
# level the existing ones are looked up (getCollections).
sub importUMLS {
    my ($self, $level) = @_;
    $level ||= 0;

    $self->connectSourceDB();
    $self->connectTargetDB();

    my %la = $self->loadLangs();
    $self->{la} = \%la;
    my %la_iso = $self->loadLangsIso();
    $self->{la_iso} = \%la_iso;

    my %cid = $level ? $self->getCollections() : $self->bootstrapCollections();
    $self->{cid} = \%cid;

    # Stage 0: terms
    if ($level < 1) {
        $self->importUMLSterms("CSP", $self->{cid}{'CRISP'});
        $self->importUMLSterms("ICPC%", $self->{cid}{'ICPC'});
        #importUMLSterms("MSH",$self->{cid}{'MESH'});
    }

    # Stage 1: relation types
    if ($level < 2) {
        $self->importUMLSrelationtypes($_) for ('REL', 'RELA');
    }

    # Stage 2: relations between concepts
    if ($level < 3) {
        my %rt = $self->loadReltypes();
        $self->{reltypes} = \%rt;
        foreach my $sab ('CSP', 'ICPC%') {
            $self->importUMLSrelations('REL', $sab);
            $self->importUMLSrelations('RELA', $sab);
        }
        #importUMLSrelations('REL','MSH');
        #importUMLSrelations('RELA','MSH');
    }

    # Stage 3: semantic network types and their hierarchy
    if ($level < 4) {
        $self->importSNtypes('STY');
        $self->importSNtypes('RL');
        $self->importSTrelations('STY');
        $self->importSTrelations('RL');
        # $self->importSTrelations2();
    }

    # Stage 4: semantic type assignments
    if ($level < 5) {
        my %attribs = $self->loadAttributes();
        $self->{attribs} = \%attribs;
        $self->importUMLSstypes('CSP');
        $self->importUMLSstypes('ICPC%');
    }
}
| 115 | + |
# Runs the GEMET import: connects to both databases, loads the language table,
# creates the GEMET collections, initialises the relation types for the
# GEMETREL collection, then imports terms, relations and themes in that order.
sub importGEMET {
    my ($self) = @_;

    $self->connectSourceDB();
    $self->connectTargetDB();

    my %la = $self->loadLangs();
    $self->{la} = \%la;

    my %cid = $self->bootstrapGemetCollection();
    $self->{cid} = \%cid;

    # The relation types must exist before they can be loaded and used.
    $self->initRel($self->{cid}{'GEMETREL'});
    my %rt = $self->loadReltypes();
    $self->{reltypes} = \%rt;

    $self->importGemetTerms();
    $self->importGemetRelations();
    $self->importGemetThemes();
}
| 131 | + |
# For every (CUI, STY) pair of MRSTY whose concept appears in MRCONSO with a
# SAB matching $sab (a SQL LIKE pattern), adds a relation with relation type 0
# from the concept's defined meaning to the attribute registered for that STY
# in $self->{attribs}. Duplicate checking is enabled.
# The second argument is read but not used.
sub importUMLSstypes() {
    my ($self, $sab, $cid) = @_;
    my $assocs = $self->{dbs}->prepare("select MRSTY.CUI, MRSTY.STY from MRCONSO,MRSTY where MRCONSO.SAB like ? and MRCONSO.CUI=MRSTY.CUI");
    $assocs->execute($sab);

    while (my $pair = $assocs->fetchrow_hashref()) {
        my %concept = $self->getMidForMember($pair->{CUI});
        my $attribute_mid = $self->{attribs}{$pair->{STY}};
        #print "$concept{mid} is a $pair->{STY} ($attribute_mid)\n";
        $self->addRelation($concept{rid}, 0, $concept{mid}, $attribute_mid, 1);
    }
}
| 147 | + |
| 148 | + |
# Looks up the ids of the already existing UMLS collections.
# Each collection is found through the English expression naming it:
# findItem -> findMeaning -> findCollection.
# Returns a hash mapping the short collection key to its collection id.
sub getCollections(){
    my $self=shift;
    my @known = (
        ['CRISP', 'CRISP Thesaurus, 2005'],
        ['STY',   'Semantic Network 2005AC Semantic Types'],
        ['RL',    'Semantic Network 2005AC Relation Types'],
        ['REL',   'UMLS Relation Types 2005'],
        ['RELA',  'UMLS Relation Attributes 2005'],
        ['ICPC',  'The International Classification of Primary Care (ICPC), 1993'],
        ['MESH',  'Medical Subject Headings (MeSH), 2005'],
    );

    my %cid;
    foreach my $entry (@known) {
        my ($key, $title) = @$entry;
        $cid{$key} = $self->findCollection($self->findMeaning($self->findItem($title, $self->{la}{'en'})));
    }
    return %cid;
}
| 161 | + |
# Returns the collection_id of the latest uw_collection_ns row whose
# collection_mid equals the given defined meaning id, or undef when there is
# no such row.
sub findCollection() {
    my ($self, $mid) = @_;
    my $lookup = $self->{dbt}->prepare("select collection_id from uw_collection_ns where collection_mid=? and is_latest=1");
    $lookup->execute($mid);
    my $found = $lookup->fetchrow_hashref();
    return $found->{collection_id};
}
| 170 | + |
# SEMTYPEHIER and SEMRELHIER contain only the is_a relationships, whereas
# srstr contains all others.
# FIXME: only use SRSTR
#
# Imports every non-'isa' row of srstr as a relation between the two semantic
# types it names. Both types are resolved in the 'STY' collection and the
# relation type through $self->{reltypes}. Duplicate checking is enabled.
sub importSTrelations2() {
    my ($self) = @_;
    my $sty_collection = $self->{cid}{'STY'};
    my $rows = $self->{dbs}->prepare("select * from srstr where rel!='isa'");
    $rows->execute();

    while (my $row = $rows->fetchrow_hashref()) {
        my %from = $self->getMidForMember($row->{TYPE1}, $sty_collection);
        my %to   = $self->getMidForMember($row->{TYPE2}, $sty_collection);
        my $rtmid = $self->{reltypes}{$row->{REL}};
        #print "Adding relation $row->{REL} ($rtmid) between $row->{TYPE1} and $row->{TYPE2}\n";
        $self->addRelation($from{rid}, $rtmid, $from{mid}, $to{mid}, 1);
    }
}
| 186 | + |
| 187 | + |
# Imports the hierarchy rows of the semantic network as relations.
# $which selects the hierarchy: 'STY' reads table semtypehier (columns
# SEMTYPE1/SEMTYPE2), 'RL' reads table semrelhier (columns RELTYPE1/RELTYPE2).
# Both members of each row are resolved in the collection $self->{cid}{$which}
# and the relation type through $self->{reltypes}; duplicate checking is
# enabled.
# Any other value of $which is reported with a warning and nothing is
# imported (previously this ran a query with an empty table name).
sub importSTrelations {
    my $self=shift;
    my $which=shift;

    my %hierarchy=(
        'STY' => ['semtypehier','SEMTYPE1','SEMTYPE2'],
        'RL'  => ['semrelhier','RELTYPE1','RELTYPE2'],
    );
    my $spec=defined($which) ? $hierarchy{$which} : undef;
    if(!$spec) {
        warn "importSTrelations: unknown hierarchy '".(defined($which) ? $which : '')."', nothing imported\n";
        return;
    }
    my ($table,$field1,$field2)=@$spec;

    my $gettypehier=$self->{dbs}->prepare("select * from $table");
    $gettypehier->execute();
    while(my $typehier=$gettypehier->fetchrow_hashref()) {
        my %rv1=$self->getMidForMember($typehier->{$field1},$self->{cid}{$which});
        my %rv2=$self->getMidForMember($typehier->{$field2},$self->{cid}{$which});
        my $rtmid=$self->{reltypes}{$typehier->{RELATION}};
        print "Adding relation $typehier->{RELATION} ($rtmid) between $typehier->{$field1} and $typehier->{$field2}\n";
        $self->addRelation($rv1{rid},$rtmid,$rv1{mid},$rv2{mid},1);
    }
}
| 214 | + |
# Resolves a collection member to its defined meaning.
# $member_id - the collection-internal identifier for this member
# $cid       - the collection in which to search for this member (optional;
#              when omitted or false, all collections are searched)
# Only rows flagged is_latest_set=1 are considered and at most one is read.
# Returns a hash with keys 'mid' (DefinedMeaningID) and 'rid' (revision id);
# both are undef when no row matches.
sub getMidForMember {
    my ($self, $member_id, $cid) = @_;

    my ($sql, @bind) = $cid
        ? ("select member_mid,revision_id from uw_collection_contents where collection_id=? and internal_member_id=? and is_latest_set=1 limit 1", $cid, $member_id)
        : ("select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1", $member_id);

    my $lookup = $self->{dbt}->prepare($sql);
    $lookup->execute(@bind);
    my $member = $lookup->fetchrow_hashref();

    my %rv;
    $rv{mid} = $member->{member_mid};
    $rv{rid} = $member->{revision_id};
    return %rv;
}
| 237 | + |
# Loads the members of every collection of type 'RELT'.
# Returns a hash mapping each member's internal_member_id to its member_mid.
sub loadReltypes {
    my $self=shift;
    my %reltypes;
    # Get the relation types (statement handle kept lexical so it does not
    # leak into the package namespace)
    my $getreltype=$self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='RELT' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
    $getreltype->execute();
    while (my $reltype=$getreltype->fetchrow_hashref()) {
        $reltypes{$reltype->{internal_member_id}}=$reltype->{member_mid};
    }
    return %reltypes;
}
| 249 | + |
# Loads the members of every collection of type 'ATTR'.
# Returns a hash mapping each member's internal_member_id to its member_mid.
sub loadAttributes {
    my $self=shift;
    my %attributes;
    # Statement handle kept lexical so it does not leak into the package
    # namespace.
    my $getatt=$self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='ATTR' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
    $getatt->execute();
    while (my $att=$getatt->fetchrow_hashref()) {
        $attributes{$att->{internal_member_id}}=$att->{member_mid};
    }
    return %attributes;
}
| 260 | + |
| 261 | + |
# Imports the semantic network entries of one type from srdef.
# $type is the srdef.type value to import and is also the key of the target
# collection in $self->{cid}.
# For each row the abbreviation (semtypeab) is used unchanged as the
# collection-internal member code; with underscores turned into spaces and
# lower-cased it becomes the English expression. The row's definition is
# attached as English meaning text.
sub importSNtypes {
    my $self=shift;
    my $type=shift;
    # Statement handle kept lexical so it does not leak into the package
    # namespace.
    my $getsemtypes=$self->{dbs}->prepare("select semtypeab,type,definition from srdef where type=?");
    $getsemtypes->execute($type);
    while (my $semtype=$getsemtypes->fetchrow_hashref()) {
        my $type_expression=$semtype->{semtypeab};
        my $type_code=$type_expression;
        $type_expression=~s/_/ /g;
        $type_expression=lc($type_expression);
        my %rv=$self->addExpression($type_expression,$self->{la}{'en'},0,$self->{cid}{$type},$type_code);
        $self->addMeaningText($rv{'rid'},$rv{'mid'},$semtype->{definition},undef,$self->{la}{'en'});
        #print $type_expression." - $self->{cid}{$type} - $type_code\n";
    }
}
| 279 | + |
# Imports the relations of MRREL between concepts that were already imported.
# $which  - 'REL' (column rel) or 'RELA' (column rela, empty values skipped)
# $source - SAB restriction as a MySQL LIKE pattern
# A relation is added only when both concepts resolve to different defined
# meanings and the relation code is known in $self->{reltypes}; duplicate
# checking is enabled. Concepts that cannot be resolved are reported on
# stdout.
# Any other value of $which is reported with a warning and nothing is imported.
sub importUMLSrelations {
    my $self=shift;
    my $which=shift; # REL or RELA
    my $source=shift; # SAB as MySQL LIKE string
    my $getrels;

    if($which eq 'REL') {
        $getrels=$self->{dbs}->prepare("select cui1,cui2,rel from MRREL where sab like ?");
    } elsif($which eq 'RELA') {
        $getrels=$self->{dbs}->prepare("select cui1,cui2,rela from MRREL where sab like ? and rela!=''");
    } else {
        warn "importUMLSrelations: unknown relation kind '$which', nothing imported\n";
        return;
    }
    $getrels->execute($source);

    # Prepared once and re-executed for every lookup instead of once per row.
    my $getmid=$self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1");
    my $column=lc($which);

    while(my $rel=$getrels->fetchrow_hashref()) {
        my $relid=$rel->{$column};
        # These mean the same thing
        if($relid eq 'CHD') {
            $relid='RN';
        } elsif($relid eq 'PAR') {
            $relid='RB';
        }
        # Note that the direction in UMLS is opposite to ours:
        # $mid1 belongs to cui2 and $mid2 belongs to cui1.
        $getmid->execute($rel->{cui2});
        my $mid1=$getmid->fetchrow_hashref();
        $getmid->execute($rel->{cui1});
        my $mid2=$getmid->fetchrow_hashref();
        # FIXME: We are ignoring term relations for now!
        if(($mid1->{member_mid} && $mid2->{member_mid}) && ($mid1->{member_mid} != $mid2->{member_mid}) && $self->{reltypes}{$relid}) {
            # Add the relation
            #print "Found relation ".$relid." (".$self->{reltypes}{$relid}.") between ".$mid1->{member_mid}." and ".$mid2->{member_mid}.".\n";
            $self->addRelation($mid1->{revision_id},$self->{reltypes}{$relid},$mid1->{member_mid},$mid2->{member_mid},1);
        } else {
            # Report the CUI whose lookup actually failed ($mid1 <-> cui2,
            # $mid2 <-> cui1).
            if(!$mid1->{member_mid} && $mid2->{member_mid}) {
                print "Did not find MID for ".$rel->{cui2}."!\n";
            } elsif($mid1->{member_mid} && !$mid2->{member_mid}) {
                print "Did not find MID for ".$rel->{cui1}."!\n";
            } elsif(!$mid1->{member_mid} && !$mid2->{member_mid}) {
                print "Did not find MIDs for ".$rel->{cui1}." and ".$rel->{cui2}."!\n";
            }
        }
    }

}
| 323 | + |
| 324 | + |
# Creates the two GEMET collections: 'GEMETREL' (type 'RELT') and 'GEMET'
# (no type), each backed by a newly added English expression.
# Returns a hash mapping those two keys to the new collection ids.
sub bootstrapGemetCollection {
    my $self=shift;
    my %cid;
    my %rv; # lexical: previously an undeclared package global
    %rv=$self->addExpression('GEMET Environmental Thesaurus Relation Types',$self->{la}{'en'});
    $cid{'GEMETREL'}=$self->addCollection($rv{mid},'RELT');
    # NOTE(review): this title is identical to the one used for GEMETREL above,
    # which looks like a copy/paste slip - confirm the intended name of the
    # GEMET term collection before changing it.
    %rv=$self->addExpression('GEMET Environmental Thesaurus Relation Types',$self->{la}{'en'});
    $cid{'GEMET'}=$self->addCollection($rv{mid},'');
    return %cid;
}
| 334 | + |
| 335 | + |
# Creates the UMLS collections. For each one an English expression carrying
# the collection's title is added and a collection of the listed type is
# created for the resulting defined meaning.
# Returns a hash mapping the short collection key to the new collection id.
sub bootstrapCollections {
    my $self=shift;

    # [ key, title, collection type ] - created in this order
    my @wanted = (
        ['CRISP', 'CRISP Thesaurus, 2005', ''],
        ['STY',   'Semantic Network 2005AC Semantic Types', 'ATTR'],
        ['RL',    'Semantic Network 2005AC Relation Types', 'RELT'],
        ['REL',   'UMLS Relation Types 2005', 'RELT'],
        ['RELA',  'UMLS Relation Attributes 2005', 'RELT'],
        ['ICPC',  'The International Classification of Primary Care (ICPC), 1993', ''],
        ['MESH',  'Medical Subject Headings (MeSH), 2005', ''],
    );

    my %cid;
    foreach my $entry (@wanted) {
        my ($key, $title, $type) = @$entry;
        my %rv = $self->addExpression($title, $self->{la}{'en'});
        $cid{$key} = $self->addCollection($rv{mid}, $type);
    }
    return %cid;
}
| 359 | + |
# Inserts a uw_collection_ns row (is_latest=1) for the given defined meaning
# and collection type, then sets the row's first_ver to its own id.
# Returns the new collection id.
sub addCollection {
    my ($self, $mid, $collection_type) = @_;
    my $dbt = $self->{dbt};

    my $insert = $dbt->prepare('INSERT INTO uw_collection_ns(collection_mid,is_latest,collection_type) values(?,1,?)');
    $insert->execute($mid, $collection_type);
    my $new_id = $dbt->last_insert_id(undef, undef, undef, undef);

    # A new collection is recorded as its own first version.
    my $set_first = $dbt->prepare('UPDATE uw_collection_ns set first_ver=? where collection_id=?');
    $set_first->execute($new_id, $new_id);

    return $new_id;
}
| 371 | + |
# Imports the relation types of table rel ($which eq 'REL') or rela
# ($which eq 'RELA'). Each row's FULL name is added as an English expression
# in the collection $self->{cid}{$which}, with ABBREV as its
# collection-internal member code.
sub importUMLSrelationtypes {
    my ($self, $which) = @_;
    my $rows;

    if ($which eq 'REL') {
        # CHD and PAR are to be interpreted as RN and RB, SUBX is not used
        $rows = $self->{dbs}->prepare("select * from rel where ABBREV!='CHD' and ABBREV!='PAR' and ABBREV!='SUBX'");
    }
    elsif ($which eq 'RELA') {
        $rows = $self->{dbs}->prepare("select * from rela");
    }
    $rows->execute();

    my $collection = $self->{cid}{$which};
    my $english = $self->{la}{'en'};
    while (my $row = $rows->fetchrow_hashref()) {
        my %added = $self->addExpression($row->{FULL}, $english, 0, $collection, $row->{ABBREV});
    }
}
| 387 | + |
# Imports the terms of MRCONSO whose SAB matches $sab.
# $sab - the source abbreviation to import (SQL LIKE pattern)
# $cid - the collection to associate new defined meanings with
# The first term seen for a CUI creates a new defined meaning in $cid (with
# the CUI as member code) and pulls in all MRDEF definitions for that CUI as
# English meaning texts; later terms for the same CUI are added as
# synonyms/translations of that meaning.
sub importUMLSterms {
    my $self=shift;
    my $sab=shift; # the source abbreviation which to import
    my $cid=shift; # which collection to associate the defined meanings with
    # Both statements are lexical and prepared once, not per row.
    my $getterm=$self->{dbs}->prepare("select str,cui,lat from MRCONSO where sab like ?");
    my $getdefs=$self->{dbs}->prepare("select def from MRDEF where cui=?");
    $getterm->execute($sab);
    while(my $r=$getterm->fetchrow_hashref()) {
        my %rv;
        my %cuimid=$self->getMidForMember($r->{cui});

        # Create new expression / Defined Meaning
        if(!$cuimid{mid}) {
            %rv=$self->addExpression($r->{str},$self->{la_iso}{lc($r->{lat})},0,$cid,$r->{cui});
            # If this is the first time we encounter this CUI, import the definitions
            # Note that we'll take any definitions, regardless of the SABs specified!
            if($rv{mid}!=-1) {
                $getdefs->execute($r->{cui});
                while(my $d=$getdefs->fetchrow_hashref()) {
                    # UMLS only has English definitions
                    $self->addMeaningText($rv{rid},$rv{mid},$d->{def},0,$self->{la}{'en'});
                }
            }
        # Add as SynTrans to existing Defined Meaning
        } else {
            %rv=$self->addExpression($r->{str},$self->{la_iso}{lc($r->{lat})},$cuimid{mid});
        }
    }
}
| 420 | + |
| 421 | + |
# Imports the GEMET terms.
# Every English term becomes a new defined meaning; the terms of the same
# concept in other languages are added as translations of it, and the rows of
# the scope table for the concept are attached as meaning texts sharing one
# translated-content set.
# The optional argument is read but currently unused.
sub importGemetTerms {
    my $self=shift;
    my $cid=shift;
    # All statements are lexical and prepared once, not per term.
    my $getterm=$self->{dbs}->prepare("select * from term where langcode=?");
    my $gettrans=$self->{dbs}->prepare("select name,langcode from term where id_concept=? and langcode!='en'");
    my $getdef=$self->{dbs}->prepare("select definition,langcode from scope where id_concept=?");
    # Get all English terms as base
    $getterm->execute('en');
    while(my $r=$getterm->fetchrow_hashref()) {
        # Add English term as defined meaning
        my %rv=$self->addExpression($r->{name},$self->{la}{'en'},0);

        # All translations
        $gettrans->execute($r->{id_concept});
        # Add them with the same meaning ID
        while(my $t=$gettrans->fetchrow_hashref()) {
            print "Language: $t->{langcode}\n";
            my %tv=$self->addExpression($t->{name},$self->{la}{$t->{langcode}},$rv{mid});
        }
        # All definitions
        $getdef->execute($r->{id_concept});
        my $tcid=0;
        while(my $d=$getdef->fetchrow_hashref()) {
            if(!$tcid) {
                # First definition: opens the translated-content set
                my %mv=$self->addMeaningText($rv{rid},$rv{mid},$d->{definition},0,$self->{la}{$d->{langcode}});
                $tcid=$mv{tcid};
            } else {
                # Further definitions join the set opened above
                $self->addMeaningText($rv{rid},$rv{mid},$d->{definition},$tcid,$self->{la}{$d->{langcode}});
            }
        }
    }
}
| 456 | + |
| 457 | + |
# Imports the rows of the GEMET relation table.
# Both ends (id_concept and id_relation) are resolved with findGemetItem; a
# relation of type $self->{reltypes}{id_type} is added only when both resolve
# to a defined meaning. No duplicate check is requested.
sub importGemetRelations {
    my $self=shift;
    # Import GEMET relations
    my $getrels=$self->{dbs}->prepare("select * from relation");
    $getrels->execute();
    while(my $rrow=$getrels->fetchrow_hashref()) {
        # Lexical per-row results (previously undeclared package globals)
        my %rv_A=$self->findGemetItem($rrow->{id_concept});
        my %rv_B=$self->findGemetItem($rrow->{id_relation});
        if($rv_A{mid} && $rv_B{mid}) {
            $self->addRelation($rv_A{rid},$self->{reltypes}{$rrow->{id_type}},$rv_A{mid},$rv_B{mid});
        }
    }
}
| 471 | + |
# Imports the GEMET themes and ties the concepts of each theme to it.
# A theme description may list several themes separated by ',' or ';'; each
# non-empty part is handled separately:
#  - if an expression for it already exists, its defined meaning is reused;
#  - otherwise, for unsplit descriptions, the other rows of the same theme id
#    are searched for a known expression so the new term can be added as a
#    translation of that meaning; failing that it is added on its own.
# For the first part processed of each theme id, every concept listed in
# concept_theme gets a relation of type $self->{reltypes}{'it'} to the theme's
# meaning.
sub importGemetThemes {
    my $self=shift;
    # Get all themes
    my $getthemes=$self->{dbs}->prepare("select * from theme");
    my $gettheme_set=$self->{dbs}->prepare("select * from theme where id_theme=?");
    # Prepared once instead of once per theme
    my $getconcepts=$self->{dbs}->prepare('select id_concept from concept_theme where id_theme=?');
    # Theme ids whose concepts have already been tied up
    my %have_rel;
    $getthemes->execute();
    while(my $theme_row=$getthemes->fetchrow_hashref()) {
        my $theme=$theme_row->{description};
        # The capturing group makes split also return the separators' optional
        # space (or undef); those entries are blanked and skipped below.
        my @themes=split(/[,;]( ){0,1}/,$theme);
        foreach(@themes) {
            $_=~s/^ *$//i;
            if($_) {
                # Does this theme have a expression?
                my $t=$_;
                my %it=$self->findItem($t,$self->{la}{$theme_row->{langcode}},1);
                if($it{liid}) {
                    # Get the meaning
                    print "NEW THEME: $t - retrieving existing MID for LIID... ".$it{liid};
                    # Look up the meaning of the expression just found (this
                    # used to read an unrelated global %rv).
                    $it{mid}=$self->findMeaning($it{liid});
                    print $it{mid}."\n";
                    #print $t. " is a dupe! - $dupes\n";
                    #$dupes++;
                } else {
                    # Do we have any of its translations?
                    # We can only add those if the theme does
                    # not contain a , - otherwise we can't match!
                    my $tra_mid=0;
                    if(!($theme_row->{description}=~m/[,;]/i)) {
                        print "NEW THEME: $t - no record, looking for its known translations in GEMET\n";
                        #print "Checking for translations of ".$theme_row->{description}."\n";
                        $gettheme_set->execute($theme_row->{id_theme});
                        while((my $tra_row=$gettheme_set->fetchrow_hashref()) && !$tra_mid) {
                            if(my $tra_lid=$self->findItem($tra_row->{description},$self->{la}{$tra_row->{langcode}})) {
                                $tra_mid=$self->findMeaning($tra_lid);
                            }
                        }
                    } else {
                        print "NEW THEME: $t - split from the original GEMET data\n";
                    }
                    # Let's make one
                    if($tra_mid) {
                        print "Adding new term as translation of $tra_mid\n";
                        %it = $self->addExpression($t,$self->{la}{$theme_row->{langcode}},$tra_mid);
                    } else {
                        print "Adding new term independently, we do not know its translations.\n";
                        %it = $self->addExpression($t,$self->{la}{$theme_row->{langcode}});
                    }
                }

                if(!$have_rel{$theme_row->{id_theme}}) {
                    # Get all items which have this relation
                    $getconcepts->execute($theme_row->{id_theme});
                    while(my $concrow=$getconcepts->fetchrow_hashref()) {
                        # Get LIID,RID->meaning for the item
                        my %tr=$self->findGemetItem($concrow->{id_concept});
                        if($tr{rid}) {
                            $self->addRelation($tr{rid},$self->{reltypes}{'it'},$tr{mid},$it{mid});
                            print "Tied up a relation..";
                        } else {
                            print "Missing record to tie the relation to..";
                        }
                    }
                    print "\n";
                    $have_rel{$theme_row->{id_theme}}=1;
                }

            }
        }
    }
    #Split theme into parts
}
| 547 | + |
# Resolves a GEMET concept id to an item of the target database.
# One term of the concept (any language, first row returned) is read from the
# source database and looked up with findItem; the defined meaning of the
# resulting 'liid' is then stored under 'mid'.
# Returns the hash produced by findItem, extended with 'mid'.
sub findGemetItem() {
    my ($self, $concept_id) = @_;

    # get a word, language
    my $term_lookup = $self->{dbs}->prepare("select langcode,name from term where id_concept=? LIMIT 1");
    $term_lookup->execute($concept_id);
    my $term = $term_lookup->fetchrow_hashref();

    # find an expression + meaning
    my %item = $self->findItem($term->{name}, $self->{la}{$term->{langcode}}, 1);
    $item{mid} = $self->findMeaning($item{liid});
    return %item;
}
| 561 | + |
# Adds a row to uw_meaning_relations.
# $revid         - revision id stored with the relation
# $rtid          - defined meaning id of the relation type
# $mid_A, $mid_B - defined meaning ids of the two related meanings
# $checkfordupes - when true, nothing is inserted if a latest relation with
#                  the same two meanings and type already exists
# The set id is the one already used for $mid_A (getSetIdWhere) or, failing
# that, the value of getMaxId('set_id','uw_meaning_relations').
# Returns 0 when a duplicate was found, otherwise the result of the insert's
# execute().
sub addRelation {
    my $self=shift;
    my $revid=shift;
    my $rtid=shift;
    my $mid_A=shift;
    my $mid_B=shift;
    my $checkfordupes=shift;
    if($checkfordupes) {
        my $checkdupes=$self->{dbt}->prepare('select 1 as one from uw_meaning_relations where meaning1_mid=? and meaning2_mid=? and relationtype_mid=? and is_latest_set=1 limit 1');
        $checkdupes->execute($mid_A,$mid_B,$rtid);
        #print "Checking dupe $mid_A, $mid_B, relation type $rtid\n";
        my $dupecheck=$checkdupes->fetchrow_hashref();
        if($dupecheck->{one}) {
            print "Duplicate relation, not adding.\n";
            # A real false value: the bareword "false" is a true string in Perl.
            return 0;
        }
    }
    my $newkey=$self->getSetIdWhere('uw_meaning_relations','meaning1_mid',$mid_A) || $self->getMaxId('set_id','uw_meaning_relations');
    my $addrel=$self->{dbt}->prepare('insert into uw_meaning_relations(set_id,meaning1_mid,meaning2_mid,relationtype_mid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?,?)');
    return $addrel->execute($newkey,$mid_A,$mid_B,$rtid,1,$newkey,$revid);
}
| 584 | + |
| 585 | + |
# Finds the defined meaning an expression belongs to.
# uw_syntrans is searched first, then uw_defined_meaning; the second query is
# issued only when the first gives no usable id.
# Returns the defined_meaning_id, or 0 when neither table has one.
sub findMeaning() {
    my ($self, $liid) = @_;

    my @lookups = (
        # Search syntrans table
        "select defined_meaning_id from uw_syntrans where expression_id=?",
        "select defined_meaning_id from uw_defined_meaning where expression_id=? limit 1",
    );

    foreach my $sql (@lookups) {
        my $sth = $self->{dbt}->prepare($sql);
        $sth->execute($liid);
        my $row = $sth->fetchrow_hashref();
        return $row->{defined_meaning_id} if $row->{defined_meaning_id};
    }
    return 0;
}
| 604 | + |
| 605 | +# Stores $meaningtext as translated content and links it to DefinedMeaning $mid: as the primary definition if the meaning has none yet, otherwise as an alternative definition. Returns a hash with the key tcid. |
| 606 | +sub addMeaningText { |
| 607 | +    my $self=shift; |
| 608 | +    my $rid=shift;              # revision ID recorded on the new rows |
| 609 | +    my $mid=shift;              # DefinedMeaning ID the text describes |
| 610 | +    my $meaningtext=shift;      # definition text, stored in text.old_text |
| 611 | +    my $meaningtext_set=shift;  # optional TCID set to join with |
| 612 | +    my $lid=shift;              # ID, not code |
| 613 | +    my $dbh=$self->{dbt}; |
| 614 | +    my %rv; |
| 615 | + |
| 616 | +    # Add text row entry and get its row ID |
| 617 | +    my $maketext=$dbh->prepare('insert into text(old_text) values(?)'); |
| 618 | +    $maketext->execute($meaningtext); |
| 619 | +    my $tid=$dbh->last_insert_id(undef,undef,undef,undef); |
| 620 | +    # Join the given translated content set, or open a new one |
| 621 | +    my $tcid=$meaningtext_set || $self->getMaxId('set_id','translated_content'); |
| 622 | +    my $maketc=$dbh->prepare('insert into translated_content(set_id,language_id,text_id,first_set,revision_id) values(?,?,?,?,?)'); |
| 623 | +    $maketc->execute($tcid,$lid,$tid,$tcid,$rid); |
| 624 | +    $rv{tcid}=$tcid; |
| 625 | + |
| 626 | +    # Link the set to the meaning - unless it already is the primary set, i.e. the text just added is a translation of the existing definition |
| 627 | +    my $lookformeaning=$dbh->prepare('select meaning_text_tcid from uw_defined_meaning where defined_meaning_id=? and is_latest_ver=1'); |
| 628 | +    $lookformeaning->execute($mid); |
| 629 | +    my $mrow=$lookformeaning->fetchrow_hashref(); |
| 630 | +    my $primary=$mrow ? $mrow->{meaning_text_tcid} : 0; |
| 631 | +    if(!$primary) { |
| 632 | +        # No meaning text yet - this set becomes the definition |
| 633 | +        my $updatemeaning=$dbh->prepare('update uw_defined_meaning set meaning_text_tcid=? where defined_meaning_id=?'); |
| 634 | +        $updatemeaning->execute($tcid,$mid); |
| 635 | +    } elsif($primary!=$tcid) { |
| 636 | +        # There is a different meaning text - the new one is only an alternative |
| 637 | +        my $altset=$self->getSetIdWhere('uw_alt_meaningtexts','meaning_mid',$mid) || $self->getMaxId('set_id','uw_alt_meaningtexts'); |
| 638 | +        my $addaltmeaning=$dbh->prepare('insert into uw_alt_meaningtexts(set_id,meaning_mid,meaning_text_tcid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?)'); |
| 639 | +        $addaltmeaning->execute($altset,$mid,$tcid,1,$altset,$rid); |
| 640 | +    } |
| 641 | +    return %rv; |
| 642 | +} |
| 643 | + |
| 644 | + |
| 645 | +# Adds $expression in language $lid and links it to a DefinedMeaning. A new expression also gets its page, revision and expression rows; an existing one is reused. |
| 646 | +sub addExpression { |
| 647 | +    # Without $translation_of a new DefinedMeaning is created (and filed in $collection_id, if given); |
| 648 | +    # with it, the expression becomes a synonym/translation of that meaning. If SynTrans already |
| 649 | +    # links the two, nothing is added and mid is returned as -1. |
| 650 | +    # Returns a hash with rid, liid, mid, setid - and pid when a page was created. |
| 651 | +    my $self=shift; |
| 652 | +    my $expression=shift;      # spelling |
| 653 | +    my $lid=shift;             # ID, not code |
| 654 | +    my $translation_of=shift;  # 0 or MID (!), optional |
| 655 | +    my $collection_id=shift;   # optional |
| 656 | +    my $collection_internal_member_id=shift; # what does the collection use to refer to this member? |
| 657 | +    my $dbh=$self->{dbt}; |
| 658 | +    my %rv; |
| 659 | +    my ($rid,$liid); |
| 660 | + |
| 661 | +    # findItem may report "not found" as a lone 0; only a key/value list counts as a hit |
| 662 | +    my @found=$self->findItem($expression,$lid,1); |
| 663 | +    my %firv=@found>1 ? @found : (); |
| 664 | + |
| 665 | +    if(!$firv{liid}) { |
| 666 | + |
| 667 | +        #create page |
| 668 | +        my $pt=$self->canonize($expression); |
| 669 | +        my $makepage=$dbh->prepare('insert into page(page_namespace,page_title,page_is_new,page_title_language_id,page_touched) values(?,?,?,?,?)'); |
| 670 | +        $makepage->execute(16,$pt,1,$lid,$self->mwtimestamp()); |
| 671 | +        my $pid=$dbh->last_insert_id(undef,undef,undef,undef); |
| 672 | +        print "PID: $pid\n"; |
| 673 | +        $rv{pid}=$pid; |
| 674 | + |
| 675 | +        #create revision |
| 676 | +        my $makerev=$dbh->prepare('insert into revision(rev_page,rev_comment,rev_user,rev_user_text,rev_timestamp) values(?,?,?,?,?)'); |
| 677 | +        $makerev->execute($pid,'Initial import',2,'GEMET',$self->mwtimestamp()); |
| 678 | + |
| 679 | +        #get revision_id |
| 680 | +        $rid=$self->getId('select rev_id from revision where rev_page=?',$pid); |
| 681 | + |
| 682 | +        #update page to link to revision |
| 683 | +        my $updatepage=$dbh->prepare('update page set page_latest=? where page_id=?'); |
| 684 | +        $updatepage->execute($rid,$pid); |
| 685 | + |
| 686 | +        #create expression |
| 687 | +        my $makeitem=$dbh->prepare('insert into uw_expression_ns(spelling,language_id,is_latest) values(?,?,1)'); |
| 688 | +        $makeitem->execute($expression,$lid); |
| 689 | +        $liid=$dbh->last_insert_id(undef,undef,undef,undef); |
| 690 | + |
| 691 | +        # update firstver: a new expression is its own first version |
| 692 | +        my $updateitem=$dbh->prepare('update uw_expression_ns set first_ver=? where expression_id=?'); |
| 693 | +        $updateitem->execute($liid,$liid); |
| 694 | + |
| 695 | +        #update revision to link to expression |
| 696 | +        my $updaterev=$dbh->prepare('update revision set rev_data_id=? where rev_id=?'); |
| 697 | +        $updaterev->execute($liid,$rid); |
| 698 | + |
| 699 | +    } else { |
| 700 | +        # known expression: reuse it together with its revision |
| 701 | +        $rid=$firv{rid}; |
| 702 | +        $liid=$firv{liid}; |
| 703 | +    } |
| 704 | +    $rv{rid}=$rid; |
| 705 | +    $rv{liid}=$liid; |
| 706 | + |
| 707 | +    #create definedmeaning and/or syntrans record |
| 708 | +    if(!$translation_of) { |
| 709 | +        my $makemean=$dbh->prepare('insert into uw_defined_meaning(expression_id,revision_id) values(?,?)'); |
| 710 | +        $makemean->execute($liid,$rid); |
| 711 | +        # We always want a syntrans record, so in this case it links to its own |
| 712 | +        # def. meaning |
| 713 | +        $translation_of=$dbh->last_insert_id(undef,undef,undef,undef); |
| 714 | +        $rv{mid}=$translation_of; |
| 715 | +        my $updatemeaningver=$dbh->prepare('update uw_defined_meaning set first_ver=? where defined_meaning_id=?'); |
| 716 | +        $updatemeaningver->execute($translation_of,$translation_of); |
| 717 | +        if($collection_id) { |
| 718 | +            my $addtocoll=$dbh->prepare('insert into uw_collection_contents(set_id, collection_id, member_mid, is_latest_set, first_Set, revision_id, internal_member_id) values(?,?,?,?,?,?,?)'); |
| 719 | +            #fixme set association |
| 720 | +            $addtocoll->execute(1,$collection_id,$translation_of,1,1,$rid,$collection_internal_member_id); |
| 721 | +        } |
| 722 | +    } |
| 723 | + |
| 724 | +    # Check if we already have this specific record |
| 725 | +    my $checkdupes=$dbh->prepare('select set_id from uw_syntrans where defined_meaning_id=? and expression_id=?'); |
| 726 | +    $checkdupes->execute($translation_of,$liid); |
| 727 | +    my $duperow=$checkdupes->fetchrow_hashref(); |
| 728 | +    my $dupeid=$duperow ? $duperow->{set_id} : undef; |
| 729 | +    if(!$dupeid) { |
| 730 | + |
| 731 | +        # Check if this is part of a set. The lookup uses $translation_of, which holds the meaning in both the new-meaning and the translation case |
| 732 | +        my $getset=$dbh->prepare('select set_id from uw_syntrans where defined_meaning_id=? and is_latest_set=1'); |
| 733 | +        $getset->execute($translation_of); |
| 734 | +        my $row=$getset->fetchrow_hashref(); |
| 735 | +        my $setid=($row && $row->{set_id}) || $self->getMaxId('set_id','uw_syntrans'); |
| 736 | +        # Add syntrans record |
| 737 | +        my $maketrans=$dbh->prepare('insert into uw_syntrans(set_id,defined_meaning_id,expression_id,first_set,revision_id,is_latest_set) values(?,?,?,?,?,1)'); |
| 738 | +        $maketrans->execute($setid,$translation_of,$liid,$setid,$rid); |
| 739 | +        $rv{setid}=$setid; |
| 740 | +        $rv{mid}=$translation_of; |
| 741 | +    } else{ |
| 742 | +        $rv{setid}=$dupeid; # Dupe |
| 743 | +        $rv{mid}=-1; # Dupe |
| 744 | +    } |
| 745 | +    return %rv; |
| 746 | +} |
| 747 | + |
| 748 | +sub findItem { |
| 749 | +    # Looks up the latest expression row with exactly this spelling (binary compare) in language $lid. |
| 750 | +    # Plain mode returns the expression_id, or 0 if there is none. With $returnrid set it returns |
| 751 | +    # a hash (liid => expression_id, rid => revision ID), or an empty list if there is none. |
| 752 | +    my $self=shift; |
| 753 | +    my $expression=shift; |
| 754 | +    my $lid=shift;        # language ID |
| 755 | +    my $returnrid=shift;  # true: return the (liid, rid) hash instead of the bare ID |
| 756 | +    my $dbh=$self->{dbt}; |
| 757 | +    #print "Searching for $expression in $lid\n"; |
| 758 | +    my $getitem=$dbh->prepare("select expression_id from uw_expression_ns where spelling=binary ? and language_id=? and is_latest=1"); |
| 759 | +    $getitem->execute($expression,$lid); |
| 760 | +    my $item_row=$getitem->fetchrow_hashref(); |
| 761 | +    if(!$item_row) { |
| 762 | +        # An empty list keeps "my %h=$self->findItem(...)" well-formed; plain callers still get 0 |
| 763 | +        return $returnrid ? () : 0; |
| 764 | +    } |
| 765 | +    my $liid=$item_row->{expression_id}; |
| 766 | +    return $liid unless $returnrid; |
| 767 | +    my $getrev=$dbh->prepare('select rev_id from revision where rev_data_id=?'); |
| 768 | +    $getrev->execute($liid); |
| 769 | +    my $rev_row=$getrev->fetchrow_hashref(); |
| 770 | +    my %rv=(liid=>$liid, rid=>($rev_row ? $rev_row->{rev_id} : undef)); # rid stays undef if no revision row points at the expression |
| 771 | +    return %rv; |
| 772 | +} |
| 773 | + |
| 774 | + |
| 775 | +sub getMaxId { |
| 776 | +    # Returns the next free ID for $table: max($field)+1, or 1 when the table is empty. |
| 777 | +    # The value is only read here, so concurrent writers could be handed the same ID. |
| 778 | +    my $self=shift; |
| 779 | +    my $field=shift;  # column name; interpolated into the SQL, so it must be trusted |
| 780 | +    my $table=shift;  # table name; likewise interpolated |
| 781 | +    my ($max)=$self->{dbt}->selectrow_array("select max($field) as maxset from $table"); |
| 782 | +    return ($max || 0)+1; |
| 783 | +} |
| 784 | + |
| 785 | +sub getSetIdWhere { |
| 786 | +    # Returns the set_id of a latest-set row of $table whose $wherefield equals $wherekey, |
| 787 | +    # or undef when there is no such row. |
| 788 | +    my $self=shift; |
| 789 | +    my $table=shift;       # table name; interpolated into the SQL, so it must be trusted |
| 790 | +    my $wherefield=shift;  # column name; interpolated as well |
| 791 | +    my $wherekey=shift;    # value to match; bound as a placeholder |
| 792 | +    my ($setid)=$self->{dbt}->selectrow_array("select set_id from $table WHERE $wherefield=? AND is_latest_set=1 limit 1",undef,$wherekey); |
| 793 | +    return $setid; |
| 794 | +} |
| 795 | + |
| 796 | + |
| 797 | +sub getId { |
| 798 | +    # Runs the single-value query $prep and returns the first column of its first row, |
| 799 | +    # or undef when the query yields no row. |
| 800 | +    # The remaining arguments (@_) are bound to the placeholders in order. |
| 801 | +    my $self=shift; |
| 802 | +    my $prep=shift;  # SQL text, e.g. 'select rev_id from revision where rev_page=?' |
| 803 | +    # The value is read by position: extracting the column name from the SQL text does not |
| 804 | +    # work for aliased or qualified columns and picks up a stale $1 when the pattern misses. |
| 805 | +    my ($id)=$self->{dbt}->selectrow_array($prep,undef,@_); |
| 806 | +    return $id; |
| 807 | +} |
| 808 | + |
| 809 | +sub mwtimestamp { |
| 810 | +    my $self=shift; |
| 811 | +    use POSIX qw(strftime); |
| 812 | +    return strftime("%Y%m%d%H%M%S", gmtime); # YYYYMMDDHHMMSS in UTC - MediaWiki keeps its timestamps in UTC, not in server-local time |
| 813 | +} |
| 814 | + |
| 815 | + |
| 816 | +sub canonize { |
| 817 | +    # Puts an expression into page-title form: every space becomes an underscore. |
| 818 | +    my $self=shift; |
| 819 | +    my $title=shift; |
| 820 | +    $title=~tr/ /_/; |
| 821 | +    return $title; |
| 822 | +} |
| 823 | + |
| 824 | +sub initlangs { |
| 825 | +    # Seeds the language table (one row per language code) and language_names (the English and German name of each language). |
| 826 | +    my $self=shift; |
| 827 | +    my $dbh=$self->{dbt}; |
| 828 | +    # Keys are "<language code>_<code of the language the name is written in>". Non-ASCII |
| 829 | +    # letters are written as \x{..} escapes so the names do not depend on the file encoding. |
| 830 | +    %langs=( |
| 831 | +        en_en=>'English', |
| 832 | +        en_de=>'Englisch', |
| 833 | +        'en-US_de'=>'Englisch (USA)', |
| 834 | +        'en-US_en'=>'English (United States)', |
| 835 | +        bg_en=>'Bulgarian', |
| 836 | +        bg_de=>'Bulgarisch', |
| 837 | +        cs_en=>'Czech', |
| 838 | +        cs_de=>'Tschechisch', |
| 839 | +        da_en=>'Danish', |
| 840 | +        da_de=>"D\x{E4}nisch", |
| 841 | +        de_en=>'German', |
| 842 | +        de_de=>'Deutsch', |
| 843 | +        es_en=>'Spanish', |
| 844 | +        es_de=>'Spanisch', |
| 845 | +        et_en=>'Estonian', |
| 846 | +        et_de=>'Estnisch', |
| 847 | +        eu_en=>'Basque', |
| 848 | +        eu_de=>'Baskisch', |
| 849 | +        fi_en=>'Finnish', |
| 850 | +        fi_de=>'Finnisch', |
| 851 | +        fr_en=>'French', |
| 852 | +        fr_de=>"Franz\x{F6}sisch", |
| 853 | +        hu_en=>'Hungarian', |
| 854 | +        hu_de=>'Ungarisch', |
| 855 | +        it_en=>'Italian', |
| 856 | +        it_de=>'Italienisch', |
| 857 | +        nl_en=>'Dutch', |
| 858 | +        nl_de=>"Niederl\x{E4}ndisch", |
| 859 | +        no_en=>'Norwegian', |
| 860 | +        no_de=>'Norwegisch', |
| 861 | +        pl_en=>'Polish', |
| 862 | +        pl_de=>'Polnisch', |
| 863 | +        pt_en=>'Portuguese', |
| 864 | +        pt_de=>'Portugiesisch', |
| 865 | +        ru_en=>'Russian', |
| 866 | +        ru_de=>'Russisch', |
| 867 | +        sk_en=>'Slovak', |
| 868 | +        sk_de=>'Slowakische Sprache', |
| 869 | +        sl_en=>'Slovenian', |
| 870 | +        sl_de=>'Slowenisch', |
| 871 | +        el_en=>'Greek', |
| 872 | +        el_de=>'Griechisch', |
| 873 | +        sv_en=>'Swedish', |
| 874 | +        sv_de=>'Schwedisch'); |
| 875 | + |
| 876 | +    # Sorted so that the generated language IDs do not depend on hash ordering |
| 877 | +    my @keys=sort keys %langs; |
| 878 | + |
| 879 | +    # Pass 1: one language row per code, taken from the "<code>_en" keys |
| 880 | +    my $addwm=$dbh->prepare("insert into language(wikimedia_key) values(?)"); |
| 881 | +    foreach my $key (@keys) { |
| 882 | +        my ($lang,$wordlang)=$key=~m/(.*?)_(.*)/ or next; |
| 883 | +        $addwm->execute($lang) if $wordlang eq 'en'; |
| 884 | +    } |
| 885 | + |
| 886 | +    # Pass 2: the names. The IDs of the two naming languages are looked up once, up front |
| 887 | +    my $newwm=$dbh->prepare("select language_id from language where wikimedia_key=?"); |
| 888 | +    my $newword=$dbh->prepare("insert into language_names values (?,?,?)"); |
| 889 | +    my %name_lang;  # 'en'/'de' => language_id of the language the name is written in |
| 890 | +    foreach my $code ('en','de') { |
| 891 | +        $newwm->execute($code); |
| 892 | +        my $r=$newwm->fetchrow_hashref(); |
| 893 | +        $name_lang{$code}=$r ? $r->{language_id} : undef; |
| 894 | +    } |
| 895 | +    foreach my $key (@keys) { |
| 896 | +        my ($lang,$wordlang)=$key=~m/(.*?)_(.*)/ or next; |
| 897 | +        next unless exists $name_lang{$wordlang}; |
| 898 | +        $newwm->execute($lang); |
| 899 | +        my $row=$newwm->fetchrow_hashref(); |
| 900 | +        # encode() is presumably Encode::encode, imported elsewhere in the file: character string in, UTF-8 bytes out |
| 901 | +        my $langword=encode("utf8",$langs{$key}); |
| 902 | +        $newword->execute($row ? $row->{language_id} : undef,$name_lang{$wordlang},$langword); |
| 903 | +    } |
| 904 | +} |
| 905 | + |
| 906 | +sub initRel { |
| 907 | +    # Creates the four relation types (bt, nt, rt, it) as expressions: the English term with its |
| 908 | +    # definition, filed in collection $cid under its two-letter code, plus the German translation. |
| 909 | +    my $self=shift; |
| 910 | +    my $cid=shift;  # collection ID, passed through to addExpression |
| 911 | +    # Keys are "<relation code>_<language code>" |
| 912 | +    %rel_types=( |
| 913 | +        bt_en=>'broader terms', |
| 914 | +        bt_de=>'breitere Begriffe', |
| 915 | +        nt_en=>'narrower terms', |
| 916 | +        nt_de=>'engere Begriffe', |
| 917 | +        rt_en=>'related terms', |
| 918 | +        rt_de=>'verwandte Begriffe', |
| 919 | +        it_en=>'is part of theme', |
| 920 | +        it_de=>'ist Themenbestandteil von' |
| 921 | +    ); |
| 922 | + |
| 923 | +    %rel_definitions=( |
| 924 | +        bt_en=>'Those terms in a thesaurus which are broader than others', |
| 925 | +        bt_de=>'Die Begriffe in einem Thesaurus, die breiter sind als andere', |
| 926 | +        nt_en=>'Those terms in a thesaurus which are narrower than others', |
| 927 | +        nt_de=>'Die Begriffe in einem Thesaurus, die enger sind als andere', |
| 928 | +        rt_en=>'Those terms in a thesaurus which are related to others', |
| 929 | +        rt_de=>'Die Begriffe in einem Thesaurus, die mit anderen verwandt sind', |
| 930 | +        it_en=>'Those terms in a thesaurus or dictionary which are associated with a topic', |
| 931 | +        it_de=>'Die Begriffe in einem Thesaurus oder Woerterbuch, die mit einem Thema assoziiert sind'); |
| 932 | + |
| 933 | +    my ($en,$de)=($self->{la}{'en'},$self->{la}{'de'}); |
| 934 | +    # One pass per relation code that has a German entry, sorted so the generated IDs are reproducible |
| 935 | +    foreach my $ident (sort map { m/^(..)_de$/ ? $1 : () } keys %rel_types) { |
| 936 | +        my ($en_key,$de_key)=("${ident}_en","${ident}_de"); |
| 937 | +        my %rv=$self->addExpression($rel_types{$en_key},$en,0,$cid,$ident); |
| 938 | +        $self->addMeaningText($rv{rid},$rv{mid},$rel_definitions{$en_key},0,$en); |
| 939 | +        my %dv=$self->addExpression($rel_types{$de_key},$de,$rv{'mid'}); |
| 940 | +        # NOTE(review): addExpression never sets a 'tcid' key and the hash returned by the English addMeaningText call is discarded, so $rv{'tcid'} is undef and the German text opens its own set - confirm whether it should join the English one |
| 941 | +        $self->addMeaningText($dv{rid},$dv{mid},$rel_definitions{$de_key},$rv{'tcid'},$de); |
| 942 | +    } |
| 943 | +} |
| 944 | + |
| 945 | +sub loadLangs { |
| 946 | +    # Returns a hash mapping each wikimedia_key in the language table to its language_id. |
| 947 | +    my $self=shift; |
| 948 | +    my %la; |
| 949 | +    my $rows=$self->{dbt}->selectall_arrayref('select language_id,wikimedia_key from language'); |
| 950 | +    foreach my $r (grep { defined $_->[1] } @{$rows || []}) { # rows with a NULL key would all collapse onto the '' key |
| 951 | +        $la{$r->[1]}=$r->[0]; |
| 952 | +    } |
| 953 | +    return %la; |
| 954 | +} |
| 955 | + |
| 956 | +sub loadLangsIso { |
| 957 | +    # Returns a hash mapping each iso639_2 code in the language table to its language_id. |
| 958 | +    my $self=shift; |
| 959 | +    my $rows=$self->{dbt}->selectall_arrayref('select language_id,iso639_2 from language') || []; |
| 960 | +    # Each row is [language_id, iso639_2]. Rows with a NULL code are left out, since they |
| 961 | +    # would all collapse onto the empty-string key. If a code occurs more than once, the |
| 962 | +    # last row wins. |
| 963 | +    my %la_iso=map { ($_->[1], $_->[0]) } grep { defined $_->[1] } @$rows; |
| 964 | +    return %la_iso; |
| 965 | +} |
| 966 | + |
| 967 | +return(1); # a file loaded with require must finish by yielding a true value |
\ No newline at end of file |
Index: trunk/extensions/Wikidata/README |
— | — | @@ -0,0 +1 @@ |
| 2 | +This directory is for Wikidata-related extensions, tools and applications. Wikidata itself is intended to become part of the MediaWiki core. |
\ No newline at end of file |