r14127 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r14126‎ | r14127 | r14128 >
Date:02:47, 8 May 2006
Author:erik
Status:old
Tags:
Comment:
- WiktionaryZ importer library in Perl
- Preliminary WiktionaryZ PHP view/edit code

To run this, you'll need some more code that will be committed as soon as we have the situation worked out with the namespace branches.
Modified paths:
  • /trunk/extensions/Wikidata (added) (history)
  • /trunk/extensions/Wikidata/README (added) (history)
  • /trunk/extensions/Wikidata/WiktionaryZ (added) (history)
  • /trunk/extensions/Wikidata/WiktionaryZ/Expression.php (added) (history)
  • /trunk/extensions/Wikidata/WiktionaryZ/WiktionaryZ.php (added) (history)
  • /trunk/extensions/Wikidata/WiktionaryZ/WiktionaryZ.pm (added) (history)

Diff [purge]

Index: trunk/extensions/Wikidata/WiktionaryZ/WiktionaryZ.php
@@ -0,0 +1,622 @@
 2+<?php
 3+
 4+require_once('Expression.php');
 5+
 6+/**
 7+ * Renders a content page from WiktionaryZ based on the GEMET database.
 8+ * @package MediaWiki
 9+ */
 10+class WiktionaryZ {
 11+ /* TODOs:
 12+ use $dbr->select() instead of $dbr->query() wherever possible; it lets MediaWiki handle additional
 13+ table prefixes and such.
 14+ */
 15+ protected $sectionToEdit = 0;
 16+ protected $currentSection = 0;
 17+ protected $inSection = false;
 18+ protected $inSectionLevel = 0;
 19+
 20+ function initializeSections($sectionToEdit) {
 21+ $this->currentSection = 0;
 22+ $this->inSection = $sectionToEdit == 0;
 23+ $this->inSectionLevel = 0;
 24+ $this->sectionToEdit = $sectionToEdit;
 25+ }
 26+
 27+ function addSection($level) {
 28+ if ($this->sectionToEdit != 0) {
 29+ $this->currentSection++;
 30+
 31+ if ($this->currentSection == $this->sectionToEdit) {
 32+ $this->inSection = true;
 33+ $this->inSectionLevel = $level;
 34+ }
 35+ else if ($level == $this->inSectionLevel)
 36+ $this->inSection = false;
 37+ }
 38+
 39+ return $this->inSection;
 40+ }
 41+
 42+ function view() {
 43+
 44+ global $wgOut, $wgTitle, $wgUser;
 45+ $userlang=$wgUser->getOption('language');
 46+
 47+ # $w is the variable used to store generated wikitext
 48+ $w='';
 49+ $w="Your user interface language preference: '''".$userlang."''' - [[Special:Preferences|set your preferences]]";
 50+
 51+ # Get language names, preferably in UI language
 52+ $langdefs=$this->getLangNames($userlang);
 53+
 54+ $dbr =& wfGetDB( DB_MASTER );
 55+
 56+ # Get entry record from GEMET namespace
 57+ $res=$dbr->query("SELECT * from uw_expression_ns WHERE spelling=BINARY ".$dbr->addQuotes($wgTitle->getText()));
 58+
 59+ while($row=$dbr->fetchObject($res)) {
 60+ $tcids=array();
 61+ $dms=array();
 62+ $syntrans=array();
 63+ $oids=array();
 64+ $rels=array();
 65+
 66+ $w.="\n== ''Spelling: ''" . $row->spelling . " - ''Language:'' ".$langdefs[$row->language_id]." ==\n";
 67+
 68+ # Get meanings via Expression ID
 69+ $st_res=$dbr->query("SELECT defined_meaning_id from uw_syntrans WHERE expression_id=".$row->expression_id);
 70+ while($st_row=$dbr->fetchObject($st_res)) {
 71+ $dms[]=$st_row->defined_meaning_id;
 72+ # Get synonyms and translations for each
 73+ $li_res=$dbr->query("SELECT expression_id from uw_syntrans where defined_meaning_id=".$st_row->defined_meaning_id." and expression_id!=".$row->expression_id);
 74+ while($li_row=$dbr->fetchObject($li_res)) {
 75+ $syntrans[$st_row->defined_meaning_id][]=$li_row->expression_id;
 76+ }
 77+
 78+ }
 79+
 80+ # Get meaning text IDs
 81+ foreach($dms as $mid) {
 82+ $dm_res=$dbr->query("SELECT meaning_text_tcid from uw_defined_meaning WHERE defined_meaning_id=".$mid." and is_latest_ver=1");
 83+ while($dm_row=$dbr->fetchObject($dm_res)) {
 84+ $tcids[$mid][]=$dm_row->meaning_text_tcid;
 85+ }
 86+ $alt_res=$dbr->query("select meaning_text_tcid from uw_alt_meaningtexts where meaning_mid=".$mid." and is_latest_set=1");
 87+ while($alt_row=$dbr->fetchObject($alt_res)) {
 88+ $tcids[$mid][]=$alt_row->meaning_text_tcid;
 89+ }
 90+
 91+ $transl=array();
 92+ if(array_key_exists($mid,$syntrans)) {
 93+ foreach($syntrans[$mid] as $liid) {
 94+ $sp_res=$dbr->query("SELECT * from uw_expression_ns WHERE expression_id=".$liid);
 95+ while($sp_row=$dbr->fetchObject($sp_res)) {
 96+ $transl[$sp_row->language_id][]=$sp_row->spelling;
 97+ }
 98+ }
 99+ }
 100+ $translmid[$mid]=$transl;
 101+ }
 102+
 103+
 104+ # Get relations
 105+ $meaning_rels=array();
 106+ foreach($dms as $mid) {
 107+ $rels=array();
 108+ $rt_res=$dbr->query("SELECT * from uw_meaning_relations where meaning1_mid=".$mid." and relationtype_mid!=0 and is_latest_set=1");
 109+ while($rt_row=$dbr->fetchObject($rt_res)) {
 110+ $rels[$rt_row->relationtype_mid][]=$rt_row->meaning2_mid;
 111+ }
 112+ $meaning_rels[$mid]=$rels;
 113+
 114+ }
 115+
 116+ # Get attributes
 117+ $attrib_rels=array();
 118+ foreach($dms as $mid) {
 119+ $atts=array();
 120+ $att_res=$dbr->query("SELECT * from uw_meaning_relations where meaning1_mid=".$mid." and relationtype_mid=0 and is_latest_set=1");
 121+ while($att_row=$dbr->fetchObject($att_res)) {
 122+ $atts[]=$att_row->meaning2_mid;
 123+ }
 124+ $attrib_rels[$mid]=$atts;
 125+ }
 126+
 127+ $typenames=$this->getRelationTypes();
 128+ $attnames=$this->getAttributeValues();
 129+
 130+ foreach($dms as $mid) {
 131+ $oids=array();
 132+ $w.="\n\n===Definition===\n";
 133+ foreach($tcids[$mid] as $tc) {
 134+ $tc_res=$dbr->query("SELECT * from translated_content where set_id=".$tc);
 135+ while($tc_row=$dbr->fetchObject($tc_res)) {
 136+ $oids[$tc_row->language_id][]=$tc_row->text_id;
 137+ }
 138+ }
 139+
 140+ foreach($oids as $lang=>$oid) {
 141+ foreach($oid as $oid_d) {
 142+ $w.="\n\n'''''$langdefs[$lang]'''''\n\n";
 143+ $t_res=$dbr->query("SELECT * from text where old_id=".$oid_d);
 144+ while($t_row=$dbr->fetchObject($t_res)) {
 145+ $w.=$t_row->old_text;
 146+ }
 147+ }
 148+ }
 149+ # Get spellings of translations and synonyms
 150+ $w.="<table border='0' cellpadding='5'><tr valign='top'><td width='20%'>\n'''Translations and Synonyms'''\n";
 151+ foreach($translmid[$mid] as $lang=>$splist) {
 152+ foreach($splist as $spl) {
 153+ if(!empty($spl)) {
 154+ $w.="* ''".$langdefs[$lang]."'': [[WiktionaryZ:$spl|$spl]]\n";
 155+ }
 156+ }
 157+ }
 158+
 159+ # Relations
 160+ $w.="</td><td>";
 161+
 162+ $w.="\n\n'''Relations:'''\n";
 163+ $rels=$meaning_rels[$mid];
 164+ foreach($rels as $type=>$rellist) {
 165+ $w.="\n$typenames[$type]:\n";
 166+ foreach($rellist as $rel) {
 167+ $rs_res=$dbr->query("SELECT expression_id from uw_defined_meaning where defined_meaning_id=".$rel." LIMIT 1");
 168+ $rs_row=$dbr->fetchObject($rs_res);
 169+ if($rs_row->expression_id) {
 170+ $li_res=$dbr->query("SELECT spelling from uw_expression_ns where expression_id=".$rs_row->expression_id);
 171+ $li_row=$dbr->fetchObject($li_res);
 172+ $w.="* [[WiktionaryZ:".$li_row->spelling."|"."$li_row->spelling]]\n";
 173+ }
 174+ }
 175+ }
 176+ $w.="\n\n'''Attributes:'''\n";
 177+ $atts=$attrib_rels[$mid];
 178+ foreach($atts as $att) {
 179+ $w.="* [[WiktionaryZ:".$attnames[$att]."|".$attnames[$att]."]]\n";
 180+ }
 181+
 182+ $w.="</td></tr></table>";
 183+
 184+ }
 185+
 186+
 187+ }
 188+ $wgOut->addWikiText($w);
 189+ # We may later want to disable the regular page component
 190+ # $wgOut->setPageTitleArray($this->mTitle->getTitleArray());
 191+ }
 192+
 193+ # Falls back to English if no language name translations available for chosen languages
 194+ function getLangNames($code) {
 195+ $id=$this->getLanguageIdForCode($code);
 196+ if(!$id) $id=$this->getLanguageIdForCode('en');
 197+ $names=$this->getLanguageNamesForId($id);
 198+ if(empty($names)) {
 199+ $id=$this->getLanguageIdForCode('en');
 200+ $names=$this->getLanguageNamesForId($id);
 201+ }
 202+ return $names;
 203+ }
 204+
 205+ function getLanguageIdForCode($code) {
 206+ $dbr =& wfGetDB( DB_SLAVE );
 207+ $id_res=$dbr->query("select language_id from language where wikimedia_key='".$code."'");
 208+ $id_row=$dbr->fetchObject($id_res);
 209+ return $id_row->language_id;
 210+ }
 211+
 212+ function getLanguageNamesForId($id) {
 213+ $dbr =& wfGetDB( DB_SLAVE );
 214+ $langs=array();
 215+ $lang_res=$dbr->query("select language_names.language_id,language_names.language_name,language.wikimedia_key from language,language_names where language_names.name_language_id=".$id." and language.language_id=language_names.name_language_id");
 216+ while($lang_row=$dbr->fetchObject($lang_res)) {
 217+ $langs[$lang_row->language_id]=$lang_row->language_name;
 218+ }
 219+ return $langs;
 220+ }
 221+
 222+ function getRelationTypes() {
 223+ $relationtypes=array();
 224+ $reltypecollections=$this->getReltypeCollections();
 225+ $dbr =& wfGetDB( DB_SLAVE );
 226+ foreach($reltypecollections as $cname=>$cid) {
 227+ $rel_res=$dbr->query("select member_mid from uw_collection_contents where collection_id=$cid and is_latest_set=1");
 228+ while($rel_row=$dbr->fetchObject($rel_res)) {
 229+ # fixme hardcoded English
 230+ $rel_name=$this->getExpressionForMeaningId($rel_row->member_mid, 85);
 231+ $relationtypes[$rel_row->member_mid]=$rel_name;
 232+ }
 233+ }
 234+ return $relationtypes;
 235+ }
 236+
 237+ function getReltypeCollections() {
 238+ $reltypecollections=array();
 239+ $dbr =& wfGetDB ( DB_SLAVE );
 240+ $col_res=$dbr->query("select collection_id,collection_mid from uw_collection_ns where collection_type='RELT' and is_latest=1");
 241+ while($col_row=$dbr->fetchObject($col_res)) {
 242+ # fixme hardcoded English
 243+ $collection_name=$this->getExpressionForMeaningId($col_row->collection_mid,85);
 244+ $reltypecollections[$collection_name]=$col_row->collection_id;
 245+ }
 246+ return $reltypecollections;
 247+
 248+ }
 249+
 250+ function getExpressionForMeaningId($mid, $langcode) {
 251+ $dbr =& wfGetDB(DB_SLAVE);
 252+ $sql="SELECT spelling from uw_syntrans,uw_expression_ns where defined_meaning_id=".$mid." and uw_expression_ns.expression_id=uw_syntrans.expression_id and uw_expression_ns.language_id=".$langcode." limit 1";
 253+ $sp_res=$dbr->query($sql);
 254+ $sp_row=$dbr->fetchObject($sp_res);
 255+ return $sp_row->spelling;
 256+ }
 257+
 # Always returns true. NOTE(review): presumably a placeholder for form
 # validation (going by the name only) - nothing visible here calls it.
 258+ function checkForm() {
 259+ return true;
 260+ }
 261+
 262+ function saveForm() {
 263+ global
 264+ $wgTitle, $wgUser, $wgRequest, $wgOut;
 265+
 266+ $userlang = $wgUser->getOption('language');
 267+
 268+ # Get language names, preferably in UI language
 269+ $langdefs=$this->getLangNames($userlang);
 270+
 271+ $this->initializeSections($wgRequest->getInt('section'));
 272+ $dbr =& wfGetDB( DB_MASTER );
 273+
 274+ # Get entry record from GEMET namespace
 275+ $res=$dbr->query("SELECT * from uw_expression_ns WHERE spelling=BINARY ".$dbr->addQuotes($wgTitle->getText()));
 276+
 277+ while($row=$dbr->fetchObject($res)) {
 278+ $this->addSection(1);
 279+ $expressionId = $row->expression_id;
 280+ $definedMeaningIds = $this->getDefinedMeaningsForExpression($expressionId);
 281+ $synonymsAndTranslationIds = $this->getSynonymAndTranslationIds($definedMeaningIds, $expressionId);
 282+ $definedMeaningTexts = $this->getDefinedMeaningTexts($definedMeaningIds);
 283+ $definedMeaningRelations = $this->getDefinedMeaningRelations($definedMeaningIds);
 284+
 285+ foreach($definedMeaningRelations as $definedMeaningId => $relations) {
 286+ if ($this->addSection(2)) {
 287+ $translatedContents = $this->getTranslatedContents($definedMeaningTexts[$definedMeaningId]);
 288+
 289+ foreach($translatedContents as $languageId => $textId) {
 290+ $definition = $wgRequest->getText('definition-'.$textId);
 291+
 292+ if ($definition != '')
 293+ $this->setText($textId, $definition);
 294+ }
 295+
 296+ $this->addTranslatedDefinitionFromRequest($definedMeaningId, $definedMeaningTexts[$definedMeaningId], getRevisionForExpressionId($expressionId), array_keys($translatedContents));
 297+ $this->addSynonymsOrTranslationsFromRequest($definedMeaningId);
 298+ }
 299+ }
 300+ }
 301+
 302+ Title::touchArray(array($wgTitle));
 303+ }
 304+
 305+ function edit() {
 306+ global
 307+ $wgOut, $wgTitle, $wgUser, $wgRequest;
 308+
 309+ if ($wgRequest->getText('save') != '')
 310+ $this->saveForm();
 311+
 312+ $this->initializeSections($wgRequest->getInt('section'));
 313+
 314+ $userlang = $wgUser->getOption('language');
 315+
 316+ # $w is the variable used to store generated wikitext
 317+ $wgOut->addWikiText("Your user interface language preference: '''$userlang''' - [[Special:Preferences|set your preferences]]");
 318+ $wgOut->addHTML('<form method="post">');
 319+
 320+ # Get language names, preferably in UI language
 321+ $langdefs=$this->getLangNames($userlang);
 322+
 323+ $dbr =& wfGetDB(DB_MASTER);
 324+
 325+ # Get entry record from GEMET namespace
 326+ $queryResult = $dbr->query("SELECT * from uw_expression_ns WHERE spelling=BINARY ".$dbr->addQuotes($wgTitle->getText()));
 327+
 328+ while($row = $dbr->fetchObject($queryResult)) {
 329+ if ($this->addSection(1))
 330+ $wgOut->addHTML("<h2> <i>Spelling: </i>" . $row->spelling . " - <i>Language:</i> ".$langdefs[$row->language_id]." </h2>");
 331+
 332+ $expressionId = $row->expression_id;
 333+ $definedMeaningIds = $this->getDefinedMeaningsForExpression($expressionId);
 334+ $synonymsAndTranslationIds = $this->getSynonymAndTranslationIds($definedMeaningIds, $expressionId);
 335+ $spellingsPerDefinedMeaningAndLanguage = $this->getSpellingsPerDefinedMeaningAndLanguage($definedMeaningIds, $synonymsAndTranslationIds);
 336+ $definedMeaningTexts = $this->getDefinedMeaningTexts($definedMeaningIds);
 337+ $definedMeaningRelations = $this->getDefinedMeaningRelations($definedMeaningIds);
 338+
 339+ foreach ($definedMeaningRelations as $definedMeaningId => $relations) {
 340+ if ($this->addSection(2)) {
 341+ $wgOut->addHTML('<table border="0" cellpadding="5""><tr valign="top""><td width="20%"">');
 342+ $wgOut->addHTML('<b>Translations and synonyms</b>');
 343+
 344+ foreach($spellingsPerDefinedMeaningAndLanguage[$definedMeaningId] as $languageId => $spellings) {
 345+ $languageName = $langdefs[$languageId];
 346+
 347+ foreach($spellings as $spelling)
 348+ if(!empty($spelling))
 349+ $wgOut->addWikiText("* ''$languageName'': [[WiktionaryZ:$spelling|$spelling]]\n");
 350+ }
 351+
 352+ $wgOut->addHTML('</td><td>');
 353+
 354+ $wgOut->addHTML("<div><b>Definition</b></div>");
 355+ $translatedContents = $this->getTranslatedContents($definedMeaningTexts[$definedMeaningId]);
 356+
 357+ foreach($translatedContents as $languageId => $textId) {
 358+ $wgOut->addHTML("<div><i>$langdefs[$languageId]</i></div>".
 359+ '<textarea name="definition-'. $textId .'" rows="5">'.htmlspecialchars($this->getText($textId)).'</textarea>');
 360+ }
 361+
 362+ $wgOut->addHTML('<div><i>Translate into</i>: <select name="translated-definition-language-'. $definedMeaningId .'">'. $this->getLanguageOptions(array_keys($translatedContents)) .'</select></div>'.
 363+ '<textarea name="translated-definition-'.$definedMeaningId.'" rows="5"></textarea>');
 364+
 365+ $wgOut->addHTML('</td></tr></table>'. $this->getAddTranslationsAndSynonymsFormFields($definedMeaningId));
 366+ }
 367+ }
 368+ }
 369+
 370+ $wgOut->addHTML('<input type="submit" name="save" value="Save"/>');
 371+ $wgOut->addHTML('</form>');
 372+ }
 373+
 374+ function getDefinedMeaningsForExpression($expressionId) {
 375+ $dbr =& wfGetDB(DB_SLAVE);
 376+ $definedMeanings = array();
 377+ $queryResult = $dbr->query("SELECT defined_meaning_id from uw_syntrans WHERE expression_id=$expressionId");
 378+
 379+ while($definedMeaning = $dbr->fetchObject($queryResult))
 380+ $definedMeanings[] = $definedMeaning->defined_meaning_id;
 381+
 382+ return $definedMeanings;
 383+ }
 384+
 385+ function getDefinedMeaningTexts($definedMeaningIds) {
 386+ $dbr =& wfGetDB(DB_SLAVE);
 387+ $definedMeaningTexts = array();
 388+
 389+ foreach($definedMeaningIds as $definedMeaningId) {
 390+ $queryResult = $dbr->query("SELECT meaning_text_tcid from uw_defined_meaning WHERE defined_meaning_id=$definedMeaningId and is_latest_ver=1");
 391+
 392+ while($dm_row=$dbr->fetchObject($queryResult))
 393+ $definedMeaningTexts[$definedMeaningId] = $dm_row->meaning_text_tcid;
 394+ }
 395+
 396+ return $definedMeaningTexts;
 397+ }
 398+
 399+ function getSpellingsPerDefinedMeaningAndLanguage($definedMeaningIds, $synonymsAndTranslationIds) {
 400+ $dbr =& wfGetDB(DB_SLAVE);
 401+ $spellingsPerDefinedMeaningAndLanguage = array();
 402+
 403+ foreach($definedMeaningIds as $definedMeaningId) {
 404+ $spellingsPerLanguage = array();
 405+
 406+ if (array_key_exists($definedMeaningId, $synonymsAndTranslationIds))
 407+ foreach($synonymsAndTranslationIds[$definedMeaningId] as $synonymOrTranslation) {
 408+ $queryResult = $dbr->query("SELECT * from uw_expression_ns WHERE expression_id=$synonymOrTranslation");
 409+
 410+ while($expression = $dbr->fetchObject($queryResult))
 411+ $spellingsPerLanguage[$expression->language_id][] = $expression->spelling;
 412+ }
 413+
 414+ $spellingsPerDefinedMeaningAndLanguage[$definedMeaningId] = $spellingsPerLanguage;
 415+ }
 416+
 417+ return $spellingsPerDefinedMeaningAndLanguage;
 418+ }
 419+
 420+ function getSynonymAndTranslationIds($definedMeaningIds, $skippedExpressionId) {
 421+ $dbr =& wfGetDB(DB_SLAVE);
 422+ $synonymAndTranslationIds = array();
 423+
 424+ foreach($definedMeaningIds as $definedMeaningId) {
 425+ $queryResult = $dbr->query("SELECT expression_id from uw_syntrans where defined_meaning_id=$definedMeaningId and expression_id!=$skippedExpressionId");
 426+
 427+ while($synonymOrTranslation = $dbr->fetchObject($queryResult))
 428+ $synonymAndTranslationIds[$definedMeaningId][] = $synonymOrTranslation->expression_id;
 429+ }
 430+
 431+ return $synonymAndTranslationIds;
 432+ }
 433+
 434+ function getDefinedMeaningRelations($definedMeaningIds) {
 435+ $dbr =& wfGetDB(DB_SLAVE);
 436+ $definedMeaningRelations = array();
 437+
 438+ foreach($definedMeaningIds as $definedMeaningId) {
 439+ $relations = array();
 440+ $queryResult = $dbr->query("SELECT * from uw_meaning_relations where meaning1_mid=$definedMeaningId and is_latest_set=1");
 441+
 442+ while($definedMeaningRelation = $dbr->fetchObject($queryResult))
 443+ $relations[$definedMeaningRelation->relationtype_mid][]=$definedMeaningRelation->meaning2_mid;
 444+
 445+ $definedMeaningRelations[$definedMeaningId] = $relations;
 446+ }
 447+
 448+ return $definedMeaningRelations;
 449+ }
 450+
 451+ function getTranslatedContents($setId) {
 452+ $dbr =& wfGetDB(DB_SLAVE);
 453+ $queryResult = $dbr->query("SELECT * from translated_content where set_id=$setId");
 454+ $translatedContents = array();
 455+
 456+ while($translatedContent = $dbr->fetchObject($queryResult))
 457+ $translatedContents[$translatedContent->language_id] = $translatedContent->text_id;
 458+
 459+ return $translatedContents;
 460+ }
 461+
 462+ function getAddTranslationsAndSynonymsFormFields($definedMeaningId) {
 463+ return '<div><b>Add translation/synonym</b></div>
 464+ <table>
 465+ <tr><th>Language</th><th>Spelling</th><th>Identical meaning?</th><th>Input rows</th></tr>
 466+ <tr id="add-translation-synonym-'. $definedMeaningId .'" class="repeat">
 467+ <td><select name="language-'. $definedMeaningId .'">'. $this->getLanguageOptions() .'</select></td>
 468+ <td><input type="text" name="spelling-'. $definedMeaningId .'" maxlength="255"/></td>
 469+ <td><input type="checkbox" name="endemic-meaning-'. $definedMeaningId .'" checked="checked"/>
 470+ <td></td>
 471+ </tr>
 472+ </table>';
 473+ }
 474+
 475+ function getTranslationIdsForDefinedMeaning($definedMeaningId) {
 476+ $dbr =& wfGetDB(DB_SLAVE);
 477+ $queryResult = $dbr->query("SELECT * from text where old_id=$textId");
 478+ }
 479+
 480+ function getLanguageOptions($languageIdsToExclude = array()) {
 481+ global
 482+ $wgUser;
 483+
 484+ $userLanguage = $wgUser->getOption('language');
 485+ $userLanguageId = $this->getLanguageIdForCode($userLanguage);
 486+ $idNameIndex = $this->getLangNames($userLanguage);
 487+ asort($idNameIndex);
 488+
 489+ $result = '';
 490+
 491+ foreach($idNameIndex as $id => $name) {
 492+ if (!in_array($id, $languageIdsToExclude)) {
 493+ if ($id == $userLanguageId)
 494+ $selected = ' selected="selected"';
 495+ else
 496+ $selected = '';
 497+
 498+ $result .= '<option value="'. $id .'"'. $selected .'>'. $name . '</option>';
 499+ }
 500+ }
 501+
 502+ return $result;
 503+ }
 504+
 505+ function getText($textId) {
 506+ $dbr =& wfGetDB(DB_SLAVE);
 507+ $queryResult = $dbr->query("SELECT * from text where old_id=$textId");
 508+
 509+ if($text = $dbr->fetchObject($queryResult))
 510+ return $text->old_text;
 511+ else
 512+ return "";
 513+ }
 514+
 515+ function setText($textId, $text) {
 516+ $dbr = &wfGetDB(DB_MASTER);
 517+ $text = $dbr->addQuotes($text);
 518+ $sql = "UPDATE text SET old_text=$text WHERE old_id=$textId";
 519+ $dbr->query($sql);
 520+ }
 521+
 522+ function createText($text) {
 523+ $dbr = &wfGetDB(DB_MASTER);
 524+ $text = $dbr->addQuotes($text);
 525+ $sql = "insert into text(old_text) values($text)";
 526+ $dbr->query($sql);
 527+
 528+ return $dbr->insertId();
 529+ }
 530+
 531+ function addSynonymOrTranslation($spelling, $languageId, $definedMeaningId, $endemicMeaning) {
 532+ $expression = findOrCreateExpression($spelling, $languageId);
 533+ $expression->assureIsBoundToDefinedMeaning($definedMeaningId, $endemicMeaning);
 534+ }
 535+
 536+ function addSynonymOrTranslationFromRequest($definedMeaningId, $postFix) {
 537+ global
 538+ $wgRequest;
 539+
 540+ if (array_key_exists('language-'. $postFix, $_POST)) {
 541+ $languageId = $wgRequest->getInt('language-'. $postFix);
 542+ $spelling = $wgRequest->getText('spelling-'. $postFix);
 543+ $endemicMeaning = $wgRequest->getCheck('endemic-meaning-'.$postFix);
 544+
 545+ if ($spelling != '')
 546+ $this->addSynonymOrTranslation($spelling, $languageId, $definedMeaningId, $endemicMeaning);
 547+ }
 548+ }
 549+
 550+ function addSynonymsOrTranslationsFromRequest($definedMeaningId) {
 551+ global
 552+ $wgRequest;
 553+
 554+ $this->addSynonymOrTranslationFromRequest($definedMeaningId, $definedMeaningId);
 555+
 556+ for ($i = 2; $i <= $wgRequest->getInt('add-translation-synonym-'. $definedMeaningId . '-RC'); $i++)
 557+ $this->addSynonymOrTranslationFromRequest($definedMeaningId, $definedMeaningId . '-' . $i);
 558+ }
 559+
 560+ function createTranslatedContent($setId, $languageId, $textId, $revisionId) {
 561+ $dbr = &wfGetDB(DB_MASTER);
 562+ $sql = "insert into translated_content(set_id,language_id,text_id,first_set,revision_id) values($setId, $languageId, $textId, $setId, $revisionId)";
 563+ $dbr->query($sql);
 564+
 565+ return $dbr->insertId();
 566+ }
 567+
 568+ function addTranslatedDefinition($setId, $languageId, $definition, $revisionId) {
 569+ $textId = $this->createText($definition);
 570+ $this->createTranslatedContent($setId, $languageId, $textId, $revisionId);
 571+ }
 572+
 573+ function addTranslatedDefinitionFromRequest($definedMeaningId, $setId, $revisionId, $languageIdsToExclude) {
 574+ global
 575+ $wgRequest;
 576+
 577+ $languageId = $wgRequest->getInt('translated-definition-language-'.$definedMeaningId);
 578+ $definition = $wgRequest->getText('translated-definition-'.$definedMeaningId);
 579+
 580+ if ($definition != '' && !in_array($languageId, $languageIdsToExclude))
 581+ $this->addTranslatedDefinition($setId, $languageId, $definition, $revisionId);
 582+ }
 583+
 584+ function getAttributeValues(){
 585+ $atts=array();
 586+ $attcollections=$this->getCollectionsByType('ATTR');
 587+ $dbr =& wfGetDB( DB_SLAVE );
 588+ foreach($attcollections as $cname=>$cid) {
 589+ $att_res=$dbr->query("select member_mid from uw_collection_contents where collection_id=$cid and is_latest_set=1");
 590+ while($att_row=$dbr->fetchObject($att_res)) {
 591+ # fixme hardcoded English
 592+ $att_name=$this->getExpressionForMid($att_row->member_mid, 85);
 593+ $atts[$att_row->member_mid]=$att_name;
 594+ }
 595+ }
 596+ return $atts;
 597+ }
 598+
 599+ function getCollectionsByType($type) {
 600+ $typecollections=array();
 601+ $dbr =& wfGetDB ( DB_SLAVE );
 602+ $col_res=$dbr->query("select collection_id,collection_mid from uw_collection_ns where collection_type=".$dbr->addQuotes($type)." and is_latest=1");
 603+ while($col_row=$dbr->fetchObject($col_res)) {
 604+ # fixme hardcoded English
 605+ $collection_name=$this->getExpressionForMid($col_row->collection_mid,85);
 606+ $typecollections[$collection_name]=$col_row->collection_id;
 607+ }
 608+ return $typecollections;
 609+
 610+ }
 611+
 612+ function getExpressionForMid($mid,$langcode) {
 613+ $dbr =& wfGetDB(DB_SLAVE);
 614+ $sql="SELECT spelling from uw_syntrans,uw_expression_ns where defined_meaning_id=".$mid." and uw_expression_ns.expression_id=uw_syntrans.expression_id and uw_expression_ns.language_id=".$langcode." limit 1";
 615+ $sp_res=$dbr->query($sql);
 616+ $sp_row=$dbr->fetchObject($sp_res);
 617+ return $sp_row->spelling;
 618+ #return $sql;
 619+ }
 620+
 621+}
 622+
 623+?>
\ No newline at end of file
Index: trunk/extensions/Wikidata/WiktionaryZ/Expression.php
@@ -0,0 +1,204 @@
 2+<?php
 3+class Expression {
 4+ public $id;
 5+ public $spelling;
 6+ public $languageId;
 7+ public $pageId;
 8+ public $revisionId;
 9+
 10+ function __construct($id, $spelling, $languageId) {
 11+ $this->id = $id;
 12+ $this->spelling = $spelling;
 13+ $this->languageId = $languageId;
 14+ }
 15+
 16+ function getPageTitle() {
 17+ return str_replace(' ', '_', $this->spelling);
 18+ }
 19+
 20+ function updateFromDatabase() {
 21+ $this->revisionId = getRevisionForExpressionId($this->id);
 22+ }
 23+
 24+ function createNewInDatabase() {
 25+ $this->pageId = $this->createPage();
 26+ $this->revisionId = createInitialRevisionForPage($this->pageId, 'Created by adding expression');
 27+
 28+ linkExpressionToRevision($this->id, $this->revisionId);
 29+ }
 30+
 31+ function createPage() {
 32+ return createPage(16, $this->getPageTitle(), $this->languageId);
 33+ }
 34+
 35+ function isBoundToDefinedMeaning($definedMeaningId) {
 36+ return getSetIdForDefinedMeaningAndExpression($definedMeaningId, $this->id);
 37+ }
 38+
 39+ function bindToDefinedMeaning($definedMeaningId, $endemicMeaning) {
 40+ $setId = determineSetIdForDefinedMeaning($definedMeaningId);
 41+ createSynonymOrTranslation($setId, $definedMeaningId, $this->id, $this->revisionId, $endemicMeaning);
 42+ }
 43+
 44+ function assureIsBoundToDefinedMeaning($definedMeaningId, $endemicMeaning) {
 45+ if (!$this->isBoundToDefinedMeaning($definedMeaningId))
 46+ $this->bindToDefinedMeaning($definedMeaningId, $endemicMeaning);
 47+ }
 48+}
 49+
 50+function getExpressionId($spelling, $languageId) {
 51+ $dbr = &wfGetDB(DB_SLAVE);
 52+ $sql = 'select expression_id from uw_expression_ns where spelling=binary '. $dbr->addQuotes($spelling) . ' and language_id=' . $languageId . ' and is_latest=1';
 53+ $queryResult = $dbr->query($sql);
 54+ $expression = $dbr->fetchObject($queryResult);
 55+ return $expression->expression_id;
 56+}
 57+
 58+function setFirstVersion($expressionId, $firstVersionId) {
 59+ $dbr = &wfGetDB(DB_MASTER);
 60+ $sql = "update uw_expression_ns set first_ver=$firstVersionId where expression_id=$expressionId";
 61+ $dbr->query($sql);
 62+}
 63+
 64+function createExpressionId($spelling, $languageId) {
 65+ $dbr = &wfGetDB(DB_MASTER);
 66+ $spelling = $dbr->addQuotes($spelling);
 67+ $sql = "insert into uw_expression_ns(spelling,language_id,is_latest) values($spelling, $languageId, 1)";
 68+ $dbr->query($sql);
 69+ $expressionId = $dbr->insertId();
 70+
 71+ setFirstVersion($expressionId, $expressionId);
 72+
 73+ return $expressionId;
 74+}
 75+
 76+function getRevisionForExpressionId($expressionId) {
 77+ $dbr = &wfGetDB(DB_SLAVE);
 78+ $sql = "select rev_id from revision where rev_data_id=$expressionId";
 79+ $queryResult = $dbr->query($sql);
 80+
 81+ if ($revision = $dbr->fetchObject($queryResult))
 82+ return $revision->rev_id;
 83+ else
 84+ return null;
 85+}
 86+
 87+function createPage($namespace, $title, $languageId) {
 88+ $dbr = &wfGetDB(DB_MASTER);
 89+ $title = $dbr->addQuotes($title);
 90+ $timestamp = $dbr->timestamp();
 91+
 92+ $sql = "insert into page(page_namespace,page_title,page_is_new,page_title_language_id,page_touched) ".
 93+ "values($namespace, $title, 1, $languageId, $timestamp)";
 94+ $dbr->query($sql);
 95+
 96+ return $dbr->insertId();
 97+}
 98+
 99+function setPageLatestRevision($pageId, $latestRevision) {
 100+ $dbr = &wfGetDB(DB_MASTER);
 101+ $sql = "update page set page_latest=$latestRevision where page_id=$pageId";
 102+ $dbr->query($sql);
 103+}
 104+
 105+function createInitialRevisionForPage($pageId, $comment) {
 106+ global
 107+ $wgUser;
 108+
 109+ $dbr = &wfGetDB(DB_MASTER);
 110+ $userId = $wgUser->getID();
 111+ $userName = $dbr->addQuotes($wgUser->getName());
 112+ $comment = $dbr->addQuotes($comment);
 113+ $timestamp = $dbr->timestamp();
 114+
 115+ $sql = "insert into revision(rev_page,rev_comment,rev_user,rev_user_text,rev_timestamp) ".
 116+ "values($pageId, $comment, $userId, $userName, $timestamp)";
 117+ $dbr->query($sql);
 118+
 119+ $revisionId = $dbr->insertId();
 120+ setPageLatestRevision($pageId, $revisionId);
 121+
 122+ return $revisionId;
 123+}
 124+
 125+function linkExpressionToRevision($expressionId, $revisionId) {
 126+ $dbr = &wfGetDB(DB_MASTER);
 127+ $sql = "update revision set rev_data_id=$expressionId where rev_id=$revisionId";
 128+ $dbr->query($sql);
 129+}
 130+
 131+function findExpression($spelling, $languageId) {
 132+ if ($expressionId = getExpressionId($spelling, $languageId)) {
 133+ $expression = new Expression($expressionId, $spelling, $languageId);
 134+ $expression->updateFromDatabase();
 135+ return $expression;
 136+ }
 137+ else
 138+ return null;
 139+}
 140+
 141+function createExpression($spelling, $languageId) {
 142+ $expression = new Expression(createExpressionId($spelling, $languageId), $spelling, $languageId);
 143+ $expression->createNewInDatabase();
 144+ return $expression;
 145+}
 146+
 147+function findOrCreateExpression($spelling, $languageId) {
 148+ if ($expression = findExpression($spelling, $languageId))
 149+ return $expression;
 150+ else
 151+ return createExpression($spelling, $languageId);
 152+}
 153+
 154+function getSetIdForDefinedMeaningAndExpression($definedMeaningId, $expressionId) {
 155+ $dbr = &wfGetDB(DB_SLAVE);
 156+ $sql = "select set_id from uw_syntrans where defined_meaning_id=$definedMeaningId and expression_id=$expressionId";
 157+ $queryResult = $dbr->query($sql);
 158+
 159+ if ($set = $dbr->fetchObject($queryResult))
 160+ return $set->set_id;
 161+ else
 162+ return 0;
 163+}
 164+
 165+function getLatestSetIdForDefinedMeaning($definedMeaningId) {
 166+ $dbr = &wfGetDB(DB_SLAVE);
 167+ $sql = "select set_id from uw_syntrans where defined_meaning_id=$definedMeaningId and is_latest_set=1";
 168+ $queryResult = $dbr->query($sql);
 169+
 170+ if ($set = $dbr->fetchObject($queryResult))
 171+ return $set->set_id;
 172+ else
 173+ return 0;
 174+}
 175+
 176+function determineSetIdForDefinedMeaning($definedMeaningId) {
 177+ $result = getLatestSetIdForDefinedMeaning($definedMeaningId);
 178+
 179+ if ($result == 0)
 180+ $result = getMaximum('set_id', 'uw_syntrans') + 1;
 181+
 182+ return $result;
 183+}
 184+
 185+
 186+function createSynonymOrTranslation($setId, $definedMeaningId, $expressionId, $revisionId, $endemicMeaning) {
 187+ $dbr = &wfGetDB(DB_MASTER);
 188+ $endemicMeaningInteger = (int) $endemicMeaning;
 189+ $sql = "insert into uw_syntrans(set_id,defined_meaning_id,expression_id,first_set,revision_id,endemic_meaning,is_latest_set) ".
 190+ "values($setId, $definedMeaningId, $expressionId, $setId, $revisionId, $endemicMeaningInteger, 1)";
 191+ $queryResult = $dbr->query($sql);
 192+}
 193+
 194+function getMaximum($field, $table) {
 195+ $dbr = &wfGetDB(DB_SLAVE);
 196+ $sql = "select max($field) as maximum from $table";
 197+ $queryResult = $dbr->query($sql);
 198+
 199+ if ($maximum = $dbr->fetchObject($queryResult))
 200+ return $maximum->maximum;
 201+ else
 202+ return 0;
 203+}
 204+
 205+?>
Index: trunk/extensions/Wikidata/WiktionaryZ/WiktionaryZ.pm
@@ -0,0 +1,966 @@
 2+# Example usage to import UMLS into an existing WiktionaryZ database:
 3+# use WiktionaryZ;
 4+# my $importer=new WiktionaryZ('wikidatadb','root','MyPass');
 5+# $importer->setSourceDB('umls');
 6+# $importer->importUMLS();
 7+#
 8+# NOTE: When importing UMLS, we expect the presence of the semantic network data
 9+# in the tables SRDEF and the manually created tables SEMRELHIER and SEMTYPEHIER.
 10+# SEMRELHIER and SEMTYPEHIER contain information about the relations between
 11+# semantic types and relation types, using RB as the code for "broader than"
 12+# and RN for "narrower than".
 13+
 14+# Todo for GEMET:
 15+# - deal with homonyms (esp. when merging data), avoid duplicate page titles in the same language (use addMeaning)
 16+# - fix mixup bug caused by usage of "+1" with $self->getMaxId
 17+#
 18+# Todo for UMLS:
 19+# SyntransCollection
 20+# RelationCollection
 21+# Fully deal with alternative definitions referring to the same concept
 22+# Deal with preferred lexical expressions, primary concepts (general weighting mechanism?)
 23+
 24+package WiktionaryZ;
 25+use DBI;
 26+use Encode;
 27+use POSIX qw(strftime);
 28+
# Constructor. Arguments after the class name, in order: target database
# name, user, password, host (default 'localhost'), port (default '3306')
# and DBI driver (default 'mysql'). No connection is opened here.
sub new {
    my ($class, $db, $user, $pass, $host, $port, $driver) = @_;
    my $self = {
        targetdb     => $db,
        targetuser   => $user,
        targetpass   => $pass,
        targethost   => $host   || 'localhost',
        targetport   => $port   || '3306',
        targetdriver => $driver || 'mysql',
    };
    bless($self, $class);
    return $self;
}
 41+
# Records the connection settings of the source database. Only the database
# name is required; user, password, host, port and driver each fall back to
# the corresponding target setting when omitted (or false).
sub setSourceDB {
    my ($self, $db, $user, $pass, $host, $port, $driver) = @_;
    $self->{sourcedb}     = $db;
    $self->{sourceuser}   = $user   || $self->{targetuser};
    $self->{sourcepass}   = $pass   || $self->{targetpass};
    $self->{sourcehost}   = $host   || $self->{targethost};
    $self->{sourceport}   = $port   || $self->{targetport};
    $self->{sourcedriver} = $driver || $self->{targetdriver};
}
 51+
# Opens the DBI connection to the source database configured through
# setSourceDB() and stores the handle in $self->{dbs}.
# Dies with the DBI error text when the connection cannot be established,
# instead of leaving an undefined handle for a later method call to trip over.
# Returns the database handle.
sub connectSourceDB {
    my $self = shift;
    # DSN layout: dbi:<driver>:<database>:<host>:<port>
    my $dsn = join(':', 'dbi', $self->{sourcedriver}, $self->{sourcedb}, $self->{sourcehost}, $self->{sourceport});
    $self->{dbs} = DBI->connect($dsn, $self->{sourceuser}, $self->{sourcepass})
        or die "Cannot connect to source database '$self->{sourcedb}': $DBI::errstr\n";
    return $self->{dbs};
}
 57+
# Opens the DBI connection to the target (WiktionaryZ) database configured
# in the constructor and stores the handle in $self->{dbt}.
# Dies with the DBI error text when the connection cannot be established,
# instead of leaving an undefined handle for a later method call to trip over.
# Returns the database handle.
sub connectTargetDB {
    my $self = shift;
    # DSN layout: dbi:<driver>:<database>:<host>:<port>
    my $dsn = join(':', 'dbi', $self->{targetdriver}, $self->{targetdb}, $self->{targethost}, $self->{targetport});
    $self->{dbt} = DBI->connect($dsn, $self->{targetuser}, $self->{targetpass})
        or die "Cannot connect to target database '$self->{targetdb}': $DBI::errstr\n";
    return $self->{dbt};
}
 63+
 64+
# Runs the UMLS import from the source database into the target database.
#
# $level (optional, default 0) lets a run resume part-way: each stage below
# is executed only while $level is smaller than the stage's threshold.
#   stage 1 ($level < 1): terms for CSP and ICPC%
#   stage 2 ($level < 2): relation types (REL, RELA)
#   stage 3 ($level < 3): relations between concepts
#   stage 4 ($level < 4): semantic network types and their hierarchies
#   stage 5 ($level < 5): semantic type assignments
# With $level 0 the collections are created; otherwise the existing ones
# are looked up.
sub importUMLS {
    my ($self, $level) = @_;
    $level ||= 0;

    $self->connectSourceDB();
    $self->connectTargetDB();

    my %languages = $self->loadLangs();
    $self->{la} = \%languages;
    my %languages_iso = $self->loadLangsIso();
    $self->{la_iso} = \%languages_iso;

    my %collections = $level ? $self->getCollections() : $self->bootstrapCollections();
    $self->{cid} = \%collections;

    if ($level < 1) {
        $self->importUMLSterms("CSP", $self->{cid}{'CRISP'});
        $self->importUMLSterms("ICPC%", $self->{cid}{'ICPC'});
        # MSH terms (collection 'MESH') are intentionally not imported yet.
    }

    if ($level < 2) {
        foreach my $column ('REL', 'RELA') {
            $self->importUMLSrelationtypes($column);
        }
    }

    if ($level < 3) {
        my %relation_types = $self->loadReltypes();
        $self->{reltypes} = \%relation_types;
        # MSH relations are intentionally not imported yet.
        foreach my $sab ('CSP', 'ICPC%') {
            foreach my $column ('REL', 'RELA') {
                $self->importUMLSrelations($column, $sab);
            }
        }
    }

    if ($level < 4) {
        foreach my $kind ('STY', 'RL') {
            $self->importSNtypes($kind);
        }
        foreach my $kind ('STY', 'RL') {
            $self->importSTrelations($kind);
        }
        # importSTrelations2() is not run as part of the import.
    }

    if ($level < 5) {
        my %attributes = $self->loadAttributes();
        $self->{attribs} = \%attributes;
        foreach my $sab ('CSP', 'ICPC%') {
            $self->importUMLSstypes($sab);
        }
    }
}
 115+
# Runs the complete GEMET import: connects to both databases, loads the
# language table, creates the GEMET collections, initialises and loads the
# relation types, then imports terms, relations and themes in that order.
sub importGEMET {
    my ($self) = @_;

    $self->connectSourceDB();
    $self->connectTargetDB();

    my %languages = $self->loadLangs();
    $self->{la} = \%languages;

    my %collections = $self->bootstrapGemetCollection();
    $self->{cid} = \%collections;

    # Relation types must exist before they can be loaded into the cache.
    $self->initRel($self->{cid}{'GEMETREL'});
    my %relation_types = $self->loadReltypes();
    $self->{reltypes} = \%relation_types;

    $self->importGemetTerms();
    $self->importGemetRelations();
    $self->importGemetThemes();
}
 131+
# Attaches semantic types to the imported concepts of one source vocabulary.
#
# $sab is matched with SQL LIKE against MRCONSO.SAB. For every CUI/STY pair
# found, the concept's defined meaning is related (relation type 0, with
# duplicate checking) to the attribute meaning cached in $self->{attribs}.
# A third argument is accepted but not used.
sub importUMLSstypes() {
    my ($self, $sab, $cid) = @_;

    my $assocs = $self->{dbs}->prepare("select MRSTY.CUI, MRSTY.STY from MRCONSO,MRSTY where MRCONSO.SAB like ? and MRCONSO.CUI=MRSTY.CUI");
    $assocs->execute($sab);

    while (my $assoc = $assocs->fetchrow_hashref()) {
        my %member   = $self->getMidForMember($assoc->{CUI});
        my $attr_mid = $self->{attribs}{ $assoc->{STY} };
        # Final argument 1 = check for an existing identical relation first.
        $self->addRelation($member{rid}, 0, $member{mid}, $attr_mid, 1);
    }

}
 147+
 148+
# Looks up the ids of the collections created by an earlier full import.
# Each collection is found through the English expression of its title:
# expression -> defined meaning -> collection.
# Returns a hash mapping the short collection key to its collection id.
sub getCollections() {
    my $self = shift;

    # Key/title pairs, kept in the order in which they are resolved.
    my @titles = (
        'CRISP' => 'CRISP Thesaurus, 2005',
        'STY'   => 'Semantic Network 2005AC Semantic Types',
        'RL'    => 'Semantic Network 2005AC Relation Types',
        'REL'   => 'UMLS Relation Types 2005',
        'RELA'  => 'UMLS Relation Attributes 2005',
        'ICPC'  => 'The International Classification of Primary Care (ICPC), 1993',
        'MESH'  => 'Medical Subject Headings (MeSH), 2005',
    );

    my %cid;
    while (my ($key, $title) = splice(@titles, 0, 2)) {
        my $expression_id = $self->findItem($title, $self->{la}{'en'});
        my $meaning_id    = $self->findMeaning($expression_id);
        $cid{$key} = $self->findCollection($meaning_id);
    }
    return %cid;
}
 161+
# Returns the id of the latest collection whose defining meaning is $mid,
# or undef when no such collection row exists.
sub findCollection() {
    my ($self, $mid) = @_;
    my $lookup = $self->{dbt}->prepare("select collection_id from uw_collection_ns where collection_mid=? and is_latest=1");
    $lookup->execute($mid);
    my $collection = $lookup->fetchrow_hashref();
    return $collection->{collection_id};
}
 170+
# Imports the non-"isa" relations of the semantic network from srstr.
# The is_a relationships come from SEMTYPEHIER and SEMRELHIER instead
# (see importSTrelations); srstr holds all the others.
# FIXME: only use SRSTR
#
# Both ends of every relation are resolved inside the 'STY' collection and
# the relation is added with duplicate checking.
sub importSTrelations2() {
    my ($self) = @_;
    my $sty_collection = $self->{cid}{'STY'};

    my $relations = $self->{dbs}->prepare("select * from srstr where rel!='isa'");
    $relations->execute();

    while (my $relation = $relations->fetchrow_hashref()) {
        my %from = $self->getMidForMember($relation->{TYPE1}, $self->{cid}{'STY'});
        my %to   = $self->getMidForMember($relation->{TYPE2}, $self->{cid}{'STY'});
        my $type_mid = $self->{reltypes}{ $relation->{REL} };
        # Final argument 1 = check for an existing identical relation first.
        $self->addRelation($from{rid}, $type_mid, $from{mid}, $to{mid}, 1);
    }
}
 186+
 187+
# Imports one of the two semantic network hierarchies as meaning relations.
#
# $which selects the hierarchy and the collection its members live in:
#   'STY' - table semtypehier, columns SEMTYPE1 / SEMTYPE2
#   'RL'  - table semrelhier,  columns RELTYPE1 / RELTYPE2
# Every row is added as a relation of the type named in its RELATION column,
# with duplicate checking. Any other $which is reported with a warning and
# nothing is imported (previously SQL was built from an undefined table name).
sub importSTrelations {
    my ($self, $which) = @_;

    my %hierarchy = (
        'STY' => { table => 'semtypehier', from => 'SEMTYPE1', to => 'SEMTYPE2' },
        'RL'  => { table => 'semrelhier',  from => 'RELTYPE1', to => 'RELTYPE2' },
    );
    my $spec = defined($which) ? $hierarchy{$which} : undef;
    unless ($spec) {
        warn "importSTrelations: unknown hierarchy '" . (defined($which) ? $which : '') . "', nothing imported\n";
        return;
    }
    my ($table, $field1, $field2) = @{$spec}{qw(table from to)};

    my $gettypehier = $self->{dbs}->prepare("select * from $table");
    $gettypehier->execute();
    while (my $typehier = $gettypehier->fetchrow_hashref()) {
        my %rv1 = $self->getMidForMember($typehier->{$field1}, $self->{cid}{$which});
        my %rv2 = $self->getMidForMember($typehier->{$field2}, $self->{cid}{$which});
        my $rtmid = $self->{reltypes}{ $typehier->{RELATION} };
        print "Adding relation $typehier->{RELATION} ($rtmid) between $typehier->{$field1} and $typehier->{$field2}\n";
        # Final argument 1 = check for an existing identical relation first.
        $self->addRelation($rv1{rid}, $rtmid, $rv1{mid}, $rv2{mid}, 1);
    }
}
 214+
# Resolves a collection member to its defined meaning.
#   $member_id - the collection-internal identifier for this member
#   $cid       - the collection in which to search for this member (optional;
#                without it every collection is searched)
# Returns a hash with the keys 'mid' (DefinedMeaningID) and 'rid' (revision
# id); both are undef when no latest member row matches.
sub getMidForMember {
    my ($self, $member_id, $cid) = @_;

    my ($sql, @bind);
    if ($cid) {
        $sql  = "select member_mid,revision_id from uw_collection_contents where collection_id=? and internal_member_id=? and is_latest_set=1 limit 1";
        @bind = ($cid, $member_id);
    } else {
        $sql  = "select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1";
        @bind = ($member_id);
    }

    my $lookup = $self->{dbt}->prepare($sql);
    $lookup->execute(@bind);
    my $member = $lookup->fetchrow_hashref();

    my %rv = (
        mid => $member->{member_mid},
        rid => $member->{revision_id},
    );
    return %rv;
}
 237+
# Loads all members of collections of type 'RELT' from the target database.
# Returns a hash mapping each member's collection-internal id to the
# defined meaning id (member_mid) that represents that relation type.
sub loadReltypes {
    my $self = shift;
    my %reltypes;
    # The statement handle is lexical; it used to leak into a package global.
    my $getreltype = $self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='RELT' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
    $getreltype->execute();
    while (my $reltype = $getreltype->fetchrow_hashref()) {
        $reltypes{ $reltype->{internal_member_id} } = $reltype->{member_mid};
    }
    return %reltypes;
}
 249+
# Loads all members of collections of type 'ATTR' from the target database.
# Returns a hash mapping each member's collection-internal id to the
# defined meaning id (member_mid) that represents that attribute.
sub loadAttributes {
    my $self = shift;
    my %attributes;
    # The statement handle is lexical; it used to leak into a package global.
    my $getatt = $self->{dbt}->prepare("select member_mid,internal_member_id from uw_collection_contents,uw_collection_ns where uw_collection_ns.collection_type='ATTR' and uw_collection_ns.collection_id=uw_collection_contents.collection_id");
    $getatt->execute();
    while (my $att = $getatt->fetchrow_hashref()) {
        $attributes{ $att->{internal_member_id} } = $att->{member_mid};
    }
    return %attributes;
}
 260+
 261+
# Imports the semantic network definitions of one kind from SRDEF.
#
# $type is matched against srdef.type and is also the key of the target
# collection in $self->{cid}. For every row the semtypeab value, lower-cased
# and with underscores turned into spaces, becomes an English expression that
# is added to the collection under the unmodified semtypeab value as its
# internal member id; the row's definition becomes its English meaning text.
sub importSNtypes {
    my ($self, $type) = @_;
    # The statement handle is lexical; it used to leak into a package global.
    my $getsemtypes = $self->{dbs}->prepare("select semtypeab,type,definition from srdef where type=?");
    $getsemtypes->execute($type);
    while (my $semtype = $getsemtypes->fetchrow_hashref()) {
        my $type_code = $semtype->{semtypeab};
        my $type_expression = $type_code;
        $type_expression =~ s/_/ /g;
        $type_expression = lc($type_expression);
        my %rv = $self->addExpression($type_expression, $self->{la}{'en'}, 0, $self->{cid}{$type}, $type_code);
        $self->addMeaningText($rv{'rid'}, $rv{'mid'}, $semtype->{definition}, undef, $self->{la}{'en'});
    }
}
 279+
# Imports concept-to-concept relations from MRREL for one source vocabulary.
#
#   $which  - 'REL' to use the rel column, 'RELA' to use the (non-empty)
#             rela column as the relation type
#   $source - SAB, used as a MySQL LIKE pattern
#
# CHD and PAR are treated as RN and RB respectively. A relation is only
# added (with duplicate checking) when both concepts resolve to different
# defined meanings and the relation type is known; unresolved concepts are
# reported on standard output. Dies when $which is neither 'REL' nor 'RELA'.
sub importUMLSrelations {
    my ($self, $which, $source) = @_;

    my $getrels;
    if ($which eq 'REL') {
        $getrels = $self->{dbs}->prepare("select cui1,cui2,rel from MRREL where sab like ?");
    } elsif ($which eq 'RELA') {
        $getrels = $self->{dbs}->prepare("select cui1,cui2,rela from MRREL where sab like ? and rela!=''");
    } else {
        die "importUMLSrelations: expected 'REL' or 'RELA', got '$which'\n";
    }

    # Prepared once and re-executed per concept instead of once per row.
    my $getmid = $self->{dbt}->prepare("select member_mid,revision_id from uw_collection_contents where internal_member_id=? and is_latest_set=1 limit 1");

    # These mean the same thing
    my %alias = ('CHD' => 'RN', 'PAR' => 'RB');

    $getrels->execute($source);
    while (my $rel = $getrels->fetchrow_hashref()) {
        my $relid = $rel->{ lc($which) };
        $relid = $alias{$relid} if defined($relid) && exists $alias{$relid};

        # Note that the direction in UMLS is opposite to ours:
        # our first meaning is UMLS cui2, our second meaning is UMLS cui1.
        $getmid->execute($rel->{cui2});
        my $first = $getmid->fetchrow_hashref() || {};
        $getmid->execute($rel->{cui1});
        my $second = $getmid->fetchrow_hashref() || {};

        my ($mid1, $mid2) = ($first->{member_mid}, $second->{member_mid});

        # FIXME: We are ignoring term relations for now!
        if ($mid1 && $mid2 && $mid1 != $mid2 && $self->{reltypes}{$relid}) {
            $self->addRelation($first->{revision_id}, $self->{reltypes}{$relid}, $mid1, $mid2, 1);
        } elsif (!$mid1 && $mid2) {
            # $mid1 was looked up from cui2, so cui2 is the unresolved one.
            print "Did not find MID for ".$rel->{cui2}."!\n";
        } elsif ($mid1 && !$mid2) {
            print "Did not find MID for ".$rel->{cui1}."!\n";
        } elsif (!$mid1 && !$mid2) {
            print "Did not find MIDs for ".$rel->{cui1}." and ".$rel->{cui2}."!\n";
        }
    }

}
 323+
 324+
# Creates the two collections used by the GEMET import and returns a hash
# mapping 'GEMETREL' (a 'RELT' collection for relation types) and 'GEMET'
# (an untyped collection) to their new collection ids.
sub bootstrapGemetCollection {
    my $self = shift;
    my %cid;
    # Lexical now; this used to overwrite a package-global %rv.
    my %rv;

    %rv = $self->addExpression('GEMET Environmental Thesaurus Relation Types', $self->{la}{'en'});
    $cid{'GEMETREL'} = $self->addCollection($rv{mid}, 'RELT');

    # NOTE(review): the title below is identical to the relation-types title
    # above, which looks like a copy/paste slip - confirm the intended name
    # of the main GEMET collection before changing it.
    %rv = $self->addExpression('GEMET Environmental Thesaurus Relation Types', $self->{la}{'en'});
    $cid{'GEMET'} = $self->addCollection($rv{mid}, '');

    return %cid;
}
 334+
 335+
# Creates the collections used by the UMLS import. For each one an English
# expression carrying the collection title is added, and a collection of the
# listed type is created for the resulting defined meaning.
# Returns a hash mapping the short collection key to the new collection id.
sub bootstrapCollections {
    my $self = shift;

    # [ key, title, collection type ] in creation order.
    my @specs = (
        [ 'CRISP', 'CRISP Thesaurus, 2005', '' ],
        [ 'STY',   'Semantic Network 2005AC Semantic Types', 'ATTR' ],
        [ 'RL',    'Semantic Network 2005AC Relation Types', 'RELT' ],
        [ 'REL',   'UMLS Relation Types 2005', 'RELT' ],
        [ 'RELA',  'UMLS Relation Attributes 2005', 'RELT' ],
        [ 'ICPC',  'The International Classification of Primary Care (ICPC), 1993', '' ],
        [ 'MESH',  'Medical Subject Headings (MeSH), 2005', '' ],
    );

    my %cid;
    foreach my $spec (@specs) {
        my ($key, $title, $type) = @$spec;
        my %created = $self->addExpression($title, $self->{la}{'en'});
        $cid{$key} = $self->addCollection($created{mid}, $type);
    }
    return %cid;
}
 359+
# Inserts a new latest collection row for defined meaning $mid with the
# given collection type, then sets the row's first_ver to its own id.
# Returns the new collection id.
sub addCollection {
    my ($self, $mid, $collection_type) = @_;
    my $dbt = $self->{dbt};

    my $insert = $dbt->prepare('INSERT INTO uw_collection_ns(collection_mid,is_latest,collection_type) values(?,1,?)');
    $insert->execute($mid, $collection_type);
    my $cid = $dbt->last_insert_id(undef, undef, undef, undef);

    # A brand-new collection is its own first version.
    my $set_first_ver = $dbt->prepare('UPDATE uw_collection_ns set first_ver=? where collection_id=?');
    $set_first_ver->execute($cid, $cid);

    return $cid;
}
 371+
# Imports the UMLS relation type vocabulary selected by $which ('REL' or
# 'RELA') into the collection of the same key. Each row's FULL name becomes
# an English expression and its ABBREV the collection-internal member id.
sub importUMLSrelationtypes {
    my ($self, $which) = @_;

    my %query_for = (
        # CHD and PAR are to be interpreted as RN and RB, SUBX is not used
        'REL'  => "select * from rel where ABBREV!='CHD' and ABBREV!='PAR' and ABBREV!='SUBX'",
        'RELA' => "select * from rela",
    );

    my $getreltypes;
    if (exists $query_for{$which}) {
        $getreltypes = $self->{dbs}->prepare($query_for{$which});
    }
    $getreltypes->execute();

    while (my $reltype = $getreltypes->fetchrow_hashref()) {
        $self->addExpression($reltype->{FULL}, $self->{la}{'en'}, 0, $self->{cid}{$which}, $reltype->{ABBREV});
    }
}
 387+
# Imports the terms of one UMLS source vocabulary.
#
#   $sab - the source abbreviation to import (MySQL LIKE pattern on MRCONSO.sab)
#   $cid - the collection to associate newly created defined meanings with
#
# The first term seen for a CUI creates a new defined meaning in the
# collection and pulls in every MRDEF definition of that CUI as English
# meaning text; further terms with the same CUI are attached to that meaning
# as synonyms/translations.
sub importUMLSterms {
    my ($self, $sab, $cid) = @_;

    # Statement handles are lexical and prepared once; the definition query
    # used to be re-prepared for every new concept and stored in a global.
    my $getterm = $self->{dbs}->prepare("select str,cui,lat from MRCONSO where sab like ?");
    my $getdefs = $self->{dbs}->prepare("select def from MRDEF where cui=?");

    $getterm->execute($sab);
    while (my $r = $getterm->fetchrow_hashref()) {
        my $lid = $self->{la_iso}{ lc($r->{lat}) };
        my %cuimid = $self->getMidForMember($r->{cui});

        if ($cuimid{mid}) {
            # Add as SynTrans to existing Defined Meaning
            $self->addExpression($r->{str}, $lid, $cuimid{mid});
            next;
        }

        # Create new expression / Defined Meaning
        my %rv = $self->addExpression($r->{str}, $lid, 0, $cid, $r->{cui});

        # If this is the first time we encounter this CUI, import the definitions
        # Note that we'll take any definitions, regardless of the SABs specified!
        if ($rv{mid} != -1) {
            $getdefs->execute($r->{cui});
            while (my $d = $getdefs->fetchrow_hashref()) {
                # UMLS only has English definitions
                $self->addMeaningText($rv{rid}, $rv{mid}, $d->{def}, 0, $self->{la}{'en'});
            }
        }
    }
}
 420+
 421+
# Imports all GEMET concepts.
#
# Every English term becomes a new defined meaning; the terms of the same
# concept in other languages are attached to it as translations, and all
# definitions of the concept are stored as one translated-content set (the
# first definition opens the set, the others join it).
# An optional argument is accepted but not used.
sub importGemetTerms {
    my ($self, $cid) = @_;
    my $dbs = $self->{dbs};

    # Statement handles and row variables are lexical, and the per-concept
    # queries are prepared once instead of once per English term.
    my $getterm  = $dbs->prepare("select * from term where langcode=?");
    my $gettrans = $dbs->prepare("select name,langcode from term where id_concept=? and langcode!='en'");
    my $getdef   = $dbs->prepare("select definition,langcode from scope where id_concept=?");

    # Get all English terms as base
    $getterm->execute('en');
    while (my $r = $getterm->fetchrow_hashref()) {
        # Add English term as defined meaning
        my %rv = $self->addExpression($r->{name}, $self->{la}{'en'}, 0);

        # All translations: add them with the same meaning ID
        $gettrans->execute($r->{id_concept});
        while (my $t = $gettrans->fetchrow_hashref()) {
            print "Language: $t->{langcode}\n";
            $self->addExpression($t->{name}, $self->{la}{ $t->{langcode} }, $rv{mid});
        }

        # All definitions
        $getdef->execute($r->{id_concept});
        my $tcid = 0;
        while (my $d = $getdef->fetchrow_hashref()) {
            # $tcid is 0 for the first definition, which makes addMeaningText
            # open a new set; its id is then reused for the remaining ones.
            my %mv = $self->addMeaningText($rv{rid}, $rv{mid}, $d->{definition}, $tcid, $self->{la}{ $d->{langcode} });
            $tcid ||= $mv{tcid};
        }
    }
}
 456+
 457+
# Imports the GEMET concept relations. Each row of the source table
# "relation" links id_concept to id_relation with type id_type; the relation
# is added only when both concepts resolve to a defined meaning in the
# target database.
sub importGemetRelations {
    my $self = shift;
    # Import GEMET relations
    my $getrels = $self->{dbs}->prepare("select * from relation");
    $getrels->execute();
    while (my $rrow = $getrels->fetchrow_hashref()) {
        # Lexical per row; these used to be package globals.
        my %rv_A = $self->findGemetItem($rrow->{id_concept});
        my %rv_B = $self->findGemetItem($rrow->{id_relation});
        if ($rv_A{mid} && $rv_B{mid}) {
            $self->addRelation($rv_A{rid}, $self->{reltypes}{ $rrow->{id_type} }, $rv_A{mid}, $rv_B{mid});
        }
    }
}
 471+
# Imports the GEMET themes and ties every concept of a theme to it.
#
# A theme description may list several names separated by "," or ";"; each
# name is handled on its own:
#   - if an expression for the name already exists in the theme's language,
#     its defined meaning is reused;
#   - otherwise a new expression is added - as a translation of an already
#     known translation of the theme when the description was not split and
#     one can be found, else as an independent defined meaning.
# For each id_theme the concepts listed in concept_theme are then related
# (relation type key 'it') to the theme meaning; this is done only for the
# first name processed for that id_theme.
sub importGemetThemes {
    my $self = shift;
    # Remembers which id_theme values already had their concepts tied up.
    # Still a package variable, so the record survives repeated calls.
    our %have_rel;

    # Get all themes
    my $getthemes    = $self->{dbs}->prepare("select * from theme");
    my $gettheme_set = $self->{dbs}->prepare("select * from theme where id_theme=?");
    $getthemes->execute();
    while (my $theme_row = $getthemes->fetchrow_hashref()) {
        my $lid = $self->{la}{ $theme_row->{langcode} };
        my $description = defined($theme_row->{description}) ? $theme_row->{description} : '';

        # Split theme into parts. The separator group is non-capturing so
        # that split does not inject the matched blanks as extra elements.
        foreach my $t (split(/[,;] ?/, $description)) {
            next if $t =~ /^ *$/ || !$t;

            # Does this theme have an expression?
            my @found = $self->findItem($t, $lid, 1);
            my %it = @found > 1 ? @found : ();

            if ($it{liid}) {
                # Get the meaning of the expression that was just found
                # (this used to read the id from an unrelated global %rv).
                print "NEW THEME: $t - retrieving existing MID for LIID... ".$it{liid};
                $it{mid} = $self->findMeaning($it{liid});
                print $it{mid}."\n";
            } else {
                # Do we have any of its translations?
                # We can only add those if the theme does
                # not contain a , - otherwise we can't match!
                my $tra_mid = 0;
                if ($description !~ m/[,;]/) {
                    print "NEW THEME: $t - no record, looking for its known translations in GEMET\n";
                    $gettheme_set->execute($theme_row->{id_theme});
                    while ((my $tra_row = $gettheme_set->fetchrow_hashref()) && !$tra_mid) {
                        my $tra_lid = $self->findItem($tra_row->{description}, $self->{la}{ $tra_row->{langcode} });
                        if ($tra_lid) {
                            $tra_mid = $self->findMeaning($tra_lid);
                        }
                    }
                } else {
                    print "NEW THEME: $t - split from the original GEMET data\n";
                }

                # Let's make one
                if ($tra_mid) {
                    print "Adding new term as translation of $tra_mid\n";
                    %it = $self->addExpression($t, $lid, $tra_mid);
                } else {
                    print "Adding new term independently, we do not know its translations.\n";
                    %it = $self->addExpression($t, $lid);
                }
            }

            next if $have_rel{ $theme_row->{id_theme} };

            # Get all items which have this relation
            my $getconcepts = $self->{dbs}->prepare('select id_concept from concept_theme where id_theme=?');
            $getconcepts->execute($theme_row->{id_theme});
            while (my $concrow = $getconcepts->fetchrow_hashref()) {
                # Get LIID,RID->meaning for the item
                my %tr = $self->findGemetItem($concrow->{id_concept});
                if ($tr{rid}) {
                    $self->addRelation($tr{rid}, $self->{reltypes}{'it'}, $tr{mid}, $it{mid});
                    print "Tied up a relation..";
                } else {
                    print "Missing record to tie the relation to..";
                }
            }
            print "\n";
            $have_rel{ $theme_row->{id_theme} } = 1;
        }
    }
}
 547+
# Maps a GEMET concept id to its records in the target database.
# One term of the concept is read from the source database and looked up as
# an expression in the target; the result hash is whatever findItem returns
# in its "with revision" mode (keys 'liid' and 'rid' on a hit) plus 'mid',
# the defined meaning found for that expression.
sub findGemetItem() {
    my ($self, $concept_id) = @_;

    # get a word, language
    my $term_query = $self->{dbs}->prepare("select langcode,name from term where id_concept=? LIMIT 1");
    $term_query->execute($concept_id);
    my $term = $term_query->fetchrow_hashref();

    # find an expression + meaning
    my $spelling    = $term->{name};
    my $language_id = $self->{la}{ $term->{langcode} };
    my %rv = $self->findItem($spelling, $language_id, 1);
    $rv{mid} = $self->findMeaning($rv{liid});
    return %rv;
}
 561+
# Adds a relation of type $rtid from defined meaning $mid_A to $mid_B.
#
#   $revid         - revision id stored with the relation
#   $rtid          - defined meaning id of the relation type
#   $mid_A, $mid_B - the two related defined meanings
#   $checkfordupes - when true, an identical latest relation is looked for
#                    first and nothing is inserted if one exists
#
# The row joins the latest relation set of $mid_A when there is one, else a
# new set id is taken from getMaxId.
# Returns 0 when a duplicate was skipped (this used to be the bareword
# "false", which Perl treats as a true string); otherwise the result of the
# insert.
sub addRelation {
    my ($self, $revid, $rtid, $mid_A, $mid_B, $checkfordupes) = @_;

    if ($checkfordupes) {
        my $checkdupes = $self->{dbt}->prepare('select 1 as one from uw_meaning_relations where meaning1_mid=? and meaning2_mid=? and relationtype_mid=? and is_latest_set=1 limit 1');
        $checkdupes->execute($mid_A, $mid_B, $rtid);
        my $dupecheck = $checkdupes->fetchrow_hashref();
        if ($dupecheck && $dupecheck->{one}) {
            print "Duplicate relation, not adding.\n";
            return 0;
        }
    }

    # Lexical now; both used to be package globals.
    my $newkey = $self->getSetIdWhere('uw_meaning_relations', 'meaning1_mid', $mid_A)
        || $self->getMaxId('set_id', 'uw_meaning_relations');
    my $addrel = $self->{dbt}->prepare('insert into uw_meaning_relations(set_id,meaning1_mid,meaning2_mid,relationtype_mid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?,?)');
    return $addrel->execute($newkey, $mid_A, $mid_B, $rtid, 1, $newkey, $revid);
}
 584+
 585+
# Returns the defined meaning an expression belongs to.
# The syntrans table is consulted first; if it has no usable row the
# defined-meaning table is tried. Returns 0 when neither yields an id.
sub findMeaning() {
    my ($self, $liid) = @_;

    my @lookups = (
        # Search syntrans table
        "select defined_meaning_id from uw_syntrans where expression_id=?",
        "select defined_meaning_id from uw_defined_meaning where expression_id=? limit 1",
    );

    foreach my $sql (@lookups) {
        my $query = $self->{dbt}->prepare($sql);
        $query->execute($liid);
        my $hit = $query->fetchrow_hashref();
        return $hit->{defined_meaning_id} if $hit->{defined_meaning_id};
    }
    return 0;
}
 604+
# Stores a definition text for a defined meaning.
# If there already is a meaning text for this DefinedMeaning, it will add the
# MeaningText as an alternative definition.
#
#   $rid             - revision id stored with the new rows
#   $mid             - defined meaning the text belongs to
#   $meaningtext     - the definition text (optional)
#   $meaningtext_set - optional TCID set to join with; when false a new
#                      translated-content set id is taken from getMaxId
#   $lid             - language ID, not code
#
# Returns a hash with the key 'tcid', the translated-content set used.
sub addMeaningText {
    my ($self, $rid, $mid, $meaningtext, $meaningtext_set, $lid) = @_;
    my $dbt = $self->{dbt};
    my %rv;

    # Add text row entry
    my $maketext = $dbt->prepare('insert into text(old_text) values(?)');
    $maketext->execute($meaningtext);
    # Get text row ID (lexical now; $tid and $tcid used to be package globals)
    my $tid = $dbt->last_insert_id(undef, undef, undef, undef);
    # Get new or existing translated content set ID
    my $tcid = $meaningtext_set || $self->getMaxId('set_id', 'translated_content');
    # Create new translated content set
    my $maketc = $dbt->prepare('insert into translated_content(set_id,language_id,text_id,first_set,revision_id) values(?,?,?,?,?)');
    $maketc->execute($tcid, $lid, $tid, $tcid, $rid);
    $rv{tcid} = $tcid;

    # THIS DOESN'T WORK FOR DEFINITIONS IN MULTIPLE LANGUAGES
    # Check if a meaning text has already been set
    my $lookformeaning = $dbt->prepare('select meaning_text_tcid from uw_defined_meaning where defined_meaning_id=? and is_latest_ver=1');
    $lookformeaning->execute($mid);
    my $mrow = $lookformeaning->fetchrow_hashref();
    if ($mrow && $mrow->{meaning_text_tcid}) {
        # There is a meaning text - the new one is only an alternative
        my $altset = $self->getSetIdWhere('uw_alt_meaningtexts', 'meaning_mid', $mid)
            || $self->getMaxId('set_id', 'uw_alt_meaningtexts');
        my $addaltmeaning = $dbt->prepare('insert into uw_alt_meaningtexts(set_id,meaning_mid,meaning_text_tcid,is_latest_set,first_set,revision_id) values(?,?,?,?,?,?)');
        $addaltmeaning->execute($altset, $mid, $tcid, 1, $altset, $rid);
    } else {
        my $updatemeaning = $dbt->prepare('update uw_defined_meaning set meaning_text_tcid=? where defined_meaning_id=?');
        $updatemeaning->execute($tcid, $mid);
    }
    return %rv;
}
 643+
 644+
# Adds an expression (a spelling in one language) and links it to a meaning.
#
# If the expression already exists, add a new DefinedMeaning - unless this is
# a translation or synonym; if a record already exists in SynTrans with this
# expression _and_ $translation_of as a DefinedMeaning, do not do anything.
#
#   $expression     - the spelling
#   $lid            - language ID, not code
#   $translation_of - 0 or MID (!), optional: the defined meaning this
#                     expression is a synonym/translation of
#   $collection_id  - optional collection a newly created meaning joins
#   $collection_internal_member_id - what does the collection use to refer
#                     to this member?
#
# Returns a hash with 'rid', 'liid', 'setid' and 'mid' ('pid' too when a page
# was created). 'mid' is -1 when the syntrans record already existed.
sub addExpression {
    my ($self, $expression, $lid, $translation_of, $collection_id, $collection_internal_member_id) = @_;
    my $dbt = $self->{dbt};
    my %rv;
    # All working variables are lexical now; they used to be package globals
    # whose values leaked from one call into the next.
    my ($rid, $liid);

    # findItem returns a plain 0 when nothing matches, so only treat the
    # result as a hash when it really is a key/value list.
    my @found = $self->findItem($expression, $lid, 1);
    my %firv = @found > 1 ? @found : ();

    if ($firv{liid}) {
        # The expression exists already: reuse it and its revision.
        $rid  = $firv{rid};
        $liid = $firv{liid};
    } else {
        #create page
        my $pt = $self->canonize($expression);
        my $makepage = $dbt->prepare('insert into page(page_namespace,page_title,page_is_new,page_title_language_id,page_touched) values(?,?,?,?,?)');
        $makepage->execute(16, $pt, 1, $lid, $self->mwtimestamp());
        my $pid = $dbt->last_insert_id(undef, undef, undef, undef);
        print "PID: $pid\n";
        $rv{pid} = $pid;

        #create revision
        my $makerev = $dbt->prepare('insert into revision(rev_page,rev_comment,rev_user,rev_user_text,rev_timestamp) values(?,?,?,?,?)');
        $makerev->execute($pid, 'Initial import', 2, 'GEMET', $self->mwtimestamp());

        #get revision_id
        $rid = $self->getId('select rev_id from revision where rev_page=?', $pid);

        #update page to link to revision
        my $updatepage = $dbt->prepare('update page set page_latest=? where page_id=?');
        $updatepage->execute($rid, $pid);

        #create expression
        my $makeitem = $dbt->prepare('insert into uw_expression_ns(spelling,language_id,is_latest) values(?,?,1)');
        $makeitem->execute($expression, $lid);
        $liid = $dbt->last_insert_id(undef, undef, undef, undef);

        # update firstver
        my $updateitem = $dbt->prepare('update uw_expression_ns set first_ver=? where expression_id=?');
        $updateitem->execute($liid, $liid);

        #update revision to link to expression
        my $updaterev = $dbt->prepare('update revision set rev_data_id=? where rev_id=?');
        $updaterev->execute($liid, $rid);
    }
    $rv{rid}  = $rid;
    $rv{liid} = $liid;

    #create definedmeaning and/or syntrans record
    if (!$translation_of) {
        my $makemean = $dbt->prepare('insert into uw_defined_meaning(expression_id,revision_id) values(?,?)');
        $makemean->execute($liid, $rid);
        # We always want a syntrans record, so in this case it links to its own
        # def. meaning
        $translation_of = $dbt->last_insert_id(undef, undef, undef, undef);
        $rv{mid} = $translation_of;
        my $updatemeaningver = $dbt->prepare('update uw_defined_meaning set first_ver=? where defined_meaning_id=?');
        $updatemeaningver->execute($translation_of, $translation_of);
        if ($collection_id) {
            my $addtocoll = $dbt->prepare('insert into uw_collection_contents(set_id, collection_id, member_mid, is_latest_set, first_Set, revision_id, internal_member_id) values(?,?,?,?,?,?,?)');
            #fixme set association
            $addtocoll->execute(1, $collection_id, $translation_of, 1, 1, $rid, $collection_internal_member_id);
        }
    }

    # Check if we already have this specific record
    my $checkdupes = $dbt->prepare('select set_id from uw_syntrans where defined_meaning_id=? and expression_id=?');
    $checkdupes->execute($translation_of, $liid);
    my $duperow = $checkdupes->fetchrow_hashref();
    my $dupeid = $duperow ? $duperow->{set_id} : undef;

    if ($dupeid) {
        $rv{setid} = $dupeid; # Dupe
        $rv{mid}   = -1;      # Dupe
        return %rv;
    }

    # Check if this is part of a set. The lookup must use the meaning this
    # expression is being attached to; it used to read a global $mid that
    # still held the meaning created by some earlier call whenever
    # $translation_of was supplied by the caller.
    my $getset = $dbt->prepare('select set_id from uw_syntrans where defined_meaning_id=? and is_latest_set=1');
    $getset->execute($translation_of);
    my $setrow = $getset->fetchrow_hashref();
    my $setid = ($setrow && $setrow->{set_id}) || $self->getMaxId('set_id', 'uw_syntrans');

    # Add syntrans record
    my $maketrans = $dbt->prepare('insert into uw_syntrans(set_id,defined_meaning_id,expression_id,first_set,revision_id,is_latest_set) values(?,?,?,?,?,1)');
    $maketrans->execute($setid, $translation_of, $liid, $setid, $rid);
    $rv{setid} = $setid;
    $rv{mid}   = $translation_of;

    return %rv;
}
 747+
# Looks up the latest expression with exactly this spelling (binary
# comparison) in language $lid.
# Returns 0 when there is none. On a hit the result depends on $returnrid:
# false - the expression id alone; true - a hash with 'liid' (the expression
# id) and 'rid' (the rev_id of a revision whose rev_data_id is that id).
sub findItem {
    my ($self, $expression, $lid, $returnrid) = @_;

    my $lookup = $self->{dbt}->prepare("select expression_id from uw_expression_ns where spelling=binary ? and language_id=? and is_latest=1");
    $lookup->execute($expression, $lid);
    my $hit = $lookup->fetchrow_hashref();

    return 0 unless $hit;
    return $hit->{expression_id} unless $returnrid;

    my $revision = $self->{dbt}->prepare('select rev_id from revision where rev_data_id=?');
    $revision->execute($hit->{expression_id});
    my %rv;
    $rv{liid} = $hit->{expression_id};
    $rv{rid}  = $revision->fetchrow_hashref->{rev_id};
    return %rv;

}
 773+
 774+
# Returns the next free id for a column: the largest value currently stored
# in $field of $table plus one (despite the name, not the maximum itself).
# An empty table yields 1.
# $field and $table are placed into the SQL text verbatim and must therefore
# be trusted identifiers.
sub getMaxId {
    my ($self, $field, $table) = @_;
    # Lexical now; this handle used to share a package global with
    # getSetIdWhere.
    my $getmax = $self->{dbt}->prepare("select max($field) as maxset from $table");
    $getmax->execute();
    my $row = $getmax->fetchrow_hashref();
    # max() over no rows is NULL; count from 0 in that case.
    my $max = ($row && defined $row->{maxset}) ? $row->{maxset} : 0;
    return $max + 1;
}
 784+
# Returns the set_id of one latest-set row of $table whose column
# $wherefield equals $wherekey, or undef when there is no such row.
# $table and $wherefield are placed into the SQL text verbatim and must
# therefore be trusted identifiers; $wherekey is bound as a parameter.
sub getSetIdWhere {
    my ($self, $table, $wherefield, $wherekey) = @_;
    # Lexical now; this handle used to share a package global with getMaxId.
    my $getset = $self->{dbt}->prepare("select set_id from $table WHERE $wherefield=? AND is_latest_set=1 limit 1");
    $getset->execute($wherekey);
    my $row = $getset->fetchrow_hashref();
    return $row ? $row->{set_id} : undef;
}
 795+
 796+
# Runs a single-value query against the target database.
#
#   $prep - SQL text with "?" placeholders
#   @_    - the values to bind to the placeholders
#
# Returns the first column of the first result row, or undef when the query
# yields no row. The value is read by position, so aliased or computed
# columns work too; the column name used to be parsed out of the SQL text
# with an unchecked regular expression, which could silently reuse the
# capture of an earlier, unrelated match.
sub getId {
    my ($self, $prep, @bind) = @_;
    my $query = $self->{dbt}->prepare($prep);
    $query->execute(@bind);
    my ($id) = $query->fetchrow_array();
    return $id;
}
 808+
 809+sub mwtimestamp {
 810+ my $self=shift;
 811+ use POSIX qw(strftime);
 812+ return(strftime "%Y%m%d%H%M%S", localtime);
 813+}
 814+
 815+
 816+sub canonize {
 817+ my $self=shift;
 818+ my $title=shift;
 819+ #$title=ucfirst($title);
 820+ $title=~s/ /_/ig;
 821+ return $title;
 822+}
 823+
 824+sub initlangs {
 825+ my $self=shift;
 826+ %langs=(
 827+ en_en=>'English',
 828+ en_de=>'Englisch',
 829+ 'en-US_de'=>'Englisch (USA)',
 830+ 'en-US_en'=>'English (United States)',
 831+ bg_en=>'Bulgarian',
 832+ bg_de=>'Bulgarisch',
 833+ cs_en=>'Czech',
 834+ cs_de=>'Tschechisch',
 835+ da_en=>'Dansk',
 836+ da_de=>'D?isch',
 837+ de_en=>'German',
 838+ de_de=>'Deutsch',
 839+ es_en=>'Spanish',
 840+ es_de=>'Spanisch',
 841+ et_en=>'Estonian',
 842+ et_de=>'Estnisch',
 843+ eu_en=>'Basque',
 844+ eu_de=>'Baskisch',
 845+ fi_en=>'Finnish',
 846+ fi_de=>'Finnisch',
 847+ fr_en=>'French',
 848+ fr_de=>'Franz?isch',
 849+ hu_en=>'Hungarian',
 850+ hu_de=>'Ungarisch',
 851+ it_en=>'Italian',
 852+ it_de=>'Italienisch',
 853+ nl_en=>'Dutch',
 854+ nl_de=>'Niederl?disch',
 855+ no_en=>'Norwegian',
 856+ no_de=>'Norwegisch',
 857+ pl_en=>'Polish',
 858+ pl_de=>'Polnisch',
 859+ pt_en=>'Portuguese',
 860+ pt_de=>'Portugiesisch',
 861+ ru_en=>'Russian',
 862+ ru_de=>'Russisch',
 863+ sk_en=>'Slovak',
 864+ sk_de=>'Slowakische Sprache',
 865+ sl_en=>'Slovenian',
 866+ sl_de=>'Slowenisch',
 867+ el_en=>'Greek',
 868+ el_de=>'Griechisch',
 869+ sv_en=>'Swedish',
 870+ sv_de=>'Schwedisch');
 871+ foreach(keys(%langs)) {
 872+ $key=$_;
 873+ $key=~m/(.*?)_(.*)/i;
 874+ $lang=$1;
 875+ #print "Lang: $lang\n";
 876+ $wordlang=$2;
 877+ if($wordlang eq 'en') {
 878+ $addwm=$self->{dbt}->prepare("insert into language(wikimedia_key) values(?)");
 879+ $addwm->execute($lang);
 880+ }
 881+ }
 882+ foreach(keys(%langs)) {
 883+ $key=$_;
 884+ $key=~m/(.*?)_(.*)/i;
 885+ $lang=$1;
 886+ #print "Lang: $lang\n";
 887+ $wordlang=$2;
 888+ $langword_u=$langs{$key};
 889+ $langword=encode("utf8",$langword_u);
 890+ $newwm=$self->{dbt}->prepare("select language_id from language where wikimedia_key=?");
 891+ $newwm->execute($lang);
 892+ my $row=$newwm->fetchrow_hashref();
 893+ $newwm->execute('en');
 894+ my $en_row=$newwm->fetchrow_hashref();
 895+ $newwm->execute('de');
 896+ my $de_row=$newwm->fetchrow_hashref();
 897+ $newword=$self->{dbt}->prepare("insert into language_names values (?,?,?)");
 898+ if($wordlang eq 'en') {
 899+ $newword->execute($row->{language_id},$en_row->{language_id},$langword);
 900+ } elsif($wordlang eq 'de') {
 901+ $newword->execute($row->{language_id},$de_row->{language_id},$langword);
 902+ }
 903+ }
 904+}
 905+
 906+sub initRel {
 907+ my $self=shift;
 908+ my $cid=shift;
 909+ %rel_types=(
 910+ bt_en=>'broader terms',
 911+ bt_de=>'breitere Begriffe',
 912+ nt_en=>'narrower terms',
 913+ nt_de=>'engere Begriffe',
 914+ rt_en=>'related terms',
 915+ rt_de=>'verwandte Begriffe',
 916+ it_en=>'is part of theme',
 917+ it_de=>'ist Themenbestandteil von'
 918+ );
 919+
 920+ %rel_definitions=(
 921+ bt_en=>'Those terms in a thesaurus which are broader than others',
 922+ bt_de=>'Die Begriffe in einem Thesaurus, die breiter sind als andere',
 923+ nt_en=>'Those terms in a thesaurus which are narrower than others',
 924+ nt_de=>'Die Begriffe in einem Thesaurus, die enger sind als andere',
 925+ rt_en=>'Those terms in a thesaurus which are related to others',
 926+ rt_de=>'Die Begriffe in einem Thesaurus, die mit anderen verwandt sind',
 927+ it_en=>'Those terms in a thesaurus or dictionary which are associated with a topic',
 928+ it_de=>'Die Begriffe in einem Thesaurus oder Woerterbuch, die mit einem Thema assoziiert sind');
 929+
 930+ foreach(keys(%rel_types)) {
 931+ $key=$_;
 932+ $key=~m/(..)_(..)/i;
 933+ $ident=$1;
 934+ $lang=$2;
 935+ if($lang eq 'de') {
 936+ $en_key="$ident\_en";
 937+ my %rv=$self->addExpression($rel_types{$en_key},$self->{la}{'en'},0,$cid,$ident);
 938+ $self->addMeaningText($rv{rid},$rv{mid},$rel_definitions{$en_key},0,$self->{la}{'en'});
 939+ my %dv=$self->addExpression($rel_types{$key},$self->{la}{'de'},$rv{'mid'});
 940+ $self->addMeaningText($dv{rid},$dv{mid},$rel_definitions{$key},$rv{'tcid'},$self->{la}{'de'});
 941+ }
 942+ }
 943+}
 944+
 945+sub loadLangs() {
 946+ my $self=shift;
 947+ my %la;
 948+ $getlangs=$self->{dbt}->prepare('select language_id,wikimedia_key from language');
 949+ $getlangs->execute();
 950+ while($langrow=$getlangs->fetchrow_hashref()) {
 951+ $la{$langrow->{wikimedia_key}}=$langrow->{language_id};
 952+ }
 953+ return %la;
 954+}
 955+
 956+sub loadLangsIso() {
 957+ my $self=shift;
 958+ my %la_iso;
 959+ $getlangs=$self->{dbt}->prepare('select language_id,iso639_2 from language');
 960+ $getlangs->execute();
 961+ while($langrow=$getlangs->fetchrow_hashref()) {
 962+ $la_iso{$langrow->{iso639_2}}=$langrow->{language_id};
 963+ }
 964+ return %la_iso;
 965+}
 966+
 967+return(1);
\ No newline at end of file
Index: trunk/extensions/Wikidata/README
@@ -0,0 +1 @@
 2+This directory is for Wikidata-related extensions, tools and applications. Wikidata itself is intended to become part of the MediaWiki core.
\ No newline at end of file

Status & tagging log