Index: trunk/extensions/Wikidata/WP/ProgressBar.php |
— | — | @@ -1,7 +1,7 @@ |
2 | 2 | <?php
|
3 | 3 |
|
4 | 4 | global
|
5 | | - $beginTime;
|
| 5 | + $beginTime, $progressBarMaximum, $progressBarCurrent, $lastTimeDisplayed, $lastProgressDisplayed, $progressBarRefreshRate;
|
6 | 6 |
|
7 | 7 | function durationToString($seconds) {
|
8 | 8 | $hours = floor($seconds / 3600);
|
— | — | @@ -12,26 +12,57 @@ |
13 | 13 | return str_pad($hours, 2, "0", STR_PAD_LEFT) .":". str_pad($minutes, 2, "0", STR_PAD_LEFT) .":". str_pad($seconds, 2, "0", STR_PAD_LEFT);
|
14 | 14 | }
|
15 | 15 |
|
16 | | -function progressBar($current, $maximum) {
|
| 16 | +function initializeProgressBar($maximum, $refreshRate) {
|
| 17 | + global
|
| 18 | + $progressBarMaximum, $progressBarCurrent, $lastProgressDisplayed, $progressBarRefreshRate;
|
| 19 | +
|
| 20 | + $progressBarMaximum = $maximum;
|
| 21 | + $progressBarCurrent = 0;
|
| 22 | + $lastProgressDisplayed = 0;
|
| 23 | + $progressBarRefreshRate = $refreshRate;
|
| 24 | +
|
| 25 | + displayProgressBar();
|
| 26 | +}
|
| 27 | +
|
| 28 | +function displayProgressBar() {
|
17 | 29 | global
|
18 | | - $beginTime;
|
| 30 | + $beginTime, $progressBarMaximum, $progressBarCurrent, $lastTimeDisplayed, $lastProgressDisplayed, $progressBarRefreshRate;
|
19 | 31 |
|
20 | | - $timeElapsed = time() - $beginTime;
|
21 | | - $barWidth = 45;
|
22 | | -
|
23 | | - if ($maximum > 0) {
|
24 | | - $percentage = floor(100 * $current / $maximum);
|
25 | | - $barFull = floor($barWidth * $current / $maximum);
|
26 | | - }
|
27 | | - else {
|
28 | | - $percentage = 100;
|
29 | | - $barFull = $barWidth;
|
| 32 | + if ($progressBarCurrent == 0 || $progressBarCurrent >= $lastProgressDisplayed + $progressBarRefreshRate) {
|
| 33 | + $lastProgressDisplayed = $progressBarCurrent;
|
| 34 | + $timeElapsed = time() - $beginTime;
|
| 35 | + $barWidth = 45;
|
| 36 | +
|
| 37 | + if ($progressBarMaximum > 0) {
|
| 38 | + $percentage = floor(100 * $progressBarCurrent / $progressBarMaximum);
|
| 39 | + $barFull = floor($barWidth * $progressBarCurrent / $progressBarMaximum);
|
| 40 | + }
|
| 41 | + else {
|
| 42 | + $percentage = 100;
|
| 43 | + $barFull = $barWidth;
|
| 44 | + }
|
| 45 | +
|
| 46 | + echo "\r " . str_pad($percentage, 3, " ", STR_PAD_LEFT) . "% of $progressBarMaximum [". str_repeat("=", $barFull) . str_repeat(" ", $barWidth - $barFull) .
|
| 47 | + "] " . durationToString($timeElapsed);
|
30 | 48 | }
|
31 | | -
|
32 | | - echo "\r " . str_pad($percentage, 3, " ", STR_PAD_LEFT) . "% of $maximum [". str_repeat("=", $barFull) . str_repeat(" ", $barWidth - $barFull) .
|
33 | | - "] " . durationToString($timeElapsed);
|
34 | 49 | }
|
35 | 50 |
|
| 51 | +function advanceProgressBar($amount) {
|
| 52 | + global
|
| 53 | + $progressBarCurrent;
|
| 54 | +
|
| 55 | + $progressBarCurrent += $amount;
|
| 56 | + displayProgressBar();
|
| 57 | +}
|
| 58 | +
|
| 59 | +function setProgressBarPosition($position) {
|
| 60 | + global
|
| 61 | + $progressBarCurrent;
|
| 62 | +
|
| 63 | + $progressBarCurrent = $position;
|
| 64 | + displayProgressBar();
|
| 65 | +}
|
| 66 | +
|
36 | 67 | function clearProgressBar() {
|
37 | 68 | echo "\r" . str_repeat(" ", 79) . "\r";
|
38 | 69 | }
|
Index: trunk/extensions/Wikidata/WP/UMLSImport.php |
— | — | @@ -3,9 +3,6 @@ |
4 | 4 | require_once("../../../LocalSettings.php"); |
5 | 5 | require_once("../WiktionaryZ/Expression.php"); |
6 | 6 | |
7 | | -// Uncomment following line for versioning support |
8 | | -require_once("../WiktionaryZ/Transaction.php"); |
9 | | - |
10 | 7 | require_once("ProgressBar.php"); |
11 | 8 | require_once("Setup.php"); |
12 | 9 | |
— | — | @@ -14,14 +11,14 @@ |
15 | 12 | public $sourceAbbreviations = array(); |
16 | 13 | } |
17 | 14 | |
18 | | -function importUMLSFromDatabase($server, $databaseName, $userName, $password) { |
| 15 | +/* |
| 16 | + * Import UMLS entirely. Be sure to have started a transaction first! |
| 17 | + */ |
| 18 | +function importUMLSFromDatabase($server, $databaseName, $userName, $password, $sources = null) { |
19 | 19 | $result = new UMLSImportResult; |
20 | 20 | |
21 | 21 | openDatabase($server, $databaseName, $userName, $password); |
22 | 22 | |
23 | | - // Uncomment following line for versioning support |
24 | | - startNewTransaction(0, 0, "UMLS Import"); |
25 | | - |
26 | 23 | $languageId = 85; |
27 | 24 | echo "Creating UMLS collections\n"; |
28 | 25 | $umlsCollectionId = bootstrapCollection("UMLS", $languageId, ""); |
— | — | @@ -37,7 +34,7 @@ |
38 | 35 | addDefinedMeaningToCollection(getCollectionMeaningId($semanticNetworkRelationTypesCollectionId), $umlsCollectionId, "RL"); |
39 | 36 | |
40 | 37 | echo "Loading source abbreviations\n"; |
41 | | - $sourceAbbreviations = loadSourceAbbreviations(); |
| 38 | + $sourceAbbreviations = loadSourceAbbreviations($sources); |
42 | 39 | |
43 | 40 | echo "Loading languages\n"; |
44 | 41 | $isoLanguages = loadIsoLanguages(); |
— | — | @@ -45,15 +42,21 @@ |
46 | 43 | echo "Importing UMLS terms per source\n"; |
47 | 44 | $i = 1; |
48 | 45 | foreach ($sourceAbbreviations as $sab => $source) { |
49 | | -// if($sab == "ICPC" || $sab == "SRC" || $sab == "GO" || $sab == "NCI") { |
50 | | - $collectionId = bootstrapCollection($source, $languageId, ""); |
51 | | - $result->sourceAbbreviations[$sab] = $collectionId; |
52 | | - clearProgressBar(); |
53 | | - importUMLSTerms($i, $source, $sab, $umlsCollectionId, $collectionId, $languageId, $isoLanguages); |
54 | | - $i++; |
55 | | -// } |
| 46 | + $collectionId = bootstrapCollection($source, $languageId, ""); |
| 47 | + $result->sourceAbbreviations[$sab] = $collectionId; |
| 48 | + echo " $i: $sab - $source\n"; |
| 49 | + importUMLSTerms($sab, $umlsCollectionId, $collectionId, $languageId, $isoLanguages); |
| 50 | + $i++; |
56 | 51 | } |
57 | 52 | |
| 53 | + echo "Importing UMLS definitions per source\n"; |
| 54 | + $i = 1; |
| 55 | + foreach ($sourceAbbreviations as $sab => $source) { |
| 56 | + echo " $i: $sab - $source\n"; |
| 57 | + importUMLSDefinitions($sab, $umlsCollectionId, $result->sourceAbbreviations[$sab], $languageId); |
| 58 | + $i++; |
| 59 | + } |
| 60 | + |
58 | 61 | echo "Importing UMLS relation types\n"; |
59 | 62 | importUMLSRelationTypes($relationCollectionId, $languageId); |
60 | 63 | |
— | — | @@ -63,17 +66,17 @@ |
64 | 67 | echo "Importing UMLS relations per source\n"; |
65 | 68 | $relationCollection = getCollectionContents($relationCollectionId); |
66 | 69 | $relationAttributesCollection = getCollectionContents($relationAttributesCollectionId); |
67 | | - $i = 0; |
| 70 | + $i = 1; |
68 | 71 | |
69 | 72 | foreach ($sourceAbbreviations as $sab => $source) { |
70 | | -// if($sab == "ICPC" || $sab == "GO" || $sab == "NCI") { |
71 | | - echo " $i: $source\n"; |
72 | | - $query = "select cui1, cui2, rel from MRREL where sab like '$sab'"; |
73 | | - importUMLSRelations($umlsCollectionId , $relationCollection, $query); |
74 | | - $query = "select cui1, cui2, rela from MRREL where sab like '$sab' and rela!=''"; |
75 | | - importUMLSRelations($umlsCollectionId , $relationAttributesCollection, $query); |
76 | | - $i++; |
77 | | -// } |
| 73 | + echo " $i: $sab - $source\n"; |
| 74 | + |
| 75 | + $query = "select cui1, cui2, rel from MRREL where sab like '$sab'"; |
| 76 | + importUMLSRelations($umlsCollectionId , $relationCollection, $query); |
| 77 | + |
| 78 | + $query = "select cui1, cui2, rela from MRREL where sab like '$sab' and rela!=''"; |
| 79 | + importUMLSRelations($umlsCollectionId , $relationAttributesCollection, $query); |
| 80 | + $i++; |
78 | 81 | } |
79 | 82 | |
80 | 83 | echo "Importing semantic network types\n"; |
— | — | @@ -88,10 +91,8 @@ |
89 | 92 | $attributeTypes = getCollectionContents($semanticNetworkSemanticTypesCollectionId); |
90 | 93 | $i = 1; |
91 | 94 | foreach ($sourceAbbreviations as $sab => $source) { |
92 | | -// if($sab == "ICPC" || $sab == "GO" || $sab == "NCI") { |
93 | | - echo " " . $i++ . ": $source\n"; |
94 | | - importUMLSSemanticTypes($sab, $umlsCollectionId, $attributeTypes); |
95 | | -// } |
| 95 | + echo " " . $i++ . ": $sab - $source\n"; |
| 96 | + importUMLSSemanticTypes($sab, $umlsCollectionId, $attributeTypes); |
96 | 97 | } |
97 | 98 | |
98 | 99 | return $result; |
— | — | @@ -119,33 +120,44 @@ |
120 | 121 | return $languages; |
121 | 122 | } |
122 | 123 | |
123 | | -function loadSourceAbbreviations() { |
| 124 | +function loadSourceAbbreviations($sources = null) { |
124 | 125 | global |
125 | 126 | $db; |
126 | 127 | |
127 | 128 | $sourceAbbreviations = array(); |
128 | 129 | $queryResult = mysql_query("select RSAB, SON from MRSAB", $db); |
129 | 130 | |
130 | | - while ($sab = mysql_fetch_object($queryResult)) { |
131 | | - $sourceAbbreviations[$sab->RSAB] = $sab->SON; |
132 | | - } |
| 131 | + while ($sab = mysql_fetch_object($queryResult)) |
| 132 | + if ($sources == null || in_array($sab->RSAB, $sources)) |
| 133 | + $sourceAbbreviations[$sab->RSAB] = str_replace('_', '-', $sab->SON); |
133 | 134 | |
134 | 135 | mysql_free_result($queryResult); |
135 | 136 | |
136 | 137 | return $sourceAbbreviations; |
137 | 138 | } |
138 | 139 | |
139 | | -function importUMLSTerms($index, $name, $sab, $umlsCollectionId, $sourceCollectionId, $languageId, $isoLanguages) { |
| 140 | +function getSourceName($sourceAbbreviation) { // Returns the SON value of the MRSAB row whose RSAB equals $sourceAbbreviation, or null when no row matches. |
140 | 141 | global |
141 | 142 | $db; |
142 | 143 | |
143 | | - $queryResult = mysql_query("select str, cui, lat, code from MRCONSO where sab like '$sab'", $db); |
144 | | - $rowCount = mysql_num_rows($queryResult); |
| 144 | + $escapedAbbreviation = mysql_real_escape_string($sourceAbbreviation, $db); // the value is interpolated into the SQL text below, so escape it first |
| 145 | + $queryResult = mysql_query("select SON from MRSAB WHERE RSAB='$escapedAbbreviation'", $db); // previously used '$source', a variable this function never defines |
145 | 146 | |
146 | | - echo " $index: $name ($rowCount)\n"; |
147 | | - $i = 0; |
148 | | - progressBar(0, $rowCount); |
| 147 | + $sab = mysql_fetch_object($queryResult); |
| 148 | + $result = $sab ? $sab->SON : null; // mysql_fetch_object() yields false when there is no row to fetch |
| 149 | + |
| 150 | + mysql_free_result($queryResult); |
| 151 | + |
| 152 | + return $result; |
| 153 | +} |
149 | 154 | |
| 155 | +function importUMLSTerms($sab, $umlsCollectionId, $sourceCollectionId, $languageId, $isoLanguages) { |
| 156 | + global |
| 157 | + $db; |
| 158 | + |
| 159 | + $queryResult = mysql_query("select str, cui, lat, code from MRCONSO where sab like '$sab'", $db); |
| 160 | + initializeProgressBar(mysql_num_rows($queryResult), 100); |
| 161 | + |
150 | 162 | $collectionMeaningId = getCollectionMeaningId($sourceCollectionId); |
151 | 163 | |
152 | 164 | while ($umlsTerm = mysql_fetch_object($queryResult)) { |
— | — | @@ -157,26 +169,51 @@ |
158 | 170 | addDefinedMeaningToCollection($definedMeaningId, $umlsCollectionId, $umlsTerm->cui); |
159 | 171 | } |
160 | 172 | $expression->assureIsBoundToDefinedMeaning($definedMeaningId, true); |
161 | | - $definitionQueryResult = mysql_query("select def, sab from MRDEF where sab = '$sab' and cui='$umlsTerm->cui'", $db); |
162 | | - if($definition = mysql_fetch_object($definitionQueryResult)) { |
163 | | - if(!getDefinedMeaningDefinitionId($definedMeaningId)) { |
| 173 | +// $definitionQueryResult = mysql_query("select def, sab from MRDEF where sab = '$sab' and cui='$umlsTerm->cui'", $db); |
| 174 | +// if($definition = mysql_fetch_object($definitionQueryResult)) { |
| 175 | +// if(!getDefinedMeaningDefinitionId($definedMeaningId)) { |
| 176 | +// addDefinedMeaningDefiningDefinition($definedMeaningId, $languageId, $definition->def); |
| 177 | +// } |
| 178 | +// addDefinedMeaningAlternativeDefinition($definedMeaningId, $languageId, $definition->def, $collectionMeaningId); |
| 179 | +// |
| 180 | +// while ($definition = mysql_fetch_object($definitionQueryResult)) { |
| 181 | +// addDefinedMeaningAlternativeDefinition($definedMeaningId, $languageId, $definition->def, $collectionMeaningId); |
| 182 | +// } |
| 183 | +// } |
| 184 | +// mysql_free_result($definitionQueryResult); |
| 185 | + |
| 186 | + addDefinedMeaningToCollectionIfNotPresent($definedMeaningId, $sourceCollectionId, $umlsTerm->code); |
| 187 | + advanceProgressBar(1); |
| 188 | + } |
| 189 | + |
| 190 | + mysql_free_result($queryResult); |
| 191 | + clearProgressBar(); |
| 192 | +} |
| 193 | + |
| 194 | +function importUMLSDefinitions($sab, $umlsCollectionId, $sourceCollectionId, $languageId) { |
| 195 | + global |
| 196 | + $db; |
| 197 | + |
| 198 | + $queryResult = mysql_query("select def, cui from MRDEF where sab = '$sab'", $db); |
| 199 | + initializeProgressBar(mysql_num_rows($queryResult), 100); |
| 200 | + |
| 201 | + $collectionMeaningId = getCollectionMeaningId($sourceCollectionId); |
| 202 | + |
| 203 | + while ($definition = mysql_fetch_object($queryResult)) { |
| 204 | + $definedMeaningId = getDefinedMeaningFromCollection($umlsCollectionId, $definition->cui); |
| 205 | + |
| 206 | + if($definedMeaningId) { |
| 207 | + if(!getDefinedMeaningDefinitionId($definedMeaningId)) |
164 | 208 | addDefinedMeaningDefiningDefinition($definedMeaningId, $languageId, $definition->def); |
165 | | - } |
| 209 | + |
166 | 210 | addDefinedMeaningAlternativeDefinition($definedMeaningId, $languageId, $definition->def, $collectionMeaningId); |
167 | | - |
168 | | - while ($definition = mysql_fetch_object($definitionQueryResult)) { |
169 | | - addDefinedMeaningAlternativeDefinition($definedMeaningId, $languageId, $definition->def, $collectionMeaningId); |
170 | | - } |
171 | 211 | } |
172 | | - mysql_free_result($definitionQueryResult); |
173 | | - |
174 | | - addDefinedMeaningToCollectionIfNotPresent($definedMeaningId, $sourceCollectionId, $umlsTerm->code); |
175 | | - $i++; |
176 | 212 | |
177 | | - if ($i % 50 == 0) |
178 | | - progressBar($i, $rowCount); |
| 213 | + advanceProgressBar(1); |
179 | 214 | } |
180 | | - mysql_free_result($queryResult); |
| 215 | + |
| 216 | + mysql_free_result($queryResult); |
| 217 | + clearProgressBar(); |
181 | 218 | } |
182 | 219 | |
183 | 220 | function importUMLSRelationTypes($relationCollectionId, $languageId) { |
— | — | @@ -220,6 +257,8 @@ |
221 | 258 | $db; |
222 | 259 | |
223 | 260 | $queryResult = mysql_query($query, $db); |
| 261 | + initializeProgressBar(mysql_num_rows($queryResult), 100); |
| 262 | + |
224 | 263 | while ($relation = mysql_fetch_row($queryResult)) { |
225 | 264 | $relationType = $relation[2]; |
226 | 265 | if(strcmp($relationType, 'CHD') == 0) { |
— | — | @@ -245,8 +284,11 @@ |
246 | 285 | print_r($relationCollectionContents); |
247 | 286 | print_r($relation); |
248 | 287 | } |
249 | | - addRelation($definedMeaningId2, $relationMeaningId, $definedMeaningId1); |
| 288 | + addRelation($definedMeaningId2, $relationMeaningId, $definedMeaningId1); |
| 289 | + advanceProgressBar(1); |
250 | 290 | } |
| 291 | + |
| 292 | + clearProgressBar(); |
251 | 293 | } |
252 | 294 | |
253 | 295 | function importSNTypes($collectionId, $query, $languageId) { |
— | — | @@ -303,6 +345,9 @@ |
304 | 346 | |
305 | 347 | $query = "SELECT MRSTY.CUI, MRSTY.STY FROM MRCONSO,MRSTY where MRCONSO.SAB like '$sab' and MRCONSO.CUI=MRSTY.CUI"; |
306 | 348 | $queryResult = mysql_query($query, $db); |
| 349 | + |
| 350 | + initializeProgressBar(mysql_num_rows($queryResult), 100); |
| 351 | + |
307 | 352 | while ($attribute = mysql_fetch_object($queryResult)) { |
308 | 353 | $definedMeaningId = getDefinedMeaningFromCollection($collectionId, $attribute->CUI); |
309 | 354 | $attributeMeaningId = $attributeTypes[$attribute->STY]; |
— | — | @@ -315,8 +360,11 @@ |
316 | 361 | echo "Unknown attribute $attribute->STY\n"; |
317 | 362 | print_r($attribute); |
318 | 363 | } |
319 | | - addRelation($definedMeaningId, 0, $attributeMeaningId); |
320 | | - } |
| 364 | + addClassMembership($definedMeaningId, $attributeMeaningId); |
| 365 | + advanceProgressBar(1); |
| 366 | + } |
| 367 | + |
| 368 | + clearProgressBar(); |
321 | 369 | } |
322 | 370 | |
323 | 371 | ?> |
Index: trunk/extensions/Wikidata/WP/XMLImport.php |
— | — | @@ -1,7 +1,5 @@ |
2 | 2 | <?php
|
3 | 3 |
|
4 | | -require_once("ProgressBar.php");
|
5 | | -
|
6 | 4 | $depth = array();
|
7 | 5 | $specificXMLParser;
|
8 | 6 |
|
Index: trunk/extensions/Wikidata/WP/2GoMappingImport.php |
— | — | @@ -1,6 +1,6 @@ |
2 | 2 | <?php |
3 | 3 | |
4 | | -function importEC2GoMapping($fileName) { |
| 4 | +function loadEC2GoMapping($fileName) { |
5 | 5 | $mapping=array(); |
6 | 6 | $fileHandle = fopen($fileName, "r"); |
7 | 7 | |
— | — | @@ -31,7 +31,7 @@ |
32 | 32 | return $mapping; |
33 | 33 | } |
34 | 34 | |
35 | | -function importSwissProtKeyWord2GoMapping($fileName) { |
| 35 | +function loadSwissProtKeyWord2GoMapping($fileName) { |
36 | 36 | $mapping=array(); |
37 | 37 | $fileHandle = fopen($fileName, "r"); |
38 | 38 | |
Index: trunk/extensions/Wikidata/WP/DataImport.php |
— | — | @@ -2,115 +2,71 @@ |
3 | 3 | |
4 | 4 | define('MEDIAWIKI', true ); |
5 | 5 | require_once("../../../LocalSettings.php"); |
6 | | -require_once("../WiktionaryZ/Expression.php"); |
7 | 6 | require_once("Setup.php"); |
| 7 | +require_once("../WiktionaryZ/Expression.php"); |
| 8 | +require_once("../WiktionaryZ/Transaction.php"); |
8 | 9 | require_once('SwissProtImport.php'); |
9 | 10 | require_once('XMLImport.php'); |
10 | 11 | require_once('2GoMappingImport.php'); |
11 | | -require_once("ProgressBar.php"); |
12 | 12 | require_once("UMLSImport.php"); |
13 | 13 | |
14 | | -// Uncomment following line for versioning support |
15 | | -require_once("../WiktionaryZ/Transaction.php"); |
16 | | - |
17 | 14 | ob_end_flush(); |
18 | 15 | |
19 | 16 | global |
20 | | - $beginTime, $wgCommandLineMode, $numberOfBytes; |
| 17 | + $beginTime, $wgCommandLineMode, $wgUser, $numberOfBytes; |
21 | 18 | |
22 | 19 | $beginTime = time(); |
23 | 20 | $wgCommandLineMode = true; |
24 | 21 | |
| 22 | +/* |
| 23 | + * User IDs to use during the import of both UMLS and Swiss-Prot |
| 24 | + */ |
| 25 | +$nlmUserID = 8; |
| 26 | +$sibUserID = 10; |
| 27 | + |
25 | 28 | $linkEC2GoFileName = "LinksEC2Go.txt"; |
26 | 29 | $linkSwissProtKeyWord2GoFileName = "LinksSP2Go.txt"; |
27 | 30 | $swissProtXMLFileName = "uniprot_sprot.xml"; |
28 | 31 | //$swissProtXMLFileName = "100000lines.xml"; |
29 | 32 | |
30 | | -$umlsImport = importUMLSFromDatabase("localhost", "umls", "root", ""); |
31 | | -$EC2GoMapping = importEC2GoMapping($linkEC2GoFileName); |
32 | | -$SP2GoMapping = importSwissProtKeyWord2GoMapping($linkSwissProtKeyWord2GoFileName); |
| 33 | +$wgUser->setID($nlmUserID); |
| 34 | +startNewTransaction($nlmUserID, 0, "UMLS Import"); |
| 35 | +echo "Importing UMLS\n"; |
| 36 | +$umlsImport = importUMLSFromDatabase("localhost", "umls", "root", "nicheGod");//, array("NCI", "GO")); |
33 | 37 | |
| 38 | +$EC2GoMapping = loadEC2GoMapping($linkEC2GoFileName); |
| 39 | +$SP2GoMapping = loadSwissProtKeyWord2GoMapping($linkSwissProtKeyWord2GoFileName); |
| 40 | + |
| 41 | +$wgUser->setID($sibUserID); |
| 42 | +startNewTransaction($sibUserID, 0, "Swiss-Prot Import"); |
| 43 | +echo "\nImporting Swiss-Prot\n"; |
| 44 | +//importSwissProt($swissProtXMLFileName); |
34 | 45 | importSwissProt($swissProtXMLFileName, $umlsImport->umlsCollectionId, $umlsImport->sourceAbbreviations['GO'], $EC2GoMapping, $SP2GoMapping); |
35 | 46 | //importSwissProt($swissProtXMLFileName, 18, 25, $EC2GoMapping, $SP2GoMapping); |
36 | 47 | |
37 | 48 | $endTime = time(); |
38 | | -echo "\nTime elapsed: " . durationToString($endTime - $beginTime); |
| 49 | +echo "\n\nTime elapsed: " . durationToString($endTime - $beginTime); |
39 | 50 | |
40 | | -function echoNofLines($fileHandle, $numberOfLines) { |
41 | | - $i = 0; |
42 | | - do { |
43 | | - $buffer = fgets($fileHandle); |
44 | | - $buffer = rtrim($buffer,"\n"); |
45 | | - echo $buffer; |
46 | | - $i += 1; |
47 | | - } while($i < $numberOfLines || strpos($buffer, '</entry>') === false); |
48 | | - echo "</uniprot>"; |
49 | | -} |
| 51 | +//function echoNofLines($fileHandle, $numberOfLines) { |
| 52 | +// $i = 0; |
| 53 | +// do { |
| 54 | +// $buffer = fgets($fileHandle); |
| 55 | +// $buffer = rtrim($buffer,"\n"); |
| 56 | +// echo $buffer; |
| 57 | +// $i += 1; |
| 58 | +// } while($i < $numberOfLines || strpos($buffer, '</entry>') === false); |
| 59 | +// echo "</uniprot>"; |
| 60 | +//} |
| 61 | +// |
| 62 | +//function echoLinesUntilText($fileHandle, $text) { |
| 63 | +// $found = false; |
| 64 | +// do { |
| 65 | +// $buffer = fgets($fileHandle); |
| 66 | +// $buffer = rtrim($buffer,"\n"); |
| 67 | +// echo $buffer; |
| 68 | +// $found = strpos($buffer, $text) !== false; |
| 69 | +// } while(!$found || strpos($buffer, '</entry>') === false); |
| 70 | +// echo "</uniprot>"; |
| 71 | +//} |
50 | 72 | |
51 | | -function echoLinesUntilText($fileHandle, $text) { |
52 | | - $found = false; |
53 | | - do { |
54 | | - $buffer = fgets($fileHandle); |
55 | | - $buffer = rtrim($buffer,"\n"); |
56 | | - echo $buffer; |
57 | | - $found = strpos($buffer, $text) !== false; |
58 | | - } while(!$found || strpos($buffer, '</entry>') === false); |
59 | | - echo "</uniprot>"; |
60 | | -} |
61 | | - |
62 | | -function importSwissProtEntries($fileHandle) { |
63 | | -// $selectLanguageId = 'SELECT language_id FROM language_names WHERE language_name ="English"'; |
64 | | -// $dbr =& wfGetDB(DB_MASTER); |
65 | | -// $queryResult = $dbr->query($selectLanguageId); |
66 | | -// |
67 | | -// if ($languageIdObject = $dbr->fetchObject($queryResult)){ |
68 | | -// $languageId = $languageIdObject->language_id; |
69 | | -// } |
70 | | - |
71 | | - $languageId = 85; |
72 | | - $collectionId = bootstrapCollection("Swiss-Prot", $languageId); |
73 | | - |
74 | | -// while (!feof($fileHandle)) { |
75 | | - for ($i = 1; $i <= 1000; $i++) { |
76 | | - $entry = new SwissProtImportEntry; |
77 | | - $entry->import($fileHandle); |
78 | | - $entry->echoEntry(); |
79 | | - $identifier = $entry->getIdentifier(); |
80 | | - |
81 | | - $descriptionAttribute = $entry->getDescriptionAttribute(); |
82 | | - print_r($descriptionAttribute); |
83 | | - $expression = findExpression($descriptionAttribute->protein->name, $languageId); |
84 | | - if (!$expression) { |
85 | | - $expression = createExpression($descriptionAttribute->protein->name, $languageId); |
86 | | - $definedMeaningId = createNewDefinedMeaning($expression->id, $languageId, $descriptionAttribute->protein->name); |
87 | | - |
88 | | - addDefinedMeaningToCollection($definedMeaningId, $collectionId, $descriptionAttribute->protein->name); |
89 | | - } |
90 | | - } |
91 | | -} |
92 | | - |
93 | | -function getPrefixAnalysis($fileHandle){ |
94 | | - $prefixArray=array(); |
95 | | - |
96 | | - while (!feof($fileHandle)) { |
97 | | - $buffer = fgets($fileHandle); |
98 | | - $buffer = rtrim($buffer,"\n"); |
99 | | - $currentPrefix = substr($buffer, 0, 2); |
100 | | - |
101 | | - if ($currentPrefix != ""){ |
102 | | - if (!array_key_exists($currentPrefix, $prefixArray)) { |
103 | | - $prefixArray[$currentPrefix]=1; |
104 | | - } |
105 | | - else { |
106 | | - $prefixArray[$currentPrefix]+=1; |
107 | | - } |
108 | | - } |
109 | | - } |
110 | | - |
111 | | - echo "Number of prefixes: " . count($prefixArray) . "\n"; |
112 | | - foreach ($prefixArray as $prefix => $value) { |
113 | | - echo $prefix . ": $value\n"; |
114 | | - } |
115 | | -} |
116 | | - |
117 | 73 | ?> |
Index: trunk/extensions/Wikidata/WP/SwissProtImport.php |
— | — | @@ -1,53 +1,45 @@ |
2 | 2 | <?php |
3 | 3 | |
4 | 4 | require_once('XMLImport.php'); |
| 5 | +//require_once('ProgressBar.php'); |
| 6 | +//require_once('..\WiktionaryZ\Expression.php'); |
5 | 7 | |
6 | | -function importSwissProt($xmlFileName, $umlsCollectionId, $goCollectionId, $EC2GoMapping, $keyword2GoMapping) { |
7 | | - // Uncomment following line for versioning support |
8 | | - startNewTransaction(10, 0, "Swiss-Prot Import"); |
9 | | - |
10 | | - // Find some UMLS concepts for cross references from SwissProt: |
11 | | - $umlsTerms = array(); |
12 | | - $umlsTerms["protein"] = getCollectionMemberId($umlsCollectionId, "C0033684"); |
13 | | - $umlsTerms["gene"] = getCollectionMemberId($umlsCollectionId, "C0017337"); |
14 | | - $umlsTerms["organism"] = getCollectionMemberId($umlsCollectionId, "C0029235"); |
15 | | - $umlsTerms["protein fragment"] = getCollectionMemberId($umlsCollectionId, "C1335533"); |
16 | | - |
| 8 | +/* |
| 9 | + * Import Swiss-Prot from the XML file. Be sure to have started a transaction first! |
| 10 | + */ |
| 11 | +function importSwissProt($xmlFileName, $umlsCollectionId = 0, $goCollectionId = 0, $EC2GoMapping = array(), $keyword2GoMapping = array()) { |
17 | 12 | // Create mappings from EC numbers and SwissProt keywords to GO term meaning id's: |
18 | | - $goCollection = getCollectionContents($goCollectionId); |
19 | | - |
20 | 13 | $EC2GoMeaningId = array(); |
21 | | - foreach ($EC2GoMapping as $EC => $GO) { |
22 | | - if (array_key_exists($GO, $goCollection)) { |
23 | | - $goMeaningId = $goCollection[$GO]; |
24 | | - $EC2GoMeaningId[$EC] = $goMeaningId; |
25 | | - } |
26 | | - } |
| 14 | + $keyword2GoMeaningId = array(); |
27 | 15 | |
28 | | - $keyword2GoMeaningId = array(); |
29 | | - foreach ($keyword2GoMapping as $keyword => $GO) { |
30 | | - if (array_key_exists($GO, $goCollection)) { |
31 | | - $goMeaningId = $goCollection[$GO]; |
32 | | - $keyword2GoMeaningId[$keyword] = $goMeaningId; |
| 16 | + if ($goCollectionId != 0) { |
| 17 | + $goCollection = getCollectionContents($goCollectionId); |
| 18 | + |
| 19 | + foreach ($EC2GoMapping as $EC => $GO) { |
| 20 | + if (array_key_exists($GO, $goCollection)) { |
| 21 | + $goMeaningId = $goCollection[$GO]; |
| 22 | + $EC2GoMeaningId[$EC] = $goMeaningId; |
| 23 | + } |
33 | 24 | } |
| 25 | + |
| 26 | + foreach ($keyword2GoMapping as $keyword => $GO) { |
| 27 | + if (array_key_exists($GO, $goCollection)) { |
| 28 | + $goMeaningId = $goCollection[$GO]; |
| 29 | + $keyword2GoMeaningId[$keyword] = $goMeaningId; |
| 30 | + } |
| 31 | + } |
34 | 32 | } |
35 | 33 | |
36 | 34 | // SwissProt import: |
37 | 35 | $numberOfBytes = filesize($xmlFileName); |
38 | | - progressBar(0, $numberOfBytes); |
| 36 | + initializeProgressBar($numberOfBytes, 5000000); |
39 | 37 | $fileHandle = fopen($xmlFileName, "r"); |
40 | | - importEntriesFromXMLFile($fileHandle, $umlsTerms, $EC2GoMeaningId, $keyword2GoMeaningId); |
| 38 | + importEntriesFromXMLFile($fileHandle, $umlsCollectionId, $EC2GoMeaningId, $keyword2GoMeaningId); |
41 | 39 | |
42 | 40 | fclose($fileHandle); |
43 | 41 | } |
44 | 42 | |
45 | | -function importEntriesFromXMLFile($fileHandle, $umlsTerms, $EC2GoMeaningId, $keyword2GoMeaningId) { |
46 | | -// $selectLanguageId = 'SELECT language_id FROM language_names WHERE language_name ="English"'; |
47 | | -// $queryResult = $dbr->query($selectLanguageId); |
48 | | -// if ($languageIdObject = $dbr->fetchObject($queryResult)){ |
49 | | -// $languageId = $languageIdObject->language_id; |
50 | | -// } |
51 | | - |
| 43 | +function importEntriesFromXMLFile($fileHandle, $umlsCollectionId, $EC2GoMeaningIdMapping, $keyword2GoMeaningIdMapping) { |
52 | 44 | $languageId = 85; |
53 | 45 | $collectionId = bootstrapCollection("Swiss-Prot", $languageId, ""); |
54 | 46 | $classCollectionId = bootstrapCollection("Swiss-Prot classes", $languageId, "ATTR"); |
— | — | @@ -62,23 +54,19 @@ |
63 | 55 | $xmlParser->relationTypeCollectionId = $relationTypeCollectionId; |
64 | 56 | $xmlParser->textAttibuteCollectionId = $textAttibuteCollectionId; |
65 | 57 | $xmlParser->ECCollectionId = $ECCollectionId; |
66 | | - $xmlParser->EC2GoMeaningIdMapping = $EC2GoMeaningId; |
67 | | - $xmlParser->keyword2GoMeaningIdMapping = $keyword2GoMeaningId; |
| 58 | + $xmlParser->EC2GoMeaningIdMapping = $EC2GoMeaningIdMapping; |
| 59 | + $xmlParser->keyword2GoMeaningIdMapping = $keyword2GoMeaningIdMapping; |
68 | 60 | |
69 | | - $xmlParser->setUMLSTerms($umlsTerms); |
70 | | -// $xmlParser->classes["protein"] = $umlsTerms["protein"]; |
71 | | -// $xmlParser->addClass("protein"); |
72 | | -// $xmlParser->classes["protein fragment"] = $umlsTerms["protein fragment"]; |
73 | | -// $xmlParser->addClass("protein fragment"); |
74 | | -// $xmlParser->classes["organism"] = $umlsTerms["organism"]; |
75 | | -// $xmlParser->addClass("organism"); |
76 | | - $xmlParser->addClass("organism specific protein"); |
77 | | -// $xmlParser->classes["gene"] = $umlsTerms["gene"]; |
78 | | -// $xmlParser->addClass("gene"); |
79 | | - $xmlParser->addClass("organism specific gene"); |
80 | | - $xmlParser->addClass("text attribute"); |
81 | | - $xmlParser->addClass("enzyme commission number"); |
| 61 | + // Find some UMLS concepts for cross references from SwissProt: |
| 62 | + if ($umlsCollectionId != 0) { |
| 63 | + $xmlParser->proteinConceptId = getCollectionMemberId($umlsCollectionId, "C0033684"); |
| 64 | + $xmlParser->geneConceptId = getCollectionMemberId($umlsCollectionId, "C0017337"); |
| 65 | + $xmlParser->organismConceptId = getCollectionMemberId($umlsCollectionId, "C0029235"); |
| 66 | + $xmlParser->proteinFragmentConceptId = getCollectionMemberId($umlsCollectionId, "C1335533"); |
| 67 | + } |
82 | 68 | |
| 69 | + $xmlParser->initialize(); |
| 70 | + |
83 | 71 | parseXML($fileHandle, $xmlParser); |
84 | 72 | } |
85 | 73 | |
— | — | @@ -93,34 +81,107 @@ |
94 | 82 | public $keyword2GoMeaningIdMapping; |
95 | 83 | public $numberOfEntries = 0; |
96 | 84 | |
97 | | - public $classes = array(); |
98 | | - public $relationTypes = array(); |
99 | 85 | public $proteins = array(); |
100 | 86 | public $species = array(); |
101 | 87 | public $genes = array(); |
102 | 88 | public $attributes = array(); |
103 | 89 | public $ECNumbers = array(); |
104 | 90 | |
105 | | - public function addClass($name) { |
106 | | - if (array_key_exists($name, $this->classes)) { |
107 | | - $definedMeaningId = $this->classes[$name]; |
108 | | - } |
109 | | - else { |
110 | | - $definedMeaningId = $this->addExpressionAsDefinedMeaning($name, $name, $name, $this->classCollectionId); |
111 | | - $this->classes[$name] = $definedMeaningId; |
112 | | - } |
113 | | - return $definedMeaningId; |
| 91 | + public $proteinConceptId = 0; |
| 92 | + public $proteinFragmentConceptId = 0; |
| 93 | + public $organismSpecificProteinConceptId = 0; |
| 94 | + public $organismSpecificGeneId = 0; |
| 95 | + public $geneConceptId = 0; |
| 96 | + public $organismConceptId = 0; |
| 97 | + public $referencedByConceptId = 0; |
| 98 | + public $keywordConceptId = 0; |
| 99 | + public $includesConceptId = 0; |
| 100 | + public $includedInConceptId = 0; |
| 101 | + public $containsConceptId = 0; |
| 102 | + public $containedInConceptId = 0; |
| 103 | + public $textAttributeConceptId = 0; |
| 104 | + public $enzymeCommissionNumberConceptId = 0; |
| 105 | + public $activityConceptId = 0; |
| 106 | + |
| 107 | + protected function bootstrapDefinedMeaning($spelling, $definition) { |
| 108 | + $expression = $this->getOrCreateExpression($spelling); |
| 109 | + $definedMeaningId = createNewDefinedMeaning($expression->id, $this->languageId, $definition); |
| 110 | + |
| 111 | + return $definedMeaningId; |
114 | 112 | } |
115 | 113 | |
116 | | - public function setUMLSTerms($umlsTerms) { |
117 | | - foreach ($umlsTerms as $term => $definedMeaningId) { |
118 | | - $this->classes[$term] = $definedMeaningId; |
119 | | - addDefinedMeaningToCollectionIfNotPresent($definedMeaningId, $this->classCollectionId, $term); |
120 | | - $this->relationTypes[$term] = $definedMeaningId; |
121 | | - addDefinedMeaningToCollectionIfNotPresent($definedMeaningId, $this->relationTypeCollectionId, $term); |
122 | | - } |
| 114 | + protected function bootstrapConceptIds() { |
| 115 | + if ($this->proteinConceptId == 0) |
| 116 | + $this->proteinConceptId = $this->bootstrapDefinedMeaning("protein", "protein"); |
| 117 | + |
| 118 | + if ($this->proteinFragmentConceptId == 0) |
| 119 | + $this->proteinFragmentConceptId = $this->bootstrapDefinedMeaning("protein fragment", "protein fragment"); |
| 120 | + |
| 121 | + if ($this->organismSpecificProteinConceptId == 0) |
| 122 | + $this->organismSpecificProteinConceptId = $this->bootstrapDefinedMeaning("organism specific protein", "organism specific protein"); |
| 123 | + |
| 124 | + if ($this->organismSpecificGeneConceptId == 0) |
| 125 | + $this->organismSpecificGeneConceptId = $this->bootstrapDefinedMeaning("organism specific gene", "organism specific gene"); |
| 126 | + |
| 127 | + if ($this->geneConceptId == 0) |
| 128 | + $this->geneConceptId = $this->bootstrapDefinedMeaning("gene", "gene"); |
| 129 | + |
| 130 | + if ($this->organismConceptId == 0) |
| 131 | + $this->organismConceptId = $this->bootstrapDefinedMeaning("organism", "organism"); |
| 132 | + |
| 133 | + if ($this->referencedByConceptId == 0) |
| 134 | + $this->referencedByConceptId = $this->bootstrapDefinedMeaning("referenced by", "referenced by"); |
| 135 | + |
| 136 | + if ($this->keywordConceptId == 0) |
| 137 | + $this->keywordConceptId = $this->bootstrapDefinedMeaning("keyword", "keyword"); |
| 138 | + |
| 139 | + if ($this->includesConceptId == 0) |
| 140 | + $this->includesConceptId = $this->bootstrapDefinedMeaning("includes", "includes"); |
| 141 | + |
| 142 | + if ($this->includedInConceptId == 0) |
| 143 | + $this->includedInConceptId = $this->bootstrapDefinedMeaning("included in", "included in"); |
| 144 | + |
| 145 | + if ($this->containsConceptId == 0) |
| 146 | + $this->containsConceptId = $this->bootstrapDefinedMeaning("contains", "contains"); |
| 147 | + |
| 148 | + if ($this->containedInConceptId == 0) |
| 149 | + $this->containedInConceptId = $this->bootstrapDefinedMeaning("contained in", "contained in"); |
| 150 | + |
| 151 | + if ($this->enzymeCommissionNumberConceptId == 0) |
| 152 | + $this->enzymeCommissionNumberConceptId = $this->bootstrapDefinedMeaning("enzyme commission number", "enzyme commission number"); |
| 153 | + |
| 154 | + if ($this->textAttributeConceptId == 0) |
| 155 | + $this->textAttributeConceptId = $this->bootstrapDefinedMeaning("text attribute", "text attribute"); |
| 156 | + |
| 157 | + if ($this->activityConceptId == 0) |
| 158 | + $this->activityConceptId = $this->bootstrapDefinedMeaning("activity", "activity"); |
123 | 159 | } |
124 | 160 | |
| 161 | + public function initialize() { |
| 162 | + $this->bootstrapConceptIds(); |
| 163 | + |
| 164 | + // Add concepts to classes |
| 165 | + addDefinedMeaningToCollectionIfNotPresent($this->proteinConceptId, $this->classCollectionId, "protein"); |
| 166 | + addDefinedMeaningToCollectionIfNotPresent($this->proteinFragmentConceptId, $this->classCollectionId, "protein fragment"); |
| 167 | + addDefinedMeaningToCollectionIfNotPresent($this->geneConceptId, $this->classCollectionId, "gene"); |
| 168 | + addDefinedMeaningToCollectionIfNotPresent($this->organismConceptId, $this->classCollectionId, "organism"); |
| 169 | + addDefinedMeaningToCollectionIfNotPresent($this->organismSpecificProteinConceptId, $this->classCollectionId, "organism specific protein"); |
| 170 | + addDefinedMeaningToCollectionIfNotPresent($this->textAttributeConceptId, $this->classCollectionId, "text attribute"); |
| 171 | + addDefinedMeaningToCollectionIfNotPresent($this->enzymeCommissionNumberConceptId, $this->classCollectionId, "enzyme commission number"); |
| 172 | + |
| 173 | + // Add concepts to relation types |
| 174 | + addDefinedMeaningToCollectionIfNotPresent($this->proteinConceptId, $this->relationTypeCollectionId, "protein"); |
| 175 | + addDefinedMeaningToCollectionIfNotPresent($this->referencedByConceptId, $this->relationTypeCollectionId, "referenced by"); |
| 176 | + addDefinedMeaningToCollectionIfNotPresent($this->geneConceptId, $this->relationTypeCollectionId, "gene"); |
| 177 | + addDefinedMeaningToCollectionIfNotPresent($this->organismConceptId, $this->relationTypeCollectionId, "organism"); |
| 178 | + addDefinedMeaningToCollectionIfNotPresent($this->activityConceptId, $this->relationTypeCollectionId, "activity"); |
| 179 | + addDefinedMeaningToCollectionIfNotPresent($this->keywordConceptId, $this->relationTypeCollectionId, "keyword"); |
| 180 | + addDefinedMeaningToCollectionIfNotPresent($this->includesConceptId, $this->relationTypeCollectionId, "includes"); |
| 181 | + addDefinedMeaningToCollectionIfNotPresent($this->includedInConceptId, $this->relationTypeCollectionId, "included in"); |
| 182 | + addDefinedMeaningToCollectionIfNotPresent($this->containsConceptId, $this->relationTypeCollectionId, "contains"); |
| 183 | + addDefinedMeaningToCollectionIfNotPresent($this->containedInConceptId, $this->relationTypeCollectionId, "contained in"); |
| 184 | + } |
| 185 | + |
125 | 186 | public function startElement($parser, $name, $attributes) { |
126 | 187 | global |
127 | 188 | $numberOfBytes; |
— | — | @@ -133,10 +194,11 @@ |
134 | 195 | $this->stack[] = $handler; |
135 | 196 | } |
136 | 197 | else { |
137 | | - if (count($this->stack) == 1 && $this->numberOfEntries % 10 == 0) { |
| 198 | + if (count($this->stack) == 1) { |
138 | 199 | $currentByteIndex = xml_get_current_byte_index($parser); |
139 | | - progressBar($currentByteIndex, $numberOfBytes); |
| 200 | + setProgressBarPosition($currentByteIndex); |
140 | 201 | } |
| 202 | + |
141 | 203 | BaseXMLParser::startElement($parser, $name, $attributes); |
142 | 204 | } |
143 | 205 | } |
— | — | @@ -167,9 +229,9 @@ |
168 | 230 | } |
169 | 231 | |
170 | 232 | if($protein->fragment) |
171 | | - addRelation($definedMeaningId, 0, $this->classes["protein fragment"]); |
| 233 | + addClassMembership($definedMeaningId, $this->proteinFragmentConceptId); |
172 | 234 | else |
173 | | - addRelation($definedMeaningId, 0, $this->classes["protein"]); |
| 235 | + addClassMembership($definedMeaningId, $this->proteinConceptId); |
174 | 236 | |
175 | 237 | return $definedMeaningId; |
176 | 238 | } |
— | — | @@ -183,7 +245,7 @@ |
184 | 246 | $this->genes[$name] = $definedMeaningId; |
185 | 247 | } |
186 | 248 | |
187 | | - addRelation($definedMeaningId, 0, $this->classes["gene"]); |
| 249 | + addClassMembership($definedMeaningId, $this->geneConceptId); |
188 | 250 | |
189 | 251 | foreach ($synonyms as $key => $synonym) { |
190 | 252 | addSynonymOrTranslation($synonym, $this->languageId, $definedMeaningId, true); |
— | — | @@ -201,7 +263,7 @@ |
202 | 264 | $this->species[$name] = $definedMeaningId; |
203 | 265 | } |
204 | 266 | |
205 | | - addRelation($definedMeaningId, 0, $this->classes["organism"]); |
| 267 | + addClassMembership($definedMeaningId, $this->organismConceptId); |
206 | 268 | |
207 | 269 | foreach ($translations as $key => $translation) { |
208 | 270 | addSynonymOrTranslation($translation, $this->languageId, $definedMeaningId, true); |
— | — | @@ -211,51 +273,39 @@ |
212 | 274 | } |
213 | 275 | |
214 | 276 | public function addEntry($entry, $proteinMeaningId, $geneMeaningId, $organismSpeciesMeaningId) { |
215 | | - $activityLabel = "activity"; |
216 | | - $proteinLabel = "protein"; |
217 | | - $referencedByLabel = "referenced by"; |
218 | | - $geneLabel = "gene"; |
219 | | - $organismLabel = "organism"; |
220 | | - $includesLabel = "includes"; |
221 | | - $includedInLabel = "included in"; |
222 | | - $containsLabel = "contains"; |
223 | | - $containedInLabel = "contained in"; |
224 | | - $keywordLabel = "keyword"; |
225 | | - |
226 | | - |
227 | | -// change name to make sure it works in wiki-urls: |
| 277 | + // change name to make sure it works in wiki-urls: |
228 | 278 | $swissProtExpression = str_replace('_', '-', $entry->name); |
229 | 279 | $entryExpression = $entry->protein->name . ' in ' . $entry->organism; |
230 | 280 | |
231 | | -// add the expression as defined meaning: |
| 281 | + // add the expression as defined meaning: |
232 | 282 | $expression = $this->getOrCreateExpression($entryExpression); |
233 | 283 | $definedMeaningId = createNewDefinedMeaning($expression->id, $this->languageId, $entryExpression); |
234 | 284 | addDefinedMeaningToCollection($definedMeaningId, $this->collectionId, $entry->accession); |
235 | 285 | |
236 | | -// Add entry synonyms: Swiss-Prot entry name and species specific protein synonyms |
| 286 | + // Add entry synonyms: Swiss-Prot entry name and species specific protein synonyms |
237 | 287 | addSynonymOrTranslation($swissProtExpression, $this->languageId, $definedMeaningId, true); |
238 | 288 | |
239 | 289 | foreach ($entry->protein->synonyms as $key => $synonym) |
240 | 290 | addSynonymOrTranslation($synonym, $this->languageId, $definedMeaningId, true); |
241 | 291 | |
242 | | -// set the class of the entry: |
243 | | - addRelation($definedMeaningId, 0, $this->classes["organism specific protein"]); |
| 292 | + // set the class of the entry: |
| 293 | + addClassMembership($definedMeaningId, $this->organismSpecificProteinConceptId); |
244 | 294 | |
245 | | -// set the protein of the swiss prot entry and relate the protein to the entry: |
246 | | - addRelation($definedMeaningId, $this->getOrCreateRelationTypeMeaningId($proteinLabel), $proteinMeaningId); |
247 | | - addRelation($proteinMeaningId, $this->getOrCreateRelationTypeMeaningId($referencedByLabel), $definedMeaningId); |
| 295 | + // set the protein of the swiss prot entry and relate the protein to the entry: |
| 296 | + addRelation($definedMeaningId, $this->proteinConceptId, $proteinMeaningId); |
| 297 | + addRelation($proteinMeaningId, $this->referencedByConceptId, $definedMeaningId); |
248 | 298 | |
249 | | -// set the gene of the swiss prot entry and relate the gene to the entry: |
| 299 | + // set the gene of the swiss prot entry and relate the gene to the entry: |
250 | 300 | if($geneMeaningId >= 0) { |
251 | | - addRelation($definedMeaningId, $this->getOrCreateRelationTypeMeaningId($geneLabel), $geneMeaningId); |
252 | | - addRelation($geneMeaningId, $this->getOrCreateRelationTypeMeaningId($referencedByLabel), $definedMeaningId); |
| 301 | + addRelation($definedMeaningId, $this->geneConceptId, $geneMeaningId); |
| 302 | + addRelation($geneMeaningId, $this->referencedByConceptId, $definedMeaningId); |
253 | 303 | } |
254 | 304 | |
255 | | -// set the species of the swiss prot entry and relate the species to the entry: |
256 | | - addRelation($definedMeaningId, $this->getOrCreateRelationTypeMeaningId($organismLabel), $organismSpeciesMeaningId); |
257 | | - addRelation($organismSpeciesMeaningId, $this->getOrCreateRelationTypeMeaningId($referencedByLabel), $definedMeaningId); |
| 305 | + // set the species of the swiss prot entry and relate the species to the entry: |
| 306 | + addRelation($definedMeaningId, $this->organismConceptId, $organismSpeciesMeaningId); |
| 307 | + addRelation($organismSpeciesMeaningId, $this->referencedByConceptId, $definedMeaningId); |
258 | 308 | |
259 | | -// add the comment fields as text attributes: |
| 309 | + // add the comment fields as text attributes: |
260 | 310 | foreach ($entry->comments as $key => $comment) { |
261 | 311 | $attributeMeaningId = $this->getOrCreateAttributeMeaningId($comment->type); |
262 | 312 | $textValue = $comment->text; |
— | — | @@ -266,34 +316,34 @@ |
267 | 317 | addDefinedMeaningTextAttributeValue($definedMeaningId, $attributeMeaningId, $this->languageId, $textValue); |
268 | 318 | } |
269 | 319 | |
270 | | -// add EC number: |
| 320 | + // add EC number: |
271 | 321 | if($entry->EC != ""){ |
272 | 322 | $ECNumberMeaningId = $this->getOrCreateECNumberMeaningId($entry->EC); |
273 | | - addRelation($definedMeaningId, $this->getOrCreateRelationTypeMeaningId($activityLabel), $ECNumberMeaningId); |
274 | | - addRelation($ECNumberMeaningId, $this->getOrCreateRelationTypeMeaningId($referencedByLabel), $definedMeaningId); |
| 323 | + addRelation($definedMeaningId, $this->activityConceptId, $ECNumberMeaningId); |
| 324 | + addRelation($ECNumberMeaningId, $this->referencedByConceptId, $definedMeaningId); |
275 | 325 | } |
276 | 326 | |
277 | | -// add keywords: |
| 327 | + // add keywords: |
278 | 328 | foreach ($entry->keywords as $key => $keyword) { |
279 | 329 | if (array_key_exists($keyword, $this->keyword2GoMeaningIdMapping)) { |
280 | 330 | $goMeaningId = $this->keyword2GoMeaningIdMapping[$keyword]; |
281 | | - addRelation($definedMeaningId, $this->getOrCreateRelationTypeMeaningId($keywordLabel), $goMeaningId); |
282 | | - addRelation($goMeaningId, $this->getOrCreateRelationTypeMeaningId($referencedByLabel), $definedMeaningId); |
| 331 | + addRelation($definedMeaningId, $this->keywordConceptId, $goMeaningId); |
| 332 | + addRelation($goMeaningId, $this->referencedByConceptId, $definedMeaningId); |
283 | 333 | } |
284 | 334 | } |
285 | 335 | |
286 | | -// Add protein includes relations |
| 336 | + // Add protein includes relations |
287 | 337 | foreach ($entry->protein->domains as $key => $domain) { |
288 | 338 | $domainMeaningId = $this->addProtein($domain); |
289 | | - addRelation($definedMeaningId, $this->getOrCreateRelationTypeMeaningId($includesLabel), $domainMeaningId); |
290 | | - addRelation($domainMeaningId, $this->getOrCreateRelationTypeMeaningId($includedInLabel), $definedMeaningId); |
| 339 | + addRelation($definedMeaningId, $this->includesConceptId, $domainMeaningId); |
| 340 | + addRelation($domainMeaningId, $this->includedInConceptId, $definedMeaningId); |
291 | 341 | } |
292 | 342 | |
293 | | -// Add protein includes relations |
| 343 | + // Add protein contains relations |
294 | 344 | foreach ($entry->protein->components as $key => $component) { |
295 | 345 | $componentMeaningId = $this->addProtein($component); |
296 | | - addRelation($definedMeaningId, $this->getOrCreateRelationTypeMeaningId($containsLabel), $componentMeaningId); |
297 | | - addRelation($componentMeaningId, $this->getOrCreateRelationTypeMeaningId($containedInLabel), $definedMeaningId); |
| 346 | + addRelation($definedMeaningId, $this->containsConceptId, $componentMeaningId); |
| 347 | + addRelation($componentMeaningId, $this->containedInConceptId, $definedMeaningId); |
298 | 348 | } |
299 | 349 | |
300 | 350 | return $definedMeaningId; |
— | — | @@ -305,7 +355,7 @@ |
306 | 356 | } |
307 | 357 | else { |
308 | 358 | $definedMeaningId = $this->addExpressionAsDefinedMeaning($attribute, $attribute, $attribute, $this->textAttibuteCollectionId); |
309 | | - addRelation($definedMeaningId, 0, $this->classes["text attribute"]); |
| 359 | + addClassMembership($definedMeaningId, $this->textAttributeConceptId); |
310 | 360 | $this->attributes[$attribute] = $definedMeaningId; |
311 | 361 | } |
312 | 362 | return $definedMeaningId; |
— | — | @@ -315,7 +365,7 @@ |
316 | 366 | if (array_key_exists($EC, $this->ECNumbers)) { |
317 | 367 | $definedMeaningId = $this->ECNumbers[$EC]; |
318 | 368 | } |
319 | | - elseif(array_key_exists($EC, $this->EC2GoMeaningIdMapping)) { |
| 369 | + elseif (array_key_exists($EC, $this->EC2GoMeaningIdMapping)) { |
320 | 370 | $definedMeaningId = $this->EC2GoMeaningIdMapping[$EC]; |
321 | 371 | $this->ECNumbers[$EC] = $definedMeaningId; |
322 | 372 | $expression = $this->getOrCreateExpression($EC); |
— | — | @@ -323,7 +373,7 @@ |
324 | 374 | } |
325 | 375 | else { |
326 | 376 | $definedMeaningId = $this->addExpressionAsDefinedMeaning($EC, $EC, $EC, $this->ECCollectionId); |
327 | | - addRelation($definedMeaningId, 0, $this->classes["enzyme commission number"]); |
| 377 | + addClassMembership($definedMeaningId, $this->enzymeCommissionNumberConceptId); |
328 | 378 | $this->ECNumbers[$EC] = $definedMeaningId; |
329 | 379 | } |
330 | 380 | return $definedMeaningId; |
— | — | @@ -343,17 +393,6 @@ |
344 | 394 | addDefinedMeaningToCollection($definedMeaningId, $collectionId, $internalIdentifier); |
345 | 395 | return $definedMeaningId; |
346 | 396 | } |
347 | | - |
348 | | - public function getOrCreateRelationTypeMeaningId($spelling) { |
349 | | - if (array_key_exists($spelling, $this->relationTypes)){ |
350 | | - $relationTypeMeaningId = $this->relationTypes[$spelling]; |
351 | | - } |
352 | | - else { |
353 | | - $relationTypeMeaningId = $this->addExpressionAsDefinedMeaning($spelling, $spelling, $spelling, $this->relationTypeCollectionId); |
354 | | - $this->relationTypes[$spelling] = $relationTypeMeaningId; |
355 | | - } |
356 | | - return $relationTypeMeaningId; |
357 | | - } |
358 | 397 | } |
359 | 398 | |
360 | 399 | class UniProtXMLElementHandler extends DefaultXMLElementHandler { |