Index: trunk/extensions/Wikidata/util/umls/collection.php |
— | — | @@ -0,0 +1,168 @@ |
| 2 | +<?php |
| 3 | +header("Content-type: text/html; charset=UTF-8"); |
| 4 | +$dc="uw"; |
| 5 | + |
| 6 | +define('MEDIAWIKI', true ); |
| 7 | +include_once("../../../../LocalSettings.php"); |
| 8 | +global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname; |
| 9 | + |
| 10 | +$db1=$wgDBserver; # hostname |
| 11 | +$db2=$wgDBuser; # user |
| 12 | +$db3=$wgDBpassword; # pass |
| 13 | +$db4=$wgDBname; # db-name |
| 14 | + |
| 15 | +$connection=MySQL_connect($db1,$db2,$db3); |
| 16 | +if (!$connection)die("Cannot connect to SQL server. Try again later."); |
| 17 | +MySQL_select_db($db4)or die("Cannot open database"); |
| 18 | +mysql_query("SET NAMES 'utf8'"); |
| 19 | + |
| 20 | +echo " |
| 21 | +<style type=\"text/css\"><!-- |
| 22 | +body {font-family:arial,sans-serif} |
| 23 | +--></style> |
| 24 | +"; |
| 25 | + |
| 26 | +function stopwatch(){ # current time in seconds as a float; used to compute the page time shown in the footer |
| 27 | + $parts = explode(" ", microtime()); # microtime() returns the string "fraction seconds" |
| 28 | + return ((float)$parts[0] + (float)$parts[1]); |
| 29 | +} |
| 30 | + |
| 31 | +/* |
| 32 | +$result = mysql_query("SELECT |
| 33 | +{$dc}_defined_meaning.defined_meaning_id , {$dc}_expression_ns.spelling |
| 34 | +FROM {$dc}_defined_meaning, {$dc}_expression_ns |
| 35 | +where {$dc}_defined_meaning.defined_meaning_id=1446 |
| 36 | +and {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id |
| 37 | +limit 0,40")or die ("error ".mysql_error()); |
| 38 | + |
| 39 | +*/ |
| 40 | + |
| 41 | +$start=stopwatch(); |
| 42 | + |
| 43 | +$collection_id=isset($_REQUEST['collection'])?(int)$_REQUEST['collection']:0; # integer cast: the value is interpolated into SQL and into link URLs below |
| 44 | +# Fetch the spelling of the expression that names this collection; it becomes the page heading. |
| 45 | +$result = mysql_query( |
| 46 | +"SELECT spelling |
| 47 | +FROM {$dc}_collection_ns, {$dc}_defined_meaning, {$dc}_expression_ns |
| 48 | +WHERE collection_id=$collection_id |
| 49 | +AND collection_mid=defined_meaning_id |
| 50 | +AND {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id |
| 51 | +")or die ("error ".mysql_error()); |
| 52 | +# An unknown id yields no row, so the heading stays empty; the spelling is HTML-escaped because it is echoed into the page. |
| 53 | +$row= mysql_fetch_array($result, MYSQL_NUM); |
| 54 | +$collection= $row ? htmlspecialchars($row[0], ENT_QUOTES, 'UTF-8') : ''; |
| 55 | + |
| 56 | +echo"<center> |
| 57 | +<h1> $collection </h1> |
| 58 | +<h2> Number of Expressions per language in this collection </h2> |
| 59 | +<hr width=950 size=1 noshade><br> |
| 60 | +"; |
| 61 | + |
| 62 | + |
| 63 | +$result = mysql_query("SELECT * |
| 64 | +FROM language_names |
| 65 | +where name_language_id = 85 |
| 66 | +")or die ("error ".mysql_error()); |
| 67 | + |
| 68 | +while ($row = mysql_fetch_array($result, MYSQL_NUM)) { |
| 69 | +//echo $row[0]." - ".$row[1]." - ".$row[2]."<br>"; |
| 70 | +$lang[$row[0]]=$row[2]; |
| 71 | +} |
| 72 | + |
| 73 | + |
| 74 | +//////////////////////////////////////////////////////// |
| 75 | +$collection_esc=mysql_real_escape_string( $collection_id); |
| 76 | +$result = mysql_query("SELECT |
| 77 | +language_id, COUNT(DISTINCT defined_meaning_id) as counts |
| 78 | +FROM {$dc}_collection_contents, {$dc}_syntrans, {$dc}_expression_ns |
| 79 | +WHERE collection_id = $collection_esc |
| 80 | +AND {$dc}_syntrans.defined_meaning_id= {$dc}_collection_contents.member_mid |
| 81 | +AND {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id |
| 82 | +AND {$dc}_expression_ns.remove_transaction_id IS NULL |
| 83 | +AND {$dc}_syntrans.remove_transaction_id IS NULL |
| 84 | +AND {$dc}_collection_contents.remove_transaction_id is NULL |
| 85 | +GROUP BY language_id |
| 86 | +ORDER BY counts DESC |
| 87 | + ")or die ("error ".mysql_error()); |
| 88 | + |
| 89 | +echo ' |
| 90 | +<table cellpadding=0 width=950><tr><td width=200><b>Language</b></td><td align=right><b>Expressions</b></td><td width=30></td><td></td></tr>'; |
| 91 | +$width=500; # pixel width of the bar drawn for the largest language |
| 92 | +$limit=0; # absolute cutoff, recomputed from $limit_percent whenever a new maximum is seen |
| 93 | +$max=0; |
| 94 | +$limit_percent=10; |
| 95 | +$tx=''; # languages at or below the cutoff, collected as one comma-separated paragraph; initialised so the first .= is defined |
| 96 | +$collection_param=(int)$collection_id; # request value forced to an integer here because it is written into every link |
| 97 | +while ($row = mysql_fetch_array($result, MYSQL_NUM)) { |
| 98 | +$language_id=$row[0]; |
| 99 | +$count=(int)$row[1]; |
| 100 | +if($max<$count) { |
| 101 | + $max=$count; |
| 102 | + $limit=(int) ($max*($limit_percent/100)+0.5); # 10% cutoff, note that ORDER BY ... DESC should have first row = max ;-) |
| 103 | +} |
| 104 | +$ratio=$max>0?$count/$max:0; # share of the largest language; the guard avoids 0/0 if every count is zero |
| 105 | +$wi=ceil($ratio*$width); $per=ceil($ratio*100); # bar width in pixels and percentage label |
| 106 | +$name=isset($lang[$language_id])?htmlspecialchars($lang[$language_id], ENT_QUOTES, 'UTF-8'):"language $language_id"; # fallback when the name table has no entry |
| 107 | +$language_link="<a href=\"missing.php?collection=$collection_param&amp;language=$language_id\">".$name."</a>"; |
| 108 | +if($count>$limit)echo "<tr><td >".$language_link.'</td><td align="right">'.$count."</td><td width=30></td><td><img src=sc1.png width=\"$wi\" height=20> $per %</td></tr>"; |
| 109 | +else $tx.=$language_link." (".$count."/ $per%), "; |
| 110 | +} |
| 111 | +echo " |
| 112 | +<tr><td colspan=4> |
| 113 | +<div align=justify> |
| 114 | + |
| 115 | +<h3>Languages with $limit entries or less ( / cutoff at $limit_percent% or less)</h3> |
| 116 | +$tx |
| 117 | +</div> |
| 118 | +</td> |
| 119 | +</table><center>"; |
| 120 | +/* |
| 121 | +for($i=0;$i<250;$i++){ |
| 122 | +if(strlen($ar[$i])>20)filewrite("out/".$lang[$i].".txt",$ar[$i]); |
| 123 | + |
| 124 | +} |
| 125 | +*/ |
| 126 | +//////////////////////////////////////////////////////// |
| 127 | + |
| 128 | + |
| 129 | +//echo "<pre>".$ar[85]."</pre>"; |
| 130 | + |
| 131 | +echo " |
| 132 | +<br> |
| 133 | +<hr size=1 noshade width=950> |
| 134 | +<table width=950><tr><td> |
| 135 | +<small>Page time: ".substr((stopwatch()-$start),0,5)." seconds</small> |
| 136 | +<td align=right> |
| 137 | + |
| 138 | +<small>Script based on work contributed by <a href=http://www.dicts.info/>Zdenek Broz</a> |
| 139 | +</small> |
| 140 | +</td> |
| 141 | +</tr></table> |
| 142 | +<br>"; |
| 143 | + |
| 144 | + |
| 145 | +function filewrite($file,$txt){ |
| 146 | +# Overwrite (or create) $file with $txt followed by a newline. |
| 147 | +# file_put_contents() opens, writes and closes in one call, so a failed open |
| 148 | +# no longer hands an invalid handle to fwrite()/fclose(). |
| 149 | +file_put_contents($file,$txt."\n"); } |
| 150 | + |
| 151 | + |
| 152 | + |
| 153 | +echo" |
| 154 | +</center></center> |
| 155 | +<hr>\n"; |
| 156 | +?> |
| 157 | +Notes: |
| 158 | +<ul> |
| 159 | +<li>Languages link to lists of words that are still missing for this collection. </li> |
| 160 | +<li>Especially for large collections, <b>it might take a minute or two to get all the missing words</b></li> |
| 161 | +</ul> |
| 162 | +<hr> |
| 163 | +<p align="left"> |
| 164 | +<h3> see also</h3> |
| 165 | +<ul> |
| 166 | +<li><a href="collections.php">Other collections</a></li> |
| 167 | +<li><a href="stats.php">Overview, expressions per language</a></li> |
| 168 | +<li><a href="../../..">return to Omegawiki proper</a></li> |
| 169 | +</ul></p> |
Property changes on: trunk/extensions/Wikidata/util/umls/collection.php |
___________________________________________________________________ |
Added: svn:executable |
1 | 170 | + * |
Index: trunk/extensions/Wikidata/util/umls/stats.png |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/extensions/Wikidata/util/umls/stats.png |
___________________________________________________________________ |
Added: svn:mime-type |
2 | 171 | + application/octet-stream |
Index: trunk/extensions/Wikidata/util/umls/collections.php |
— | — | @@ -0,0 +1,81 @@ |
| 2 | +<?php |
| 3 | +header("Content-type: text/html; charset=UTF-8"); |
| 4 | + |
| 5 | + |
| 6 | +$dc="uw"; |
| 7 | + |
| 8 | +define('MEDIAWIKI', true ); |
| 9 | +include_once("../../../../LocalSettings.php"); |
| 10 | +global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname; |
| 11 | + |
| 12 | +$db1=$wgDBserver; # hostname |
| 13 | +$db2=$wgDBuser; # user |
| 14 | +$db3=$wgDBpassword; # pass |
| 15 | +$db4=$wgDBname; # db-name |
| 16 | + |
| 17 | +$connection=MySQL_connect($db1,$db2,$db3); |
| 18 | +if (!$connection)die("Cannot connect to SQL server. Try again later."); |
| 19 | +MySQL_select_db($db4)or die("Cannot open database"); |
| 20 | +mysql_query("SET NAMES 'utf8'"); |
| 21 | + |
| 22 | +echo " |
| 23 | +<style type=\"text/css\"><!-- |
| 24 | +body {font-family:arial,sans-serif} |
| 25 | +--></style> |
| 26 | +"; |
| 27 | + |
| 28 | +function stopwatch(){ # current time in seconds as a float; used to compute the page time shown in the footer |
| 29 | + $parts = explode(" ", microtime()); # microtime() returns the string "fraction seconds" |
| 30 | + return ((float)$parts[0] + (float)$parts[1]); |
| 31 | +} |
| 32 | + |
| 33 | + |
| 34 | +$start=stopwatch(); |
| 35 | + |
| 36 | + |
| 37 | +echo" |
| 38 | +<h1>Collections</h1> |
| 39 | +<hr width=950 size=1 noshade><br> |
| 40 | +"; |
| 41 | + |
| 42 | +# This page takes no collection parameter: the query below lists every collection with its spelling and its count of collection_contents rows whose remove_transaction_id is NULL. |
| 43 | +$result = mysql_query(" |
| 44 | + SELECT spellings.id, spelling, counts.total FROM |
| 45 | + ( |
| 46 | + SELECT collection_id as id, spelling |
| 47 | + FROM {$dc}_collection_ns, {$dc}_defined_meaning, {$dc}_expression_ns |
| 48 | + WHERE collection_mid=defined_meaning_id |
| 49 | + AND {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id |
| 50 | + ) AS spellings JOIN |
| 51 | + ( |
| 52 | + SELECT {$dc}_collection_ns.collection_id AS id ,count(*) AS total |
| 53 | + FROM {$dc}_collection_contents, {$dc}_collection_ns |
| 54 | + WHERE {$dc}_collection_ns.collection_id={$dc}_collection_contents.collection_id |
| 55 | + AND {$dc}_collection_contents.remove_transaction_id is NULL |
| 56 | + AND {$dc}_collection_ns.remove_transaction_id is NULL |
| 57 | + GROUP BY {$dc}_collection_ns.collection_id |
| 58 | + ) AS counts |
| 59 | + ON spellings.id=counts.id |
| 60 | + ORDER BY spelling |
| 61 | +")or die ("error ".mysql_error()); |
| 62 | + |
| 63 | + |
| 64 | + |
| 65 | +print "<ul>"; # one list item per collection, each linking to its per-language breakdown |
| 66 | +while ($row = mysql_fetch_array($result, MYSQL_NUM)) { |
| 67 | + $id=(int)$row[0]; |
| 68 | + $spelling=htmlspecialchars($row[1], ENT_QUOTES, 'UTF-8'); # database text must not be emitted as raw HTML |
| 69 | + $total=(int)$row[2]; |
| 70 | + print "<li><a href=\"collection.php?collection=$id\">$spelling</a> ($total defined meanings) </li>"; |
| 71 | +} |
| 72 | +print "</ul>"; |
| 73 | + |
| 74 | +echo"<hr><div align=\"right\"><small>Page time: ".substr((stopwatch()-$start),0,5)." seconds</small></div>"; |
| 75 | +?> |
| 76 | + |
| 77 | +<p align="left"> |
| 78 | +<h3> see also</h3> |
| 79 | +<ul> |
| 80 | +<li><a href="stats.php">Overview, expressions per language</a></li> |
| 81 | +<li><a href="../../..">return to Omegawiki proper</a></li> |
| 82 | +</ul></p> |
Property changes on: trunk/extensions/Wikidata/util/umls/collections.php |
___________________________________________________________________ |
Added: svn:executable |
1 | 83 | + * |
Index: trunk/extensions/Wikidata/util/umls/stats.php |
— | — | @@ -0,0 +1,151 @@ |
| 2 | +<?php |
| 3 | +header("Content-type: text/html; charset=UTF-8"); |
| 4 | + |
| 5 | +$dc="uw"; |
| 6 | + |
| 7 | +define('MEDIAWIKI', true ); |
| 8 | +include_once("../../../../LocalSettings.php"); |
| 9 | +global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname; |
| 10 | + |
| 11 | +$db1=$wgDBserver; # hostname |
| 12 | +$db2=$wgDBuser; # user |
| 13 | +$db3=$wgDBpassword; # pass |
| 14 | +$db4=$wgDBname; # db-name |
| 15 | + |
| 16 | +$connection=MySQL_connect($db1,$db2,$db3); |
| 17 | +if (!$connection)die("Cannot connect to SQL server. Try again later."); |
| 18 | +MySQL_select_db($db4)or die("Cannot open database"); |
| 19 | +mysql_query("SET NAMES 'utf8'"); |
| 20 | + |
| 21 | +echo " |
| 22 | +<style type=\"text/css\"><!-- |
| 23 | +body {font-family:arial,sans-serif} |
| 24 | +--></style> |
| 25 | +"; |
| 26 | + |
| 27 | +function stopwatch(){ # current time in seconds as a float; used to compute the page time shown in the footer |
| 28 | + $parts = explode(" ", microtime()); # microtime() returns the string "fraction seconds" |
| 29 | + return ((float)$parts[0] + (float)$parts[1]); |
| 30 | +} |
| 31 | + |
| 32 | +/* |
| 33 | +$result = mysql_query("SELECT |
| 34 | +{$dc}_defined_meaning.defined_meaning_id , {$dc}_expression_ns.spelling |
| 35 | +FROM {$dc}_defined_meaning, {$dc}_expression_ns |
| 36 | +where {$dc}_defined_meaning.defined_meaning_id=1446 |
| 37 | +and {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id |
| 38 | +limit 0,40")or die ("error ".mysql_error()); |
| 39 | + |
| 40 | +*/ |
| 41 | + |
| 42 | +$start=stopwatch(); |
| 43 | + |
| 44 | +echo"<center> |
| 45 | +<h1>Number of Expressions per language</h1> |
| 46 | +<hr width=950 size=1 noshade><br> |
| 47 | +"; |
| 48 | + |
| 49 | +$expressions_r=mysql_query("SELECT COUNT(*) FROM {$dc}_expression_ns WHERE remove_transaction_id IS NULL")or die ("error ".mysql_error()); |
| 50 | +$expressions_a=mysql_fetch_row($expressions_r); |
| 51 | +$expressions=$expressions_a[0]; |
| 52 | +# Both totals count only rows whose remove_transaction_id is NULL; a failed query stops the page like every other query in this script. |
| 53 | +$defined_meanings_r=mysql_query("SELECT COUNT(*) FROM {$dc}_defined_meaning WHERE remove_transaction_id IS NULL")or die ("error ".mysql_error()); |
| 54 | +$defined_meanings_a=mysql_fetch_row($defined_meanings_r); |
| 55 | +$defined_meanings=$defined_meanings_a[0]; |
| 56 | +echo"<br>\n"; |
| 57 | +echo"Total <b>$defined_meanings</b> DefinedMeanings in database, linking together <b>$expressions</b> Expressions. Broken down per language:\n"; |
| 58 | +echo"<br>\n"; |
| 59 | +echo"<hr>\n"; |
| 60 | + |
| 61 | +$result = mysql_query("SELECT * |
| 62 | +FROM language_names |
| 63 | +where name_language_id = 85 |
| 64 | +")or die ("error ".mysql_error()); |
| 65 | + |
| 66 | +while ($row = mysql_fetch_array($result, MYSQL_NUM)) { |
| 67 | +//echo $row[0]." - ".$row[1]." - ".$row[2]."<br>"; |
| 68 | +$lang[$row[0]]=$row[2]; |
| 69 | +} |
| 70 | + |
| 71 | + |
| 72 | +//////////////////////////////////////////////////////// |
| 73 | +$result = mysql_query(" |
| 74 | +SELECT |
| 75 | +language_id, count(*) as tot |
| 76 | +FROM {$dc}_expression_ns |
| 77 | +WHERE expression_id IN |
| 78 | +( |
| 79 | + SELECT DISTINCT expression_id |
| 80 | + FROM {$dc}_syntrans |
| 81 | + WHERE remove_transaction_id IS NULL |
| 82 | +) |
| 83 | +AND remove_transaction_id IS NULL |
| 84 | +group by language_id |
| 85 | +order by tot desc |
| 86 | + ")or die ("error ".mysql_error()); |
| 87 | + |
| 88 | +echo ' |
| 89 | +<table cellpadding=0 width=950><tr><td width=200><b>Language</b></td><td align=right><b>Expressions</b></td><td width=30></td><td></td></tr>'; |
| 90 | +$width=500; # pixel width of the bar drawn for the largest language |
| 91 | +$limit=500; # languages with this many expressions or fewer go into the summary paragraph instead of the table |
| 92 | +$max=0; $tx=''; # $tx collects the below-limit languages; initialised so the first .= is defined |
| 93 | +while ($row = mysql_fetch_array($result, MYSQL_NUM)) { |
| 94 | +$count=(int)$row[1]; |
| 95 | +if($max<$count)$max=$count; |
| 96 | +$ratio=$max>0?$count/$max:0; # share of the largest language; the guard avoids 0/0 if every count is zero |
| 97 | +$wi=ceil($ratio*$width); $per=ceil($ratio*100); # bar width in pixels and percentage label |
| 98 | +$name=isset($lang[$row[0]])?htmlspecialchars($lang[$row[0]], ENT_QUOTES, 'UTF-8'):"language ".$row[0]; # fallback when the name table has no entry |
| 99 | +if($count>$limit)echo "<tr><td >".$name.'</td><td align="right">'.$count."</td><td width=30></td><td><img src=sc1.png width=\"$wi\" height=20> $per %</td></tr>"; |
| 100 | +else $tx.=$name." (".$count."), "; |
| 101 | +} |
| 102 | +echo " |
| 103 | +<tr><td colspan=4> |
| 104 | +<div align=justify> |
| 105 | + |
| 106 | +<h3>Languages with less than $limit entries:</h3> |
| 107 | +$tx |
| 108 | +</div> |
| 109 | +</td> |
| 110 | +</table><center>"; |
| 111 | +/* |
| 112 | +for($i=0;$i<250;$i++){ |
| 113 | +if(strlen($ar[$i])>20)filewrite("out/".$lang[$i].".txt",$ar[$i]); |
| 114 | + |
| 115 | +} |
| 116 | +*/ |
| 117 | +//////////////////////////////////////////////////////// |
| 118 | + |
| 119 | + |
| 120 | +//echo "<pre>".$ar[85]."</pre>"; |
| 121 | + |
| 122 | +echo " |
| 123 | +<br> |
| 124 | +<hr size=1 noshade width=950> |
| 125 | +<table width=950><tr><td> |
| 126 | +<small>Page time: ".substr((stopwatch()-$start),0,5)." seconds</small> |
| 127 | +<td align=right> |
| 128 | + |
| 129 | +<small>Script contributed by <a href=http://www.dicts.info/>Zdenek Broz</a> |
| 130 | +</small> |
| 131 | +</td> |
| 132 | +</tr></table> |
| 133 | +<br>"; |
| 134 | + |
| 135 | + |
| 136 | +function filewrite($file,$txt){ |
| 137 | +# Overwrite (or create) $file with $txt followed by a newline. |
| 138 | +# file_put_contents() opens, writes and closes in one call, so a failed open |
| 139 | +# no longer hands an invalid handle to fwrite()/fclose(). |
| 140 | +file_put_contents($file,$txt."\n"); } |
| 141 | + |
| 142 | + |
| 143 | + |
| 144 | + |
| 145 | +?> |
| 146 | +</center></center> |
| 147 | +<p align="left"> |
| 148 | +<h3> see also</h3> |
| 149 | +<ul> |
| 150 | +<li><a href="collections.php">Collections</a></li> |
| 151 | +<li><a href="../../..">return to Omegawiki proper</a></li> |
| 152 | +</ul></p> |
Index: trunk/extensions/Wikidata/util/umls/sc1.png |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/extensions/Wikidata/util/umls/sc1.png |
___________________________________________________________________ |
Added: svn:mime-type |
1 | 153 | + application/octet-stream |