r24524 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r24523‎ | r24524 | r24525 >
Date:15:13, 1 August 2007
Author:kim
Status:old
Tags:
Comment:
Prepared scripts for umls (or any other db), just add water ... errr, the correct dataset, in the $dc
variable.

At some point this stuff will need further refactoring, but not today O:-)
Modified paths:
  • /trunk/extensions/Wikidata/util/umls (added) (history)
  • /trunk/extensions/Wikidata/util/umls/collection.php (added) (history)
  • /trunk/extensions/Wikidata/util/umls/collections.php (added) (history)
  • /trunk/extensions/Wikidata/util/umls/sc1.png (added) (history)
  • /trunk/extensions/Wikidata/util/umls/stats.php (added) (history)
  • /trunk/extensions/Wikidata/util/umls/stats.png (added) (history)

Diff [purge]

Index: trunk/extensions/Wikidata/util/umls/collection.php
@@ -0,0 +1,168 @@
<?php
// Standalone report page: sends its own content-type header and talks to the
// wiki database directly with the credentials read from LocalSettings.php.
header("Content-type: text/html; charset=UTF-8");

// Dataset prefix: every dataset table below is addressed as "{$dc}_<table>".
$dc = "uw";

// MEDIAWIKI is defined before the settings file is pulled in
// (presumably LocalSettings.php checks for it; confirm against that file).
define('MEDIAWIKI', true);
include_once("../../../../LocalSettings.php");
global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname;

$db1 = $wgDBserver;   # hostname
$db2 = $wgDBuser;     # user
$db3 = $wgDBpassword; # pass
$db4 = $wgDBname;     # db-name

// Connect, select the wiki database and switch the connection to UTF-8.
$connection = mysql_connect($db1, $db2, $db3);
if (!$connection) {
    die("Cannot connect to SQL server. Try again later.");
}
if (!mysql_select_db($db4)) {
    die("Cannot open database");
}
mysql_query("SET NAMES 'utf8'");

// Minimal page styling.
echo '
<style type="text/css"><!--
body {font-family:arial,sans-serif}
--></style>
';
 25+
/**
 * Returns the current time as a float number of seconds, including the
 * microsecond fraction.
 *
 * Called in pairs: subtracting an earlier reading from a later one gives the
 * elapsed wall-clock time in seconds.
 *
 * @return float Seconds since the Unix epoch.
 */
function stopwatch(){
    // microtime(true) returns the float directly, replacing the manual
    // split-and-sum of the "usec sec" string form.
    return microtime(true);
}
 30+
 31+/*
 32+$result = mysql_query("SELECT
 33+{$dc}_defined_meaning.defined_meaning_id , {$dc}_expression_ns.spelling
 34+FROM {$dc}_defined_meaning, {$dc}_expression_ns
 35+where {$dc}_defined_meaning.defined_meaning_id=1446
 36+and {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id
 37+limit 0,40")or die ("error ".mysql_error());
 38+
 39+*/
 40+
// Start timing; the page footer prints the elapsed time.
$start=stopwatch();

// The collection id comes straight from the request and is interpolated
// unquoted into SQL and into links further down, so force it to an integer.
$collection_id = isset($_REQUEST['collection']) ? (int) $_REQUEST['collection'] : 0;

// Look up the spelling of the defined meaning that names this collection.
$result = mysql_query(
"SELECT spelling
FROM {$dc}_collection_ns, {$dc}_defined_meaning, {$dc}_expression_ns
WHERE collection_id=$collection_id
AND collection_mid=defined_meaning_id
AND {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id
")or die ("error ".mysql_error());

// An unknown id matches no row; fall back to an empty title rather than
// indexing into the boolean false returned by mysql_fetch_array().
$row = mysql_fetch_array($result, MYSQL_NUM);
$collection = $row ? $row[0] : '';

// The spelling is database content, so escape it before it goes into HTML.
$collection_html = htmlspecialchars($collection, ENT_QUOTES, 'UTF-8');

echo"<center>
<h1> $collection_html </h1>
<h2> Number of Expressions per language in this collection </h2>
<hr width=950 size=1 noshade><br>
";
 61+
 62+
// Build $lang: language id => display name. Rows are read positionally
// (column 0 is used as the key, column 2 as the name). The filter
// name_language_id = 85 picks a single naming language
// (presumably English; confirm against the language table).
$lang = array(); // initialised so later lookups are safe even when no rows come back

$result = mysql_query("SELECT *
FROM language_names
where name_language_id = 85
")or die ("error ".mysql_error());

while ($row = mysql_fetch_array($result, MYSQL_NUM)) {
    $lang[$row[0]] = $row[2];
}
 72+
 73+
////////////////////////////////////////////////////////
// Per language, count the distinct defined meanings of this collection that
// have a syntrans entry, skipping rows whose remove_transaction_id is set.
// Results are ordered with the largest count first.
//
// The id is interpolated unquoted, where string escaping gives no protection
// (e.g. "1 OR 1=1" contains nothing to escape), so it is cast to an integer.
$collection_esc = (int) $collection_id;
$result = mysql_query("SELECT
language_id, COUNT(DISTINCT defined_meaning_id) as counts
FROM {$dc}_collection_contents, {$dc}_syntrans, {$dc}_expression_ns
WHERE collection_id = $collection_esc
AND {$dc}_syntrans.defined_meaning_id= {$dc}_collection_contents.member_mid
AND {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
AND {$dc}_expression_ns.remove_transaction_id IS NULL
AND {$dc}_syntrans.remove_transaction_id IS NULL
AND {$dc}_collection_contents.remove_transaction_id is NULL
GROUP BY language_id
ORDER BY counts DESC
 ")or die ("error ".mysql_error());
 88+
// Render the result set: languages above the cutoff get a table row with a
// bar scaled against the largest count; the rest are gathered into one
// comma-separated paragraph under the table.
echo '
<table cellpadding=0 width=950><tr><td width=200><b>Language</b></td><td align=right><b>Expressions</b></td><td width=30></td><td></td></tr>';
$width=500;         # bar width in pixels for the largest count
$limit=0;           # counts at or below this go into the summary paragraph
$max=0;             # largest count seen so far
$limit_percent=10;  # cutoff as a percentage of $max
$small=array();     # summary entries for languages at or below the cutoff

while ($row = mysql_fetch_array($result, MYSQL_NUM)) {
    $language_id=$row[0];
    $count=$row[1];
    if($max<$count) {
        $max=$count;
        $limit=(int) ($max*($limit_percent/100)+0.5); # 10% cutoff, note that ORDER BY ... DESC should have first row = max ;-)
    }
    $wi=ceil(($count/$max)*$width);
    $per=ceil(($count/$max)*100);

    // Fall back to the numeric id when the language has no name, so the link
    // never renders with empty (unclickable) text.
    $name=isset($lang[$language_id]) ? $lang[$language_id] : "language $language_id";

    // URL-encode the query values, then HTML-escape the whole attribute
    // (this also turns the "&" separator into "&amp;").
    $href="missing.php?collection=".urlencode($collection_id)."&language=".urlencode($language_id);
    $language_link="<a href=\"".htmlspecialchars($href, ENT_QUOTES, 'UTF-8')."\">"
        .htmlspecialchars($name, ENT_QUOTES, 'UTF-8')."</a>";

    if($count>$limit) {
        echo "<tr><td >".$language_link.'</td><td align="right">'.$count."</td><td width=30></td><td><img src=sc1.png width=\"$wi\" height=20> $per %</td></tr>";
    } else {
        $small[]=$language_link." (".$count."/ $per%)";
    }
}

// Joining avoids both the undefined-variable append and the trailing ", ".
$tx=implode(", ", $small);

// The <center> opened at the end is deliberate: the closing echo of the page
// emits two </center> tags.
echo "
<tr><td colspan=4>
<div align=justify>

<h3>Languages with $limit entries or less ( / cutoff at $limit_percent% or less)</h3>
$tx
</div>
</td>
</tr>
</table><center>";
 120+/*
 121+for($i=0;$i<250;$i++){
 122+if(strlen($ar[$i])>20)filewrite("out/".$lang[$i].".txt",$ar[$i]);
 123+
 124+}
 125+*/
 126+////////////////////////////////////////////////////////
 127+
 128+
 129+//echo "<pre>".$ar[85]."</pre>";
 130+
// Footer: elapsed time and attribution.
// number_format() always yields fixed-point text; truncating the float's
// string form with substr() printed things like "9.0E-" for very fast pages.
$elapsed = number_format(stopwatch() - $start, 3, '.', '');

echo "
<br>
<hr size=1 noshade width=950>
<table width=950><tr><td>
<small>Page time: $elapsed seconds</small>
</td>
<td align=right>

<small>Script based on work contributed by <a href=\"http://www.dicts.info/\">Zdenek Broz</a>
</small>
</td>
</tr></table>
<br>";
 143+
 144+
/**
 * Writes $txt followed by a newline to $file, replacing any previous content.
 *
 * Only referenced from commented-out debugging code in this script.
 *
 * @param string $file Path of the file to (over)write.
 * @param string $txt  Text to store; a trailing "\n" is appended.
 * @return bool True when the data was written, false when the file could not
 *              be opened or written (previously the failed handle was passed
 *              on to fwrite()/fclose() unchecked).
 */
function filewrite($file,$txt){
    return file_put_contents($file, $txt."\n") !== false;
}
 150+
 151+
 152+
// Close both <center> elements opened earlier on the page.
echo"
</center></center>
<hr>\n";
?>
Notes:
<ul>
<li>Languages link to lists of words that are still missing for this collection. </li>
<li>Especially for large collections, <b>it might take a minute or two to get all the missing words</b></li>
</ul>
<hr>
<div align="left">
<h3> see also</h3>
<ul>
<li><a href="collections.php">Other collections</a></li>
<li><a href="stats.php">Overview, expressions per language</a></li>
<li><a href="../../..">return to Omegawiki proper</a></li>
</ul>
</div>
Property changes on: trunk/extensions/Wikidata/util/umls/collection.php
___________________________________________________________________
Added: svn:executable
1170 + *
Index: trunk/extensions/Wikidata/util/umls/stats.png
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/extensions/Wikidata/util/umls/stats.png
___________________________________________________________________
Added: svn:mime-type
2171 + application/octet-stream
Index: trunk/extensions/Wikidata/util/umls/collections.php
@@ -0,0 +1,81 @@
<?php
// Standalone report page: sends its own content-type header and talks to the
// wiki database directly with the credentials read from LocalSettings.php.
header("Content-type: text/html; charset=UTF-8");

// Dataset prefix: every dataset table below is addressed as "{$dc}_<table>".
$dc = "uw";

// MEDIAWIKI is defined before the settings file is pulled in
// (presumably LocalSettings.php checks for it; confirm against that file).
define('MEDIAWIKI', true);
include_once("../../../../LocalSettings.php");
global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname;

$db1 = $wgDBserver;   # hostname
$db2 = $wgDBuser;     # user
$db3 = $wgDBpassword; # pass
$db4 = $wgDBname;     # db-name

// Connect, select the wiki database and switch the connection to UTF-8.
$connection = mysql_connect($db1, $db2, $db3);
if (!$connection) {
    die("Cannot connect to SQL server. Try again later.");
}
if (!mysql_select_db($db4)) {
    die("Cannot open database");
}
mysql_query("SET NAMES 'utf8'");

// Minimal page styling.
echo '
<style type="text/css"><!--
body {font-family:arial,sans-serif}
--></style>
';
 27+
/**
 * Returns the current time as a float number of seconds, including the
 * microsecond fraction.
 *
 * Called in pairs: subtracting an earlier reading from a later one gives the
 * elapsed wall-clock time in seconds.
 *
 * @return float Seconds since the Unix epoch.
 */
function stopwatch(){
    // microtime(true) returns the float directly, replacing the manual
    // split-and-sum of the "usec sec" string form.
    return microtime(true);
}
 32+
 33+
// Start timing; the line under the list prints the elapsed time.
$start=stopwatch();


echo"
<h1>Collections</h1>
<hr width=950 size=1 noshade><br>
";

// One row per collection: its id, the spelling of the defined meaning that
// names it, and the number of member rows not marked as removed. The two
// derived tables are joined on the collection id and sorted by spelling.
// (The former escape of $collection_id was dropped: that variable is never
// set on this page and the escaped value was never used.)
$result = mysql_query("
 SELECT spellings.id, spelling, counts.total FROM
 (
 SELECT collection_id as id, spelling
 FROM {$dc}_collection_ns, {$dc}_defined_meaning, {$dc}_expression_ns
 WHERE collection_mid=defined_meaning_id
 AND {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id
 ) AS spellings JOIN
 (
 SELECT {$dc}_collection_ns.collection_id AS id ,count(*) AS total
 FROM {$dc}_collection_contents, {$dc}_collection_ns
 WHERE {$dc}_collection_ns.collection_id={$dc}_collection_contents.collection_id
 AND {$dc}_collection_contents.remove_transaction_id is NULL
 AND {$dc}_collection_ns.remove_transaction_id is NULL
 GROUP BY {$dc}_collection_ns.collection_id
 ) AS counts
 ON spellings.id=counts.id
 ORDER BY spelling
")or die ("error ".mysql_error());
 62+
 63+
 64+
// One list item per collection, linking to its per-language breakdown.
print "<ul>";
while ($row = mysql_fetch_array($result, MYSQL_NUM)) {
    $id=$row[0];
    $spelling=$row[1];
    $total=$row[2];

    // Both values are database content: URL-encode the id for the query
    // string and HTML-escape the spelling before it is shown.
    $href="collection.php?collection=".urlencode($id);
    $spelling_html=htmlspecialchars($spelling, ENT_QUOTES, 'UTF-8');

    print "<li><a href=\"".htmlspecialchars($href, ENT_QUOTES, 'UTF-8')."\">$spelling_html</a> ($total defined meanings) </li>";
}
print "</ul>";
 73+
// number_format() always yields fixed-point text; truncating the float's
// string form with substr() printed things like "9.0E-" for very fast pages.
$elapsed = number_format(stopwatch() - $start, 3, '.', '');
echo"<hr><div align=\"right\"><small>Page time: $elapsed seconds</small></div>";
?>

<div align="left">
<h3> see also</h3>
<ul>
<li><a href="stats.php">Overview, expressions per language</a></li>
<li><a href="../../..">return to Omegawiki proper</a></li>
</ul>
</div>
Property changes on: trunk/extensions/Wikidata/util/umls/collections.php
___________________________________________________________________
Added: svn:executable
183 + *
Index: trunk/extensions/Wikidata/util/umls/stats.php
@@ -0,0 +1,151 @@
<?php
// Standalone report page: sends its own content-type header and talks to the
// wiki database directly with the credentials read from LocalSettings.php.
header("Content-type: text/html; charset=UTF-8");

// Dataset prefix: every dataset table below is addressed as "{$dc}_<table>".
$dc = "uw";

// MEDIAWIKI is defined before the settings file is pulled in
// (presumably LocalSettings.php checks for it; confirm against that file).
define('MEDIAWIKI', true);
include_once("../../../../LocalSettings.php");
global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname;

$db1 = $wgDBserver;   # hostname
$db2 = $wgDBuser;     # user
$db3 = $wgDBpassword; # pass
$db4 = $wgDBname;     # db-name

// Connect, select the wiki database and switch the connection to UTF-8.
$connection = mysql_connect($db1, $db2, $db3);
if (!$connection) {
    die("Cannot connect to SQL server. Try again later.");
}
if (!mysql_select_db($db4)) {
    die("Cannot open database");
}
mysql_query("SET NAMES 'utf8'");

// Minimal page styling.
echo '
<style type="text/css"><!--
body {font-family:arial,sans-serif}
--></style>
';
 26+
/**
 * Returns the current time as a float number of seconds, including the
 * microsecond fraction.
 *
 * Called in pairs: subtracting an earlier reading from a later one gives the
 * elapsed wall-clock time in seconds.
 *
 * @return float Seconds since the Unix epoch.
 */
function stopwatch(){
    // microtime(true) returns the float directly, replacing the manual
    // split-and-sum of the "usec sec" string form.
    return microtime(true);
}
 31+
 32+/*
 33+$result = mysql_query("SELECT
 34+{$dc}_defined_meaning.defined_meaning_id , {$dc}_expression_ns.spelling
 35+FROM {$dc}_defined_meaning, {$dc}_expression_ns
 36+where {$dc}_defined_meaning.defined_meaning_id=1446
 37+and {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id
 38+limit 0,40")or die ("error ".mysql_error());
 39+
 40+*/
 41+
// Start timing; the page footer prints the elapsed time.
$start=stopwatch();

echo"<center>
<h1>Number of Expressions per language</h1>
<hr width=950 size=1 noshade><br>
";

// Overall totals, counting only rows that are not marked as removed. Both
// queries now stop with the database error on failure, like every other
// query on this page, instead of passing false on to mysql_fetch_row().
$expressions_r=mysql_query("SELECT COUNT(*) FROM {$dc}_expression_ns WHERE remove_transaction_id IS NULL")
    or die ("error ".mysql_error());
$expressions_a=mysql_fetch_row($expressions_r);
$expressions=$expressions_a[0];

$defined_meanings_r=mysql_query("SELECT COUNT(*) FROM {$dc}_defined_meaning WHERE remove_transaction_id IS NULL")
    or die ("error ".mysql_error());
$defined_meanings_a=mysql_fetch_row($defined_meanings_r);
$defined_meanings=$defined_meanings_a[0];

echo"<br>\n";
echo"Total <b>$defined_meanings</b> DefinedMeanings in database, linking together <b>$expressions</b> Expressions. Broken down per language:\n";
// "</br>" is not a valid tag; a plain line break was intended.
echo"<br>\n";
echo"<hr>\n";
 60+
// Build $lang: language id => display name. Rows are read positionally
// (column 0 is used as the key, column 2 as the name). The filter
// name_language_id = 85 picks a single naming language
// (presumably English; confirm against the language table).
$lang = array(); // initialised so later lookups are safe even when no rows come back

$result = mysql_query("SELECT *
FROM language_names
where name_language_id = 85
")or die ("error ".mysql_error());

while ($row = mysql_fetch_array($result, MYSQL_NUM)) {
    $lang[$row[0]] = $row[2];
}
 70+
 71+
// ---------------------------------------------------------------------
// Per language, count the expressions that are not marked as removed and
// that appear in at least one syntrans row which is itself not removed.
// Rows come back with the largest count first.
$result = mysql_query("
SELECT
language_id, count(*) as tot
FROM {$dc}_expression_ns
WHERE expression_id IN
(
 SELECT DISTINCT expression_id
 FROM {$dc}_syntrans
 WHERE remove_transaction_id IS NULL
)
AND remove_transaction_id IS NULL
group by language_id
order by tot desc
 ");
if (!$result) {
    die ("error ".mysql_error());
}
 87+
// Render the result set: languages with more than $limit expressions get a
// table row with a bar scaled against the largest count; the rest are
// gathered into one comma-separated paragraph under the table.
echo '
<table cellpadding=0 width=950><tr><td width=200><b>Language</b></td><td align=right><b>Expressions</b></td><td width=30></td><td></td></tr>';
$width=500;      # bar width in pixels for the largest count
$limit=500;      # counts at or below this go into the summary paragraph
$max=0;          # largest count seen so far (first row, given the DESC order)
$small=array();  # summary entries for languages at or below the limit

while ($row = mysql_fetch_array($result, MYSQL_NUM)) {
    $language_id=$row[0];
    $count=$row[1];
    if($max<$count) {
        $max=$count;
    }
    $wi=ceil(($count/$max)*$width);
    $per=ceil(($count/$max)*100);

    // Fall back to the numeric id when the language has no name, and escape
    // the name since it is database content going into HTML.
    $name=isset($lang[$language_id]) ? $lang[$language_id] : "language $language_id";
    $name_html=htmlspecialchars($name, ENT_QUOTES, 'UTF-8');

    if($count>$limit) {
        echo "<tr><td >".$name_html.'</td><td align="right">'.$count."</td><td width=30></td><td><img src=sc1.png width=\"$wi\" height=20> $per %</td></tr>";
    } else {
        $small[]=$name_html." (".$count.")";
    }
}

// Joining avoids both the undefined-variable append and the trailing ", ".
$tx=implode(", ", $small);

// Heading wording matches the test above: a count equal to $limit lands in
// this bucket, so it is "or fewer", not "less than".
// The <center> opened at the end is deliberate: the markup after the closing
// PHP tag emits two </center> tags.
echo "
<tr><td colspan=4>
<div align=justify>

<h3>Languages with $limit entries or fewer:</h3>
$tx
</div>
</td>
</tr>
</table><center>";
 111+/*
 112+for($i=0;$i<250;$i++){
 113+if(strlen($ar[$i])>20)filewrite("out/".$lang[$i].".txt",$ar[$i]);
 114+
 115+}
 116+*/
 117+////////////////////////////////////////////////////////
 118+
 119+
 120+//echo "<pre>".$ar[85]."</pre>";
 121+
// Footer: elapsed time and attribution.
// number_format() always yields fixed-point text; truncating the float's
// string form with substr() printed things like "9.0E-" for very fast pages.
$elapsed = number_format(stopwatch() - $start, 3, '.', '');

echo "
<br>
<hr size=1 noshade width=950>
<table width=950><tr><td>
<small>Page time: $elapsed seconds</small>
</td>
<td align=right>

<small>Script contributed by <a href=\"http://www.dicts.info/\">Zdenek Broz</a>
</small>
</td>
</tr></table>
<br>";
 134+
 135+
/**
 * Writes $txt followed by a newline to $file, replacing any previous content.
 *
 * Only referenced from commented-out debugging code in this script.
 *
 * @param string $file Path of the file to (over)write.
 * @param string $txt  Text to store; a trailing "\n" is appended.
 * @return bool True when the data was written, false when the file could not
 *              be opened or written (previously the failed handle was passed
 *              on to fwrite()/fclose() unchecked).
 */
function filewrite($file,$txt){
    return file_put_contents($file, $txt."\n") !== false;
}
 141+
 142+
 143+
 144+
 145+?>
</center></center>
<div align="left">
<h3> see also</h3>
<ul>
<li><a href="collections.php">Collections</a></li>
<li><a href="../../..">return to Omegawiki proper</a></li>
</ul>
</div>
Index: trunk/extensions/Wikidata/util/umls/sc1.png
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/extensions/Wikidata/util/umls/sc1.png
___________________________________________________________________
Added: svn:mime-type
1153 + application/octet-stream

Status & tagging log