r24525 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r24524‎ | r24525 | r24526 >
Date:15:21, 1 August 2007
Author:kim
Status:old
Tags:
Comment:
now switched dc to umls, let's cross fingers
Modified paths:
  • /trunk/extensions/Wikidata/util/umls/collection.php (modified) (history)
  • /trunk/extensions/Wikidata/util/umls/collections.php (modified) (history)
  • /trunk/extensions/Wikidata/util/umls/missing.php (added) (history)
  • /trunk/extensions/Wikidata/util/umls/stats.php (modified) (history)

Diff [purge]

Index: trunk/extensions/Wikidata/util/umls/missing.php
@@ -0,0 +1,271 @@
 2+<?php
 3+header("Content-type: text/html; charset=UTF-8");
 4+
 5+$dc="umls";
 6+
 7+define('MEDIAWIKI', true );
 8+include_once("../../../../LocalSettings.php");
 9+global $wgDBserver, $wgDBuser, $wgDBpassword, $wgDBname;
 10+
 11+$db1=$wgDBserver; # hostname
 12+$db2=$wgDBuser; # user
 13+$db3=$wgDBpassword; # pass
 14+$db4=$wgDBname; # db-name
 15+
 16+$connection=MySQL_connect($db1,$db2,$db3);
 17+if (!$connection)die("Cannot connect to SQL server. Try again later.");
 18+MySQL_select_db($db4)or die("Cannot open database");
 19+mysql_query("SET NAMES 'utf8'");
 20+
 21+echo "
 22+<style type=\"text/css\"><!--
 23+body {font-family:arial,sans-serif}
 24+--></style>
 25+";
 26+
 27+function stopwatch(){
 28+ list($usec, $sec) = explode(" ", microtime());
 29+ return ((float)$usec + (float)$sec);
 30+}
 31+
 32+
 33+$start=stopwatch();
 34+
 35+$collection_id=$_REQUEST['collection'];
 36+$language_id=$_REQUEST['language'];
 37+$collection_esc=mysql_real_escape_string($collection_id);
 38+$language_esc=mysql_real_escape_string( $language_id);
 39+
 40+$query=
 41+"SELECT spelling
 42+FROM {$dc}_collection_ns, {$dc}_defined_meaning, {$dc}_expression_ns
 43+WHERE collection_id=$collection_id
 44+AND collection_mid=defined_meaning_id
 45+AND {$dc}_defined_meaning.expression_id={$dc}_expression_ns.expression_id
 46+";
 47+echo $query;
 48+$result = mysql_query($query) or die ("error ".mysql_error());
 49+$row= mysql_fetch_array($result, MYSQL_NUM);
 50+$collection= $row[0];
 51+
 52+$result = mysql_query("SELECT language_name
 53+FROM language_names
 54+where name_language_id = 85
 55+and language_id=$language_id
 56+")or die ("error ".mysql_error());
 57+
 58+$row= mysql_fetch_array($result, MYSQL_NUM);
 59+$language=$row[0];
 60+
 61+echo"
 62+<h1>$collection</h1>
 63+<h2>$language</h2>
 64+<small><i>For large collections, this query might take up to a minute. Please be patient</i></small>
 65+<hr width=950 size=1 noshade><br>
 66+<h3> Missing defined meanings </h3>
 67+
 68+";
 69+
 70+# Malafaya: Here lies the old query, assuming there's always an English expression for the DM
 71+/*
 72+$result = mysql_query("
 73+ SELECT en.id, en.spelling
 74+ FROM
 75+ (
 76+ SELECT member_mid as id, spelling
 77+ FROM {$dc}_collection_contents, {$dc}_syntrans, {$dc}_expression_ns
 78+ WHERE collection_id = $collection_esc
 79+ AND {$dc}_syntrans.defined_meaning_id= {$dc}_collection_contents.member_mid
 80+ AND {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
 81+ AND language_id=85
 82+ AND {$dc}_syntrans.remove_transaction_id IS NULL
 83+ ORDER BY spelling
 84+ ) as en
 85+ LEFT JOIN
 86+ (
 87+ SELECT member_mid as id, spelling
 88+ FROM {$dc}_collection_contents, {$dc}_syntrans, {$dc}_expression_ns WHERE
 89+ collection_id = $collection_esc
 90+ AND {$dc}_syntrans.defined_meaning_id= {$dc}_collection_contents.member_mid
 91+ AND {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
 92+ AND language_id = $language_esc
 93+ AND {$dc}_syntrans.remove_transaction_id IS NULL
 94+ ORDER BY spelling
 95+ ) as actual
 96+ ON en.id=actual.id
 97+ WHERE actual.id IS NULL
 98+")or die ("error ".mysql_error());
 99+*/
 100+
 101+# Malafaya: This is my query (performance must be checked live) for missing expressions based on
 102+# * don't count deleted stuff (old query did)
 103+# * do 2 joins: between members of collection and target language, and then with english for default, but only for elements having target language expression as NULL (non-existing)
 104+# * this gives us the DM id, the spelling in target language (or NULL, if none), the spelling in English (or NULL, if none)
 105+# (+Kim) Alternately, just try the actual defining expression, which should never be NULL in the first place.
 106+# Warning: some DMs came up in OLPC and Swadesh collections belonging to those collections but having no expressions associated... These are visible in this query
 107+
 108+$result = mysql_query("
 109+ SELECT member.id, translation_dm.spelling_dm
 110+ FROM
 111+ (
 112+ SELECT member_mid as id
 113+ FROM {$dc}_collection_contents WHERE
 114+ collection_id = $collection_esc
 115+ AND remove_transaction_id IS NULL
 116+ ) as member
 117+ LEFT JOIN
 118+ (
 119+ SELECT spelling, defined_meaning_id
 120+ FROM {$dc}_syntrans, {$dc}_expression_ns WHERE
 121+ {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
 122+ AND {$dc}_syntrans.remove_transaction_id IS NULL
 123+ AND language_id = $language_esc
 124+ AND defined_meaning_id IN
 125+ (
 126+ SELECT member_mid as id
 127+ FROM {$dc}_collection_contents WHERE
 128+ collection_id = $collection_esc
 129+ AND remove_transaction_id IS NULL
 130+ )
 131+ ) as translation
 132+ ON
 133+ translation.defined_meaning_id = member.id
 134+ LEFT JOIN
 135+ (
 136+ SELECT COALESCE(translation_en.spelling_en, translation_dm1.spelling_dm ) as spelling_dm, translation_dm1.defined_meaning_id as defined_meaning_id
 137+ FROM
 138+ (
 139+ SELECT spelling as spelling_dm, defined_meaning_id
 140+ FROM {$dc}_defined_meaning, {$dc}_expression_ns WHERE
 141+ {$dc}_expression_ns.expression_id = {$dc}_defined_meaning.expression_id
 142+ AND {$dc}_defined_meaning.remove_transaction_id IS NULL
 143+ AND {$dc}_expression_ns.remove_transaction_id IS NULL
 144+ AND defined_meaning_id IN
 145+ (
 146+ SELECT member_mid as id
 147+ FROM {$dc}_collection_contents WHERE
 148+ collection_id = $collection_esc
 149+ AND remove_transaction_id IS NULL
 150+ )
 151+ ) as translation_dm1
 152+ LEFT JOIN
 153+ (
 154+ SELECT spelling as spelling_en, defined_meaning_id
 155+ FROM {$dc}_syntrans, {$dc}_expression_ns WHERE
 156+ {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
 157+ AND {$dc}_syntrans.remove_transaction_id IS NULL
 158+ AND language_id = 85
 159+ AND defined_meaning_id IN
 160+ (
 161+ SELECT member_mid as id
 162+ FROM {$dc}_collection_contents WHERE
 163+ collection_id = $collection_esc
 164+ AND remove_transaction_id IS NULL
 165+ )
 166+ ) as translation_en
 167+ ON translation_dm1.defined_meaning_id=translation_en.defined_meaning_id
 168+ ) as translation_dm
 169+ ON
 170+ translation_dm.defined_meaning_id = member.id
 171+ WHERE translation.spelling IS NULL
 172+ ORDER BY spelling_dm
 173+")or die ("error ".mysql_error());
 174+
 175+
 176+while ($row = mysql_fetch_array($result, MYSQL_NUM)) {
 177+ $id=$row[0];
 178+ $spelling_dm=$row[1];
 179+
 180+ # Malafaya: Not translated to target language
 181+ if ($spelling_dm == null)
 182+ # Malafaya: Not translated to English either; use a placeholder expression
 183+ print "<a href=\"../../../index.php?title=DefinedMeaning:(untranslated)_($id)\">(untranslated)</a>;\n";
 184+ else
 185+ # Malafaya: English translation exists; use it
 186+ print "<a href=\"../../../index.php?title=DefinedMeaning:".$spelling_dm."_($id)\">$spelling_dm</a>;\n";
 187+}
 188+print "<br>\n";
 189+
 190+
 191+print "<hr>\n
 192+<h3>Already present</h3>\n";
 193+
 194+# Malafaya: Old query with same caveats as the one above
 195+
 196+/*
 197+$result = mysql_query("
 198+ SELECT actual.id, actual.spelling
 199+ FROM
 200+ (
 201+ SELECT member_mid as id, spelling
 202+ FROM {$dc}_collection_contents, {$dc}_syntrans, {$dc}_expression_ns
 203+ WHERE collection_id = $collection_esc
 204+ AND {$dc}_syntrans.defined_meaning_id= {$dc}_collection_contents.member_mid
 205+ AND {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
 206+ AND language_id=85
 207+ AND {$dc}_syntrans.remove_transaction_id IS NULL
 208+ ORDER BY spelling
 209+ ) as en
 210+ LEFT JOIN
 211+ (
 212+ SELECT member_mid as id, spelling
 213+ FROM {$dc}_collection_contents, {$dc}_syntrans, {$dc}_expression_ns WHERE
 214+ collection_id = $collection_esc
 215+ AND {$dc}_syntrans.defined_meaning_id= {$dc}_collection_contents.member_mid
 216+ AND {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
 217+ AND language_id = $language_esc
 218+ AND {$dc}_syntrans.remove_transaction_id IS NULL
 219+ ORDER BY spelling
 220+ ) as actual
 221+ ON en.id=actual.id
 222+ WHERE actual.id IS NOT NULL
 223+")or die ("error ".mysql_error());
 224+*/
 225+
 226+# Malafaya: my new query, not counting deleted stuff; just select target language expression for DMs in collection (whether translated to English or not, it's not relevant)
 227+
 228+$result = mysql_query("
 229+ SELECT defined_meaning_id, spelling
 230+ FROM {$dc}_syntrans, {$dc}_expression_ns WHERE
 231+ {$dc}_expression_ns.expression_id = {$dc}_syntrans.expression_id
 232+ AND {$dc}_syntrans.remove_transaction_id IS NULL
 233+ AND language_id = $language_esc
 234+ AND {$dc}_syntrans.defined_meaning_id IN
 235+ (
 236+ SELECT member_mid as id
 237+ FROM {$dc}_collection_contents WHERE
 238+ collection_id = $collection_esc
 239+ AND remove_transaction_id IS NULL
 240+ )
 241+ ORDER BY spelling
 242+")or die ("error ".mysql_error());
 243+
 244+
 245+while ($row = mysql_fetch_array($result, MYSQL_NUM)) {
 246+ $id=$row[0];
 247+ $spelling=$row[1];
 248+ print "<a href=\"../../../index.php?title=DefinedMeaning:".$spelling."_($id)\">$spelling</a>;\n";
 249+}
 250+
 251+
 252+
 253+echo"
 254+<hr>
 255+<div align=\"right\">
 256+<small>Page time: ".substr((stopwatch()-$start),0,5)." seconds</small>
 257+</div>
 258+Notes:
 259+<ul>
 260+<li>Particular (typically common) words occur multiple times. This is because these words have multiple (defined) meanings.</li>
 261+</ul>
 262+<hr>
 263+<p align=\"left\">
 264+<h3> see also</h3>
 265+<ul>
 266+<li><a href=\"collection.php?collection=$collection_id\">Return to Number of Expressions per language in this collection</a></li>
 267+<li><a href=\"stats.php\">Overview, expressions per language</a></li>
 268+<li><a href=\"../../..\">return to Omegawiki proper</li></a>
 269+</p>
 270+"
 271+
 272+?>
Property changes on: trunk/extensions/Wikidata/util/umls/missing.php
___________________________________________________________________
Added: svn:executable
1273 + *
Index: trunk/extensions/Wikidata/util/umls/collection.php
@@ -1,6 +1,6 @@
22 <?php
33 header("Content-type: text/html; charset=UTF-8");
4 -$dc="uw";
 4+$dc="umls";
55
66 define('MEDIAWIKI', true );
77 include_once("../../../../LocalSettings.php");
Index: trunk/extensions/Wikidata/util/umls/collections.php
@@ -2,7 +2,7 @@
33 header("Content-type: text/html; charset=UTF-8");
44
55
6 -$dc="uw";
 6+$dc="umls";
77
88 define('MEDIAWIKI', true );
99 include_once("../../../../LocalSettings.php");
Index: trunk/extensions/Wikidata/util/umls/stats.php
@@ -1,7 +1,7 @@
22 <?php
33 header("Content-type: text/html; charset=UTF-8");
44
5 -$dc="uw";
 5+$dc="umls";
66
77 define('MEDIAWIKI', true );
88 include_once("../../../../LocalSettings.php");

Status & tagging log