r62699 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r62698‎ | r62699 | r62700 >
Date:11:00, 19 February 2010
Author:daniel
Status:deferred
Tags:
Comment:
concept info cache
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/php/concept-info.sql (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/php/wwthesaurus.php (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/php/wwutils.php (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/php/concept-info.sql
@@ -0,0 +1,69 @@
 2+SET SESSION group_concat_max_len = 262144; -- 1024*256
 3+
 4+create table full_all_local_concept_info (
 5+ concept int(11) NOT NULL,
 6+ `lang` varbinary(10) NOT NULL,
 7+ `name` varbinary(255) NOT NULL,
 8+ `pages` MEDIUMBLOB DEFAULT NULL,
 9+ `definition` MEDIUMBLOB DEFAULT NULL,
 10+ `broader` MEDIUMBLOB DEFAULT NULL,
 11+ `narrower` MEDIUMBLOB DEFAULT NULL,
 12+ `similar` MEDIUMBLOB DEFAULT NULL,
 13+ `related` MEDIUMBLOB DEFAULT NULL,
 14+ PRIMARY KEY ( concept, lang )
 15+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
 16+
 17+insert into full_all_local_concept_info ( concept, lang, name )
 18+select global_concept, lang, local_concept_name
 19+from full_all_origin;
 20+
 21+update full_all_local_concept_info as I
 22+join full_all_origin as O on I.lang = O.lang and I.concept = O.global_concept
 23+join full_{lang}_definition as D on O.local_concept = D.concept and O.lang = "{lang}"
 24+set I.definition = D.definition
 25+where I.lang = "{lang}";
 26+
 27+update full_all_local_concept_info as I
 28+join ( select O.global_concept as concept, O.lang as lang,
 29+ group_concat( concat(R.type, ":", R.name) separator "|" ) as pages
 30+ from full_all_origin as O
 31+ join full_{lang}_about as A on A.concept = O.local_concept and O.lang = "{lang}"
 32+ join full_{lang}_resource as R on R.id = A.resource
 33+ where O.lang = "{lang}" and R.type IN (10, 50)
 34+ group by O.global_concept, O.lang
 35+ ) as X
 36+on I.concept = X.concept and I.lang = X.lang
 37+set I.pages = X.pages
 38+where I.lang = "{lang}";
 39+
 40+
 41+update full_all_local_concept_info as I
 42+join ( select narrow as concept, group_concat(concat(broad, ":", local_concept_name) separator "|") as broader from full_all_broader
 43+ join full_all_origin as O on O.global_concept = broad and O.lang = "{lang}"
 44+ group by narrow ) as X
 45+on X.concept = I.concept, I.lang = "{lang}"
 46+set I.broader = X.broader;
 47+
 48+update full_all_local_concept_info as I
 49+join ( select broad as concept, group_concat(concat(narrow, ":", local_concept_name) separator "|") as narrower from full_all_broader
 50+ join full_all_origin as O on O.global_concept = narrow and O.lang = "{lang}"
 51+ group by broad ) as X
 52+on X.concept = I.concept, I.lang = "{lang}"
 53+set I.narrower = X.narrower;
 54+
 55+update full_all_local_concept_info as I
 56+join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as similar from full_all_relation
 57+ join full_all_origin as O on O.global_concept = concept2 and O.lang = "{lang}"
 58+ where langmatch >= 1 or langref >= 1
 59+ group by concept1 ) as X
 60+on X.concept = I.concept, I.lang = "{lang}"
 61+set I.similar = X.similar;
 62+
 63+update full_all_local_concept_info as I
 64+join ( select concept1 as concept, group_concat(concat(concept2, ":", local_concept_name) separator "|") as related from full_all_relation
 65+ join full_all_origin as O on O.global_concept = concept2 and O.lang = "{lang}"
 66+ where bilink >= 1
 67+ group by concept1 ) as X
 68+on X.concept = I.concept, I.lang = "{lang}"
 69+set I.related = X.related;
 70+
Index: trunk/WikiWord/WikiWord/src/main/php/wwutils.php
@@ -55,7 +55,9 @@
5656
5757 foreach ($a as $x) {
5858 if ($s) $s.= ", ";
59 - $s .= $this->quote($x);
 59+
 60+ if (is_string($x)) $s .= $this->quote($x);
 61+ else $s .= $x;
6062 }
6163
6264 return '(' . $s . ')';
Index: trunk/WikiWord/WikiWord/src/main/php/wwthesaurus.php
@@ -1,6 +1,65 @@
22 <?php
33 require_once(dirname(__FILE__)."/wwutils.php");
44
 5+ /** Unknown type, SHOULD NOT occur in final data. MAY be used for
 6+ * resources that are referenced but were not available for analysis,
 7+ * or have not yet been analyzed.
 8+ **/
 9+ define('WW_RC_TYPE_UNKNOWN', 0);
 10+
 11+ /**
 12+ * A "real" page, describing a concept.
 13+ */
 14+ define('WW_RC_TYPE_ARTICLE', 10);
 15+
 16+ /**
 17+ * This page is a supplemental part of an article, typically a transcluded
 18+ * subpage or similar.
 19+ */
 20+ define('WW_RC_TYPE_SUPPLEMENT', 15);
 21+
 22+
 23+ /**
 24+ * A page solely defining a redirect/alias for another page.
 25+ */
 26+ define('WW_RC_TYPE_REDIRECT', 20);
 27+
 28+ /**
 29+ * A disambiguation page, listing different meanings for the page title,
 30+ * each linking to an article page.
 31+ */
 32+ define('WW_RC_TYPE_DISAMBIG', 30);
 33+
 34+ /**
 35+ * A page that contains a list of concepts that share some common property or quality,
 36+ * usually each linking to a page describing that concept.
 37+ */
 38+ define('WW_RC_TYPE_LIST', 40);
 39+
 40+ /**
 41+ * A category page.
 42+ */
 43+ define('WW_RC_TYPE_CATEGORY', 50);
 44+
 45+ /**
 46+ * This page does not contain relevant information for WikiWord.
 47+ */
 48+ define('WW_RC_TYPE_OTHER', 99);
 49+
 50+ /**
 51+ * A page that is broken in some way, or was marked as bad or disputed. Such pages
 52+ * SHOULD generally be treated as if they didn't exist.
 53+ */
 54+ define('WW_RC_TYPE_BAD', 100);
 55+
 56+ /**
 57+ * A resource that is not a page by itself, but merely a section of a page. Sections
 58+ * SHOULD always be part of a page of type ARTICLE, and are expected to describe
 59+ * a narrower concept than the "parent" page.
 60+ */
 61+ define('WW_RC_TYPE_SECTION', 200);
 62+
 63+
564 class WWThesaurus extends WWUTils {
665
766 function queryConceptsForTerm($lang, $term, $limit = 100) {
@@ -235,6 +294,7 @@
236295 . " JOIN {$wwTablePrefix}_{$ll}_about as A ON A.resource = R.id "
237296 . " JOIN {$wwTablePrefix}_{$wwThesaurusDataset}_origin as O ON O.lang = \"" . mysql_real_escape_string($ll) . "\" AND A.concept = O.local_concept "
238297 . " WHERE O.global_concept = " . (int)$id
 298+ . " WHERE R.type IN ( " . WW_RC_TYPE_ARTICLE . ", " . WW_RC_TYPE_CATEGORY . " ) "
239299 . " LIMIT " . (int)$limit;
240300
241301 $pages = $this->getList($sql, "name");
@@ -247,6 +307,28 @@
248308 return $result;
249309 }
250310
 311+ function getNamesForConcept( $id, $lang = null ) {
 312+ global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages;
 313+
 314+ if ( !$lang ) $lang = array_keys( $wwLanguages );
 315+ if ( !is_array($lang) ) $lang = preg_split('![\\s,;|/:]\\s*!', $lang);
 316+ $result = array();
 317+
 318+ foreach ($lang as $ll) {
 319+ $sql = "SELECT O.local_name FROM {$wwTablePrefix}_{$ll}_resource as O ";
 320+ $sql .= " WHERE O.global_concept = " . (int)$id;
 321+ $sql .= " AND O.lang = " . (int)$ll;
 322+
 323+ $pages = $this->getList($sql, "name");
 324+ if ( $pages === false || $pages === null ) return false;
 325+ if ( !$pages ) continue;
 326+
 327+ $result[$ll] = $pages;
 328+ }
 329+
 330+ return $result;
 331+ }
 332+
251333 function getTermsForConcept( $id, $lang = null, $limit = 100 ) {
252334 global $wwTablePrefix, $wwThesaurusDataset, $wwLanguages;
253335

Status & tagging log