r46664 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r46663‎ | r46664 | r46665 >
Date:23:39, 31 January 2009
Author:daniel
Status:deferred
Tags:
Comment:
following through on 'about' relation (tbc)
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/LocalConceptStoreSchema.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordConceptStoreSchema.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/debug-tweaks.properties (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/LocalConceptStoreBuilder.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/LocalConceptStoreSchema.java
@@ -106,7 +106,7 @@
107107 aboutTable.addField( new ReferenceField(this, "concept", "INT", null, false, KeyType.INDEX, "concept", "id", null ) );
108108 aboutTable.addField( new ReferenceField(this, "concept_name", getTextType(255), null, true, KeyType.INDEX, "concept", "name", null ) );
109109 aboutTable.addKey( new DatabaseKey(this, KeyType.PRIMARY, "about", new String[] {"resource", "concept"}) );
110 - addTable(aliasTable);
 110+ addTable(aboutTable);
111111
112112 meaningTable = new RelationTable(this, "meaning", defaultTableAttributes);
113113 //meaningTable.addField( new DatabaseField(this, "id", "INT", "AUTO_INCREMENT", false, KeyType.PRIMARY) );
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordConceptStoreSchema.java
@@ -57,7 +57,7 @@
5858 langlinkTable.addField( new DatabaseField(this, "language", getTextType(16), null, true, null ) );
5959 langlinkTable.addField( new DatabaseField(this, "target", getTextType(255), null, true, null ) );
6060 langlinkTable.addKey( new DatabaseKey(this, KeyType.INDEX, "language_target", new String[] {"language", "target"}) );
61 - langlinkTable.addKey( new DatabaseKey(this, KeyType.UNIQUE, "concept_language_target", new String[] {"concept", "language", "target"}) );
 61+ langlinkTable.addKey( new DatabaseKey(this, KeyType.INDEX, "concept_language_target", new String[] {"concept", "language", "target"}) );
6262 addTable(langlinkTable);
6363
6464 groupStats.add( new GroupStatsSpec("concept", "type", conceptTypeCodeTranslator));
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java
@@ -260,6 +260,11 @@
261261 List<WikiTextAnalyzer.WikiLink> links = analyzerPage.getLinks();
262262 linkTracker.step(links.size());
263263
 264+ //XXX: after resolving all aliases, change type to OTHER!
 265+ //int conceptId = storeConcept(rcId, name, ConceptType.UNKNOWN);
 266+
 267+ int conceptId = store.storeAbout(rcId, name);
 268+
264269 for (WikiTextAnalyzer.WikiLink link : links) {
265270 WikiTextAnalyzer.LinkMagic m = link.getMagic();
266271
@@ -267,6 +272,11 @@
268273 //FIXME: store this also as a reference to the category's concept under its original title!
269274 storeConceptBroader(rcId, name, link.getPage().toString(), ExtractionRule.BROADER_FROM_CAT);
270275 }
 276+
 277+ if (m==WikiTextAnalyzer.LinkMagic.LANGUAGE) {
 278+ //FIXME: language links point to *resource* names. resolve accordingly.
 279+ storeLanguageLink(rcId, conceptId, name, link.getInterwiki().toString(), link.getTarget().toString());
 280+ }
271281 }
272282
273283
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java
@@ -628,12 +628,14 @@
629629 return 0;
630630 }
631631
632 - public void storeAbout(int resource, String conceptName) {
 632+ public int storeAbout(int resource, String conceptName) {
633633 trace("+ storeAbout: resource = "+resource+", conceptName = "+conceptName);
 634+ return -1;
634635 }
635636
636 - public void storeAbout(int resource, int concept, String conceptName) {
 637+ public int storeAbout(int resource, int concept, String conceptName) {
637638 trace("+ storeAbout: resource = "+resource+", concept = "+concept+", conceptName = "+conceptName);
 639+ return -1;
638640 }
639641
640642 }
\ No newline at end of file
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java
@@ -194,8 +194,7 @@
195195
196196
197197 protected void deleteDataFrom(int rcId, String op) throws PersistenceException {
198 - deleteDataFrom(rcId, op, definitionTable, "concept", conceptTable, "resource");
199 - //deleteDataFrom(rcId, op, conceptDescriptionTable, "concept", conceptTable, "resource");
 198+ deleteDataFrom(rcId, op, definitionTable, "concept", aboutTable, "resource");
200199
201200 deleteDataFrom(rcId, op, linkTable, "resource");
202201 deleteDataFrom(rcId, op, langlinkTable, "resource");
@@ -436,14 +435,14 @@
437436 /**
438437 * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storeAbout(int, String)
439438 */
440 - public void storeAbout(int rcId, String conceptName) throws PersistenceException {
441 - storeAbout(rcId, -1, conceptName);
 439+ public int storeAbout(int rcId, String conceptName) throws PersistenceException {
 440+ return storeAbout(rcId, -1, conceptName);
442441 }
443442
444443 /**
445444 * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storeAbout(int, int, String)
446445 */
447 - public void storeAbout(int rcId, int concept, String conceptName) throws PersistenceException {
 446+ public int storeAbout(int rcId, int concept, String conceptName) throws PersistenceException {
448447 try {
449448 if (rcId<0) throw new IllegalArgumentException("bad resource id "+rcId);
450449 conceptName = checkName(rcId, conceptName, "concept name (resource #{0})", rcId);
@@ -452,9 +451,13 @@
453452 aboutInserter.updateString("concept_name", conceptName);
454453
455454 if (concept>0) aboutInserter.updateInt("concept", concept);
456 - else if (idManager!=null) aboutInserter.updateInt("concept", idManager.aquireId(conceptName));
 455+ else if (idManager!=null) {
 456+ concept = idManager.aquireId(conceptName);
 457+ aboutInserter.updateInt("concept", concept);
 458+ }
457459
458460 aliasInserter.updateRow();
 461+ return concept;
459462 } catch (SQLException e) {
460463 throw new PersistenceException(e);
461464 }
@@ -1047,20 +1050,20 @@
10481051
10491052 protected int buildSectionConcepts() throws PersistenceException {
10501053 //NOTE: we shouldn't need the "ignore" bit. Let's keep it for robustness
1051 - String sql = "INSERT ignore INTO "+conceptTable.getSQLName()+" ( id, name, type, resource, random ) "
1052 - +"SELECT S.section_concept, S.section_name, "+ConceptType.UNKNOWN.getCode()+", C.resource, RAND() "
1053 - +"FROM "+sectionTable.getSQLName()+" AS S "
1054 - +"LEFT JOIN "+conceptTable.getSQLName()+" AS C ";
 1054+ String sql = "INSERT ignore INTO "+conceptTable.getSQLName()+" ( id, name, type, random ) "
 1055+ +"SELECT S.section_concept, S.section_name, "+ConceptType.UNKNOWN.getCode()+", RAND() "
 1056+ +"FROM "+sectionTable.getSQLName()+" AS S ";
10551057
1056 - if (idManager!=null) sql += "ON S.concept = C.id ";
1057 - else sql += "ON S.concept_name = C.name ";
 1058+ //XXX: no more need for joining...
 1059+ //if (idManager!=null) sql += "ON S.concept = C.concept ";
 1060+ //else sql += "ON S.concept_name = C.concept_name ";
10581061
1059 - String where = "WHERE C.type IS NULL "
1060 - //+"OR C.type != "+ConceptType.NONE.getCode()+" " //XXX: really allow type ALIAS? //XXX: check for NONE/BAD?!
1061 - //+"GROUP BY S.section_name"
1062 - ;
 1062+ //String where = "WHERE C.type IS NULL " ; //WTF?! why do we need this?!
 1063+ String where = "";
10631064
10641065 return executeUpdate("buildSectionConcepts", sql+where); //TODO: chunk if ids available?!
 1066+
 1067+ //TODO: inject about records, so section concepts are linked to resources?
10651068 }
10661069
10671070 protected int buildSectionBroader() throws PersistenceException {
@@ -1078,8 +1081,8 @@
10791082
10801083 protected int buildMissingConcepts(DatabaseTable table, String conceptIdField, String conceptNameField) throws PersistenceException {
10811084 //NOTE: we shouldn't need the "ignore" bit. Let's keep it for robustness
1082 - String sql = "INSERT ignore INTO "+conceptTable.getSQLName()+" ( id, name, type, resource, random ) "
1083 - +"SELECT T."+conceptIdField+", T."+conceptNameField+", "+ConceptType.UNKNOWN.getCode()+", NULL, RAND() "
 1085+ String sql = "INSERT ignore INTO "+conceptTable.getSQLName()+" ( id, name, type, random ) "
 1086+ +"SELECT T."+conceptIdField+", T."+conceptNameField+", "+ConceptType.UNKNOWN.getCode()+", RAND() "
10841087 +"FROM "+table.getSQLName()+" as T "
10851088 +"LEFT JOIN "+conceptTable.getSQLName()+" as C ";
10861089
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/LocalConceptStoreBuilder.java
@@ -54,10 +54,12 @@
5555 String sourceName, int target, String targetName, AliasScope scope)
5656 throws PersistenceException;
5757
58 - public abstract void storeAbout(int resource, String conceptName)
59 - throws PersistenceException;
 58+ /* returns concept ID, if known; -1 otherwise */
 59+ public abstract int storeAbout(int resource, String conceptName)
 60+ throws PersistenceException;
6061
61 - public abstract void storeAbout(int resource, int concept, String conceptName)
 62+ /* returns concept ID, if known; -1 otherwise */
 63+ public abstract int storeAbout(int resource, int concept, String conceptName)
6264 throws PersistenceException;
6365
6466 //public abstract void storeConceptReference(int rcId, int source,
Index: trunk/WikiWord/WikiWordBuilder/debug-tweaks.properties
@@ -0,0 +1,100 @@
 2+## System config
 3+console.encoding = "UTF-8"
 4+
 5+## language handling
 6+
 7+# treat "commons" as a language code
 8+languages.commonsAsLanguage = false
 9+
 10+# treat "simple" as a language code
 11+languages.simpleAsLanguage = true
 12+
 13+## RDF Export Config
 14+# dataset URI qualifier - should uniquely identify the entity creating
 15+# the datasets (that is, YOU). This ensures unique dataset URIs for
 16+# datasets created by different people.
 17+# The default is "*" which means "unknown, don't use this publicly".
 18+rdf.dataset.qualifier = "*"
 19+
 20+## Import Driver
 21+# Run import in a thread separate from the one reading and parsing the dump.
 22+# Disabling this by setting the queue size to 0 will slightly reduce overhead on single-core systems;
 23+# Using a queue will not have as big an impact if unzippers (bunzip/gunzip) are used
 24+#dumpdriver.pageImportQueue = 8
 25+dumpdriver.pageImportQueue = 0
 26+
 27+# external unzippers - may boost performance, especially
 28+# on multi-core systems. The name of the file to
 29+# unzip will be appended to the command given here. Spaces
 30+# before the last / are taken to be part of the path, spaces
 31+# after the last / separate parameters.
 32+dumpdriver.externalBunzip = null
 33+dumpdriver.externalGunzip = null
 34+#dumpdriver.externalBunzip = "/bin/bunzip2 -c"
 35+#dumpdriver.externalGunzip = "/bin/gunzip -c"
 36+
 37+### Importer Output and Persistence ############
 38+importer.progressInterval = 1000
 39+importer.safepointInterval = 30000
 40+#importer.safepointInterval = 1000
 41+
 42+### Database Performance #######################
 43+#dbstore.backgroundFlushQueue = 4
 44+dbstore.backgroundFlushQueue = 0
 45+dbstore.useEntityBuffer = false
 46+dbstore.useRelationBuffer = false
 47+#dbstore.useEntityBuffer = true
 48+#dbstore.useRelationBuffer = true
 49+#dbstore.insertionBufferFactor = 16
 50+dbstore.insertionBufferFactor = 64
 51+dbstore.engine = "MyISAM"
 52+#dbstore.engine = "InnoDB"
 53+
 54+#sql mode - see http://dev.mysql.com/doc/refman/5.1/en/server-sql-mode.html
 55+dbstore.sqlMode = "STRICT_ALL_TABLES"
 56+#dbstore.sqlMode = "STRICT_TRANS_TABLES"
 57+
 58+#NOTE: MySQL does not support 4-byte utf-8 codes. So turn everything into binary...
 59+dbstore.useBinaryText = true;
 60+
 61+#maximum size of sql statement, bytes! MySQL's default: a bit below 16M bytes (!)
 62+#if not specified, mysql is asked for the current value.
 63+#dbstore.maxStatementSize = 16776192;
 64+
 65+#chunk size to use when chunking large updates by id
 66+#default is 100000, set to 0 to disable all chunking
 67+dbstore.queryChunkSize = 100000
 68+dbstore.updateChunkSize = 100000
 69+
 70+### Property Cache Fields ###########################
 71+#dbstore.cacheReferenceSeparator = '\u001E'
 72+#dbstore.cacheReferenceFieldSeparator = '\u001F'
 73+dbstore.listBlobSize = 65025
 74+
 75+### ID manager ######################################
 76+# NOTE: when using this, allow for 116 bytes plus the average size of names per ID entry.
 77+# So if you have an average name length of 12 and expect 1 million entries,
 78+# allow for about 1.3 gigabyte RAM to be used for ID caching.
 79+dbstore.idManager=false
 80+#dbstore.auxFileDir defaults to system temp dir
 81+#dbstore.auxFileDir="/tmp"
 82+dbstore.idManager.bufferSize=16384
 83+
 84+### CycleFinder #####################################
 85+dbstore.CycleFinder.levelWarningThreshold=32
 86+dbstore.CycleFinder.degreeWarningThreshold=1024
 87+dbstore.CycleFinder.maxDepth=1024
 88+
 89+### Database Debug Output ######################
 90+#see java.util.logging.Level for codes to use with dbstore.logLevel
 91+dbstore.logLevel = 720
 92+dbstore.explainSQLThreashold = 0
 93+#dbstore.explainSQLThreashold = 1000000
 94+dbstore.slowSQLThreashold = 0
 95+#dbstore.slowSQLThreashold = 10
 96+#dbstore.slowSQLThreashold = 60
 97+dbstore.traceSQL = false
 98+
 99+### Custom special purpose packages #################
 100+#wikiword.ConfigPackages=["de.brightbyte.wikiword.wikipro","de.brightbyte.wikiword.wikipro.wikis"]
 101+

Status & tagging log