Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java |
— | — | @@ -484,7 +484,7 @@ |
485 | 485 | } |
486 | 486 | } |
487 | 487 | } else { |
488 | | - out.info("skipped inter-namespace redirect "+rcName+" -> "+link); |
| 488 | + warn(rcId, "bad redirect (inter-namespace)", rcName+" -> "+link, null); |
489 | 489 | } |
490 | 490 | } |
491 | 491 | else if (name.equals(link.getPage().toString())) { |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/InputFileHelper.java |
— | — | @@ -59,6 +59,27 @@ |
60 | 60 | } |
61 | 61 | } |
62 | 62 | |
| 63 | + |
| 64 | + public URL getInputURL(String d) { |
| 65 | + return getInputURL(null, d); |
| 66 | + } |
| 67 | + |
| 68 | + public URL getInputURL(URL base, String n) { |
| 69 | + try { |
| 70 | + URL u = base == null || base.getProtocol().equals("file") ? new URL(n) : new URL(base, n); |
| 71 | + return u; |
| 72 | + } catch (MalformedURLException e) { |
| 73 | + // n could not be parsed as a URL: fall through and treat it as a local file path |
| 74 | + } |
| 75 | + |
| 76 | + try { |
| 77 | + File f = new File(n); |
| 78 | + return f.toURI().toURL(); |
| 79 | + } catch (MalformedURLException e) { |
| 80 | + throw new RuntimeException("failed to create file URL", e); |
| 81 | + } |
| 82 | + } |
| 83 | + |
63 | 84 | public InputStream open(String n) throws IOException { |
64 | 85 | return open(null, n); |
65 | 86 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportDump.java |
— | — | @@ -1,6 +1,5 @@ |
2 | 2 | package de.brightbyte.wikiword.builder; |
3 | 3 | |
4 | | -import java.io.File; |
5 | 4 | import java.net.MalformedURLException; |
6 | 5 | import java.net.URL; |
7 | 6 | |
— | — | @@ -18,31 +17,35 @@ |
19 | 18 | super(agendaTask, false, true); |
20 | 19 | } |
21 | 20 | |
22 | | - protected URL dumpFile; |
| 21 | + private URL dumpFile; |
23 | 22 | |
24 | 23 | @Override |
25 | 24 | protected boolean applyArguments() { |
26 | 25 | String d = getTargetFileName(); |
27 | 26 | if (d==null) return false; |
28 | 27 | |
29 | | - if (args.isSet("url")) { |
30 | | - try { |
31 | | - dumpFile = new URL(d); |
32 | | - } catch (MalformedURLException e) { |
33 | | - throw new IllegalArgumentException("bad url: "+d, e); |
34 | | - } |
35 | | - } |
36 | | - else { |
37 | | - try { |
38 | | - dumpFile = new File(d).toURI().toURL(); |
39 | | - } catch (MalformedURLException e) { |
40 | | - throw new RuntimeException("failed to generate local file url for `"+d+"`"); |
41 | | - } |
42 | | - } |
43 | | - |
44 | 28 | return true; |
45 | 29 | } |
46 | 30 | |
| 31 | + protected URL getDumpFileURL() { |
| 32 | + String d = getTargetFileName(); |
| 33 | + |
| 34 | + if (dumpFile==null) { |
| 35 | + if (args.isSet("url")) { |
| 36 | + try { |
| 37 | + dumpFile = new URL(d); |
| 38 | + } catch (MalformedURLException e) { |
| 39 | + throw new IllegalArgumentException("bad url: "+d, e); |
| 40 | + } |
| 41 | + } |
| 42 | + else { |
| 43 | + dumpFile = inputHelper.getInputURL(d); |
| 44 | + } |
| 45 | + } |
| 46 | + |
| 47 | + return dumpFile; |
| 48 | + } |
| 49 | + |
47 | 50 | @Override |
48 | 51 | protected void declareOptions() { |
49 | 52 | super.declareOptions(); |
— | — | @@ -104,7 +107,7 @@ |
105 | 108 | |
106 | 109 | ///////////////////////// main import run //////////////////////////////////// |
107 | 110 | if (agenda.beginTask("ImportDump.run", "analysis")) { |
108 | | - DataSourceDriver driver = new XmlDumpDriver(dumpFile, getLogOutput(), tweaks); |
| 111 | + DataSourceDriver driver = new XmlDumpDriver(getDumpFileURL(), inputHelper, getLogOutput(), tweaks); |
109 | 112 | |
110 | 113 | importer.reset(); |
111 | 114 | importer.prepare(); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportConcepts.java |
— | — | @@ -2,9 +2,12 @@ |
3 | 3 | |
4 | 4 | import java.io.IOException; |
5 | 5 | |
| 6 | +import de.brightbyte.io.ConsoleIO; |
6 | 7 | import de.brightbyte.util.PersistenceException; |
| 8 | +import de.brightbyte.wikiword.Corpus; |
7 | 9 | import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer; |
8 | 10 | import de.brightbyte.wikiword.store.WikiWordStoreFactory; |
| 11 | +import de.brightbyte.wikiword.store.builder.DebugLocalConceptStoreBuilder; |
9 | 12 | import de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder; |
10 | 13 | import de.brightbyte.wikiword.store.builder.PropertyStoreBuilder; |
11 | 14 | import de.brightbyte.wikiword.store.builder.TextStoreBuilder; |
— | — | @@ -22,7 +25,12 @@ |
23 | 26 | public ImportConcepts() { |
24 | 27 | super("ImportConcepts"); |
25 | 28 | } |
26 | | - |
| 29 | + |
| 30 | + protected WikiWordStoreFactory<? extends LocalConceptStoreBuilder> createConceptStoreFactory() throws IOException, PersistenceException { |
| 31 | + if (args.isSet("debug")) return new DebugLocalConceptStoreBuilder.Factory((Corpus)getConfiguredDataset(), ConsoleIO.output); |
| 32 | + else return super.createConceptStoreFactory(); |
| 33 | + } |
| 34 | + |
27 | 35 | @Override |
28 | 36 | protected void createStores(WikiWordStoreFactory<? extends LocalConceptStoreBuilder> factory) throws IOException, PersistenceException { |
29 | 37 | super.createStores(factory); |
— | — | @@ -53,6 +61,8 @@ |
54 | 62 | protected void declareOptions() { |
55 | 63 | super.declareOptions(); |
56 | 64 | |
| 65 | + args.declare("debug", null, false, Boolean.class, "debug mode, don't write to store."); |
| 66 | + |
57 | 67 | ConceptImporter.declareOptions(args); |
58 | 68 | } |
59 | 69 | |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportApp.java |
— | — | @@ -35,7 +35,8 @@ |
36 | 36 | |
37 | 37 | protected Operation operation = null; |
38 | 38 | private Monitor agendaMonitor; |
39 | | - protected String[] baseTasks = new String[] {}; |
| 39 | + protected String[] baseTasks = new String[] {}; |
| 40 | + protected InputFileHelper inputHelper; |
40 | 41 | |
41 | 42 | public ImportApp(String agendaTask, boolean allowGlobal, boolean allowLocal) { //TODO: agenda-params! |
42 | 43 | super(allowGlobal, allowLocal); |
— | — | @@ -45,7 +46,7 @@ |
46 | 47 | |
47 | 48 | @SuppressWarnings("unchecked") |
48 | 49 | @Override |
49 | | - protected WikiWordStoreFactory<S> createConceptStoreFactory() throws IOException, PersistenceException { |
| 50 | + protected WikiWordStoreFactory<? extends S> createConceptStoreFactory() throws IOException, PersistenceException { |
50 | 51 | return new DatabaseConceptStoreBuilders.Factory(getConfiguredDataSource(), getConfiguredDataset(), tweaks, null, true, true); |
51 | 52 | } |
52 | 53 | |
— | — | @@ -58,7 +59,6 @@ |
59 | 60 | args.declare("dbstats", null, false, Boolean.class, "calculate and dumps database table statistics"); |
60 | 61 | args.declare("noimport", null, false, Boolean.class, "do not import pages"); |
61 | 62 | args.declare("wiki", null, true, String.class, "sets the wiki name"); |
62 | | - args.declare("dummy", null, false, Boolean.class, "use a dummy store (benchmarking mode). In this case, <db-info-file> is ignored"); |
63 | 63 | //args.declare("buildstats", null, false, Boolean.class, "generate corpus statistics"); |
64 | 64 | //args.declare("noimport", null, false, Boolean.class, "do not import anything"); |
65 | 65 | args.declare("optimize", null, false, Boolean.class, "optimizes tables for later queries - this may take very long"); |
— | — | @@ -276,6 +276,13 @@ |
277 | 277 | } |
278 | 278 | |
279 | 279 | @Override |
| 280 | + protected void prepareApp() { |
| 281 | + inputHelper = new InputFileHelper( |
| 282 | + tweaks.getTweak("dumpdriver.externalGunzip", tweaks.getTweak("input.externalGunzip", (String)null)), |
| 283 | + tweaks.getTweak("dumpdriver.externalBunzip", tweaks.getTweak("input.externalBunzip", (String)null))); |
| 284 | + } |
| 285 | + |
| 286 | + @Override |
280 | 287 | protected void execute() throws Exception { |
281 | 288 | boolean noimport = args.isSet("noimport"); |
282 | 289 | boolean dbcheck = args.isSet("dbcheck"); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/ExtractFromDump.java |
— | — | @@ -6,6 +6,7 @@ |
7 | 7 | |
8 | 8 | import de.brightbyte.util.PersistenceException; |
9 | 9 | import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer; |
| 10 | +import de.brightbyte.wikiword.builder.InputFileHelper; |
10 | 11 | import de.brightbyte.wikiword.output.DataOutput; |
11 | 12 | import de.brightbyte.wikiword.processor.DataSourceDriver; |
12 | 13 | import de.brightbyte.wikiword.processor.WikiWordPageProcessor; |
— | — | @@ -60,7 +61,7 @@ |
61 | 62 | processor.setLogOutput(getLogOutput()); |
62 | 63 | processor.configure(args); |
63 | 64 | |
64 | | - DataSourceDriver driver = new XmlDumpDriver(dumpFile, getLogOutput(), tweaks); |
| 65 | + DataSourceDriver driver = new XmlDumpDriver(dumpFile, inputHelper, getLogOutput(), tweaks); |
65 | 66 | |
66 | 67 | processor.reset(); |
67 | 68 | processor.prepare(); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/ExtractorApp.java |
— | — | @@ -12,6 +12,7 @@ |
13 | 13 | import de.brightbyte.io.ConsoleIO; |
14 | 14 | import de.brightbyte.util.PersistenceException; |
15 | 15 | import de.brightbyte.wikiword.CliApp; |
| 16 | +import de.brightbyte.wikiword.builder.InputFileHelper; |
16 | 17 | import de.brightbyte.wikiword.output.DataOutput; |
17 | 18 | |
18 | 19 | /** |
— | — | @@ -20,11 +21,12 @@ |
21 | 22 | public abstract class ExtractorApp<S extends DataOutput> extends CliApp { |
22 | 23 | |
23 | 24 | protected S output; |
| 25 | + protected InputFileHelper inputHelper; |
24 | 26 | |
25 | 27 | public ExtractorApp() { |
26 | 28 | super(); |
27 | 29 | } |
28 | | - |
| 30 | + |
29 | 31 | @Override |
30 | 32 | protected void declareOptions() { |
31 | 33 | super.declareOptions(); |
— | — | @@ -85,6 +87,10 @@ |
86 | 88 | protected void prepareApp() throws Exception { |
87 | 89 | super.prepareApp(); |
88 | 90 | |
| 91 | + inputHelper = new InputFileHelper( |
| 92 | + tweaks.getTweak("dumpdriver.externalGunzip", tweaks.getTweak("input.externalGunzip", (String)null)), |
| 93 | + tweaks.getTweak("dumpdriver.externalBunzip", tweaks.getTweak("input.externalBunzip", (String)null))); |
| 94 | + |
89 | 95 | output = createOutput(); |
90 | 96 | } |
91 | 97 | |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java |
— | — | @@ -9,8 +9,8 @@ |
10 | 10 | import de.brightbyte.application.Agenda; |
11 | 11 | import de.brightbyte.application.Agenda.Record; |
12 | 12 | import de.brightbyte.application.Agenda.State; |
| 13 | +import de.brightbyte.data.cursor.CursorProcessor; |
13 | 14 | import de.brightbyte.data.cursor.DataSet; |
14 | | -import de.brightbyte.data.cursor.CursorProcessor; |
15 | 15 | import de.brightbyte.io.Output; |
16 | 16 | import de.brightbyte.util.PersistenceException; |
17 | 17 | import de.brightbyte.wikiword.ConceptType; |
— | — | @@ -24,12 +24,27 @@ |
25 | 25 | import de.brightbyte.wikiword.schema.AliasScope; |
26 | 26 | import de.brightbyte.wikiword.store.GroupNameTranslator; |
27 | 27 | import de.brightbyte.wikiword.store.WikiWordConceptStore; |
| 28 | +import de.brightbyte.wikiword.store.WikiWordStoreFactory; |
28 | 29 | |
29 | 30 | /** |
30 | 31 | * Dummy implementation of WikiStoreBuilder for testing and debugging |
31 | 32 | */ |
32 | 33 | public class DebugLocalConceptStoreBuilder implements LocalConceptStoreBuilder { |
33 | 34 | |
| 35 | + public static class Factory implements WikiWordStoreFactory<DebugLocalConceptStoreBuilder> { |
| 36 | + protected Output out; |
| 37 | + protected Corpus corpus; |
| 38 | + |
| 39 | + public Factory(Corpus corpus, Output out) { |
| 40 | + this.out = out; |
| 41 | + this.corpus = corpus; |
| 42 | + } |
| 43 | + |
| 44 | + public DebugLocalConceptStoreBuilder newStore() throws PersistenceException { |
| 45 | + return new DebugLocalConceptStoreBuilder(corpus, out); |
| 46 | + } |
| 47 | + } |
| 48 | + |
34 | 49 | public class DebugTextStoreBuilder implements TextStoreBuilder { |
35 | 50 | |
36 | 51 | public void storePlainText(int rcId, String name, String text) throws PersistenceException { |
— | — | @@ -408,14 +423,14 @@ |
409 | 424 | @Override |
410 | 425 | public Record logStart(int level, String context, String task, Map<String, Object> parameters, boolean complex) { |
411 | 426 | Record rec = super.logStart(level, context, task, parameters, complex); |
412 | | - trace("+ logStart: level = "+level+", task = "+task+", parameters = "+parameters+", complex = "+complex); |
| 427 | + log("+ logStart: level = "+level+", task = "+task+", parameters = "+parameters+", complex = "+complex); |
413 | 428 | return rec; |
414 | 429 | } |
415 | 430 | |
416 | 431 | @Override |
417 | 432 | public void logTerminated(int start, int end, long duration, State state, String result) { |
418 | 433 | super.logTerminated(start, end, duration, state, result); |
419 | | - trace("+ logStart: start = "+start+", end = "+end+", duration = "+duration+", state = "+state+", result = "+result); |
| 434 | + log("+ logTerminated: start = "+start+", end = "+end+", duration = "+duration+", state = "+state+", result = "+result); |
420 | 435 | } |
421 | 436 | |
422 | 437 | } |
— | — | @@ -436,11 +451,12 @@ |
437 | 452 | protected int sectionCounter = 0; |
438 | 453 | |
439 | 454 | private Agenda agenda; |
440 | | - private DatasetIdentifier dataset = DatasetIdentifier.forName("DEBUG", "dummy"); |
| 455 | + private DatasetIdentifier dataset; |
441 | 456 | |
442 | | - public DebugLocalConceptStoreBuilder(Output out) { |
| 457 | + public DebugLocalConceptStoreBuilder(Corpus corpus, Output out) { |
443 | 458 | super(); |
444 | 459 | this.out = out; |
| 460 | + this.dataset = corpus; |
445 | 461 | |
446 | 462 | try { |
447 | 463 | this.agenda = new Agenda( new DebugAgendaPersistor() ); |
— | — | @@ -497,13 +513,13 @@ |
498 | 514 | |
499 | 515 | public int storeConcept(int rcId, String name, ConceptType ctype) { |
500 | 516 | conceptCounter++; |
501 | | - trace("+ storeConcept: rc = "+rcId+", name = "+name+", type = "+ctype); |
| 517 | + log("+ storeConcept: rc = "+rcId+", name = "+name+", type = "+ctype); |
502 | 518 | return conceptCounter; |
503 | 519 | } |
504 | 520 | |
505 | 521 | public int storeResource(String name, ResourceType ptype, Date time) { |
506 | 522 | resourceCounter++; |
507 | | - trace("+ resourceCounter: id = "+resourceCounter+", name = "+name+", type = "+ptype+", timestamp = "+time); |
| 523 | + log("+ storeResource: id = "+resourceCounter+", name = "+name+", type = "+ptype+", timestamp = "+time); |
508 | 524 | return resourceCounter; |
509 | 525 | } |
510 | 526 | |
— | — | @@ -516,84 +532,84 @@ |
517 | 533 | |
518 | 534 | public void storeDefinition(int rcId, int conceptId, String definition) { |
519 | 535 | definitionCounter++; |
520 | | - trace("+ storeDefinition: conceptId = "+conceptId+": "+definition); |
| 536 | + log("+ storeDefinition: conceptId = "+conceptId+": "+definition); |
521 | 537 | } |
522 | 538 | |
523 | 539 | public int storePlainText(int rcId, String text) { |
524 | 540 | plainTextCounter++; |
525 | | - trace("+ storePlainText: resource = "+rcId+": "); |
526 | | - trace("---------------------------------"); |
527 | | - trace(text); |
528 | | - trace("\n---------------------------------"); |
| 541 | + log("+ storePlainText: resource = "+rcId+": "); |
| 542 | + log("---------------------------------"); |
| 543 | + log(text); |
| 544 | + log("\n---------------------------------"); |
529 | 545 | return plainTextCounter; |
530 | 546 | } |
531 | 547 | |
532 | 548 | public int storeRawText(int rcId, String text) { |
533 | 549 | rawTextCounter++; |
534 | | - trace("+ storeRawText: resource = "+rcId+": "); |
535 | | - trace("---------------------------------"); |
536 | | - trace(text); |
537 | | - trace("\n---------------------------------"); |
| 550 | + log("+ storeRawText: resource = "+rcId+": "); |
| 551 | + log("---------------------------------"); |
| 552 | + log(text); |
| 553 | + log("\n---------------------------------"); |
538 | 554 | return rawTextCounter; |
539 | 555 | } |
540 | 556 | |
541 | 557 | |
542 | 558 | public void storeConceptBroader(int rcId, int narrowId, String narrowName, String broadName, ExtractionRule rule) { |
543 | 559 | conceptBroaderCounter++; |
544 | | - trace("+ storeConceptBroader: rc = "+rcId+", narrow ("+narrowId+") = "+narrowName+", broad = "+broadName+", rule = "+rule); |
| 560 | + log("+ storeConceptBroader: rc = "+rcId+", narrow ("+narrowId+") = "+narrowName+", broad = "+broadName+", rule = "+rule); |
545 | 561 | } |
546 | 562 | |
547 | 563 | public void storeConceptBroader(int rcId, String narrowName, String broadName, ExtractionRule rule) { |
548 | 564 | conceptBroaderCounter++; |
549 | | - trace("+ storeConceptBroader: rc = "+rcId+", narrow = "+narrowName+", broad = "+broadName+", rule = "+rule); |
| 565 | + log("+ storeConceptBroader: rc = "+rcId+", narrow = "+narrowName+", broad = "+broadName+", rule = "+rule); |
550 | 566 | } |
551 | 567 | |
552 | 568 | public void storeConceptAlias(int rcId, int left, String leftName, int right, String rightName, AliasScope scope) { |
553 | 569 | conceptEquivalentCounter++; |
554 | | - trace("+ storeConceptEquivalent: rc = "+rcId+", left ("+left+") = "+leftName+", right ("+right+") = "+rightName+", scope = "+scope); |
| 570 | + log("+ storeConceptAlias: rc = "+rcId+", left ("+left+") = "+leftName+", right ("+right+") = "+rightName+", scope = "+scope); |
555 | 571 | } |
556 | 572 | |
557 | 573 | public void storeConceptReference(int rcId, int source, String sourceName, String target) { |
558 | 574 | conceptReferenceCounter++; |
559 | | - trace("+ storeConceptReference: rc = "+rcId+", source ("+source+") = "+sourceName+", target = "+target+""); |
| 575 | + log("+ storeConceptReference: rc = "+rcId+", source ("+source+") = "+sourceName+", target = "+target+""); |
560 | 576 | } |
561 | 577 | |
562 | 578 | public void storeLanguageLink(int rcId, int concept, String conceptName, String lang, String target) { |
563 | 579 | languageLinkCounter++; |
564 | | - trace("+ storeLanguageLink: rc = "+rcId+", concept ("+concept+") = "+conceptName+", language = "+lang+", target = "+target+""); |
| 580 | + log("+ storeLanguageLink: rc = "+rcId+", concept ("+concept+") = "+conceptName+", language = "+lang+", target = "+target+""); |
565 | 581 | } |
566 | 582 | |
567 | 583 | public void storeLink(int rcId, int anchorId, String anchorName, |
568 | 584 | String term, String targetName, ExtractionRule rule) { |
569 | 585 | linkCounter++; |
570 | | - trace("+ storeTermUse: rc = "+rcId+", anchor ("+anchorId+") = "+anchorName+", term = "+term+", target = "+targetName+", rule = "+rule+""); |
| 586 | + log("+ storeLink: rc = "+rcId+", anchor ("+anchorId+") = "+anchorName+", term = "+term+", target = "+targetName+", rule = "+rule+""); |
571 | 587 | } |
572 | 588 | |
573 | 589 | public void storeReference(int rcId, String term, int targetId, String targetName, |
574 | 590 | ExtractionRule rule) { |
575 | 591 | linkCounter++; |
576 | | - trace("+ storeTermUse: rc = "+rcId+", target ("+targetId+") = "+targetName+", term = "+term+", rule = "+rule+""); |
| 592 | + log("+ storeReference: rc = "+rcId+", target ("+targetId+") = "+targetName+", term = "+term+", rule = "+rule+""); |
577 | 593 | } |
578 | 594 | |
579 | 595 | public void storeSection(int rcId, String name, String page) { |
580 | 596 | sectionCounter++; |
581 | | - trace("+ section: rc = "+rcId+", name ("+name+") = "+page); |
| 597 | + log("+ storeSection: rc = "+rcId+", name ("+name+") = "+page); |
582 | 598 | } |
583 | 599 | |
584 | 600 | public void checkConsistency() { |
585 | | - trace("* checkConsistency *"); |
| 601 | + log("* checkConsistency *"); |
586 | 602 | } |
587 | 603 | |
588 | 604 | public void flush() { |
589 | | - trace("* flush *"); |
| 605 | + log("* flush *"); |
590 | 606 | } |
591 | 607 | |
592 | 608 | public void deleteDataFrom(int rcId) { |
593 | | - trace("- delete data from resource "+rcId); |
| 609 | + log("- delete data from resource "+rcId); |
594 | 610 | } |
595 | 611 | |
596 | 612 | public void deleteDataAfter(int rcId, boolean inclusive) { |
597 | | - trace("- delete data after resource "+rcId); |
| 613 | + log("- delete data after resource "+rcId); |
598 | 614 | } |
599 | 615 | |
600 | 616 | public Agenda getAgenda() { |
— | — | @@ -605,7 +621,7 @@ |
606 | 622 | } |
607 | 623 | |
608 | 624 | public void optimize() { |
609 | | - trace("- optimize"); |
| 625 | + log("- optimize"); |
610 | 626 | } |
611 | 627 | |
612 | 628 | public void dumpTableStats(PrintStream out, String table) { |
— | — | @@ -621,10 +637,10 @@ |
622 | 638 | } |
623 | 639 | |
624 | 640 | public void buildStatistics() { |
625 | | - trace("- build stats"); |
| 641 | + log("- build stats"); |
626 | 642 | } |
627 | 643 | public void clearStatistics() { |
628 | | - trace("- clear stats"); |
| 644 | + log("- clear stats"); |
629 | 645 | } |
630 | 646 | |
631 | 647 | public int getNumberOfWarnings() { |
— | — | @@ -713,12 +729,12 @@ |
714 | 730 | } |
715 | 731 | |
716 | 732 | public int storeAbout(int resource, String rcName, String conceptName) { |
717 | | - trace("+ storeAbout: resource = "+resource+", resourceName = "+rcName+", conceptName = "+conceptName); |
| 733 | + log("+ storeAbout: resource = "+resource+", resourceName = "+rcName+", conceptName = "+conceptName); |
718 | 734 | return -1; |
719 | 735 | } |
720 | 736 | |
721 | 737 | public int storeAbout(int resource, String rcName, int concept, String conceptName) { |
722 | | - trace("+ storeAbout: resource = "+resource+", resourceName = "+rcName+", concept = "+concept+", conceptName = "+conceptName); |
| 738 | + log("+ storeAbout: resource = "+resource+", resourceName = "+rcName+", concept = "+concept+", conceptName = "+conceptName); |
723 | 739 | return -1; |
724 | 740 | } |
725 | 741 | |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/XmlDumpDriver.java |
— | — | @@ -199,33 +199,31 @@ |
200 | 200 | |
201 | 201 | } |
202 | 202 | |
203 | | - public XmlDumpDriver(URL dump, LeveledOutput log, TweakSet tweaks) { |
| 203 | + public XmlDumpDriver(URL dump, InputFileHelper inputHelper, LeveledOutput log, TweakSet tweaks) { |
204 | 204 | if (dump==null) throw new NullPointerException(); |
205 | 205 | this.dump= dump; |
206 | | - init(log, tweaks); |
| 206 | + init(inputHelper, log, tweaks); |
207 | 207 | } |
208 | 208 | |
209 | 209 | public XmlDumpDriver(InputStream in, LeveledOutput log, TweakSet tweaks) { |
210 | 210 | if (in==null) throw new NullPointerException(); |
211 | 211 | this.in= in; |
212 | | - init(log, tweaks); |
| 212 | + init(null, log, tweaks); |
213 | 213 | } |
214 | 214 | |
215 | 215 | private int importQueueCapacity = 0; |
216 | 216 | private InputFileHelper inputHelper; |
217 | 217 | |
218 | | - private void init(LeveledOutput log, TweakSet tweaks) { |
| 218 | + private void init(InputFileHelper inputHelper, LeveledOutput log, TweakSet tweaks) { |
219 | 219 | if (log==null) throw new NullPointerException(); |
220 | 220 | if (tweaks==null) throw new NullPointerException(); |
| 221 | + if (inputHelper==null && in==null) throw new NullPointerException(); |
221 | 222 | |
222 | 223 | this.tweaks = tweaks; |
223 | 224 | this.log = log; |
| 225 | + this.inputHelper = inputHelper; |
224 | 226 | |
225 | 227 | importQueueCapacity = tweaks.getTweak("dumpdriver.pageImportQueue", 8); |
226 | | - |
227 | | - inputHelper = new InputFileHelper( |
228 | | - tweaks.getTweak("dumpdriver.externalGunzip", tweaks.getTweak("input.externalGunzip", (String)null)), |
229 | | - tweaks.getTweak("dumpdriver.externalBunzip", tweaks.getTweak("input.externalBunzip", (String)null))); |
230 | 228 | } |
231 | 229 | |
232 | 230 | public void run(WikiWordPageProcessor importer) throws IOException, SQLException, InterruptedException, PersistenceException { |