r53382 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r53381‎ | r53382 | r53383 >
Date:21:48, 16 July 2009
Author:daniel
Status:deferred
Tags:
Comment:
fix alias extraction
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/extractor/TemplateParameterValueExtractor.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiConfiguration.java
@@ -492,6 +492,7 @@
493 493 propertyExtractors.addAll(with.propertyExtractors);
494 494 pageTermExtractors.addAll(with.pageTermExtractors);
495 495 redirectExtractors.addAll(with.redirectExtractors);
 496+ aliasExtractors.addAll(with.aliasExtractors);
496 497 supplementNameExtractors.addAll(with.supplementNameExtractors);
497 498 supplementedConceptExtractors.addAll(with.supplementedConceptExtractors);
498 499
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/extractor/TemplateParameterValueExtractor.java
@@ -8,7 +8,6 @@
9 9 import de.brightbyte.wikiword.analyzer.AnalyzerUtils;
10 10 import de.brightbyte.wikiword.analyzer.WikiPage;
11 11 import de.brightbyte.wikiword.analyzer.mangler.Mangler;
12 -import de.brightbyte.wikiword.analyzer.matcher.ExactNameMatcher;
13 12 import de.brightbyte.wikiword.analyzer.matcher.NameMatcher;
14 13 import de.brightbyte.wikiword.analyzer.matcher.PatternNameMatcher;
15 14 import de.brightbyte.wikiword.analyzer.template.TemplateData;
@@ -22,7 +21,7 @@
23 22 protected Mangler mangler = null;
24 23
25 24 public TemplateParameterValueExtractor(String template, int flags, String parameter) {
26 - this(new ExactNameMatcher(template), parameter);
 25+ this(new PatternNameMatcher(template, flags, true), parameter);
27 26 }
28 27
29 28 public TemplateParameterValueExtractor(Pattern template, String parameter) {
@@ -56,7 +55,7 @@
57 56 if (prefix!=null) v = prefix+v;
58 57 if (v!=null) {
59 58 if (mangler!=null) v = mangler.mangle(v);
60 - AnalyzerUtils.addToSet(into, v);
 59+ into = AnalyzerUtils.addToSet(into, v);
61 60 }
62 61 }
63 62 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java
@@ -839,7 +839,7 @@
840 840 */
841 841 public WikiLink getAliasFor() {
842 842 if (!aliasForKnown) {
843 - aliasFor = extractRedirectLink( this );
 843+ aliasFor = extractAliasLink( this );
844 844 aliasForKnown = true;
845 845 }
846 846
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java
@@ -278,7 +278,7 @@
279 279
280 280 //if the cat page contains a reference to the main topic page, store it.
281 281 WikiTextAnalyzer.WikiLink aliasFor = analyzerPage.getAliasFor();
282 - storeConceptAlias(rcId, conceptId, name, -1, aliasFor.getTargetConcept().toString(), AliasScope.CATEGORY);
 282+ if (aliasFor!=null) storeConceptAlias(rcId, conceptId, name, -1, aliasFor.getTargetConcept().toString(), AliasScope.CATEGORY);
283 283
284 284 List<WikiTextAnalyzer.WikiLink> links = analyzerPage.getLinks();
285 285 linkTracker.step(links.size());
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
@@ -179,7 +179,7 @@
180 180 disambigStripSectionPattern = sectionPattern("See also", 0);
181 181 //FIXME: disambig pages marked with {{shipindex}} are tabular!
182 182
183 - aliasExtractors.add( new TemplateParameterValueExtractor("Catmore?", 0, "1") ); //FIXME: testme
 183+ aliasExtractors.add( new TemplateParameterValueExtractor("Catmore2?", 0, "1") ); //FIXME: testme
184 184 aliasExtractors.add( new TemplateParameterValueExtractor("Catmore1", 0, "1").setManger( new RegularExpressionMangler("^.*\\[\\[ *(.+?) *(\\||\\]\\])", "$1", 0) ) );
185 185 //TODO: Catmoresub
186 186

Status & tagging log