r50283 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r50282 | r50283 | r50284 >
Date:21:14, 6 May 2009
Author:daniel
Status:deferred
Tags:
Comment:
refined patterns
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder4Biography/src/main/java/de/brightbyte/wikiword/biography/wikis/WikiConfiguration_dewiki.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder4Biography/src/main/java/de/brightbyte/wikiword/biography/wikis/WikiConfiguration_dewiki.java
@@ -2,11 +2,14 @@
33
44 import java.util.regex.Pattern;
55
 6+import de.brightbyte.wikiword.ConceptType;
67 import de.brightbyte.wikiword.analyzer.WikiConfiguration;
78 import de.brightbyte.wikiword.analyzer.extractor.CategoryPatternParameterExtractor;
89 import de.brightbyte.wikiword.analyzer.extractor.PagePropertyValueExtractor;
910 import de.brightbyte.wikiword.analyzer.extractor.TemplateParameterExtractor;
1011 import de.brightbyte.wikiword.analyzer.matcher.ExactNameMatcher;
 12+import de.brightbyte.wikiword.analyzer.sensor.HasPropertySensor;
 13+import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor;
1114 import de.brightbyte.wikiword.analyzer.template.DefaultTemplateParameterPropertySpec;
1215
1316 public class WikiConfiguration_dewiki extends WikiConfiguration {
@@ -18,9 +21,9 @@
1922 propertyExtractors.add( new CategoryPatternParameterExtractor("^Gestorben_(\\d+(_v\\._Chr\\.)?)$", "$1", 0, "person-death-date") );
2023
2124 propertyExtractors.add( new CategoryPatternParameterExtractor("^Maler_(der|des)_(.+)$", "$2", 0, "artist-group") );
22 - propertyExtractors.add( new CategoryPatternParameterExtractor("^(Maler|Bildhauer|Fotograf)(_|$)", "$2", 0, "artist-group") );
23 - propertyExtractors.add( new CategoryPatternParameterExtractor("^.*[^_](maler|bildhauer|fotograf)$", "$2", 0, "artist-group") );
24 - propertyExtractors.add( new CategoryPatternParameterExtractor("^.*([-_\\wäöü]+)(maler|bildhauer|fotograf)$", "$1", 0, "artist-group") );
 25+ propertyExtractors.add( new CategoryPatternParameterExtractor("^(Maler|Bildhauer|Fotograf)(_|$).*$", "$1", 0, "artist-group") );
 26+ propertyExtractors.add( new CategoryPatternParameterExtractor("^.*[^_](maler|bildhauer|fotograf)$", "$1", 0, "artist-group").setCapitalize(true) );
 27+ propertyExtractors.add( new CategoryPatternParameterExtractor("^.*?([-_\\wäöü]+)(maler|bildhauer|fotograf)$", "$2", 0, "artist-group") );
2528
2629 propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Personendaten"),
2730 new DefaultTemplateParameterPropertySpec("NAME", "person-sortname").setStripMarkup(true),
@@ -43,7 +46,11 @@
4447 //TODO: {{BAM|Kohl|Helmut}}
4548
4649 pageTermExtractors.add( new PagePropertyValueExtractor("person-sortname") );
47 - pageTermExtractors.add( new PagePropertyValueExtractor("person-name") );
 50+ pageTermExtractors.add( new PagePropertyValueExtractor("person-name") );
 51+
 52+ conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-name") );
 53+ conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-birth-date") );
 54+ conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "artist-group") );
4855 }
4956
5057 }

Status & tagging log