r52895 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r52894 | r52895 | r52896 >
Date:09:39, 8 July 2009
Author:daniel
Status:deferred
Tags:
Comment:
basic ResourceType detection for it, pt
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/matcher/AnyNameMatcher.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasTemplateLikeSensor.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasTemplateSensor.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_eswiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_itwiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_ptwiki.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/matcher/AnyNameMatcher.java
@@ -11,6 +11,8 @@
1212
1313 public class AnyNameMatcher implements NameMatcher {
1414
 15+ public static final AnyNameMatcher instance = new AnyNameMatcher();
 16+
1517 public AnyNameMatcher() {
1618 //noop
1719 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasTemplateSensor.java
@@ -5,17 +5,18 @@
66
77 import java.util.Map;
88
 9+import de.brightbyte.wikiword.analyzer.matcher.AnyNameMatcher;
910 import de.brightbyte.wikiword.analyzer.matcher.ExactNameMatcher;
1011 import de.brightbyte.wikiword.analyzer.matcher.NameMatcher;
1112
1213 public class HasTemplateSensor<V> extends HasTemplateLikeSensor<V> {
1314
14 - public HasTemplateSensor(V value, String pattern) {
15 - this(value, new ExactNameMatcher(pattern), null);
 15+ public HasTemplateSensor(V value, String name) {
 16+ this(value, name==null ? AnyNameMatcher.instance : new ExactNameMatcher(name), null);
1617 }
1718
18 - public HasTemplateSensor(V value, String pattern, String[] params) {
19 - this(value, new ExactNameMatcher(pattern), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params));
 19+ public HasTemplateSensor(V value, String name, String... params) {
 20+ this(value, name==null ? AnyNameMatcher.instance : new ExactNameMatcher(name), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params));
2021 }
2122
2223 private HasTemplateSensor(V value, NameMatcher matcher, Map<String, NameMatcher> params) {
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasTemplateLikeSensor.java
@@ -10,6 +10,7 @@
1111
1212 import de.brightbyte.data.MultiMap;
1313 import de.brightbyte.wikiword.analyzer.WikiPage;
 14+import de.brightbyte.wikiword.analyzer.matcher.AnyNameMatcher;
1415 import de.brightbyte.wikiword.analyzer.matcher.NameMatcher;
1516 import de.brightbyte.wikiword.analyzer.matcher.PatternNameMatcher;
1617 import de.brightbyte.wikiword.analyzer.template.TemplateExtractor;
@@ -23,16 +24,20 @@
2425 //TODO: provide an OR mode, so this triggers if *any* param matches
2526
2627 public HasTemplateLikeSensor(V value, String pattern, int flags) {
27 - this(value, new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), null);
 28+ this(value, pattern==null ? AnyNameMatcher.instance : new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), null);
2829 }
2930
30 - public HasTemplateLikeSensor(V value, String pattern, int flags, String[] params) {
31 - this(value, new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params));
 31+ public HasTemplateLikeSensor(V value, String pattern, int flags, String... params) {
 32+ this(value, pattern==null ? AnyNameMatcher.instance : new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params));
3233 }
3334
3435 public HasTemplateLikeSensor(V value, NameMatcher matcher, Map<String, NameMatcher> params) {
3536 super(value);
 37+ if (matcher==null) throw new NullPointerException();
 38+
 39+
3640 this.matcher = matcher;
 41+ this.params = params;
3742 }
3843
3944 @Override
@@ -77,6 +82,7 @@
7883
7984 protected static <V> Map<String, V> paramKeyMap(String[] params) {
8085 if (params==null) return null;
 86+ if (params.length==0) return null;
8187
8288 HashMap<String, V> m = new HashMap<String, V>();
8389 for (String p: params) {
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_ptwiki.java
@@ -1,11 +1,19 @@
22 package de.brightbyte.wikiword.wikis;
33
 4+import de.brightbyte.wikiword.ResourceType;
45 import de.brightbyte.wikiword.analyzer.WikiConfiguration;
 6+import de.brightbyte.wikiword.analyzer.sensor.HasCategoryLikeSensor;
 7+import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor;
 8+import de.brightbyte.wikiword.analyzer.sensor.NamespaceSensor;
59
610 public class WikiConfiguration_ptwiki extends WikiConfiguration {
711
812 public WikiConfiguration_ptwiki() {
9 - //TODO: configure for no.wikipedia.org
 13+ resourceTypeSensors.add( new NamespaceSensor<ResourceType>(ResourceType.LIST, 102)); // 102 = Anexo
 14+ resourceTypeSensors.add( new HasCategoryLikeSensor<ResourceType>(ResourceType.LIST, "^(Listas|\u00cdndice)(_|$)", 0));
 15+ resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^(Elimina\u00e7\u00e3o_r\u00e1pida|Er1|Apagar2|VDA2|Copyright2)$", 0));
 16+
 17+ //resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^ambox$", 0, ....));
1018 }
1119
1220 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_eswiki.java
@@ -5,6 +5,7 @@
66 import de.brightbyte.wikiword.analyzer.WikiConfiguration;
77 import de.brightbyte.wikiword.analyzer.sensor.HasSectionSensor;
88 import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor;
 9+import de.brightbyte.wikiword.analyzer.sensor.HasTemplateSensor;
910 import de.brightbyte.wikiword.analyzer.sensor.NamespaceSensor;
1011
1112 public class WikiConfiguration_eswiki extends WikiConfiguration {
@@ -16,10 +17,17 @@
1718
1819 resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^(Destruir|Copyvio|Plagio|CdbM?|SRA|Sin_?relevancia|Irrelevante|Autotrad|RobotDestruir|Prob|Publicidad|Infraesbozo)$", 0));
1920
20 - conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Fica_de_(persona|actor|artista|autoridad)|BD|NF|Sucesi\u00f3n)$", 0));
 21+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(BD|NF|Sucesi\u00f3n)$", 0));
 22+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Fica_de_.+)$", 0, "fechanac"));
 23+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Fica_de_.+)$", 0, "fechamuerte"));
 24+
2125 conceptTypeSensors.add( new HasSectionSensor<ConceptType>(ConceptType.PERSON, "Biograf\u00eda"));
2226 conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.LIFEFORM, "^(Taxobox|Fica_de_(tax\u00f3n))$", 0));
2327 conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Ficha_de_localidad.*)$", 0));
 28+ conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.PLACE, "coord"));
 29+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Fica_de_.+)$", 0, "coor"));
 30+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Fica_de_.+)$", 0, "mapa"));
 31+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Fica_de_.+)$", 0, "poblaci\u00f3n"));
2432
2533 //TODO: number, date, event, work...
2634 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_itwiki.java
@@ -1,11 +1,16 @@
22 package de.brightbyte.wikiword.wikis;
33
 4+import de.brightbyte.wikiword.ResourceType;
45 import de.brightbyte.wikiword.analyzer.WikiConfiguration;
 6+import de.brightbyte.wikiword.analyzer.sensor.HasCategoryLikeSensor;
 7+import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor;
58
69 public class WikiConfiguration_itwiki extends WikiConfiguration {
710
811 public WikiConfiguration_itwiki() {
9 - //TODO: configure for no.wikipedia.org
 12+ //resourceTypeSensors.add( new TitleSensor<ResourceType>(ResourceType.LIST, "^Elenco_", 0));
 13+ resourceTypeSensors.add( new HasCategoryLikeSensor<ResourceType>(ResourceType.LIST, "^(Liste|Cronologie|Cronologia)(_|$)", 0));
 14+ resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^(Cancellazione|Cancella_subito|Cancelcopy)$", 0));
1015 }
1116
1217 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
@@ -149,8 +149,13 @@
150150 conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.TIME, "Day", null));
151151
152152 conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.WORK, "(^|_)(statue|work|play|album|song|painting|opera|novel|musical|novel|composition)s?(_|$)", Pattern.CASE_INSENSITIVE));
 153+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "artist"));
 154+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "author"));
 155+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "composer"));
 156+
153157 conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.EVENT, "(^|_)(event|war|battle|siege|treaties|flood|famine|fire|conflict|crisis|disaster|riot|assasination|execution|crime)s?(_|$)", Pattern.CASE_INSENSITIVE));
154 - conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_Military_Conflict$", Pattern.CASE_INSENSITIVE));
 158+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "date"));
 159+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "year"));
155160
156161 conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.LIFEFORM, "Taxobox", null));
157162 conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.NUMBER, "^Integers$|(^N|_n)umbers$", 0));

Status & tagging log