Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/matcher/AnyNameMatcher.java |
— | — | @@ -11,6 +11,8 @@ |
12 | 12 | |
13 | 13 | public class AnyNameMatcher implements NameMatcher { |
14 | 14 | |
| 15 | + public static final AnyNameMatcher instance = new AnyNameMatcher(); |
| 16 | + |
15 | 17 | public AnyNameMatcher() { |
16 | 18 | //noop |
17 | 19 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasTemplateSensor.java |
— | — | @@ -5,17 +5,18 @@ |
6 | 6 | |
7 | 7 | import java.util.Map; |
8 | 8 | |
| 9 | +import de.brightbyte.wikiword.analyzer.matcher.AnyNameMatcher; |
9 | 10 | import de.brightbyte.wikiword.analyzer.matcher.ExactNameMatcher; |
10 | 11 | import de.brightbyte.wikiword.analyzer.matcher.NameMatcher; |
11 | 12 | |
12 | 13 | public class HasTemplateSensor<V> extends HasTemplateLikeSensor<V> { |
13 | 14 | |
14 | | - public HasTemplateSensor(V value, String pattern) { |
15 | | - this(value, new ExactNameMatcher(pattern), null); |
| 15 | + public HasTemplateSensor(V value, String name) { |
| 16 | + this(value, name==null ? AnyNameMatcher.instance : new ExactNameMatcher(name), null); |
16 | 17 | } |
17 | 18 | |
18 | | - public HasTemplateSensor(V value, String pattern, String[] params) { |
19 | | - this(value, new ExactNameMatcher(pattern), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params)); |
| 19 | + public HasTemplateSensor(V value, String name, String... params) { |
| 20 | + this(value, name==null ? AnyNameMatcher.instance : new ExactNameMatcher(name), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params)); |
20 | 21 | } |
21 | 22 | |
22 | 23 | private HasTemplateSensor(V value, NameMatcher matcher, Map<String, NameMatcher> params) { |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasTemplateLikeSensor.java |
— | — | @@ -10,6 +10,7 @@ |
11 | 11 | |
12 | 12 | import de.brightbyte.data.MultiMap; |
13 | 13 | import de.brightbyte.wikiword.analyzer.WikiPage; |
| 14 | +import de.brightbyte.wikiword.analyzer.matcher.AnyNameMatcher; |
14 | 15 | import de.brightbyte.wikiword.analyzer.matcher.NameMatcher; |
15 | 16 | import de.brightbyte.wikiword.analyzer.matcher.PatternNameMatcher; |
16 | 17 | import de.brightbyte.wikiword.analyzer.template.TemplateExtractor; |
— | — | @@ -23,16 +24,20 @@ |
24 | 25 | //TODO: provide an OR mode, so this triggers if *any* param matches |
25 | 26 | |
26 | 27 | public HasTemplateLikeSensor(V value, String pattern, int flags) { |
27 | | - this(value, new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), null); |
| 28 | + this(value, pattern==null ? AnyNameMatcher.instance : new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), null); |
28 | 29 | } |
29 | 30 | |
30 | | - public HasTemplateLikeSensor(V value, String pattern, int flags, String[] params) { |
31 | | - this(value, new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params)); |
| 31 | + public HasTemplateLikeSensor(V value, String pattern, int flags, String... params) { |
| 32 | + this(value, pattern==null ? AnyNameMatcher.instance : new PatternNameMatcher(pattern, flags | Pattern.MULTILINE, false), HasTemplateLikeSensor.<NameMatcher>paramKeyMap(params)); |
32 | 33 | } |
33 | 34 | |
34 | 35 | public HasTemplateLikeSensor(V value, NameMatcher matcher, Map<String, NameMatcher> params) { |
35 | 36 | super(value); |
| 37 | + if (matcher==null) throw new NullPointerException("matcher must not be null"); |
| 38 | + |
| 39 | + // params may be null: the (value, pattern, flags) constructor passes null, and paramKeyMap returns null for a null or empty array |
36 | 40 | this.matcher = matcher; |
| 41 | + this.params = params; |
37 | 42 | } |
38 | 43 | |
39 | 44 | @Override |
— | — | @@ -77,6 +82,7 @@ |
78 | 83 | |
79 | 84 | protected static <V> Map<String, V> paramKeyMap(String[] params) { |
80 | 85 | if (params==null) return null; |
| 86 | + if (params.length==0) return null; |
81 | 87 | |
82 | 88 | HashMap<String, V> m = new HashMap<String, V>(); |
83 | 89 | for (String p: params) { |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_ptwiki.java |
— | — | @@ -1,11 +1,19 @@ |
2 | 2 | package de.brightbyte.wikiword.wikis; |
3 | 3 | |
| 4 | +import de.brightbyte.wikiword.ResourceType; |
4 | 5 | import de.brightbyte.wikiword.analyzer.WikiConfiguration; |
| 6 | +import de.brightbyte.wikiword.analyzer.sensor.HasCategoryLikeSensor; |
| 7 | +import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor; |
| 8 | +import de.brightbyte.wikiword.analyzer.sensor.NamespaceSensor; |
5 | 9 | |
6 | 10 | public class WikiConfiguration_ptwiki extends WikiConfiguration { |
7 | 11 | |
8 | 12 | public WikiConfiguration_ptwiki() { |
9 | | - //TODO: configure for no.wikipedia.org |
| 13 | + resourceTypeSensors.add( new NamespaceSensor<ResourceType>(ResourceType.LIST, 102)); // 102 = Anexo |
| 14 | + resourceTypeSensors.add( new HasCategoryLikeSensor<ResourceType>(ResourceType.LIST, "^(Listas|\u00cdndice)(_|$)", 0)); |
| 15 | + resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^(Elimina\u00e7\u00e3o_r\u00e1pida|Er1|Apagar2|VDA2|Copyright2)$", 0)); |
| 16 | + |
| 17 | + //TODO: not active yet - the parameter list ("....") is still a placeholder: resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^ambox$", 0, ....)); |
10 | 18 | } |
11 | 19 | |
12 | 20 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_eswiki.java |
— | — | @@ -5,6 +5,7 @@ |
6 | 6 | import de.brightbyte.wikiword.analyzer.WikiConfiguration; |
7 | 7 | import de.brightbyte.wikiword.analyzer.sensor.HasSectionSensor; |
8 | 8 | import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor; |
| 9 | +import de.brightbyte.wikiword.analyzer.sensor.HasTemplateSensor; |
9 | 10 | import de.brightbyte.wikiword.analyzer.sensor.NamespaceSensor; |
10 | 11 | |
11 | 12 | public class WikiConfiguration_eswiki extends WikiConfiguration { |
— | — | @@ -16,10 +17,17 @@ |
17 | 18 | |
18 | 19 | resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^(Destruir|Copyvio|Plagio|CdbM?|SRA|Sin_?relevancia|Irrelevante|Autotrad|RobotDestruir|Prob|Publicidad|Infraesbozo)$", 0)); |
19 | 20 | |
20 | | - conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Fica_de_(persona|actor|artista|autoridad)|BD|NF|Sucesi\u00f3n)$", 0)); |
| 21 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(BD|NF|Sucesi\u00f3n)$", 0)); |
| 22 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Ficha_de_.+)$", 0, "fechanac")); |
| 23 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Ficha_de_.+)$", 0, "fechamuerte")); |
| 24 | + |
21 | 25 | conceptTypeSensors.add( new HasSectionSensor<ConceptType>(ConceptType.PERSON, "Biograf\u00eda")); |
22 | 26 | conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.LIFEFORM, "^(Taxobox|Ficha_de_(tax\u00f3n))$", 0)); |
23 | 27 | conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Ficha_de_localidad.*)$", 0)); |
| 28 | + conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.PLACE, "coord")); |
| 29 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Ficha_de_.+)$", 0, "coor")); |
| 30 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Ficha_de_.+)$", 0, "mapa")); |
| 31 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Ficha_de_.+)$", 0, "poblaci\u00f3n")); |
24 | 32 | |
25 | 33 | //TODO: number, date, event, work... |
26 | 34 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_itwiki.java |
— | — | @@ -1,11 +1,16 @@ |
2 | 2 | package de.brightbyte.wikiword.wikis; |
3 | 3 | |
| 4 | +import de.brightbyte.wikiword.ResourceType; |
4 | 5 | import de.brightbyte.wikiword.analyzer.WikiConfiguration; |
| 6 | +import de.brightbyte.wikiword.analyzer.sensor.HasCategoryLikeSensor; |
| 7 | +import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor; |
5 | 8 | |
6 | 9 | public class WikiConfiguration_itwiki extends WikiConfiguration { |
7 | 10 | |
8 | 11 | public WikiConfiguration_itwiki() { |
9 | | - //TODO: configure for no.wikipedia.org |
| 12 | + //NOTE(review): title-based list detection is disabled; TitleSensor is not imported in this file - confirm it exists before enabling: resourceTypeSensors.add( new TitleSensor<ResourceType>(ResourceType.LIST, "^Elenco_", 0)); |
| 13 | + resourceTypeSensors.add( new HasCategoryLikeSensor<ResourceType>(ResourceType.LIST, "^(Liste|Cronologie|Cronologia)(_|$)", 0)); |
| 14 | + resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^(Cancellazione|Cancella_subito|Cancelcopy)$", 0)); |
10 | 15 | } |
11 | 16 | |
12 | 17 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java |
— | — | @@ -149,8 +149,13 @@ |
150 | 150 | conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.TIME, "Day", null)); |
151 | 151 | |
152 | 152 | conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.WORK, "(^|_)(statue|work|play|album|song|painting|opera|novel|musical|novel|composition)s?(_|$)", Pattern.CASE_INSENSITIVE)); |
| 153 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.WORK, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "artist")); |
| 154 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.WORK, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "author")); |
| 155 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.WORK, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "composer")); |
| 156 | + |
153 | 157 | conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.EVENT, "(^|_)(event|war|battle|siege|treaties|flood|famine|fire|conflict|crisis|disaster|riot|assassination|execution|crime)s?(_|$)", Pattern.CASE_INSENSITIVE)); |
154 | | - conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_Military_Conflict$", Pattern.CASE_INSENSITIVE)); |
| 158 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "date")); |
| 159 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.EVENT, "^Infobox_.*$", Pattern.CASE_INSENSITIVE, "year")); |
155 | 160 | |
156 | 161 | conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.LIFEFORM, "Taxobox", null)); |
157 | 162 | conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.NUMBER, "^Integers$|(^N|_n)umbers$", 0)); |