r53927 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r53926 | r53927 | r53928 >
Date: 09:27, 29 July 2009
Author: daniel
Status: deferred
Tags:
Comment:
{{0}}
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_dewiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_frwiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_ndswiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_nlwiki.java (modified) (history)
  • /trunk/WikiWord/pom.xml (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_ndswiki.java
@@ -14,30 +14,30 @@
1515
1616 public WikiConfiguration_ndswiki() {
1717 super();
18 - conceptTypeSensors.add( new HasTemplateLikeSensor(ConceptType.PLACE, "^[Ll]\u00e4nner_in_.*", 0));
19 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.PLACE, "Oort"));
20 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.PLACE, "Land"));
 18+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^[Ll]\u00e4nner_in_.*", 0));
 19+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.PLACE, "Oort"));
 20+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.PLACE, "Land"));
2121
22 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.PERSON, "Mann"));
23 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.PERSON, "Fru"));
 22+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.PERSON, "Mann"));
 23+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.PERSON, "Fru"));
2424
25 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "V\u00f6rnaam_f\u00f6r_Deerns"));
26 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "V\u00f6rnaam_f\u00f6r_Jungs"));
27 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "Familiennaam"));
 25+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "V\u00f6rnaam_f\u00f6r_Deerns"));
 26+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "V\u00f6rnaam_f\u00f6r_Jungs"));
 27+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "Familiennaam"));
2828
29 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.TIME, "Johr"));
30 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.TIME, "Dag"));
31 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.TIME, "Johrhunnert"));
 29+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.TIME, "Johr"));
 30+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.TIME, "Dag"));
 31+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.TIME, "Johrhunnert"));
3232
33 - conceptTypeSensors.add( new HasTemplateSensor(ConceptType.LIFEFORM, "Taxobox"));
 33+ conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.LIFEFORM, "Taxobox"));
3434 //TODO: cooperations & organizations
3535
36 - resourceTypeSensors.add( new HasTemplateSensor(ResourceType.BAD, "Delete"));
37 - resourceTypeSensors.add( new HasTemplateSensor(ResourceType.BAD, "Gauweg"));
38 - resourceTypeSensors.add( new HasTemplateSensor(ResourceType.BAD, "Wegsmieten"));
 36+ resourceTypeSensors.add( new HasTemplateSensor<ResourceType>(ResourceType.BAD, "Delete"));
 37+ resourceTypeSensors.add( new HasTemplateSensor<ResourceType>(ResourceType.BAD, "Gauweg"));
 38+ resourceTypeSensors.add( new HasTemplateSensor<ResourceType>(ResourceType.BAD, "Wegsmieten"));
3939
4040 //resourceTypeSensors.add( new HasTemplateSensor(ResourceType.DISAMBIG, "Mehrd\u00fcdig_Begreep") );
41 - resourceTypeSensors.add( new TitleSensor(ResourceType.LIST, "Lieste?_(van|mit).*", 0));
 41+ resourceTypeSensors.add( new TitleSensor<ResourceType>(ResourceType.LIST, "Lieste?_(van|mit).*", 0));
4242
4343 disambigStripSectionPattern = sectionPattern("Kiek ok( bi)?:?", 0); //FIXME: often not as a section, but plain text!
4444
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
@@ -79,6 +79,7 @@
8080 stripClutterManglers.add( new RegularExpressionMangler("\\[\\[:commons:", "[[commons:", Pattern.CASE_INSENSITIVE));
8181
8282 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Okina", 0, false), "\u02BB"));
 83+ stripClutterManglers.add( new RegularExpressionMangler(templatePattern("0", 0, true), " "));
8384 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("\u00b7|moddot|dot", 0, false), "\u00b7"));
8485 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("spaces", 1, true), " "));
8586
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_nlwiki.java
@@ -19,35 +19,36 @@
2020 super();
2121 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("wrapper", 0, true), "{|"));
2222
 23+ stripClutterManglers.add( new RegularExpressionMangler(templatePattern("0", 0, true), " "));
2324 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("e", 0, false), "$1"));
2425 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("unicode", 1, true), "$2"));
2526
26 - conceptTypeSensors.add( new HasTemplateLikeSensor(ConceptType.PLACE, "^(Landtabel|Gemeente|Plaats)($|_)|(^|_)plaats$", 0));
27 - conceptTypeSensors.add( new HasCategoryLikeSensor(ConceptType.PLACE, "^(Gemeente|Stad|Land|Plaats)(_|$)", 0));
 27+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Landtabel|Gemeente|Plaats)($|_)|(^|_)plaats$", 0));
 28+ conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.PLACE, "^(Gemeente|Stad|Land|Plaats)(_|$)", 0));
2829
29 - conceptTypeSensors.add( new HasTemplateSensor(ConceptType.TIME, "Jaarbox"));
30 - conceptTypeSensors.add( new HasTemplateSensor(ConceptType.TIME, "Kalenders"));
31 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.TIME, "Datum"));
32 - conceptTypeSensors.add( new TitleSensor(ConceptType.TIME, "(\\d{1,4}|\\d{1,2}e_eeuw)(_v\\._Chr\\.)?", 0));
 30+ conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.TIME, "Jaarbox"));
 31+ conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.TIME, "Kalenders"));
 32+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.TIME, "Datum"));
 33+ conceptTypeSensors.add( new TitleSensor<ConceptType>(ConceptType.TIME, "(\\d{1,4}|\\d{1,2}e_eeuw)(_v\\._Chr\\.)?", 0));
3334
34 - conceptTypeSensors.add( new HasCategoryLikeSensor(ConceptType.PERSON, "(^|_)persoon(_|$)|(.*schapper|.*oloog|.*icus)$", 0));
35 - conceptTypeSensors.add( new HasTemplateLikeSensor(ConceptType.PERSON, "^(Infobox_(artiest|Auteur|acteur|Comedian|.*speler|Presentator|regisseur)|Winnaars_.*)$", Pattern.CASE_INSENSITIVE));
36 - conceptTypeSensors.add( new HasTemplateLikeSensor(ConceptType.PERSON, "^(Infobox_.*|.*cus|.*eur|.*ler|.*schapper)$", Pattern.CASE_INSENSITIVE, new String[] {"geboren"}));
37 - conceptTypeSensors.add( new HasSectionLikeSensor(ConceptType.PERSON, "^((.* )?leven|Carri\u00e8re|Stamvader|Levensloop|Filmografie|Bibliografie|publicaties|(Eigen )?Biografie|Priv\u00e9|.*Loopbaan.*|Jeugd|Kinderen|Familie|Familieachtergrond)$", Pattern.CASE_INSENSITIVE) );
 35+ conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.PERSON, "(^|_)persoon(_|$)|(.*schapper|.*oloog|.*icus)$", 0));
 36+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Infobox_(artiest|Auteur|acteur|Comedian|.*speler|Presentator|regisseur)|Winnaars_.*)$", Pattern.CASE_INSENSITIVE));
 37+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Infobox_.*|.*cus|.*eur|.*ler|.*schapper)$", Pattern.CASE_INSENSITIVE, new String[] {"geboren"}));
 38+ conceptTypeSensors.add( new HasSectionLikeSensor<ConceptType>(ConceptType.PERSON, "^((.* )?leven|Carri\u00e8re|Stamvader|Levensloop|Filmografie|Bibliografie|publicaties|(Eigen )?Biografie|Priv\u00e9|.*Loopbaan.*|Jeugd|Kinderen|Familie|Familieachtergrond)$", Pattern.CASE_INSENSITIVE) );
3839
3940 //conceptTypeSensors.add( new WikiTextAnalyzer.TitleSensor(".*_\\(voornaam\\)", 0));
40 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "Jongensnaam"));
41 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "Meisjesnaam"));
42 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "Achternaam"));
 41+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "Jongensnaam"));
 42+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "Meisjesnaam"));
 43+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "Achternaam"));
4344
44 - conceptTypeSensors.add( new HasTemplateSensor(ConceptType.LIFEFORM, "Taxobox_end"));
 45+ conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.LIFEFORM, "Taxobox_end"));
4546 //TODO: cooperations & organizations
4647
47 - resourceTypeSensors.add( new HasTemplateLikeSensor(ResourceType.BAD, "^(Weg|Ne)$|weg$", 0));
48 - resourceTypeSensors.add( new HasTemplateSensor(ResourceType.DISAMBIG, "Dp") );
 48+ resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^(Weg|Ne)$|weg$", 0));
 49+ resourceTypeSensors.add( new HasTemplateSensor<ResourceType>(ResourceType.DISAMBIG, "Dp") );
4950 //resourceTypeSensors.add( new TitleSensor(ResourceType.DISAMBIG, ".*\\(doorverwijspagina\\)", 0) );
50 - resourceTypeSensors.add( new HasCategoryLikeSensor(ResourceType.LIST, "^Lijsten_|lijsten$", 0) );
51 - resourceTypeSensors.add( new TitleSensor(ResourceType.LIST, "Lijst_.*|.*lijst", 0) );
 51+ resourceTypeSensors.add( new HasCategoryLikeSensor<ResourceType>(ResourceType.LIST, "^Lijsten_|lijsten$", 0) );
 52+ resourceTypeSensors.add( new TitleSensor<ResourceType>(ResourceType.LIST, "Lijst_.*|.*lijst", 0) );
5253 //resourceTypeSensors.add( new WikiTextAnalyzer.RegularExpressionTitleSensor("^Lijst_", 0) ); //NOTE: too broad. some concrete concepts have a name matching this.
5354
5455 disambigStripSectionPattern = sectionPattern("Zie ook", 0);
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_dewiki.java
@@ -39,6 +39,7 @@
4040 stripClutterManglers.add( new RegularExpressionMangler("\\[\\[:commons:", "[[commons:", Pattern.CASE_INSENSITIVE));
4141
4242 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("Okina", 0, false), "\u02BB"));
 43+ stripClutterManglers.add( new RegularExpressionMangler(templatePattern("0", 0, true), " "));
4344 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("IPA(?:-Text)|IAST|Unicode|Musik", 1, true), "$2"));
4445 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("SWD|Wiktionary", 0, true), ""));
4546
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_frwiki.java
@@ -23,6 +23,7 @@
2424
2525 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("er?|\u00e8?re|(?:mini[ _])?[IVXCM]+(?:e|re|er)?|\\d+r?er?|Mlle|Mme|elle", 0, true), "$1"));
2626 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("romain|rom|rom-min|rom-maj|APIb|IPA", 1, true), "$2"));
 27+ stripClutterManglers.add( new RegularExpressionMangler(templatePattern("0", 0, true), " "));
2728 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("avjc", 0, false), "av. J.-C."));
2829 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("[XVI]+es", 0, false), "$1"));
2930 stripClutterManglers.add( new RegularExpressionMangler(templatePattern("formatnum", 1, true), "$2"));
@@ -43,24 +44,24 @@
4445 stripClutterManglers.add( new RegularExpressionMangler(
4546 templatePattern("lang(?:\\s*\\|\\s*(?:rtl|ltr)\\s*)?", 2, true), "$3"));
4647
47 - conceptTypeSensors.add( new HasCategoryLikeSensor(ConceptType.PLACE, "^(Pays|Territoire|R\u00e9publique|Subdivision|Ville|Municipalit\u00e9s|Ocean)(_|$)", 0));
 48+ conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.PLACE, "^(Pays|Territoire|R\u00e9publique|Subdivision|Ville|Municipalit\u00e9s|Ocean)(_|$)", 0));
4849 conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.PLACE, "ODP", new HashMap<String, NameMatcher>() { { put("1", new PatternNameMatcher("Regional/.*", 0, true)); } }));
49 - conceptTypeSensors.add( new HasTemplateLikeSensor(ConceptType.PLACE, "^(Infobox_)?(Pays|Continent|Commune_)(_|$)", 0));
 50+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Infobox_)?(Pays|Continent|Commune_)(_|$)", 0));
5051
51 - conceptTypeSensors.add( new HasCategoryLikeSensor(ConceptType.PERSON, "(^Homme$|^Femme$|^Naissance_en|D\u00e9c\u00e8s_en)", 0));
 52+ conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.PERSON, "(^Homme$|^Femme$|^Naissance_en|D\u00e9c\u00e8s_en)", 0));
5253
53 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "Pr\u00e9nom"));
54 - conceptTypeSensors.add( new HasCategorySensor(ConceptType.NAME, "Patronyme"));
 54+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "Pr\u00e9nom"));
 55+ conceptTypeSensors.add( new HasCategorySensor<ConceptType>(ConceptType.NAME, "Patronyme"));
5556
56 - conceptTypeSensors.add( new HasTemplateLikeSensor(ConceptType.TIME, "^(Ann\u00e9es|Portail_ann\u00e9es_\\d+|Portails_?I+er?_mill\u00e9naire(_av\\._J\\.-C\\.)?|Portails_d\u00e9cennies)$", 0));
 57+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.TIME, "^(Ann\u00e9es|Portail_ann\u00e9es_\\d+|Portails_?I+er?_mill\u00e9naire(_av\\._J\\.-C\\.)?|Portails_d\u00e9cennies)$", 0));
5758
58 - conceptTypeSensors.add( new HasTemplateLikeSensor(ConceptType.LIFEFORM, "^Taxobox_", 0));
 59+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.LIFEFORM, "^Taxobox_", 0));
5960 //TODO: cooperations & organizations
6061
61 - resourceTypeSensors.add( new HasTemplateLikeSensor(ResourceType.BAD, "^Suppression[ _/]", 0));
 62+ resourceTypeSensors.add( new HasTemplateLikeSensor<ResourceType>(ResourceType.BAD, "^Suppression[ _/]", 0));
6263
6364 //resourceTypeSensors.add( new HasTemplateLikeSensor(ResourceType.DISAMBIG, "^Homonymie(_|$)|_homonymes$|^Paronymie$|^Patronyme$|^Internationalisation$", 0) );
64 - resourceTypeSensors.add( new HasCategoryLikeSensor(ResourceType.LIST, "^Liste(_|$)", 0));
 65+ resourceTypeSensors.add( new HasCategoryLikeSensor<ResourceType>(ResourceType.LIST, "^Liste(_|$)", 0));
6566
6667 disambigStripSectionPattern = sectionPattern("^(Voir aussi|Liens internes)$", 0);
6768
Index: trunk/WikiWord/pom.xml
@@ -12,8 +12,7 @@
1313 <module>WikiWord</module>
1414 <module>WikiWordBuilder</module>
1515 <module>WikiWordIntegrator</module>
16 - <module>WikiWordBuilder4LifeScience</module>
17 - <module>WikiWordBuilder4Biography</module>
 16+ <module>WikiWordProperties</module>
1817 </modules>
1918
2019 <!--

Status & tagging log