r64486 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r64485 | r64486 | r64487 >
Date:12:20, 1 April 2010
Author:daniel
Status:deferred
Tags:
Comment:
improved enwiki concept type detection
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_enwiki.java
@@ -144,10 +144,10 @@
145145 stripClutterManglers.add( new RegularExpressionMangler("rank\\s*=\\s*\\[\\[List[ _]of[ _][-\\w\\d\\s]+?\\|\\s*Ranked\\s+\\{\\{[-\\w\\d\\s]+?counties\\s*\\|\\s*\\w+=[-\\w\\d\\s]+\\}\\}\\]\\]", "", 0));
146146
147147 conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.PLACE,
148 - "^(Geography_of|Places|Villages|Towns|Cities|Captials?|Counties|Countries|Municipalities|Settlements|States|Provinces|Territories|Federal_states|Islands|Regions|Domains|Communes|Districts|Locations)" +
149 - "(_|$)|_(places|villages|towns|cities|capitals|counties|countries|municipalities|settlements|states|provinces|territories|federal_states|islands|regions|domains|communes|districts|locations)$", 0));
 148+ "^(NUTS|Geography_of|Places|Villages|Towns|Cities|Captials?|Constituencies|Counties|Countries|Municipalities|Settlements|States|Provinces|Territories|Federal_states|Islands|Regions|Domains|Communes|Districts|Locations)" +
 149+ "(_|$)|_(places|villages|towns|cities|capitals|constituencies(_.*)?|counties|countries|municipalities|settlements|states|provinces|territories|federal_states|islands|regions|domains|communes|districts|locations)$", 0));
150150
151 - conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Geobox|Infobox_([Ss]ettlement|[Cc]ountry|[Ss]tate|[Ll]ocation))$", 0));
 151+ conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PLACE, "^(Geobox|Infobox_(.*_)?([Ss]ettlement|[Cc]ountry|[Ss]tate|[Ll]ocation|[Cc]ounty)|.*_constituency_infobox)$", 0));
152152
153153 conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(ConceptType.PERSON, "^(Male|Female|People)_|_(people|men|women|births|deaths)$", 0));
154154 conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(ConceptType.PERSON, "^(Persondata|Lifetime|BD|BIRTH-DEATH-SORT|Infobox.*_(person|[aA]rtist|creator|writer|musician|biography|clergy|scientist))$", 0));

Status & tagging log