r53903 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r53902 | r53903 | r53904 >
Date:21:06, 28 July 2009
Author:daniel
Status:deferred
Tags:
Comment:
tweaking special property extraction
Modified paths:
  • /trunk/WikiWord/WikiWordProperties/.project (modified) (history)
  • /trunk/WikiWord/WikiWordProperties/pom.xml (modified) (history)
  • /trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/lifescience/wikis/WikiConfiguration_enwiki.java (modified) (history)
  • /trunk/WikiWord/WikiWordProperties/src/test/java/de/brightbyte/wikiword/wikipro (deleted) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordProperties/.project
@@ -1,6 +1,6 @@
2 2 <?xml version="1.0" encoding="UTF-8"?>
3 3 <projectDescription>
4 - <name>WikiWordBuilder4Biography</name>
 4+ <name>WikiWordProperties</name>
5 5 <comment></comment>
6 6 <projects>
7 7 </projects>
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/lifescience/wikis/WikiConfiguration_enwiki.java
@@ -153,7 +153,7 @@
154 154 private static final String neuroNamesChars = "["+alphabeticChars+"]+-["+numericChars+"]+";
155 155
156 156 //TODO: exclude "Biography"...
157 - public static final String lifeScienceJournalPattern = "(^|[ _])(Chem[a-z]*|Biol?[.a-z]*|Gen[eo][a-z]*|Med[a-z]*|Cell[a-z]*|DNA|RNA|Nucleic|EMBO|FEBS|Onco[a-z]*|Blood|Immono[a-z]*|Cancer|Virol[a-z]*|Med[a-z]*|Clin[a-z]*|Lancet|Nature|PLoS|Neuro[a-z]*|Zootaxa|JAMA|FASEB|Bacter[a-z]*|Mutat[a-z]*|Mol[a-z]*|Protein|Dermat[a-z]*|Pathol[a-z]*|Endocr[a-z]*|Microbio[a-z]*)($|[_ ])";
 157+ //public static final String lifeScienceJournalPattern = "(^|[ _])(Chem[a-z]*|Biol?[.a-z]*|Gen[eo][a-z]*|Med[a-z]*|Cell[a-z]*|DNA|RNA|Nucleic|EMBO|FEBS|Onco[a-z]*|Blood|Immono[a-z]*|Cancer|Virol[a-z]*|Med[a-z]*|Clin[a-z]*|Lancet|Nature|PLoS|Neuro[a-z]*|Zootaxa|JAMA|FASEB|Bacter[a-z]*|Mutat[a-z]*|Mol[a-z]*|Protein|Dermat[a-z]*|Pathol[a-z]*|Endocr[a-z]*|Microbio[a-z]*)($|[_ ])";
158 158
159 159
160 160 protected static DefaultTemplateParameterPropertySpec makeNamePropertySpec(String param, String prop, boolean multi, boolean space) {
@@ -219,8 +219,8 @@
220 220
221 221 propertyExtractors.add( new TemplateParameterExtractor( new ExactNameMatcher("Cite_journal"),
222 222 new DefaultTemplateParameterPropertySpec("journal", "journal")
223 - .addNormalizer(punctuationStripPattern, "")
224 - .setCondition(lifeScienceJournalPattern, 0, false) ) );
 223+ .addNormalizer(punctuationStripPattern, "")/*
 224+ .setCondition(lifeScienceJournalPattern, 0, false)*/ ) );
225 225
226 226 TemplateParameterPropertySpec atcSpec = new AbstractTemplateParameterPropertySpec("ATC") {
227 227 private Matcher validator = Pattern.compile("["+upperAlphaNumericChars+"]+").matcher("");
@@ -473,8 +473,8 @@
474 474 makeNamePropertySpec("abbreviation", "Name", true, true),
475 475 makeNamePropertySpec("type", "RNA type", true, true),
476 476 new DefaultTemplateParameterPropertySpec("journal", "journal")
477 - .addNormalizer(punctuationStripPattern, "")
478 - .setCondition(lifeScienceJournalPattern, 0, false)
 477+ .addNormalizer(punctuationStripPattern, "") /*
 478+ .setCondition(lifeScienceJournalPattern, 0, false) */
479 479 ) );
480 480
481 481 propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Infobox_chemical_analysis", 0, true),
Index: trunk/WikiWord/WikiWordProperties/pom.xml
@@ -4,7 +4,7 @@
5 5
6 6 <modelVersion>4.0.0</modelVersion>
7 7 <groupId>de.wikimedia</groupId>
8 - <artifactId>WikiWordBuilder4Biography</artifactId>
 8+ <artifactId>WikiWordProperties</artifactId>
9 9 <version>0.3</version>
10 10
11 11 <dependencies>

Status & tagging log