Index: trunk/WikiWord/WikiWordProperties/.classpath |
— | — | @@ -0,0 +1,10 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8"?> |
| 3 | +<classpath> |
| 4 | + <classpathentry kind="src" path="src/main/java"/> |
| 5 | + <classpathentry kind="src" path="src/test/java"/> |
| 6 | + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
| 7 | + <classpathentry combineaccessrules="false" kind="src" path="/WikiWord"/> |
| 8 | + <classpathentry combineaccessrules="false" kind="src" path="/WikiWordBuilder"/> |
| 9 | + <classpathentry combineaccessrules="false" kind="src" path="/BrightByteUtil"/> |
| 10 | + <classpathentry kind="output" path="bin"/> |
| 11 | +</classpath> |
Index: trunk/WikiWord/WikiWordProperties/.project |
— | — | @@ -0,0 +1,17 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8"?> |
| 3 | +<projectDescription> |
| 4 | + <name>WikiWordBuilder4Biography</name> |
| 5 | + <comment></comment> |
| 6 | + <projects> |
| 7 | + </projects> |
| 8 | + <buildSpec> |
| 9 | + <buildCommand> |
| 10 | + <name>org.eclipse.jdt.core.javabuilder</name> |
| 11 | + <arguments> |
| 12 | + </arguments> |
| 13 | + </buildCommand> |
| 14 | + </buildSpec> |
| 15 | + <natures> |
| 16 | + <nature>org.eclipse.jdt.core.javanature</nature> |
| 17 | + </natures> |
| 18 | +</projectDescription> |
Index: trunk/WikiWord/WikiWordProperties/target/.svnignore |
— | — | @@ -0,0 +1,2 @@ |
| 2 | +* |
| 3 | + |
Property changes on: trunk/WikiWord/WikiWordProperties/target |
___________________________________________________________________ |
Name: svn:ignore |
1 | 4 | + * |
Index: trunk/WikiWord/WikiWordProperties/src/main/assembly/src.xml |
— | — | @@ -0,0 +1,24 @@ |
| 2 | +<assembly> |
| 3 | + <id>src</id> |
| 4 | + <formats> |
| 5 | + <format>tar.gz</format> |
| 6 | + </formats> |
| 7 | + <fileSets> |
| 8 | + <fileSet> |
| 9 | + <includes> |
| 10 | + <include>*</include> |
| 11 | + </includes> |
| 12 | + <excludes> |
| 13 | + <exclude>bin</exclude> |
| 14 | + <exclude>target</exclude> |
| 15 | + <exclude>local.*</exclude> |
| 16 | + </excludes> |
| 17 | + </fileSet> |
| 18 | + <fileSet> |
| 19 | + <directory>src</directory> |
| 20 | + </fileSet> |
| 21 | + <fileSet> |
| 22 | + <directory>doc</directory> |
| 23 | + </fileSet> |
| 24 | + </fileSets> |
| 25 | +</assembly> |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWordProperties/src/main/assembly/bin-dep.xml |
— | — | @@ -0,0 +1,50 @@ |
| 2 | +<assembly> |
| 3 | + <id>bin-dep</id> |
| 4 | + <formats> |
| 5 | + <format>tar.gz</format> |
| 6 | + </formats> |
| 7 | + <fileSets> |
| 8 | + <fileSet> |
| 9 | + <includes> |
| 10 | + <include>README*</include> |
| 11 | + <include>LICENSE*</include> |
| 12 | + <include>NOTICE*</include> |
| 13 | + <include>*.properties</include> |
| 14 | + <include>*.sh</include> |
| 15 | + <include>*.bat</include> |
| 16 | + </includes> |
| 17 | + <excludes> |
| 18 | + <exclude>debug.*</exclude> |
| 19 | + <exclude>local.*</exclude> |
| 20 | + </excludes> |
| 21 | + </fileSet> |
| 22 | + <fileSet> |
| 23 | + <directory>src/main/</directory> |
| 24 | + <outputDirectory></outputDirectory> |
| 25 | + <includes> |
| 26 | + <include>*.rdf</include> |
| 27 | + </includes> |
| 28 | + </fileSet> |
| 29 | + <!--<fileSet> |
| 30 | + <directory>doc</directory> |
| 31 | + </fileSet>--> |
| 32 | + <!--<fileSet> |
| 33 | + <directory>target</directory> |
| 34 | + <outputDirectory></outputDirectory> |
| 35 | + <includes> |
| 36 | + <include>*.jar</include> |
| 37 | + </includes> |
| 38 | + </fileSet>--> |
| 39 | + </fileSets> |
| 40 | + <dependencySets> |
| 41 | + <dependencySet> |
| 42 | + <outputDirectory>/lib</outputDirectory> |
| 43 | + <unpack>false</unpack> |
| 44 | + <scope>runtime</scope> |
| 45 | + <excludes> |
| 46 | + <exclude>junit:junit</exclude> |
| 47 | + <exclude>org.apache.maven.wagon:wagon-ssh</exclude> |
| 48 | + </excludes> |
| 49 | + </dependencySet> |
| 50 | + </dependencySets> |
| 51 | +</assembly> |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/biography/BiographyConceptType.java |
— | — | @@ -0,0 +1,36 @@ |
| 2 | +package de.brightbyte.wikiword.biography; |
| 3 | + |
| 4 | +import de.brightbyte.wikiword.ConceptType; |
| 5 | +import de.brightbyte.wikiword.ConceptTypeSet; |
| 6 | + |
| 7 | +/** |
| 8 | + * Enumeration of concept types; each concept type represents a very broad category of concepts, |
| 9 | + * which can be used to filter concepts identified in a corpus. The idea is at for some uses, |
| 10 | + * some kinds of concepts are not usefull, or especially usefull. For example, people and polaces |
| 11 | + * are not suitable for use in a general dictionary, but very useful for topic tracking. |
| 12 | + * Each type is associated with a code (for internal use) and a URI (for external use). |
| 13 | + * The URI is constructed based on {@link RdfEntities.conceptTypeBase}. |
| 14 | + */ |
| 15 | +public class BiographyConceptType extends ConceptType { |
| 16 | + |
| 17 | + /** |
| 18 | + * NamespaceSet for the canonical concept types. Loaded from the ConceptTypes.properties |
| 19 | + * file in this package. |
| 20 | + */ |
| 21 | + public static final ConceptTypeSet biographyConceptTypes; |
| 22 | + |
| 23 | + static { |
| 24 | + try { |
| 25 | + biographyConceptTypes = getConceptTypes(null, "de.brightbyte.wikiword.biography"); //FIXME: make unmodifiable! |
| 26 | + |
| 27 | + } |
| 28 | + catch (NumberFormatException ex) { |
| 29 | + throw new ExceptionInInitializerError(ex); |
| 30 | + } |
| 31 | + } |
| 32 | + |
| 33 | + public BiographyConceptType(int code, String name) { |
| 34 | + super(code, name); |
| 35 | + } |
| 36 | + |
| 37 | +} |
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/biography/wikis/WikiConfiguration_enwiki.java |
— | — | @@ -0,0 +1,110 @@ |
| 2 | +package de.brightbyte.wikiword.biography.wikis; |
| 3 | + |
| 4 | +import java.util.regex.Pattern; |
| 5 | + |
| 6 | +import de.brightbyte.wikiword.ConceptType; |
| 7 | +import de.brightbyte.wikiword.analyzer.WikiConfiguration; |
| 8 | +import de.brightbyte.wikiword.analyzer.extractor.CategoryPatternParameterExtractor; |
| 9 | +import de.brightbyte.wikiword.analyzer.extractor.PagePropertyValueExtractor; |
| 10 | +import de.brightbyte.wikiword.analyzer.extractor.TemplateParameterExtractor; |
| 11 | +import de.brightbyte.wikiword.analyzer.mangler.RegularExpressionMangler; |
| 12 | +import de.brightbyte.wikiword.analyzer.matcher.ExactNameMatcher; |
| 13 | +import de.brightbyte.wikiword.analyzer.matcher.PatternNameMatcher; |
| 14 | +import de.brightbyte.wikiword.analyzer.sensor.HasPropertySensor; |
| 15 | +import de.brightbyte.wikiword.analyzer.template.DefaultTemplateParameterPropertySpec; |
| 16 | + |
| 17 | +public class WikiConfiguration_enwiki extends WikiConfiguration { |
| 18 | + |
| 19 | + public WikiConfiguration_enwiki() { |
| 20 | + super(); |
| 21 | + |
| 22 | + stripClutterManglers.add( new RegularExpressionMangler(templatePattern("awd", 1, true), "$1")); //TODO: {{awd|award|year|title|role|name}} |
| 23 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("(Birth|Death)(Date(AndAge)?|_date(_and_age)?)", 1, true), " $1") ); |
| 24 | + |
| 25 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^(\\d+s?)_births$", "$1", 0, "person-birth-date") ); |
| 26 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^(\\d+s?)_deaths$", "$1", 0, "person-death-date") ); |
| 27 | + |
| 28 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^(.+)_(artists|painters|sculptors)$", "$1", 0, "artist-group") ); |
| 29 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^.*(^|_)(painter|sculptor|photographer)s$", "$2", Pattern.CASE_INSENSITIVE, "artist-group") ); |
| 30 | + |
| 31 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Persondata"), |
| 32 | + new DefaultTemplateParameterPropertySpec("NAME", "person-sortname").setStripMarkup(true), |
| 33 | + new DefaultTemplateParameterPropertySpec("NAME", "person-name").setStripMarkup(true), |
| 34 | + new DefaultTemplateParameterPropertySpec("ALTERNATIV NAMENS", "person-name").setStripMarkup(true) |
| 35 | + .setSplitPattern(Pattern.compile("\\s[;]\\s")).addNormalizer(Pattern.compile("\\(.*?\\)"),""), |
| 36 | + new DefaultTemplateParameterPropertySpec("SHORT DESCRIPTION", "person-occupation").setStripMarkup(true), |
| 37 | + new DefaultTemplateParameterPropertySpec("DATE OF BIRTH", "person-birth-date").setStripMarkup(true), |
| 38 | + new DefaultTemplateParameterPropertySpec("PLACE OF BIRTH", "person-birth-place").setStripMarkup(true), |
| 39 | + new DefaultTemplateParameterPropertySpec("DATE OF DEATH", "person-death-date").setStripMarkup(true), |
| 40 | + new DefaultTemplateParameterPropertySpec("PLACE OF DEATH", "person-death-place").setStripMarkup(true) |
| 41 | + ) ); |
| 42 | + |
| 43 | + Pattern defaultSplitPattern = Pattern.compile("[,;/]\\s+|<br\\s*/?>"); |
| 44 | + |
| 45 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Infobox_Artist"), |
| 46 | + new DefaultTemplateParameterPropertySpec("name", "person-name").setStripMarkup(true), |
| 47 | + new DefaultTemplateParameterPropertySpec("birthname", "person-name").setStripMarkup(true), |
| 48 | + new DefaultTemplateParameterPropertySpec("birthdate", "person-birth-date").setStripMarkup(true), |
| 49 | + new DefaultTemplateParameterPropertySpec("birthplace", "person-birth-place").setStripMarkup(true), |
| 50 | + new DefaultTemplateParameterPropertySpec("location", "person-birth-place").setStripMarkup(true), |
| 51 | + new DefaultTemplateParameterPropertySpec("deathdate", "person-death-date").setStripMarkup(true), |
| 52 | + new DefaultTemplateParameterPropertySpec("deathplace", "person-death-place").setStripMarkup(true), |
| 53 | + new DefaultTemplateParameterPropertySpec("nationality", "person-nationality").setStripMarkup(true), |
| 54 | + new DefaultTemplateParameterPropertySpec("field", "artist-group").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 55 | + new DefaultTemplateParameterPropertySpec("movement", "artist-group").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 56 | + new DefaultTemplateParameterPropertySpec("training", "artist-training").setStripMarkup(true), |
| 57 | + new DefaultTemplateParameterPropertySpec("award", "artist-award").setStripMarkup(true).setSplitPattern(defaultSplitPattern) |
| 58 | + ) ); |
| 59 | + |
| 60 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Infobox_(((Medical|Military)_)?[Pp]erson|Actor|Astronaut|Criminal|Engineer|Musical_artist|Philosopher|Pope|ReligiousBio|Scientist)", 0, true), |
| 61 | + new DefaultTemplateParameterPropertySpec("name", "person-name").setStripMarkup(true), |
| 62 | + new DefaultTemplateParameterPropertySpec("other_names", "person-name").setStripMarkup(true), |
| 63 | + new DefaultTemplateParameterPropertySpec("birth_date", "person-birth-date").setStripMarkup(true), |
| 64 | + new DefaultTemplateParameterPropertySpec("birth_place", "person-birth-place").setStripMarkup(true), |
| 65 | + new DefaultTemplateParameterPropertySpec("death_date", "person-death-date").setStripMarkup(true), |
| 66 | + new DefaultTemplateParameterPropertySpec("death_place", "person-death-place").setStripMarkup(true), |
| 67 | + new DefaultTemplateParameterPropertySpec("occupation", "person-occupation").setStripMarkup(true), |
| 68 | + new DefaultTemplateParameterPropertySpec("known_for", "person-known-for").setStripMarkup(true), |
| 69 | + new DefaultTemplateParameterPropertySpec("nationality", "person-nationality").setStripMarkup(true), |
| 70 | + new DefaultTemplateParameterPropertySpec("residence", "person-nationality").setStripMarkup(true), |
| 71 | + new DefaultTemplateParameterPropertySpec("citizenship", "person-nationality").setStripMarkup(true) |
| 72 | + ) ); |
| 73 | + |
| 74 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Birth_date|BrithDate", 0, true), |
| 75 | + new DefaultTemplateParameterPropertySpec("1", "person-birth-date").setStripMarkup(true) |
| 76 | + ) ); |
| 77 | + |
| 78 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Death_date|DeathDate", 0, true), |
| 79 | + new DefaultTemplateParameterPropertySpec("1", "person-death-date").setStripMarkup(true) |
| 80 | + ) ); |
| 81 | + |
| 82 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Death_date_and_age|DeathDateAndAge", 0, true), |
| 83 | + new DefaultTemplateParameterPropertySpec("1", "person-death-date").setStripMarkup(true), |
| 84 | + new DefaultTemplateParameterPropertySpec("1", "person-birth-date").setStripMarkup(true) |
| 85 | + ) ); |
| 86 | + |
| 87 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Infobox_Medical_Person"), |
| 88 | + new DefaultTemplateParameterPropertySpec("profession", "person-occupation").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 89 | + new DefaultTemplateParameterPropertySpec("profession", "expert-group").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 90 | + new DefaultTemplateParameterPropertySpec("specialism", "expert-group").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 91 | + new DefaultTemplateParameterPropertySpec("research_field", "expert-group").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 92 | + new DefaultTemplateParameterPropertySpec("work_institutions", "person-affiliation").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 93 | + new DefaultTemplateParameterPropertySpec("prizes", "expert-prize").setStripMarkup(true).setSplitPattern(defaultSplitPattern) |
| 94 | + ) ); |
| 95 | + |
| 96 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Infobox_Scientist"), |
| 97 | + new DefaultTemplateParameterPropertySpec("fields", "expert-group").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 98 | + new DefaultTemplateParameterPropertySpec("alma-mater", "person-education").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 99 | + new DefaultTemplateParameterPropertySpec("workplaces", "person-affiliation").setStripMarkup(true).setSplitPattern(defaultSplitPattern), |
| 100 | + new DefaultTemplateParameterPropertySpec("awards", "expert-prize").setStripMarkup(true).setSplitPattern(defaultSplitPattern) |
| 101 | + ) ); |
| 102 | + |
| 103 | + pageTermExtractors.add( new PagePropertyValueExtractor("person-sortname") ); |
| 104 | + pageTermExtractors.add( new PagePropertyValueExtractor("person-name") ); |
| 105 | + |
| 106 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "artist-group")); |
| 107 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-name")); |
| 108 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-birth-date")); |
| 109 | + } |
| 110 | + |
| 111 | +} |
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/biography/wikis/WikiConfiguration_dewiki.java |
— | — | @@ -0,0 +1,55 @@ |
| 2 | +package de.brightbyte.wikiword.biography.wikis; |
| 3 | + |
| 4 | +import java.util.regex.Pattern; |
| 5 | + |
| 6 | +import de.brightbyte.wikiword.ConceptType; |
| 7 | +import de.brightbyte.wikiword.analyzer.WikiConfiguration; |
| 8 | +import de.brightbyte.wikiword.analyzer.extractor.CategoryPatternParameterExtractor; |
| 9 | +import de.brightbyte.wikiword.analyzer.extractor.PagePropertyValueExtractor; |
| 10 | +import de.brightbyte.wikiword.analyzer.extractor.TemplateParameterExtractor; |
| 11 | +import de.brightbyte.wikiword.analyzer.matcher.ExactNameMatcher; |
| 12 | +import de.brightbyte.wikiword.analyzer.sensor.HasPropertySensor; |
| 13 | +import de.brightbyte.wikiword.analyzer.template.DefaultTemplateParameterPropertySpec; |
| 14 | + |
| 15 | +public class WikiConfiguration_dewiki extends WikiConfiguration { |
| 16 | + |
| 17 | + public WikiConfiguration_dewiki() { |
| 18 | + super(); |
| 19 | + |
| 20 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^Geboren_(\\d+(_v\\._Chr\\.)?)$", "$1", 0, "person-birth-date") ); |
| 21 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^Gestorben_(\\d+(_v\\._Chr\\.)?)$", "$1", 0, "person-death-date") ); |
| 22 | + |
| 23 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^Maler_(der|des)_(.+)$", "$2", 0, "artist-group") ); |
| 24 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^(Maler|Bildhauer|Fotograf)(_|$).*$", "$1", 0, "artist-group") ); |
| 25 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^.*[^_](maler|bildhauer|fotograf)$", "$1", 0, "artist-group").setCapitalize(true) ); |
| 26 | + propertyExtractors.add( new CategoryPatternParameterExtractor("^.*?([-_\\wäöü]+)(maler|bildhauer|fotograf)$", "$2", 0, "artist-group") ); |
| 27 | + |
| 28 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Personendaten"), |
| 29 | + new DefaultTemplateParameterPropertySpec("NAME", "person-sortname").setStripMarkup(true), |
| 30 | + new DefaultTemplateParameterPropertySpec("NAME", "person-name").setStripMarkup(true), |
| 31 | + new DefaultTemplateParameterPropertySpec("ALTERNATIVNAMEN", "person-name").setStripMarkup(true) |
| 32 | + .setSplitPattern(Pattern.compile("\\s[;]\\s")).addNormalizer(Pattern.compile("\\(.*?\\)"),""), |
| 33 | + new DefaultTemplateParameterPropertySpec("KURZBESCHREIBUNG", "person-occupation").setStripMarkup(true), |
| 34 | + new DefaultTemplateParameterPropertySpec("GEBURTSDATUM", "person-birth-date").setStripMarkup(true), |
| 35 | + new DefaultTemplateParameterPropertySpec("STERBEDATUM", "person-death-date").setStripMarkup(true), |
| 36 | + new DefaultTemplateParameterPropertySpec("GEBURTSORT", "person-birth-place").setStripMarkup(true), |
| 37 | + new DefaultTemplateParameterPropertySpec("STERBEORT", "person-death-place").setStripMarkup(true) |
| 38 | + ) ); |
| 39 | + |
| 40 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("PND"), |
| 41 | + new DefaultTemplateParameterPropertySpec("1", "ID-PND").setStripMarkup(true) ) ); |
| 42 | + |
| 43 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("LeMO"), |
| 44 | + new DefaultTemplateParameterPropertySpec("1", "ID-LeMO").setStripMarkup(true) ) ); |
| 45 | + |
| 46 | + //TODO: {{BAM|Kohl|Helmut}} |
| 47 | + |
| 48 | + pageTermExtractors.add( new PagePropertyValueExtractor("person-sortname") ); |
| 49 | + pageTermExtractors.add( new PagePropertyValueExtractor("person-name") ); |
| 50 | + |
| 51 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-name") ); |
| 52 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-birth-date") ); |
| 53 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "artist-group") ); |
| 54 | + } |
| 55 | + |
| 56 | +} |
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/biography/ConceptTypes.properties |
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/lifescience/wikis/WikiConfiguration_enwiki.java |
— | — | @@ -0,0 +1,668 @@ |
| 2 | +package de.brightbyte.wikiword.lifescience.wikis; |
| 3 | + |
| 4 | +import java.util.regex.Matcher; |
| 5 | +import java.util.regex.Pattern; |
| 6 | + |
| 7 | +import de.brightbyte.wikiword.ConceptType; |
| 8 | +import de.brightbyte.wikiword.Namespace; |
| 9 | +import de.brightbyte.wikiword.ResourceType; |
| 10 | +import de.brightbyte.wikiword.analyzer.WikiConfiguration; |
| 11 | +import de.brightbyte.wikiword.analyzer.WikiPage; |
| 12 | +import de.brightbyte.wikiword.analyzer.extractor.CategoryPatternParameterExtractor; |
| 13 | +import de.brightbyte.wikiword.analyzer.extractor.PagePropertyValueExtractor; |
| 14 | +import de.brightbyte.wikiword.analyzer.extractor.PropertyValueExtractor; |
| 15 | +import de.brightbyte.wikiword.analyzer.extractor.TemplateNamePatternParameterExtractor; |
| 16 | +import de.brightbyte.wikiword.analyzer.extractor.TemplateParameterExtractor; |
| 17 | +import de.brightbyte.wikiword.analyzer.extractor.TitlePartExtractor; |
| 18 | +import de.brightbyte.wikiword.analyzer.mangler.RegularExpressionMangler; |
| 19 | +import de.brightbyte.wikiword.analyzer.mangler.TextArmor; |
| 20 | +import de.brightbyte.wikiword.analyzer.matcher.ExactNameMatcher; |
| 21 | +import de.brightbyte.wikiword.analyzer.matcher.PatternNameMatcher; |
| 22 | +import de.brightbyte.wikiword.analyzer.sensor.HasCategoryLikeSensor; |
| 23 | +import de.brightbyte.wikiword.analyzer.sensor.HasCategorySensor; |
| 24 | +import de.brightbyte.wikiword.analyzer.sensor.HasPropertySensor; |
| 25 | +import de.brightbyte.wikiword.analyzer.sensor.HasTemplateLikeSensor; |
| 26 | +import de.brightbyte.wikiword.analyzer.sensor.HasTemplateSensor; |
| 27 | +import de.brightbyte.wikiword.analyzer.sensor.TitleSensor; |
| 28 | +import de.brightbyte.wikiword.analyzer.template.AbstractTemplateParameterPropertySpec; |
| 29 | +import de.brightbyte.wikiword.analyzer.template.DeepTemplateExtractor; |
| 30 | +import de.brightbyte.wikiword.analyzer.template.DefaultTemplateParameterPropertySpec; |
| 31 | +import de.brightbyte.wikiword.analyzer.template.TemplateData; |
| 32 | +import de.brightbyte.wikiword.analyzer.template.TemplateExtractor; |
| 33 | +import de.brightbyte.wikiword.analyzer.template.TemplateParameterPropertySpec; |
| 34 | +import de.brightbyte.wikiword.analyzer.template.TemplateExtractor.Context; |
| 35 | +import de.brightbyte.wikiword.lifescience.LifeScienceConceptType; |
| 36 | + |
| 37 | +public class WikiConfiguration_enwiki extends WikiConfiguration { |
| 38 | + |
| 39 | + protected static String[] resolveSequence(String s, int max) { |
| 40 | + int idx = s.indexOf('-'); |
| 41 | + if (idx<0) return new String[] { s }; |
| 42 | + |
| 43 | + return resolveSequence(s.substring(0, idx).trim(), s.substring(idx+1).trim(), max); |
| 44 | + } |
| 45 | + |
| 46 | + protected static String[] resolveSequence(String from, String to, int max) { |
| 47 | + int i = 0; |
| 48 | + int j = from.length()-1; |
| 49 | + int k = to.length()-1; |
| 50 | + |
| 51 | + while (i<from.length() && i<to.length() && from.charAt(i)==to.charAt(i)) { |
| 52 | + i++; |
| 53 | + } |
| 54 | + |
| 55 | + while (j>=i && k>=i && from.charAt(j)==to.charAt(k)) { |
| 56 | + j--; |
| 57 | + k--; |
| 58 | + } |
| 59 | + |
| 60 | + if (j<i || k<i) return null; |
| 61 | + |
| 62 | + String f = from.substring(i, j+1); |
| 63 | + String t = to.substring(i, k+1); |
| 64 | + |
| 65 | + String prefix = from.substring(0, i); |
| 66 | + String suffix = from.substring(j+1); |
| 67 | + |
| 68 | + int a; |
| 69 | + int b; |
| 70 | + |
| 71 | + try { |
| 72 | + a = Integer.parseInt(f); |
| 73 | + b = Integer.parseInt(t); |
| 74 | + } |
| 75 | + catch (NumberFormatException ex) { |
| 76 | + return null; |
| 77 | + } |
| 78 | + |
| 79 | + int c = b-a +1; |
| 80 | + if (c>max) return null; |
| 81 | + |
| 82 | + String[] ss = new String[c]; |
| 83 | + |
| 84 | + for (int n=0; n<c; n++) { |
| 85 | + ss[n] = prefix + (a+n) + suffix; |
| 86 | + } |
| 87 | + |
| 88 | + return ss; |
| 89 | + } |
| 90 | + |
| 91 | + //FIXME: for some, <br> resp. \n needs to be stripped! |
| 92 | + |
| 93 | + protected static final String numericChars = "0-9"; |
| 94 | + protected static final String upperAlphabeticChars = "A-Z"; |
| 95 | + protected static final String alphabeticChars = upperAlphabeticChars+"a-z"; |
| 96 | + protected static final String upperAlphaNumericChars = upperAlphabeticChars+numericChars; |
| 97 | + protected static final String alphaNumericChars = alphabeticChars+numericChars; |
| 98 | + protected static final String dashChars = "-\u2212\uFE63\u2010-\u2014\uFE58\uFF0D"; |
| 99 | + |
| 100 | + protected static final Pattern identifierSeparatorPattern = Pattern.compile(",\\p{IsZ}+|[\\p{IsZ};]+|<br */?>", 0); |
| 101 | + protected static final Pattern nameSeparatorPattern = Pattern.compile(",\\p{IsZ}+|[\r\n;]+|<br */?>", 0); |
| 102 | + protected static final Pattern badStuffStripPattern = Pattern.compile("[\r\n]+", 0); |
| 103 | + protected static final Pattern spaceStripPattern = Pattern.compile("\\p{IsZ}+", 0); |
| 104 | + protected static final Pattern iupacCleanupPattern = Pattern.compile("(?<=["+dashChars+numericChars+"]|[0-9][a-z])\\p{IsZ}+", 0); |
| 105 | + protected static final Pattern punctuationStripPattern = Pattern.compile("[\r\n,.;/]+", 0); |
| 106 | + protected static final Pattern breakStripPattern = Pattern.compile("[\r\n\\p{IsZ}]+", 0); |
| 107 | + |
| 108 | + private static final String uniProtChars = "["+upperAlphaNumericChars+"]{6,}"; |
| 109 | + private static final String pubChemChars = "["+numericChars+"]+"; |
| 110 | + private static final String pbbChars = "["+numericChars+"]+"; |
| 111 | + private static final String drugBankChars = "["+upperAlphabeticChars+"]{2,}["+numericChars+"]{4,}"; |
| 112 | + private static final String casChars = "["+numericChars+"]+(["+dashChars+"]["+numericChars+"]+)*"; |
| 113 | + private static final String smilesChars = "["+dashChars+"+="+alphaNumericChars+"/\\\\()@#:\\[\\]>.]+"; //FIXME: not greedy enough |
| 114 | + private static final String atcChars = "["+upperAlphabeticChars+"]{6,}"; |
| 115 | + private static final String diseasesDbChars = "["+numericChars+"]+"; |
| 116 | + private static final String pagesChars = "["+numericChars+"]+(\\s*["+dashChars+",]\\s*["+numericChars+"]+)*"; |
| 117 | + |
| 118 | + private static final String icd10Chars = "["+upperAlphabeticChars+"]["+numericChars+"]+(\\.["+numericChars+"]*)?"; //FIXME: ranges! |
| 119 | + private static final String icd9Chars = "["+numericChars+"]+(\\.["+numericChars+"]*)?"; //FIXME: ranges! |
| 120 | + private static final String icdOChars = "M["+numericChars+"]+(/["+numericChars+"]*)?"; |
| 121 | + private static final String omimChars = "["+numericChars+"]{4,}"; |
| 122 | + private static final String medlinePlusChars = "["+numericChars+"]{6,}"; |
| 123 | + private static final String meshChars = "["+upperAlphabeticChars+"]?["+numericChars+"]+(\\.["+numericChars+"]+)*"; |
| 124 | + private static final String eMedicineChars = "["+alphabeticChars+"]+/["+numericChars+"]+"; |
| 125 | + private static final String chemAbbrevChars = "["+dashChars+alphaNumericChars+"(),]+"; |
| 126 | + |
| 127 | + private static final String inChIChars = "["+dashChars+"+"+alphabeticChars+"\\(\\),/]+"; |
| 128 | + private static final String einecsChars = "["+numericChars+"]+(["+dashChars+"]["+numericChars+"]+)*"; |
| 129 | + private static final String ecChemChars = "["+numericChars+"]+(["+dashChars+"]["+numericChars+"]+)*"; |
| 130 | + private static final String uncasnChars = "["+numericChars+"]{4,}"; |
| 131 | + private static final String rtecsChars = "["+upperAlphabeticChars+"]+["+numericChars+"]+"; |
| 132 | + private static final String keggChars = "["+upperAlphabeticChars+"]+["+numericChars+"]+"; |
| 133 | + private static final String chEbiChars = "["+numericChars+"]+"; |
| 134 | + private static final String gmelinChars = "["+numericChars+"]+"; |
| 135 | + private static final String beilsteinChars = "["+numericChars+"]+(["+dashChars+"]["+numericChars+"]+)*"; |
| 136 | + private static final String hgncChars = "["+numericChars+"]+"; |
| 137 | + private static final String hgiChars = "["+numericChars+"]+"; |
| 138 | + private static final String proteinSymbolChars = "["+alphaNumericChars+"]+(["+dashChars+"]["+alphaNumericChars+"]+)*(\\.["+numericChars+"]+)*"; |
| 139 | + private static final String entrezGeneChars = "["+numericChars+"]+"; |
| 140 | + private static final String refSeqChars = "["+upperAlphabeticChars+"]+_["+numericChars+"]+"; //NOTE: value may contain a decimal point, but we ignore that bit for better matching |
| 141 | + private static final String pdbChars = "["+upperAlphaNumericChars+"]{4,}"; |
| 142 | + |
| 143 | + private static final String ecEnzymeChars = "["+numericChars+"](\\.["+numericChars+"]+)*"; |
| 144 | + private static final String homoloGeneChars = "["+numericChars+"]+"; |
| 145 | + private static final String mgiChars = "["+numericChars+"]{6,}"; |
| 146 | + private static final String ensemblChars = "["+upperAlphabeticChars+"]{2,}["+numericChars+"]{10,}"; |
| 147 | + private static final String icscChars = "["+numericChars+"]{4,}"; |
| 148 | + private static final String goCodeChars = "["+numericChars+"]{6,}"; |
| 149 | + //private static final String chemFormulaChars = "["+dashChars+"+,\\(\\)"+alphaNumericChars+"]{3,}"; |
| 150 | + private static final String chemSpiderChars = "["+numericChars+"]+"; |
| 151 | + private static final String threeDMetChars = "["+alphaNumericChars+"]{3,}"; |
| 152 | + |
| 153 | + private static final String dorlandsChars = "["+alphabeticChars+"]+/["+numericChars+"]+"; |
| 154 | + private static final String neuroNamesChars = "["+alphabeticChars+"]+-["+numericChars+"]+"; |
| 155 | + |
| 156 | + //TODO: exclude "Biography"... |
| 157 | + public static final String lifeScienceJournalPattern = "(^|[ _])(Chem[a-z]*|Biol?[.a-z]*|Gen[eo][a-z]*|Med[a-z]*|Cell[a-z]*|DNA|RNA|Nucleic|EMBO|FEBS|Onco[a-z]*|Blood|Immono[a-z]*|Cancer|Virol[a-z]*|Med[a-z]*|Clin[a-z]*|Lancet|Nature|PLoS|Neuro[a-z]*|Zootaxa|JAMA|FASEB|Bacter[a-z]*|Mutat[a-z]*|Mol[a-z]*|Protein|Dermat[a-z]*|Pathol[a-z]*|Endocr[a-z]*|Microbio[a-z]*)($|[_ ])"; |
| 158 | + |
| 159 | + |
| 160 | + protected static DefaultTemplateParameterPropertySpec makeNamePropertySpec(String param, String prop, boolean multi, boolean space) { |
| 161 | + DefaultTemplateParameterPropertySpec spec = new DefaultTemplateParameterPropertySpec(param, prop); |
| 162 | + |
| 163 | + if (multi) { |
| 164 | + if (space) spec.setSplitPattern(nameSeparatorPattern); |
| 165 | + else spec.setSplitPattern(identifierSeparatorPattern); |
| 166 | + } |
| 167 | + |
| 168 | + if (space) spec.addNormalizer(badStuffStripPattern, ""); |
| 169 | + else spec.addNormalizer(spaceStripPattern, ""); |
| 170 | + |
| 171 | + return spec; |
| 172 | + } |
| 173 | + |
| 174 | + protected static DefaultTemplateParameterPropertySpec makeIdentifierPropertySpec(String param, String prop, String pattern) { |
| 175 | + DefaultTemplateParameterPropertySpec spec = new DefaultTemplateParameterPropertySpec(param, prop); |
| 176 | + |
| 177 | + pattern = "(?<=[^\\w\\d]|^)("+pattern+")(?=[^\\w\\d]|$)"; |
| 178 | + |
| 179 | + spec.setFindPattern(Pattern.compile(pattern)); |
| 180 | + |
| 181 | + if (pattern.indexOf('\u2212')>=0) { //XXX: hack for normalizing dashes |
| 182 | + spec.addNormalizer(Pattern.compile("["+dashChars+"]"), "-"); |
| 183 | + } |
| 184 | + |
| 185 | + return spec; |
| 186 | + } |
| 187 | + |
| 188 | + public WikiConfiguration_enwiki() { |
| 189 | + super(); |
| 190 | + |
| 191 | + templateExtractorFactory= new TemplateExtractor.Factory() { |
| 192 | + public TemplateExtractor newTemplateExtractor(Context context, TextArmor armor) { |
| 193 | + DeepTemplateExtractor extractor = new DeepTemplateExtractor(context, armor); |
| 194 | + extractor.addContainerField("Protbox", "Codes"); |
| 195 | + extractor.addContainerField("Protbox", "Caption"); |
| 196 | + //FIXME: this needs to accumulate!!!! //FIXME //FIXME //FIXME //FIXME //FIXME //FIXME //FIXME //FIXME //FIXME //FIXME //FIXME //FIXME |
| 197 | + return extractor; |
| 198 | + } |
| 199 | + }; |
| 200 | + |
| 201 | + //NOTE: apply template replacement only when stripping markup, but then before everything else |
| 202 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("ICD9", 1, true), " $2 ") ); |
| 203 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("ICD10", 3, true), " $2$3.$4 ") ); //XXX: use all 5 params? |
| 204 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("ICDO", 2, true), " M$2/$3 ") ); |
| 205 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("CAS", 1, true), " $2 ") ); |
| 206 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("ATC", 2, true), " $2$3 ") ); |
| 207 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("DiseasesDB2", 1, true), " $2 ") ); |
| 208 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("OMIM\\d?", 1, true), " $2 ") ); |
| 209 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("SMILES", 1, true), " $2 ") ); //FIXME: named param S= ! |
| 210 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("eMedicine2", 2, true), " $2/$3 ") ); |
| 211 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("MedlinePlus2", 1, true), " $2 ") ); |
| 212 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("PDB", 1, true), " $2 ") ); |
| 213 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("PDB2", 1, true), " $2 ") ); |
| 214 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("PDB3", 1, true), " $2 ") ); |
| 215 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("EC_number", 1, true), " $2 ") ); |
| 216 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("OMIM", 1, true), " $2 ") ); |
| 217 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("EntrezGene", 1, true), " $2 ") ); |
| 218 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("UniProt", 1, true), " $2 ") ); |
| 219 | + stripMarkupManglers.add(0, new RegularExpressionMangler( templatePattern("RefSeq", 1, true), " $2 ") ); |
| 220 | + |
| 221 | + propertyExtractors.add( new TemplateParameterExtractor( new ExactNameMatcher("Cite_journal"), |
| 222 | + new DefaultTemplateParameterPropertySpec("journal", "journal") |
| 223 | + .addNormalizer(punctuationStripPattern, "") |
| 224 | + .setCondition(lifeScienceJournalPattern, 0, false) ) ); |
| 225 | + |
| 226 | + TemplateParameterPropertySpec atcSpec = new AbstractTemplateParameterPropertySpec("ATC") { |
| 227 | + private Matcher validator = Pattern.compile("["+upperAlphaNumericChars+"]+").matcher(""); |
| 228 | + |
| 229 | + @Override |
| 230 | + public CharSequence getPropertyValue(WikiPage page, TemplateData params) { |
| 231 | + CharSequence code= params.getParameter("ATCCode"); |
| 232 | + if (code!=null) { |
| 233 | + if (code.length()==0) return null; |
| 234 | + validator.reset(code); |
| 235 | + if (!validator.matches()) return null; |
| 236 | + return code; |
| 237 | + } |
| 238 | + |
| 239 | + CharSequence pre= params.getParameter("ATC_prefix"); |
| 240 | + CharSequence suf= params.getParameter("ATC_suffix"); |
| 241 | + |
| 242 | + if (pre==null) pre = params.getParameter("ATCCode_prefix"); |
| 243 | + if (suf==null) suf = params.getParameter("ATCCode_suffix"); |
| 244 | + |
| 245 | + if (pre==null || suf==null) return null; |
| 246 | + if (pre.length()==0 || suf.length()==0) return null; |
| 247 | + |
| 248 | + validator.reset(pre); |
| 249 | + if (!validator.matches()) return null; |
| 250 | + |
| 251 | + validator.reset(suf); |
| 252 | + if (!validator.matches()) return null; |
| 253 | + |
| 254 | + return pre+""+suf; |
| 255 | + } |
| 256 | + }; |
| 257 | + |
| 258 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Drugbox"), |
| 259 | + makeNamePropertySpec("IUPAC_name", "IUPAC", false, false).addCleanup(iupacCleanupPattern, ""), |
| 260 | + makeNamePropertySpec("synonyms", "Name", true, true), |
| 261 | + |
| 262 | + makeIdentifierPropertySpec("PubChem", "PubChem", pubChemChars), |
| 263 | + makeIdentifierPropertySpec("DrugBank", "DrugBank", drugBankChars), |
| 264 | + makeIdentifierPropertySpec("CAS_number", "CAS", casChars), |
| 265 | + |
| 266 | + makeIdentifierPropertySpec("smiles", "SMILES", smilesChars).addCleanup(breakStripPattern, ""), |
| 267 | + //makeIdentifierPropertySpec("chemical_formula", "Formula", true, false), |
| 268 | + |
| 269 | + makeIdentifierPropertySpec("ATC_supplemental", "ATC", atcChars), |
| 270 | + makeIdentifierPropertySpec("CAS_supplemental", "CAS", casChars), |
| 271 | + atcSpec |
| 272 | + ) ); |
| 273 | + |
| 274 | + TemplateParameterPropertySpec eMedSpec = new AbstractTemplateParameterPropertySpec("eMedicine") { |
| 275 | + private Matcher subjectValidator = Pattern.compile("["+alphaNumericChars+"]+").matcher(""); |
| 276 | + private Matcher topicValidator = Pattern.compile("["+numericChars+"]+").matcher(""); |
| 277 | + //Builds "subject/topic" from eMedicineSubj and eMedicineTopic; returns null if either is missing, empty or fails its validator, or if the subject is "search". |
| 278 | + @Override |
| 279 | + public CharSequence getPropertyValue(WikiPage page, TemplateData params) { |
| 280 | + CharSequence pre= params.getParameter("eMedicineSubj"); |
| 281 | + CharSequence suf= params.getParameter("eMedicineTopic"); |
| 282 | + if (pre==null || suf==null) return null; |
| 283 | + if (pre.length()==0 || suf.length()==0) return null; |
| 284 | + |
| 285 | + //compare by content: pre is only known to be a CharSequence, and equals() against a String is false for any non-String implementation |
| 286 | + if ("search".contentEquals(pre)) return null; |
| 287 | + subjectValidator.reset(pre); |
| 288 | + if (!subjectValidator.matches()) return null; |
| 289 | + topicValidator.reset(suf); |
| 290 | + if (!topicValidator.matches()) return null; |
| 291 | + |
| 292 | + return pre+"/"+suf; |
| 293 | + } |
| 294 | + }; |
| 295 | + |
| 296 | + |
| 297 | + TemplateParameterPropertySpec dorlandsSpec = new AbstractTemplateParameterPropertySpec("Dorlands") { |
| 298 | + private Matcher preValidator = Pattern.compile("["+alphabeticChars+"]_["+numericChars+"]+").matcher(""); |
| 299 | + private Matcher sufValidator = Pattern.compile("["+numericChars+"]+").matcher(""); |
| 300 | + |
| 301 | + //Joins DorlandsPre and DorlandsSuf as "pre/suf"; |
| 302 | + //yields null unless both parameters are present, non-empty and accepted by their validators. |
| 303 | + @Override |
| 304 | + public CharSequence getPropertyValue(WikiPage page, TemplateData params) { |
| 305 | + final CharSequence prefix = params.getParameter("DorlandsPre"); |
| 306 | + final CharSequence suffix = params.getParameter("DorlandsSuf"); |
| 307 | + |
| 308 | + final boolean missing = prefix==null || suffix==null; |
| 309 | + if (missing || prefix.length()==0 || suffix.length()==0) return null; |
| 310 | + |
| 311 | + if (!preValidator.reset(prefix).matches()) return null; |
| 312 | + if (!sufValidator.reset(suffix).matches()) return null; |
| 313 | + |
| 314 | + return prefix.toString() + "/" + suffix; |
| 315 | + } |
| 316 | + }; |
| 317 | + |
| 318 | + TemplateParameterPropertySpec neuroNamesSpec = new AbstractTemplateParameterPropertySpec("NeuroNames") { |
| 319 | + private Matcher typeValidator = Pattern.compile("["+alphabeticChars+"]+").matcher(""); |
| 320 | + private Matcher numValidator = Pattern.compile("["+numericChars+"]+").matcher(""); |
| 321 | + //Builds "type-number" from BrainInfoType and BrainInfoNumber; returns null if either is missing, empty or fails its validator. |
| 322 | + @Override |
| 323 | + public CharSequence getPropertyValue(WikiPage page, TemplateData params) { |
| 324 | + CharSequence type= params.getParameter("BrainInfoType"); |
| 325 | + CharSequence num= params.getParameter("BrainInfoNumber"); |
| 326 | + if (type==null || num==null) return null; |
| 327 | + if (type.length()==0 || num.length()==0) return null; |
| 328 | + |
| 329 | + typeValidator.reset(type); |
| 330 | + if (!typeValidator.matches()) return null; |
| 331 | + |
| 332 | + numValidator.reset(num); //feed the number to numValidator itself; otherwise it still holds its initial empty input and can never match |
| 333 | + if (!numValidator.matches()) return null; |
| 334 | + |
| 335 | + return type+"-"+num; |
| 336 | + } |
| 337 | + }; |
| 338 | + |
| 339 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Infobox_Disease|Infobox_Symptom|SignSymptom_infobox|DiseaseDisorder_infobox|Interventions_infobox", 0, true), |
| 340 | + makeIdentifierPropertySpec("DiseasesDB", "DiseasesDB", diseasesDbChars), |
| 341 | + makeIdentifierPropertySpec("ICD10", "ICD10", icd10Chars), |
| 342 | + makeIdentifierPropertySpec("ICD9", "ICD9", icd9Chars), |
| 343 | + makeIdentifierPropertySpec("ICDO", "ICDO", icdOChars), |
| 344 | + makeIdentifierPropertySpec("OMIM", "OMIM", omimChars), |
| 345 | + makeIdentifierPropertySpec("MedlinePlus", "MedlinePlus", medlinePlusChars), |
| 346 | + makeIdentifierPropertySpec("MeshID", "MeSH", meshChars), //FIXME: UniqueId vs. TreeNumber |
| 347 | + makeIdentifierPropertySpec("MeshNumber", "MeSH", meshChars), //FIXME: UniqueId vs. TreeNumber |
| 348 | + makeNamePropertySpec("MeshName", "MeSHName", true, true), |
| 349 | + makeIdentifierPropertySpec("OMIM_mult", "OMIM", omimChars), |
| 350 | + makeIdentifierPropertySpec("DiseasesDB_mult", "DiseasesDB", diseasesDbChars), |
| 351 | + makeIdentifierPropertySpec("MedlinePlus_mult", "MedlinePlus", medlinePlusChars), |
| 352 | + makeIdentifierPropertySpec("eMedicine_mult", "eMedicine", eMedicineChars), |
| 353 | + eMedSpec |
| 354 | + ) ); |
| 355 | + |
| 356 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Chembox_new"), |
| 357 | + makeNamePropertySpec("IUPACName", "IUPAC", true, true).addCleanup(iupacCleanupPattern, ""), |
| 358 | + makeNamePropertySpec("OtherNames", "Name", true, true) //FIXME: often spaced for auto-breaks and separated by <br> |
| 359 | + ) ); |
| 360 | + |
| 361 | + //TODO: terms from names |
| 362 | + |
| 363 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Chembox_[Pp]harmacology", 0, true), |
| 364 | + makeIdentifierPropertySpec("DrugBank", "DrugBank", drugBankChars), |
| 365 | + atcSpec |
| 366 | + ) ); |
| 367 | + |
| 368 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Chembox_[Hh]azards", 0, true), |
| 369 | + makeIdentifierPropertySpec("RTECS", "RTECS", rtecsChars) |
| 370 | + ) ); |
| 371 | + |
| 372 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Chembox_[Ii]dentifiers", 0, true), |
| 373 | + makeIdentifierPropertySpec("Abbreviations", "ChemAbbrev", chemAbbrevChars), |
| 374 | + makeIdentifierPropertySpec("CASNo", "CAS", casChars), |
| 375 | + makeIdentifierPropertySpec("SMILES", "SMILES", smilesChars).addCleanup(breakStripPattern, ""), |
| 376 | + makeIdentifierPropertySpec("FullSMILES", "SMILES", smilesChars).addCleanup(breakStripPattern, ""), //property name was misspelled "SMILS"; every other SMILES parameter feeds the "SMILES" property |
| 377 | + makeIdentifierPropertySpec("InChI", "InChI", inChIChars).addCleanup(breakStripPattern, ""), |
| 378 | + makeIdentifierPropertySpec("DrugBank", "DrugBank", drugBankChars), |
| 379 | + makeIdentifierPropertySpec("EINECS", "EINECS", einecsChars), |
| 380 | + makeIdentifierPropertySpec("EC-number", "EC/chem", ecChemChars), //NOTE: replaces EINECS and ELINCS; not be confused with the Enzyme Commission EC number for enzymes. makeIdentifierPropertySpec("EINECSCASNO", "CAS", true, false), |
| 381 | + makeIdentifierPropertySpec("UNNumber", "UNCASN", uncasnChars), |
| 382 | + makeIdentifierPropertySpec("PubChem", "PubChem", pubChemChars), |
| 383 | + makeIdentifierPropertySpec("RTECS", "RTECS", rtecsChars), |
| 384 | + makeIdentifierPropertySpec("KEGG", "KEGG", keggChars), |
| 385 | + makeNamePropertySpec("MeSHName", "MeSHName", true, true), |
| 386 | + makeIdentifierPropertySpec("ChEBI", "ChEBI", chEbiChars), |
| 387 | + makeIdentifierPropertySpec("Beilstein", "Beilstein", beilsteinChars), |
| 388 | + makeIdentifierPropertySpec("Gmelin", "Gmelin", gmelinChars), |
| 389 | + makeIdentifierPropertySpec("3DMet", "3DMet", threeDMetChars), |
| 390 | + makeIdentifierPropertySpec("ChemSpiderID", "ChemSpider", chemSpiderChars), |
| 391 | + atcSpec |
| 392 | + ) ); |
| 393 | + |
| 394 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("NatOrganicBox", 0, true), |
| 395 | + makeNamePropertySpec("name", "IUPAC", false, false).addCleanup(iupacCleanupPattern, ""), |
| 396 | + makeNamePropertySpec("synonyms", "Name", true, true), |
| 397 | + makeIdentifierPropertySpec("abbreviations", "ChemAbbrev", chemAbbrevChars), |
| 398 | + //makeIdentifierPropertySpec("chemical_formula", "Formula", chemFormulaChars), |
| 399 | + |
| 400 | + makeIdentifierPropertySpec("CAS", "CAS", casChars), |
| 401 | + makeIdentifierPropertySpec("DrugBank", "DrugBank", drugBankChars), //FIXME: getting "?" |
| 402 | + makeIdentifierPropertySpec("SMILES", "SMILES", smilesChars).addCleanup(breakStripPattern, ""), |
| 403 | + makeIdentifierPropertySpec("EINECS", "EINECS", einecsChars), |
| 404 | + makeIdentifierPropertySpec("PubChem", "PubChem", pubChemChars), |
| 405 | + atcSpec |
| 406 | + ) ); |
| 407 | + |
| 408 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Elementbox", 0, true), |
| 409 | + makeNamePropertySpec("name", "Name", true, true), |
| 410 | + makeIdentifierPropertySpec("number", "ElementNumber", "["+numericChars+"]"), |
| 411 | + makeIdentifierPropertySpec("symbol", "ElementSymbol", "["+alphaNumericChars+"]"), |
| 412 | + |
| 413 | + makeIdentifierPropertySpec("CAS number", "CAS", casChars), |
| 414 | + atcSpec |
| 415 | + ) ); |
| 416 | + |
| 417 | + //TODO: ...as terms |
| 418 | + |
| 419 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox"), |
| 420 | + makeNamePropertySpec("Name", "ProteinName", false, true), |
| 421 | + makeNamePropertySpec("Names", "ProteinName", true, true), |
| 422 | + |
| 423 | + //makeIdentifierPropertySpec("Gene", "HGNC", hgncChars), |
| 424 | + makeIdentifierPropertySpec("HGNCid", "HGNC", hgncChars), |
| 425 | + makeIdentifierPropertySpec("MGIid", "MGI", hgiChars), |
| 426 | + makeIdentifierPropertySpec("Symbol", "ProteinSymbol", proteinSymbolChars), |
| 427 | + makeIdentifierPropertySpec("AltSymbols", "ProteinSymbol", proteinSymbolChars), |
| 428 | + |
| 429 | + makeIdentifierPropertySpec("EntrezGene", "EntrezGene", entrezGeneChars), |
| 430 | + makeIdentifierPropertySpec("OMIM", "OMIM", omimChars), |
| 431 | + makeIdentifierPropertySpec("RefSeq", "RefSeq", refSeqChars), |
| 432 | + makeIdentifierPropertySpec("UniProt", "UniProt", uniProtChars), |
| 433 | + makeIdentifierPropertySpec("PDB", "PDB", pdbChars), |
| 434 | + makeIdentifierPropertySpec("ECnumber", "EC/enzyme", ecEnzymeChars) |
| 435 | + ) ); |
| 436 | + |
| 437 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Taxobox"), |
| 438 | + makeNamePropertySpec("name", "Name", false, true), |
| 439 | + makeNamePropertySpec("regnum", "taxo-regnum", true, true), |
| 440 | + makeNamePropertySpec("divisio", "taxo-divisio", true, true), |
| 441 | + makeNamePropertySpec("classis", "taxo-classis", true, true), |
| 442 | + makeNamePropertySpec("ordo", "taxo-ordo", true, true), |
| 443 | + makeNamePropertySpec("familia", "taxo-familia", true, true), |
| 444 | + makeNamePropertySpec("genus", "taxo-genus", true, true), |
| 445 | + makeNamePropertySpec("species", "taxo-species", true, true) |
| 446 | + ) ); |
| 447 | + |
| 448 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Enzyme_(links|references)", 0, true), |
| 449 | + makeIdentifierPropertySpec("EC_number", "EC/enzyme", ecEnzymeChars) |
| 450 | + ) ); |
| 451 | + |
| 452 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("GO_code_links"), |
| 453 | + makeNamePropertySpec("name", "ProteinName", false, true), |
| 454 | + makeIdentifierPropertySpec("GO_code", "GO_code", goCodeChars) |
| 455 | + ) ); |
| 456 | + |
| 457 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("CAS_registry"), //XXX: only as identifying element, or also in-context? |
| 458 | + makeIdentifierPropertySpec("1", "CAS", casChars) |
| 459 | + ) ); |
| 460 | + |
| 461 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("MSW3_Groves"), |
| 462 | + makeIdentifierPropertySpec("id", "GrovesId", numericChars), |
| 463 | + makeIdentifierPropertySpec("pages", "GrovesPages", pagesChars) |
| 464 | + ) ); |
| 465 | + |
| 466 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Rfam"), |
| 467 | + makeIdentifierPropertySpec("id", "RNA family", alphaNumericChars), |
| 468 | + makeNamePropertySpec("name", "Name", false, true) |
| 469 | + ) ); |
| 470 | + |
| 471 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Rfam_box", 0, true), |
| 472 | + makeIdentifierPropertySpec("acc", "RNA family", alphaNumericChars), |
| 473 | + makeNamePropertySpec("description", "Name", true, true), |
| 474 | + makeNamePropertySpec("abbreviation", "Name", true, true), |
| 475 | + makeNamePropertySpec("type", "RNA type", true, true), |
| 476 | + new DefaultTemplateParameterPropertySpec("journal", "journal") |
| 477 | + .addNormalizer(punctuationStripPattern, "") |
| 478 | + .setCondition(lifeScienceJournalPattern, 0, false) |
| 479 | + ) ); |
| 480 | + |
| 481 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Infobox_chemical_analysis", 0, true), |
| 482 | + makeNamePropertySpec("name", "Name", true, true), |
| 483 | + makeNamePropertySpec("acronym", "Name", true, true), |
| 484 | + makeNamePropertySpec("classification", "AnalysisClass", true, true), |
| 485 | + makeNamePropertySpec("analytes", "Analytes", true, true) |
| 486 | + ) ); |
| 487 | + |
| 488 | + //Stuff from the container field Codes in Protbox: |
| 489 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Codes::OMIM"), |
| 490 | + makeIdentifierPropertySpec("1", "OMIM", omimChars) ) ); |
| 491 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Codes::OMIM2"), |
| 492 | + makeIdentifierPropertySpec("1", "OMIM", omimChars) ) ); |
| 493 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Codes::EntrezGene"), |
| 494 | + makeIdentifierPropertySpec("1", "EntrezGene", entrezGeneChars) ) ); |
| 495 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Codes::UniProt"), |
| 496 | + makeIdentifierPropertySpec("1", "UniProt", uniProtChars) ) ); |
| 497 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Codes::RefSeq"), |
| 498 | + makeIdentifierPropertySpec("1", "RefSeq", refSeqChars) ) ); |
| 499 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Codes::EC_number"), |
| 500 | + makeIdentifierPropertySpec("1", "EC/enzyme", ecEnzymeChars) ) ); |
| 501 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Codes::PDB"), |
| 502 | + makeIdentifierPropertySpec("1", "PDB", pdbChars) ) ); |
| 503 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protbox.Caption::PDB"), |
| 504 | + makeIdentifierPropertySpec("1", "PDB", pdbChars) ) ); |
| 505 | + |
| 506 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Protein"), |
| 507 | + makeIdentifierPropertySpec("Symbol", "ProteinSymbol", proteinSymbolChars), |
| 508 | + makeIdentifierPropertySpec("AltSymbols", "ProteinSymbol", proteinSymbolChars), |
| 509 | + makeIdentifierPropertySpec("CAS_number", "CAS", casChars), |
| 510 | + makeIdentifierPropertySpec("DrugBank", "DrugBank", drugBankChars), //FIXME: getting "?" |
| 511 | + makeIdentifierPropertySpec("EntrezGene", "EntrezGene", entrezGeneChars), |
| 512 | + makeIdentifierPropertySpec("HGNCid", "HGNC", hgncChars), |
| 513 | + makeIdentifierPropertySpec("MGIid", "MGI", hgiChars), |
| 514 | + makeIdentifierPropertySpec("OMIM", "OMIM", omimChars), |
| 515 | + makeIdentifierPropertySpec("PDB", "PDB", pdbChars), |
| 516 | + makeIdentifierPropertySpec("RefSeq", "RefSeq", refSeqChars), |
| 517 | + makeIdentifierPropertySpec("UniProt", "UniProt", uniProtChars), |
| 518 | + makeIdentifierPropertySpec("ECnumber", "EC/enzyme", ecEnzymeChars), |
| 519 | + makeIdentifierPropertySpec("ATC_supplemental", "ATC", atcChars), |
| 520 | + makeIdentifierPropertySpec("CAS_supplemental", "CAS", casChars), |
| 521 | + atcSpec |
| 522 | + ) ); |
| 523 | + |
| 524 | + //TODO: pull names and symbols as terms! |
| 525 | + |
| 526 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("GNF_Protein_box"), |
| 527 | + makeIdentifierPropertySpec("Symbol", "ProteinSymbol", proteinSymbolChars), |
| 528 | + makeIdentifierPropertySpec("AltSymbols", "ProteinSymbol", proteinSymbolChars), |
| 529 | + makeIdentifierPropertySpec("HGNCid", "HGNC", hgncChars), |
| 530 | + makeIdentifierPropertySpec("MGIid", "MGI", hgiChars), |
| 531 | + makeIdentifierPropertySpec("OMIM", "OMIM", omimChars), |
| 532 | + makeIdentifierPropertySpec("PDB", "PDB", pdbChars), |
| 533 | + makeIdentifierPropertySpec("ECnumber", "EC/enzyme", ecEnzymeChars), |
| 534 | + makeIdentifierPropertySpec("Homologene", "HomoloGene", homoloGeneChars), |
| 535 | + makeIdentifierPropertySpec("MGIid", "MGI", mgiChars) |
| 536 | + ) ); |
| 537 | + |
| 538 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("GNF_Ortholog_box"), |
| 539 | + makeIdentifierPropertySpec("Hs_Uniprot", "UniProt", uniProtChars), |
| 540 | + makeIdentifierPropertySpec("Mm_Uniprot", "UniProt", uniProtChars), |
| 541 | + makeIdentifierPropertySpec("Hs_Ensembl", "Ensembl", ensemblChars), |
| 542 | + makeIdentifierPropertySpec("Mm_Ensembl", "Ensembl", ensemblChars), |
| 543 | + makeIdentifierPropertySpec("Hs_EntrezGene", "EntrezGene", entrezGeneChars), |
| 544 | + makeIdentifierPropertySpec("Mm_EntrezGene", "EntrezGene", entrezGeneChars), |
| 545 | + makeIdentifierPropertySpec("Hs_RefseqProtein", "RefSeq", refSeqChars), |
| 546 | + makeIdentifierPropertySpec("Mm_RefseqProtein", "RefSeq", refSeqChars), |
| 547 | + makeIdentifierPropertySpec("Hs_RefseqmRNA", "RefSeq", refSeqChars), |
| 548 | + makeIdentifierPropertySpec("Mm_RefseqmRNA", "RefSeq", refSeqChars) |
| 549 | + ) ); |
| 550 | + |
| 551 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Infobox_neuron"), |
| 552 | + makeNamePropertySpec("neuron_name", "Name", false, true), |
| 553 | + makeNamePropertySpec("function", "Function", false, true), |
| 554 | + makeNamePropertySpec("GraySubject", "GraySubject", true, true) |
| 555 | + ) ); |
| 556 | + |
| 557 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Infobox_neurotransmitter"), |
| 558 | + makeNamePropertySpec("name", "Name", false, true), |
| 559 | + makeNamePropertySpec("abbrev", "Name", false, true) |
| 560 | + ) ); |
| 561 | + |
| 562 | + //TODO: {{MedlinePlus}}...? |
| 563 | + |
| 564 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Infobox_(Anatomy|Artery|Vein|Bone|Brain|Nerve|Muscle|Embryology|Ligament|Lymph)", 0, true), |
| 565 | + makeNamePropertySpec("Name", "Name", true, true), |
| 566 | + makeNamePropertySpec("Latin", "AnatomyLatin", true, true), |
| 567 | + makeNamePropertySpec("GraySubject", "GraySubject", true, true), |
| 568 | + makeNamePropertySpec("MeshName", "MeSHName", true, true), |
| 569 | + makeIdentifierPropertySpec("MeshNumber", "MeSH", meshChars), |
| 570 | + makeNamePropertySpec("DorlandsID", "DorlandsName", true, true), |
| 571 | + makeIdentifierPropertySpec("Dorlands", "Dorlands", dorlandsChars), |
| 572 | + dorlandsSpec, |
| 573 | + neuroNamesSpec |
| 574 | + ) ); |
| 575 | + |
| 576 | + //FIXME: URLDecode for MeshName, etc! |
| 577 | + |
| 578 | + //TODO: Infobox_(Artery|Brain|Bone|...) |
| 579 | + // GraySubject, MeSH name&number, DorlandsPre/DorlandsSuf (Elsevier ) |
| 580 | + |
| 581 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("ICSC"), |
| 582 | + makeIdentifierPropertySpec("1", "ICSC", icscChars) |
| 583 | + ) ); |
| 584 | + |
| 585 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("PubChem"), |
| 586 | + makeIdentifierPropertySpec("1", "PubChem", pubChemChars) |
| 587 | + ) ); |
| 588 | + |
| 589 | + |
| 590 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("PBB"), |
| 591 | + makeIdentifierPropertySpec("geneid", "_PBB_", pbbChars) |
| 592 | + ) ); |
| 593 | + |
| 594 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Infobox_abortion_method"), |
| 595 | + makeNamePropertySpec("name", "Name", false, true), |
| 596 | + makeNamePropertySpec("AKA/Abbreviation", "Name", true, true), |
| 597 | + makeNamePropertySpec("Abortion_type", "AbortionType", false, true) |
| 598 | + ) ); |
| 599 | + |
| 600 | + propertyExtractors.add( new TemplateParameterExtractor(new ExactNameMatcher("Infobox_Birth_control"), |
| 601 | + makeNamePropertySpec("name", "Name", false, true) //not really interesting, just make the concept show up as relevant for LS |
| 602 | + ) ); |
| 603 | + |
| 604 | + propertyExtractors.add( new TemplateParameterExtractor(new PatternNameMatcher("Infobox_(((Medical)_)?[Pp]erson|Scientist)", 0, true), |
| 605 | + new DefaultTemplateParameterPropertySpec("name", "person-name").setStripMarkup(true), |
| 606 | + new DefaultTemplateParameterPropertySpec("other_names", "person-name").setStripMarkup(true), |
| 607 | + new DefaultTemplateParameterPropertySpec("birth_date", "person-birth-date").setStripMarkup(true), |
| 608 | + new DefaultTemplateParameterPropertySpec("occupation", "person-occupation").setStripMarkup(true), |
| 609 | + new DefaultTemplateParameterPropertySpec("known_for", "person-known-for").setStripMarkup(true).setSplitPattern(nameSeparatorPattern), |
| 610 | + new DefaultTemplateParameterPropertySpec("nationality", "person-nationality").setStripMarkup(true) |
| 611 | + ) ); |
| 612 | + |
| 613 | + propertyExtractors.add(new CategoryPatternParameterExtractor("(_|$)([Ff]oods|[Vv]egetables|[Ff]ruits)", null, 0, "food-group")); |
| 614 | + propertyExtractors.add(new TemplateNamePatternParameterExtractor("((.+-)?(Med(ical)?|Treatment|Pathology|Anatomy|Antibiotic|Disease)(-.+)?)-stub", "$2", 0, "med-stub-group")); //TODO: no limits to this one |
| 615 | + |
| 616 | + pageTermExtractors.add( new PagePropertyValueExtractor("IUPAC") ); |
| 617 | + pageTermExtractors.add( new PagePropertyValueExtractor("AnatomyLatin") ); |
| 618 | + pageTermExtractors.add( new PagePropertyValueExtractor("ProteinSymbol") ); |
| 619 | + pageTermExtractors.add( new PagePropertyValueExtractor("ProteinName") ); |
| 620 | + pageTermExtractors.add( new PagePropertyValueExtractor("MeSHName") ); |
| 621 | + pageTermExtractors.add( new PagePropertyValueExtractor("Name") ); |
| 622 | + pageTermExtractors.add( new PagePropertyValueExtractor("Symbol") ); |
| 623 | + pageTermExtractors.add( new PagePropertyValueExtractor("DorlandsName") ); |
| 624 | + pageTermExtractors.add( new PagePropertyValueExtractor("person-name") ); |
| 625 | + |
| 626 | + supplementSensors.add( new TitleSensor<ResourceType>(ResourceType.SUPPLEMENT, Namespace.TEMPLATE, "PBB/\\d+", 0)); |
| 627 | + |
| 628 | + supplementNameExtractors.add( new PropertyValueExtractor("_PBB_").setPrefix("Template:PBB/") ); |
| 629 | + |
| 630 | + supplementedConceptExtractors.add( new TitlePartExtractor(Namespace.MAIN, "(.*)_\\(data_page\\)", 0, "$1") ); |
| 631 | + supplementedConceptExtractors.add( new TitlePartExtractor(Namespace.TEMPLATE, "Infobox_(.*)", 0, "$1") |
| 632 | + .addCondition( new HasCategorySensor<ResourceType>(ResourceType.SUPPLEMENT, "Periodic_table_infobox_templates") ) ); |
| 633 | + |
| 634 | + conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(LifeScienceConceptType.DRUG, "_(treatments|therapies)$", 0)); |
| 635 | + conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(LifeScienceConceptType.DRUG, "Drugbox")); |
| 636 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(LifeScienceConceptType.DRUG, "^Drugs_|^DrugsNav$", 0)); |
| 637 | + conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(LifeScienceConceptType.DRUG, "Major_Drug_Groups")); |
| 638 | + |
| 639 | + conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(LifeScienceConceptType.PROTEIN, "EC_\\d+(\\.\\d+)*", 0)); //FIXME: too much meta-stuff! |
| 640 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(LifeScienceConceptType.PROTEIN, "^(Enzyme_links|PBB|Protein|GNF_.*_box)$", 0) ); |
| 641 | + |
| 642 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(LifeScienceConceptType.CHEMICAL, "^Chembox|^NatOrganicBox$|^ICSC$|^Elementbox|^(Complex_)?Enzymatic_Reaction", 0)); |
| 643 | + conceptTypeSensors.add( new HasCategorySensor<ConceptType>(LifeScienceConceptType.CHEMICAL, "Chemical_elements")); |
| 644 | + |
| 645 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(LifeScienceConceptType.DISEASE, "^(Infobox_Disease|Infobox_Symptom|SignSymptom_infobox|DiseaseDisorder_infobox)$", 0)); |
| 646 | + conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(LifeScienceConceptType.DISEASE, "(_diseases|_disorders)$", 0, false)); |
| 647 | + |
| 648 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(LifeScienceConceptType.ORGAN, "^Infobox_(Brain|Nerve|Muscle|Vein|Artery|Bone|Anatomy|Ligament|Lymph)$", 0)); |
| 649 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(LifeScienceConceptType.ORGAN, "_glands$|^SUNYAnatomy|^(BUHistology|AnatomyAtlasesMicroscopic|Gray's|Anatomy-stub)$", 0)); |
| 650 | + |
| 651 | + conceptTypeSensors.add( new HasTemplateLikeSensor<ConceptType>(LifeScienceConceptType.FOOD, "Nutritional_value", 0)); |
| 652 | + conceptTypeSensors.add( new HasCategoryLikeSensor<ConceptType>(LifeScienceConceptType.FOOD, "(_|$)([Ff]oods|[Vv]egetables|[Ff]ruits)", 0, false)); |
| 653 | + |
| 654 | + conceptTypeSensors.add( new HasTemplateSensor<ConceptType>(ConceptType.LIFEFORM, "Taxobox")); |
| 655 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.LIFEFORM, "GrovesId")); |
| 656 | + |
| 657 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-name")); |
| 658 | + conceptTypeSensors.add( new HasPropertySensor<ConceptType>(ConceptType.PERSON, "person-birth-date")); |
| 659 | + |
| 660 | + //TODO; LOTS of anatomy navigation boxes |
| 661 | + |
| 662 | + //TODO: generic markers, such as {{MedlinePlus}}, {{MeshName}}, {{GPnotebook}}, {{Gene}}, etc, or [[Category:EC_.*]] |
| 663 | + |
| 664 | + //TODO: terms from properties! (ids, latin name, box caption, etc) |
| 665 | + |
| 666 | + |
| 667 | + } |
| 668 | + |
| 669 | +} |
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/lifescience/LifeScienceConceptType.java |
— | — | @@ -0,0 +1,52 @@ |
| 2 | +package de.brightbyte.wikiword.lifescience; |
| 3 | + |
| 4 | +import de.brightbyte.wikiword.ConceptType; |
| 5 | +import de.brightbyte.wikiword.ConceptTypeSet; |
| 6 | + |
| 7 | +/** |
| 8 | + * Enumeration of concept types; each concept type represents a very broad category of concepts, |
| 9 | + * which can be used to filter concepts identified in a corpus. The idea is that for some uses, |
| 10 | + * some kinds of concepts are not useful, or especially useful. For example, people and places |
| 11 | + * are not suitable for use in a general dictionary, but very useful for topic tracking. |
| 12 | + * Each type is associated with a code (for internal use) and a URI (for external use). |
| 13 | + * The URI is constructed based on {@link RdfEntities.conceptTypeBase}. |
| 14 | + */ |
| 15 | +public class LifeScienceConceptType extends ConceptType { |
| 16 | + |
| 17 | + public static final ConceptType DISEASE; |
| 18 | + public static final ConceptType DRUG; |
| 19 | + public static final ConceptType CHEMICAL; |
| 20 | + public static final ConceptType PROTEIN; |
| 21 | + public static final ConceptType ORGAN; |
| 22 | + public static final ConceptType FOOD; |
| 23 | + |
| 24 | + /** |
| 25 | + * ConceptTypeSet holding the life science concept types. Loaded from the ConceptTypes.properties |
| 26 | + * file in this package. |
| 27 | + */ |
| 28 | + public static final ConceptTypeSet lifeScienceConceptTypes; |
| 29 | + |
| 30 | + static { |
| 31 | + try { |
| 32 | + lifeScienceConceptTypes = getConceptTypes(null, "de.brightbyte.wikiword.lifescience"); //FIXME: make unmodifiable! |
| 33 | + |
| 34 | + DISEASE = lifeScienceConceptTypes.getType(1001); |
| 35 | + //SYMPTOM = wikiProConceptTypes.getType(1002); |
| 36 | + DRUG = lifeScienceConceptTypes.getType(1003); |
| 37 | + //TREATMENT = wikiProConceptTypes.getType(1004); |
| 38 | + CHEMICAL = lifeScienceConceptTypes.getType(1005); |
| 39 | + PROTEIN = lifeScienceConceptTypes.getType(1006); |
| 40 | + //GENE = wikiProConceptTypes.getType(1007); |
| 41 | + ORGAN = lifeScienceConceptTypes.getType(1008); |
| 42 | + FOOD = lifeScienceConceptTypes.getType(1009); //NOTE(review): verify that code 1009 is defined in ConceptTypes.properties |
| 43 | + } |
| 44 | + catch (NumberFormatException ex) { |
| 45 | + throw new ExceptionInInitializerError(ex); |
| 46 | + } |
| 47 | + } |
| 48 | + |
| 49 | + public LifeScienceConceptType(int code, String name) { |
| 50 | + super(code, name); |
| 51 | + } |
| 52 | + |
| 53 | +} |
Index: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/lifescience/ConceptTypes.properties |
— | — | @@ -0,0 +1,5 @@ |
| 2 | +1001=DISEASE |
| 3 | +1003=DRUG |
| 4 | +1005=CHEMICAL |
| 5 | +1006=PROTEIN |
| 6 | +1008=ORGAN |
Property changes on: trunk/WikiWord/WikiWordProperties/src/main/java/de/brightbyte/wikiword/lifescience |
___________________________________________________________________ |
Name: svn:mergeinfo |
1 | 7 | + |
Index: trunk/WikiWord/WikiWordProperties/.svnignore |
— | — | @@ -0,0 +1,2 @@ |
| 2 | +*.pyc |
| 3 | + |
Index: trunk/WikiWord/WikiWordProperties/COPYING |
— | — | @@ -0,0 +1,33 @@ |
| 2 | +WikiWord is a system for automatically extracting a thesaurus from |
| 3 | +Wikipedia. It was developed by Daniel Kinzler in 2007-2009. |
| 4 | + |
| 5 | +Development started in 2007 as part of a master's thesis at the |
| 6 | +University of Leipzig, see <http://brightbyte.de/page/WikiWord>. |
| 7 | + |
| 8 | +Development has been supported since 2009 by Wikimedia Deutschland e.V. |
| 9 | + |
| 10 | + NOTE: This software is not released as a product. It was designed |
| 11 | + for Wikimedia Deutschland's own use, and is made public as is, in |
| 12 | + the hope it may be useful. Wikimedia Deutschland may at any time |
| 13 | + discontinue developing or supporting this software. There is no |
| 14 | + guarantee any new versions or even fixes for security issues will |
| 15 | + be released. |
| 16 | + |
| 17 | +WikiWord was originally licensed under the GPL, with support of the |
| 18 | +University of Leipzig, and was released under the LGPL by its author in |
| 19 | +2009, in coordination with Wikimedia Deutschland. If you want to use it |
| 20 | +under some other license or condition, please contact the author at |
| 21 | +<http://brightbyte.de>. |
| 22 | + |
| 23 | + This program is free software: you can redistribute it and/or modify |
| 24 | + it under the terms of the GNU General Public License as published by |
| 25 | + the Free Software Foundation, either version 3 of the License, or |
| 26 | + (at your option) any later version. |
| 27 | + |
| 28 | + This program is distributed in the hope that it will be useful, |
| 29 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | + GNU General Public License for more details. |
| 32 | + |
| 33 | + You should have received a copy of the GNU General Public License |
| 34 | + along with this program. If not, see <http://www.gnu.org/licenses/>. |
Property changes on: trunk/WikiWord/WikiWordProperties/bin |
___________________________________________________________________ |
Name: svn:ignore |
1 | 35 | + * |
Index: trunk/WikiWord/WikiWordProperties/pom.xml |
— | — | @@ -0,0 +1,105 @@ |
| 2 | +<project xmlns="http://maven.apache.org/POM/4.0.0"
|
| 3 | + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
| 4 | + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
| 5 | +
|
| 6 | + <modelVersion>4.0.0</modelVersion>
|
| 7 | + <groupId>de.wikimedia</groupId>
|
| 8 | + <artifactId>WikiWordBuilder4Biography</artifactId>
|
| 9 | + <version>0.3</version>
|
| 10 | +
|
| 11 | + <dependencies>
|
| 12 | + <dependency>
|
| 13 | + <groupId>org.wikimedia</groupId>
|
| 14 | + <artifactId>mwdumper</artifactId>
|
| 15 | + <version>1.11</version>
|
| 16 | + <scope>compile</scope>
|
| 17 | + </dependency>
|
| 18 | + <dependency>
|
| 19 | + <groupId>de.brightbyte</groupId>
|
| 20 | + <artifactId>BrightByteUtil</artifactId>
|
| 21 | + <version>0.2</version>
|
| 22 | + <scope>compile</scope>
|
| 23 | + </dependency>
|
| 24 | + <dependency>
|
| 25 | + <groupId>de.brightbyte</groupId>
|
| 26 | + <artifactId>BrightByteDB</artifactId>
|
| 27 | + <version>0.2</version>
|
| 28 | + <scope>compile</scope>
|
| 29 | + </dependency>
|
| 30 | + <dependency>
|
| 31 | + <groupId>de.wikimedia</groupId>
|
| 32 | + <artifactId>WikiWord</artifactId>
|
| 33 | + <version>0.3</version>
|
| 34 | + <scope>compile</scope>
|
| 35 | + </dependency>
|
| 36 | + <dependency>
|
| 37 | + <groupId>de.wikimedia</groupId>
|
| 38 | + <artifactId>WikiWordBuilder</artifactId>
|
| 39 | + <version>0.3</version>
|
| 40 | + <scope>compile</scope>
|
| 41 | + </dependency>
|
| 42 | + <dependency>
|
| 43 | + <groupId>junit</groupId>
|
| 44 | + <artifactId>junit</artifactId>
|
| 45 | + <version>3.8</version>
|
| 46 | + <scope>test</scope>
|
| 47 | + </dependency>
|
| 48 | + </dependencies>
|
| 49 | +
|
| 50 | + <build>
|
| 51 | + <plugins>
|
| 52 | + <plugin>
|
| 53 | + <groupId>org.apache.maven.plugins</groupId>
|
| 54 | + <artifactId>maven-compiler-plugin</artifactId>
|
| 55 | + <configuration>
|
| 56 | + <source>1.5</source>
|
| 57 | + <target>1.5</target>
|
| 58 | + </configuration>
|
| 59 | + </plugin>
|
| 60 | + <plugin>
|
| 61 | + <groupId>org.apache.maven.plugins</groupId>
|
| 62 | + <artifactId>maven-javadoc-plugin</artifactId>
|
| 63 | + <executions>
|
| 64 | + <execution>
|
| 65 | + <phase>package</phase>
|
| 66 | + </execution>
|
| 67 | + </executions>
|
| 68 | + </plugin>
|
| 69 | + <plugin>
|
| 70 | + <artifactId>maven-assembly-plugin</artifactId>
|
| 71 | + <configuration>
|
| 72 | + <descriptors>
|
| 73 | + <descriptor>src/main/assembly/bin-dep.xml</descriptor>
|
| 74 | + <descriptor>src/main/assembly/src.xml</descriptor>
|
| 75 | + </descriptors>
|
| 76 | + </configuration>
|
| 77 | + </plugin>
|
| 78 | + </plugins>
|
| 79 | +
|
| 80 | + <resources>
|
| 81 | + <resource>
|
| 82 | + <filtering>false</filtering>
|
| 83 | + <directory>src/main/java</directory>
|
| 84 | + <includes>
|
| 85 | + <include>**</include>
|
| 86 | + </includes>
|
| 87 | + <excludes>
|
| 88 | + <exclude>**/*.java</exclude>
|
| 89 | + </excludes>
|
| 90 | + </resource>
|
| 91 | + </resources>
|
| 92 | + <testResources>
|
| 93 | + <testResource>
|
| 94 | + <filtering>false</filtering>
|
| 95 | + <directory>src/test/java</directory>
|
| 96 | + <includes>
|
| 97 | + <include>**</include>
|
| 98 | + </includes>
|
| 99 | + <excludes>
|
| 100 | + <exclude>**/*.java</exclude>
|
| 101 | + </excludes>
|
| 102 | + </testResource>
|
| 103 | + </testResources>
|
| 104 | + </build>
|
| 105 | +
|
| 106 | +</project>
|
Index: trunk/WikiWord/WikiWordProperties/LGPL |
— | — | @@ -0,0 +1,165 @@ |
| 2 | + GNU LESSER GENERAL PUBLIC LICENSE |
| 3 | + Version 3, 29 June 2007 |
| 4 | + |
| 5 | + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> |
| 6 | + Everyone is permitted to copy and distribute verbatim copies |
| 7 | + of this license document, but changing it is not allowed. |
| 8 | + |
| 9 | + |
| 10 | + This version of the GNU Lesser General Public License incorporates |
| 11 | +the terms and conditions of version 3 of the GNU General Public |
| 12 | +License, supplemented by the additional permissions listed below. |
| 13 | + |
| 14 | + 0. Additional Definitions. |
| 15 | + |
| 16 | + As used herein, "this License" refers to version 3 of the GNU Lesser |
| 17 | +General Public License, and the "GNU GPL" refers to version 3 of the GNU |
| 18 | +General Public License. |
| 19 | + |
| 20 | + "The Library" refers to a covered work governed by this License, |
| 21 | +other than an Application or a Combined Work as defined below. |
| 22 | + |
| 23 | + An "Application" is any work that makes use of an interface provided |
| 24 | +by the Library, but which is not otherwise based on the Library. |
| 25 | +Defining a subclass of a class defined by the Library is deemed a mode |
| 26 | +of using an interface provided by the Library. |
| 27 | + |
| 28 | + A "Combined Work" is a work produced by combining or linking an |
| 29 | +Application with the Library. The particular version of the Library |
| 30 | +with which the Combined Work was made is also called the "Linked |
| 31 | +Version". |
| 32 | + |
| 33 | + The "Minimal Corresponding Source" for a Combined Work means the |
| 34 | +Corresponding Source for the Combined Work, excluding any source code |
| 35 | +for portions of the Combined Work that, considered in isolation, are |
| 36 | +based on the Application, and not on the Linked Version. |
| 37 | + |
| 38 | + The "Corresponding Application Code" for a Combined Work means the |
| 39 | +object code and/or source code for the Application, including any data |
| 40 | +and utility programs needed for reproducing the Combined Work from the |
| 41 | +Application, but excluding the System Libraries of the Combined Work. |
| 42 | + |
| 43 | + 1. Exception to Section 3 of the GNU GPL. |
| 44 | + |
| 45 | + You may convey a covered work under sections 3 and 4 of this License |
| 46 | +without being bound by section 3 of the GNU GPL. |
| 47 | + |
| 48 | + 2. Conveying Modified Versions. |
| 49 | + |
| 50 | + If you modify a copy of the Library, and, in your modifications, a |
| 51 | +facility refers to a function or data to be supplied by an Application |
| 52 | +that uses the facility (other than as an argument passed when the |
| 53 | +facility is invoked), then you may convey a copy of the modified |
| 54 | +version: |
| 55 | + |
| 56 | + a) under this License, provided that you make a good faith effort to |
| 57 | + ensure that, in the event an Application does not supply the |
| 58 | + function or data, the facility still operates, and performs |
| 59 | + whatever part of its purpose remains meaningful, or |
| 60 | + |
| 61 | + b) under the GNU GPL, with none of the additional permissions of |
| 62 | + this License applicable to that copy. |
| 63 | + |
| 64 | + 3. Object Code Incorporating Material from Library Header Files. |
| 65 | + |
| 66 | + The object code form of an Application may incorporate material from |
| 67 | +a header file that is part of the Library. You may convey such object |
| 68 | +code under terms of your choice, provided that, if the incorporated |
| 69 | +material is not limited to numerical parameters, data structure |
| 70 | +layouts and accessors, or small macros, inline functions and templates |
| 71 | +(ten or fewer lines in length), you do both of the following: |
| 72 | + |
| 73 | + a) Give prominent notice with each copy of the object code that the |
| 74 | + Library is used in it and that the Library and its use are |
| 75 | + covered by this License. |
| 76 | + |
| 77 | + b) Accompany the object code with a copy of the GNU GPL and this license |
| 78 | + document. |
| 79 | + |
| 80 | + 4. Combined Works. |
| 81 | + |
| 82 | + You may convey a Combined Work under terms of your choice that, |
| 83 | +taken together, effectively do not restrict modification of the |
| 84 | +portions of the Library contained in the Combined Work and reverse |
| 85 | +engineering for debugging such modifications, if you also do each of |
| 86 | +the following: |
| 87 | + |
| 88 | + a) Give prominent notice with each copy of the Combined Work that |
| 89 | + the Library is used in it and that the Library and its use are |
| 90 | + covered by this License. |
| 91 | + |
| 92 | + b) Accompany the Combined Work with a copy of the GNU GPL and this license |
| 93 | + document. |
| 94 | + |
| 95 | + c) For a Combined Work that displays copyright notices during |
| 96 | + execution, include the copyright notice for the Library among |
| 97 | + these notices, as well as a reference directing the user to the |
| 98 | + copies of the GNU GPL and this license document. |
| 99 | + |
| 100 | + d) Do one of the following: |
| 101 | + |
| 102 | + 0) Convey the Minimal Corresponding Source under the terms of this |
| 103 | + License, and the Corresponding Application Code in a form |
| 104 | + suitable for, and under terms that permit, the user to |
| 105 | + recombine or relink the Application with a modified version of |
| 106 | + the Linked Version to produce a modified Combined Work, in the |
| 107 | + manner specified by section 6 of the GNU GPL for conveying |
| 108 | + Corresponding Source. |
| 109 | + |
| 110 | + 1) Use a suitable shared library mechanism for linking with the |
| 111 | + Library. A suitable mechanism is one that (a) uses at run time |
| 112 | + a copy of the Library already present on the user's computer |
| 113 | + system, and (b) will operate properly with a modified version |
| 114 | + of the Library that is interface-compatible with the Linked |
| 115 | + Version. |
| 116 | + |
| 117 | + e) Provide Installation Information, but only if you would otherwise |
| 118 | + be required to provide such information under section 6 of the |
| 119 | + GNU GPL, and only to the extent that such information is |
| 120 | + necessary to install and execute a modified version of the |
| 121 | + Combined Work produced by recombining or relinking the |
| 122 | + Application with a modified version of the Linked Version. (If |
| 123 | + you use option 4d0, the Installation Information must accompany |
| 124 | + the Minimal Corresponding Source and Corresponding Application |
| 125 | + Code. If you use option 4d1, you must provide the Installation |
| 126 | + Information in the manner specified by section 6 of the GNU GPL |
| 127 | + for conveying Corresponding Source.) |
| 128 | + |
| 129 | + 5. Combined Libraries. |
| 130 | + |
| 131 | + You may place library facilities that are a work based on the |
| 132 | +Library side by side in a single library together with other library |
| 133 | +facilities that are not Applications and are not covered by this |
| 134 | +License, and convey such a combined library under terms of your |
| 135 | +choice, if you do both of the following: |
| 136 | + |
| 137 | + a) Accompany the combined library with a copy of the same work based |
| 138 | + on the Library, uncombined with any other library facilities, |
| 139 | + conveyed under the terms of this License. |
| 140 | + |
| 141 | + b) Give prominent notice with the combined library that part of it |
| 142 | + is a work based on the Library, and explaining where to find the |
| 143 | + accompanying uncombined form of the same work. |
| 144 | + |
| 145 | + 6. Revised Versions of the GNU Lesser General Public License. |
| 146 | + |
| 147 | + The Free Software Foundation may publish revised and/or new versions |
| 148 | +of the GNU Lesser General Public License from time to time. Such new |
| 149 | +versions will be similar in spirit to the present version, but may |
| 150 | +differ in detail to address new problems or concerns. |
| 151 | + |
| 152 | + Each version is given a distinguishing version number. If the |
| 153 | +Library as you received it specifies that a certain numbered version |
| 154 | +of the GNU Lesser General Public License "or any later version" |
| 155 | +applies to it, you have the option of following the terms and |
| 156 | +conditions either of that published version or of any later version |
| 157 | +published by the Free Software Foundation. If the Library as you |
| 158 | +received it does not specify a version number of the GNU Lesser |
| 159 | +General Public License, you may choose any version of the GNU Lesser |
| 160 | +General Public License ever published by the Free Software Foundation. |
| 161 | + |
| 162 | + If the Library as you received it specifies that a proxy can decide |
| 163 | +whether future versions of the GNU Lesser General Public License shall |
| 164 | +apply, that proxy's public statement of acceptance of any version is |
| 165 | +permanent authorization for you to choose that version for the |
| 166 | +Library. |
Index: trunk/WikiWord/WikiWordProperties/build.xml |
— | — | @@ -0,0 +1,90 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8"?>
|
| 3 | +<project xmlns:artifact="antlib:org.apache.maven.artifact.ant"
|
| 4 | + name="WikiWordBuilder" default="dist" basedir=".">
|
| 5 | + <property name="src" location="src/main/java"/>
|
| 6 | + <property name="build" location="build"/>
|
| 7 | + <property name="dist" location="dist"/>
|
| 8 | +
|
| 9 | + <artifact:pom id="maven.project" file="pom.xml">
|
| 10 | + </artifact:pom>
|
| 11 | +
|
| 12 | + <artifact:dependencies pathId="compile.classpath" filesetId="compile.fileset" useScope="compile">
|
| 13 | + <artifact:pom refid="maven.project"/>
|
| 14 | + </artifact:dependencies>
|
| 15 | +
|
| 16 | + <artifact:dependencies pathId="runtime.classpath" filesetId="runtime.fileset" useScope="runtime">
|
| 17 | + <artifact:pom refid="maven.project"/>
|
| 18 | + </artifact:dependencies>
|
| 19 | +
|
| 20 | + <property name="versionedName" value="${maven.project.artifactId}-${maven.project.version}"/>
|
| 21 | +
|
| 22 | + <target name="init">
|
| 23 | + <!-- Create the time stamp -->
|
| 24 | + <tstamp/>
|
| 25 | + <!-- Create the build directory structure used by compile -->
|
| 26 | + <mkdir dir="${build}"/>
|
| 27 | + </target>
|
| 28 | +
|
| 29 | + <target name="compile" depends="init" description="compile the source " >
|
| 30 | + <!-- Compile the java code from ${src} into ${build} -->
|
| 31 | + <javac srcdir="${src}" destdir="${build}" source="1.5" target="1.5" encoding="UTF-8" debug="true" debuglevel="lines,source,vars"> |
| 32 | + <classpath refid="compile.classpath" />
|
| 33 | + </javac>
|
| 34 | + <copy todir="${build}"> |
| 35 | + <fileset dir="${src}"> |
| 36 | + <exclude name="**/*.java"/> |
| 37 | + </fileset> |
| 38 | + </copy> |
| 39 | + </target>
|
| 40 | +
|
| 41 | + <target name="dist" depends="compile"
|
| 42 | + description="generate the distribution" >
|
| 43 | + <!-- Create the distribution directory -->
|
| 44 | + <mkdir dir="${dist}/lib"/>
|
| 45 | + <!-- Put everything in ${build} into the ${versionedName}.jar file -->
|
| 46 | + <jar jarfile="${dist}/lib/${versionedName}.jar" basedir="${build}"/>
|
| 47 | + <copy todir="${dist}/lib">
|
| 48 | + <fileset refid="runtime.fileset"/>
|
| 49 | + <mapper type="flatten" />
|
| 50 | + </copy>
|
| 51 | + </target>
|
| 52 | +
|
| 53 | + <target name="assemble" depends="dist" description="assemble deployment package">
|
| 54 | + <mkdir dir="${dist}/unpacked"/>
|
| 55 | + <mkdir dir="${dist}/assembled"/>
|
| 56 | +
|
| 57 | + <!-- build jar with all dependencies -->
|
| 58 | + <unjar dest="${dist}/unpacked">
|
| 59 | + <fileset dir="${dist}/lib"/>
|
| 60 | + </unjar>
|
| 61 | +
|
| 62 | + <jar jarfile="${dist}/assembled/${versionedName}-with-deps.jar" basedir="${dist}/unpacked"/>
|
| 63 | +
|
| 64 | +
|
| 65 | + <copy todir="${dist}/assembled/bin">
|
| 66 | + <fileset dir="bin"/>
|
| 67 | + </copy>
|
| 68 | +
|
| 69 | + <copy tofile="${dist}/assembled/bin/${maven.project.artifactId}.jar">
|
| 70 | + <fileset file="${dist}/assembled/${versionedName}-with-deps.jar"/>
|
| 71 | + </copy> |
| 72 | + |
| 73 | + <input addproperty="keystorepass">Enter Keystore Password</input> |
| 74 | + |
| 75 | + <signjar jar="${dist}/assembled/bin/${maven.project.artifactId}.jar" |
| 76 | + alias="brightbyte" storepass="${keystorepass}" />
|
| 77 | + </target>
|
| 78 | +
|
| 79 | + <target name="install" depends="dist" description="install to local maven repository">
|
| 80 | + <artifact:install file="dist/lib/${versionedName}.jar">
|
| 81 | + <pom refid="maven.project"/>
|
| 82 | + </artifact:install>
|
| 83 | + </target>
|
| 84 | +
|
| 85 | + <target name="clean" description="clean up" >
|
| 86 | + <!-- Delete the ${build} and ${dist} directory trees -->
|
| 87 | + <delete dir="${build}"/>
|
| 88 | + <delete dir="${dist}"/>
|
| 89 | + </target>
|
| 90 | +
|
| 91 | +</project>
|
Property changes on: trunk/WikiWord/WikiWordProperties |
___________________________________________________________________ |
Name: svn:mergeinfo |
1 | 92 | + |
Name: svn:ignore |
2 | 93 | + *.pyc |