r50284 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r50283‎ | r50284 | r50285 >
Date:21:15, 6 May 2009
Author:daniel
Status:deferred
Tags:
Comment:
improve sensors
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/AnalyzerUtils.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/extractor/CategoryPatternParameterExtractor.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasPropertySensor.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/extractor/CategoryPatternParameterExtractor.java
@@ -9,12 +9,14 @@
1010
1111 import de.brightbyte.data.MultiMap;
1212 import de.brightbyte.data.ValueSetMultiMap;
 13+import de.brightbyte.wikiword.analyzer.AnalyzerUtils;
1314 import de.brightbyte.wikiword.analyzer.WikiPage;
1415
1516 public class CategoryPatternParameterExtractor implements PropertyExtractor {
1617 protected String property;
1718 protected Matcher matcher;
1819 protected String replacement;
 20+ private boolean capitalize = false;
1921
2022 public CategoryPatternParameterExtractor(String pattern, String replacement, int flags, String property) {
2123 this(Pattern.compile(pattern, flags), replacement, property);
@@ -33,10 +35,15 @@
3436 public MultiMap<String, CharSequence, Set<CharSequence>> extract(WikiPage page, MultiMap<String, CharSequence, Set<CharSequence>> into) {
3537 for(CharSequence s: page.getCategories()) {
3638 matcher.reset(s);
37 - if (matcher.find()) {
38 - String v = matcher.group();
 39+ if (matcher.matches()) {
 40+ CharSequence v = matcher.group();
3941 v = matcher.replaceAll(replacement);
 42+ v = AnalyzerUtils.replaceUnderscoreBySpace(v);
 43+ v = AnalyzerUtils.trim(v);
4044
 45+ if (capitalize)
 46+ v = AnalyzerUtils.titleCase(v);
 47+
4148 if (into==null) into = new ValueSetMultiMap<String, CharSequence>();
4249 into.put(property, v);
4350 }
@@ -44,4 +51,9 @@
4552
4653 return into;
4754 }
 55+
 56+ public PropertyExtractor setCapitalize(boolean capitalize) {
 57+ this.capitalize = capitalize;
 58+ return this;
 59+ }
4860 }
\ No newline at end of file
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java
@@ -1456,33 +1456,8 @@
14571457
14581458 title = AnalyzerUtils.replaceSpaceByUnderscore(title);
14591459
1460 - if (titleCase && forceCase && title.charAt(0)>'Z') { //fast check for ascii caps first
1461 - int ch = Character.codePointAt(title, 0);
1462 - if (Character.isLowerCase(ch)) {
1463 - int uch = Character.toUpperCase(ch);
1464 - if (uch!=ch) {
1465 - int w = Character.charCount(ch);
1466 - int len= title.length();
1467 -
1468 - char[] uchars = Character.toChars(uch);
1469 - char[] chars = new char[uchars.length + len - w];
1470 - System.arraycopy(uchars, 0, chars, 0, uchars.length);
1471 -
1472 - if (title instanceof String) {
1473 - ((String)title).getChars(w, len, chars, uchars.length);
1474 - }
1475 - else if (title instanceof StringBuilder) {
1476 - ((StringBuilder)title).getChars(w, len, chars, uchars.length);
1477 - }
1478 - else {
1479 - for (int i = 0; i<len-w; i++) {
1480 - chars[i+uchars.length] = title.charAt(i);
1481 - }
1482 - }
1483 -
1484 - title = new String(chars);
1485 - }
1486 - }
 1460+ if (titleCase && forceCase) {
 1461+ title = AnalyzerUtils.titleCase(title);
14871462 }
14881463
14891464 return title;
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/AnalyzerUtils.java
@@ -118,4 +118,38 @@
119119 return toLowerCase(trim(s));
120120 }
121121
 122+ public static CharSequence titleCase(CharSequence s) {
 123+ if (s.length()==0) return s;
 124+
 125+ if (s.charAt(0)<='Z') return s; //fast check for ascii caps first
 126+
 127+ int ch = Character.codePointAt(s, 0);
 128+ if (Character.isLowerCase(ch)) {
 129+ int uch = Character.toUpperCase(ch);
 130+ if (uch!=ch) {
 131+ int w = Character.charCount(ch);
 132+ int len= s.length();
 133+
 134+ char[] uchars = Character.toChars(uch);
 135+ char[] chars = new char[uchars.length + len - w];
 136+ System.arraycopy(uchars, 0, chars, 0, uchars.length);
 137+
 138+ if (s instanceof String) {
 139+ ((String)s).getChars(w, len, chars, uchars.length);
 140+ }
 141+ else if (s instanceof StringBuilder) {
 142+ ((StringBuilder)s).getChars(w, len, chars, uchars.length);
 143+ }
 144+ else {
 145+ for (int i = 0; i<len-w; i++) {
 146+ chars[i+uchars.length] = s.charAt(i);
 147+ }
 148+ }
 149+
 150+ s = new String(chars);
 151+ }
 152+ }
 153+
 154+ return s;
 155+ }
122156 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/sensor/HasPropertySensor.java
@@ -25,6 +25,7 @@
2626 public HasPropertySensor(V value, String name, NameMatcher matcher) {
2727 super(value);
2828 this.matcher = matcher;
 29+ this.name = name;
2930 }
3031
3132 @Override

Status & tagging log