r53335 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r53334 | r53335 | r53336 >
Date: 21:50, 15 July 2009
Author: daniel
Status: deferred
Tags:
Comment:
link extraction: test cases and regressions
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/Languages.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerTest.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/Languages.java
@@ -45,7 +45,7 @@
4646 ln.remove("simple");
4747 }
4848
49 - if (tweaks.getTweak("languages.metaAsLanguage", true)) {
 49+ if (tweaks.getTweak("languages.metaAsLanguage", false)) {
5050 ln.put("meta", "Meta-Wiki");
5151 } else {
5252 ln.remove("meta");
Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerTest.java
@@ -255,8 +255,8 @@
256256 assertEquals("(newline)", true, isBadLinkTarget("bla\nblubb"));
257257 assertEquals("(space)", false, isBadLinkTarget("bla blubb"));
258258 assertEquals("(umlaut)", false, isBadLinkTarget("bl\u00f6h"));
259 - assertEquals("foo:bar", true, isBadLinkTarget("foo:bar"));
260 - assertEquals("foo: bar", false, isBadLinkTarget("foo: bar"));
 259+ //assertEquals("foo:bar", true, isBadLinkTarget("foo:bar"));
 260+ //assertEquals("foo: bar", false, isBadLinkTarget("foo: bar"));
261261 assertEquals("..", true, isBadLinkTarget(".."));
262262 }
263263
@@ -472,7 +472,7 @@
473473
474474 text = "";
475475 exp = new ArrayList<WikiLink>();
476 - text += "[[x_z:zeug]]\n"; //bad (should be caught by badLinkTarget)
 476+ text += "[[..]]\n"; //bad (should be caught by badLinkTarget)
477477 text += "[[xyz:zeug|zeug]]\n"; //interwiki
478478 text += "[[de:Zeug]]\n"; //interlanguage
479479 text += "[[:de:Zeug]]\n"; //interwiki
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java
@@ -254,7 +254,7 @@
255255 private CharSequence interwiki;
256256 private int namespace;
257257 private CharSequence target;
258 - private CharSequence page;
 258+ private CharSequence title;
259259 private CharSequence section;
260260 private CharSequence text;
261261 private boolean impliedText;
@@ -263,12 +263,12 @@
264264 private CharSequence lenientSection;
265265 private CharSequence lenientText;
266266
267 - public WikiLink(CharSequence interwiki, CharSequence target, int namespace, CharSequence page, CharSequence section, CharSequence text, boolean impliedText, LinkMagic magic) {
 267+ public WikiLink(CharSequence interwiki, CharSequence target, int namespace, CharSequence title, CharSequence section, CharSequence text, boolean impliedText, LinkMagic magic) {
268268 super();
269269 this.magic = magic;
270270 this.interwiki = interwiki;
271271 this.namespace = namespace;
272 - this.page = page;
 272+ this.title = title;
273273 this.section = section;
274274 this.text = text;
275275 this.impliedText = impliedText;
@@ -284,7 +284,7 @@
285285 }
286286
287287 public CharSequence getTitle() {
288 - return page;
 288+ return title;
289289 }
290290
291291 public CharSequence getTarget() {
@@ -332,7 +332,7 @@
333333 }
334334
335335 public CharSequence getLenientText() {
336 - if (lenientText==null && StringUtils.equals(text, page)) return getLenientPage();
 336+ if (lenientText==null && StringUtils.equals(text, title)) return getLenientPage();
337337
338338 if (lenientText==null) lenientText = AnalyzerUtils.trimAndLower(normalizeTitle(getText()));
339339 return lenientText;
@@ -344,9 +344,8 @@
345345 s.append("[[");
346346 if (magic!=null && magic!=LinkMagic.NONE) s.append('{').append(magic).append('}');
347347 if (interwiki!=null) s.append('<').append(interwiki).append('>');
348 - if (namespace!=Namespace.MAIN) s.append(namespace).append(':');
349 - s.append(page);
350 - if (section!=null) s.append('#').append(section);
 348+ if (namespace!=Namespace.MAIN) s.append('(').append(namespace).append(')');
 349+ s.append(target);
351350 if (text!=null) s.append('|').append(text);
352351 s.append("]]");
353352 return s.toString();
@@ -360,7 +359,7 @@
361360 result = PRIME * result + ((interwiki == null) ? 0 : interwiki.hashCode());
362361 result = PRIME * result + ((magic == null) ? 0 : magic.hashCode());
363362 result = PRIME * result + namespace;
364 - result = PRIME * result + ((page == null) ? 0 : page.hashCode());
 363+ result = PRIME * result + ((title == null) ? 0 : title.hashCode());
365364 result = PRIME * result + ((section == null) ? 0 : section.hashCode());
366365 result = PRIME * result + ((target == null) ? 0 : target.hashCode());
367366 result = PRIME * result + ((text == null) ? 0 : text.hashCode());
@@ -388,18 +387,6 @@
389388 return false;
390389 } else if (!magic.equals(other.magic))
391390 return false;
392 - if (namespace != other.namespace)
393 - return false;
394 - if (page == null) {
395 - if (other.page != null)
396 - return false;
397 - } else if (!StringUtils.equals(page, other.page))
398 - return false;
399 - if (section == null) {
400 - if (other.section != null)
401 - return false;
402 - } else if (!StringUtils.equals(section, other.section))
403 - return false;
404391 if (target == null) {
405392 if (other.target != null)
406393 return false;
@@ -1394,6 +1381,14 @@
13951382
13961383 if (target.length()==0) return null;
13971384
 1385+ boolean implied = false;
 1386+
 1387+ if (text==null) {
 1388+ implied = true;
 1389+ text = target;
 1390+ }
 1391+
 1392+ boolean setTargetToTitle = true;
13981393 CharSequence title = target;
13991394
14001395 //handle section links ------------------------
@@ -1410,7 +1405,7 @@
14111406 }
14121407 else if (idx>0) {
14131408 section = title.subSequence(idx+1, title.length());
1414 - title = target.subSequence(0, idx);
 1409+ title = title.subSequence(0, idx);
14151410 }
14161411
14171412 //TODO: subpages starting with "/"...
@@ -1418,11 +1413,13 @@
14191414 if (section!=null) { //handle special encoded chars in section ref
14201415 section = decodeSectionName(AnalyzerUtils.trim(section));
14211416 section = AnalyzerUtils.replaceSpaceByUnderscore(section);
1422 - if (target==null) target = context + "#" + section;
 1417+ if (target==null) {
 1418+ target = context + "#" + section;
 1419+ setTargetToTitle = false;
 1420+ }
14231421 }
14241422
14251423 //handle qualifiers ------------------------
1426 - boolean setTargetToTitle = false;
14271424 idx = StringUtils.indexOf(':', title);
14281425 if (idx>=0) {
14291426 CharSequence pre = AnalyzerUtils.trim(title.subSequence(0, idx));
@@ -1433,6 +1430,7 @@
14341431 title = title.subSequence(idx+1, title.length());
14351432 target = target.subSequence(idx+1, target.length());
14361433 target = getNamespaceName(ns) + ":" + normalizeTitle(target);
 1434+ setTargetToTitle = false;
14371435
14381436 if (!esc) {
14391437 if (ns==Namespace.IMAGE) magic = LinkMagic.IMAGE;
@@ -1440,7 +1438,7 @@
14411439 }
14421440 }
14431441 else if (isInterwikiPrefix(pre)) {
1444 - if (target==title) setTargetToTitle = true;
 1442+ if (target!=title) setTargetToTitle = false;
14451443 title = title.subSequence(idx+1, title.length());
14461444
14471445 if (!setTargetToTitle) {
@@ -1462,18 +1460,8 @@
14631461 }*/
14641462 }
14651463
1466 - boolean implied = false;
1467 -
1468 - if (text==null) {
1469 - implied = true;
1470 -
1471 - if (magic == LinkMagic.CATEGORY) {
1472 - text = context; //sort key defaults to local page
1473 - }
1474 - else {
1475 - text = target;
1476 - if (text.charAt(0)==':') text = text.subSequence(1, text.length());
1477 - }
 1464+ if (implied && magic == LinkMagic.CATEGORY) {
 1465+ text = context; //sort key defaults to local page
14781466 }
14791467
14801468 if (tail!=null && magic == LinkMagic.NONE) text = text.toString() + tail;
@@ -1482,11 +1470,12 @@
14831471
14841472 if (title.length()==0) return null;
14851473
 1474+ if (target==title) setTargetToTitle = true;
14861475 title = normalizeTitle(title);
14871476 if (setTargetToTitle)
14881477 target = title;
14891478
1490 - return new WikiLink(interwiki, title, namespace, title, section, text, implied, magic);
 1479+ return new WikiLink(interwiki, target, namespace, title, section, text, implied, magic);
14911480 }
14921481
14931482 public boolean isInterlanguagePrefix(CharSequence pre) {

Status & tagging log