r72031 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r72030 | r72031 | r72032 >
Date: 16:57, 31 August 2010
Author: aj
Status: deferred
Tags:
Comment:
Added external links, media links and configurable validation of these.
Modified paths:
  • /trunk/parsers/libmwparser/TODO (modified) (history)
  • /trunk/parsers/libmwparser/config.h (deleted) (history)
  • /trunk/parsers/libmwparser/config.h.in (modified) (history)
  • /trunk/parsers/libmwparser/configure (modified) (history)
  • /trunk/parsers/libmwparser/configure.ac (modified) (history)
  • /trunk/parsers/libmwparser/include/mwlexercontext.h (modified) (history)
  • /trunk/parsers/libmwparser/include/mwlistener.h (modified) (history)
  • /trunk/parsers/libmwparser/include/mwparsercontext.h (modified) (history)
  • /trunk/parsers/libmwparser/include/mwutils.h (modified) (history)
  • /trunk/parsers/libmwparser/src/mwLexer.g (modified) (history)
  • /trunk/parsers/libmwparser/src/mwParser.g (modified) (history)
  • /trunk/parsers/libmwparser/src/mwlexercontext.c (modified) (history)
  • /trunk/parsers/libmwparser/src/mwlexerpredicatetable.php (modified) (history)
  • /trunk/parsers/libmwparser/src/mwlinks.c (modified) (history)
  • /trunk/parsers/libmwparser/src/tracingcontext.c (modified) (history)
  • /trunk/parsers/libmwparser/tests/link/link1.out (modified) (history)
  • /trunk/parsers/libmwparser/tests/link/link3.in (added) (history)
  • /trunk/parsers/libmwparser/tests/link/link3.out (added) (history)
  • /trunk/parsers/libmwparser/tests/link/link4.in (added) (history)
  • /trunk/parsers/libmwparser/tests/link/link4.out (added) (history)
  • /trunk/parsers/libmwparser/tests/src/testtext.c (modified) (history)
  • /trunk/parsers/libmwparser/tests/testsuite (modified) (history)
  • /trunk/parsers/libmwparser/tests/testsuite.at (modified) (history)

Diff [purge]

Index: trunk/parsers/libmwparser/config.h
@@ -1,81 +0,0 @@
2 -/* config.h. Generated from config.h.in by configure. */
3 -/* config.h.in. Generated from configure.ac by autoheader. */
4 -
5 -/* Define to 1 if you have the <dlfcn.h> header file. */
6 -#define HAVE_DLFCN_H 1
7 -
8 -/* Define to 1 if you have the <inttypes.h> header file. */
9 -#define HAVE_INTTYPES_H 1
10 -
11 -/* Define to 1 if you have the <memory.h> header file. */
12 -#define HAVE_MEMORY_H 1
13 -
14 -/* Define to 1 if stdbool.h conforms to C99. */
15 -#define HAVE_STDBOOL_H 1
16 -
17 -/* Define to 1 if you have the <stdint.h> header file. */
18 -#define HAVE_STDINT_H 1
19 -
20 -/* Define to 1 if you have the <stdlib.h> header file. */
21 -#define HAVE_STDLIB_H 1
22 -
23 -/* Define to 1 if you have the <strings.h> header file. */
24 -#define HAVE_STRINGS_H 1
25 -
26 -/* Define to 1 if you have the <string.h> header file. */
27 -#define HAVE_STRING_H 1
28 -
29 -/* Define to 1 if you have the <sys/stat.h> header file. */
30 -#define HAVE_SYS_STAT_H 1
31 -
32 -/* Define to 1 if you have the <sys/types.h> header file. */
33 -#define HAVE_SYS_TYPES_H 1
34 -
35 -/* Header file for supporting wide character regexps. */
36 -#define HAVE_TRE_REGEX_H 1
37 -
38 -/* Define to 1 if you have the <unistd.h> header file. */
39 -#define HAVE_UNISTD_H 1
40 -
41 -/* Define to 1 if you have the <wchar.h> header file. */
42 -#define HAVE_WCHAR_H 1
43 -
44 -/* Define to 1 if the system has the type `_Bool'. */
45 -#define HAVE__BOOL 1
46 -
47 -/* Define to the sub-directory in which libtool stores uninstalled libraries.
48 - */
49 -#define LT_OBJDIR ".libs/"
50 -
51 -/* Define to 1 if assertions should be disabled. */
52 -/* #undef NDEBUG */
53 -
54 -/* Name of package */
55 -#define PACKAGE "libmwparser"
56 -
57 -/* Define to the address where bug reports for this package should be sent. */
58 -#define PACKAGE_BUGREPORT "andreas.jonsson@kreablo.se"
59 -
60 -/* Define to the full name of this package. */
61 -#define PACKAGE_NAME "libmwparser"
62 -
63 -/* Define to the full name and version of this package. */
64 -#define PACKAGE_STRING "libmwparser 0.3"
65 -
66 -/* Define to the one symbol short name of this package. */
67 -#define PACKAGE_TARNAME "libmwparser"
68 -
69 -/* Define to the home page for this package. */
70 -#define PACKAGE_URL ""
71 -
72 -/* Define to the version of this package. */
73 -#define PACKAGE_VERSION "0.3"
74 -
75 -/* The size of `wchar_t', as computed by sizeof. */
76 -#define SIZEOF_WCHAR_T 4
77 -
78 -/* Define to 1 if you have the ANSI C header files. */
79 -#define STDC_HEADERS 1
80 -
81 -/* Version number of package */
82 -#define VERSION "0.3"
Index: trunk/parsers/libmwparser/config.h.in
@@ -1,5 +1,8 @@
22 /* config.h.in. Generated from configure.ac by autoheader. */
33
 4+/* Define if building universal (internal helper macro) */
 5+#undef AC_APPLE_UNIVERSAL_BUILD
 6+
47 /* Define to 1 if you have the <dlfcn.h> header file. */
58 #undef HAVE_DLFCN_H
69
@@ -78,3 +81,15 @@
7982
8083 /* Version number of package */
8184 #undef VERSION
 85+
 86+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
 87+ significant byte first (like Motorola and SPARC, unlike Intel). */
 88+#if defined AC_APPLE_UNIVERSAL_BUILD
 89+# if defined __BIG_ENDIAN__
 90+# define WORDS_BIGENDIAN 1
 91+# endif
 92+#else
 93+# ifndef WORDS_BIGENDIAN
 94+# undef WORDS_BIGENDIAN
 95+# endif
 96+#endif
Index: trunk/parsers/libmwparser/tests/src/testtext.c
@@ -171,7 +171,7 @@
172172 int diff_us = stop.tv_usec - start.tv_usec;
173173
174174 diff_us += diff_s * 1000000;
175 - // fprintf(stderr, "Time: %d micro seconds\n", diff_us);
 175+ // fprintf(stderr, "Time: %d micro seconds\n", diff_us);
176176
177177 return 0;
178178 }
Index: trunk/parsers/libmwparser/tests/testsuite
@@ -579,7 +579,7 @@
580580 # List of the tested programs.
581581 at_tested=''
582582 # List of the all the test groups.
583 -at_groups_all=' 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29'
 583+at_groups_all=' 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31'
584584 # As many question marks as there are digits in the last test group number.
585585 # Used to normalize the test group numbers so that `ls' lists them in
586586 # numerical order.
@@ -614,6 +614,8 @@
615615 27;testsuite.at:189;hr;;
616616 28;testsuite.at:196;link1;;
617617 29;testsuite.at:204;link2;;
 618+30;testsuite.at:211;link3;;
 619+31;testsuite.at:218;link4;;
618620 "
619621
620622 # at_fn_validate_ranges NAME...
@@ -625,7 +627,7 @@
626628 for at_grp
627629 do
628630 eval at_value=\$$at_grp
629 - if test $at_value -lt 1 || test $at_value -gt 29; then
 631+ if test $at_value -lt 1 || test $at_value -gt 31; then
630632 $as_echo "invalid test group: $at_value" >&2
631633 exit 1
632634 fi
@@ -3012,3 +3014,75 @@
30133015 ) 5>&1 2>&1 | eval $at_tee_pipe
30143016 read at_status <"$at_status_file"
30153017 #AT_STOP_29
 3018+#AT_START_30
 3019+# 30. testsuite.at:211: link3
 3020+at_setup_line='testsuite.at:211'
 3021+at_fn_banner 1
 3022+at_desc="link3"
 3023+at_desc_line=" 30: $at_desc "
 3024+$at_quiet $as_echo_n "$at_desc_line"
 3025+at_xfail=no
 3026+echo "# -*- compilation -*-" >> "$at_group_log"
 3027+(
 3028+ $as_echo "30. testsuite.at:211: testing ..."
 3029+ $at_traceon
 3030+
 3031+
 3032+cat >tempoutput <<'_ATEOF'
 3033+_ATEOF
 3034+
 3035+{ set +x
 3036+$as_echo "$at_srcdir/testsuite.at:214: testtext \${srcdir}/link/link3.in > tempoutput && diff \${srcdir}/link/link3.out tempoutput"
 3037+at_fn_check_prepare_notrace 'a ${...} parameter expansion' "testsuite.at:214"
 3038+( $at_check_trace; testtext ${srcdir}/link/link3.in > tempoutput && diff ${srcdir}/link/link3.out tempoutput
 3039+) >>"$at_stdout" 2>>"$at_stderr"
 3040+at_status=$? at_failed=false
 3041+$at_check_filter
 3042+at_fn_diff_devnull "$at_stderr" || at_failed=:
 3043+at_fn_diff_devnull "$at_stdout" || at_failed=:
 3044+at_fn_check_status 0 $at_status "$at_srcdir/testsuite.at:214"
 3045+$at_failed && at_fn_log_failure
 3046+$at_traceon; }
 3047+
 3048+
 3049+ set +x
 3050+ $at_times_p && times >"$at_times_file"
 3051+) 5>&1 2>&1 | eval $at_tee_pipe
 3052+read at_status <"$at_status_file"
 3053+#AT_STOP_30
 3054+#AT_START_31
 3055+# 31. testsuite.at:218: link4
 3056+at_setup_line='testsuite.at:218'
 3057+at_fn_banner 1
 3058+at_desc="link4"
 3059+at_desc_line=" 31: $at_desc "
 3060+$at_quiet $as_echo_n "$at_desc_line"
 3061+at_xfail=no
 3062+echo "# -*- compilation -*-" >> "$at_group_log"
 3063+(
 3064+ $as_echo "31. testsuite.at:218: testing ..."
 3065+ $at_traceon
 3066+
 3067+
 3068+cat >tempoutput <<'_ATEOF'
 3069+_ATEOF
 3070+
 3071+{ set +x
 3072+$as_echo "$at_srcdir/testsuite.at:221: testtext \${srcdir}/link/link4.in > tempoutput && diff \${srcdir}/link/link4.out tempoutput"
 3073+at_fn_check_prepare_notrace 'a ${...} parameter expansion' "testsuite.at:221"
 3074+( $at_check_trace; testtext ${srcdir}/link/link4.in > tempoutput && diff ${srcdir}/link/link4.out tempoutput
 3075+) >>"$at_stdout" 2>>"$at_stderr"
 3076+at_status=$? at_failed=false
 3077+$at_check_filter
 3078+at_fn_diff_devnull "$at_stderr" || at_failed=:
 3079+at_fn_diff_devnull "$at_stdout" || at_failed=:
 3080+at_fn_check_status 0 $at_status "$at_srcdir/testsuite.at:221"
 3081+$at_failed && at_fn_log_failure
 3082+$at_traceon; }
 3083+
 3084+
 3085+ set +x
 3086+ $at_times_p && times >"$at_times_file"
 3087+) 5>&1 2>&1 | eval $at_tee_pipe
 3088+read at_status <"$at_status_file"
 3089+#AT_STOP_31
Index: trunk/parsers/libmwparser/tests/testsuite.at
@@ -207,3 +207,17 @@
208208 AT_CHECK([testtext ${srcdir}/link/link2.in > tempoutput && diff ${srcdir}/link/link2.out tempoutput])
209209
210210 AT_CLEANUP
 211+
 212+AT_SETUP([link3])
 213+
 214+AT_DATA([tempoutput], [])
 215+AT_CHECK([testtext ${srcdir}/link/link3.in > tempoutput && diff ${srcdir}/link/link3.out tempoutput])
 216+
 217+AT_CLEANUP
 218+
 219+AT_SETUP([link4])
 220+
 221+AT_DATA([tempoutput], [])
 222+AT_CHECK([testtext ${srcdir}/link/link4.in > tempoutput && diff ${srcdir}/link/link4.out tempoutput])
 223+
 224+AT_CLEANUP
Index: trunk/parsers/libmwparser/tests/link/link1.out
@@ -1,7 +1,6 @@
22 BEGIN ARTICLE
33 BEGIN PARAGRAPH
4 - SPECIAL[[]
5 - SPECIAL[[]
 4+ SPECIAL[[[]
65 WORD[Link]
76 SPECIAL[|]
87 WORD[text]
Index: trunk/parsers/libmwparser/tests/link/link3.in
@@ -0,0 +1 @@
 2+[[Link]] [[[Link]]
Index: trunk/parsers/libmwparser/tests/link/link3.out
@@ -0,0 +1,11 @@
 2+BEGIN ARTICLE
 3+ BEGIN PARAGRAPH
 4+ INTERNAL LINK[Link]
 5+ SPACE[ ]
 6+ SPECIAL[[[]
 7+ SPECIAL[[]
 8+ WORD[Link]
 9+ SPECIAL[]]
 10+ SPECIAL[]]
 11+ END PARAGRAPH
 12+END ARTICLE
Index: trunk/parsers/libmwparser/tests/link/link4.in
@@ -0,0 +1,3 @@
 2+[[Link]] [[[Link]] [link ] [http://foo bar] [https://a] [mailto:a] [http://b
 3+
 4+
Index: trunk/parsers/libmwparser/tests/link/link4.out
@@ -0,0 +1,32 @@
 2+BEGIN ARTICLE
 3+ BEGIN PARAGRAPH
 4+ INTERNAL LINK[Link]
 5+ SPACE[ ]
 6+ SPECIAL[[[]
 7+ SPECIAL[[]
 8+ WORD[Link]
 9+ SPECIAL[]]
 10+ SPECIAL[]]
 11+ SPACE[ ]
 12+ SPECIAL[[]
 13+ WORD[link]
 14+ SPACE[ ]
 15+ SPECIAL[]]
 16+ SPACE[ ]
 17+ BEGIN EXTERNAL LINK[http://foo]
 18+ WORD[bar]
 19+ END EXTERNAL LINK
 20+ SPACE[ ]
 21+ EXTERNAL LINK[https://a]
 22+ SPACE[ ]
 23+ EXTERNAL LINK[mailto:a]
 24+ SPACE[ ]
 25+ NEWLINE
 26+ SPECIAL[[]
 27+ WORD[http]
 28+ SPECIAL[:]
 29+ SPECIAL[/]
 30+ SPECIAL[/]
 31+ WORD[b]
 32+ END PARAGRAPH
 33+END ARTICLE
Index: trunk/parsers/libmwparser/configure.ac
@@ -53,6 +53,7 @@
5454 AC_HEADER_ASSERT
5555 AC_CHECK_HEADERS([wchar.h])
5656 AC_CHECK_SIZEOF([wchar_t], [], [#include<wchar.h>])
 57+AC_C_BIGENDIAN
5758
5859 AC_CHECK_HEADER(tre/regex.h, AC_DEFINE([HAVE_TRE_REGEX_H], [1], [Header file for supporting wide character regexps.]), AC_MSG_ERROR([tre/regex.h not found]))
5960 AC_CHECK_LIB([tre], [tre_regwcomp], [LIBREGEX="-ltre"], AC_MSG_ERROR([Regexp library for wide characters not found.]))
Index: trunk/parsers/libmwparser/TODO
@@ -1,9 +0,0 @@
2 -Test optimization options:
3 -
4 --ftree-loop-linear
5 --floop-interchange
6 --floop-strip-mine
7 --floop-block
Index: trunk/parsers/libmwparser/configure
@@ -4756,7 +4756,231 @@
47574757 _ACEOF
47584758
47594759
 4760+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
 4761+$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
 4762+if test "${ac_cv_c_bigendian+set}" = set; then :
 4763+ $as_echo_n "(cached) " >&6
 4764+else
 4765+ ac_cv_c_bigendian=unknown
 4766+ # See if we're dealing with a universal compiler.
 4767+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 4768+/* end confdefs.h. */
 4769+#ifndef __APPLE_CC__
 4770+ not a universal capable compiler
 4771+ #endif
 4772+ typedef int dummy;
47604773
 4774+_ACEOF
 4775+if ac_fn_c_try_compile "$LINENO"; then :
 4776+
 4777+ # Check for potential -arch flags. It is not universal unless
 4778+ # there are at least two -arch flags with different values.
 4779+ ac_arch=
 4780+ ac_prev=
 4781+ for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
 4782+ if test -n "$ac_prev"; then
 4783+ case $ac_word in
 4784+ i?86 | x86_64 | ppc | ppc64)
 4785+ if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
 4786+ ac_arch=$ac_word
 4787+ else
 4788+ ac_cv_c_bigendian=universal
 4789+ break
 4790+ fi
 4791+ ;;
 4792+ esac
 4793+ ac_prev=
 4794+ elif test "x$ac_word" = "x-arch"; then
 4795+ ac_prev=arch
 4796+ fi
 4797+ done
 4798+fi
 4799+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 4800+ if test $ac_cv_c_bigendian = unknown; then
 4801+ # See if sys/param.h defines the BYTE_ORDER macro.
 4802+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 4803+/* end confdefs.h. */
 4804+#include <sys/types.h>
 4805+ #include <sys/param.h>
 4806+
 4807+int
 4808+main ()
 4809+{
 4810+#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
 4811+ && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
 4812+ && LITTLE_ENDIAN)
 4813+ bogus endian macros
 4814+ #endif
 4815+
 4816+ ;
 4817+ return 0;
 4818+}
 4819+_ACEOF
 4820+if ac_fn_c_try_compile "$LINENO"; then :
 4821+ # It does; now see whether it defined to BIG_ENDIAN or not.
 4822+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 4823+/* end confdefs.h. */
 4824+#include <sys/types.h>
 4825+ #include <sys/param.h>
 4826+
 4827+int
 4828+main ()
 4829+{
 4830+#if BYTE_ORDER != BIG_ENDIAN
 4831+ not big endian
 4832+ #endif
 4833+
 4834+ ;
 4835+ return 0;
 4836+}
 4837+_ACEOF
 4838+if ac_fn_c_try_compile "$LINENO"; then :
 4839+ ac_cv_c_bigendian=yes
 4840+else
 4841+ ac_cv_c_bigendian=no
 4842+fi
 4843+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 4844+fi
 4845+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 4846+ fi
 4847+ if test $ac_cv_c_bigendian = unknown; then
 4848+ # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
 4849+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 4850+/* end confdefs.h. */
 4851+#include <limits.h>
 4852+
 4853+int
 4854+main ()
 4855+{
 4856+#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
 4857+ bogus endian macros
 4858+ #endif
 4859+
 4860+ ;
 4861+ return 0;
 4862+}
 4863+_ACEOF
 4864+if ac_fn_c_try_compile "$LINENO"; then :
 4865+ # It does; now see whether it defined to _BIG_ENDIAN or not.
 4866+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 4867+/* end confdefs.h. */
 4868+#include <limits.h>
 4869+
 4870+int
 4871+main ()
 4872+{
 4873+#ifndef _BIG_ENDIAN
 4874+ not big endian
 4875+ #endif
 4876+
 4877+ ;
 4878+ return 0;
 4879+}
 4880+_ACEOF
 4881+if ac_fn_c_try_compile "$LINENO"; then :
 4882+ ac_cv_c_bigendian=yes
 4883+else
 4884+ ac_cv_c_bigendian=no
 4885+fi
 4886+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 4887+fi
 4888+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 4889+ fi
 4890+ if test $ac_cv_c_bigendian = unknown; then
 4891+ # Compile a test program.
 4892+ if test "$cross_compiling" = yes; then :
 4893+ # Try to guess by grepping values from an object file.
 4894+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 4895+/* end confdefs.h. */
 4896+short int ascii_mm[] =
 4897+ { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
 4898+ short int ascii_ii[] =
 4899+ { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
 4900+ int use_ascii (int i) {
 4901+ return ascii_mm[i] + ascii_ii[i];
 4902+ }
 4903+ short int ebcdic_ii[] =
 4904+ { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
 4905+ short int ebcdic_mm[] =
 4906+ { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
 4907+ int use_ebcdic (int i) {
 4908+ return ebcdic_mm[i] + ebcdic_ii[i];
 4909+ }
 4910+ extern int foo;
 4911+
 4912+int
 4913+main ()
 4914+{
 4915+return use_ascii (foo) == use_ebcdic (foo);
 4916+ ;
 4917+ return 0;
 4918+}
 4919+_ACEOF
 4920+if ac_fn_c_try_compile "$LINENO"; then :
 4921+ if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
 4922+ ac_cv_c_bigendian=yes
 4923+ fi
 4924+ if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
 4925+ if test "$ac_cv_c_bigendian" = unknown; then
 4926+ ac_cv_c_bigendian=no
 4927+ else
 4928+ # finding both strings is unlikely to happen, but who knows?
 4929+ ac_cv_c_bigendian=unknown
 4930+ fi
 4931+ fi
 4932+fi
 4933+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 4934+else
 4935+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 4936+/* end confdefs.h. */
 4937+$ac_includes_default
 4938+int
 4939+main ()
 4940+{
 4941+
 4942+ /* Are we little or big endian? From Harbison&Steele. */
 4943+ union
 4944+ {
 4945+ long int l;
 4946+ char c[sizeof (long int)];
 4947+ } u;
 4948+ u.l = 1;
 4949+ return u.c[sizeof (long int) - 1] == 1;
 4950+
 4951+ ;
 4952+ return 0;
 4953+}
 4954+_ACEOF
 4955+if ac_fn_c_try_run "$LINENO"; then :
 4956+ ac_cv_c_bigendian=no
 4957+else
 4958+ ac_cv_c_bigendian=yes
 4959+fi
 4960+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
 4961+ conftest.$ac_objext conftest.beam conftest.$ac_ext
 4962+fi
 4963+
 4964+ fi
 4965+fi
 4966+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
 4967+$as_echo "$ac_cv_c_bigendian" >&6; }
 4968+ case $ac_cv_c_bigendian in #(
 4969+ yes)
 4970+ $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
 4971+;; #(
 4972+ no)
 4973+ ;; #(
 4974+ universal)
 4975+
 4976+$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
 4977+
 4978+ ;; #(
 4979+ *)
 4980+ as_fn_error "unknown endianness
 4981+ presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
 4982+ esac
 4983+
 4984+
47614985 ac_fn_c_check_header_mongrel "$LINENO" "tre/regex.h" "ac_cv_header_tre_regex_h" "$ac_includes_default"
47624986 if test "x$ac_cv_header_tre_regex_h" = x""yes; then :
47634987
@@ -5362,13 +5586,13 @@
53635587 else
53645588 lt_cv_nm_interface="BSD nm"
53655589 echo "int some_variable = 0;" > conftest.$ac_ext
5366 - (eval echo "\"\$as_me:5365: $ac_compile\"" >&5)
 5590+ (eval echo "\"\$as_me:5589: $ac_compile\"" >&5)
53675591 (eval "$ac_compile" 2>conftest.err)
53685592 cat conftest.err >&5
5369 - (eval echo "\"\$as_me:5368: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
 5593+ (eval echo "\"\$as_me:5592: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
53705594 (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
53715595 cat conftest.err >&5
5372 - (eval echo "\"\$as_me:5371: output\"" >&5)
 5596+ (eval echo "\"\$as_me:5595: output\"" >&5)
53735597 cat conftest.out >&5
53745598 if $GREP 'External.*some_variable' conftest.out > /dev/null; then
53755599 lt_cv_nm_interface="MS dumpbin"
@@ -6573,7 +6797,7 @@
65746798 ;;
65756799 *-*-irix6*)
65766800 # Find out which ABI we are using.
6577 - echo '#line 6576 "configure"' > conftest.$ac_ext
 6801+ echo '#line 6800 "configure"' > conftest.$ac_ext
65786802 if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
65796803 (eval $ac_compile) 2>&5
65806804 ac_status=$?
@@ -7831,11 +8055,11 @@
78328056 -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
78338057 -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
78348058 -e 's:$: $lt_compiler_flag:'`
7835 - (eval echo "\"\$as_me:7834: $lt_compile\"" >&5)
 8059+ (eval echo "\"\$as_me:8058: $lt_compile\"" >&5)
78368060 (eval "$lt_compile" 2>conftest.err)
78378061 ac_status=$?
78388062 cat conftest.err >&5
7839 - echo "$as_me:7838: \$? = $ac_status" >&5
 8063+ echo "$as_me:8062: \$? = $ac_status" >&5
78408064 if (exit $ac_status) && test -s "$ac_outfile"; then
78418065 # The compiler can only warn and ignore the option if not recognized
78428066 # So say no if there are warnings other than the usual output.
@@ -8170,11 +8394,11 @@
81718395 -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
81728396 -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
81738397 -e 's:$: $lt_compiler_flag:'`
8174 - (eval echo "\"\$as_me:8173: $lt_compile\"" >&5)
 8398+ (eval echo "\"\$as_me:8397: $lt_compile\"" >&5)
81758399 (eval "$lt_compile" 2>conftest.err)
81768400 ac_status=$?
81778401 cat conftest.err >&5
8178 - echo "$as_me:8177: \$? = $ac_status" >&5
 8402+ echo "$as_me:8401: \$? = $ac_status" >&5
81798403 if (exit $ac_status) && test -s "$ac_outfile"; then
81808404 # The compiler can only warn and ignore the option if not recognized
81818405 # So say no if there are warnings other than the usual output.
@@ -8275,11 +8499,11 @@
82768500 -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
82778501 -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
82788502 -e 's:$: $lt_compiler_flag:'`
8279 - (eval echo "\"\$as_me:8278: $lt_compile\"" >&5)
 8503+ (eval echo "\"\$as_me:8502: $lt_compile\"" >&5)
82808504 (eval "$lt_compile" 2>out/conftest.err)
82818505 ac_status=$?
82828506 cat out/conftest.err >&5
8283 - echo "$as_me:8282: \$? = $ac_status" >&5
 8507+ echo "$as_me:8506: \$? = $ac_status" >&5
82848508 if (exit $ac_status) && test -s out/conftest2.$ac_objext
82858509 then
82868510 # The compiler can only warn and ignore the option if not recognized
@@ -8330,11 +8554,11 @@
83318555 -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
83328556 -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
83338557 -e 's:$: $lt_compiler_flag:'`
8334 - (eval echo "\"\$as_me:8333: $lt_compile\"" >&5)
 8558+ (eval echo "\"\$as_me:8557: $lt_compile\"" >&5)
83358559 (eval "$lt_compile" 2>out/conftest.err)
83368560 ac_status=$?
83378561 cat out/conftest.err >&5
8338 - echo "$as_me:8337: \$? = $ac_status" >&5
 8562+ echo "$as_me:8561: \$? = $ac_status" >&5
83398563 if (exit $ac_status) && test -s out/conftest2.$ac_objext
83408564 then
83418565 # The compiler can only warn and ignore the option if not recognized
@@ -10714,7 +10938,7 @@
1071510939 lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1071610940 lt_status=$lt_dlunknown
1071710941 cat > conftest.$ac_ext <<_LT_EOF
10718 -#line 10717 "configure"
 10942+#line 10941 "configure"
1071910943 #include "confdefs.h"
1072010944
1072110945 #if HAVE_DLFCN_H
@@ -10810,7 +11034,7 @@
1081111035 lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1081211036 lt_status=$lt_dlunknown
1081311037 cat > conftest.$ac_ext <<_LT_EOF
10814 -#line 10813 "configure"
 11038+#line 11037 "configure"
1081511039 #include "confdefs.h"
1081611040
1081711041 #if HAVE_DLFCN_H
@@ -11160,6 +11384,7 @@
1116111385 Usually this means the macro was only invoked conditionally." "$LINENO" 5
1116211386 fi
1116311387
 11388+
1116411389 : ${CONFIG_STATUS=./config.status}
1116511390 ac_write_fail=0
1116611391 ac_clean_files_save=$ac_clean_files
Index: trunk/parsers/libmwparser/include/mwutils.h
@@ -5,28 +5,4 @@
66 #include <antlr3.h>
77 #include "config.h"
88
9 -static inline const wchar_t *
10 -mwAntlr3stows(pANTLR3_STRING string, void **state)
11 -{
12 -#if (SIZEOF_WCHAR_T == 4)
13 - return (wchar_t*)string->chars;
14 -#elif (SIZEOF_WCHAR_T == 2)
15 -#error Unsupported wchar_t size!
16 -#else
17 -#error Unsupported wchar_t size!
189 #endif
19 -}
20 -
21 -static inline void
22 -mwFreeStringConversionState(void *state)
23 -{
24 -#if(SIZEOF_WCHAR_T == 4)
25 - /* do nothing */
26 -#elif (SIZEOF_WCHAR_T == 2)
27 -#error Unsupported wchar_t size!
28 -#else
29 -#error Unsupported wchar_t size!
30 -#endif
31 -}
32 -
33 -#endif
Index: trunk/parsers/libmwparser/include/mwparsercontext.h
@@ -91,6 +91,12 @@
9292 void (*beginInternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkTitle);
9393 void (*endInternalLink)(struct MWPARSERCONTEXT_struct * context);
9494 void (*onInternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkTitle);
 95+ void (*beginExternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkUrl);
 96+ void (*endExternalLink)(struct MWPARSERCONTEXT_struct * context);
 97+ void (*onExternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkUrl);
 98+ void (*beginMediaLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_VECTOR attr);
 99+ void (*endMediaLink)(struct MWPARSERCONTEXT_struct * context);
 100+ void (*onMediaLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_VECTOR attr);
95101 void (*beginFormat)(struct MWPARSERCONTEXT_struct * context,
96102 void (*begin)(),
97103 void (*end)(),
Index: trunk/parsers/libmwparser/include/mwlexercontext.h
@@ -5,6 +5,7 @@
66 #include <wchar.h>
77 #include <tre/regex.h>
88 #include <antlr3defs.h>
 9+#include <iconv.h>
910
1011 /*
1112 * Different table types can be nested, but not mixed.
@@ -57,6 +58,7 @@
5859 pANTLR3_STACK blockContextStack;
5960 int headingLevel;
6061 regex_t legalTitleChars;
 62+ regex_t mediaLinkTitle;
6163
6264 /*
6365 * State for speculative execution.
@@ -68,6 +70,12 @@
6971 MWLEXERSPECULATION mediaLinkSpeculation;
7072 int istreamIndex;
7173
 74+ /*
 75+ * Character conversion.
 76+ */
 77+
 78+ iconv_t conversionState;
 79+
7280 /** Method for deallocating this instance. */
7381 void (*free)(void * context);
7482 /** Reset instance */
@@ -79,7 +87,7 @@
8088 pANTLR3_VECTOR_FACTORY vectorFactory;
8189 pANTLR3_STRING_FACTORY stringFactory;
8290 bool (*isLegalTitle)(struct MWLEXERCONTEXT_struct * context, pANTLR3_STRING text);
83 - bool (*isLegalExternalLink)(struct MWLEXERCONTEXT_struct * context, pANTLR3_STRING text);
 91+ bool (*isMediaLinkTitle)(struct MWLEXERCONTEXT_struct * context, pANTLR3_STRING text);
8492
8593
8694 }
Index: trunk/parsers/libmwparser/include/mwlistener.h
@@ -93,7 +93,13 @@
9494 void (*endHeading)(struct MWLISTENER_struct * context);
9595 void (*beginInternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkTitle);
9696 void (*endInternalLink)(struct MWLISTENER_struct * context);
97 - void (*onInternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkTitle);
 97+ void (*onInternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl);
 98+ void (*beginExternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl);
 99+ void (*endExternalLink)(struct MWLISTENER_struct * context);
 100+ void (*onExternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl);
 101+ void (*beginMediaLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr);
 102+ void (*endMediaLink)(struct MWLISTENER_struct * context);
 103+ void (*onMediaLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr);
98104 void (*beginTableOfContents)(struct MWLISTENER_struct * context);
99105 void (*endTableOfContents)(struct MWLISTENER_struct * context);
100106 void (*beginTableOfContentsItem)(struct MWLISTENER_struct * context, int level);
Index: trunk/parsers/libmwparser/src/mwlexerpredicatetable.php
@@ -93,7 +93,7 @@
9494 'name' => 'wikitextListElement',
9595 'initiallyDisabled' => array(),
9696 'types' => array('block'),
97 - 'affects' => array(new TypeDisable('block', 'WIKITEXT_BLOCK')),
 97+ 'affects' => array(new TypeDisable('block', 'WIKITEXT_BLOCK_OR_LINK')),
9898 'mayNest' => false,
9999 'scope' => new Scope('eol'),
100100 ),
@@ -254,7 +254,22 @@
255255 'initiallyDisabled' => array(),
256256 'mayNest' => false,
257257 'types' => array(),
258 - )
 258+ 'affects' => array(new PredicateDisable('externalLinkOpen', 'WIKITEXT_BLOCK_OR_LINK')),
 259+ ),
 260+ array(
 261+ 'name' => "externalLinkOpen",
 262+ 'close' => "externalLinkClose",
 263+ 'initiallyDisabled' => array(),
 264+ 'mayNest' => false,
 265+ 'types' => array(),
 266+ ),
 267+ array(
 268+ 'name' => "mediaLinkOpen",
 269+ 'close' => "mediaLinkClose",
 270+ 'initiallyDisabled' => array(),
 271+ 'mayNest' => false,
 272+ 'types' => array(),
 273+ ),
259274 );
260275
261276 foreach(array('B', 'Del', 'I', 'Ins', 'U', 'Font', 'Big', 'Small', 'Sub', 'Sup', 'Cite',
@@ -325,7 +340,9 @@
326341 'BLOCK_CONTEXT',
327342 'BLOCKQUOTE',
328343 'NESTING_LIMIT',
329 - 'WIKITEXT_BLOCK'
 344+ 'WIKITEXT_BLOCK_OR_LINK' // It should be OK for these two causes
 345+ // to share the same bit, since they are never applied
 346+ // to the same predicate.
330347 );
331348
332349 define('CX', 'context');
Index: trunk/parsers/libmwparser/src/mwParser.g
@@ -564,8 +564,9 @@
565565 | (HTML_H6_CLOSE { CX->endTableOfContentsItem(CX); }))|EOF)
566566 ;
567567
568 -link_element: internal_link
 568+link_element: internal_link | external_link | media_link
569569 ;
 570+
570571 internal_link: complete_internal_link | begin_internal_link | end_internal_link
571572 ;
572573
@@ -586,3 +587,46 @@
587588 IE(CX->endInternalLink(CX);)
588589 }
589590 ;
 591+
 592+external_link: complete_external_link | begin_external_link | end_external_link
 593+ ;
 594+
 595+complete_external_link: linkToken = EXTERNAL_LINK
 596+ {
 597+ IE(CX->onExternalLink(CX, $linkToken->custom);)
 598+ }
 599+ ;
 600+
 601+begin_external_link: linkToken = BEGIN_EXTERNAL_LINK
 602+ {
 603+ IE(CX->beginExternalLink(CX, $linkToken->custom);)
 604+ }
 605+ ;
 606+
 607+end_external_link: END_EXTERNAL_LINK
 608+ {
 609+ IE(CX->endExternalLink(CX);)
 610+ }
 611+ ;
 612+
 613+media_link: complete_media_link | begin_media_link | end_media_link
 614+ ;
 615+
 616+complete_media_link: linkToken = MEDIA_LINK
 617+ {
 618+ IE(CX->onMediaLink(CX, $linkToken->custom);)
 619+ }
 620+ ;
 621+
 622+begin_media_link: linkToken = BEGIN_MEDIA_LINK
 623+ {
 624+ IE(CX->beginMediaLink(CX, $linkToken->custom);)
 625+ }
 626+ ;
 627+
 628+end_media_link: END_MEDIA_LINK
 629+ {
 630+ IE(CX->endMediaLink(CX);)
 631+ }
 632+ ;
 633+
Index: trunk/parsers/libmwparser/src/mwlinks.c
@@ -1,10 +1,16 @@
2 -#include <antlr3defs.h>
 2+#include <antlr3.h>
33 #include <mwparsercontext.h>
44 #include <mwlinks.h>
55
66 static void beginInternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkTitle);
77 static void endInternalLink(MWPARSERCONTEXT *context);
88 static void onInternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkTitle);
 9+static void beginExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl);
 10+static void endExternalLink(MWPARSERCONTEXT *context);
 11+static void onExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl);
 12+static void beginMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr);
 13+static void endMediaLink(MWPARSERCONTEXT *context);
 14+static void onMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr);
915
1016 static void
1117 beginInternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkTitle)
@@ -32,11 +38,74 @@
3339 l->onInternalLink(l, linkTitle);
3440 }
3541
 42+static void
 43+beginExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl)
 44+{
 45+ MW_DELAYED_CALL( context, beginExternalLink, endExternalLink, linkUrl, NULL);
 46+ MW_BEGIN_ORDERED_FORMAT(context, beginExternalLink, endExternalLink, linkUrl, NULL, false);
 47+ MWLISTENER *l = &context->listener;
 48+ l->beginExternalLink(l, linkUrl);
 49+}
3650
 51+static void
 52+endExternalLink(MWPARSERCONTEXT *context)
 53+{
 54+ MW_SKIP_IF_EMPTY( context, beginExternalLink, endExternalLink, NULL);
 55+ MW_END_ORDERED_FORMAT(context, beginExternalLink, endExternalLink, NULL);
 56+ MWLISTENER *l = &context->listener;
 57+ l->endExternalLink(l);
 58+}
 59+
 60+static void
 61+onExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl)
 62+{
 63+ MW_TRIGGER_DELAYED_CALLS(context);
 64+ MWLISTENER *l = &context->listener;
 65+ l->onExternalLink(l, linkUrl);
 66+}
 67+
 68+static void
 69+beginMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr)
 70+{
 71+ MW_DELAYED_CALL( context, beginMediaLink, endMediaLink, attr, NULL);
 72+ MW_BEGIN_ORDERED_FORMAT(context, beginMediaLink, endMediaLink, attr, NULL, false);
 73+
 74+ pANTLR3_STRING linkUrl = attr->get(attr, attr->count - 1);
 75+ attr->remove(attr, attr->count - 1);
 76+ MWLISTENER *l = &context->listener;
 77+ l->beginMediaLink(l, linkUrl, attr);
 78+}
 79+
 80+static void
 81+endMediaLink(MWPARSERCONTEXT *context)
 82+{
 83+ MW_SKIP_IF_EMPTY( context, beginMediaLink, endMediaLink, NULL);
 84+ MW_END_ORDERED_FORMAT(context, beginMediaLink, endMediaLink, NULL);
 85+ MWLISTENER *l = &context->listener;
 86+ l->endMediaLink(l);
 87+}
 88+
 89+static void
 90+onMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr)
 91+{
 92+ MW_TRIGGER_DELAYED_CALLS(context);
 93+ pANTLR3_STRING linkUrl = attr->get(attr, attr->count - 1);
 94+ attr->remove(attr, attr->count - 1);
 95+ MWLISTENER *l = &context->listener;
 96+ l->onMediaLink(l, linkUrl, attr);
 97+}
 98+
 99+
37100 void
38101 mwLinksInit(MWPARSERCONTEXT *context)
39102 {
40103 context->beginInternalLink = beginInternalLink;
41104 context->endInternalLink = endInternalLink;
42105 context->onInternalLink = onInternalLink;
 106+ context->beginExternalLink = beginExternalLink;
 107+ context->endExternalLink = endExternalLink;
 108+ context->onExternalLink = onExternalLink;
 109+ context->beginMediaLink = beginMediaLink;
 110+ context->endMediaLink = endMediaLink;
 111+ context->onMediaLink = onMediaLink;
43112 }
Index: trunk/parsers/libmwparser/src/mwLexer.g
@@ -1,3 +1,22 @@
 2+/*
 3+ * Copyright 2010 Andreas Jonsson
 4+ *
 5+ * This file is part of libmwparser.
 6+ *
 7+ * Libmwparser is free software: you can redistribute it and/or modify
 8+ * it under the terms of the GNU General Public License as published by
 9+ * the Free Software Foundation, either version 3 of the License, or
 10+ * (at your option) any later version.
 11+ *
 12+ * This program is distributed in the hope that it will be useful,
 13+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15+ * GNU General Public License for more details.
 16+ *
 17+ * You should have received a copy of the GNU General Public License
 18+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
 19+ */
 20+
221 lexer grammar mwLexer;
322
423 /*
@@ -9,12 +28,13 @@
1029 }
1130
1231 tokens {
13 - BEGIN_EXTERNAL_LINK;
1432 EXTERNAL_LINK;
1533 TABLE_HEADING;
1634 TABLE_HEADING_INLINE;
1735 TABLE_CAPTION;
1836 BEGIN_INTERNAL_LINK;
 37+ BEGIN_EXTERNAL_LINK;
 38+ BEGIN_MEDIA_LINK;
1939 HORIZONTAL_RULE;
2040 NOWIKI;
2141 BEGIN_HEADING;
@@ -145,6 +165,11 @@
146166 #define MW_SETTYPE(type) do { _type = type; } while (0)
147167 #define MW_EMITNEW(type, text) do { EMITNEW(NEW_TOK(type, text)); } while (0)
148168 #define MW_HIDE() do { LEXSTATE->channel = HIDDEN; } while (0)
 169+#define D_(msg) (fputs(msg, stderr), fputc('\n', stderr), printLexerInfo(LEXER), true)
 170+#define NEW_TOK(type, text) (newToken(LEXSTATE->tokFactory, type, text))
 171+#define SUBSTR1(start) (INPUT->substr(INPUT, start, GETCHARINDEX() - 1))
 172+#define SUBSTR2(start, end) (INPUT->substr(INPUT, start, end))
 173+#define HEADING_LEVEL USER1
149174
150175 static pANTLR3_COMMON_TOKEN
151176 newToken(pANTLR3_TOKEN_FACTORY factory, ANTLR3_UINT32 type, pANTLR3_STRING text)
@@ -174,6 +199,12 @@
175200 speculationFailure(context, sizeof(failures)/sizeof(MWLEXERSPECULATION*), failures); \
176201 } while (0)
177202
 203+
 204+/**
 205+ * Initiate a speculative execution.
 206+ * @param context
 207+ * @param speculation Storage space for the context backup.
 208+ */
178209 static void
179210 speculationInitiate(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation)
180211 {
@@ -186,18 +217,32 @@
187218 speculation->istreamIndex = context->istreamIndex++;
188219 }
189220
 221+/**
 222+ * Indicate that a speculative execution has succeeded.
 223+ */
190224 static void
191225 speculationSuccess(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation)
192226 {
193227 speculation->active = false;
194228 }
195229
 230+/**
 231+ * Abort speculative execution, without restoring the context.
 232+ */
196233 static void
197234 speculationAbort(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation)
198235 {
199236 speculation->active = false;
200237 }
201238
 239+/**
 240+ * Indicate that one or several speculative executions have failed and
 241+ * restore the context to the initiation point of the "oldest"
 242+ * speculation.
 243+ * @param context
 244+ * @param n Number of speculations in the array.
 245+ * @param speculation Array of speculation backup storage structures.
 246+ */
202247 static void
203248 speculationFailure(MWLEXERCONTEXT *context, int n, MWLEXERSPECULATION *speculation[])
204249 {
@@ -221,11 +266,19 @@
222267 }
223268 }
224269
 270+/**
 271+ * Check if a particular speculation has already been tried at the
 272+ * current character index.
 273+ * @return @c true if the speculation has already been tried and failed.
 274+ */
225275 static bool
226276 alreadyTried(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation) {
227277 return speculation->failurePoint == context->lexer->getCharIndex(context->lexer);
228278 }
229279
 280+/**
 281+ * Action to execute at the end of file.
 282+ */
230283 static void
231284 eofAction(void *param)
232285 {
@@ -239,18 +292,6 @@
240293
241294
242295
243 -#define ACTIVATE_EOF_ACTION do { \
244 - LEXER->eofAction = eofAction; \
245 - LEXER->eofActionParameter = CX; \
246 -} while (0)
247 -
248 -#define D_(msg) (fputs(msg, stderr), fputc('\n', stderr), printLexerInfo(LEXER), true)
249 -
250 -#define NEW_TOK(type, text) (newToken(LEXSTATE->tokFactory, type, text))
251 -#define SUBSTR1(start) (INPUT->substr(INPUT, start, GETCHARINDEX() - 1))
252 -#define SUBSTR2(start, end) (INPUT->substr(INPUT, start, end))
253 -
254 -#define HEADING_LEVEL USER1
255296 }
256297
257298 NOWIKI
@@ -451,6 +492,7 @@
452493 }
453494 '[['
454495 {
 496+ SPECULATION_FAILURE(CX, &CX->externalLinkSpeculation);
455497 mark = MARK();
456498 }
457499 (
@@ -468,7 +510,10 @@
469511 )
470512 )
471513 {
472 - if (!fail && isCompleteLink && CX->isLegalTitle(CX, linkTitle)) {
 514+ if (!fail && CX->isMediaLinkTitle(CX, linkTitle)) {
 515+ MW_EMIT();
 516+ SPECULATION_FAILURE(CX, &CX->internalLinkSpeculation);
 517+ } else if (!fail && isCompleteLink && CX->isLegalTitle(CX, linkTitle)) {
473518 ACTION(CUSTOM = linkTitle;)
474519 speculationAbort(CX, &CX->internalLinkSpeculation);
475520 } else if (!fail && CX->isLegalTitle(CX, linkTitle)) {
@@ -496,6 +541,68 @@
497542 }
498543 ;
499544
 545+MEDIA_LINK
 546+@init{
 547+ ANTLR3_MARKER mark;
 548+ pANTLR3_STRING linkTitle;
 549+ bool isCompleteLink = false;
 550+ bool isLegalTitle = false;
 551+ bool fail = false;
 552+ pANTLR3_VECTOR attr = NULL;
 553+}: {!CX->mediaLinkOpenDisabled && !alreadyTried(CX, &CX->mediaLinkSpeculation)}?=>
 554+ (
 555+ {
 556+ speculationInitiate(CX, &CX->mediaLinkSpeculation);
 557+ }
 558+ '[['
 559+ {
 560+ mark = MARK();
 561+ }
 562+ (
 563+ SPACE_TAB_CHAR*
 564+ (
 565+ INTERNAL_LINK_TITLE[&linkTitle]
 566+ SPACE_TAB_CHAR*
 567+ (
 568+ ']]' {isCompleteLink=true;}
 569+ | '|' MEDIA_LINK_ATTRIBUTES[&attr]
 570+ | {fail = true;}
 571+ )
 572+ )
 573+ | {fail = true;}
 574+ )
 575+ )
 576+ {
 577+ if (!fail && CX->isMediaLinkTitle(CX, linkTitle)) {
 578+ if (attr == NULL) {
 579+ attr = CX->vectorFactory->newVector(CX->vectorFactory);
 580+ }
 581+ /*
 582+ * We'll pack the link title in the attribute vector.
 583+ * The parser will unpack it and send it as a separate
 584+ * parameter to the client.
 585+ */
 586+ attr->add(attr, linkTitle, NULL);
 587+ ACTION(CUSTOM = attr;)
 588+ if (isCompleteLink) {
 589+ speculationAbort(CX, &CX->mediaLinkSpeculation);
 590+ } else {
 591+ onMediaLinkOpen(CX);
 592+ MW_SETTYPE(BEGIN_MEDIA_LINK);
 593+ }
 594+ } else {
 595+ speculationAbort(CX, &CX->mediaLinkSpeculation);
 596+ REWIND(mark);
 597+ MW_SETTYPE(SPECIAL);
 598+ }
 599+ }
 600+ ;
 601+
 602+fragment
 603+MEDIA_LINK_ATTRIBUTES[pANTLR3_VECTOR *attr]:
 604+ (MEDIA_LINK_ATTRIBUTE[&attr])*
 605+ ;
 606+
500607 END_INTERNAL_LINK: {!CX->internalLinkCloseDisabled}?=> ']]'
501608 {
502609 speculationSuccess(CX, &CX->internalLinkSpeculation);
@@ -503,13 +610,80 @@
504611 }
505612 ;
506613
507 -/*
 614+END_MEDIA_LINK: {!CX->mediaLinkCloseDisabled}?=> ']]'
 615+ {
 616+ speculationSuccess(CX, &CX->mediaLinkSpeculation);
 617+ onMediaLinkClose(CX);
 618+ }
 619+ ;
 620+
 621+EXTERNAL_LINK
 622+@init{
 623+ bool success = true;
 624+ bool complete = true;
 625+ ANTLR3_MARKER urlStart;
 626+ ANTLR3_MARKER urlEnd;
 627+}: {!CX->externalLinkOpenDisabled && !alreadyTried(CX, &CX->externalLinkSpeculation)}?=>
 628+ {
 629+ speculationInitiate(CX, &CX->externalLinkSpeculation);
 630+ }
 631+ ('[' ({urlStart = GETCHARINDEX();} URL_PROTOCOL
 632+ (( {urlEnd = GETCHARINDEX();} URL_CHAR)+ SPACE_TAB_CHAR* (']' | {complete = false;})
 633+ | {success = false;})
 634+ | {success = false;}) )
 635+ {
 636+ if (success) {
 637+ ACTION(CUSTOM = SUBSTR2(urlStart, urlEnd);)
 638+ if (!complete) {
 639+ MW_SETTYPE(BEGIN_EXTERNAL_LINK);
 640+ onExternalLinkOpen(CX);
 641+ } else {
 642+ speculationAbort(CX, &CX->externalLinkSpeculation);
 643+ }
 644+ } else {
 645+ speculationAbort(CX, &CX->externalLinkSpeculation);
 646+ MW_SETTYPE(SPECIAL);
 647+ }
 648+ }
 649+ ;
 650+
 651+END_EXTERNAL_LINK: {!CX->externalLinkCloseDisabled}?=> ']'
 652+ {
 653+ speculationSuccess(CX, &CX->externalLinkSpeculation);
 654+ onExternalLinkClose(CX);
 655+ }
 656+ ;
 657+
 658+EXTERNAL_LINK_FAIL_CONDITION: {CX->externalLinkSpeculation.active}?=>
 659+ '['
 660+ {
 661+ /*
 662+ * We must actually emit this token before failing the
 663+ * speculation, otherwise it will be emitted _after_
 664+ * the token stream has been reverted.
 665+ */
 666+ MW_EMIT();
 667+ SPECULATION_FAILURE(CX, &CX->externalLinkSpeculation);
 668+ }
 669+ ;
 670+
 671+
508672 fragment
509 -EXTERNAL_LINK_TITLE:
510 -//'/\[(\b(' . wfUrlProtocols() . ')'.
511 -// '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'
512 - LETTER '://'
513 -*/
 673+URL_PROTOCOL:
 674+ 'http://' |
 675+ 'https://' |
 676+ 'ftp://' |
 677+ 'irc://' |
 678+ 'gopher://' |
 679+ 'telnet://' |
 680+ 'nntp://' | // @bug 3808 RFC 1738
 681+ 'worldwind://'|
 682+ 'mailto:' |
 683+ 'news:' |
 684+ 'svn://' |
 685+ 'git://' |
 686+ 'mms://'
 687+ ;
514688
515689 fragment
516690 INTERNAL_LINK_TITLE[pANTLR3_STRING *linkTitle]
@@ -564,7 +738,7 @@
565739 ('\r\n' | NEWLINE_CHAR) {
566740 onEol(CX);
567741 speculationSuccess(CX, &CX->indentSpeculation);
568 - SPECULATION_FAILURE(CX, &CX->headingSpeculation);
 742+ SPECULATION_FAILURE(CX, &CX->headingSpeculation, &CX->externalLinkSpeculation);
569743 }
570744 ;
571745
@@ -968,7 +1142,9 @@
9691143 ')'|'*'|'+'|','|'-'|'.'|'/'|':'|
9701144 ';'|'<'|'='|'>'|'?'|'@'|'['|'\\'|
9711145 ']'|'^'|'_'|'`'|'{'|'|'|'}'|'~';
 1146+fragment URL_CHAR: ~('<'|'>'|'['|']'|'\u0000' .. '\u0020'|'\u007F');
9721147
 1148+
9731149 /* This should map the latin-1 range 0x80-0xff to the corresponding unicode codepoints: */
9741150 fragment LEGAL_TITLE_CHAR_RANGE: 'a'
9751151 ;
Index: trunk/parsers/libmwparser/src/mwlexercontext.c
@@ -1,6 +1,27 @@
 2+/*
 3+ * Copyright 2010 Andreas Jonsson
 4+ *
 5+ * This file is part of libmwparser.
 6+ *
 7+ * Libmwparser is free software: you can redistribute it and/or modify
 8+ * it under the terms of the GNU General Public License as published by
 9+ * the Free Software Foundation, either version 3 of the License, or
 10+ * (at your option) any later version.
 11+ *
 12+ * This program is distributed in the hope that it will be useful,
 13+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15+ * GNU General Public License for more details.
 16+ *
 17+ * You should have received a copy of the GNU General Public License
 18+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
 19+ */
 20+
221 #include <antlr3.h>
322 #include <mwlexercontext.h>
423 #include <assert.h>
 24+#include <iconv.h>
 25+#include <errno.h>
526
627 #include <mwLexer.h>
728 #include "mwlexerpredicates.h"
@@ -22,8 +43,12 @@
2344 static bool MWLexerContextReset(MWLEXERCONTEXT *context);
2445
2546 static bool isLegalTitle(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle);
26 -static bool isLegalExternalLink(MWLEXERCONTEXT *context, pANTLR3_STRING url);
 47+static bool isMediaLinkTitle(MWLEXERCONTEXT *context, pANTLR3_STRING url);
2748
 49+static int openConversion(MWLEXERCONTEXT *context, ANTLR3_UINT8 encoding);
 50+static const wchar_t *mwAntlr3stows(MWLEXERCONTEXT *context, pANTLR3_STRING string, void **state);
 51+static void mwFreeStringConversionState(void *state);
 52+
2853 /**
2954 * Set the characters allowed in a page title.
3055 *
@@ -32,9 +57,6 @@
3358 */
3459 static int setLegalTitleChars(MWLEXERCONTEXT *context, const wchar_t *posixExtendedRegexp);
3560
36 -
37 -
38 -
3961 MWLEXERCONTEXT *MWLexerContextNew(pANTLR3_LEXER lexer)
4062 {
4163 MWLEXERCONTEXT *context = ANTLR3_MALLOC(sizeof(*context));
@@ -51,7 +73,7 @@
5274 * specially.
5375 */
5476 int err = regwcomp(&context->legalTitleChars,
55 - L"^[- %!\"$&'()*,.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+]+$",
 77+ L"^[- %!\"$&'()*,./0-9:;=?@A-Z\\\\^_`a-z~\x80-\xFF+]+$",
5678 REG_EXTENDED);
5779 if (err) {
5880 char errbuf[200];
@@ -61,6 +83,18 @@
6284 return NULL;
6385 }
6486
 87+ err = regwcomp(&context->mediaLinkTitle,
 88+ L"^File:[- %!\"$&'()*,./0-9:;=?@A-Z\\\\^_`a-z~\x80-\xFF+]+$",
 89+ REG_EXTENDED);
 90+ if (err) {
 91+ char errbuf[200];
 92+ regerror(err, &context->mediaLinkTitle, errbuf, 200);
 93+ fprintf(stderr, "Failed to compile media link title regular expression: %s\n", errbuf);
 94+ context->free(context);
 95+ return NULL;
 96+ }
 97+
 98+
6599 #define NULL_FAIL(p) do { \
66100 if (p == NULL) { \
67101 context->free(context); \
@@ -77,6 +111,8 @@
78112 context->headingSpeculation.contextBackup.blockContextStack = NULL;
79113 context->mediaLinkSpeculation.contextBackup.blockContextStack = NULL;
80114
 115+ context->conversionState = (iconv_t)-1;
 116+
81117 context->vectorFactory = antlr3VectorFactoryNew(ANTLR3_SIZE_HINT);
82118 NULL_FAIL(context->vectorFactory);
83119
@@ -95,13 +131,18 @@
96132 NULL_FAIL(context->mediaLinkSpeculation.contextBackup.blockContextStack);
97133
98134 context->isLegalTitle = isLegalTitle;
99 - context->isLegalExternalLink = isLegalExternalLink;
 135+ context->isMediaLinkTitle = isMediaLinkTitle;
100136
101137 if (!context->reset(context)) {
102138 context->free(context);
103139 return NULL;
104140 }
105141
 142+ if (openConversion(context, context->lexer->input->encoding) < 0) {
 143+ context->free(context);
 144+ return NULL;
 145+ }
 146+
106147 return context;
107148 }
108149
@@ -186,8 +227,12 @@
187228 context->mediaLinkSpeculation.contextBackup.blockContextStack
188229 ->free(context->mediaLinkSpeculation.contextBackup.blockContextStack);
189230 }
 231+ if (context->conversionState != (iconv_t)-1) {
 232+ iconv_close(context->conversionState);
 233+ }
190234
191235 regfree(&context->legalTitleChars);
 236+ regfree(&context->mediaLinkTitle);
192237 ANTLR3_FREE(lexerContext);
193238 }
194239
@@ -195,20 +240,22 @@
196241 isLegalTitle(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle)
197242 {
198243 void *state;
199 - const wchar_t *wsLinkTitle = mwAntlr3stows(linkTitle, &state);
 244+ const wchar_t *wsLinkTitle = mwAntlr3stows(context, linkTitle, &state);
200245 regmatch_t match;
201246 int err = regwexec(&context->legalTitleChars, wsLinkTitle, 1, &match, 0);
202247 mwFreeStringConversionState(state);
203 - char buf[256];
204 - regerror(err, &context->legalTitleChars, buf, 256);
205 - //printf("result was: %d, message: %s, string: '%ls'\n", err, buf, linkTitle->chars);
206 - return true;
 248+ return err == 0;
207249 }
208250
209251 static bool
210 -isLegalExternalLink(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle)
 252+isMediaLinkTitle(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle)
211253 {
212 - return true;
 254+ void *state;
 255+ const wchar_t *wsLinkTitle = mwAntlr3stows(context, linkTitle, &state);
 256+ regmatch_t match;
 257+ int err = regwexec(&context->mediaLinkTitle, wsLinkTitle, 1, &match, 0);
 258+ mwFreeStringConversionState(state);
 259+ return err == 0;
213260 }
214261
215262 void printLexerInfo(pANTLR3_LEXER lexer)
@@ -223,3 +270,92 @@
224271 lexer->getCharIndex(lexer));
225272 }
226273
 274+
 275+static int
 276+openConversion(MWLEXERCONTEXT *context, ANTLR3_UINT8 encoding)
 277+{
 278+ static struct {
 279+ ANTLR3_UINT8 antlrEncoding;
 280+ const char* iconvEncoding;
 281+ } encodingTable[] = {
 282+ { ANTLR3_ENC_8BIT, "ASCII" },
 283+ { ANTLR3_ENC_UTF8, "UTF-8" },
 284+ { ANTLR3_ENC_UTF16, "UTF-16" },
 285+ { ANTLR3_ENC_UTF16BE, "UTF-16BE" },
 286+ { ANTLR3_ENC_UTF16LE, "UTF-16LE" },
 287+ { ANTLR3_ENC_UTF32, "UTF-32" },
 288+ { ANTLR3_ENC_UTF32BE, "UTF-32BE" },
 289+ { ANTLR3_ENC_UTF32LE, "UTF-32LE" },
 290+ { ANTLR3_ENC_EBCDIC, "EBCDIC-INT" },
 291+ { 0 , NULL }
 292+ };
 293+
 294+ int i;
 295+ for (i = 0; encodingTable[i].iconvEncoding != NULL; i++) {
 296+ if (encodingTable[i].antlrEncoding == encoding) {
 297+ break;
 298+ }
 299+ }
 300+ if (encodingTable[i].iconvEncoding == NULL) {
 301+ errno = EINVAL;
 302+ return -1;
 303+ }
 304+#if (SIZEOF_WCHAR_T == 4)
 305+#ifdef WORDS_BIGENDIAN
 306+ context->conversionState = iconv_open("UTF-32BE", encodingTable[i].iconvEncoding);
 307+#else
 308+ context->conversionState = iconv_open("UTF-32LE", encodingTable[i].iconvEncoding);
 309+#endif
 310+#elif (SIZEOF_WCHAR_T == 2)
 311+#ifdef WORDS_BIGENDIAN
 312+ context->conversionState = iconv_open("UTF-16BE", encodingTable[i].iconvEncoding);
 313+#else
 314+ context->conversionState = iconv_open("UTF-16LE", encodingTable[i].iconvEncoding);
 315+#endif
 316+#else
 317+#error Unsupported size of wchar_t!
 318+#endif
 319+ if (context->conversionState == (iconv_t)-1) {
 320+ return -1;
 321+ }
 322+}
 323+
 324+static size_t
 325+convertString(MWLEXERCONTEXT *context, ANTLR3_STRING *string, void *buf, size_t bufSize) {
 326+ size_t outBytesLeft = bufSize;
 327+ size_t inBytesLeft = string->size;
 328+ char *inBuf = string->chars;
 329+ char *outBuf = buf;
 330+
 331+ size_t ret = iconv(context->conversionState, NULL, NULL, NULL, NULL);
 332+
 333+ ret = iconv(context->conversionState, &inBuf, &inBytesLeft, &outBuf, &outBytesLeft);
 334+
 335+ return ret;
 336+}
 337+
 338+
 339+static const wchar_t *
 340+mwAntlr3stows(MWLEXERCONTEXT *context, pANTLR3_STRING string, void **state)
 341+{
 342+ size_t bufSize = (string->len + 1) * sizeof(wchar_t);
 343+ wchar_t *buf = ANTLR3_MALLOC(bufSize);
 344+
 345+ size_t ret = convertString(context, string, buf, bufSize);
 346+
 347+ if (ret == (size_t)-1) {
 348+ ANTLR3_FREE(buf);
 349+ perror(NULL);
 350+ return NULL;
 351+ }
 352+
 353+ *state = buf;
 354+
 355+ return buf;
 356+}
 357+
 358+static void
 359+mwFreeStringConversionState(void *state)
 360+{
 361+ ANTLR3_FREE(state);
 362+}
Index: trunk/parsers/libmwparser/src/tracingcontext.c
@@ -83,6 +83,12 @@
8484 static void TCBeginInternalLink(MWLISTENER *listener, pANTLR3_STRING linkTitle);
8585 static void TCEndInternalLink(MWLISTENER *listener);
8686 static void TCOnInternalLink(MWLISTENER *listener, pANTLR3_STRING linkTitle);
 87+static void TCBeginExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl);
 88+static void TCEndExternalLink(MWLISTENER *listener);
 89+static void TCOnExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl);
 90+static void TCBeginMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr);
 91+static void TCEndMediaLink(MWLISTENER *listener);
 92+static void TCOnMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr);
8793 static void TCBeginHtmlU(MWLISTENER *listener, pANTLR3_VECTOR attributes);
8894 static void TCEndHtmlU(MWLISTENER *listener);
8995 static void TCBeginHtmlDel(MWLISTENER *listener, pANTLR3_VECTOR attributes);
@@ -196,6 +202,12 @@
197203 listener->beginInternalLink = TCBeginInternalLink;
198204 listener->endInternalLink = TCEndInternalLink;
199205 listener->onInternalLink = TCOnInternalLink;
 206+ listener->beginExternalLink = TCBeginExternalLink;
 207+ listener->endExternalLink = TCEndExternalLink;
 208+ listener->onExternalLink = TCOnExternalLink;
 209+ listener->beginMediaLink = TCBeginMediaLink;
 210+ listener->endMediaLink = TCEndMediaLink;
 211+ listener->onMediaLink = TCOnMediaLink;
200212 listener->beginBulletList = TCBeginBulletList;
201213 listener->endBulletList = TCEndBulletList;
202214 listener->beginBulletListItem = TCBeginBulletListItem;
@@ -452,6 +464,56 @@
453465 }
454466
455467 static void
 468+TCBeginExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl)
 469+{
 470+ TCPrintIndent(listener);
 471+ printf("BEGIN EXTERNAL LINK[%s]\n", linkUrl->chars);
 472+ TCIncreaseIndent(listener);
 473+}
 474+
 475+static void
 476+TCEndExternalLink(MWLISTENER *listener)
 477+{
 478+ TCDecreaseIndent(listener);
 479+ TCPrintIndent(listener);
 480+ printf("END EXTERNAL LINK\n");
 481+}
 482+
 483+static void
 484+TCOnExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl)
 485+{
 486+ TCPrintIndent(listener);
 487+ printf("EXTERNAL LINK[%s]\n", linkUrl->chars);
 488+}
 489+
 490+static void
 491+TCBeginMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr)
 492+{
 493+ TCPrintIndent(listener);
 494+ printf("BEGIN MEDIA LINK[%s]", linkUrl->chars);
 495+ TCPrintAttributes(attr);
 496+ printf("\n");
 497+ TCIncreaseIndent(listener);
 498+}
 499+
 500+static void
 501+TCEndMediaLink(MWLISTENER *listener)
 502+{
 503+ TCDecreaseIndent(listener);
 504+ TCPrintIndent(listener);
 505+ printf("END MEDIA LINK\n");
 506+}
 507+
 508+static void
 509+TCOnMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr)
 510+{
 511+ TCPrintIndent(listener);
 512+ printf("MEDIA LINK[%s]", linkUrl->chars);
 513+ TCPrintAttributes(attr);
 514+ printf("\n");
 515+}
 516+
 517+static void
456518 TCBeginBulletList(MWLISTENER *listener, pANTLR3_VECTOR attr)
457519 {
458520 TCPrintIndent(listener);

Status & tagging log