Index: trunk/parsers/libmwparser/config.h |
— | — | @@ -1,81 +0,0 @@ |
2 | | -/* config.h. Generated from config.h.in by configure. */ |
3 | | -/* config.h.in. Generated from configure.ac by autoheader. */ |
4 | | - |
5 | | -/* Define to 1 if you have the <dlfcn.h> header file. */ |
6 | | -#define HAVE_DLFCN_H 1 |
7 | | - |
8 | | -/* Define to 1 if you have the <inttypes.h> header file. */ |
9 | | -#define HAVE_INTTYPES_H 1 |
10 | | - |
11 | | -/* Define to 1 if you have the <memory.h> header file. */ |
12 | | -#define HAVE_MEMORY_H 1 |
13 | | - |
14 | | -/* Define to 1 if stdbool.h conforms to C99. */ |
15 | | -#define HAVE_STDBOOL_H 1 |
16 | | - |
17 | | -/* Define to 1 if you have the <stdint.h> header file. */ |
18 | | -#define HAVE_STDINT_H 1 |
19 | | - |
20 | | -/* Define to 1 if you have the <stdlib.h> header file. */ |
21 | | -#define HAVE_STDLIB_H 1 |
22 | | - |
23 | | -/* Define to 1 if you have the <strings.h> header file. */ |
24 | | -#define HAVE_STRINGS_H 1 |
25 | | - |
26 | | -/* Define to 1 if you have the <string.h> header file. */ |
27 | | -#define HAVE_STRING_H 1 |
28 | | - |
29 | | -/* Define to 1 if you have the <sys/stat.h> header file. */ |
30 | | -#define HAVE_SYS_STAT_H 1 |
31 | | - |
32 | | -/* Define to 1 if you have the <sys/types.h> header file. */ |
33 | | -#define HAVE_SYS_TYPES_H 1 |
34 | | - |
35 | | -/* Header file for supporting wide character regexps. */ |
36 | | -#define HAVE_TRE_REGEX_H 1 |
37 | | - |
38 | | -/* Define to 1 if you have the <unistd.h> header file. */ |
39 | | -#define HAVE_UNISTD_H 1 |
40 | | - |
41 | | -/* Define to 1 if you have the <wchar.h> header file. */ |
42 | | -#define HAVE_WCHAR_H 1 |
43 | | - |
44 | | -/* Define to 1 if the system has the type `_Bool'. */ |
45 | | -#define HAVE__BOOL 1 |
46 | | - |
47 | | -/* Define to the sub-directory in which libtool stores uninstalled libraries. |
48 | | - */ |
49 | | -#define LT_OBJDIR ".libs/" |
50 | | - |
51 | | -/* Define to 1 if assertions should be disabled. */ |
52 | | -/* #undef NDEBUG */ |
53 | | - |
54 | | -/* Name of package */ |
55 | | -#define PACKAGE "libmwparser" |
56 | | - |
57 | | -/* Define to the address where bug reports for this package should be sent. */ |
58 | | -#define PACKAGE_BUGREPORT "andreas.jonsson@kreablo.se" |
59 | | - |
60 | | -/* Define to the full name of this package. */ |
61 | | -#define PACKAGE_NAME "libmwparser" |
62 | | - |
63 | | -/* Define to the full name and version of this package. */ |
64 | | -#define PACKAGE_STRING "libmwparser 0.3" |
65 | | - |
66 | | -/* Define to the one symbol short name of this package. */ |
67 | | -#define PACKAGE_TARNAME "libmwparser" |
68 | | - |
69 | | -/* Define to the home page for this package. */ |
70 | | -#define PACKAGE_URL "" |
71 | | - |
72 | | -/* Define to the version of this package. */ |
73 | | -#define PACKAGE_VERSION "0.3" |
74 | | - |
75 | | -/* The size of `wchar_t', as computed by sizeof. */ |
76 | | -#define SIZEOF_WCHAR_T 4 |
77 | | - |
78 | | -/* Define to 1 if you have the ANSI C header files. */ |
79 | | -#define STDC_HEADERS 1 |
80 | | - |
81 | | -/* Version number of package */ |
82 | | -#define VERSION "0.3" |
Index: trunk/parsers/libmwparser/config.h.in |
— | — | @@ -1,5 +1,8 @@ |
2 | 2 | /* config.h.in. Generated from configure.ac by autoheader. */ |
3 | 3 | |
| 4 | +/* Define if building universal (internal helper macro) */ |
| 5 | +#undef AC_APPLE_UNIVERSAL_BUILD |
| 6 | + |
4 | 7 | /* Define to 1 if you have the <dlfcn.h> header file. */ |
5 | 8 | #undef HAVE_DLFCN_H |
6 | 9 | |
— | — | @@ -78,3 +81,15 @@ |
79 | 82 | |
80 | 83 | /* Version number of package */ |
81 | 84 | #undef VERSION |
| 85 | + |
| 86 | +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most |
| 87 | + significant byte first (like Motorola and SPARC, unlike Intel). */ |
| 88 | +#if defined AC_APPLE_UNIVERSAL_BUILD |
| 89 | +# if defined __BIG_ENDIAN__ |
| 90 | +# define WORDS_BIGENDIAN 1 |
| 91 | +# endif |
| 92 | +#else |
| 93 | +# ifndef WORDS_BIGENDIAN |
| 94 | +# undef WORDS_BIGENDIAN |
| 95 | +# endif |
| 96 | +#endif |
Index: trunk/parsers/libmwparser/tests/src/testtext.c |
— | — | @@ -171,7 +171,7 @@ |
172 | 172 | int diff_us = stop.tv_usec - start.tv_usec; |
173 | 173 | |
174 | 174 | diff_us += diff_s * 1000000; |
175 | | - // fprintf(stderr, "Time: %d micro seconds\n", diff_us); |
| 175 | + // fprintf(stderr, "Time: %d micro seconds\n", diff_us); |
176 | 176 | |
177 | 177 | return 0; |
178 | 178 | } |
Index: trunk/parsers/libmwparser/tests/testsuite |
— | — | @@ -579,7 +579,7 @@ |
580 | 580 | # List of the tested programs. |
581 | 581 | at_tested='' |
582 | 582 | # List of all the test groups. |
583 | | -at_groups_all=' 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29' |
| 583 | +at_groups_all=' 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31' |
584 | 584 | # As many question marks as there are digits in the last test group number. |
585 | 585 | # Used to normalize the test group numbers so that `ls' lists them in |
586 | 586 | # numerical order. |
— | — | @@ -614,6 +614,8 @@ |
615 | 615 | 27;testsuite.at:189;hr;; |
616 | 616 | 28;testsuite.at:196;link1;; |
617 | 617 | 29;testsuite.at:204;link2;; |
| 618 | +30;testsuite.at:211;link3;; |
| 619 | +31;testsuite.at:218;link4;; |
618 | 620 | " |
619 | 621 | |
620 | 622 | # at_fn_validate_ranges NAME... |
— | — | @@ -625,7 +627,7 @@ |
626 | 628 | for at_grp |
627 | 629 | do |
628 | 630 | eval at_value=\$$at_grp |
629 | | - if test $at_value -lt 1 || test $at_value -gt 29; then |
| 631 | + if test $at_value -lt 1 || test $at_value -gt 31; then |
630 | 632 | $as_echo "invalid test group: $at_value" >&2 |
631 | 633 | exit 1 |
632 | 634 | fi |
— | — | @@ -3012,3 +3014,75 @@ |
3013 | 3015 | ) 5>&1 2>&1 | eval $at_tee_pipe |
3014 | 3016 | read at_status <"$at_status_file" |
3015 | 3017 | #AT_STOP_29 |
| 3018 | +#AT_START_30 |
| 3019 | +# 30. testsuite.at:211: link3 |
| 3020 | +at_setup_line='testsuite.at:211' |
| 3021 | +at_fn_banner 1 |
| 3022 | +at_desc="link3" |
| 3023 | +at_desc_line=" 30: $at_desc " |
| 3024 | +$at_quiet $as_echo_n "$at_desc_line" |
| 3025 | +at_xfail=no |
| 3026 | +echo "# -*- compilation -*-" >> "$at_group_log" |
| 3027 | +( |
| 3028 | + $as_echo "30. testsuite.at:211: testing ..." |
| 3029 | + $at_traceon |
| 3030 | + |
| 3031 | + |
| 3032 | +cat >tempoutput <<'_ATEOF' |
| 3033 | +_ATEOF |
| 3034 | + |
| 3035 | +{ set +x |
| 3036 | +$as_echo "$at_srcdir/testsuite.at:214: testtext \${srcdir}/link/link3.in > tempoutput && diff \${srcdir}/link/link3.out tempoutput" |
| 3037 | +at_fn_check_prepare_notrace 'a ${...} parameter expansion' "testsuite.at:214" |
| 3038 | +( $at_check_trace; testtext ${srcdir}/link/link3.in > tempoutput && diff ${srcdir}/link/link3.out tempoutput |
| 3039 | +) >>"$at_stdout" 2>>"$at_stderr" |
| 3040 | +at_status=$? at_failed=false |
| 3041 | +$at_check_filter |
| 3042 | +at_fn_diff_devnull "$at_stderr" || at_failed=: |
| 3043 | +at_fn_diff_devnull "$at_stdout" || at_failed=: |
| 3044 | +at_fn_check_status 0 $at_status "$at_srcdir/testsuite.at:214" |
| 3045 | +$at_failed && at_fn_log_failure |
| 3046 | +$at_traceon; } |
| 3047 | + |
| 3048 | + |
| 3049 | + set +x |
| 3050 | + $at_times_p && times >"$at_times_file" |
| 3051 | +) 5>&1 2>&1 | eval $at_tee_pipe |
| 3052 | +read at_status <"$at_status_file" |
| 3053 | +#AT_STOP_30 |
| 3054 | +#AT_START_31 |
| 3055 | +# 31. testsuite.at:218: link4 |
| 3056 | +at_setup_line='testsuite.at:218' |
| 3057 | +at_fn_banner 1 |
| 3058 | +at_desc="link4" |
| 3059 | +at_desc_line=" 31: $at_desc " |
| 3060 | +$at_quiet $as_echo_n "$at_desc_line" |
| 3061 | +at_xfail=no |
| 3062 | +echo "# -*- compilation -*-" >> "$at_group_log" |
| 3063 | +( |
| 3064 | + $as_echo "31. testsuite.at:218: testing ..." |
| 3065 | + $at_traceon |
| 3066 | + |
| 3067 | + |
| 3068 | +cat >tempoutput <<'_ATEOF' |
| 3069 | +_ATEOF |
| 3070 | + |
| 3071 | +{ set +x |
| 3072 | +$as_echo "$at_srcdir/testsuite.at:221: testtext \${srcdir}/link/link4.in > tempoutput && diff \${srcdir}/link/link4.out tempoutput" |
| 3073 | +at_fn_check_prepare_notrace 'a ${...} parameter expansion' "testsuite.at:221" |
| 3074 | +( $at_check_trace; testtext ${srcdir}/link/link4.in > tempoutput && diff ${srcdir}/link/link4.out tempoutput |
| 3075 | +) >>"$at_stdout" 2>>"$at_stderr" |
| 3076 | +at_status=$? at_failed=false |
| 3077 | +$at_check_filter |
| 3078 | +at_fn_diff_devnull "$at_stderr" || at_failed=: |
| 3079 | +at_fn_diff_devnull "$at_stdout" || at_failed=: |
| 3080 | +at_fn_check_status 0 $at_status "$at_srcdir/testsuite.at:221" |
| 3081 | +$at_failed && at_fn_log_failure |
| 3082 | +$at_traceon; } |
| 3083 | + |
| 3084 | + |
| 3085 | + set +x |
| 3086 | + $at_times_p && times >"$at_times_file" |
| 3087 | +) 5>&1 2>&1 | eval $at_tee_pipe |
| 3088 | +read at_status <"$at_status_file" |
| 3089 | +#AT_STOP_31 |
Index: trunk/parsers/libmwparser/tests/testsuite.at |
— | — | @@ -207,3 +207,17 @@ |
208 | 208 | AT_CHECK([testtext ${srcdir}/link/link2.in > tempoutput && diff ${srcdir}/link/link2.out tempoutput]) |
209 | 209 | |
210 | 210 | AT_CLEANUP |
| 211 | + |
| 212 | +AT_SETUP([link3]) |
| 213 | + |
| 214 | +AT_DATA([tempoutput], []) |
| 215 | +AT_CHECK([testtext ${srcdir}/link/link3.in > tempoutput && diff ${srcdir}/link/link3.out tempoutput]) |
| 216 | + |
| 217 | +AT_CLEANUP |
| 218 | + |
| 219 | +AT_SETUP([link4]) |
| 220 | + |
| 221 | +AT_DATA([tempoutput], []) |
| 222 | +AT_CHECK([testtext ${srcdir}/link/link4.in > tempoutput && diff ${srcdir}/link/link4.out tempoutput]) |
| 223 | + |
| 224 | +AT_CLEANUP |
Index: trunk/parsers/libmwparser/tests/link/link1.out |
— | — | @@ -1,7 +1,6 @@ |
2 | 2 | BEGIN ARTICLE |
3 | 3 | BEGIN PARAGRAPH |
4 | | - SPECIAL[[] |
5 | | - SPECIAL[[] |
| 4 | + SPECIAL[[[] |
6 | 5 | WORD[Link] |
7 | 6 | SPECIAL[|] |
8 | 7 | WORD[text] |
Index: trunk/parsers/libmwparser/tests/link/link3.in |
— | — | @@ -0,0 +1 @@ |
| 2 | +[[Link]] [[[Link]] |
Index: trunk/parsers/libmwparser/tests/link/link3.out |
— | — | @@ -0,0 +1,11 @@ |
| 2 | +BEGIN ARTICLE |
| 3 | + BEGIN PARAGRAPH |
| 4 | + INTERNAL LINK[Link] |
| 5 | + SPACE[ ] |
| 6 | + SPECIAL[[[] |
| 7 | + SPECIAL[[] |
| 8 | + WORD[Link] |
| 9 | + SPECIAL[]] |
| 10 | + SPECIAL[]] |
| 11 | + END PARAGRAPH |
| 12 | +END ARTICLE |
Index: trunk/parsers/libmwparser/tests/link/link4.in |
— | — | @@ -0,0 +1,3 @@ |
| 2 | +[[Link]] [[[Link]] [link ] [http://foo bar] [https://a] [mailto:a] [http://b |
| 3 | + |
| 4 | + |
Index: trunk/parsers/libmwparser/tests/link/link4.out |
— | — | @@ -0,0 +1,32 @@ |
| 2 | +BEGIN ARTICLE |
| 3 | + BEGIN PARAGRAPH |
| 4 | + INTERNAL LINK[Link] |
| 5 | + SPACE[ ] |
| 6 | + SPECIAL[[[] |
| 7 | + SPECIAL[[] |
| 8 | + WORD[Link] |
| 9 | + SPECIAL[]] |
| 10 | + SPECIAL[]] |
| 11 | + SPACE[ ] |
| 12 | + SPECIAL[[] |
| 13 | + WORD[link] |
| 14 | + SPACE[ ] |
| 15 | + SPECIAL[]] |
| 16 | + SPACE[ ] |
| 17 | + BEGIN EXTERNAL LINK[http://foo] |
| 18 | + WORD[bar] |
| 19 | + END EXTERNAL LINK |
| 20 | + SPACE[ ] |
| 21 | + EXTERNAL LINK[https://a] |
| 22 | + SPACE[ ] |
| 23 | + EXTERNAL LINK[mailto:a] |
| 24 | + SPACE[ ] |
| 25 | + NEWLINE |
| 26 | + SPECIAL[[] |
| 27 | + WORD[http] |
| 28 | + SPECIAL[:] |
| 29 | + SPECIAL[/] |
| 30 | + SPECIAL[/] |
| 31 | + WORD[b] |
| 32 | + END PARAGRAPH |
| 33 | +END ARTICLE |
Index: trunk/parsers/libmwparser/configure.ac |
— | — | @@ -53,6 +53,7 @@ |
54 | 54 | AC_HEADER_ASSERT |
55 | 55 | AC_CHECK_HEADERS([wchar.h]) |
56 | 56 | AC_CHECK_SIZEOF([wchar_t], [], [#include<wchar.h>]) |
| 57 | +AC_C_BIGENDIAN |
57 | 58 | |
58 | 59 | AC_CHECK_HEADER(tre/regex.h, AC_DEFINE([HAVE_TRE_REGEX_H], [1], [Header file for supporting wide character regexps.]), AC_MSG_ERROR([tre/regex.h not found])) |
59 | 60 | AC_CHECK_LIB([tre], [tre_regwcomp], [LIBREGEX="-ltre"], AC_MSG_ERROR([Regexp library for wide characters not found.])) |
Index: trunk/parsers/libmwparser/TODO |
— | — | @@ -1,9 +0,0 @@ |
2 | | -Test optimization options: |
3 | | - |
4 | | --ftree-loop-linear |
5 | | --floop-interchange |
6 | | --floop-strip-mine |
7 | | --floop-block |
Index: trunk/parsers/libmwparser/configure |
— | — | @@ -4756,7 +4756,231 @@ |
4757 | 4757 | _ACEOF |
4758 | 4758 | |
4759 | 4759 | |
| 4760 | + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5 |
| 4761 | +$as_echo_n "checking whether byte ordering is bigendian... " >&6; } |
| 4762 | +if test "${ac_cv_c_bigendian+set}" = set; then : |
| 4763 | + $as_echo_n "(cached) " >&6 |
| 4764 | +else |
| 4765 | + ac_cv_c_bigendian=unknown |
| 4766 | + # See if we're dealing with a universal compiler. |
| 4767 | + cat confdefs.h - <<_ACEOF >conftest.$ac_ext |
| 4768 | +/* end confdefs.h. */ |
| 4769 | +#ifndef __APPLE_CC__ |
| 4770 | + not a universal capable compiler |
| 4771 | + #endif |
| 4772 | + typedef int dummy; |
4760 | 4773 | |
| 4774 | +_ACEOF |
| 4775 | +if ac_fn_c_try_compile "$LINENO"; then : |
| 4776 | + |
| 4777 | + # Check for potential -arch flags. It is not universal unless |
| 4778 | + # there are at least two -arch flags with different values. |
| 4779 | + ac_arch= |
| 4780 | + ac_prev= |
| 4781 | + for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do |
| 4782 | + if test -n "$ac_prev"; then |
| 4783 | + case $ac_word in |
| 4784 | + i?86 | x86_64 | ppc | ppc64) |
| 4785 | + if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then |
| 4786 | + ac_arch=$ac_word |
| 4787 | + else |
| 4788 | + ac_cv_c_bigendian=universal |
| 4789 | + break |
| 4790 | + fi |
| 4791 | + ;; |
| 4792 | + esac |
| 4793 | + ac_prev= |
| 4794 | + elif test "x$ac_word" = "x-arch"; then |
| 4795 | + ac_prev=arch |
| 4796 | + fi |
| 4797 | + done |
| 4798 | +fi |
| 4799 | +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext |
| 4800 | + if test $ac_cv_c_bigendian = unknown; then |
| 4801 | + # See if sys/param.h defines the BYTE_ORDER macro. |
| 4802 | + cat confdefs.h - <<_ACEOF >conftest.$ac_ext |
| 4803 | +/* end confdefs.h. */ |
| 4804 | +#include <sys/types.h> |
| 4805 | + #include <sys/param.h> |
| 4806 | + |
| 4807 | +int |
| 4808 | +main () |
| 4809 | +{ |
| 4810 | +#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \ |
| 4811 | + && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \ |
| 4812 | + && LITTLE_ENDIAN) |
| 4813 | + bogus endian macros |
| 4814 | + #endif |
| 4815 | + |
| 4816 | + ; |
| 4817 | + return 0; |
| 4818 | +} |
| 4819 | +_ACEOF |
| 4820 | +if ac_fn_c_try_compile "$LINENO"; then : |
| 4821 | + # It does; now see whether it defined to BIG_ENDIAN or not. |
| 4822 | + cat confdefs.h - <<_ACEOF >conftest.$ac_ext |
| 4823 | +/* end confdefs.h. */ |
| 4824 | +#include <sys/types.h> |
| 4825 | + #include <sys/param.h> |
| 4826 | + |
| 4827 | +int |
| 4828 | +main () |
| 4829 | +{ |
| 4830 | +#if BYTE_ORDER != BIG_ENDIAN |
| 4831 | + not big endian |
| 4832 | + #endif |
| 4833 | + |
| 4834 | + ; |
| 4835 | + return 0; |
| 4836 | +} |
| 4837 | +_ACEOF |
| 4838 | +if ac_fn_c_try_compile "$LINENO"; then : |
| 4839 | + ac_cv_c_bigendian=yes |
| 4840 | +else |
| 4841 | + ac_cv_c_bigendian=no |
| 4842 | +fi |
| 4843 | +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext |
| 4844 | +fi |
| 4845 | +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext |
| 4846 | + fi |
| 4847 | + if test $ac_cv_c_bigendian = unknown; then |
| 4848 | + # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris). |
| 4849 | + cat confdefs.h - <<_ACEOF >conftest.$ac_ext |
| 4850 | +/* end confdefs.h. */ |
| 4851 | +#include <limits.h> |
| 4852 | + |
| 4853 | +int |
| 4854 | +main () |
| 4855 | +{ |
| 4856 | +#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN) |
| 4857 | + bogus endian macros |
| 4858 | + #endif |
| 4859 | + |
| 4860 | + ; |
| 4861 | + return 0; |
| 4862 | +} |
| 4863 | +_ACEOF |
| 4864 | +if ac_fn_c_try_compile "$LINENO"; then : |
| 4865 | + # It does; now see whether it defined to _BIG_ENDIAN or not. |
| 4866 | + cat confdefs.h - <<_ACEOF >conftest.$ac_ext |
| 4867 | +/* end confdefs.h. */ |
| 4868 | +#include <limits.h> |
| 4869 | + |
| 4870 | +int |
| 4871 | +main () |
| 4872 | +{ |
| 4873 | +#ifndef _BIG_ENDIAN |
| 4874 | + not big endian |
| 4875 | + #endif |
| 4876 | + |
| 4877 | + ; |
| 4878 | + return 0; |
| 4879 | +} |
| 4880 | +_ACEOF |
| 4881 | +if ac_fn_c_try_compile "$LINENO"; then : |
| 4882 | + ac_cv_c_bigendian=yes |
| 4883 | +else |
| 4884 | + ac_cv_c_bigendian=no |
| 4885 | +fi |
| 4886 | +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext |
| 4887 | +fi |
| 4888 | +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext |
| 4889 | + fi |
| 4890 | + if test $ac_cv_c_bigendian = unknown; then |
| 4891 | + # Compile a test program. |
| 4892 | + if test "$cross_compiling" = yes; then : |
| 4893 | + # Try to guess by grepping values from an object file. |
| 4894 | + cat confdefs.h - <<_ACEOF >conftest.$ac_ext |
| 4895 | +/* end confdefs.h. */ |
| 4896 | +short int ascii_mm[] = |
| 4897 | + { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 }; |
| 4898 | + short int ascii_ii[] = |
| 4899 | + { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 }; |
| 4900 | + int use_ascii (int i) { |
| 4901 | + return ascii_mm[i] + ascii_ii[i]; |
| 4902 | + } |
| 4903 | + short int ebcdic_ii[] = |
| 4904 | + { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 }; |
| 4905 | + short int ebcdic_mm[] = |
| 4906 | + { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 }; |
| 4907 | + int use_ebcdic (int i) { |
| 4908 | + return ebcdic_mm[i] + ebcdic_ii[i]; |
| 4909 | + } |
| 4910 | + extern int foo; |
| 4911 | + |
| 4912 | +int |
| 4913 | +main () |
| 4914 | +{ |
| 4915 | +return use_ascii (foo) == use_ebcdic (foo); |
| 4916 | + ; |
| 4917 | + return 0; |
| 4918 | +} |
| 4919 | +_ACEOF |
| 4920 | +if ac_fn_c_try_compile "$LINENO"; then : |
| 4921 | + if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then |
| 4922 | + ac_cv_c_bigendian=yes |
| 4923 | + fi |
| 4924 | + if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then |
| 4925 | + if test "$ac_cv_c_bigendian" = unknown; then |
| 4926 | + ac_cv_c_bigendian=no |
| 4927 | + else |
| 4928 | + # finding both strings is unlikely to happen, but who knows? |
| 4929 | + ac_cv_c_bigendian=unknown |
| 4930 | + fi |
| 4931 | + fi |
| 4932 | +fi |
| 4933 | +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext |
| 4934 | +else |
| 4935 | + cat confdefs.h - <<_ACEOF >conftest.$ac_ext |
| 4936 | +/* end confdefs.h. */ |
| 4937 | +$ac_includes_default |
| 4938 | +int |
| 4939 | +main () |
| 4940 | +{ |
| 4941 | + |
| 4942 | + /* Are we little or big endian? From Harbison&Steele. */ |
| 4943 | + union |
| 4944 | + { |
| 4945 | + long int l; |
| 4946 | + char c[sizeof (long int)]; |
| 4947 | + } u; |
| 4948 | + u.l = 1; |
| 4949 | + return u.c[sizeof (long int) - 1] == 1; |
| 4950 | + |
| 4951 | + ; |
| 4952 | + return 0; |
| 4953 | +} |
| 4954 | +_ACEOF |
| 4955 | +if ac_fn_c_try_run "$LINENO"; then : |
| 4956 | + ac_cv_c_bigendian=no |
| 4957 | +else |
| 4958 | + ac_cv_c_bigendian=yes |
| 4959 | +fi |
| 4960 | +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ |
| 4961 | + conftest.$ac_objext conftest.beam conftest.$ac_ext |
| 4962 | +fi |
| 4963 | + |
| 4964 | + fi |
| 4965 | +fi |
| 4966 | +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5 |
| 4967 | +$as_echo "$ac_cv_c_bigendian" >&6; } |
| 4968 | + case $ac_cv_c_bigendian in #( |
| 4969 | + yes) |
| 4970 | + $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h |
| 4971 | +;; #( |
| 4972 | + no) |
| 4973 | + ;; #( |
| 4974 | + universal) |
| 4975 | + |
| 4976 | +$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h |
| 4977 | + |
| 4978 | + ;; #( |
| 4979 | + *) |
| 4980 | + as_fn_error "unknown endianness |
| 4981 | + presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;; |
| 4982 | + esac |
| 4983 | + |
| 4984 | + |
4761 | 4985 | ac_fn_c_check_header_mongrel "$LINENO" "tre/regex.h" "ac_cv_header_tre_regex_h" "$ac_includes_default" |
4762 | 4986 | if test "x$ac_cv_header_tre_regex_h" = x""yes; then : |
4763 | 4987 | |
— | — | @@ -5362,13 +5586,13 @@ |
5363 | 5587 | else |
5364 | 5588 | lt_cv_nm_interface="BSD nm" |
5365 | 5589 | echo "int some_variable = 0;" > conftest.$ac_ext |
5366 | | - (eval echo "\"\$as_me:5365: $ac_compile\"" >&5) |
| 5590 | + (eval echo "\"\$as_me:5589: $ac_compile\"" >&5) |
5367 | 5591 | (eval "$ac_compile" 2>conftest.err) |
5368 | 5592 | cat conftest.err >&5 |
5369 | | - (eval echo "\"\$as_me:5368: $NM \\\"conftest.$ac_objext\\\"\"" >&5) |
| 5593 | + (eval echo "\"\$as_me:5592: $NM \\\"conftest.$ac_objext\\\"\"" >&5) |
5370 | 5594 | (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) |
5371 | 5595 | cat conftest.err >&5 |
5372 | | - (eval echo "\"\$as_me:5371: output\"" >&5) |
| 5596 | + (eval echo "\"\$as_me:5595: output\"" >&5) |
5373 | 5597 | cat conftest.out >&5 |
5374 | 5598 | if $GREP 'External.*some_variable' conftest.out > /dev/null; then |
5375 | 5599 | lt_cv_nm_interface="MS dumpbin" |
— | — | @@ -6573,7 +6797,7 @@ |
6574 | 6798 | ;; |
6575 | 6799 | *-*-irix6*) |
6576 | 6800 | # Find out which ABI we are using. |
6577 | | - echo '#line 6576 "configure"' > conftest.$ac_ext |
| 6801 | + echo '#line 6800 "configure"' > conftest.$ac_ext |
6578 | 6802 | if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 |
6579 | 6803 | (eval $ac_compile) 2>&5 |
6580 | 6804 | ac_status=$? |
— | — | @@ -7831,11 +8055,11 @@ |
7832 | 8056 | -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ |
7833 | 8057 | -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ |
7834 | 8058 | -e 's:$: $lt_compiler_flag:'` |
7835 | | - (eval echo "\"\$as_me:7834: $lt_compile\"" >&5) |
| 8059 | + (eval echo "\"\$as_me:8058: $lt_compile\"" >&5) |
7836 | 8060 | (eval "$lt_compile" 2>conftest.err) |
7837 | 8061 | ac_status=$? |
7838 | 8062 | cat conftest.err >&5 |
7839 | | - echo "$as_me:7838: \$? = $ac_status" >&5 |
| 8063 | + echo "$as_me:8062: \$? = $ac_status" >&5 |
7840 | 8064 | if (exit $ac_status) && test -s "$ac_outfile"; then |
7841 | 8065 | # The compiler can only warn and ignore the option if not recognized |
7842 | 8066 | # So say no if there are warnings other than the usual output. |
— | — | @@ -8170,11 +8394,11 @@ |
8171 | 8395 | -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ |
8172 | 8396 | -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ |
8173 | 8397 | -e 's:$: $lt_compiler_flag:'` |
8174 | | - (eval echo "\"\$as_me:8173: $lt_compile\"" >&5) |
| 8398 | + (eval echo "\"\$as_me:8397: $lt_compile\"" >&5) |
8175 | 8399 | (eval "$lt_compile" 2>conftest.err) |
8176 | 8400 | ac_status=$? |
8177 | 8401 | cat conftest.err >&5 |
8178 | | - echo "$as_me:8177: \$? = $ac_status" >&5 |
| 8402 | + echo "$as_me:8401: \$? = $ac_status" >&5 |
8179 | 8403 | if (exit $ac_status) && test -s "$ac_outfile"; then |
8180 | 8404 | # The compiler can only warn and ignore the option if not recognized |
8181 | 8405 | # So say no if there are warnings other than the usual output. |
— | — | @@ -8275,11 +8499,11 @@ |
8276 | 8500 | -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ |
8277 | 8501 | -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ |
8278 | 8502 | -e 's:$: $lt_compiler_flag:'` |
8279 | | - (eval echo "\"\$as_me:8278: $lt_compile\"" >&5) |
| 8503 | + (eval echo "\"\$as_me:8502: $lt_compile\"" >&5) |
8280 | 8504 | (eval "$lt_compile" 2>out/conftest.err) |
8281 | 8505 | ac_status=$? |
8282 | 8506 | cat out/conftest.err >&5 |
8283 | | - echo "$as_me:8282: \$? = $ac_status" >&5 |
| 8507 | + echo "$as_me:8506: \$? = $ac_status" >&5 |
8284 | 8508 | if (exit $ac_status) && test -s out/conftest2.$ac_objext |
8285 | 8509 | then |
8286 | 8510 | # The compiler can only warn and ignore the option if not recognized |
— | — | @@ -8330,11 +8554,11 @@ |
8331 | 8555 | -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ |
8332 | 8556 | -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ |
8333 | 8557 | -e 's:$: $lt_compiler_flag:'` |
8334 | | - (eval echo "\"\$as_me:8333: $lt_compile\"" >&5) |
| 8558 | + (eval echo "\"\$as_me:8557: $lt_compile\"" >&5) |
8335 | 8559 | (eval "$lt_compile" 2>out/conftest.err) |
8336 | 8560 | ac_status=$? |
8337 | 8561 | cat out/conftest.err >&5 |
8338 | | - echo "$as_me:8337: \$? = $ac_status" >&5 |
| 8562 | + echo "$as_me:8561: \$? = $ac_status" >&5 |
8339 | 8563 | if (exit $ac_status) && test -s out/conftest2.$ac_objext |
8340 | 8564 | then |
8341 | 8565 | # The compiler can only warn and ignore the option if not recognized |
— | — | @@ -10714,7 +10938,7 @@ |
10715 | 10939 | lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 |
10716 | 10940 | lt_status=$lt_dlunknown |
10717 | 10941 | cat > conftest.$ac_ext <<_LT_EOF |
10718 | | -#line 10717 "configure" |
| 10942 | +#line 10941 "configure" |
10719 | 10943 | #include "confdefs.h" |
10720 | 10944 | |
10721 | 10945 | #if HAVE_DLFCN_H |
— | — | @@ -10810,7 +11034,7 @@ |
10811 | 11035 | lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 |
10812 | 11036 | lt_status=$lt_dlunknown |
10813 | 11037 | cat > conftest.$ac_ext <<_LT_EOF |
10814 | | -#line 10813 "configure" |
| 11038 | +#line 11037 "configure" |
10815 | 11039 | #include "confdefs.h" |
10816 | 11040 | |
10817 | 11041 | #if HAVE_DLFCN_H |
— | — | @@ -11160,6 +11384,7 @@ |
11161 | 11385 | Usually this means the macro was only invoked conditionally." "$LINENO" 5 |
11162 | 11386 | fi |
11163 | 11387 | |
| 11388 | + |
11164 | 11389 | : ${CONFIG_STATUS=./config.status} |
11165 | 11390 | ac_write_fail=0 |
11166 | 11391 | ac_clean_files_save=$ac_clean_files |
Index: trunk/parsers/libmwparser/include/mwutils.h |
— | — | @@ -5,28 +5,4 @@ |
6 | 6 | #include <antlr3.h> |
7 | 7 | #include "config.h" |
8 | 8 | |
9 | | -static inline const wchar_t * |
10 | | -mwAntlr3stows(pANTLR3_STRING string, void **state) |
11 | | -{ |
12 | | -#if (SIZEOF_WCHAR_T == 4) |
13 | | - return (wchar_t*)string->chars; |
14 | | -#elif (SIZEOF_WCHAR_T == 2) |
15 | | -#error Unsupported wchar_t size! |
16 | | -#else |
17 | | -#error Unsupported wchar_t size! |
18 | 9 | #endif |
19 | | -} |
20 | | - |
21 | | -static inline void |
22 | | -mwFreeStringConversionState(void *state) |
23 | | -{ |
24 | | -#if(SIZEOF_WCHAR_T == 4) |
25 | | - /* do nothing */ |
26 | | -#elif (SIZEOF_WCHAR_T == 2) |
27 | | -#error Unsupported wchar_t size! |
28 | | -#else |
29 | | -#error Unsupported wchar_t size! |
30 | | -#endif |
31 | | -} |
32 | | - |
33 | | -#endif |
Index: trunk/parsers/libmwparser/include/mwparsercontext.h |
— | — | @@ -91,6 +91,12 @@ |
92 | 92 | void (*beginInternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkTitle); |
93 | 93 | void (*endInternalLink)(struct MWPARSERCONTEXT_struct * context); |
94 | 94 | void (*onInternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkTitle); |
| 95 | + void (*beginExternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkUrl); |
| 96 | + void (*endExternalLink)(struct MWPARSERCONTEXT_struct * context); |
| 97 | + void (*onExternalLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_STRING linkUrl); |
| 98 | + void (*beginMediaLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_VECTOR attr); |
| 99 | + void (*endMediaLink)(struct MWPARSERCONTEXT_struct * context); |
| 100 | + void (*onMediaLink)(struct MWPARSERCONTEXT_struct * context, pANTLR3_VECTOR attr); |
95 | 101 | void (*beginFormat)(struct MWPARSERCONTEXT_struct * context, |
96 | 102 | void (*begin)(), |
97 | 103 | void (*end)(), |
Index: trunk/parsers/libmwparser/include/mwlexercontext.h |
— | — | @@ -5,6 +5,7 @@ |
6 | 6 | #include <wchar.h> |
7 | 7 | #include <tre/regex.h> |
8 | 8 | #include <antlr3defs.h> |
| 9 | +#include <iconv.h> |
9 | 10 | |
10 | 11 | /* |
11 | 12 | * Different table types can be nested, but not mixed. |
— | — | @@ -57,6 +58,7 @@ |
58 | 59 | pANTLR3_STACK blockContextStack; |
59 | 60 | int headingLevel; |
60 | 61 | regex_t legalTitleChars; |
| 62 | + regex_t mediaLinkTitle; |
61 | 63 | |
62 | 64 | /* |
63 | 65 | * State for speculative execution. |
— | — | @@ -68,6 +70,12 @@ |
69 | 71 | MWLEXERSPECULATION mediaLinkSpeculation; |
70 | 72 | int istreamIndex; |
71 | 73 | |
| 74 | + /* |
| 75 | + * Character conversion. |
| 76 | + */ |
| 77 | + |
| 78 | + iconv_t conversionState; |
| 79 | + |
72 | 80 | /** Method for deallocating this instance. */ |
73 | 81 | void (*free)(void * context); |
74 | 82 | /** Reset instance */ |
— | — | @@ -79,7 +87,7 @@ |
80 | 88 | pANTLR3_VECTOR_FACTORY vectorFactory; |
81 | 89 | pANTLR3_STRING_FACTORY stringFactory; |
82 | 90 | bool (*isLegalTitle)(struct MWLEXERCONTEXT_struct * context, pANTLR3_STRING text); |
83 | | - bool (*isLegalExternalLink)(struct MWLEXERCONTEXT_struct * context, pANTLR3_STRING text); |
| 91 | + bool (*isMediaLinkTitle)(struct MWLEXERCONTEXT_struct * context, pANTLR3_STRING text); |
84 | 92 | |
85 | 93 | |
86 | 94 | } |
Index: trunk/parsers/libmwparser/include/mwlistener.h |
— | — | @@ -93,7 +93,13 @@ |
94 | 94 | void (*endHeading)(struct MWLISTENER_struct * context); |
95 | 95 | void (*beginInternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkTitle); |
96 | 96 | void (*endInternalLink)(struct MWLISTENER_struct * context); |
97 | | - void (*onInternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkTitle); |
| 97 | + void (*onInternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl); |
| 98 | + void (*beginExternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl); |
| 99 | + void (*endExternalLink)(struct MWLISTENER_struct * context); |
| 100 | + void (*onExternalLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl); |
| 101 | + void (*beginMediaLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr); |
| 102 | + void (*endMediaLink)(struct MWLISTENER_struct * context); |
| 103 | + void (*onMediaLink)(struct MWLISTENER_struct * context, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr); |
98 | 104 | void (*beginTableOfContents)(struct MWLISTENER_struct * context); |
99 | 105 | void (*endTableOfContents)(struct MWLISTENER_struct * context); |
100 | 106 | void (*beginTableOfContentsItem)(struct MWLISTENER_struct * context, int level); |
Index: trunk/parsers/libmwparser/src/mwlexerpredicatetable.php |
— | — | @@ -93,7 +93,7 @@ |
94 | 94 | 'name' => 'wikitextListElement', |
95 | 95 | 'initiallyDisabled' => array(), |
96 | 96 | 'types' => array('block'), |
97 | | - 'affects' => array(new TypeDisable('block', 'WIKITEXT_BLOCK')), |
| 97 | + 'affects' => array(new TypeDisable('block', 'WIKITEXT_BLOCK_OR_LINK')), |
98 | 98 | 'mayNest' => false, |
99 | 99 | 'scope' => new Scope('eol'), |
100 | 100 | ), |
— | — | @@ -254,7 +254,22 @@ |
255 | 255 | 'initiallyDisabled' => array(), |
256 | 256 | 'mayNest' => false, |
257 | 257 | 'types' => array(), |
258 | | - ) |
| 258 | + 'affects' => array(new PredicateDisable('externalLinkOpen', 'WIKITEXT_BLOCK_OR_LINK')), |
| 259 | + ), |
| 260 | + array( |
| 261 | + 'name' => "externalLinkOpen", |
| 262 | + 'close' => "externalLinkClose", |
| 263 | + 'initiallyDisabled' => array(), |
| 264 | + 'mayNest' => false, |
| 265 | + 'types' => array(), |
| 266 | + ), |
| 267 | + array( |
| 268 | + 'name' => "mediaLinkOpen", |
| 269 | + 'close' => "mediaLinkClose", |
| 270 | + 'initiallyDisabled' => array(), |
| 271 | + 'mayNest' => false, |
| 272 | + 'types' => array(), |
| 273 | + ), |
259 | 274 | ); |
260 | 275 | |
261 | 276 | foreach(array('B', 'Del', 'I', 'Ins', 'U', 'Font', 'Big', 'Small', 'Sub', 'Sup', 'Cite', |
— | — | @@ -325,7 +340,9 @@ |
326 | 341 | 'BLOCK_CONTEXT', |
327 | 342 | 'BLOCKQUOTE', |
328 | 343 | 'NESTING_LIMIT', |
329 | | - 'WIKITEXT_BLOCK' |
| 344 | + 'WIKITEXT_BLOCK_OR_LINK' // It should be OK for the wikitext-block cause and |
| 345 | + // the link cause to share the same bit, since they are |
| 346 | + // never applied to the same predicate. |
330 | 347 | ); |
331 | 348 | |
332 | 349 | define('CX', 'context'); |
Index: trunk/parsers/libmwparser/src/mwParser.g |
— | — | @@ -564,8 +564,9 @@ |
565 | 565 | | (HTML_H6_CLOSE { CX->endTableOfContentsItem(CX); }))|EOF) |
566 | 566 | ; |
567 | 567 | |
568 | | -link_element: internal_link |
| 568 | +link_element: internal_link | external_link | media_link |
569 | 569 | ; |
| 570 | + |
570 | 571 | internal_link: complete_internal_link | begin_internal_link | end_internal_link |
571 | 572 | ; |
572 | 573 | |
— | — | @@ -586,3 +587,46 @@ |
587 | 588 | IE(CX->endInternalLink(CX);) |
588 | 589 | } |
589 | 590 | ; |
| 591 | + |
| 592 | +external_link: complete_external_link | begin_external_link | end_external_link |
| 593 | + ; |
| 594 | + |
| 595 | +complete_external_link: linkToken = EXTERNAL_LINK |
| 596 | + { |
| 597 | + IE(CX->onExternalLink(CX, $linkToken->custom);) |
| 598 | + } |
| 599 | + ; |
| 600 | + |
| 601 | +begin_external_link: linkToken = BEGIN_EXTERNAL_LINK |
| 602 | + { |
| 603 | + IE(CX->beginExternalLink(CX, $linkToken->custom);) |
| 604 | + } |
| 605 | + ; |
| 606 | + |
| 607 | +end_external_link: END_EXTERNAL_LINK |
| 608 | + { |
| 609 | + IE(CX->endExternalLink(CX);) |
| 610 | + } |
| 611 | + ; |
| 612 | + |
| 613 | +media_link: complete_media_link | begin_media_link | end_media_link |
| 614 | + ; |
| 615 | + |
| 616 | +complete_media_link: linkToken = MEDIA_LINK |
| 617 | + { |
| 618 | + IE(CX->onMediaLink(CX, $linkToken->custom);) |
| 619 | + } |
| 620 | + ; |
| 621 | + |
| 622 | +begin_media_link: linkToken = BEGIN_MEDIA_LINK |
| 623 | + { |
| 624 | + IE(CX->beginMediaLink(CX, $linkToken->custom);) |
| 625 | + } |
| 626 | + ; |
| 627 | + |
| 628 | +end_media_link: END_MEDIA_LINK |
| 629 | + { |
| 630 | + IE(CX->endMediaLink(CX);) |
| 631 | + } |
| 632 | + ; |
| 633 | + |
Index: trunk/parsers/libmwparser/src/mwlinks.c |
— | — | @@ -1,10 +1,16 @@ |
2 | | -#include <antlr3defs.h> |
| 2 | +#include <antlr3.h> |
3 | 3 | #include <mwparsercontext.h> |
4 | 4 | #include <mwlinks.h> |
5 | 5 | |
6 | 6 | static void beginInternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkTitle); |
7 | 7 | static void endInternalLink(MWPARSERCONTEXT *context); |
8 | 8 | static void onInternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkTitle); |
| 9 | +static void beginExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl); |
| 10 | +static void endExternalLink(MWPARSERCONTEXT *context); |
| 11 | +static void onExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl); |
| 12 | +static void beginMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr); |
| 13 | +static void endMediaLink(MWPARSERCONTEXT *context); |
| 14 | +static void onMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr); |
9 | 15 | |
10 | 16 | static void |
11 | 17 | beginInternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkTitle) |
— | — | @@ -32,11 +38,74 @@ |
33 | 39 | l->onInternalLink(l, linkTitle); |
34 | 40 | } |
35 | 41 | |
| 42 | +static void |
| 43 | +beginExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl) |
| 44 | +{ |
| 45 | + MW_DELAYED_CALL( context, beginExternalLink, endExternalLink, linkUrl, NULL); |
| 46 | + MW_BEGIN_ORDERED_FORMAT(context, beginExternalLink, endExternalLink, linkUrl, NULL, false); |
| 47 | + MWLISTENER *l = &context->listener; |
| 48 | + l->beginExternalLink(l, linkUrl); |
| 49 | +} |
36 | 50 | |
| 51 | +static void |
| 52 | +endExternalLink(MWPARSERCONTEXT *context) |
| 53 | +{ |
| 54 | + MW_SKIP_IF_EMPTY( context, beginExternalLink, endExternalLink, NULL); |
| 55 | + MW_END_ORDERED_FORMAT(context, beginExternalLink, endExternalLink, NULL); |
| 56 | + MWLISTENER *l = &context->listener; |
| 57 | + l->endExternalLink(l); |
| 58 | +} |
| 59 | + |
| 60 | +static void |
| 61 | +onExternalLink(MWPARSERCONTEXT *context, pANTLR3_STRING linkUrl) |
| 62 | +{ |
| 63 | + MW_TRIGGER_DELAYED_CALLS(context); |
| 64 | + MWLISTENER *l = &context->listener; |
| 65 | + l->onExternalLink(l, linkUrl); |
| 66 | +} |
| 67 | + |
| 68 | +static void |
| 69 | +beginMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr) |
| 70 | +{ |
| 71 | + MW_DELAYED_CALL( context, beginMediaLink, endMediaLink, attr, NULL); |
| 72 | + MW_BEGIN_ORDERED_FORMAT(context, beginMediaLink, endMediaLink, attr, NULL, false); |
| 73 | + |
| 74 | + pANTLR3_STRING linkUrl = attr->get(attr, attr->count - 1); |
| 75 | + attr->remove(attr, attr->count - 1); |
| 76 | + MWLISTENER *l = &context->listener; |
| 77 | + l->beginMediaLink(l, linkUrl, attr); |
| 78 | +} |
| 79 | + |
| 80 | +static void |
| 81 | +endMediaLink(MWPARSERCONTEXT *context) |
| 82 | +{ |
| 83 | + MW_SKIP_IF_EMPTY( context, beginMediaLink, endMediaLink, NULL); |
| 84 | + MW_END_ORDERED_FORMAT(context, beginMediaLink, endMediaLink, NULL); |
| 85 | + MWLISTENER *l = &context->listener; |
| 86 | + l->endMediaLink(l); |
| 87 | +} |
| 88 | + |
| 89 | +static void |
| 90 | +onMediaLink(MWPARSERCONTEXT *context, pANTLR3_VECTOR attr) |
| 91 | +{ |
| 92 | + MW_TRIGGER_DELAYED_CALLS(context); |
| 93 | + pANTLR3_STRING linkUrl = attr->get(attr, attr->count - 1); |
| 94 | + attr->remove(attr, attr->count - 1); |
| 95 | + MWLISTENER *l = &context->listener; |
| 96 | + l->onMediaLink(l, linkUrl, attr); |
| 97 | +} |
| 98 | + |
| 99 | + |
37 | 100 | void |
38 | 101 | mwLinksInit(MWPARSERCONTEXT *context) |
39 | 102 | { |
40 | 103 | context->beginInternalLink = beginInternalLink; |
41 | 104 | context->endInternalLink = endInternalLink; |
42 | 105 | context->onInternalLink = onInternalLink; |
| 106 | + context->beginExternalLink = beginExternalLink; |
| 107 | + context->endExternalLink = endExternalLink; |
| 108 | + context->onExternalLink = onExternalLink; |
| 109 | + context->beginMediaLink = beginMediaLink; |
| 110 | + context->endMediaLink = endMediaLink; |
| 111 | + context->onMediaLink = onMediaLink; |
43 | 112 | } |
Index: trunk/parsers/libmwparser/src/mwLexer.g |
— | — | @@ -1,3 +1,22 @@ |
| 2 | +/* |
| 3 | + * Copyright 2010 Andreas Jonsson |
| 4 | + * |
| 5 | + * This file is part of libmwparser. |
| 6 | + * |
| 7 | + * Libmwparser is free software: you can redistribute it and/or modify |
| 8 | + * it under the terms of the GNU General Public License as published by |
| 9 | + * the Free Software Foundation, either version 3 of the License, or |
| 10 | + * (at your option) any later version. |
| 11 | + * |
| 12 | + * This program is distributed in the hope that it will be useful, |
| 13 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | + * GNU General Public License for more details. |
| 16 | + * |
| 17 | + * You should have received a copy of the GNU General Public License |
| 18 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | + */ |
| 20 | + |
2 | 21 | lexer grammar mwLexer; |
3 | 22 | |
4 | 23 | /* |
— | — | @@ -9,12 +28,13 @@ |
10 | 29 | } |
11 | 30 | |
12 | 31 | tokens { |
13 | | - BEGIN_EXTERNAL_LINK; |
14 | 32 | EXTERNAL_LINK; |
15 | 33 | TABLE_HEADING; |
16 | 34 | TABLE_HEADING_INLINE; |
17 | 35 | TABLE_CAPTION; |
18 | 36 | BEGIN_INTERNAL_LINK; |
| 37 | + BEGIN_EXTERNAL_LINK; |
| 38 | + BEGIN_MEDIA_LINK; |
19 | 39 | HORIZONTAL_RULE; |
20 | 40 | NOWIKI; |
21 | 41 | BEGIN_HEADING; |
— | — | @@ -145,6 +165,11 @@ |
146 | 166 | #define MW_SETTYPE(type) do { _type = type; } while (0) |
147 | 167 | #define MW_EMITNEW(type, text) do { EMITNEW(NEW_TOK(type, text)); } while (0) |
148 | 168 | #define MW_HIDE() do { LEXSTATE->channel = HIDDEN; } while (0) |
| 169 | +#define D_(msg) (fputs(msg, stderr), fputc('\n', stderr), printLexerInfo(LEXER), true) |
| 170 | +#define NEW_TOK(type, text) (newToken(LEXSTATE->tokFactory, type, text)) |
| 171 | +#define SUBSTR1(start) (INPUT->substr(INPUT, start, GETCHARINDEX() - 1)) |
| 172 | +#define SUBSTR2(start, end) (INPUT->substr(INPUT, start, end)) |
| 173 | +#define HEADING_LEVEL USER1 |
149 | 174 | |
150 | 175 | static pANTLR3_COMMON_TOKEN |
151 | 176 | newToken(pANTLR3_TOKEN_FACTORY factory, ANTLR3_UINT32 type, pANTLR3_STRING text) |
— | — | @@ -174,6 +199,12 @@ |
175 | 200 | speculationFailure(context, sizeof(failures)/sizeof(MWLEXERSPECULATION*), failures); \ |
176 | 201 | } while (0) |
177 | 202 | |
| 203 | + |
| 204 | +/** |
| 205 | + * Initiate a speculative execution. |
| 206 | + * @param context |
| 207 | + * @param speculation Storage space for the context backup. |
| 208 | + */ |
178 | 209 | static void |
179 | 210 | speculationInitiate(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation) |
180 | 211 | { |
— | — | @@ -186,18 +217,32 @@ |
187 | 218 | speculation->istreamIndex = context->istreamIndex++; |
188 | 219 | } |
189 | 220 | |
| 221 | +/** |
| 222 | + * Indicate that a speculative execution has succeeded. |
| 223 | + */ |
190 | 224 | static void |
191 | 225 | speculationSuccess(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation) |
192 | 226 | { |
193 | 227 | speculation->active = false; |
194 | 228 | } |
195 | 229 | |
| 230 | +/** |
| 231 | + * Abort speculative execution, without restoring the context. |
| 232 | + */ |
196 | 233 | static void |
197 | 234 | speculationAbort(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation) |
198 | 235 | { |
199 | 236 | speculation->active = false; |
200 | 237 | } |
201 | 238 | |
| 239 | +/** |
| 240 | + * Indicate that one or several speculative executions have failed and |
| 241 | + * restore the context to the initiation point of the "oldest" |
| 242 | + * speculation. |
| 243 | + * @param context |
| 244 | + * @param n Number of speculations in the array. |
| 245 | + * @param speculation Array of speculation backup storage structures. |
| 246 | + */ |
202 | 247 | static void |
203 | 248 | speculationFailure(MWLEXERCONTEXT *context, int n, MWLEXERSPECULATION *speculation[]) |
204 | 249 | { |
— | — | @@ -221,11 +266,19 @@ |
222 | 267 | } |
223 | 268 | } |
224 | 269 | |
| 270 | +/** |
| 271 | + * Check if a particular speculation has already been tried at the |
| 272 | + * current character index. |
| 273 | + * @return @c true if the speculation has already been tried and failed. |
| 274 | + */ |
225 | 275 | static bool |
226 | 276 | alreadyTried(MWLEXERCONTEXT *context, MWLEXERSPECULATION *speculation) { |
227 | 277 | return speculation->failurePoint == context->lexer->getCharIndex(context->lexer); |
228 | 278 | } |
229 | 279 | |
| 280 | +/** |
| 281 | + * Action to execute at the end of file. |
| 282 | + */ |
230 | 283 | static void |
231 | 284 | eofAction(void *param) |
232 | 285 | { |
— | — | @@ -239,18 +292,6 @@ |
240 | 293 | |
241 | 294 | |
242 | 295 | |
243 | | -#define ACTIVATE_EOF_ACTION do { \ |
244 | | - LEXER->eofAction = eofAction; \ |
245 | | - LEXER->eofActionParameter = CX; \ |
246 | | -} while (0) |
247 | | - |
248 | | -#define D_(msg) (fputs(msg, stderr), fputc('\n', stderr), printLexerInfo(LEXER), true) |
249 | | - |
250 | | -#define NEW_TOK(type, text) (newToken(LEXSTATE->tokFactory, type, text)) |
251 | | -#define SUBSTR1(start) (INPUT->substr(INPUT, start, GETCHARINDEX() - 1)) |
252 | | -#define SUBSTR2(start, end) (INPUT->substr(INPUT, start, end)) |
253 | | - |
254 | | -#define HEADING_LEVEL USER1 |
255 | 296 | } |
256 | 297 | |
257 | 298 | NOWIKI |
— | — | @@ -451,6 +492,7 @@ |
452 | 493 | } |
453 | 494 | '[[' |
454 | 495 | { |
| 496 | + SPECULATION_FAILURE(CX, &CX->externalLinkSpeculation); |
455 | 497 | mark = MARK(); |
456 | 498 | } |
457 | 499 | ( |
— | — | @@ -468,7 +510,10 @@ |
469 | 511 | ) |
470 | 512 | ) |
471 | 513 | { |
472 | | - if (!fail && isCompleteLink && CX->isLegalTitle(CX, linkTitle)) { |
| 514 | + if (!fail && CX->isMediaLinkTitle(CX, linkTitle)) { |
| 515 | + MW_EMIT(); |
| 516 | + SPECULATION_FAILURE(CX, &CX->internalLinkSpeculation); |
| 517 | + } else if (!fail && isCompleteLink && CX->isLegalTitle(CX, linkTitle)) { |
473 | 518 | ACTION(CUSTOM = linkTitle;) |
474 | 519 | speculationAbort(CX, &CX->internalLinkSpeculation); |
475 | 520 | } else if (!fail && CX->isLegalTitle(CX, linkTitle)) { |
— | — | @@ -496,6 +541,68 @@ |
497 | 542 | } |
498 | 543 | ; |
499 | 544 | |
| 545 | +MEDIA_LINK |
| 546 | +@init{ |
| 547 | + ANTLR3_MARKER mark; |
| 548 | + pANTLR3_STRING linkTitle; |
| 549 | + bool isCompleteLink = false; |
| 550 | + bool isLegalTitle = false; |
| 551 | + bool fail = false; |
| 552 | + pANTLR3_VECTOR attr = NULL; |
| 553 | +}: {!CX->mediaLinkOpenDisabled && !alreadyTried(CX, &CX->mediaLinkSpeculation)}?=> |
| 554 | + ( |
| 555 | + { |
| 556 | + speculationInitiate(CX, &CX->mediaLinkSpeculation); |
| 557 | + } |
| 558 | + '[[' |
| 559 | + { |
| 560 | + mark = MARK(); |
| 561 | + } |
| 562 | + ( |
| 563 | + SPACE_TAB_CHAR* |
| 564 | + ( |
| 565 | + INTERNAL_LINK_TITLE[&linkTitle] |
| 566 | + SPACE_TAB_CHAR* |
| 567 | + ( |
| 568 | + ']]' {isCompleteLink=true;} |
| 569 | + | '|' MEDIA_LINK_ATTRIBUTES[&attr] |
| 570 | + | {fail = true;} |
| 571 | + ) |
| 572 | + ) |
| 573 | + | {fail = true;} |
| 574 | + ) |
| 575 | + ) |
| 576 | + { |
| 577 | + if (!fail && CX->isMediaLinkTitle(CX, linkTitle)) { |
| 578 | + if (attr == NULL) { |
| 579 | + attr = CX->vectorFactory->newVector(CX->vectorFactory); |
| 580 | + } |
| 581 | + /* |
| 582 | + * We'll pack the link title in the attribute vector. |
| 583 | + * The parser will unpack it and send it as a separate |
| 584 | + * parameter to the client. |
| 585 | + */ |
| 586 | + attr->add(attr, linkTitle, NULL); |
| 587 | + ACTION(CUSTOM = attr;) |
| 588 | + if (isCompleteLink) { |
| 589 | + speculationAbort(CX, &CX->mediaLinkSpeculation); |
| 590 | + } else { |
| 591 | + onMediaLinkOpen(CX); |
| 592 | + MW_SETTYPE(BEGIN_MEDIA_LINK); |
| 593 | + } |
| 594 | + } else { |
| 595 | + speculationAbort(CX, &CX->mediaLinkSpeculation); |
| 596 | + REWIND(mark); |
| 597 | + MW_SETTYPE(SPECIAL); |
| 598 | + } |
| 599 | + } |
| 600 | + ; |
| 601 | + |
| 602 | +fragment |
| 603 | +MEDIA_LINK_ATTRIBUTES[pANTLR3_VECTOR *attr]: |
| 604 | + (MEDIA_LINK_ATTRIBUTE[&attr])* |
| 605 | + ; |
| 606 | + |
500 | 607 | END_INTERNAL_LINK: {!CX->internalLinkCloseDisabled}?=> ']]' |
501 | 608 | { |
502 | 609 | speculationSuccess(CX, &CX->internalLinkSpeculation); |
— | — | @@ -503,13 +610,80 @@ |
504 | 611 | } |
505 | 612 | ; |
506 | 613 | |
507 | | -/* |
| 614 | +END_MEDIA_LINK: {!CX->mediaLinkCloseDisabled}?=> ']]' |
| 615 | + { |
| 616 | + speculationSuccess(CX, &CX->mediaLinkSpeculation); |
| 617 | + onMediaLinkClose(CX); |
| 618 | + } |
| 619 | + ; |
| 620 | + |
| 621 | +EXTERNAL_LINK |
| 622 | +@init{ |
| 623 | + bool success = true; |
| 624 | + bool complete = true; |
| 625 | + ANTLR3_MARKER urlStart; |
| 626 | + ANTLR3_MARKER urlEnd; |
| 627 | +}: {!CX->externalLinkOpenDisabled && !alreadyTried(CX, &CX->externalLinkSpeculation)}?=> |
| 628 | + { |
| 629 | + speculationInitiate(CX, &CX->externalLinkSpeculation); |
| 630 | + } |
| 631 | + ('[' ({urlStart = GETCHARINDEX();} URL_PROTOCOL |
| 632 | + (( {urlEnd = GETCHARINDEX();} URL_CHAR)+ SPACE_TAB_CHAR* (']' | {complete = false;}) |
| 633 | + | {success = false;}) |
| 634 | + | {success = false;}) ) |
| 635 | + { |
| 636 | + if (success) { |
| 637 | + ACTION(CUSTOM = SUBSTR2(urlStart, urlEnd);) |
| 638 | + if (!complete) { |
| 639 | + MW_SETTYPE(BEGIN_EXTERNAL_LINK); |
| 640 | + onExternalLinkOpen(CX); |
| 641 | + } else { |
| 642 | + speculationAbort(CX, &CX->externalLinkSpeculation); |
| 643 | + } |
| 644 | + } else { |
| 645 | + speculationAbort(CX, &CX->externalLinkSpeculation); |
| 646 | + MW_SETTYPE(SPECIAL); |
| 647 | + } |
| 648 | + } |
| 649 | + ; |
| 650 | + |
| 651 | +END_EXTERNAL_LINK: {!CX->externalLinkCloseDisabled}?=> ']' |
| 652 | + { |
| 653 | + speculationSuccess(CX, &CX->externalLinkSpeculation); |
| 654 | + onExternalLinkClose(CX); |
| 655 | + } |
| 656 | + ; |
| 657 | + |
| 658 | +EXTERNAL_LINK_FAIL_CONDITION: {CX->externalLinkSpeculation.active}?=> |
| 659 | + '[' |
| 660 | + { |
| 661 | + /* |
| 662 | + * We must actually emit this token before failing the |
| 663 | + * speculation, otherwise it will be emitted _after_ |
| 664 | + * the token stream has been reverted. |
| 665 | + */ |
| 666 | + MW_EMIT(); |
| 667 | + SPECULATION_FAILURE(CX, &CX->externalLinkSpeculation); |
| 668 | + } |
| 669 | + ; |
| 670 | + |
| 671 | + |
508 | 672 | fragment |
509 | | -EXTERNAL_LINK_TITLE: |
510 | | -//'/\[(\b(' . wfUrlProtocols() . ')'. |
511 | | -// '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S' |
512 | | - LETTER '://' |
513 | | -*/ |
| 673 | +URL_PROTOCOL: |
| 674 | + 'http://' | |
| 675 | + 'https://' | |
| 676 | + 'ftp://' | |
| 677 | + 'irc://' | |
| 678 | + 'gopher://' | |
| 679 | + 'telnet://' | |
| 680 | + 'nntp://' | // @bug 3808 RFC 1738 |
| 681 | + 'worldwind://'| |
| 682 | + 'mailto:' | |
| 683 | + 'news:' | |
| 684 | + 'svn://' | |
| 685 | + 'git://' | |
| 686 | + 'mms://' |
| 687 | + ; |
514 | 688 | |
515 | 689 | fragment |
516 | 690 | INTERNAL_LINK_TITLE[pANTLR3_STRING *linkTitle] |
— | — | @@ -564,7 +738,7 @@ |
565 | 739 | ('\r\n' | NEWLINE_CHAR) { |
566 | 740 | onEol(CX); |
567 | 741 | speculationSuccess(CX, &CX->indentSpeculation); |
568 | | - SPECULATION_FAILURE(CX, &CX->headingSpeculation); |
| 742 | + SPECULATION_FAILURE(CX, &CX->headingSpeculation, &CX->externalLinkSpeculation); |
569 | 743 | } |
570 | 744 | ; |
571 | 745 | |
— | — | @@ -968,7 +1142,9 @@ |
969 | 1143 | ')'|'*'|'+'|','|'-'|'.'|'/'|':'| |
970 | 1144 | ';'|'<'|'='|'>'|'?'|'@'|'['|'\\'| |
971 | 1145 | ']'|'^'|'_'|'`'|'{'|'|'|'}'|'~'; |
| 1146 | +fragment URL_CHAR: ~('<'|'>'|'['|']'|'\u0000' .. '\u0020'|'\u007F'); |
972 | 1147 | |
| 1148 | + |
973 | 1149 | /* This should map the latin-1 range 0x80-0xff to the corresponding unicode codepoints: */ |
974 | 1150 | fragment LEGAL_TITLE_CHAR_RANGE: 'a' |
975 | 1151 | ; |
Index: trunk/parsers/libmwparser/src/mwlexercontext.c |
— | — | @@ -1,6 +1,27 @@ |
| 2 | +/* |
| 3 | + * Copyright 2010 Andreas Jonsson |
| 4 | + * |
| 5 | + * This file is part of libmwparser. |
| 6 | + * |
| 7 | + * Libmwparser is free software: you can redistribute it and/or modify |
| 8 | + * it under the terms of the GNU General Public License as published by |
| 9 | + * the Free Software Foundation, either version 3 of the License, or |
| 10 | + * (at your option) any later version. |
| 11 | + * |
| 12 | + * This program is distributed in the hope that it will be useful, |
| 13 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | + * GNU General Public License for more details. |
| 16 | + * |
| 17 | + * You should have received a copy of the GNU General Public License |
| 18 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | + */ |
| 20 | + |
2 | 21 | #include <antlr3.h> |
3 | 22 | #include <mwlexercontext.h> |
4 | 23 | #include <assert.h> |
| 24 | +#include <iconv.h> |
| 25 | +#include <errno.h> |
5 | 26 | |
6 | 27 | #include <mwLexer.h> |
7 | 28 | #include "mwlexerpredicates.h" |
— | — | @@ -22,8 +43,12 @@ |
23 | 44 | static bool MWLexerContextReset(MWLEXERCONTEXT *context); |
24 | 45 | |
25 | 46 | static bool isLegalTitle(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle); |
26 | | -static bool isLegalExternalLink(MWLEXERCONTEXT *context, pANTLR3_STRING url); |
| 47 | +static bool isMediaLinkTitle(MWLEXERCONTEXT *context, pANTLR3_STRING url); |
27 | 48 | |
| 49 | +static int openConversion(MWLEXERCONTEXT *context, ANTLR3_UINT8 encoding); |
| 50 | +static const wchar_t *mwAntlr3stows(MWLEXERCONTEXT *context, pANTLR3_STRING string, void **state); |
| 51 | +static void mwFreeStringConversionState(void *state); |
| 52 | + |
28 | 53 | /** |
29 | 54 | * Set the characters allowed in a page title. |
30 | 55 | * |
— | — | @@ -32,9 +57,6 @@ |
33 | 58 | */ |
34 | 59 | static int setLegalTitleChars(MWLEXERCONTEXT *context, const wchar_t *posixExtendedRegexp); |
35 | 60 | |
36 | | - |
37 | | - |
38 | | - |
39 | 61 | MWLEXERCONTEXT *MWLexerContextNew(pANTLR3_LEXER lexer) |
40 | 62 | { |
41 | 63 | MWLEXERCONTEXT *context = ANTLR3_MALLOC(sizeof(*context)); |
— | — | @@ -51,7 +73,7 @@ |
52 | 74 | * specially. |
53 | 75 | */ |
54 | 76 | int err = regwcomp(&context->legalTitleChars, |
55 | | - L"^[- %!\"$&'()*,.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+]+$", |
| 77 | + L"^[- %!\"$&'()*,./0-9:;=?@A-Z\\\\^_`a-z~\x80-\xFF+]+$", |
56 | 78 | REG_EXTENDED); |
57 | 79 | if (err) { |
58 | 80 | char errbuf[200]; |
— | — | @@ -61,6 +83,18 @@ |
62 | 84 | return NULL; |
63 | 85 | } |
64 | 86 | |
| 87 | + err = regwcomp(&context->mediaLinkTitle, |
| 88 | + L"^File:[- %!\"$&'()*,./0-9:;=?@A-Z\\\\^_`a-z~\x80-\xFF+]+$", |
| 89 | + REG_EXTENDED); |
| 90 | + if (err) { |
| 91 | + char errbuf[200]; |
| 92 | + regerror(err, &context->mediaLinkTitle, errbuf, 200); |
| 93 | + fprintf(stderr, "Failed to compile media link title regular expression: %s\n", errbuf); |
| 94 | + context->free(context); |
| 95 | + return NULL; |
| 96 | + } |
| 97 | + |
| 98 | + |
65 | 99 | #define NULL_FAIL(p) do { \ |
66 | 100 | if (p == NULL) { \ |
67 | 101 | context->free(context); \ |
— | — | @@ -77,6 +111,8 @@ |
78 | 112 | context->headingSpeculation.contextBackup.blockContextStack = NULL; |
79 | 113 | context->mediaLinkSpeculation.contextBackup.blockContextStack = NULL; |
80 | 114 | |
| 115 | + context->conversionState = (iconv_t)-1; |
| 116 | + |
81 | 117 | context->vectorFactory = antlr3VectorFactoryNew(ANTLR3_SIZE_HINT); |
82 | 118 | NULL_FAIL(context->vectorFactory); |
83 | 119 | |
— | — | @@ -95,13 +131,18 @@ |
96 | 132 | NULL_FAIL(context->mediaLinkSpeculation.contextBackup.blockContextStack); |
97 | 133 | |
98 | 134 | context->isLegalTitle = isLegalTitle; |
99 | | - context->isLegalExternalLink = isLegalExternalLink; |
| 135 | + context->isMediaLinkTitle = isMediaLinkTitle; |
100 | 136 | |
101 | 137 | if (!context->reset(context)) { |
102 | 138 | context->free(context); |
103 | 139 | return NULL; |
104 | 140 | } |
105 | 141 | |
| 142 | + if (openConversion(context, context->lexer->input->encoding) < 0) { |
| 143 | + context->free(context); |
| 144 | + return NULL; |
| 145 | + } |
| 146 | + |
106 | 147 | return context; |
107 | 148 | } |
108 | 149 | |
— | — | @@ -186,8 +227,12 @@ |
187 | 228 | context->mediaLinkSpeculation.contextBackup.blockContextStack |
188 | 229 | ->free(context->mediaLinkSpeculation.contextBackup.blockContextStack); |
189 | 230 | } |
| 231 | + if (context->conversionState != (iconv_t)-1) { |
| 232 | + iconv_close(context->conversionState); |
| 233 | + } |
190 | 234 | |
191 | 235 | regfree(&context->legalTitleChars); |
| 236 | + regfree(&context->mediaLinkTitle); |
192 | 237 | ANTLR3_FREE(lexerContext); |
193 | 238 | } |
194 | 239 | |
— | — | @@ -195,20 +240,22 @@ |
196 | 241 | isLegalTitle(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle) |
197 | 242 | { |
198 | 243 | void *state; |
199 | | - const wchar_t *wsLinkTitle = mwAntlr3stows(linkTitle, &state); |
| 244 | + const wchar_t *wsLinkTitle = mwAntlr3stows(context, linkTitle, &state); |
200 | 245 | regmatch_t match; |
201 | 246 | int err = regwexec(&context->legalTitleChars, wsLinkTitle, 1, &match, 0); |
202 | 247 | mwFreeStringConversionState(state); |
203 | | - char buf[256]; |
204 | | - regerror(err, &context->legalTitleChars, buf, 256); |
205 | | - //printf("result was: %d, message: %s, string: '%ls'\n", err, buf, linkTitle->chars); |
206 | | - return true; |
| 248 | + return err == 0; |
207 | 249 | } |
208 | 250 | |
209 | 251 | static bool |
210 | | -isLegalExternalLink(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle) |
| 252 | +isMediaLinkTitle(MWLEXERCONTEXT *context, pANTLR3_STRING linkTitle) |
211 | 253 | { |
212 | | - return true; |
| 254 | + void *state; |
| 255 | + const wchar_t *wsLinkTitle = mwAntlr3stows(context, linkTitle, &state); |
| 256 | + regmatch_t match; |
| 257 | + int err = regwexec(&context->mediaLinkTitle, wsLinkTitle, 1, &match, 0); |
| 258 | + mwFreeStringConversionState(state); |
| 259 | + return err == 0; |
213 | 260 | } |
214 | 261 | |
215 | 262 | void printLexerInfo(pANTLR3_LEXER lexer) |
— | — | @@ -223,3 +270,92 @@ |
224 | 271 | lexer->getCharIndex(lexer)); |
225 | 272 | } |
226 | 273 | |
| 274 | + |
| 275 | +static int |
| 276 | +openConversion(MWLEXERCONTEXT *context, ANTLR3_UINT8 encoding) |
| 277 | +{ |
| 278 | + static struct { |
| 279 | + ANTLR3_UINT8 antlrEncoding; |
| 280 | + const char* iconvEncoding; |
| 281 | + } encodingTable[] = { |
| 282 | + { ANTLR3_ENC_8BIT, "ASCII" }, |
| 283 | + { ANTLR3_ENC_UTF8, "UTF-8" }, |
| 284 | + { ANTLR3_ENC_UTF16, "UTF-16" }, |
| 285 | + { ANTLR3_ENC_UTF16BE, "UTF-16BE" }, |
| 286 | + { ANTLR3_ENC_UTF16LE, "UTF-16LE" }, |
| 287 | + { ANTLR3_ENC_UTF32, "UTF-32" }, |
| 288 | + { ANTLR3_ENC_UTF32BE, "UTF-32BE" }, |
| 289 | + { ANTLR3_ENC_UTF32LE, "UTF-32LE" }, |
| 290 | + { ANTLR3_ENC_EBCDIC, "EBCDIC-INT" }, |
| 291 | + { 0 , NULL } |
| 292 | + }; |
| 293 | + |
| 294 | + int i; |
| 295 | + for (i = 0; encodingTable[i].iconvEncoding != NULL; i++) { |
| 296 | + if (encodingTable[i].antlrEncoding == encoding) { |
| 297 | + break; |
| 298 | + } |
| 299 | + } |
| 300 | + if (encodingTable[i].iconvEncoding == NULL) { |
| 301 | + errno = EINVAL; |
| 302 | + return -1; |
| 303 | + } |
| 304 | +#if (SIZEOF_WCHAR_T == 4) |
| 305 | +#ifdef WORDS_BIGENDIAN |
| 306 | + context->conversionState = iconv_open("UTF-32BE", encodingTable[i].iconvEncoding); |
| 307 | +#else |
| 308 | + context->conversionState = iconv_open("UTF-32LE", encodingTable[i].iconvEncoding); |
| 309 | +#endif |
| 310 | +#elif (SIZEOF_WCHAR_T == 2) |
| 311 | +#ifdef WORDS_BIGENDIAN |
| 312 | + context->conversionState = iconv_open("UTF-16BE", encodingTable[i].iconvEncoding); |
| 313 | +#else |
| 314 | + context->conversionState = iconv_open("UTF-16LE", encodingTable[i].iconvEncoding); |
| 315 | +#endif |
| 316 | +#else |
| 317 | +#error Unsupported size of wchar_t! |
| 318 | +#endif |
| 319 | + if (context->conversionState == (iconv_t)-1) { |
| 320 | + return -1; |
| 321 | + } |
| 322 | +} |
| 323 | + |
| 324 | +static size_t |
| 325 | +convertString(MWLEXERCONTEXT *context, ANTLR3_STRING *string, void *buf, size_t bufSize) { |
| 326 | + size_t outBytesLeft = bufSize; |
| 327 | + size_t inBytesLeft = string->size; |
| 328 | + char *inBuf = string->chars; |
| 329 | + char *outBuf = buf; |
| 330 | + |
| 331 | + size_t ret = iconv(context->conversionState, NULL, NULL, NULL, NULL); |
| 332 | + |
| 333 | + ret = iconv(context->conversionState, &inBuf, &inBytesLeft, &outBuf, &outBytesLeft); |
| 334 | + |
| 335 | + return ret; |
| 336 | +} |
| 337 | + |
| 338 | + |
| 339 | +static const wchar_t * |
| 340 | +mwAntlr3stows(MWLEXERCONTEXT *context, pANTLR3_STRING string, void **state) |
| 341 | +{ |
| 342 | + size_t bufSize = (string->len + 1) * sizeof(wchar_t); |
| 343 | + wchar_t *buf = ANTLR3_MALLOC(bufSize); |
| 344 | + |
| 345 | + size_t ret = convertString(context, string, buf, bufSize); |
| 346 | + |
| 347 | + if (ret == (size_t)-1) { |
| 348 | + ANTLR3_FREE(buf); |
| 349 | + perror(NULL); |
| 350 | + return NULL; |
| 351 | + } |
| 352 | + |
| 353 | + *state = buf; |
| 354 | + |
| 355 | + return buf; |
| 356 | +} |
| 357 | + |
| 358 | +static void |
| 359 | +mwFreeStringConversionState(void *state) |
| 360 | +{ |
| 361 | + ANTLR3_FREE(state); |
| 362 | +} |
Index: trunk/parsers/libmwparser/src/tracingcontext.c |
— | — | @@ -83,6 +83,12 @@ |
84 | 84 | static void TCBeginInternalLink(MWLISTENER *listener, pANTLR3_STRING linkTitle); |
85 | 85 | static void TCEndInternalLink(MWLISTENER *listener); |
86 | 86 | static void TCOnInternalLink(MWLISTENER *listener, pANTLR3_STRING linkTitle); |
| 87 | +static void TCBeginExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl); |
| 88 | +static void TCEndExternalLink(MWLISTENER *listener); |
| 89 | +static void TCOnExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl); |
| 90 | +static void TCBeginMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr); |
| 91 | +static void TCEndMediaLink(MWLISTENER *listener); |
| 92 | +static void TCOnMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr); |
87 | 93 | static void TCBeginHtmlU(MWLISTENER *listener, pANTLR3_VECTOR attributes); |
88 | 94 | static void TCEndHtmlU(MWLISTENER *listener); |
89 | 95 | static void TCBeginHtmlDel(MWLISTENER *listener, pANTLR3_VECTOR attributes); |
— | — | @@ -196,6 +202,12 @@ |
197 | 203 | listener->beginInternalLink = TCBeginInternalLink; |
198 | 204 | listener->endInternalLink = TCEndInternalLink; |
199 | 205 | listener->onInternalLink = TCOnInternalLink; |
| 206 | + listener->beginExternalLink = TCBeginExternalLink; |
| 207 | + listener->endExternalLink = TCEndExternalLink; |
| 208 | + listener->onExternalLink = TCOnExternalLink; |
| 209 | + listener->beginMediaLink = TCBeginMediaLink; |
| 210 | + listener->endMediaLink = TCEndMediaLink; |
| 211 | + listener->onMediaLink = TCOnMediaLink; |
200 | 212 | listener->beginBulletList = TCBeginBulletList; |
201 | 213 | listener->endBulletList = TCEndBulletList; |
202 | 214 | listener->beginBulletListItem = TCBeginBulletListItem; |
— | — | @@ -452,6 +464,56 @@ |
453 | 465 | } |
454 | 466 | |
455 | 467 | static void |
| 468 | +TCBeginExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl) |
| 469 | +{ |
| 470 | + TCPrintIndent(listener); |
| 471 | + printf("BEGIN EXTERNAL LINK[%s]\n", linkUrl->chars); |
| 472 | + TCIncreaseIndent(listener); |
| 473 | +} |
| 474 | + |
| 475 | +static void |
| 476 | +TCEndExternalLink(MWLISTENER *listener) |
| 477 | +{ |
| 478 | + TCDecreaseIndent(listener); |
| 479 | + TCPrintIndent(listener); |
| 480 | + printf("END EXTERNAL LINK\n"); |
| 481 | +} |
| 482 | + |
| 483 | +static void |
| 484 | +TCOnExternalLink(MWLISTENER *listener, pANTLR3_STRING linkUrl) |
| 485 | +{ |
| 486 | + TCPrintIndent(listener); |
| 487 | + printf("EXTERNAL LINK[%s]\n", linkUrl->chars); |
| 488 | +} |
| 489 | + |
| 490 | +static void |
| 491 | +TCBeginMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr) |
| 492 | +{ |
| 493 | + TCPrintIndent(listener); |
| 494 | + printf("BEGIN MEDIA LINK[%s]", linkUrl->chars); |
| 495 | + TCPrintAttributes(attr); |
| 496 | + printf("\n"); |
| 497 | + TCIncreaseIndent(listener); |
| 498 | +} |
| 499 | + |
| 500 | +static void |
| 501 | +TCEndMediaLink(MWLISTENER *listener) |
| 502 | +{ |
| 503 | + TCDecreaseIndent(listener); |
| 504 | + TCPrintIndent(listener); |
| 505 | + printf("END MEDIA LINK\n"); |
| 506 | +} |
| 507 | + |
| 508 | +static void |
| 509 | +TCOnMediaLink(MWLISTENER *listener, pANTLR3_STRING linkUrl, pANTLR3_VECTOR attr) |
| 510 | +{ |
| 511 | + TCPrintIndent(listener); |
| 512 | + printf("MEDIA LINK[%s]", linkUrl->chars); |
| 513 | + TCPrintAttributes(attr); |
| 514 | + printf("\n"); |
| 515 | +} |
| 516 | + |
| 517 | +static void |
456 | 518 | TCBeginBulletList(MWLISTENER *listener, pANTLR3_VECTOR attr) |
457 | 519 | { |
458 | 520 | TCPrintIndent(listener); |