Index: trunk/phase3/includes/Parser.php |
— | — | @@ -822,9 +822,6 @@ |
823 | 823 | $fname = 'Parser::internalParse'; |
824 | 824 | wfProfileIn( $fname ); |
825 | 825 | |
826 | | - global $fixLbug ; |
827 | | - if ( $fixLbug ) $text = preg_replace ( '/(l|L)\'/' , '\\1'' , $text ) ; |
828 | | - |
829 | 826 | $text = $this->removeHTMLtags( $text ); |
830 | 827 | $text = $this->replaceVariables( $text, $args ); |
831 | 828 | |
— | — | @@ -836,12 +833,10 @@ |
837 | 834 | $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text ); |
838 | 835 | } |
839 | 836 | $text = $this->doAllQuotes( $text ); |
840 | | - // $text = $this->doExponent( $text ); |
841 | 837 | $text = $this->replaceExternalLinks( $text ); |
842 | 838 | $text = $this->doMagicLinks( $text ); |
843 | 839 | $text = $this->replaceInternalLinks ( $text ); |
844 | 840 | $text = $this->replaceInternalLinks ( $text ); |
845 | | - //$text = $this->doTokenizedParser ( $text ); |
846 | 841 | $text = $this->doTableStuff( $text ); |
847 | 842 | $text = $this->formatHeadings( $text, $isMain ); |
848 | 843 | $sk =& $this->mOptions->getSkin(); |
— | — | @@ -892,57 +887,167 @@ |
893 | 888 | $outtext = ''; |
894 | 889 | $lines = explode( "\n", $text ); |
895 | 890 | foreach ( $lines as $line ) { |
896 | | - $outtext .= $this->doQuotes ( '', $line, '' ) . "\n"; |
| 891 | + $outtext .= $this->doQuotes ( $line ) . "\n"; |
897 | 892 | } |
898 | 893 | $outtext = substr($outtext, 0,-1); |
899 | 894 | wfProfileOut( $fname ); |
900 | 895 | return $outtext; |
901 | 896 | } |
902 | 897 | |
# Convert the apostrophe mark-up in one line of wikitext to HTML.
#
# A run of two apostrophes toggles <em>, a run of three toggles <strong>
# and a run of five toggles both. Runs of other lengths are first reduced
# to one of those by treating the surplus leading apostrophes as text.
# Any mark-up still open at the end of the line is closed, so the result
# is always balanced.
#
# $text   - a single line of wikitext (the caller splits on "\n")
# returns - the line with the quote mark-up replaced by HTML tags
/* private */ function doQuotes( $text ) {
	# Odd-numbered elements of $arr are runs of two or more apostrophes,
	# even-numbered elements are the text between them.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $arr === false || count( $arr ) == 1 ) {
		# No mark-up on this line (or the split failed): nothing to do.
		return $text;
	}
	$count = count( $arr );

	# Normalise the length of every apostrophe run to 2, 3 or 5.
	for ( $i = 1; $i < $count; $i += 2 ) {
		$len = strlen( $arr[$i] );
		if ( $len == 4 ) {
			# If there are ever four apostrophes, assume the first is supposed
			# to be text, and the remaining three constitute mark-up for bold.
			$arr[$i-1] .= "'";
			$arr[$i] = "'''";
		} else if ( $len > 5 ) {
			# If there are more than 5 apostrophes in a row, assume they're
			# all text except for the last 5.
			$arr[$i-1] .= str_repeat( "'", $len - 5 );
			$arr[$i] = "'''''";
		}
	}

	# Count the "bold" and "italic" mark-up. There should normally be an
	# even number of both.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $count; $i += 2 ) {
		$len = strlen( $arr[$i] );
		if ( $len == 2 ) {
			$numitalics++;
		} else if ( $len == 3 ) {
			$numbold++;
		} else if ( $len == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	# If there is an odd number of both bold and italics, it is likely
	# that one of the bold ones was meant to be an apostrophe followed
	# by italics. Which one we cannot know for certain, but it is more
	# likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $count; $i += 2 ) {
			if ( strlen( $arr[$i] ) != 3 ) {
				continue;
			}
			$x1 = substr( $arr[$i-1], -1 );    # last character before the run
			$x2 = substr( $arr[$i-1], -2, 1 ); # the character before that
			if ( $x1 == " " ) {
				if ( $firstspace == -1 ) $firstspace = $i;
			} else if ( $x2 == " " ) {
				if ( $firstsingleletterword == -1 ) $firstsingleletterword = $i;
			} else {
				if ( $firstmultiletterword == -1 ) $firstmultiletterword = $i;
			}
		}

		# Prefer a single-letter word, then a multi-letter word, then a run
		# preceded by a space.
		if ( $firstsingleletterword > -1 ) {
			$target = $firstsingleletterword;
		} else if ( $firstmultiletterword > -1 ) {
			$target = $firstmultiletterword;
		} else {
			$target = $firstspace;
		}

		# All three candidates can still be -1 when the line contains no
		# three-apostrophe run at all (e.g. a single five-apostrophe run).
		# In that case there is nothing to reinterpret; writing to
		# $arr[-1] would append bogus elements to the array.
		if ( $target > -1 ) {
			$arr[$target] = "''";
			$arr[$target-1] .= "'";
		}
	}

	# Now let's actually convert our apostrophic mush to HTML!
	# $state names the tags currently open, outermost first ('emstrong'
	# means <em><strong>). In state 'both' a five-apostrophe run has been
	# seen but we do not yet know which tag must be the outer one, so the
	# text is collected in $buffer until the next run decides.
	$output = '';
	$buffer = '';
	$state = '';
	for ( $i = 0; $i < $count; $i++ ) {
		$r = $arr[$i];
		if ( ( $i % 2 ) == 0 ) {
			if ( $state == 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		switch ( strlen( $r ) ) {
		case 2:
			if ( $state == 'em' ) {
				$output .= "</em>"; $state = '';
			} else if ( $state == 'strongem' ) {
				$output .= "</em>"; $state = 'strong';
			} else if ( $state == 'emstrong' ) {
				$output .= "</strong></em><strong>"; $state = 'strong';
			} else if ( $state == 'both' ) {
				$output .= "<strong><em>{$buffer}</em>"; $state = 'strong';
			} else { # $state can be 'strong' or ''
				$output .= "<em>"; $state .= 'em';
			}
			break;
		case 3:
			if ( $state == 'strong' ) {
				$output .= "</strong>"; $state = '';
			} else if ( $state == 'strongem' ) {
				$output .= "</em></strong><em>"; $state = 'em';
			} else if ( $state == 'emstrong' ) {
				$output .= "</strong>"; $state = 'em';
			} else if ( $state == 'both' ) {
				$output .= "<em><strong>{$buffer}</strong>"; $state = 'em';
			} else { # $state can be 'em' or ''
				$output .= "<strong>"; $state .= 'strong';
			}
			break;
		case 5:
			if ( $state == 'strong' ) {
				$output .= "</strong><em>"; $state = 'em';
			} else if ( $state == 'em' ) {
				$output .= "</em><strong>"; $state = 'strong';
			} else if ( $state == 'strongem' ) {
				$output .= "</em></strong>"; $state = '';
			} else if ( $state == 'emstrong' ) {
				$output .= "</strong></em>"; $state = '';
			} else if ( $state == 'both' ) {
				$output .= "<em><strong>{$buffer}</strong></em>"; $state = '';
			} else { # ($state == '')
				$buffer = ''; $state = 'both';
			}
			break;
		}
	}

	# Close whatever is still open at the end of the line, innermost tag
	# first. In state 'both' the buffered text has not been written yet,
	# so it must be flushed here or it would be lost.
	switch ( $state ) {
	case 'em':
		$output .= "</em>";
		break;
	case 'strong':
		$output .= "</strong>";
		break;
	case 'strongem':
		$output .= "</em></strong>";
		break;
	case 'emstrong':
		$output .= "</strong></em>";
		break;
	case 'both':
		$output .= "<strong><em>{$buffer}</em></strong>";
		break;
	}

	return $output;
}
949 | 1054 | |