r4549 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r4548 | r4549 | r4550 >
Date:20:47, 6 August 2004
Author:timwi
Status:old
Tags:
Comment:
Somewhat less hacky fix to the French l''''homme''' problem.
Modified paths:
  • /trunk/phase3/includes/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/Parser.php
@@ -822,9 +822,6 @@
823823 $fname = 'Parser::internalParse';
824824 wfProfileIn( $fname );
825825
826 - global $fixLbug ;
827 - if ( $fixLbug ) $text = preg_replace ( '/(l|L)\'/' , '\\1&#39;' , $text ) ;
828 -
829826 $text = $this->removeHTMLtags( $text );
830827 $text = $this->replaceVariables( $text, $args );
831828
@@ -836,12 +833,10 @@
837834 $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
838835 }
839836 $text = $this->doAllQuotes( $text );
840 - // $text = $this->doExponent( $text );
841837 $text = $this->replaceExternalLinks( $text );
842838 $text = $this->doMagicLinks( $text );
843839 $text = $this->replaceInternalLinks ( $text );
844840 $text = $this->replaceInternalLinks ( $text );
845 - //$text = $this->doTokenizedParser ( $text );
846841 $text = $this->doTableStuff( $text );
847842 $text = $this->formatHeadings( $text, $isMain );
848843 $sk =& $this->mOptions->getSkin();
@@ -892,57 +887,167 @@
893888 $outtext = '';
894889 $lines = explode( "\n", $text );
895890 foreach ( $lines as $line ) {
896 - $outtext .= $this->doQuotes ( '', $line, '' ) . "\n";
 891+ $outtext .= $this->doQuotes ( $line ) . "\n";
897892 }
898893 $outtext = substr($outtext, 0,-1);
899894 wfProfileOut( $fname );
900895 return $outtext;
901896 }
902897
903 - /* private */ function doQuotes( $pre, $text, $mode ) {
904 - if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
905 - $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
906 - $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
907 - if ( substr ($m[2], 0, 1) == '\'' ) {
908 - $m[2] = substr ($m[2], 1);
909 - if ($mode == 'em') {
910 - return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'emstrong' );
911 - } else if ($mode == 'strong') {
912 - return $m1_strong . $this->doQuotes ( '', $m[2], '' );
913 - } else if (($mode == 'emstrong') || ($mode == 'both')) {
914 - return $this->doQuotes ( '', $pre.$m1_strong.$m[2], 'em' );
915 - } else if ($mode == 'strongem') {
916 - return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( '', $m[2], 'em' );
917 - } else {
918 - return $m[1] . $this->doQuotes ( '', $m[2], 'strong' );
 898+ /* private */ function doQuotes( $text ) {
 899+ $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 900+ if (count ($arr) == 1)
 901+ return $text;
 902+ else
 903+ {
 904+ $i = 0;
 905+ foreach ($arr as $r)
 906+ {
 907+ if (($i % 2) == 1)
 908+ {
 909+ # If there are ever four apostrophes, assume the first is supposed to
 910+ # be text, and the remaining three constitute mark-up for bold text.
 911+ if (strlen ($arr[$i]) == 4)
 912+ {
 913+ $arr[$i-1] .= "'";
 914+ $arr[$i] = "'''";
 915+ }
 916+ # If there are more than 5 apostrophes in a row, assume they're all
 917+ # text except for the last 5.
 918+ else if (strlen ($arr[$i]) > 5)
 919+ {
 920+ $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 921+ $arr[$i] = "'''''";
 922+ }
 923+
919924 }
920 - } else {
921 - if ($mode == 'strong') {
922 - return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'strongem' );
923 - } else if ($mode == 'em') {
924 - return $m1_em . $this->doQuotes ( '', $m[2], '' );
925 - } else if ($mode == 'emstrong') {
926 - return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( '', $m[2], 'strong' );
927 - } else if (($mode == 'strongem') || ($mode == 'both')) {
928 - return $this->doQuotes ( '', $pre.$m1_em.$m[2], 'strong' );
929 - } else {
930 - return $m[1] . $this->doQuotes ( '', $m[2], 'em' );
 925+ $i++;
 926+ }
 927+
 928+ # Now see if there's an odd or even number of "bold" and "italic"
 929+ # mark-up. There should normally be an even number of both.
 930+ $i = 0;
 931+ $numbold = 0;
 932+ $numitalics = 0;
 933+ foreach ($arr as $r)
 934+ {
 935+ if (($i % 2) == 1)
 936+ {
 937+ if (strlen ($r) == 2) $numitalics++; else
 938+ if (strlen ($r) == 3) $numbold++; else
 939+ if (strlen ($r) == 5) { $numitalics++; $numbold++; }
931940 }
 941+ $i++;
932942 }
933 - } else {
934 - $text_strong = ($text == '') ? '' : "<strong>{$text}</strong>";
935 - $text_em = ($text == '') ? '' : "<em>{$text}</em>";
936 - if ($mode == '') {
937 - return $pre . $text;
938 - } else if ($mode == 'em') {
939 - return $pre . $text_em;
940 - } else if ($mode == 'strong') {
941 - return $pre . $text_strong;
942 - } else if ($mode == 'strongem') {
943 - return (($pre == '') && ($text == '')) ? '' : "<strong>{$pre}{$text_em}</strong>";
944 - } else {
945 - return (($pre == '') && ($text == '')) ? '' : "<em>{$pre}{$text_strong}</em>";
 943+
 944+ # If there is an odd number of both bold and italics, it is likely
 945+ # that one of the bold ones was meant to be an apostrophe followed
 946+ # by italics. Which one we cannot know for certain, but it is more
 947+ # likely to be one that has a single-letter word before it.
 948+ if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 949+ {
 950+ $i = 0;
 951+ $firstsingleletterword = -1;
 952+ $firstmultiletterword = -1;
 953+ $firstspace = -1;
 954+ foreach ($arr as $r)
 955+ {
 956+ if (($i % 2 == 1) and (strlen ($r) == 3))
 957+ {
 958+ $x1 = substr ($arr[$i-1], -1);
 959+ $x2 = substr ($arr[$i-1], -2, 1);
 960+ if ($x1 == " ") {
 961+ if ($firstspace == -1) $firstspace = $i;
 962+ } else if ($x2 == " ") {
 963+ if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 964+ } else {
 965+ if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 966+ }
 967+ }
 968+ $i++;
 969+ }
 970+
 971+ # If there is a single-letter word, use it!
 972+ if ($firstsingleletterword > -1)
 973+ {
 974+ $arr [ $firstsingleletterword ] = "''";
 975+ $arr [ $firstsingleletterword-1 ] .= "'";
 976+ }
 977+ # If not, but there's a multi-letter word, use that one.
 978+ else if ($firstmultiletterword > -1)
 979+ {
 980+ $arr [ $firstmultiletterword ] = "''";
 981+ $arr [ $firstmultiletterword-1 ] .= "'";
 982+ }
 983+ # ... otherwise use the first one that has neither.
 984+ else
 985+ {
 986+ $arr [ $firstspace ] = "''";
 987+ $arr [ $firstspace-1 ] .= "'";
 988+ }
946989 }
 990+
 991+ # Now let's actually convert our apostrophic mush to HTML!
 992+ $output = '';
 993+ $buffer = '';
 994+ $state = '';
 995+ $i = 0;
 996+ foreach ($arr as $r)
 997+ {
 998+ if (($i % 2) == 0)
 999+ {
 1000+ if ($state == 'both')
 1001+ $buffer .= $r;
 1002+ else
 1003+ $output .= $r;
 1004+ }
 1005+ else
 1006+ {
 1007+ if (strlen ($r) == 2)
 1008+ {
 1009+ if ($state == 'em')
 1010+ { $output .= "</em>"; $state = ''; }
 1011+ else if ($state == 'strongem')
 1012+ { $output .= "</em>"; $state = 'strong'; }
 1013+ else if ($state == 'emstrong')
 1014+ { $output .= "</strong></em><strong>"; $state = 'strong'; }
 1015+ else if ($state == 'both')
 1016+ { $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
 1017+ else # $state can be 'strong' or ''
 1018+ { $output .= "<em>"; $state .= 'em'; }
 1019+ }
 1020+ else if (strlen ($r) == 3)
 1021+ {
 1022+ if ($state == 'strong')
 1023+ { $output .= "</strong>"; $state = ''; }
 1024+ else if ($state == 'strongem')
 1025+ { $output .= "</em></strong><em>"; $state = 'em'; }
 1026+ else if ($state == 'emstrong')
 1027+ { $output .= "</strong>"; $state = 'em'; }
 1028+ else if ($state == 'both')
 1029+ { $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
 1030+ else # $state can be 'em' or ''
 1031+ { $output .= "<strong>"; $state .= 'strong'; }
 1032+ }
 1033+ else if (strlen ($r) == 5)
 1034+ {
 1035+ if ($state == 'strong')
 1036+ { $output .= "</strong><em>"; $state = 'em'; }
 1037+ else if ($state == 'em')
 1038+ { $output .= "</em><strong>"; $state = 'strong'; }
 1039+ else if ($state == 'strongem')
 1040+ { $output .= "</em></strong>"; $state = ''; }
 1041+ else if ($state == 'emstrong')
 1042+ { $output .= "</strong></em>"; $state = ''; }
 1043+ else if ($state == 'both')
 1044+ { $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
 1045+ else # ($state == '')
 1046+ { $buffer = ''; $state = 'both'; }
 1047+ }
 1048+ }
 1049+ $i++;
 1050+ }
 1051+ return $output;
9471052 }
9481053 }
9491054

Status & tagging log