r4549 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r4548 | r4549 | r4550 >
Date:	20:47, 6 August 2004
Author:	timwi
Status:	old
Tags:
Comment:	Somewhat less hacky fix to the French l''''homme''' problem.
Modified paths:	/trunk/phase3/includes/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/Parser.php
—	—	@@ -822,9 +822,6 @@
823	823	$fname = 'Parser::internalParse';
824	824	wfProfileIn( $fname );
825	825
826		~~- global $fixLbug ;~~
827		~~- if ( $fixLbug ) $text = preg_replace ( '/(l\|L)\'/' , '\\1'' , $text ) ;~~
828		-
829	826	$text = $this->removeHTMLtags( $text );
830	827	$text = $this->replaceVariables( $text, $args );
831	828
—	—	@@ -836,12 +833,10 @@
837	834	$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
838	835	}
839	836	$text = $this->doAllQuotes( $text );
840		~~- // $text = $this->doExponent( $text );~~
841	837	$text = $this->replaceExternalLinks( $text );
842	838	$text = $this->doMagicLinks( $text );
843	839	$text = $this->replaceInternalLinks ( $text );
844	840	$text = $this->replaceInternalLinks ( $text );
845		~~- //$text = $this->doTokenizedParser ( $text );~~
846	841	$text = $this->doTableStuff( $text );
847	842	$text = $this->formatHeadings( $text, $isMain );
848	843	$sk =& $this->mOptions->getSkin();
—	—	@@ -892,57 +887,167 @@
893	888	$outtext = '';
894	889	$lines = explode( "\n", $text );
895	890	foreach ( $lines as $line ) {
896		~~- $outtext .= $this->doQuotes ( '', $line, '' ) . "\n";~~
	891	+ $outtext .= $this->doQuotes ( $line ) . "\n";
897	892	}
898	893	$outtext = substr($outtext, 0,-1);
899	894	wfProfileOut( $fname );
900	895	return $outtext;
901	896	}
902	897
903		~~- /* private */ function doQuotes( $pre, $text, $mode ) {~~
904		~~- if ( preg_match( "/^(.)''(.)$/sU", $text, $m ) ) {~~
905		~~- $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";~~
906		~~- $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";~~
907		~~- if ( substr ($m[2], 0, 1) == '\'' ) {~~
908		~~- $m[2] = substr ($m[2], 1);~~
909		~~- if ($mode == 'em') {~~
910		~~- return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'emstrong' );~~
911		~~- } else if ($mode == 'strong') {~~
912		~~- return $m1_strong . $this->doQuotes ( '', $m[2], '' );~~
913		~~- } else if (($mode == 'emstrong') || ($mode == 'both')) {~~
914		~~- return $this->doQuotes ( '', $pre.$m1_strong.$m[2], 'em' );~~
915		~~- } else if ($mode == 'strongem') {~~
916		~~- return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( '', $m[2], 'em' );~~
917		~~- } else {~~
918		~~- return $m[1] . $this->doQuotes ( '', $m[2], 'strong' );~~
	898	+ /* private */ function doQuotes( $text ) {
	899	+ $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
	900	+ if (count ($arr) == 1)
	901	+ return $text;
	902	+ else
	903	+ {
	904	+ $i = 0;
	905	+ foreach ($arr as $r)
	906	+ {
	907	+ if (($i % 2) == 1)
	908	+ {
	909	+ # If there are ever four apostrophes, assume the first is supposed to
	910	+ # be text, and the remaining three constitute mark-up for bold text.
	911	+ if (strlen ($arr[$i]) == 4)
	912	+ {
	913	+ $arr[$i-1] .= "'";
	914	+ $arr[$i] = "'''";
	915	+ }
	916	+ # If there are more than 5 apostrophes in a row, assume they're all
	917	+ # text except for the last 5.
	918	+ else if (strlen ($arr[$i]) > 5)
	919	+ {
	920	+ $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
	921	+ $arr[$i] = "'''''";
	922	+ }
	923	+
919	924	}
920		~~- } else {~~
921		~~- if ($mode == 'strong') {~~
922		~~- return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'strongem' );~~
923		~~- } else if ($mode == 'em') {~~
924		~~- return $m1_em . $this->doQuotes ( '', $m[2], '' );~~
925		~~- } else if ($mode == 'emstrong') {~~
926		~~- return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( '', $m[2], 'strong' );~~
927		~~- } else if (($mode == 'strongem') || ($mode == 'both')) {~~
928		~~- return $this->doQuotes ( '', $pre.$m1_em.$m[2], 'strong' );~~
929		~~- } else {~~
930		~~- return $m[1] . $this->doQuotes ( '', $m[2], 'em' );~~
	925	+ $i++;
	926	+ }
	927	+
	928	+ # Now see if there's an odd or even number of "bold" and "italic"
	929	+ # mark-up. There should normally be an even number of both.
	930	+ $i = 0;
	931	+ $numbold = 0;
	932	+ $numitalics = 0;
	933	+ foreach ($arr as $r)
	934	+ {
	935	+ if (($i % 2) == 1)
	936	+ {
	937	+ if (strlen ($r) == 2) $numitalics++; else
	938	+ if (strlen ($r) == 3) $numbold++; else
	939	+ if (strlen ($r) == 5) { $numitalics++; $numbold++; }
931	940	}
	941	+ $i++;
932	942	}
933		~~- } else {~~
934		~~- $text_strong = ($text == '') ? '' : "<strong>{$text}</strong>";~~
935		~~- $text_em = ($text == '') ? '' : "<em>{$text}</em>";~~
936		~~- if ($mode == '') {~~
937		~~- return $pre . $text;~~
938		~~- } else if ($mode == 'em') {~~
939		~~- return $pre . $text_em;~~
940		~~- } else if ($mode == 'strong') {~~
941		~~- return $pre . $text_strong;~~
942		~~- } else if ($mode == 'strongem') {~~
943		~~- return (($pre == '') && ($text == '')) ? '' : "<strong>{$pre}{$text_em}</strong>";~~
944		~~- } else {~~
945		~~- return (($pre == '') && ($text == '')) ? '' : "<em>{$pre}{$text_strong}</em>";~~
	943	+
	944	+ # If there is an odd number of both bold and italics, it is likely
	945	+ # that one of the bold ones was meant to be an apostrophe followed
	946	+ # by italics. Which one we cannot know for certain, but it is more
	947	+ # likely to be one that has a single-letter word before it.
	948	+ if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
	949	+ {
	950	+ $i = 0;
	951	+ $firstsingleletterword = -1;
	952	+ $firstmultiletterword = -1;
	953	+ $firstspace = -1;
	954	+ foreach ($arr as $r)
	955	+ {
	956	+ if (($i % 2 == 1) and (strlen ($r) == 3))
	957	+ {
	958	+ $x1 = substr ($arr[$i-1], -1);
	959	+ $x2 = substr ($arr[$i-1], -2, 1);
	960	+ if ($x1 == " ") {
	961	+ if ($firstspace == -1) $firstspace = $i;
	962	+ } else if ($x2 == " ") {
	963	+ if ($firstsingleletterword == -1) $firstsingleletterword = $i;
	964	+ } else {
	965	+ if ($firstmultiletterword == -1) $firstmultiletterword = $i;
	966	+ }
	967	+ }
	968	+ $i++;
	969	+ }
	970	+
	971	+ # If there is a single-letter word, use it!
	972	+ if ($firstsingleletterword > -1)
	973	+ {
	974	+ $arr [ $firstsingleletterword ] = "''";
	975	+ $arr [ $firstsingleletterword-1 ] .= "'";
	976	+ }
	977	+ # If not, but there's a multi-letter word, use that one.
	978	+ else if ($firstmultiletterword > -1)
	979	+ {
	980	+ $arr [ $firstmultiletterword ] = "''";
	981	+ $arr [ $firstmultiletterword-1 ] .= "'";
	982	+ }
	983	+ # ... otherwise use the first one that has neither.
	984	+ else
	985	+ {
	986	+ $arr [ $firstspace ] = "''";
	987	+ $arr [ $firstspace-1 ] .= "'";
	988	+ }
946	989	}
	990	+
	991	+ # Now let's actually convert our apostrophic mush to HTML!
	992	+ $output = '';
	993	+ $buffer = '';
	994	+ $state = '';
	995	+ $i = 0;
	996	+ foreach ($arr as $r)
	997	+ {
	998	+ if (($i % 2) == 0)
	999	+ {
	1000	+ if ($state == 'both')
	1001	+ $buffer .= $r;
	1002	+ else
	1003	+ $output .= $r;
	1004	+ }
	1005	+ else
	1006	+ {
	1007	+ if (strlen ($r) == 2)
	1008	+ {
	1009	+ if ($state == 'em')
	1010	+ { $output .= "</em>"; $state = ''; }
	1011	+ else if ($state == 'strongem')
	1012	+ { $output .= "</em>"; $state = 'strong'; }
	1013	+ else if ($state == 'emstrong')
	1014	+ { $output .= "</strong></em><strong>"; $state = 'strong'; }
	1015	+ else if ($state == 'both')
	1016	+ { $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
	1017	+ else # $state can be 'strong' or ''
	1018	+ { $output .= "<em>"; $state .= 'em'; }
	1019	+ }
	1020	+ else if (strlen ($r) == 3)
	1021	+ {
	1022	+ if ($state == 'strong')
	1023	+ { $output .= "</strong>"; $state = ''; }
	1024	+ else if ($state == 'strongem')
	1025	+ { $output .= "</em></strong><em>"; $state = 'em'; }
	1026	+ else if ($state == 'emstrong')
	1027	+ { $output .= "</strong>"; $state = 'em'; }
	1028	+ else if ($state == 'both')
	1029	+ { $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
	1030	+ else # $state can be 'em' or ''
	1031	+ { $output .= "<strong>"; $state .= 'strong'; }
	1032	+ }
	1033	+ else if (strlen ($r) == 5)
	1034	+ {
	1035	+ if ($state == 'strong')
	1036	+ { $output .= "</strong><em>"; $state = 'em'; }
	1037	+ else if ($state == 'em')
	1038	+ { $output .= "</em><strong>"; $state = 'strong'; }
	1039	+ else if ($state == 'strongem')
	1040	+ { $output .= "</em></strong>"; $state = ''; }
	1041	+ else if ($state == 'emstrong')
	1042	+ { $output .= "</strong></em>"; $state = ''; }
	1043	+ else if ($state == 'both')
	1044	+ { $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
	1045	+ else # ($state == '')
	1046	+ { $buffer = ''; $state = 'both'; }
	1047	+ }
	1048	+ }
	1049	+ $i++;
	1050	+ }
	1051	+ return $output;
947	1052	}
948	1053	}
949	1054

Status & tagging log

15:00, 12 September 2011 Meno25 (talk | contribs) changed the status of r4549 [removed: ok added: old]
13:42, 18 June 2009 😂 (talk | contribs) changed the status of r4549 [removed: new added: ok]