r61527 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r61526‎ | r61527 | r61528 >
Date:18:56, 26 January 2010
Author:platonides
Status:reverted
Tags:
Comment:
Step 3: Balance the quotes directly on $text
Side effect: Some ' are converted to &#39; on output.
Modified paths:
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)
  • /trunk/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.txt
@@ -116,7 +116,7 @@
117117 </li><li> plain<b><i>bold-italic</i>bold</b>plain
118118 </li><li> plain<i>italic<b>bold-italic</b></i>plain
119119 </li><li> plain<b>bold<i>bold-italic</i></b>plain
120 -</li><li> plain l'<i>italic</i>plain
 120+</li><li> plain l&#39;<i>italic</i>plain
121121 </li><li> plain l'<b>bold</b> plain
122122 </li></ul>
123123
@@ -6415,7 +6415,7 @@
64166416 !! input
64176417 ''' ''x'
64186418 !! result
6419 -<pre>'<i> </i>x'
 6419+<pre>&#39;<i> </i>x'
64206420 </pre>
64216421 !!end
64226422
@@ -7585,19 +7585,19 @@
75867586 |}
75877587 -->
75887588 !! result
7589 -<p><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b>
7590 -</p><p><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b>
7591 -</p><p><span> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b></span>
7592 -</p><p> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b>
 7589+<p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
 7590+</p><p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
 7591+</p><p><span> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b></span>
 7592+</p><p> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
75937593 </p>
7594 -<pre><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b>
 7594+<pre><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
75957595 </pre>
75967596 <table>
75977597 <tr>
7598 -<td> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b>
 7598+<td> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
75997599 </td></tr></table>
7600 -<p><b>This was Italic<i> this was plain'</i> and this was bold</b>
7601 -but <b>This is bold<i> this is bold italic'</i> and this is bold</b>
 7600+<p><b>This was Italic<i> this was plain&#39;</i> and this was bold</b>
 7601+but <b>This is bold<i> this is bold italic&#39;</i> and this is bold</b>
76027602 </p><p><br />
76037603 </p>
76047604 !! end
Index: trunk/phase3/includes/parser/Parser.php
@@ -1119,70 +1119,44 @@
11201120 return $text;
11211121 else
11221122 {
1123 - # Split in groups of 2, 3, 5 or 6 apostrophes.
1124 - # If there are ever four apostrophes, assume the first is supposed to
1125 - # be text, and the remaining three constitute mark-up for bold text.
1126 - # If there are more than 6 apostrophes in a row, assume they're all
1127 - # text except for the last 6.
1128 - $arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1129 -
1130 -
11311123 # If there is an odd number of both bold and italics, it is likely
11321124 # that one of the bold ones was meant to be an apostrophe followed
11331125 # by italics. Which one we cannot know for certain, but it is more
11341126 # likely to be one that has a single-letter word before it.
11351127 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
11361128 {
1137 - $i = 0;
1138 -
1139 - # These are indexes to the /next/ array entry than the
1140 - # one holding the text matching the condition which gives name
1141 - # to the variable.
1142 - $firstsingleletterword = -1;
1143 - $firstmultiletterword = -1;
1144 - $firstspace = -1;
1145 -
1146 - foreach ( $arr as $r )
1147 - {
1148 - # Filter the "'''". Separators are on odd positions.
1149 - # $arr[0] will be an empty string if needed.
1150 - if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
1151 - {
1152 - $x1 = substr ($arr[$i-1], -1);
1153 - $x2 = substr ($arr[$i-1], -2, 1);
1154 - if ($x1 === ' ') {
1155 - if ($firstspace == -1) $firstspace = $i;
1156 - } elseif ($x2 === ' ') {
1157 - if ($firstsingleletterword == -1) $firstsingleletterword = $i;
1158 - } elseif ($arr[$i-1] !== "") {
1159 - if ($firstmultiletterword == -1) $firstmultiletterword = $i;
1160 - }
1161 - }
1162 - $i++;
1163 - }
11641129
1165 - # If there is a single-letter word, use it!
1166 - if ($firstsingleletterword > -1)
1167 - {
1168 - $arr [ $firstsingleletterword ] = "''";
1169 - $arr [ $firstsingleletterword-1 ] .= "'";
 1130+ # This algorithm moves the literal quote at the
 1131+ # right of a single word, at the right of a
 1132+ # multiletter word or at the right of a space.
 1133+ # Otherwise, it does nothing.
 1134+ #
 1135+ # The original if-based version can be found at
 1136+ # http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?revision=61519&view=markup
 1137+ #
 1138+ # Unlike the original one, here we convert the
 1139+ # texty quotes to &#39; which shouldn't matter.
 1140+
 1141+ $quoteBalancerReplacements = array(
 1142+ "/(?<= [^ ])'''(?!')/"=>"&#39;''",
 1143+ "/(?<=[^ '])'''(?!')/"=>"&#39;''",
 1144+ "/(^|(?<=[^'])) '''(?!')/"=>" &#39;''");
 1145+
 1146+ foreach( $quoteBalancerReplacements as $k => $v) {
 1147+ $text = preg_replace($k, $v, $text, 1, $count);
 1148+ if ($count != 0)
 1149+ break;
11701150 }
1171 - # If not, but there's a multi-letter word, use that one.
1172 - elseif ($firstmultiletterword > -1)
1173 - {
1174 - $arr [ $firstmultiletterword ] = "''";
1175 - $arr [ $firstmultiletterword-1 ] .= "'";
1176 - }
1177 - # ... otherwise use the first one that has neither.
1178 - # (notice that it is possible for all three to be -1 if, for example,
1179 - # there is only one pentuple-apostrophe in the line)
1180 - elseif ($firstspace > -1)
1181 - {
1182 - $arr [ $firstspace ] = "''";
1183 - $arr [ $firstspace-1 ] .= "'";
1184 - }
11851151 }
11861152
 1153+ # Split in groups of 2, 3, 5 or 6 apostrophes.
 1154+ # If there are ever four apostrophes, assume the first is supposed to
 1155+ # be text, and the remaining three constitute mark-up for bold text.
 1156+ # If there are more than 6 apostrophes in a row, assume they're all
 1157+ # text except for the last 6.
 1158+ $arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 1159+
 1160+
11871161 # Now let's actually convert our apostrophic mush to HTML!
11881162 $output = ''; # Processed text
11891163 $buffer = ''; # Content if $state is 'both'

Follow-up revisions

Revision | Commit summary | Author | Date
r61551 | Revert r61528, r61527, r61526, r61525, r61519, r61515, r61053, r61052 (Parser... | tstarling | 02:41, 27 January 2010

Status & tagging log