Index: trunk/phase3/maintenance/parserTests.txt |
— | — | @@ -116,7 +116,7 @@ |
117 | 117 | </li><li> plain<b><i>bold-italic</i>bold</b>plain |
118 | 118 | </li><li> plain<i>italic<b>bold-italic</b></i>plain |
119 | 119 | </li><li> plain<b>bold<i>bold-italic</i></b>plain |
120 | | -</li><li> plain l'<i>italic</i>plain |
| 120 | +</li><li> plain l&#39;<i>italic</i>plain |
121 | 121 | </li><li> plain l'<b>bold</b> plain |
122 | 122 | </li></ul> |
123 | 123 | |
— | — | @@ -6415,7 +6415,7 @@ |
6416 | 6416 | !! input |
6417 | 6417 | ''' ''x' |
6418 | 6418 | !! result |
6419 | | -<pre>'<i> </i>x' |
| 6419 | +<pre>&#39;<i> </i>x' |
6420 | 6420 | </pre> |
6421 | 6421 | !!end |
6422 | 6422 | |
— | — | @@ -7585,19 +7585,19 @@ |
7586 | 7586 | |} |
7587 | 7587 | --> |
7588 | 7588 | !! result |
7589 | | -<p><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
7590 | | -</p><p><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
7591 | | -</p><p><span> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b></span> |
7592 | | -</p><p> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
| 7589 | +<p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b> |
| 7590 | +</p><p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b> |
| 7591 | +</p><p><span> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b></span> |
| 7592 | +</p><p> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b> |
7593 | 7593 | </p> |
7594 | | -<pre><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
| 7594 | +<pre><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b> |
7595 | 7595 | </pre> |
7596 | 7596 | <table> |
7597 | 7597 | <tr> |
7598 | | -<td> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
| 7598 | +<td> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b> |
7599 | 7599 | </td></tr></table> |
7600 | | -<p><b>This was Italic<i> this was plain'</i> and this was bold</b> |
7601 | | -but <b>This is bold<i> this is bold italic'</i> and this is bold</b> |
| 7600 | +<p><b>This was Italic<i> this was plain&#39;</i> and this was bold</b> |
| 7601 | +but <b>This is bold<i> this is bold italic&#39;</i> and this is bold</b> |
7602 | 7602 | </p><p><br /> |
7603 | 7603 | </p> |
7604 | 7604 | !! end |
Index: trunk/phase3/includes/parser/Parser.php |
— | — | @@ -1119,70 +1119,44 @@ |
1120 | 1120 | return $text; |
1121 | 1121 | else |
1122 | 1122 | { |
1123 | | - # Split in groups of 2, 3, 5 or 6 apostrophes. |
1124 | | - # If there are ever four apostrophes, assume the first is supposed to |
1125 | | - # be text, and the remaining three constitute mark-up for bold text. |
1126 | | - # If there are more than 6 apostrophes in a row, assume they're all |
1127 | | - # text except for the last 6. |
1128 | | - $arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); |
1129 | | - |
1130 | | - |
1131 | 1123 | # If there is an odd number of both bold and italics, it is likely |
1132 | 1124 | # that one of the bold ones was meant to be an apostrophe followed |
1133 | 1125 | # by italics. Which one we cannot know for certain, but it is more |
1134 | 1126 | # likely to be one that has a single-letter word before it. |
1135 | 1127 | if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) |
1136 | 1128 | { |
1137 | | - $i = 0; |
1138 | | - |
1139 | | - # These are indexes to the /next/ array entry than the |
1140 | | - # one holding the text matching the condition which gives name |
1141 | | - # to the variable. |
1142 | | - $firstsingleletterword = -1; |
1143 | | - $firstmultiletterword = -1; |
1144 | | - $firstspace = -1; |
1145 | | - |
1146 | | - foreach ( $arr as $r ) |
1147 | | - { |
1148 | | - # Filter the "'''". Separators are on odd positions. |
1149 | | - # $arr[0] will be an empty string if needed. |
1150 | | - if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) |
1151 | | - { |
1152 | | - $x1 = substr ($arr[$i-1], -1); |
1153 | | - $x2 = substr ($arr[$i-1], -2, 1); |
1154 | | - if ($x1 === ' ') { |
1155 | | - if ($firstspace == -1) $firstspace = $i; |
1156 | | - } elseif ($x2 === ' ') { |
1157 | | - if ($firstsingleletterword == -1) $firstsingleletterword = $i; |
1158 | | - } elseif ($arr[$i-1] !== "") { |
1159 | | - if ($firstmultiletterword == -1) $firstmultiletterword = $i; |
1160 | | - } |
1161 | | - } |
1162 | | - $i++; |
1163 | | - } |
1164 | 1129 | |
1165 | | - # If there is a single-letter word, use it! |
1166 | | - if ($firstsingleletterword > -1) |
1167 | | - { |
1168 | | - $arr [ $firstsingleletterword ] = "''"; |
1169 | | - $arr [ $firstsingleletterword-1 ] .= "'"; |
| 1130 | + # This algorithm turns the first apostrophe of one ''' into a |
| 1131 | + # literal quote. It prefers a ''' right after a single-letter |
| 1132 | + # word, then one right after a multi-letter word, then one right |
| 1133 | + # after a space. Otherwise, it does nothing. |
| 1134 | + # |
| 1135 | + # The original if-based version can be found at |
| 1136 | + # http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?revision=61519&view=markup |
| 1137 | + # |
| 1138 | + # Unlike the original one, here we convert the |
| 1139 | + # texty quotes to &#39; which shouldn't matter. |
| 1140 | + |
| 1141 | + $quoteBalancerReplacements = array( |
| 1142 | + "/(?<= [^ ])'''(?!')/"=>"'''", |
| 1143 | + "/(?<=[^ '])'''(?!')/"=>"'''", |
| 1144 | + "/(^|(?<=[^'])) '''(?!')/"=>" '''"); |
| 1145 | + |
| 1146 | + foreach( $quoteBalancerReplacements as $k => $v) { |
| 1147 | + $text = preg_replace($k, $v, $text, 1, $count); |
| 1148 | + if ($count != 0) |
| 1149 | + break; |
1170 | 1150 | } |
1171 | | - # If not, but there's a multi-letter word, use that one. |
1172 | | - elseif ($firstmultiletterword > -1) |
1173 | | - { |
1174 | | - $arr [ $firstmultiletterword ] = "''"; |
1175 | | - $arr [ $firstmultiletterword-1 ] .= "'"; |
1176 | | - } |
1177 | | - # ... otherwise use the first one that has neither. |
1178 | | - # (notice that it is possible for all three to be -1 if, for example, |
1179 | | - # there is only one pentuple-apostrophe in the line) |
1180 | | - elseif ($firstspace > -1) |
1181 | | - { |
1182 | | - $arr [ $firstspace ] = "''"; |
1183 | | - $arr [ $firstspace-1 ] .= "'"; |
1184 | | - } |
1185 | 1151 | } |
1186 | 1152 | |
| 1153 | + # Split in groups of 2, 3, 5 or 6 apostrophes. |
| 1154 | + # If there are ever four apostrophes, assume the first is supposed to |
| 1155 | + # be text, and the remaining three constitute mark-up for bold text. |
| 1156 | + # If there are more than 6 apostrophes in a row, assume they're all |
| 1157 | + # text except for the last 6. |
| 1158 | + $arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); |
| 1159 | + |
| 1160 | + |
1187 | 1161 | # Now let's actually convert our apostrophic mush to HTML! |
1188 | 1162 | $output = ''; # Processed text |
1189 | 1163 | $buffer = ''; # Content if $state is 'both' |