Index: trunk/phase3/maintenance/parserTests.txt |
— | — | @@ -116,7 +116,7 @@ |
117 | 117 | </li><li> plain<b><i>bold-italic</i>bold</b>plain |
118 | 118 | </li><li> plain<i>italic<b>bold-italic</b></i>plain |
119 | 119 | </li><li> plain<b>bold<i>bold-italic</i></b>plain |
120 | | -</li><li> plain l'<i>italic</i>plain |
| 120 | +</li><li> plain l'<i>italic</i>plain |
121 | 121 | </li><li> plain l'<b>bold</b> plain |
122 | 122 | </li></ul> |
123 | 123 | |
— | — | @@ -5253,17 +5253,19 @@ |
5254 | 5254 | </p> |
5255 | 5255 | !! end |
5256 | 5256 | |
5257 | | -# This was the original html, but it has also been |
5258 | | -# <p>'<i>bold'</i><b>bold<i>bolditalics</i></b> |
| 5257 | +# Original result was this: |
| 5258 | +# <p><b>bold</b><b>bold<i>bolditalics</i></b> |
5259 | 5259 | # </p> |
5260 | | -# See bug 18765. |
| 5260 | +# While that might be marginally more intuitive, maybe, the six-apostrophe |
| 5261 | +# construct is clearly pathological and the result stated here (which is what |
| 5262 | +# the parser actually does) is about as reasonable as anything. |
5261 | 5263 | !!test |
5262 | 5264 | Mixing markup for italics and bold |
5263 | 5265 | !! options |
5264 | 5266 | !! input |
5265 | 5267 | '''bold''''''bold''bolditalics''''' |
5266 | 5268 | !! result |
5267 | | -<p><b>bold</b><b>bold<i>bolditalics</i></b> |
| 5269 | +<p>'<i>bold'</i><b>bold<i>bolditalics</i></b> |
5268 | 5270 | </p> |
5269 | 5271 | !! end |
5270 | 5272 | |
— | — | @@ -6415,7 +6417,7 @@ |
6416 | 6418 | !! input |
6417 | 6419 | ''' ''x' |
6418 | 6420 | !! result |
6419 | | -<pre>'<i> </i>x' |
| 6421 | +<pre>'<i> </i>x' |
6420 | 6422 | </pre> |
6421 | 6423 | !!end |
6422 | 6424 | |
— | — | @@ -7558,82 +7560,6 @@ |
7559 | 7561 | <a href="https://www.mediawiki.org/wiki/Main_Page#section" title="Main Page">#section</a> |
7560 | 7562 | !! end |
7561 | 7563 | |
7562 | | -!! test |
7563 | | -Bold/italic markup handled differently depending on leading whitespace (bug 18765) |
7564 | | -!!input |
7565 | | -'''Look at ''this edit'''s complicated bold/italic markup!''' |
7566 | | - |
7567 | | -<!-- Comment -->'''Look at ''this edit'''s complicated bold/italic markup!''' |
7568 | | - |
7569 | | -<span> '''Look at ''this edit'''s complicated bold/italic markup!'''</span> |
7570 | | - |
7571 | | -<nowiki></nowiki> '''Look at ''this edit'''s complicated bold/italic markup!''' |
7572 | | - |
7573 | | -<!-- Hello world---> '''Look at ''this edit'''s complicated bold/italic markup!''' |
7574 | | - |
7575 | | -{| |
7576 | | -| '''Look at ''this edit'''s complicated bold/italic markup!''' |
7577 | | -|} |
7578 | | - |
7579 | | -'''This was Italic'' this was plain''' and this was bold''' |
7580 | | -but '''This is bold'' this is bold italic''' and this is bold''' |
7581 | | - |
7582 | | -<!-- Wishlist: Breaking because <span> and | are treated as text |
7583 | | -<span>'''Look at ''this edit'''s complicated bold/italic markup!'''</span> |
7584 | | -{| |
7585 | | -|'''Look at ''this edit'''s complicated bold/italic markup!''' |
7586 | | -|} |
7587 | | -!! result |
7588 | | -<p><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
7589 | | -</p><p><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
7590 | | -</p><p><span> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b></span> |
7591 | | -</p><p> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
7592 | | -</p> |
7593 | | -<pre><b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
7594 | | -</pre> |
7595 | | -<table> |
7596 | | -<tr> |
7597 | | -<td> <b>Look at <i>this edit'</i>s complicated bold/italic markup!</b> |
7598 | | -</td></tr></table> |
7599 | | -<p><b>This was Italic<i> this was plain'</i> and this was bold</b> |
7600 | | -but <b>This is bold<i> this is bold italic'</i> and this is bold</b> |
7601 | | -</p><p><br /> |
7602 | | -</p> |
7603 | | -!! end |
7604 | | - |
7605 | | -!! test |
7606 | | -Six quotes |
7607 | | -!!input |
7608 | | -''Italic''''''Bold |
7609 | | - |
7610 | | -'''Bold''BoldItalic''''''Normal |
7611 | | - |
7612 | | -''Italic'''BoldItalic''''''Normal''''' |
7613 | | - |
7614 | | -'''''BoldItalic''''''MoreBoldItalic'' |
7615 | | - |
7616 | | -''''''Normal |
7617 | | -!!result |
7618 | | -<p><i>Italic'</i><b>Bold</b> |
7619 | | -</p><p><b>Bold<i>BoldItalic'</i></b>Normal |
7620 | | -</p><p><i>Italic<b>BoldItalic'</b></i>Normal |
7621 | | -</p><p><i><b>BoldItalic</b><b>MoreBoldItalic</b></i> |
7622 | | -</p><p>Normal |
7623 | | -</p> |
7624 | | -!!end |
7625 | | - |
7626 | | - |
7627 | | -!! test |
7628 | | -Too many quotes |
7629 | | -!!input |
7630 | | -I '''like'''''quotes''''''''''' |
7631 | | -!! result |
7632 | | -<p>I <b>like</b><i>quotes''''''</i><b> </b> |
7633 | | -</p> |
7634 | | -!! end |
7635 | | - |
7636 | | - |
7637 | 7564 | Note: some elements used in these Microdata examples don't work, like <img> |
7638 | 7565 | and <time>. |
7639 | 7566 | !! test |
Index: trunk/phase3/tests/preg_split_test.php |
— | — | @@ -1,24 +0,0 @@ |
2 | | -<?php |
3 | | -include "../includes/StringUtils.php"; |
4 | | - |
5 | | -$pattern = "/('')+/"; |
6 | | -$subject = str_repeat("'' ", 1024*1024 + 7); |
7 | | - |
8 | | -$m = memory_get_usage(); |
9 | | - |
10 | | -$ps1 = preg_split($pattern, $subject); |
11 | | - |
12 | | -$r = ""; |
13 | | -foreach ($ps1 as $c) { |
14 | | - $r .= $c . "|"; |
15 | | -} |
16 | | -echo "Original preg_split: " . md5($r) . " " . (memory_get_usage()-$m) . "\n"; |
17 | | - |
18 | | -unset($ps1); |
19 | | - |
20 | | -$r = ""; |
21 | | -$ps2 = StringUtils::preg_split($pattern, $subject); |
22 | | -foreach ($ps2 as $c) { |
23 | | - $r .= $c . "|"; |
24 | | -} |
25 | | -echo "StringUtils preg_split: " . md5($r) . " " . (memory_get_usage()-$m) . "\n"; |
Index: trunk/phase3/includes/parser/Parser.php |
— | — | @@ -213,7 +213,7 @@ |
214 | 214 | * Must not consist of all title characters, or else it will change |
215 | 215 | * the behaviour of <nowiki> in a link. |
216 | 216 | */ |
217 | | - # $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); |
| 217 | + #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); |
218 | 218 | # Changed to \x7f to allow XML double-parsing -- TS |
219 | 219 | $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); |
220 | 220 | |
— | — | @@ -338,7 +338,7 @@ |
339 | 339 | '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2', |
340 | 340 | # french spaces, Guillemet-right |
341 | 341 | '/(\\302\\253) /' => '\\1 ', |
342 | | - '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874. |
| 342 | + '/ (!\s*important)/' => ' \\1', #Beware of CSS magic word !important, bug #11874. |
343 | 343 | ); |
344 | 344 | $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); |
345 | 345 | |
— | — | @@ -556,7 +556,7 @@ |
557 | 557 | $taglist = implode( '|', $elements ); |
558 | 558 | $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i"; |
559 | 559 | |
560 | | - while ( $text !== '' ) { |
| 560 | + while ( $text != '' ) { |
561 | 561 | $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); |
562 | 562 | $stripped .= $p[0]; |
563 | 563 | if( count( $p ) < 5 ) { |
— | — | @@ -723,11 +723,11 @@ |
724 | 724 | array_push( $tr_history , false ); |
725 | 725 | array_push( $tr_attributes , '' ); |
726 | 726 | array_push( $has_opened_tr , false ); |
727 | | - } elseif ( count ( $td_history ) == 0 ) { |
| 727 | + } else if ( count ( $td_history ) == 0 ) { |
728 | 728 | // Don't do any of the following |
729 | 729 | $out .= $outLine."\n"; |
730 | 730 | continue; |
731 | | - } elseif ( substr ( $line , 0 , 2 ) === '|}' ) { |
| 731 | + } else if ( substr ( $line , 0 , 2 ) === '|}' ) { |
732 | 732 | // We are ending a table |
733 | 733 | $line = '</table>' . substr ( $line , 2 ); |
734 | 734 | $last_tag = array_pop ( $last_tag_history ); |
— | — | @@ -745,7 +745,7 @@ |
746 | 746 | } |
747 | 747 | array_pop ( $tr_attributes ); |
748 | 748 | $outLine = $line . str_repeat( '</dd></dl>' , $indent_level ); |
749 | | - } elseif ( substr ( $line , 0 , 2 ) === '|-' ) { |
| 749 | + } else if ( substr ( $line , 0 , 2 ) === '|-' ) { |
750 | 750 | // Now we have a table row |
751 | 751 | $line = preg_replace( '#^\|-+#', '', $line ); |
752 | 752 | |
— | — | @@ -773,7 +773,7 @@ |
774 | 774 | array_push ( $td_history , false ); |
775 | 775 | array_push ( $last_tag_history , '' ); |
776 | 776 | } |
777 | | - elseif ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) { |
| 777 | + else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) { |
778 | 778 | // This might be cell elements, td, th or captions |
779 | 779 | if ( substr ( $line , 0 , 2 ) === '|+' ) { |
780 | 780 | $first_character = '+'; |
— | — | @@ -818,9 +818,9 @@ |
819 | 819 | |
820 | 820 | if ( $first_character === '|' ) { |
821 | 821 | $last_tag = 'td'; |
822 | | - } elseif ( $first_character === '!' ) { |
| 822 | + } else if ( $first_character === '!' ) { |
823 | 823 | $last_tag = 'th'; |
824 | | - } elseif ( $first_character === '+' ) { |
| 824 | + } else if ( $first_character === '+' ) { |
825 | 825 | $last_tag = 'caption'; |
826 | 826 | } else { |
827 | 827 | $last_tag = ''; |
— | — | @@ -835,7 +835,7 @@ |
836 | 836 | // be mistaken as delimiting cell parameters |
837 | 837 | if ( strpos( $cell_data[0], '[[' ) !== false ) { |
838 | 838 | $cell = "{$previous}<{$last_tag}>{$cell}"; |
839 | | - } elseif ( count ( $cell_data ) == 1 ) |
| 839 | + } else if ( count ( $cell_data ) == 1 ) |
840 | 840 | $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; |
841 | 841 | else { |
842 | 842 | $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); |
— | — | @@ -1108,59 +1108,100 @@ |
1109 | 1109 | } |
1110 | 1110 | |
1111 | 1111 | /** |
1112 | | - * Processes bolds and italics on a single line. |
1113 | 1112 | * Helper function for doAllQuotes() |
1114 | 1113 | */ |
1115 | 1114 | public function doQuotes( $text ) { |
1116 | | - # Counts the number of occurrences of bold and italics mark-ups. |
1117 | | - self::countBoldAndItalic($text, $numbold, $numitalics); |
1118 | | - |
1119 | | - if ( ( $numbold == 0 ) && ( $numitalics == 0 ) ) |
| 1115 | + $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); |
| 1116 | + if ( count( $arr ) == 1 ) |
1120 | 1117 | return $text; |
1121 | 1118 | else |
1122 | 1119 | { |
| 1120 | + # First, do some preliminary work. This may shift some apostrophes from |
| 1121 | + # being mark-up to being text. It also counts the number of occurrences |
| 1122 | + # of bold and italics mark-ups. |
| 1123 | + $i = 0; |
| 1124 | + $numbold = 0; |
| 1125 | + $numitalics = 0; |
| 1126 | + foreach ( $arr as $r ) |
| 1127 | + { |
| 1128 | + if ( ( $i % 2 ) == 1 ) |
| 1129 | + { |
| 1130 | + # If there are ever four apostrophes, assume the first is supposed to |
| 1131 | + # be text, and the remaining three constitute mark-up for bold text. |
| 1132 | + if ( strlen( $arr[$i] ) == 4 ) |
| 1133 | + { |
| 1134 | + $arr[$i-1] .= "'"; |
| 1135 | + $arr[$i] = "'''"; |
| 1136 | + } |
| 1137 | + # If there are more than 5 apostrophes in a row, assume they're all |
| 1138 | + # text except for the last 5. |
| 1139 | + else if ( strlen( $arr[$i] ) > 5 ) |
| 1140 | + { |
| 1141 | + $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); |
| 1142 | + $arr[$i] = "'''''"; |
| 1143 | + } |
| 1144 | + # Count the number of occurrences of bold and italics mark-ups. |
| 1145 | + # We are not counting sequences of five apostrophes. |
| 1146 | + if ( strlen( $arr[$i] ) == 2 ) { $numitalics++; } |
| 1147 | + else if ( strlen( $arr[$i] ) == 3 ) { $numbold++; } |
| 1148 | + else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; } |
| 1149 | + } |
| 1150 | + $i++; |
| 1151 | + } |
| 1152 | + |
1123 | 1153 | # If there is an odd number of both bold and italics, it is likely |
1124 | 1154 | # that one of the bold ones was meant to be an apostrophe followed |
1125 | 1155 | # by italics. Which one we cannot know for certain, but it is more |
1126 | 1156 | # likely to be one that has a single-letter word before it. |
1127 | 1157 | if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) |
1128 | 1158 | { |
| 1159 | + $i = 0; |
| 1160 | + $firstsingleletterword = -1; |
| 1161 | + $firstmultiletterword = -1; |
| 1162 | + $firstspace = -1; |
| 1163 | + foreach ( $arr as $r ) |
| 1164 | + { |
| 1165 | + if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) |
| 1166 | + { |
| 1167 | + $x1 = substr ($arr[$i-1], -1); |
| 1168 | + $x2 = substr ($arr[$i-1], -2, 1); |
| 1169 | + if ($x1 === ' ') { |
| 1170 | + if ($firstspace == -1) $firstspace = $i; |
| 1171 | + } else if ($x2 === ' ') { |
| 1172 | + if ($firstsingleletterword == -1) $firstsingleletterword = $i; |
| 1173 | + } else { |
| 1174 | + if ($firstmultiletterword == -1) $firstmultiletterword = $i; |
| 1175 | + } |
| 1176 | + } |
| 1177 | + $i++; |
| 1178 | + } |
1129 | 1179 | |
1130 | | - # This algorithm moves the literal quote at the |
1131 | | - # right of a single word, at the right of a |
1132 | | - # multiletter word or at the right of a space. |
1133 | | - # Otherwise, it does nothing. |
1134 | | - # |
1135 | | - # The original if-based version can be found at |
1136 | | - # http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?revision=61519&view=markup |
1137 | | - # |
1138 | | - # Unlike the original one, here we convert the |
1139 | | - # texty quotes to ' which shouldn't matter. |
1140 | | - |
1141 | | - $quoteBalancerReplacements = array( |
1142 | | - "/(?<= [^ ])'''(?!')/"=>"'''", |
1143 | | - "/(?<=[^ '])'''(?!')/"=>"'''", |
1144 | | - "/(^|(?<=[^'])) '''(?!')/"=>" '''"); |
1145 | | - |
1146 | | - foreach( $quoteBalancerReplacements as $k => $v) { |
1147 | | - $text = preg_replace($k, $v, $text, 1, $count); |
1148 | | - if ($count != 0) |
1149 | | - break; |
| 1180 | + # If there is a single-letter word, use it! |
| 1181 | + if ($firstsingleletterword > -1) |
| 1182 | + { |
| 1183 | + $arr [ $firstsingleletterword ] = "''"; |
| 1184 | + $arr [ $firstsingleletterword-1 ] .= "'"; |
1150 | 1185 | } |
| 1186 | + # If not, but there's a multi-letter word, use that one. |
| 1187 | + else if ($firstmultiletterword > -1) |
| 1188 | + { |
| 1189 | + $arr [ $firstmultiletterword ] = "''"; |
| 1190 | + $arr [ $firstmultiletterword-1 ] .= "'"; |
| 1191 | + } |
| 1192 | + # ... otherwise use the first one that has neither. |
| 1193 | + # (notice that it is possible for all three to be -1 if, for example, |
| 1194 | + # there is only one quintuple-apostrophe in the line) |
| 1195 | + else if ($firstspace > -1) |
| 1196 | + { |
| 1197 | + $arr [ $firstspace ] = "''"; |
| 1198 | + $arr [ $firstspace-1 ] .= "'"; |
| 1199 | + } |
1151 | 1200 | } |
1152 | 1201 | |
1153 | | - # Split in groups of 2, 3, 5 or 6 apostrophes. |
1154 | | - # If there are ever four apostrophes, assume the first is supposed to |
1155 | | - # be text, and the remaining three constitute mark-up for bold text. |
1156 | | - # If there are more than 6 apostrophes in a row, assume they're all |
1157 | | - # text except for the last 6. |
1158 | | - $arr = Stringutils::preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); |
1159 | | - |
1160 | | - |
1161 | 1202 | # Now let's actually convert our apostrophic mush to HTML! |
1162 | | - $output = ''; # Processed text |
1163 | | - $buffer = ''; # Content if $state is 'both' |
1164 | | - $state = ''; # Flags with the order of open tags: '|b|i|bi|ib|both' |
| 1203 | + $output = ''; |
| 1204 | + $buffer = ''; |
| 1205 | + $state = ''; |
1165 | 1206 | $i = 0; |
1166 | 1207 | foreach ($arr as $r) |
1167 | 1208 | { |
— | — | @@ -1177,58 +1218,43 @@ |
1178 | 1219 | { |
1179 | 1220 | if ($state === 'i') |
1180 | 1221 | { $output .= '</i>'; $state = ''; } |
1181 | | - elseif ($state === 'bi') |
| 1222 | + else if ($state === 'bi') |
1182 | 1223 | { $output .= '</i>'; $state = 'b'; } |
1183 | | - elseif ($state === 'ib') |
| 1224 | + else if ($state === 'ib') |
1184 | 1225 | { $output .= '</b></i><b>'; $state = 'b'; } |
1185 | | - elseif ($state === 'both') |
| 1226 | + else if ($state === 'both') |
1186 | 1227 | { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; } |
1187 | 1228 | else # $state can be 'b' or '' |
1188 | 1229 | { $output .= '<i>'; $state .= 'i'; } |
1189 | 1230 | } |
1190 | | - elseif (strlen ($r) == 3) |
| 1231 | + else if (strlen ($r) == 3) |
1191 | 1232 | { |
1192 | 1233 | if ($state === 'b') |
1193 | 1234 | { $output .= '</b>'; $state = ''; } |
1194 | | - elseif ($state === 'bi') |
| 1235 | + else if ($state === 'bi') |
1195 | 1236 | { $output .= '</i></b><i>'; $state = 'i'; } |
1196 | | - elseif ($state === 'ib') |
| 1237 | + else if ($state === 'ib') |
1197 | 1238 | { $output .= '</b>'; $state = 'i'; } |
1198 | | - elseif ($state === 'both') |
| 1239 | + else if ($state === 'both') |
1199 | 1240 | { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; } |
1200 | 1241 | else # $state can be 'i' or '' |
1201 | 1242 | { $output .= '<b>'; $state .= 'b'; } |
1202 | 1243 | } |
1203 | | - elseif (strlen ($r) == 5) |
| 1244 | + else if (strlen ($r) == 5) |
1204 | 1245 | { |
1205 | 1246 | if ($state === 'b') |
1206 | 1247 | { $output .= '</b><i>'; $state = 'i'; } |
1207 | | - elseif ($state === 'i') |
| 1248 | + else if ($state === 'i') |
1208 | 1249 | { $output .= '</i><b>'; $state = 'b'; } |
1209 | | - elseif ($state === 'bi') |
| 1250 | + else if ($state === 'bi') |
1210 | 1251 | { $output .= '</i></b>'; $state = ''; } |
1211 | | - elseif ($state === 'ib') |
| 1252 | + else if ($state === 'ib') |
1212 | 1253 | { $output .= '</b></i>'; $state = ''; } |
1213 | | - elseif ($state === 'both') |
| 1254 | + else if ($state === 'both') |
1214 | 1255 | { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; } |
1215 | 1256 | else # ($state == '') |
1216 | 1257 | { $buffer = ''; $state = 'both'; } |
1217 | 1258 | } |
1218 | | - elseif (strlen ($r) == 6) |
1219 | | - { |
1220 | | - if ($state === 'b') |
1221 | | - { $output .= '</b><b>'; $state = 'b'; } |
1222 | | - elseif ($state === 'i') |
1223 | | - { $output .= '\'</i><b>'; $state = 'b'; } |
1224 | | - elseif ($state === 'bi') |
1225 | | - { $output .= '\'</i></b>'; $state = ''; } |
1226 | | - elseif ($state === 'ib') |
1227 | | - { $output .= '\'</b></i>'; $state = ''; } |
1228 | | - elseif ($state === 'both') |
1229 | | - { $output .= '<i><b>'.$buffer.'</b><b>'; $state = 'ib'; } |
1230 | | - else # ($state == '') |
1231 | | - { $buffer = ''; $state = ''; } |
1232 | | - } |
1233 | 1259 | } |
1234 | 1260 | $i++; |
1235 | 1261 | } |
— | — | @@ -1247,57 +1273,6 @@ |
1248 | 1274 | } |
1249 | 1275 | |
1250 | 1276 | /** |
1251 | | - * Counts the number of bold and italic items from a line of text. |
1252 | | - * Helper function for doQuotes() |
1253 | | - */ |
1254 | | - private static function countBoldAndItalic($text, &$numBold, &$numItalics) { |
1255 | | - $numBold = 0; |
1256 | | - $numItalics = 0; |
1257 | | - $offset = 0; |
1258 | | - |
1259 | | - do { |
1260 | | - $offset = strpos($text, "'", $offset); |
1261 | | - if ($offset === false) |
1262 | | - return; |
1263 | | - |
1264 | | - $quoteLen = strspn($text, "'", $offset); |
1265 | | - $offset += $quoteLen; |
1266 | | - |
1267 | | - switch ($quoteLen) { |
1268 | | - case 0: |
1269 | | - case 1: |
1270 | | - break; |
1271 | | - |
1272 | | - case 2: |
1273 | | - $numItalics++; |
1274 | | - break; |
1275 | | - |
1276 | | - case 3: |
1277 | | - $numBold++; |
1278 | | - break; |
1279 | | - |
1280 | | - case 4: |
1281 | | - # If there are ever four apostrophes, assume the first is supposed to |
1282 | | - # be text, and the remaining three constitute mark-up for bold text. |
1283 | | - $numBold++; |
1284 | | - $numItalics++; |
1285 | | - break; |
1286 | | - |
1287 | | - case 5: |
1288 | | - $numItalics++; |
1289 | | - $numBold++; |
1290 | | - break; |
1291 | | - |
1292 | | - case 6: |
1293 | | - default: |
1294 | | - # If there are more than 6 apostrophes in a row, assume they're all |
1295 | | - # text except for the last 6. |
1296 | | - $numBold+=2; |
1297 | | - } |
1298 | | - } while (true); |
1299 | | - } |
1300 | | - |
1301 | | - /** |
1302 | 1277 | * Replace external links (REL) |
1303 | 1278 | * |
1304 | 1279 | * Note: this is all very hackish and the order of execution matters a lot. |
— | — | @@ -1538,9 +1513,9 @@ |
1539 | 1514 | $sk = $this->mOptions->getSkin(); |
1540 | 1515 | $holders = new LinkHolderArray( $this ); |
1541 | 1516 | |
1542 | | - # split the entire text string on occurences of [[ |
| 1517 | + #split the entire text string on occurences of [[ |
1543 | 1518 | $a = StringUtils::explode( '[[', ' ' . $s ); |
1544 | | - # get the first element (all text up to first [[), and remove the space we added |
| 1519 | + #get the first element (all text up to first [[), and remove the space we added |
1545 | 1520 | $s = $a->current(); |
1546 | 1521 | $a->next(); |
1547 | 1522 | $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" |
— | — | @@ -1685,10 +1660,10 @@ |
1686 | 1661 | |
1687 | 1662 | if ( $might_be_img ) { # if this is actually an invalid link |
1688 | 1663 | wfProfileIn( __METHOD__."-might_be_img" ); |
1689 | | - if ( $ns == NS_FILE && $noforce ) { # but might be an image |
| 1664 | + if ( $ns == NS_FILE && $noforce ) { #but might be an image |
1690 | 1665 | $found = false; |
1691 | 1666 | while ( true ) { |
1692 | | - # look at the next 'line' to see if we can close it there |
| 1667 | + #look at the next 'line' to see if we can close it there |
1693 | 1668 | $a->next(); |
1694 | 1669 | $next_line = $a->current(); |
1695 | 1670 | if ( $next_line === false || $next_line === null ) { |
— | — | @@ -1702,24 +1677,24 @@ |
1703 | 1678 | $trail = $m[2]; |
1704 | 1679 | break; |
1705 | 1680 | } elseif ( count( $m ) == 2 ) { |
1706 | | - # if there's exactly one ]] that's fine, we'll keep looking |
| 1681 | + #if there's exactly one ]] that's fine, we'll keep looking |
1707 | 1682 | $text .= "[[{$m[0]}]]{$m[1]}"; |
1708 | 1683 | } else { |
1709 | | - # if $next_line is invalid too, we need look no further |
| 1684 | + #if $next_line is invalid too, we need look no further |
1710 | 1685 | $text .= '[[' . $next_line; |
1711 | 1686 | break; |
1712 | 1687 | } |
1713 | 1688 | } |
1714 | 1689 | if ( !$found ) { |
1715 | 1690 | # we couldn't find the end of this imageLink, so output it raw |
1716 | | - # but don't ignore what might be perfectly normal links in the text we've examined |
| 1691 | + #but don't ignore what might be perfectly normal links in the text we've examined |
1717 | 1692 | $holders->merge( $this->replaceInternalLinks2( $text ) ); |
1718 | 1693 | $s .= "{$prefix}[[$link|$text"; |
1719 | 1694 | # note: no $trail, because without an end, there *is* no trail |
1720 | 1695 | wfProfileOut( __METHOD__."-might_be_img" ); |
1721 | 1696 | continue; |
1722 | 1697 | } |
1723 | | - } else { # it's not an image, so output it raw |
| 1698 | + } else { #it's not an image, so output it raw |
1724 | 1699 | $s .= "{$prefix}[[$link|$text"; |
1725 | 1700 | # note: no $trail, because without an end, there *is* no trail |
1726 | 1701 | wfProfileOut( __METHOD__."-might_be_img" ); |
— | — | @@ -1796,7 +1771,7 @@ |
1797 | 1772 | } |
1798 | 1773 | |
1799 | 1774 | # Self-link checking |
1800 | | - if( $nt->getFragment() === '' && $ns !== NS_SPECIAL ) { |
| 1775 | + if( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { |
1801 | 1776 | if( in_array( $nt->getPrefixedText(), $selflink, true ) ) { |
1802 | 1777 | $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail ); |
1803 | 1778 | continue; |
— | — | @@ -1916,7 +1891,7 @@ |
1917 | 1892 | */ |
1918 | 1893 | /* private */ function closeParagraph() { |
1919 | 1894 | $result = ''; |
1920 | | - if ( $this->mLastSection !== '' ) { |
| 1895 | + if ( $this->mLastSection != '' ) { |
1921 | 1896 | $result = '</' . $this->mLastSection . ">\n"; |
1922 | 1897 | } |
1923 | 1898 | $this->mInPre = false; |
— | — | @@ -1932,7 +1907,7 @@ |
1933 | 1908 | if ( $fl < $shorter ) { $shorter = $fl; } |
1934 | 1909 | |
1935 | 1910 | for ( $i = 0; $i < $shorter; ++$i ) { |
1936 | | - if ( $st1{$i} !== $st2{$i} ) { break; } |
| 1911 | + if ( $st1{$i} != $st2{$i} ) { break; } |
1937 | 1912 | } |
1938 | 1913 | return $i; |
1939 | 1914 | } |
— | — | @@ -2105,7 +2080,7 @@ |
2106 | 2081 | '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); |
2107 | 2082 | if ( $openmatch or $closematch ) { |
2108 | 2083 | $paragraphStack = false; |
2109 | | - # TODO bug 5718: paragraph closed |
| 2084 | + # TODO bug 5718: paragraph closed |
2110 | 2085 | $output .= $this->closeParagraph(); |
2111 | 2086 | if ( $preOpenMatch and !$preCloseMatch ) { |
2112 | 2087 | $this->mInPre = true; |
— | — | @@ -2115,8 +2090,8 @@ |
2116 | 2091 | } else { |
2117 | 2092 | $inBlockElem = true; |
2118 | 2093 | } |
2119 | | - } elseif ( !$inBlockElem && !$this->mInPre ) { |
2120 | | - if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) !== '' ) ) { |
| 2094 | + } else if ( !$inBlockElem && !$this->mInPre ) { |
| 2095 | + if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) { |
2121 | 2096 | // pre |
2122 | 2097 | if ($this->mLastSection !== 'pre') { |
2123 | 2098 | $paragraphStack = false; |
— | — | @@ -2145,7 +2120,7 @@ |
2146 | 2121 | $output .= $paragraphStack; |
2147 | 2122 | $paragraphStack = false; |
2148 | 2123 | $this->mLastSection = 'p'; |
2149 | | - } elseif ($this->mLastSection !== 'p') { |
| 2124 | + } else if ($this->mLastSection !== 'p') { |
2150 | 2125 | $output .= $this->closeParagraph().'<p>'; |
2151 | 2126 | $this->mLastSection = 'p'; |
2152 | 2127 | } |
— | — | @@ -2166,7 +2141,7 @@ |
2167 | 2142 | $output .= $this->closeList( $prefix2[$prefixLength-1] ); |
2168 | 2143 | --$prefixLength; |
2169 | 2144 | } |
2170 | | - if ( $this->mLastSection !== '' ) { |
| 2145 | + if ( $this->mLastSection != '' ) { |
2171 | 2146 | $output .= '</' . $this->mLastSection . '>'; |
2172 | 2147 | $this->mLastSection = ''; |
2173 | 2148 | } |
— | — | @@ -2972,7 +2947,7 @@ |
2973 | 2948 | $isHTML = true; |
2974 | 2949 | $this->disableCache(); |
2975 | 2950 | } |
2976 | | - } elseif ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { |
| 2951 | + } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) { |
2977 | 2952 | $found = false; //access denied |
2978 | 2953 | wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() ); |
2979 | 2954 | } else { |
— | — | @@ -3585,7 +3560,7 @@ |
3586 | 3561 | if (preg_match("/^$markerRegex/", $headline, $markerMatches)) { |
3587 | 3562 | $serial = $markerMatches[1]; |
3588 | 3563 | list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; |
3589 | | - $isTemplate = ($titleText !== $baseTitleText); |
| 3564 | + $isTemplate = ($titleText != $baseTitleText); |
3590 | 3565 | $headline = preg_replace("/^$markerRegex/", "", $headline); |
3591 | 3566 | } |
3592 | 3567 | |
— | — | @@ -3701,7 +3676,7 @@ |
3702 | 3677 | if ( $legacyHeadline == $safeHeadline ) { |
3703 | 3678 | # No reason to have both (in fact, we can't) |
3704 | 3679 | $legacyHeadline = false; |
3705 | | - } elseif ( $legacyHeadline !== Sanitizer::escapeId( |
| 3680 | + } elseif ( $legacyHeadline != Sanitizer::escapeId( |
3706 | 3681 | $legacyHeadline, 'xml' ) ) { |
3707 | 3682 | # The legacy id is invalid XML. We used to allow this, but |
3708 | 3683 | # there's no reason to do so anymore. Backward |
— | — | @@ -3875,8 +3850,8 @@ |
3876 | 3851 | else |
3877 | 3852 | continue; |
3878 | 3853 | } |
3879 | | - if ( $s['index'] !== $section || |
3880 | | - $s['fromtitle'] !== $titletext ) { |
| 3854 | + if ( $s['index'] != $section || |
| 3855 | + $s['fromtitle'] != $titletext ) { |
3881 | 3856 | self::incrementNumbering( $numbering, |
3882 | 3857 | $s['toclevel'], $lastLevel ); |
3883 | 3858 | |
— | — | @@ -3927,7 +3902,7 @@ |
3928 | 3903 | private static function incrementNumbering( &$number, $level, $lastLevel ) { |
3929 | 3904 | if ( $level > $lastLevel ) |
3930 | 3905 | $number[$level - 1] = 1; |
3931 | | - elseif ( $level < $lastLevel ) { |
| 3906 | + else if ( $level < $lastLevel ) { |
3932 | 3907 | foreach ( $number as $key => $unused ) |
3933 | 3908 | if ( $key >= $level ) |
3934 | 3909 | unset( $number[$key] ); |
— | — | @@ -4037,7 +4012,7 @@ |
4038 | 4013 | $m = array(); |
4039 | 4014 | if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) { |
4040 | 4015 | $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); |
4041 | | - } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" !== '' ) { |
| 4016 | + } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) { |
4042 | 4017 | $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); |
4043 | 4018 | } else { |
4044 | 4019 | # if there's no context, don't bother duplicating the title |
— | — | @@ -4876,7 +4851,7 @@ |
4877 | 4852 | if ( $node->getName() === 'h' ) { |
4878 | 4853 | $bits = $node->splitHeading(); |
4879 | 4854 | $curLevel = $bits['level']; |
4880 | | - if ( $bits['i'] !== $sectionIndex && $curLevel <= $targetLevel ) { |
| 4855 | + if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { |
4881 | 4856 | break; |
4882 | 4857 | } |
4883 | 4858 | } |
— | — | @@ -4892,7 +4867,7 @@ |
4893 | 4868 | // Add two newlines on -- trailing whitespace in $newText is conventionally |
4894 | 4869 | // stripped by the editor, so we need both newlines to restore the paragraph gap |
4895 | 4870 | // Only add trailing whitespace if there is newText |
4896 | | - if($newText !== "") { |
| 4871 | + if($newText != "") { |
4897 | 4872 | $outText .= $newText . "\n\n"; |
4898 | 4873 | } |
4899 | 4874 | |
Index: trunk/phase3/includes/StringUtils.php |
— | — | @@ -179,14 +179,6 @@ |
180 | 180 | return new ArrayIterator( explode( $separator, $subject ) ); |
181 | 181 | } |
182 | 182 | } |
183 | | - |
184 | | - /** |
185 | | - * Workalike for preg_split() with limited memory usage. |
186 | | - * Returns an Iterator |
187 | | - */ |
188 | | - static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) { |
189 | | - return new PregSplitIterator( $pattern, $subject, $limit, $flags ); |
190 | | - } |
191 | 183 | } |
192 | 184 | |
193 | 185 | /** |
— | — | @@ -417,82 +409,3 @@ |
418 | 410 | } |
419 | 411 | } |
420 | 412 | |
421 | | - |
422 | | -/** |
423 | | - * An iterator which works exactly like: |
424 | | - * |
425 | | - * foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) { |
426 | | - * ... |
427 | | - * } |
428 | | - * |
429 | | - * Except it doesn't use huge amounts of memory when $limit is -1 |
430 | | - * |
431 | | - * The flag PREG_SPLIT_OFFSET_CAPTURE isn't supported. |
432 | | - */ |
433 | | -class PregSplitIterator implements Iterator { |
434 | | - // The subject string |
435 | | - var $pattern, $subject, $originalLimit, $flags; |
436 | | - |
437 | | - // The last extracted group of items. |
438 | | - var $smallArray; |
439 | | - |
440 | | - // The position on the iterator. |
441 | | - var $curPos; |
442 | | - |
443 | | - const MAX_LIMIT = 100; |
444 | | - |
445 | | - /** |
446 | | - * Construct a PregSplitIterator |
447 | | - */ |
448 | | - function __construct( $pattern, $s, $limit, $flags) { |
449 | | - $this->pattern = $pattern; |
450 | | - $this->subject = $s; |
451 | | - $this->originalLimit = $limit; |
452 | | - $this->flags = $flags; |
453 | | - |
454 | | - $this->rewind(); |
455 | | - } |
456 | | - |
457 | | - private function effectiveLimit() { |
458 | | - if ($this->originalLimit == -1) { |
459 | | - return self::MAX_LIMIT + 1; |
460 | | - } else if ($this->limit > self::MAX_LIMIT) { |
461 | | - $this->limit -= self::MAX_LIMIT; |
462 | | - return self::MAX_LIMIT + 1; |
463 | | - } else { |
464 | | - $old = $this->limit; |
465 | | - $this->limit = 0; |
466 | | - return $old; |
467 | | - } |
468 | | - } |
469 | | - |
470 | | - function rewind() { |
471 | | - $this->curPos = 0; |
472 | | - $this->limit = $this->originalLimit; |
473 | | - if ($this->limit == -1) $this->limit = self::MAX_LIMIT; |
474 | | - $this->smallArray = preg_split( $this->pattern, $this->subject, $this->effectiveLimit(), $this->flags); |
475 | | - } |
476 | | - |
477 | | - function current() { |
478 | | - return $this->smallArray[$this->curPos % self::MAX_LIMIT]; |
479 | | - } |
480 | | - |
481 | | - function key() { |
482 | | - return $this->curPos; |
483 | | - } |
484 | | - |
485 | | - function next() { |
486 | | - $this->curPos++; |
487 | | - if ( $this->curPos % self::MAX_LIMIT == 0 ) { |
488 | | - # Last item contains the rest unsplitted. |
489 | | - if ($this->limit > 0) { |
490 | | - $this->smallArray = preg_split( $this->pattern, $this->smallArray[self::MAX_LIMIT], $this->effectiveLimit(), $this->flags); |
491 | | - } |
492 | | - } |
493 | | - return; |
494 | | - } |
495 | | - |
496 | | - function valid() { |
497 | | - return $this->curPos % self::MAX_LIMIT < count($this->smallArray); |
498 | | - } |
499 | | -} |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -711,8 +711,6 @@ |
712 | 712 | * (bug 9794) User rights log entries for foreign user now links to the foreign |
713 | 713 | user's page if possible |
714 | 714 | * (bug 14717) Don't load nonexistent CSS fix files for non-Monobook skins |
715 | | -* (bug 18765) Increased consistency of bold-italic markup for unbalanced quotes. |
716 | | - Improved representation of six quotes (may break existing markup). |
717 | 715 | * (bug 22034) Use wfClientAcceptsGzip() in wfGzipHandler instead of |
718 | 716 | reimplementing it. |
719 | 717 | * (bug 19226) First line renders differently on many UI messages. |