Index: trunk/phase3/languages/LanguageConverter.php |
— | — | @@ -300,34 +300,6 @@ |
301 | 301 | } |
302 | 302 | |
303 | 303 | /** |
304 | | - * Caption convert, base on preg_replace_callback. |
305 | | - * |
306 | | - * To convert text in "title" or "alt", like '<img alt="text" ... ' |
307 | | - * or '<span title="text" ... ' |
308 | | - * |
309 | | - * @return String like ' alt="yyyy"' or ' title="yyyy"' |
310 | | - */ |
311 | | - protected function captionConvert( $matches ) { |
312 | | - // TODO: cache the preferred variant in every autoConvert() process, |
313 | | - // this helps improve performance in a way. |
314 | | - $toVariant = $this->getPreferredVariant(); |
315 | | - $title = $matches[1]; |
316 | | - $text = $matches[2]; |
317 | | - |
318 | | - // we convert captions except URL |
319 | | - if ( !strpos( $text, '://' ) ) { |
320 | | - $text = $this->translate( $text, $toVariant ); |
321 | | - } |
322 | | - |
323 | | - // remove HTML tags to prevent disrupting the layout |
324 | | - $text = preg_replace( '/<[^>]+>/', '', $text ); |
325 | | - // escape HTML special chars to prevent disrupting the layout |
326 | | - $text = htmlspecialchars( $text ); |
327 | | - |
328 | | - return " {$title}=\"{$text}\""; |
329 | | - } |
330 | | - |
331 | | - /** |
332 | 304 | * Dictionary-based conversion. |
333 | 305 | * This function would not parse the conversion rules. |
334 | 306 | * If you want to parse rules, try to use convert() or |
— | — | @@ -374,41 +346,75 @@ |
375 | 347 | |
376 | 348 | $reg = '/' . $codefix . $scriptfix . $prefix . |
377 | 349 | '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s'; |
| 350 | + $startPos = 0; |
| 351 | + $sourceBlob = ''; |
| 352 | + $literalBlob = ''; |
378 | 353 | |
379 | | - $matches = preg_split( $reg, $text, - 1, PREG_SPLIT_OFFSET_CAPTURE ); |
| 354 | + // Guard against delimiter nulls in the input |
| 355 | + $text = str_replace( "\000", '', $text ); |
380 | 356 | |
381 | | - $m = array_shift( $matches ); |
| 357 | + while ( $startPos < strlen( $text ) ) { |
| 358 | + if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) { |
| 359 | + $elementPos = $markupMatches[0][1]; |
| 360 | + $element = $markupMatches[0][0]; |
| 361 | + } else { |
| 362 | + $elementPos = strlen( $text ); |
| 363 | + $element = ''; |
| 364 | + } |
382 | 365 | |
383 | | - $ret = $this->translate( $m[0], $toVariant ); |
384 | | - $mstart = $m[1] + strlen( $m[0] ); |
| 366 | + // Queue the part before the markup for translation in a batch |
| 367 | + $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000"; |
385 | 368 | |
386 | | - // enable convertsion of '<img alt="xxxx" ... ' |
387 | | - // or '<span title="xxxx" ... ' |
388 | | - $captionpattern = '/\s(title|alt)\s*=\s*"([\s\S]*?)"/'; |
| 369 | + // Advance to the next position |
| 370 | + $startPos = $elementPos + strlen( $element ); |
389 | 371 | |
390 | | - $trtext = ''; |
391 | | - $trtextmark = "\0"; |
392 | | - $notrtext = array(); |
393 | | - foreach ( $matches as $m ) { |
394 | | - $mark = substr( $text, $mstart, $m[1] - $mstart ); |
395 | | - $mark = preg_replace_callback( $captionpattern, |
396 | | - array( &$this, 'captionConvert' ), |
397 | | - $mark ); |
398 | | - // Let's convert the trtext only once, |
399 | | - // it would give us more performance improvement |
400 | | - $notrtext[] = $mark; |
401 | | - $trtext .= $m[0] . $trtextmark; |
402 | | - $mstart = $m[1] + strlen( $m[0] ); |
| 372 | + // Translate any alt or title attributes inside the matched element |
| 373 | + if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, |
| 374 | + $elementMatches ) ) |
| 375 | + { |
| 376 | + $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] ); |
| 377 | + $changed = false; |
| 378 | + foreach ( array( 'title', 'alt' ) as $attrName ) { |
| 379 | + if ( !isset( $attrs[$attrName] ) ) { |
| 380 | + continue; |
| 381 | + } |
| 382 | + $attr = $attrs[$attrName]; |
| 383 | + // Don't convert URLs |
| 384 | + if ( !strpos( $attr, '://' ) ) { |
| 385 | + $attr = $this->translate( $attr, $toVariant ); |
| 386 | + } |
| 387 | + |
| 388 | + // Remove HTML tags to avoid disrupting the layout |
| 389 | + $attr = preg_replace( '/<[^>]+>/', '', $attr ); |
| 390 | + if ( $attr !== $attrs[$attrName] ) { |
| 391 | + $attrs[$attrName] = $attr; |
| 392 | + $changed = true; |
| 393 | + } |
| 394 | + } |
| 395 | + if ( $changed ) { |
| 396 | + $element = $elementMatches[1] . Html::expandAttributes( $attrs ) . |
| 397 | + $elementMatches[3]; |
| 398 | + } |
| 399 | + } |
| 400 | + $literalBlob .= $element . "\000"; |
403 | 401 | } |
404 | | - $notrtext[] = ''; |
405 | | - $trtext = $this->translate( $trtext, $toVariant ); |
406 | | - $trtext = StringUtils::explode( $trtextmark, $trtext ); |
407 | | - foreach ( $trtext as $t ) { |
408 | | - $ret .= array_shift( $notrtext ); |
409 | | - $ret .= $t; |
| 402 | + |
| 403 | + // Do the main translation batch |
| 404 | + $translatedBlob = $this->translate( $sourceBlob, $toVariant ); |
| 405 | + |
| 406 | + // Put the output back together |
| 407 | + $translatedIter = StringUtils::explode( "\000", $translatedBlob ); |
| 408 | + $literalIter = StringUtils::explode( "\000", $literalBlob ); |
| 409 | + $output = ''; |
| 410 | + while ( $translatedIter->valid() && $literalIter->valid() ) { |
| 411 | + $output .= $translatedIter->current(); |
| 412 | + $output .= $literalIter->current(); |
| 413 | + $translatedIter->next(); |
| 414 | + $literalIter->next(); |
410 | 415 | } |
| 416 | + |
411 | 417 | wfProfileOut( __METHOD__ ); |
412 | | - return $ret; |
| 418 | + return $output; |
413 | 419 | } |
414 | 420 | |
415 | 421 | /** |