Index: trunk/extensions/Translate/_autoload.php |
— | — | @@ -116,3 +116,4 @@ |
// File format support (FFS) classes. $dir is concatenated directly with the
// file name by every entry here, so relative paths must not start with a slash.
$wgAutoloadClasses['JavaFFS'] = $dir . 'FFS.php';
$wgAutoloadClasses['YamlFFS'] = $dir . 'FFS.php';
$wgAutoloadClasses['JavaScriptFFS'] = $dir . 'FFS.php';
$wgAutoloadClasses['GettextFFS'] = $dir . 'ffs/Gettext.php';
Index: trunk/extensions/Translate/ffs/Gettext.php |
— | — | @@ -70,7 +70,7 @@ |
71 | 71 | $matches = array(); |
72 | 72 | if ( preg_match( "/^msgctxt\s($poformat)/mx", $section, $matches ) ) { |
73 | 73 | // Remove quoting |
74 | | - $item['ctxt'] = self::formatForWiki( $matches[1] ); |
| 74 | + $item['ctxt'] = GettextFFS::formatForWiki( $matches[1] ); |
75 | 75 | } elseif ( $useCtxtAsKey ) { |
76 | 76 | // Invalid message |
77 | 77 | continue; |
— | — | @@ -78,7 +78,7 @@ |
79 | 79 | |
80 | 80 | $matches = array(); |
81 | 81 | if ( preg_match( "/^msgid\s($poformat)/mx", $section, $matches ) ) { |
82 | | - $item['id'] = self::formatForWiki( $matches[1] ); |
| 82 | + $item['id'] = GettextFFS::formatForWiki( $matches[1] ); |
83 | 83 | } else { |
84 | 84 | # echo "Definition not found!\n$section"; |
85 | 85 | continue; |
— | — | @@ -88,7 +88,7 @@ |
89 | 89 | $matches = array(); |
90 | 90 | if ( preg_match( "/^msgid_plural\s($poformat)/mx", $section, $matches ) ) { |
91 | 91 | $pluralMessage = true; |
92 | | - $plural = self::formatForWiki( $matches[1] ); |
| 92 | + $plural = GettextFFS::formatForWiki( $matches[1] ); |
93 | 93 | $item['id'] = "{{PLURAL:GETTEXT|{$item['id']}|$plural}}"; |
94 | 94 | } |
95 | 95 | |
— | — | @@ -98,7 +98,7 @@ |
99 | 99 | for ( $i = 0; $i < $pluralForms[1]; $i++ ) { |
100 | 100 | $matches = array(); |
101 | 101 | if ( preg_match( "/^msgstr\[$i\]\s($poformat)/mx", $section, $matches ) ) { |
102 | | - $actualForms[] = self::formatForWiki( $matches[1] ); |
| 102 | + $actualForms[] = GettextFFS::formatForWiki( $matches[1] ); |
103 | 103 | } else { |
104 | 104 | throw new MWException( "Plural not found, expecting $i" ); |
105 | 105 | } |
— | — | @@ -109,7 +109,7 @@ |
110 | 110 | |
111 | 111 | $matches = array(); |
112 | 112 | if ( preg_match( "/^msgstr\s($poformat)/mx", $section, $matches ) ) { |
113 | | - $item['str'] = self::formatForWiki( $matches[1] ); |
| 113 | + $item['str'] = GettextFFS::formatForWiki( $matches[1] ); |
114 | 114 | } else { |
115 | 115 | # echo "Translation not found!\n"; |
116 | 116 | continue; |
— | — | @@ -142,15 +142,7 @@ |
143 | 143 | if ( $useCtxtAsKey ) { |
144 | 144 | $key = $item['ctxt']; |
145 | 145 | } else { |
146 | | - global $wgLegalTitleChars; |
147 | | - $hash = sha1( $item['ctxt'] . $item['id'] ); |
148 | | - $snippet = $item['id']; |
149 | | - $snippet = preg_replace( "/[^$wgLegalTitleChars]/", ' ', $snippet ); |
150 | | - $snippet = preg_replace( "/[:&%\/_]/", ' ', $snippet ); |
151 | | - $snippet = preg_replace( "/ {2,}/", ' ', $snippet ); |
152 | | - $snippet = $lang->truncate( $snippet, 30, '' ); |
153 | | - $snippet = str_replace( ' ', '_', trim( $snippet ) ); |
154 | | - $key = $this->prefix . $hash . '-' . $snippet; |
| 146 | + $key = GettextFFS::generateKeyFromItem( $item ); |
155 | 147 | } |
156 | 148 | |
157 | 149 | $changes[$key] = $item; |
— | — | @@ -160,15 +152,6 @@ |
161 | 153 | return $changes; |
162 | 154 | } |
163 | 155 | |
164 | | - public static function formatForWiki( $data ) { |
165 | | - $quotePattern = '/(^"|"$\n?)/m'; |
166 | | - $data = preg_replace( $quotePattern, '', $data ); |
167 | | - $data = stripcslashes( $data ); |
168 | | - if ( preg_match( '/\s$/', $data ) ) { |
169 | | - $data .= '\\'; |
170 | | - } |
171 | | - return $data; |
172 | | - } |
173 | 156 | |
174 | 157 | public function parseMessages( StringMangler $mangler ) { |
175 | 158 | $defs = $this->parseFile(); |
— | — | @@ -392,4 +375,219 @@ |
393 | 376 | |
394 | 377 | return $splitPlurals; |
395 | 378 | } |
| 379 | +} |
| 380 | + |
/**
 * File format support for Gettext (.po) files.
 *
 * Reading is implemented by parseGettext(); writing is not implemented yet
 * and writeReal() always throws.
 */
class GettextFFS extends SimpleFFS {

	//
	// READ
	//

	/**
	 * Parses the contents of a Gettext file.
	 *
	 * @param $data String: raw contents of the file.
	 * @return Array: the result of parseGettext() with the MESSAGES keys run
	 * through the group's mangler and an additional AUTHORS list collected
	 * from "# Author: ..." comment lines.
	 * @throws MWException if parseGettext() cannot parse the data.
	 */
	public function readFromVariable( $data ) {
		# Authors first
		$matches = array();
		preg_match_all( '/^#\s*Author:\s*(.*)$/m', $data, $matches );
		$authors = $matches[1];

		# Then messages and everything else
		$parsedData = $this->parseGettext( $data );
		$parsedData['MESSAGES'] = $this->group->getMangler()->mangle( $parsedData['MESSAGES'] );
		$parsedData['AUTHORS'] = $authors;

		return $parsedData;
	}

	/**
	 * Splits Gettext data into sections separated by blank lines and parses
	 * each section (except the first one, the header) into a message.
	 *
	 * @param $data String: raw contents of the file.
	 * @return Array with three keys:
	 *  - MESSAGES: key => translation (prefixed with TRANSLATE_FUZZY when the
	 *    entry carries the fuzzy flag)
	 *  - TEMPLATE: key => array with ctxt, id, str, flags and comments
	 *  - METADATA: values found in the header (code, group, plurals) and
	 *    possible warnings
	 * @throws MWException if a section has no parsable msgid or msgstr, or if
	 * one of the plural forms announced by the Plural-Forms header is missing.
	 */
	public function parseGettext( $data ) {
		$data = str_replace( "\r\n", "\n", $data );
		$messages = $template = $metadata = array();

		// Defined only once. Be sure to *not* use it without match, or you might get old data
		$matches = array();

		if ( preg_match( '/X-Language-Code:\s+([a-zA-Z-_]+)/', $data, $matches ) ) {
			$metadata['code'] = $matches[1];
		}

		if ( preg_match( '/X-Message-Group:\s+([a-zA-Z0-9-_]+)/', $data, $matches ) ) {
			$metadata['group'] = $matches[1];
		}

		// Number of plural forms announced in the header. Stays at zero when
		// the header is missing or has no numeric nplurals, in which case no
		// msgstr[N] entries are read for plural messages.
		$pluralCount = 0;
		if ( preg_match( '/Plural-Forms:\s+nplurals=([0-9]+).*;/', $data, $matches ) ) {
			$metadata['plurals'] = $matches;
			$pluralCount = intval( $matches[1] );
		}

		$useCtxtAsKey = isset( $this->extra['CtxtAsKey'] ) && $this->extra['CtxtAsKey'];

		// A quoted string, optionally continued on following lines
		$poformat = '".*"\n?(^".*"$\n?)*';

		$sections = preg_split( '/\n{2,}/', $data );
		array_shift( $sections ); // First isn't an actual message

		foreach ( $sections as $section ) {
			if ( trim( $section ) === '' ) {
				continue;
			}

			$item = array(
				'ctxt' => '',
				'id' => '',
				'str' => '',
				'flags' => array(),
				'comments' => array(),
			);

			$matches = array();
			if ( preg_match( "/^msgid\s($poformat)/mx", $section, $matches ) ) {
				$item['id'] = self::formatForWiki( $matches[1] );
			} else {
				throw new MWException( "Unable to parse msgid:\n\n$section" );
			}

			if ( preg_match( "/^msgctxt\s($poformat)/mx", $section, $matches ) ) {
				$item['ctxt'] = self::formatForWiki( $matches[1] );
			} elseif ( $useCtxtAsKey ) { // Invalid message
				$metadata['warnings'][] = "Ctxt missing for {$item['id']}";
			}

			$pluralMessage = false;
			if ( preg_match( "/^msgid_plural\s($poformat)/mx", $section, $matches ) ) {
				$pluralMessage = true;
				$plural = self::formatForWiki( $matches[1] );
				$item['id'] = "{{PLURAL:GETTEXT|{$item['id']}|$plural}}";
			}

			if ( $pluralMessage ) {
				// Every form from msgstr[0] to msgstr[nplurals-1] must be present
				$actualForms = array();
				for ( $i = 0; $i < $pluralCount; $i++ ) {
					if ( preg_match( "/^msgstr\[$i\]\s($poformat)/mx", $section, $matches ) ) {
						$actualForms[] = self::formatForWiki( $matches[1] );
					} else {
						throw new MWException( "Plural not found, expecting $i" );
					}
				}

				$item['str'] = '{{PLURAL:GETTEXT|' . implode( '|', $actualForms ) . '}}';
			} else {
				$matches = array();
				if ( preg_match( "/^msgstr\s($poformat)/mx", $section, $matches ) ) {
					$item['str'] = self::formatForWiki( $matches[1] );
				} else {
					throw new MWException( "Unable to parse msgstr:\n\n$section" );
				}
			}

			// Parse flags
			$matches = array();
			if ( preg_match( '/^#,(.*)$/mu', $section, $matches ) ) {
				$flags = array_map( 'trim', explode( ',', $matches[1] ) );
				foreach ( $flags as $key => $flag ) {
					// The fuzzy flag is moved into the translation itself
					if ( $flag === 'fuzzy' ) {
						$item['str'] = TRANSLATE_FUZZY . $item['str'];
						unset( $flags[$key] );
					}
				}
				$item['flags'] = $flags;
			}

			// Rest of the comments, grouped by the character following the #
			$matches = array();
			if ( preg_match_all( '/^#(.?) (.*)$/m', $section, $matches, PREG_SET_ORDER ) ) {
				foreach ( $matches as $match ) {
					// Flag lines were already handled above
					if ( $match[1] !== ',' ) {
						$item['comments'][$match[1]][] = $match[2];
					}
				}
			}

			if ( $useCtxtAsKey ) {
				$key = $item['ctxt'];
			} else {
				$key = self::generateKeyFromItem( $item );
			}

			$messages[$key] = $item['str'];
			$template[$key] = $item;
		}

		return array(
			'MESSAGES' => $messages,
			'TEMPLATE' => $template,
			'METADATA' => $metadata,
		);
	}

	/**
	 * Generates a message key of the form "<hash>-<snippet>" for an item.
	 *
	 * The hash is the SHA-1 of the context concatenated with the id. The
	 * snippet is built from the id: characters outside $wgLegalTitleChars and
	 * the characters : & % / _ are replaced with spaces, runs of spaces are
	 * collapsed, the result is passed to Language::truncate() with length 30
	 * and finally spaces are replaced with underscores.
	 *
	 * @param $item Array with at least the keys ctxt and id.
	 * @return String
	 */
	public static function generateKeyFromItem( $item ) {
		global $wgLegalTitleChars;
		$lang = Language::factory( 'en' );
		$hash = sha1( $item['ctxt'] . $item['id'] );
		$snippet = $item['id'];
		$snippet = preg_replace( "/[^$wgLegalTitleChars]/", ' ', $snippet );
		$snippet = preg_replace( "/[:&%\/_]/", ' ', $snippet );
		$snippet = preg_replace( "/ {2,}/", ' ', $snippet );
		$snippet = $lang->truncate( $snippet, 30, '' );
		$snippet = str_replace( ' ', '_', trim( $snippet ) );
		return "$hash-$snippet";
	}

	/**
	 * Converts a quoted, possibly multi-line Gettext string into plain text.
	 *
	 * @param $data String: the quoted string as it appears in the file.
	 * @return String: the text with the surrounding quotes of every line
	 * removed and C-style escape sequences decoded.
	 */
	public static function formatForWiki( $data ) {
		// Drop the opening quote of each line and the closing quote together
		// with the line break after it, which joins continuation lines
		$quotePattern = '/(^"|"$\n?)/m';
		$data = preg_replace( $quotePattern, '', $data );
		$data = stripcslashes( $data );
		// Mark trailing whitespace with a backslash; presumably so that it
		// is not lost when the text is stored -- TODO confirm
		if ( preg_match( '/\s$/', $data ) ) {
			$data .= '\\';
		}
		return $data;
	}

	//
	// WRITE
	//

	/**
	 * Writing Gettext files is not supported yet.
	 *
	 * @param $collection MessageCollection
	 * @throws MWException always.
	 */
	protected function writeReal( MessageCollection $collection ) {
		throw new MWException( 'Not implemented' );
	}

	/**
	 * Builds the comment lines that name the language and the exporting site.
	 *
	 * @param $collection MessageCollection
	 * @return String
	 */
	protected function doHeader( MessageCollection $collection ) {
		global $wgSitename;
		$code = $collection->code;
		$name = TranslateUtils::getLanguageName( $code );
		$native = TranslateUtils::getLanguageName( $code, true );
		$output = "# Messages for $name ($native)\n";
		$output .= "# Exported from $wgSitename\n";
		return $output;
	}

	/**
	 * Builds one "# Author: ..." comment line for each author of the
	 * collection that is left after filterAuthors().
	 *
	 * @param $collection MessageCollection
	 * @return String
	 */
	protected function doAuthors( MessageCollection $collection ) {
		$output = '';
		$authors = $collection->getAuthors();
		$authors = $this->filterAuthors( $authors, $collection->code );
		foreach ( $authors as $author ) {
			$output .= "# Author: $author\n";
		}
		return $output;
	}

}
\ No newline at end of file |