r110215 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r110214‎ | r110215 | r110216 >
Date:16:56, 28 January 2012
Author:nikerabbit
Status:resolved (Comments)
Tags:i18nreview, miscextensions 
Comment:
Bootstrap script for tm
Modified paths:
  • /trunk/extensions/Translate/scripts/ttmserver-export.php (added) (history)

Diff [purge]

Index: trunk/extensions/Translate/scripts/ttmserver-export.php
@@ -0,0 +1,201 @@
 2+<?php
 3+/**
 4+ * Script to bootstrap TTMServer translation memory
 5+ *
 6+ * @author Niklas Laxström
 7+ *
 8+ * @copyright Copyright © 2010-2012, Niklas Laxström
 9+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 10+ * @file
 11+ */
 12+
 13+// Standard boilerplate to define $IP
 14+if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
 15+ $IP = getenv( 'MW_INSTALL_PATH' );
 16+} else {
 17+ $dir = dirname( __FILE__ ); $IP = "$dir/../../..";
 18+}
 19+require_once( "$IP/maintenance/Maintenance.php" );
 20+
 21+/**
 22+ * Script to bootstrap translatetoolkit translation memory.
 23+ * @since 2012-01-26
 24+ */
 25+class TTMServerBootstrap extends Maintenance {
 26+ public function __construct() {
 27+ parent::__construct();
 28+ $this->mDescription = 'Script to bootstrap TTMServer';
 29+ $this->addOption( 'threads', 'Number of threads', /*required*/false, /*has arg*/true );
 30+ $this->setBatchSize( 100 );
 31+ $this->start = microtime( true );
 32+ }
 33+
 34+ protected function statusLine( $text, $channel = null ) {
 35+ $pid = sprintf( "%5s", getmypid() );
 36+ $prefix = sprintf( "%6.2f", microtime( true ) - $this->start );
 37+ $mem = sprintf( "%5.1fM", ( memory_get_usage( true ) / (1024*1024) ) );
 38+ $this->output( "$pid $prefix $mem $text", $channel );
 39+ }
 40+
 41+ public function execute() {
 42+ $server = TTMServer::primary();
 43+ if ( $server instanceof FakeTTMServer ) {
 44+ $this->error( "Translation memory is not configured properly" );
 45+ $this->exit();
 46+ }
 47+
 48+ $dbw = $server->getDB( DB_MASTER );
 49+
 50+ $this->statusLine( 'Deleting sources.. ', 1 );
 51+ $dbw->delete( 'translate_tms', '*', __METHOD__ );
 52+ $this->output( 'translations.. ', 1 );
 53+ $dbw->delete( 'translate_tmt', '*', __METHOD__ );
 54+ $this->output( 'fulltext.. ', 1 );
 55+ $dbw->delete( 'translate_tmf', '*', __METHOD__ );
 56+ $table = $dbw->tableName( 'translate_tmf' );
 57+ #$dbw->query( "DROP INDEX tmf_text ON $table" );
 58+ $this->output( 'done!', 1 );
 59+
 60+ $this->statusLine( 'Loading groups... ', 2 );
 61+ $groups = MessageGroups::singleton()->getGroups();
 62+ $this->output( 'done!', 2 );
 63+
 64+
 65+ $threads = $this->getOption( 'threads', 1 );
 66+ $pids = array();
 67+
 68+ foreach ( $groups as $id => $group ) {
 69+ if ( $group->isMeta() ) {
 70+ continue;
 71+ }
 72+
 73+ // Fork to avoid unbounded memory usage growth
 74+ $pid = pcntl_fork();
 75+
 76+ if ( $pid === 0 ) {
 77+ // Child, reseed because there is no bug in PHP:
 78+ // http://bugs.php.net/bug.php?id=42465
 79+ mt_srand( getmypid() );
 80+ $this->exportGroup( $group, $threads > 1 );
 81+ exit();
 82+ } elseif ( $pid === -1 ) {
 83+ // Fork failed do it serialized
 84+ $this->exportGroup( $group );
 85+ } else {
 86+ $this->statusLine( "Forked thread $pid to handle $id\n" );
 87+ $pids[$pid] = true;
 88+
 89+ // If we hit the thread limit, wait for any child to finish.
 90+ if ( count( $pids ) >= $threads ) {
 91+ $status = 0;
 92+ $pid = pcntl_wait( $status );
 93+ unset( $pids[$pid] );
 94+ }
 95+ }
 96+ }
 97+
 98+ // Return control after all threads have finished.
 99+ foreach ( array_keys( $pids ) as $pid ) {
 100+ $status = 0;
 101+ pcntl_waitpid( $pid, $status );
 102+ }
 103+
 104+ $this->statusLine( 'Adding fulltext index...', 9 );
 105+ $table = $dbw->tableName( 'translate_tmf' );
 106+ $dbw->query( "CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)" );
 107+ $this->output( ' done!', 9 );
 108+ }
 109+
 110+ protected function exportGroup( MessageGroup $group, $multi = false ) {
 111+ // Make sure all existing connections are dead,
 112+ // we can't use them in forked children.
 113+ LBFactory::destroyInstance();
 114+ $server = TTMServer::primary();
 115+
 116+ $id = $group->getId();
 117+ $sourceLanguage = $group->getSourceLanguage();
 118+
 119+ if ( $multi ) {
 120+ $stats = MessageGroupStats::forGroup( $id );
 121+ $this->statusLine( "Loaded stats for $id\n" );
 122+ } else {
 123+ $this->statusLine( "Loading stats... ", 4 );
 124+ $stats = MessageGroupStats::forGroup( $id );
 125+ $this->output( "done!", 4 );
 126+ $this->statusLine( "Inserting sources: ", 5 );
 127+ }
 128+
 129+ $collection = $group->initCollection( $sourceLanguage );
 130+ $collection->filter( 'ignored' );
 131+ $collection->filter( 'optional' );
 132+ $collection->initMessages();
 133+
 134+ $sids = array();
 135+ $counter = 0;
 136+
 137+ foreach ( $collection->keys() as $mkey => $title ) {
 138+ $def = $collection[$mkey]->definition();
 139+ $sids[$mkey] = $server->insertSource( $title, $sourceLanguage, $def );
 140+ if ( ++$counter % $this->mBatchSize === 0 && !$multi ) {
 141+ wfWaitForSlaves( 10 );
 142+ $this->output( '.', 5 );
 143+ }
 144+ }
 145+
 146+ $total = count( $sids );
 147+ if ( $multi ) {
 148+ $this->statusLine( "Inserted $total source entries for $id\n" );
 149+ } else {
 150+ $this->output( "$total entries", 5 );
 151+ $this->statusLine( "Inserting translations...", 6 );
 152+ }
 153+
 154+ $dbw = $server->getDB( DB_MASTER );
 155+
 156+ foreach ( $stats as $targetLanguage => $numbers ) {
 157+ if ( $targetLanguage === $sourceLanguage ) {
 158+ continue;
 159+ }
 160+ if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) {
 161+ continue;
 162+ }
 163+
 164+ if ( !$multi ) {
 165+ $this->output( sprintf( "%19s ", $targetLanguage ), $targetLanguage );
 166+ }
 167+
 168+ $collection->resetForNewLanguage( $targetLanguage );
 169+ $collection->filter( 'ignored' );
 170+ $collection->filter( 'optional' );
 171+ $collection->filter( 'translated', false );
 172+ $collection->loadTranslations();
 173+
 174+ $inserts = array();
 175+ foreach ( $collection->keys() as $mkey => $title ) {
 176+ $inserts[] = array(
 177+ 'tmt_sid' => $sids[$mkey],
 178+ 'tmt_lang' => $targetLanguage,
 179+ 'tmt_text' => $collection[$mkey]->translation()
 180+ );
 181+ }
 182+
 183+ do {
 184+ $batch = array_splice( $inserts, 0, $this->mBatchSize );
 185+ $dbw->insert( 'translate_tmt', $batch, __METHOD__ );
 186+
 187+ if ( !$multi ) {
 188+ $this->output( '.', $targetLanguage );
 189+ }
 190+ wfWaitForSlaves( 10 );
 191+ } while( count( $inserts ) );
 192+ }
 193+
 194+ if ( $multi ) {
 195+ $this->statusLine( "Inserted translations for $id\n" );
 196+ }
 197+ }
 198+
 199+}
 200+
 201+$maintClass = 'TTMServerBootstrap';
 202+require_once( RUN_MAINTENANCE_IF_MAIN );
Property changes on: trunk/extensions/Translate/scripts/ttmserver-export.php
___________________________________________________________________
Added: svn:eol-style
1203 + native

Follow-up revisions

Revision | Commit summary | Author | Date
r110472 | Fix two issues reported in CR of r110215 | nikerabbit | 10:17, 1 February 2012

Comments

#Comment by Santhosh.thottingal (talk | contribs)   05:04, 1 February 2012

PHP Fatal error: Call to undefined method TTMServerBootstrap::exit() in /path/extensions/Translate/scripts/ttmserver-export.php on line 44

#Comment by Santhosh.thottingal (talk | contribs)   05:47, 1 February 2012

Running maintenance/update.php already creates the tmf_text index, so running ttmserver-export.php afterwards fails with a duplicate index error:

Query: CREATE FULLTEXT INDEX tmf_text ON `testwiki_translate_tmf` (tmf_text) Function: Error: 1061 Duplicate key name 'tmf_text' (localhost)

Status & tagging log