r73114 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r73113‎ | r73114 | r73115 >
Date:13:03, 16 September 2010
Author:daniel
Status:deferred
Tags:
Comment:
filter import by id list from file
Modified paths:
  • /trunk/extensions/DataTransclusion/ImportMAB2.php (modified) (history)

Diff [purge]

Index: trunk/extensions/DataTransclusion/ImportMAB2.php
@@ -35,6 +35,9 @@
3636
3737 $this->addOption( "multi-record", "read multiple records from a single file. Records may be separated by special lines matching --record-separator; if --record-separator is not given, all records are expected to start with filed number 001.", false, false );
3838 $this->addOption( "record-separator", "regular expression for lines separating records in a multi-record file. Implies --multi-record", false, true );
 39+
 40+ $this->addOption( "id-list-field", "id field to compare against the id list.", false, true );
 41+ $this->addOption( "id-list-file", "list of ids to import.", false, true );
3942 }
4043
4144 public function createTables( ) {
@@ -81,6 +84,10 @@
8285 $this->recordSeparator = $this->getOption( 'record-separator' );
8386 $this->multiRecord = $this->recordSeparator || $this->hasOption( 'multi-record' );
8487
 88+ $this->idListField = $this->getOption( 'id-list-field' );
 89+ $this->idListFile = $this->getOption( 'id-list-file' );
 90+ $this->idList = null;
 91+
8592 $src = $this->mArgs[0];
8693 $dir = $this->mArgs[1];
8794 $this->blob_table = $this->mArgs[2];
@@ -116,10 +123,23 @@
117124 foreach ( $this->source->keyFields as $key ) {
118125 $this->id_map[ $key ] = MAB2RecordTransformer::getMABFields( $key );
119126 if ( !$this->id_map[ $key ] ) {
120 - $this->error( "unknown key field '$key', no MAB fields mapped." );
 127+ $this->error( "unknown key field '$key', no MAB fields mapped.\n" );
121128 }
122129 }
123130
 131+ if ( $this->idListFile ) {
 132+ $this->output( "loading id list from {$this->idListFile}.\n" );
 133+ $this->idList = $this->loadList( $this->idListFile, $this->idListField );
 134+ if ( $this->idList === false ) {
 135+ $this->error( "failed to load id list from {$this->idListFile}.\n" );
 136+ return;
 137+ }
 138+ }
 139+
 140+ if ( $this->idList && $this->idListField ) {
 141+ $this->output( "filtering by {$this->idListField} from {$this->idListFile}.\n" );
 142+ }
 143+
124144 $dir = "php://stdin";
125145
126146 if ( is_dir( $dir ) ) {
@@ -224,6 +244,28 @@
225245 $db->insert( $this->index_table, $insert, __METHOD__, array( 'IGNORE' ) );
226246 }
227247
 248+ public function loadList( $file, $field = null ) {
 249+ $f = fopen( $file, 'r' );
 250+ if ( !$f ) return false;
 251+
 252+ $list = array();
 253+
 254+ while ( true ) {
 255+ $s = fgets( $f );
 256+
 257+ if ( $s === "" || $s === false ) {
 258+ break;
 259+ }
 260+
 261+ $s = trim( $s );
 262+ if ( $field ) $s = $this->source->normalize( $field, $s );
 263+
 264+ $list[] = $s;
 265+ }
 266+
 267+ return $list;
 268+ }
 269+
228270 public function importMabFile( $file ) {
229271 $f = fopen( $file, 'r' );
230272 if ( !$f ) return false;
@@ -283,28 +325,47 @@
284326 if ( $rec ) {
285327 $ids = $this->getIds($rec);
286328
287 - if ( $ids ) {
 329+ if ( !$ids ) {
 330+ $this->output( "skipping part of file $file\n" );
288331 if ( $this->debug ) {
289 - var_export( $ids );
290 - if ( !$this->noblob ) var_export( $rec );
 332+ var_export( $rec );
291333 print "------------------------------------\n";
292 - } else {
293 - $id = false;
294 - foreach ( $this->source->keyFields as $idf ) {
295 - if ( !empty( $ids[ $idf ] ) ) {
296 - $id = "$idf:" . $ids[$idf][0];
 334+ }
 335+
 336+ continue;
 337+ }
 338+
 339+ $id = false;
 340+ foreach ( $this->source->keyFields as $idf ) {
 341+ if ( !empty( $ids[ $idf ] ) ) {
 342+ $id = "$idf:" . $ids[$idf][0];
 343+ }
 344+ }
 345+
 346+ if ( $this->idList && $this->idListField ) {
 347+ $found = false;
 348+ if ( !empty( $ids[ $this->idListField ] ) ) {
 349+ foreach ( $ids[ $this->idListField ] as $v ) {
 350+ if ( in_array( $v, $this->idList ) ) {
 351+ $found = true;
 352+ break;
297353 }
298354 }
 355+ }
299356
300 - $this->output( "importing record $id\n" );
301 - $this->storeRecord($rec, $ids);
 357+ if ( !$found ) {
 358+ $this->output( "ignoring record #$id from file $file\n" );
 359+ continue;
302360 }
 361+ }
 362+
 363+ if ( $this->debug ) {
 364+ var_export( $ids );
 365+ if ( !$this->noblob ) var_export( $rec );
 366+ print "------------------------------------\n";
303367 } else {
304 - $this->output( "skipping record from file $file\n" );
305 - if ( $this->debug ) {
306 - var_export( $rec );
307 - print "------------------------------------\n";
308 - }
 368+ $this->output( "importing record $id\n" );
 369+ $this->storeRecord($rec, $ids);
309370 }
310371 }
311372 }

Status & tagging log