Index: trunk/extensions/OAI/oaiUpdate.php |
— | — | @@ -6,6 +6,7 @@ |
7 | 7 | #dl( '/usr/lib/php/extensions/no-debug-non-zts-20020429/domxml.so' ); |
8 | 8 | } |
9 | 9 | |
| 10 | +$options = array( 'dry-run' ); |
10 | 11 | require_once( 'commandLine.inc' ); |
11 | 12 | #require_once( 'extensions/OAI/OAIHarvest.php' ); |
12 | 13 | |
— | — | @@ -18,12 +19,21 @@ |
19 | 20 | $harvester = new OAIHarvester( $oaiSourceRepository ); |
20 | 21 | |
21 | 22 | $dbr =& wfGetDB( DB_SLAVE ); |
22 | | -$lastUpdate = wfTimestamp( TS_MW, $dbr->selectField( 'cur', 'MAX(cur_timestamp)' ) ); |
| 23 | +$highest = $dbr->selectField( 'cur', 'MAX(cur_timestamp)' ); |
| 24 | +if( $highest ) { |
| 25 | + $lastUpdate = wfTimestamp( TS_MW, $highest ); |
| 26 | +} else { |
| 27 | + # Starting from an empty database! |
| 28 | + $lastUpdate = '19700101000000'; |
| 29 | +} |
23 | 30 | |
24 | 31 | $callback = 'showUpdates'; |
25 | 32 | function showUpdates( $record ) { |
| 33 | + global $options; |
26 | 34 | $record->dump(); |
27 | | - $record->apply(); |
| 35 | + if( !isset( $options['dry-run'] ) ) { |
| 36 | + $record->apply(); |
| 37 | + } |
28 | 38 | } |
29 | 39 | |
30 | 40 | $result = $harvester->listUpdates( $lastUpdate, $callback ); |
Index: trunk/extensions/OAI/OAIRepo.php |
— | — | @@ -553,8 +553,8 @@ |
554 | 554 | 'namespace' => 'http://www.openarchives.org/OAI/2.0/oai_dc/', |
555 | 555 | 'schema' => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd' ), |
556 | 556 | 'mediawiki' => array( |
557 | | - 'namespace' => 'http://www.mediawiki.org/xml/export-0.1/', |
558 | | - 'schema' => 'http://www.mediawiki.org/xml/export-0.1.xsd' ) ); |
| 557 | + 'namespace' => 'http://www.mediawiki.org/xml/export-0.2/', |
| 558 | + 'schema' => 'http://www.mediawiki.org/xml/export-0.2.xsd' ) ); |
559 | 559 | } |
560 | 560 | |
561 | 561 | } |
— | — | @@ -691,11 +691,11 @@ |
692 | 692 | global $wgContLanguageCode; |
693 | 693 | $title = Title::makeTitle( $this->_row->namespace, $this->_row->title ); |
694 | 694 | $out = oaiTag( 'mediawiki', array( |
695 | | - 'xmlns' => 'http://www.mediawiki.org/xml/export-0.1/', |
| 695 | + 'xmlns' => 'http://www.mediawiki.org/xml/export-0.2/', |
696 | 696 | 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', |
697 | | - 'xsi:schemaLocation' => 'http://www.mediawiki.org/xml/export-0.1/ ' . |
698 | | - 'http://www.mediawiki.org/xml/export-0.1.xsd', |
699 | | - 'version' => '0.1', |
| 697 | + 'xsi:schemaLocation' => 'http://www.mediawiki.org/xml/export-0.2/ ' . |
| 698 | + 'http://www.mediawiki.org/xml/export-0.2.xsd', |
| 699 | + 'version' => '0.2', |
700 | 700 | 'xml:lang' => $wgContLanguageCode ) ) . "\n"; |
701 | 701 | $out .= "<page>\n"; |
702 | 702 | $out .= oaiTag( 'title', array(), $title->getPrefixedText() ) . "\n"; |
— | — | @@ -704,11 +704,52 @@ |
705 | 705 | $out .= oaiTag( 'restrictions', array(), $this->_row->restrictions ) . "\n"; |
706 | 706 | } |
707 | 707 | $out .= revision2xml( $this->_row, true, true ); |
| 708 | + if( $title->getNamespace() == NS_IMAGE ) { |
| 709 | + $out .= $this->renderUpload(); |
| 710 | + } |
708 | 711 | $out .= "</page>\n"; |
709 | 712 | $out .= "</mediawiki>\n"; |
710 | 713 | return $out; |
711 | 714 | } |
712 | 715 | |
| 716 | + # Build the <upload> XML fragment from this record's row in the image table; returns '' when no such row exists. |
| 717 | + function renderUpload() { |
| 718 | + $fname = 'WikiOAIRecord::renderUpload'; |
| 719 | + $db =& wfGetDB( DB_SLAVE ); |
| 720 | + $imageRow = $db->selectRow( 'image', |
| 721 | + array( 'img_name', 'img_size', 'img_description', |
| 722 | + 'img_user', 'img_user_text', 'img_timestamp' ), |
| 723 | + array( 'img_name' => $this->_row->title ), |
| 724 | + $fname ); |
| 725 | + if( !$imageRow ) { |
| 726 | + return ''; |
| 727 | + } |
| 728 | + $url = Image::wfImageUrl( $imageRow->img_name ); |
| 729 | + if( substr( $url, 0, 1 ) == '/' ) { # substr() rather than $url{0}: safe on an empty string and valid on PHP 8 |
| 730 | + global $wgServer; |
| 731 | + $url = $wgServer . $url; # turn a server-relative path into a full URL |
| 732 | + } |
| 733 | + return implode( "\n", array( |
| 734 | + "<upload>", |
| 735 | + oaiTag( 'timestamp', array(), wfTimestamp2ISO8601( $imageRow->img_timestamp ) ), |
| 736 | + $this->renderContributor( $imageRow->img_user, $imageRow->img_user_text ), |
| 737 | + oaiTag( 'comment', array(), $imageRow->img_description ), |
| 738 | + oaiTag( 'filename', array(), $imageRow->img_name ), |
| 739 | + oaiTag( 'src', array(), $url ), |
| 740 | + oaiTag( 'size', array(), $imageRow->img_size ), |
| 741 | + "</upload>\n" ) ); |
| 742 | + } |
| 743 | + |
| 744 | + # Render the <contributor> element: <username> followed by <id> when $id is truthy, |
| 745 | + # otherwise a single <ip> element holding $text. |
| 746 | + function renderContributor( $id, $text ) { |
| 747 | + $inner = $id |
| 748 | + ? oaiTag( 'username', array(), $text ) . oaiTag( 'id', array(), $id ) |
| 749 | + : oaiTag( 'ip', array(), $text ); |
| 750 | + # No whitespace is inserted between the wrapper and its children. |
| 751 | + return '<contributor>' . $inner . '</contributor>'; |
| 752 | + } |
| 753 | + |
713 | 754 | } |
714 | 755 | |
715 | 756 | function oaiUpdatePage( $id, $action ) { |
Index: trunk/extensions/OAI/OAIHarvest.php |
— | — | @@ -224,6 +224,7 @@ |
225 | 225 | } |
226 | 226 | |
227 | 227 | $uagent = ini_set( 'user_agent', $this->userAgent() ); |
| 228 | + echo "Fetching: $url\n"; |
228 | 229 | $result = file_get_contents( $url ); |
229 | 230 | ini_set( 'user_agent', $uagent ); |
230 | 231 | |
— | — | @@ -284,8 +285,7 @@ |
285 | 286 | return Title::newFromText( $this->_page['title'] ); |
286 | 287 | } |
287 | 288 | |
288 | | - function getTimestamp() { |
289 | | - $time = $this->_page['revisions'][0]['timestamp']; |
| 289 | + function getTimestamp( $time ) { |
290 | 290 | if( preg_match( '/^(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)Z$/', $time, $matches ) ) { |
291 | 291 | return wfTimestamp( TS_MW, |
292 | 292 | $matches[1] . $matches[2] . $matches[3] . |
— | — | @@ -302,7 +302,7 @@ |
303 | 303 | $title = $this->getTitle(); |
304 | 304 | if( $title ) { |
305 | 305 | printf( "%s %10d [[%s]]\n", |
306 | | - $this->getTimestamp(), |
| 306 | + $this->getTimestamp( $this->_page['revisions'][0]['timestamp'] ), |
307 | 307 | $this->getArticleId(), |
308 | 308 | $title->getPrefixedText() ); |
309 | 309 | } else { |
— | — | @@ -314,8 +314,6 @@ |
315 | 315 | } |
316 | 316 | |
317 | 317 | function apply() { |
318 | | - $fname = 'OAIUpdateRecord::apply'; |
319 | | - |
320 | 318 | if( $this->isDeleted() ) { |
321 | 319 | return $this->doDelete(); |
322 | 320 | } |
— | — | @@ -328,9 +326,27 @@ |
329 | 327 | $this->_page['title'] ) ); |
330 | 328 | } |
331 | 329 | |
| 330 | + $id = 0; |
| 331 | + foreach( $this->_page['revisions'] as $revision ) { |
| 332 | + $id = $this->applyRevision( $revision ); |
| 333 | + } |
| 334 | + |
| 335 | + fixLinksFromArticle( $id ); |
| 336 | + |
| 337 | + foreach( $this->_page['uploads'] as $upload ) { |
| 338 | + if( OAIError::isError( $err = $this->applyUpload( $upload ) ) ) |
| 339 | + return $err; |
| 340 | + } |
| 341 | + |
| 342 | + return true; |
| 343 | + } |
| 344 | + |
| 345 | + function applyRevision( $revision ) { |
| 346 | + $fname = 'OAIUpdateRecord::applyRevision'; |
| 347 | + |
| 348 | + $title = $this->getTitle(); |
332 | 349 | $id = $this->getArticleId(); |
333 | | - $timestamp = $this->getTimestamp(); |
334 | | - $revision = $this->_page['revisions'][0]; |
| 350 | + $timestamp = $this->getTimestamp( $revision['timestamp'] ); |
335 | 351 | |
336 | 352 | $dbw =& wfGetDB( DB_WRITE ); |
337 | 353 | $dbw->begin(); |
— | — | @@ -388,8 +404,84 @@ |
389 | 405 | } |
390 | 406 | $dbw->commit(); |
391 | 407 | |
392 | | - fixLinksFromArticle( $id ); |
| 408 | + return $id; |
| 409 | + } |
| 410 | + # Validate the upload's filename, REPLACE its row in the image table, then hand off to downloadUpload(); returns true or an OAIError. |
| 411 | + function applyUpload( $upload ) { |
| 412 | + $fname = 'OAIUpdateRecord::applyUpload'; |
393 | 413 | |
| 414 | + # FIXME: validate these files... |
| 415 | + if( strpos( $upload['filename'], '/' ) !== false |
| 416 | + || strpos( $upload['filename'], '\\' ) !== false |
| 417 | + || $upload['filename'] == '' |
| 418 | + || $upload['filename'] !== trim( $upload['filename'] ) ) { |
| 419 | + return new OAIError( 'Invalid filename "' . $upload['filename'] . '"' ); |
| 420 | + } |
| 421 | + |
| 422 | + $contrib = $upload['contributor']; # NOTE(review): anonymous uploaders presumably arrive under 'ip', not 'username'/'id' - confirm against grabContributor() |
| 423 | + $dbw =& wfGetDB( DB_MASTER ); |
| 424 | + $data = array( |
| 425 | + 'img_name' => $upload['filename'], |
| 426 | + 'img_size' => IntVal( $upload['size'] ), |
| 427 | + 'img_description' => $upload['comment'], |
| 428 | + 'img_user' => isset( $contrib['id'] ) ? IntVal( $contrib['id'] ) : 0, |
| 429 | + 'img_user_text' => isset( $contrib['username'] ) ? $contrib['username'] : ( isset( $contrib['ip'] ) ? $contrib['ip'] : '' ), |
| 430 | + 'img_timestamp' => $dbw->timestamp( $this->getTimestamp( $upload['timestamp'] ) ) ); |
| 431 | + $dbw->begin(); |
| 432 | + echo "REPLACING image row\n"; |
| 433 | + $dbw->replace( 'image', array( 'img_name' ), $data, $fname ); |
| 434 | + $dbw->commit(); |
| 435 | + |
| 436 | + return $this->downloadUpload( $upload ); |
| 437 | + } |
| 438 | + |
| 439 | + # Copy $upload['src'] to its path under wfImageDir(). Returns true when uploads are disabled, the local file already matches, or the copy completes; otherwise an OAIError. |
| 440 | + function downloadUpload( $upload ) { |
| 441 | + global $wgDisableUploads; |
| 442 | + if( $wgDisableUploads ) { |
| 443 | + echo "Uploads disabled locally: NOT fetching URL '" . |
| 444 | + $upload['src'] . "'.\n"; |
| 445 | + return true; |
| 446 | + } |
| 447 | + |
| 448 | + # We assume the filename has already been validated by code above us. |
| 449 | + $filename = wfImageDir( $upload['filename'] ) . '/' . $upload['filename']; |
| 450 | + |
| 451 | + $timestamp = wfTimestamp( TS_UNIX, $this->getTimestamp( $upload['timestamp'] ) ); |
| 452 | + if( file_exists( $filename ) |
| 453 | + && filemtime( $filename ) == $timestamp |
| 454 | + && filesize( $filename ) == $upload['size'] ) { |
| 455 | + echo "Local file $filename matches; skipping download.\n"; |
| 456 | + return true; |
| 457 | + } |
| 458 | + |
| 459 | + if( !preg_match( '!^http://!', $upload['src'] ) ) |
| 460 | + return new OAIError( 'Invalid image source URL "' . $upload['src'] . '".' ); |
| 461 | + |
| 462 | + $input = fopen( $upload['src'], 'rb' ); |
| 463 | + if( !$input ) { |
| 464 | + if( file_exists( $filename ) ) unlink( $filename ); # only remove a local copy that actually exists |
| 465 | + return new OAIError( 'Could not fetch image source URL "' . $upload['src'] . '".' ); |
| 466 | + } |
| 467 | + |
| 468 | + if( file_exists( $filename ) ) { |
| 469 | + unlink( $filename ); |
| 470 | + } |
| 471 | + if( !( $output = fopen( $filename, 'xb' ) ) ) { |
| 472 | + fclose( $input ); # do not leak the remote stream when the local file cannot be created |
| 473 | + return new OAIError( 'Could not create local image file "' . $filename . '" for writing.' ); |
| 474 | + } |
| 475 | + echo "Fetching " . $upload['src'] . " to $filename: "; |
| 476 | + while( !feof( $input ) && ( $buffer = fread( $input, 65536 ) ) !== false ) { # stop on a read error instead of spinning |
| 477 | + fwrite( $output, $buffer ); |
| 478 | + echo "."; |
| 479 | + } |
| 480 | + fclose( $input ); |
| 481 | + fclose( $output ); |
| 482 | + |
| 483 | + touch( $filename, $timestamp ); # stamp the file so the mtime/size check above can skip it next time |
| 484 | + echo " done.\n"; |
| 485 | + |
394 | 486 | return true; |
395 | 487 | } |
396 | 488 | |
— | — | @@ -454,7 +546,7 @@ |
455 | 547 | <contributor> |
456 | 548 | <ip> |
457 | 549 | <id> |
458 | | - <name> |
| 550 | + <username> |
459 | 551 | <comment> |
460 | 552 | <text> |
461 | 553 | <minor> |
— | — | @@ -519,6 +611,11 @@ |
520 | 612 | return $revision; |
521 | 613 | $data['revisions'][] = $revision; |
522 | 614 | break; |
| 615 | + case 'upload': |
| 616 | + if( OAIError::isError( $upload = OAIUpdateRecord::grabUpload( $node ) ) ) |
| 617 | + return $upload; |
| 618 | + $data['uploads'][] = $upload; |
| 619 | + break; |
523 | 620 | default: |
524 | 621 | return new OAIError( "Unexpected page element <$element>" ); |
525 | 622 | } |
— | — | @@ -551,6 +648,31 @@ |
552 | 649 | return $data; |
553 | 650 | } |
554 | 651 | |
| 652 | + # Collect the children of an <upload> node into an array keyed by element name. |
| 653 | + # Returns that array, or an OAIError for an unknown element or a failed grabContributor(). |
| 654 | + function grabUpload( $upload ) { |
| 655 | + $data = array(); |
| 656 | + $plain = array( 'timestamp', 'comment', 'filename', 'src', 'size' ); |
| 657 | + $node = oaiNextChild( $upload ); |
| 658 | + # The walk ends once oaiNextChild()/oaiNextSibling() hands back an OAIError. |
| 659 | + while( !OAIError::isError( $node ) ) { |
| 660 | + $element = $node->node_name(); |
| 661 | + if( in_array( $element, $plain ) ) { |
| 662 | + # Plain fields are stored after decoding their content. |
| 663 | + $data[$element] = OAIUpdateRecord::decode( $node->get_content() ); |
| 664 | + } elseif( $element == 'contributor' ) { |
| 665 | + $contrib = OAIUpdateRecord::grabContributor( $node ); |
| 666 | + if( OAIError::isError( $contrib ) ) |
| 667 | + return $contrib; |
| 668 | + $data[$element] = $contrib; |
| 669 | + } else { |
| 670 | + return new OAIError( "Unexpected upload element <$element>" ); |
| 671 | + } |
| 672 | + $node = oaiNextSibling( $node ); |
| 673 | + } |
| 674 | + return $data; |
| 675 | + } |
| 676 | + |
555 | 677 | function grabContributor( $node ) { |
556 | 678 | $data = array(); |
557 | 679 | for( $node = oaiNextChild( $node ); |
Index: trunk/extensions/OAI/README |
— | — | @@ -19,3 +19,10 @@ |
20 | 20 | Clients will get only the latest current update; this does not include |
21 | 21 | complete old page entries by design, as basic mirrors generally don't need |
22 | 22 | to maintain that extra stuff. |
| 23 | + |
| 24 | + |
| 25 | +As of May 19, the updater will attempt to update the links tables on edits, |
| 26 | +and can fetch uploaded image files automatically. |
| 27 | + |
| 28 | +(Uploads must be enabled locally by setting $wgDisableUploads = false; otherwise |
| 29 | +no files will be fetched. Records in the image table are updated either way.) |