r9099 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r9098‎ | r9099 | r9100 >
Date:09:32, 19 May 2005
Author:vibber
Status:old (Comments)
Tags:
Comment:
Repository:
* Updated to 0.2 export schema:
* Include image upload records with image pages.

Harvester:
* Add --dry-run option to fetch updates without applying (for testing).
* If database is empty, start from epoch instead of current time.
* Dump repo URLs as we fetch them as a debugging aid.
* Run link table updates as we save pages.
* Run the standard delete updates to deal with link tables.
* Update image table records from <upload> info.
* If local uploads enabled, fetch remote images for local storage.
Modified paths:
  • /trunk/extensions/OAI/OAIHarvest.php (modified) (history)
  • /trunk/extensions/OAI/OAIRepo.php (modified) (history)
  • /trunk/extensions/OAI/README (modified) (history)
  • /trunk/extensions/OAI/oaiUpdate.php (modified) (history)

Diff [purge]

Index: trunk/extensions/OAI/oaiUpdate.php
@@ -6,6 +6,7 @@
77 #dl( '/usr/lib/php/extensions/no-debug-non-zts-20020429/domxml.so' );
88 }
99
 10+$options = array( 'dry-run' );
1011 require_once( 'commandLine.inc' );
1112 #require_once( 'extensions/OAI/OAIHarvest.php' );
1213
@@ -18,12 +19,21 @@
1920 $harvester = new OAIHarvester( $oaiSourceRepository );
2021
2122 $dbr =& wfGetDB( DB_SLAVE );
22 -$lastUpdate = wfTimestamp( TS_MW, $dbr->selectField( 'cur', 'MAX(cur_timestamp)' ) );
 23+$highest = $dbr->selectField( 'cur', 'MAX(cur_timestamp)' );
 24+if( $highest ) {
 25+ $lastUpdate = wfTimestamp( TS_MW, $highest );
 26+} else {
 27+ # Starting from an empty database!
 28+ $lastUpdate = '19700101000000';
 29+}
2330
2431 $callback = 'showUpdates';
2532 function showUpdates( $record ) {
 33+ global $options;
2634 $record->dump();
27 - $record->apply();
 35+ if( !isset( $options['dry-run'] ) ) {
 36+ $record->apply();
 37+ }
2838 }
2939
3040 $result = $harvester->listUpdates( $lastUpdate, $callback );
Index: trunk/extensions/OAI/OAIRepo.php
@@ -553,8 +553,8 @@
554554 'namespace' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
555555 'schema' => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd' ),
556556 'mediawiki' => array(
557 - 'namespace' => 'http://www.mediawiki.org/xml/export-0.1/',
558 - 'schema' => 'http://www.mediawiki.org/xml/export-0.1.xsd' ) );
 557+ 'namespace' => 'http://www.mediawiki.org/xml/export-0.2/',
 558+ 'schema' => 'http://www.mediawiki.org/xml/export-0.2.xsd' ) );
559559 }
560560
561561 }
@@ -691,11 +691,11 @@
692692 global $wgContLanguageCode;
693693 $title = Title::makeTitle( $this->_row->namespace, $this->_row->title );
694694 $out = oaiTag( 'mediawiki', array(
695 - 'xmlns' => 'http://www.mediawiki.org/xml/export-0.1/',
 695+ 'xmlns' => 'http://www.mediawiki.org/xml/export-0.2/',
696696 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
697 - 'xsi:schemaLocation' => 'http://www.mediawiki.org/xml/export-0.1/ ' .
698 - 'http://www.mediawiki.org/xml/export-0.1.xsd',
699 - 'version' => '0.1',
 697+ 'xsi:schemaLocation' => 'http://www.mediawiki.org/xml/export-0.2/ ' .
 698+ 'http://www.mediawiki.org/xml/export-0.2.xsd',
 699+ 'version' => '0.2',
700700 'xml:lang' => $wgContLanguageCode ) ) . "\n";
701701 $out .= "<page>\n";
702702 $out .= oaiTag( 'title', array(), $title->getPrefixedText() ) . "\n";
@@ -704,11 +704,52 @@
705705 $out .= oaiTag( 'restrictions', array(), $this->_row->restrictions ) . "\n";
706706 }
707707 $out .= revision2xml( $this->_row, true, true );
 708+ if( $title->getNamespace() == NS_IMAGE ) {
 709+ $out .= $this->renderUpload();
 710+ }
708711 $out .= "</page>\n";
709712 $out .= "</mediawiki>\n";
710713 return $out;
711714 }
712715
 716+ function renderUpload() {
 717+ $fname = 'WikiOAIRecord::renderUpload';
 718+ $db =& wfGetDB( DB_SLAVE );
 719+ $imageRow = $db->selectRow( 'image',
 720+ array( 'img_name', 'img_size', 'img_description',
 721+ 'img_user', 'img_user_text', 'img_timestamp' ),
 722+ array( 'img_name' => $this->_row->title ),
 723+ $fname );
 724+ if( $imageRow ) {
 725+ $url = Image::wfImageUrl( $imageRow->img_name );
 726+ if( $url{0} == '/' ) {
 727+ global $wgServer;
 728+ $url = $wgServer . $url;
 729+ }
 730+ return implode( "\n", array(
 731+ "<upload>",
 732+ oaiTag( 'timestamp', array(), wfTimestamp2ISO8601( $imageRow->img_timestamp ) ),
 733+ $this->renderContributor( $imageRow->img_user, $imageRow->img_user_text ),
 734+ oaiTag( 'comment', array(), $imageRow->img_description ),
 735+ oaiTag( 'filename', array(), $imageRow->img_name ),
 736+ oaiTag( 'src', array(), $url ),
 737+ oaiTag( 'size', array(), $imageRow->img_size ),
 738+ "</upload>\n" ) );
 739+ } else {
 740+ return '';
 741+ }
 742+ }
 743+
 744+ function renderContributor( $id, $text ) {
 745+ if( $id ) {
 746+ $tag = oaiTag( 'username', array(), $text ) .
 747+ oaiTag( 'id', array(), $id );
 748+ } else {
 749+ $tag = oaiTag( 'ip', array(), $text );
 750+ }
 751+ return '<contributor>' . $tag . '</contributor>';
 752+ }
 753+
713754 }
714755
715756 function oaiUpdatePage( $id, $action ) {
Index: trunk/extensions/OAI/OAIHarvest.php
@@ -224,6 +224,7 @@
225225 }
226226
227227 $uagent = ini_set( 'user_agent', $this->userAgent() );
 228+ echo "Fetching: $url\n";
228229 $result = file_get_contents( $url );
229230 ini_set( 'user_agent', $uagent );
230231
@@ -284,8 +285,7 @@
285286 return Title::newFromText( $this->_page['title'] );
286287 }
287288
288 - function getTimestamp() {
289 - $time = $this->_page['revisions'][0]['timestamp'];
 289+ function getTimestamp( $time ) {
290290 if( preg_match( '/^(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)Z$/', $time, $matches ) ) {
291291 return wfTimestamp( TS_MW,
292292 $matches[1] . $matches[2] . $matches[3] .
@@ -302,7 +302,7 @@
303303 $title = $this->getTitle();
304304 if( $title ) {
305305 printf( "%s %10d [[%s]]\n",
306 - $this->getTimestamp(),
 306+ $this->getTimestamp( $this->_page['revisions'][0]['timestamp'] ),
307307 $this->getArticleId(),
308308 $title->getPrefixedText() );
309309 } else {
@@ -314,8 +314,6 @@
315315 }
316316
317317 function apply() {
318 - $fname = 'OAIUpdateRecord::apply';
319 -
320318 if( $this->isDeleted() ) {
321319 return $this->doDelete();
322320 }
@@ -328,9 +326,27 @@
329327 $this->_page['title'] ) );
330328 }
331329
 330+ $id = 0;
 331+ foreach( $this->_page['revisions'] as $revision ) {
 332+ $id = $this->applyRevision( $revision );
 333+ }
 334+
 335+ fixLinksFromArticle( $id );
 336+
 337+ foreach( $this->_page['uploads'] as $upload ) {
 338+ if( OAIError::isError( $err = $this->applyUpload( $upload ) ) )
 339+ return $err;
 340+ }
 341+
 342+ return true;
 343+ }
 344+
 345+ function applyRevision( $revision ) {
 346+ $fname = 'OAIUpdateRecord::applyRevision';
 347+
 348+ $title = $this->getTitle();
332349 $id = $this->getArticleId();
333 - $timestamp = $this->getTimestamp();
334 - $revision = $this->_page['revisions'][0];
 350+ $timestamp = $this->getTimestamp( $revision['timestamp'] );
335351
336352 $dbw =& wfGetDB( DB_WRITE );
337353 $dbw->begin();
@@ -388,8 +404,84 @@
389405 }
390406 $dbw->commit();
391407
392 - fixLinksFromArticle( $id );
 408+ return $id;
 409+ }
 410+
 411+ function applyUpload( $upload ) {
 412+ $fname = 'WikiOAIUpdate::applyUpload';
393413
 414+ # FIXME: validate these files...
 415+ if( strpos( $upload['filename'], '/' ) !== false
 416+ || strpos( $upload['filename'], '\\' ) !== false
 417+ || $upload['filename'] == ''
 418+ || $upload['filename'] !== trim( $upload['filename'] ) ) {
 419+ return new OAIError( 'Invalid filename "' . $upload['filename'] . '"' );
 420+ }
 421+
 422+ $dbw =& wfGetDB( DB_MASTER );
 423+ $data = array(
 424+ 'img_name' => $upload['filename'],
 425+ 'img_size' => IntVal( $upload['size'] ),
 426+ 'img_description' => $upload['comment'],
 427+ 'img_user' => IntVal( $upload['contributor']['id'] ),
 428+ 'img_user_text' => $upload['contributor']['username'],
 429+ 'img_timestamp' => $dbw->timestamp( $this->getTimestamp( $upload['timestamp'] ) ) );
 430+
 431+ $dbw->begin();
 432+ echo "REPLACING image row\n";
 433+ $dbw->replace( 'image', array( 'img_name' ), $data, $fname );
 434+ $dbw->commit();
 435+
 436+ return $this->downloadUpload( $upload );
 437+ }
 438+
 439+ function downloadUpload( $upload ) {
 440+ global $wgDisableUploads;
 441+ if( $wgDisableUploads ) {
 442+ echo "Uploads disabled locally: NOT fetching URL '" .
 443+ $upload['src'] . "'.\n";
 444+ return true;
 445+ }
 446+
 447+ # We assume the filename has already been validated by code above us.
 448+ $filename = wfImageDir( $upload['filename'] ) . '/' . $upload['filename'];
 449+
 450+ $timestamp = wfTimestamp( TS_UNIX, $this->getTimestamp( $upload['timestamp'] ) );
 451+ if( file_exists( $filename )
 452+ && filemtime( $filename ) == $timestamp
 453+ && filesize( $filename ) == $upload['size'] ) {
 454+ echo "Local file $filename matches; skipping download.\n";
 455+ return true;
 456+ }
 457+
 458+ if( !preg_match( '!^http://!', $upload['src'] ) )
 459+ return new OAIError( 'Invalid image source URL "' . $upload['src'] . "'." );
 460+
 461+ $input = fopen( $upload['src'], 'rb' );
 462+ if( !$input ) {
 463+ unlink( $filename );
 464+ return new OAIError( 'Could not fetch image source URL "' . $upload['src'] . "'." );
 465+ }
 466+
 467+ if( file_exists( $filename ) ) {
 468+ unlink( $filename );
 469+ }
 470+ if( !( $output = fopen( $filename, 'xb' ) ) ) {
 471+ return new OAIError( 'Could not create local image file "' . $filename . '" for writing.' );
 472+ }
 473+
 474+ echo "Fetching " . $upload['src'] . " to $filename: ";
 475+ while( !feof( $input ) ) {
 476+ $buffer = fread( $input, 65536 );
 477+ fwrite( $output, $buffer );
 478+ echo ".";
 479+ }
 480+ fclose( $input );
 481+ fclose( $output );
 482+
 483+ touch( $filename, $timestamp );
 484+ echo " done.\n";
 485+
394486 return true;
395487 }
396488
@@ -454,7 +546,7 @@
455547 <contributor>
456548 <ip>
457549 <id>
458 - <name>
 550+ <username>
459551 <comment>
460552 <text>
461553 <minor>
@@ -519,6 +611,11 @@
520612 return $revision;
521613 $data['revisions'][] = $revision;
522614 break;
 615+ case 'upload':
 616+ if( OAIError::isError( $upload = OAIUpdateRecord::grabUpload( $node ) ) )
 617+ return $upload;
 618+ $data['uploads'][] = $upload;
 619+ break;
523620 default:
524621 return new OAIError( "Unexpected page element <$element>" );
525622 }
@@ -551,6 +648,31 @@
552649 return $data;
553650 }
554651
 652+ function grabUpload( $upload ) {
 653+ $data = array();
 654+ for( $node = oaiNextChild( $upload );
 655+ !OAIError::isError( $node );
 656+ $node = oaiNextSibling( $node ) ) {
 657+ switch( $element = $node->node_name() ) {
 658+ case 'timestamp':
 659+ case 'comment':
 660+ case 'filename':
 661+ case 'src':
 662+ case 'size':
 663+ $data[$element] = OAIUpdateRecord::decode( $node->get_content() );
 664+ break;
 665+ case 'contributor':
 666+ if( OAIError::isError( $contrib = OAIUpdateRecord::grabContributor( $node ) ) )
 667+ return $contrib;
 668+ $data[$element] = $contrib;
 669+ break;
 670+ default:
 671+ return new OAIError( "Unexpected upload element <$element>" );
 672+ }
 673+ }
 674+ return $data;
 675+ }
 676+
555677 function grabContributor( $node ) {
556678 $data = array();
557679 for( $node = oaiNextChild( $node );
Index: trunk/extensions/OAI/README
@@ -19,3 +19,10 @@
2020 Clients will get only the latest current update; this does not include
2121 complete old page entries by design, as basic mirrors generally don't need
2222 to maintain that extra stuff.
 23+
 24+
 25+As of May 19, the updater will attempt to update the links tables on edits,
 26+and can fetch uploaded image files automatically.
 27+
 28+(Uploads must be enabled locally with $wgDisableUploads = false; or no files
 29+will be fetched. The image table records will be updated either way.)

Comments

#Comment by Reedy (talk | contribs)   12:01, 19 August 2010

Where's fixLinksFromArticle ?

#Comment by 😂 (talk | contribs)   16:50, 20 August 2010

It's part of RefreshLinks. I broke this extension in the maintenance rewrite. Fixed in r71368, r71369.

Status & tagging log