r57940 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r57939‎ | r57940 | r57941 >
Date:04:40, 20 October 2009
Author:tstarling
Status:ok
Tags:
Comment:
Need DumpHTML to run static.wikipedia.org/index.php
Modified paths:
  • /branches/wmf-deployment/extensions/DumpHTML (added) (history)

Diff [purge]

Index: branches/wmf-deployment/extensions/DumpHTML/dumpHTML.php
@@ -0,0 +1,182 @@
<?php
/**
 * Command-line entry point of the DumpHTML extension: renders wiki pages into
 * a tree of static HTML files below a destination directory.
 *
 * Run with --help for the list of options.
 *
 * @addtogroup Maintenance
 */

$usage = <<<ENDS
Usage:
php dumpHTML.php [options...]

  --help               show this message

  -d <dest>            destination directory
  -s <start>           start ID
  -e <end>             end ID
  -k <skin>            skin to use (defaults to offline)
  --no-overwrite       skip existing HTML files
  --checkpoint <file>  use a checkpoint file to allow restarting of interrupted dumps
  --slice <n/m>        split the job into m segments and do the n'th one
  --images             only do image description pages
  --shared-desc        only do shared (commons) image description pages
  --no-shared-desc     don't do shared image description pages
  --categories         only do category pages
  --redirects          only do redirects
  --special            only do miscellaneous stuff
  --force-copy         copy commons instead of symlink, needed for Wikimedia
  --interlang          allow interlanguage links
  --image-snapshot     copy all images used to the destination directory
  --compress           generate compressed version of the html pages
  --udp-profile <N>    profile 1/N rendering operations using ProfilerSimpleUDP
  --oom-adj <N>        set /proc/<pid>/oom_adj
  --show-titles        write each article title to stdout
  --group <group>      use the specified user group to read articles

ENDS;

define( 'MW_HTML_FOR_DUMP', 1 );

$optionsWithArgs = array( 's', 'd', 'e', 'k', 'checkpoint', 'slice', 'udp-profile', 'oom-adj', 'group' );
$options = array( 'help' );

# Developer switch: set to true to profile the whole run
$profiling = false;

if ( $profiling ) {
	define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' );
	function wfSetupDump() {
		global $wgProfiling, $wgProfileToDatabase, $wgProfileSampleRate;
		$wgProfiling = true;
		$wgProfileToDatabase = false;
		$wgProfileSampleRate = 1;
	}
}

if ( in_array( '--udp-profile', $argv ) ) {
	define( 'MW_FORCE_PROFILE', 1 );
}

$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$IP = dirname(__FILE__).'/../..';
}
require_once( $IP."/maintenance/commandLine.inc" );
require_once( dirname(__FILE__)."/dumpHTML.inc" );
require_once( dirname(__FILE__)."/SkinOffline.php" );

if ( version_compare( $wgVersion, '1.11.1', '<' ) ) {
	echo "Error, the DumpHTML extension needs at least MediaWiki version 1.11.1 to work, you have version $wgVersion.\n";
	echo "Try using maintenance/dumpHTML.php instead.\n";
	exit( 1 );
}

error_reporting( E_ALL & (~E_NOTICE) );

if( isset( $options['help'] ) || isset( $options['h'] ) ) {
	echo $usage;
	exit;
}

# Options that are read unconditionally below. Give the absent ones an explicit
# null (the value an undefined index evaluates to) so that the script does not
# depend on undefined-index notices being silenced.
# NOTE(review): assumes commandLine.inc has filled $options by now -- confirm.
$optionalNames = array(
	'slice', 'force-copy', 'interlang', 'image-snapshot', 'checkpoint',
	'no-overwrite', 'compress', 'no-shared-desc', 'udp-profile', 'show-titles',
	'group', 'special', 'images', 'categories', 'redirects', 'shared-desc',
);
foreach ( $optionalNames as $optionalName ) {
	if ( !isset( $options[$optionalName] ) ) {
		$options[$optionalName] = null;
	}
}

if ( !wfIsWindows() && isset( $options['oom-adj'] ) ) {
	# Best effort: a failure to write the adjustment is not fatal
	$adj = intval( $options['oom-adj'] );
	$pid = getmypid();
	file_put_contents( "/proc/$pid/oom_adj", $adj );
}

if ( !empty( $options['s'] ) ) {
	$start = $options['s'];
} else {
	$start = 1;
}

if ( !empty( $options['e'] ) ) {
	$end = $options['e'];
} else {
	$dbr = wfGetDB( DB_SLAVE );
	$end = $dbr->selectField( 'page', 'max(page_id)', false );
}

if ( !empty( $options['d'] ) ) {
	$dest = $options['d'];
} else {
	$dest = "$IP/static";
}

$skin = isset( $options['k'] ) ? $options['k'] : 'offline';

if ( $options['slice'] ) {
	# "n/m": this process handles segment n of m
	$bits = explode( '/', $options['slice'] );
	$sliceNumerator = isset( $bits[0] ) ? intval( $bits[0] ) : 0;
	$sliceDenominator = isset( $bits[1] ) ? intval( $bits[1] ) : 0;
	if ( count( $bits ) != 2 || $sliceNumerator < 1 || $sliceNumerator > $sliceDenominator ) {
		print "Invalid slice specification\n";
		exit( 1 );
	}
} else {
	$sliceNumerator = $sliceDenominator = 1;
}

$wgHTMLDump = new DumpHTML( array(
	'dest' => $dest,
	'forceCopy' => $options['force-copy'],
	'alternateScriptPath' => $options['interlang'],
	'interwiki' => $options['interlang'],
	'skin' => $skin,
	'makeSnapshot' => $options['image-snapshot'],
	'checkpointFile' => $options['checkpoint'],
	'startID' => $start,
	'endID' => $end,
	'sliceNumerator' => $sliceNumerator,
	'sliceDenominator' => $sliceDenominator,
	'noOverwrite' => $options['no-overwrite'],
	'compress' => $options['compress'],
	'noSharedDesc' => $options['no-shared-desc'],
	'udpProfile' => $options['udp-profile'],
	'showTitles' => $options['show-titles'],
	'group' => $options['group'],
));

$wgHTMLDump->setupDestDir();

# The "only do ..." switches are mutually exclusive; the first one present wins
if ( $options['special'] ) {
	$wgHTMLDump->doSpecials();
} elseif ( $options['images'] ) {
	$wgHTMLDump->doImageDescriptions();
} elseif ( $options['categories'] ) {
	$wgHTMLDump->doCategories();
} elseif ( $options['redirects'] ) {
	$wgHTMLDump->doRedirects();
} elseif ( $options['shared-desc'] ) {
	$wgHTMLDump->doSharedImageDescriptions();
} else {
	print "Creating static HTML dump in directory $dest. \n";
	$dbr = wfGetDB( DB_SLAVE );
	$server = $dbr->getProperty( 'mServer' );
	print "Using database {$server}\n";

	# An explicit end ID restricts the run to the article pass
	if ( !isset( $options['e'] ) ) {
		$wgHTMLDump->doEverything();
	} else {
		$wgHTMLDump->doArticles();
	}
}

if ( isset( $options['debug'] ) ) {
	# Print the 20 largest globals by serialized size.
	# (print_r($GLOBALS) is avoided as a workaround for bug #36957)
	$globals = array_keys( $GLOBALS );
	$sizes = array();
	foreach ( $globals as $name ) {
		$sizes[$name] = strlen( serialize( $GLOBALS[$name] ) );
	}
	arsort($sizes);
	$sizes = array_slice( $sizes, 0, 20 );
	foreach ( $sizes as $name => $size ) {
		printf( "%9d %s\n", $size, $name );
	}
}

if ( $profiling ) {
	echo $wgProfiler->getOutput();
}
 183+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/dumpHTML.php
___________________________________________________________________
Name: svn:keywords
1184 + Author Date Id Revision
Name: svn:eol-style
2185 + native
Index: branches/wmf-deployment/extensions/DumpHTML/dumpHTML.inc
@@ -0,0 +1,1412 @@
 2+<?php
 3+/**
 4+ * @addtogroup Maintenance
 5+ */
 6+
 7+define( 'REPORTING_INTERVAL', 10 );
 8+
 9+class DumpHTML {
 10+ # Destination directory
 11+ var $dest;
 12+
 13+ # Extension base directory
 14+ var $extdir;
 15+
 16+ # Skip existing files
 17+ var $noOverwrite = false;
 18+
 19+ # Show interlanguage links?
 20+ var $interwiki = true;
 21+
 22+ # Depth of HTML directory tree
 23+ var $depth = 3;
 24+
 25+ # Directory that commons images are copied into
 26+ var $sharedStaticDirectory;
 27+
 28+ # Directory that the images are in, after copying
 29+ var $destUploadDirectory;
 30+
 31+ # Base URL for images, after copying
 32+ var $destUploadUrl;
 33+
 34+ # Base URL for the destination directory
 35+ var $articleBaseUrl;
 36+
 37+ # Relative path to image directory
 38+ var $imageRel = 'upload';
 39+
 40+ # Copy commons images instead of symlinking
 41+ var $forceCopy = false;
 42+
 43+ # Make a copy of all images encountered
 44+ var $makeSnapshot = false;
 45+
 46+ # Don't do shared image description pages in doEverything()
 47+ var $noSharedDesc = false;
 48+
 49+ # Make links assuming the script path is in the same directory as
 50+ # the destination
 51+ var $alternateScriptPath = false;
 52+
 53+ # Original values of various globals
 54+ var $oldArticlePath = false, $oldCopyrightIcon = false, $oldLogo, $oldRepoGroup, $oldScriptPath;
 55+
 56+ # Has setupGlobals been called?
 57+ var $setupDone = false;
 58+
 59+ # Compress the HTML pages (written gzip-encoded with a .gz suffix)
 60+ var $compress = false;
 61+
 62+ # List of raw pages used in the current article
 63+ var $rawPages;
 64+
 65+ # Skin to use
 66+ var $skin = 'offline';
 67+
 68+ # User group to use
 69+ var $group = false;
 70+
 71+ # Checkpoint stuff
 72+ var $checkpointFile = false, $checkpoints = false;
 73+
 74+ var $startID = 1, $endID = false;
 75+
 76+ var $sliceNumerator = 1, $sliceDenominator = 1;
 77+
 78+ # Max page ID, lazy initialised
 79+ var $maxPageID = false;
 80+
 81+ # UDP profiling
 82+ var $udpProfile, $udpProfileCounter = 0, $udpProfileInit = false;
 83+
 84+ # Debugging options
 85+ var $showTitles = false;
 86+
 87+ # Extension version
 88+ const VERSION = '2.0';
 89+
 90+ function DumpHTML( $settings = array() ) {
 91+ foreach ( $settings as $var => $value ) {
 92+ $this->$var = $value;
 93+ }
 94+ $this->extdir = dirname( __FILE__ );
 95+ }
 96+
 97+ function loadCheckpoints() {
 98+ if ( $this->checkpoints !== false ) {
 99+ return true;
 100+ } elseif ( !$this->checkpointFile ) {
 101+ return false;
 102+ } else {
 103+ $lines = @file( $this->checkpointFile );
 104+ if ( $lines === false ) {
 105+ print "Starting new checkpoint file \"{$this->checkpointFile}\"\n";
 106+ $this->checkpoints = array();
 107+ } else {
 108+ $lines = array_map( 'trim', $lines );
 109+ $this->checkpoints = array();
 110+ foreach ( $lines as $line ) {
 111+ list( $name, $value ) = explode( '=', $line, 2 );
 112+ $this->checkpoints[$name] = $value;
 113+ }
 114+ }
 115+ return true;
 116+ }
 117+ }
 118+
 119+ function getCheckpoint( $type, $defValue = false ) {
 120+ if ( !$this->loadCheckpoints() ) {
 121+ return false;
 122+ }
 123+ if ( !isset( $this->checkpoints[$type] ) ) {
 124+ return false;
 125+ } else {
 126+ return $this->checkpoints[$type];
 127+ }
 128+ }
 129+
 130+ function setCheckpoint( $type, $value ) {
 131+ if ( !$this->checkpointFile ) {
 132+ return;
 133+ }
 134+ $this->checkpoints[$type] = $value;
 135+ $blob = '';
 136+ foreach ( $this->checkpoints as $type => $value ) {
 137+ $blob .= "$type=$value\n";
 138+ }
 139+ file_put_contents( $this->checkpointFile, $blob );
 140+ }
 141+
 142+ function doEverything() {
 143+ if ( $this->getCheckpoint( 'everything' ) == 'done' ) {
 144+ print "Checkpoint says everything is already done\n";
 145+ return;
 146+ }
 147+ $this->doArticles();
 148+ $this->doCategories();
 149+ $this->doRedirects();
 150+ if ( $this->sliceNumerator == 1 ) {
 151+ $this->doSpecials();
 152+ }
 153+ $this->doLocalImageDescriptions();
 154+
 155+ if ( !$this->noSharedDesc ) {
 156+ $this->doSharedImageDescriptions();
 157+ }
 158+
 159+ $this->setCheckpoint( 'everything', 'done' );
 160+ }
 161+
 162+ /**
 163+ * Write a set of articles specified by start and end page_id
 164+ * Skip categories and images, they will be done separately
 165+ */
 166+ function doArticles() {
 167+ if ( $this->endID === false ) {
 168+ $end = $this->getMaxPageID();
 169+ } else {
 170+ $end = $this->endID;
 171+ }
 172+ $start = $this->startID;
 173+
 174+ # Start from the checkpoint
 175+ $cp = $this->getCheckpoint( 'article' );
 176+ if ( $cp == 'done' ) {
 177+ print "Articles already done\n";
 178+ return;
 179+ } elseif ( $cp !== false ) {
 180+ $start = $cp;
 181+ print "Resuming article dump from checkpoint at page_id $start of $end\n";
 182+ } else {
 183+ print "Starting from page_id $start of $end\n";
 184+ }
 185+
 186+ # Move the start point to the correct slice if it isn't there already
 187+ $start = $this->modSliceStart( $start );
 188+
 189+ $this->setupGlobals();
 190+
 191+ $mainPageObj = Title::newMainPage();
 192+ $mainPage = $mainPageObj->getPrefixedDBkey();
 193+
 194+ for ( $id = $start, $i = 0; $id <= $end; $id += $this->sliceDenominator, $i++ ) {
 195+ wfWaitForSlaves( 20 );
 196+ if ( !( $i % REPORTING_INTERVAL) ) {
 197+ print "Processing ID: $id\r";
 198+ $this->setCheckpoint( 'article', $id );
 199+ }
 200+ if ( !($i % (REPORTING_INTERVAL*10) ) ) {
 201+ print "\n";
 202+ }
 203+ $title = Title::newFromID( $id );
 204+ if ( $title ) {
 205+ $ns = $title->getNamespace() ;
 206+ if ( $ns != NS_CATEGORY && $ns != NS_MEDIAWIKI &&
 207+ $title->getPrefixedDBkey() != $mainPage ) {
 208+ $this->doArticle( $title );
 209+ }
 210+ }
 211+ }
 212+ $this->setCheckpoint( 'article', 'done' );
 213+ print "\n";
 214+ }
 215+
 216+ function doSpecials() {
 217+ $this->doMainPage();
 218+
 219+ $this->setupGlobals();
 220+ print "Special:Categories...";
 221+ $this->doArticle( SpecialPage::getTitleFor( 'Categories' ) );
 222+ print "\n";
 223+ }
 224+
 225+ /** Write the main page as index.html */
 226+ function doMainPage() {
 227+
 228+ print "Making index.html ";
 229+
 230+ // Set up globals with no ../../.. in the link URLs
 231+ $this->setupGlobals( 0 );
 232+
 233+ $title = Title::newMainPage();
 234+ $text = $this->getArticleHTML( $title );
 235+
 236+ # Parse the XHTML to find the images
 237+ #$images = $this->findImages( $text );
 238+ #$this->copyImages( $images );
 239+
 240+ $file = fopen( "{$this->dest}/index.html", "w" );
 241+ if ( !$file ) {
 242+ print "\nCan't open index.html for writing\n";
 243+ return false;
 244+ }
 245+ fwrite( $file, $text );
 246+ fclose( $file );
 247+ print "\n";
 248+ }
 249+
 250+ function doImageDescriptions() {
 251+ $this->doLocalImageDescriptions();
 252+ if ( !$this->noSharedDesc ) {
 253+ $this->doSharedImageDescriptions();
 254+ }
 255+ }
 256+
 257+ /**
 258+ * Dump image description pages that don't have an associated article, but do
 259+ * have a local image
 260+ */
 261+ function doLocalImageDescriptions() {
 262+ $chunkSize = 1000;
 263+
 264+ $dbr = wfGetDB( DB_SLAVE );
 265+
 266+ $cp = $this->getCheckpoint( 'local image' );
 267+ if ( $cp == 'done' ) {
 268+ print "Local image descriptions already done\n";
 269+ return;
 270+ } elseif ( $cp !== false ) {
 271+ print "Writing image description pages starting from $cp\n";
 272+ $conds = array( 'img_name >= ' . $dbr->addQuotes( $cp ) );
 273+ } else {
 274+ print "Writing image description pages for local images\n";
 275+ $conds = false;
 276+ }
 277+
 278+ $this->setupGlobals();
 279+ $i = 0;
 280+
 281+ do {
 282+ $res = $dbr->select( 'image', array( 'img_name' ), $conds, __METHOD__,
 283+ array( 'ORDER BY' => 'img_name', 'LIMIT' => $chunkSize ) );
 284+ $numRows = $dbr->numRows( $res );
 285+
 286+ while ( $row = $dbr->fetchObject( $res ) ) {
 287+ # Update conds for the next chunk query
 288+ $conds = array( 'img_name > ' . $dbr->addQuotes( $row->img_name ) );
 289+
 290+ // Slice the result set with a filter
 291+ if ( !$this->sliceFilter( $row->img_name ) ) {
 292+ continue;
 293+ }
 294+
 295+ wfWaitForSlaves( 10 );
 296+ if ( !( ++$i % REPORTING_INTERVAL ) ) {
 297+ print "{$row->img_name}\n";
 298+ if ( $row->img_name !== 'done' ) {
 299+ $this->setCheckpoint( 'local image', $row->img_name );
 300+ }
 301+ }
 302+ $title = Title::makeTitle( NS_IMAGE, $row->img_name );
 303+ if ( $title->getArticleID() ) {
 304+ // Already done by dumpHTML
 305+ continue;
 306+ }
 307+ $this->doArticle( $title );
 308+ }
 309+ $dbr->freeResult( $res );
 310+ } while ( $numRows );
 311+
 312+ $this->setCheckpoint( 'local image', 'done' );
 313+ print "\n";
 314+ }
 315+
 316+ /**
 317+ * Dump images which only have a real description page on commons
 318+ */
 319+ function doSharedImageDescriptions() {
 320+ list( $start, $end ) = $this->sliceRange( 0, 255 );
 321+
 322+ $cp = $this->getCheckpoint( 'shared image' );
 323+ if ( $cp == 'done' ) {
 324+ print "Shared description pages already done\n";
 325+ return;
 326+ } elseif ( $cp !== false ) {
 327+ print "Writing description pages for commons images starting from directory $cp/255\n";
 328+ $start = $cp;
 329+ } else {
 330+ print "Writing description pages for commons images\n";
 331+ }
 332+
 333+ $this->setupGlobals();
 334+ $i = 0;
 335+ foreach ( $this->oldRepoGroup->foreignInfo as $repo ) {
 336+ $repoName = $repo['name'];
 337+ for ( $hash = $start; $hash <= $end; $hash++ ) {
 338+ $this->setCheckpoint( 'shared image', $hash );
 339+ $rel = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );
 340+ $dir = "{$this->destUploadDirectory}/$repoName/$rel";
 341+ $handle = @opendir( $dir );
 342+ while ( $handle && $file = readdir( $handle ) ) {
 343+ if ( $file[0] == '.' ) {
 344+ continue;
 345+ }
 346+ if ( !(++$i % REPORTING_INTERVAL ) ) {
 347+ print "$rel $i\r";
 348+ }
 349+
 350+ $title = Title::makeTitleSafe( NS_IMAGE, $file );
 351+ if ( !$title ) {
 352+ wfDebug( __METHOD__.": invalid title: $file\n" );
 353+ continue;
 354+ }
 355+ $this->doArticle( $title );
 356+ }
 357+ if ( $handle ) {
 358+ closedir( $handle );
 359+ }
 360+ print "\n";
 361+ }
 362+ }
 363+ $this->setCheckpoint( 'shared image', 'done' );
 364+ print "\n";
 365+ }
 366+
 367+ function doCategories() {
 368+ $chunkSize = 1000;
 369+
 370+ $this->setupGlobals();
 371+ $dbr = wfGetDB( DB_SLAVE );
 372+
 373+ $cp = $this->getCheckpoint( 'category' );
 374+ if ( $cp == 'done' ) {
 375+ print "Category pages already done\n";
 376+ return;
 377+ } elseif ( $cp !== false ) {
 378+ print "Resuming category page dump from $cp\n";
 379+ $conds = array( 'cl_to >= ' . $dbr->addQuotes( $cp ) );
 380+ } else {
 381+ print "Starting category pages\n";
 382+ $conds = false;
 383+ }
 384+
 385+ $i = 0;
 386+ do {
 387+ $res = $dbr->select( 'categorylinks', 'DISTINCT cl_to', $conds, __METHOD__,
 388+ array( 'ORDER BY' => 'cl_to', 'LIMIT' => $chunkSize ) );
 389+ $numRows = $dbr->numRows( $res );
 390+
 391+ while ( $row = $dbr->fetchObject( $res ) ) {
 392+ // Set conditions for next chunk
 393+ $conds = array( 'cl_to > ' . $dbr->addQuotes( $row->cl_to ) );
 394+
 395+ // Filter pages from other slices
 396+ if ( !$this->sliceFilter( $row->cl_to ) ) {
 397+ continue;
 398+ }
 399+
 400+ wfWaitForSlaves( 10 );
 401+ if ( !(++$i % REPORTING_INTERVAL ) ) {
 402+ print "{$row->cl_to}\n";
 403+ if ( $row->cl_to != 'done' ) {
 404+ $this->setCheckpoint( 'category', $row->cl_to );
 405+ }
 406+ }
 407+ $title = Title::makeTitle( NS_CATEGORY, $row->cl_to );
 408+ $this->doArticle( $title );
 409+ }
 410+ $dbr->freeResult( $res );
 411+ } while ( $numRows );
 412+
 413+ $this->setCheckpoint( 'category', 'done' );
 414+ print "\n";
 415+ }
 416+
 417+ function doRedirects() {
 418+ print "Doing redirects...\n";
 419+
 420+ $chunkSize = 10000;
 421+ $end = $this->getMaxPageID();
 422+ $cp = $this->getCheckpoint( 'redirect' );
 423+ if ( $cp == 'done' ) {
 424+ print "Redirects already done\n";
 425+ return;
 426+ } elseif ( $cp !== false ) {
 427+ print "Resuming redirect generation from page_id $cp\n";
 428+ $start = intval( $cp );
 429+ } else {
 430+ $start = 1;
 431+ }
 432+
 433+ $this->setupGlobals();
 434+ $dbr = wfGetDB( DB_SLAVE );
 435+ $i = 0;
 436+
 437+ for ( $chunkStart = $start; $chunkStart <= $end; $chunkStart += $chunkSize ) {
 438+ $chunkEnd = min( $end, $chunkStart + $chunkSize - 1 );
 439+ $conds = array(
 440+ 'page_is_redirect' => 1,
 441+ "page_id BETWEEN $chunkStart AND $chunkEnd"
 442+ );
 443+ # Modulo slicing in SQL
 444+ if ( $this->sliceDenominator != 1 ) {
 445+ $n = intval( $this->sliceNumerator );
 446+ $m = intval( $this->sliceDenominator );
 447+ $conds[] = "page_id % $m = $n";
 448+ }
 449+ $res = $dbr->select( 'page', array( 'page_id', 'page_namespace', 'page_title' ),
 450+ $conds, __METHOD__ );
 451+
 452+ while ( $row = $dbr->fetchObject( $res ) ) {
 453+ $title = Title::makeTitle( $row->page_namespace, $row->page_title );
 454+ if ( !(++$i % (REPORTING_INTERVAL*10) ) ) {
 455+ printf( "Done %d redirects (%2.3f%%)\n", $i, $row->page_id / $end * 100 );
 456+ $this->setCheckpoint( 'redirect', $row->page_id );
 457+ }
 458+ $this->doArticle( $title );
 459+ }
 460+ $dbr->freeResult( $res );
 461+ }
 462+ $this->setCheckpoint( 'redirect', 'done' );
 463+ }
 464+
 465+ /** Write an article specified by title */
 466+ function doArticle( $title ) {
 467+ if ( $this->noOverwrite ) {
 468+ $fileName = "{$this->dest}/" . $this->getHashedFilename( $title );
 469+ if ( file_exists( $fileName ) ) {
 470+ return;
 471+ }
 472+ }
 473+
 474+ if ( $this->showTitles ) {
 475+ print $title->getPrefixedDBkey() . "\n";
 476+ }
 477+
 478+ $this->profile();
 479+
 480+ $this->rawPages = array();
 481+ $text = $this->getArticleHTML( $title );
 482+
 483+ if ( $text === false ) {
 484+ return;
 485+ }
 486+
 487+ # Parse the XHTML to find the images
 488+ #$images = $this->findImages( $text );
 489+ #$this->copyImages( $images );
 490+
 491+ # Write to file
 492+ $this->writeArticle( $title, $text );
 493+
 494+ # Do raw pages
 495+ $this->mkdir( "{$this->dest}/raw", 0755 );
 496+ foreach( $this->rawPages as $record ) {
 497+ list( $file, $title, $params ) = $record;
 498+
 499+ $path = "{$this->dest}/raw/$file";
 500+ if ( !file_exists( $path ) ) {
 501+ $article = new Article( $title );
 502+ $request = new FauxRequest( $params );
 503+ $rp = new RawPage( $article, $request );
 504+ $text = $rp->getRawText();
 505+
 506+ print "Writing $file\n";
 507+ $file = fopen( $path, 'w' );
 508+ if ( !$file ) {
 509+ print("Can't open file $path for writing\n");
 510+ continue;
 511+ }
 512+ fwrite( $file, $text );
 513+ fclose( $file );
 514+ }
 515+ }
 516+
 517+ wfIncrStats( 'dumphtml_article' );
 518+ }
 519+
 520+ /** Write the given text to the file identified by the given title object */
 521+ function writeArticle( $title, $text ) {
 522+ wfProfileIn( __METHOD__ );
 523+ $filename = $this->getHashedFilename( $title );
 524+
 525+ # Temporary hack for current dump, this should be moved to
 526+ # getFriendlyName() at the earliest opportunity.
 527+ #
 528+ # Limit filename length to 255 characters, so it works on ext3.
 529+ # Titles are in fact limited to 255 characters, but dumpHTML
 530+ # adds a suffix which may put them over the limit.
 531+ $length = strlen( $filename );
 532+ if ( $length > 255 ) {
 533+ print "Warning: Filename too long ($length bytes). Skipping.\n";
 534+ wfProfileOut( __METHOD__ );
 535+ return;
 536+ }
 537+
 538+ $fullName = "{$this->dest}/$filename";
 539+ $fullDir = dirname( $fullName );
 540+
 541+ if ( $this->compress ) {
 542+ $fullName .= ".gz";
 543+ $text = gzencode( $text, 9 );
 544+ }
 545+
 546+ if ( preg_match( '/[\x80-\xFF]/', $fullName ) && wfIsWindows() ) {
 547+ # Work around PHP unicode bug
 548+ $rand = mt_rand( 0, 99999999 );
 549+ $fullDir = str_replace( '/', '\\', $fullDir );
 550+ $fullName = str_replace( '/', '\\', $fullName );
 551+ $tempName = "{$this->dest}\\temp\\TEMP-$rand";
 552+
 553+ $success = file_put_contents( $tempName, $text );
 554+ if ( $success ) {
 555+ wfShellExec( "cscript /nologo " . wfEscapeShellArg(
 556+ dirname( __FILE__ ) . "\\rename-hack.vbs",
 557+ $this->escapeForVBScript( $tempName ),
 558+ $this->escapeForVBScript( $fullName ) ) );
 559+ }
 560+ } else {
 561+ if ( !$this->mkdir( $fullDir ) ) {
 562+ print "Error: unable to create directory '$fullDir'.\n";
 563+ }
 564+ #wfSuppressWarnings();
 565+ $success = file_put_contents( $fullName, $text );
 566+ #wfRestoreWarnings();
 567+ }
 568+
 569+ if ( !$success ) {
 570+ die("Can't open file '$fullName' for writing.\nCheck permissions or use another destination (-d).\n");
 571+ }
 572+ wfProfileOut( __METHOD__ );
 573+ }
 574+
 575+ /** Escape a UTF-8 string for VBScript's Unescape() */
 576+ function escapeForVBScript( $in ) {
 577+ $utf16 = iconv( 'UTF-8', 'UTF-16BE', $in );
 578+ $out = '';
 579+ for ( $i = 0; $i < strlen( $utf16 ); $i += 2 ) {
 580+ $codepoint = ord( $utf16[$i] ) * 256 + ord( $utf16[$i+1] );
 581+ if ( $codepoint < 128 && $codepoint >= 32 ) {
 582+ $out .= chr( $codepoint );
 583+ } else {
 584+ $out .= sprintf( "%%u%04X", $codepoint );
 585+ }
 586+ }
 587+ return $out;
 588+ }
 589+
 590+ /** Copy a directory recursively, not including .svn */
 591+ function copyDirectory( $source, $dest ) {
 592+ if ( !is_dir( $dest ) ) {
 593+ if ( !mkdir( $dest ) ) {
 594+ echo "Warning: unable to create directory \"$dest\"\n";
 595+ return false;
 596+ }
 597+ }
 598+ $dir = opendir( $source );
 599+ if ( !$dir ) {
 600+ echo "Warning: unable to open directory \"$source\"\n";
 601+ return false;
 602+ }
 603+ while ( false !== ( $fileName = readdir( $dir ) ) ) {
 604+ if ( substr( $fileName, 0, 1 ) == '.' ) {
 605+ continue;
 606+ }
 607+ $currentSource = "$source/$fileName";
 608+ $currentDest = "$dest/$fileName";
 609+ if ( is_dir( $currentSource ) ) {
 610+ $this->copyDirectory( $currentSource, $currentDest );
 611+ } elseif ( is_file( $currentSource ) ) {
 612+ copy( $currentSource, $currentDest );
 613+ }
 614+ }
 615+ return true;
 616+ }
 617+
 618+ /** Set up the destination directory */
 619+ function setupDestDir() {
 620+ global $IP;
 621+
 622+ if ( is_dir( $this->dest ) ) {
 623+ echo "WARNING: destination directory already exists, skipping initialisation\n";
 624+ return;
 625+ }
 626+ echo "Initialising destination directory...\n";
 627+ if ( !$this->mkdir( "{$this->dest}/skins" ) ) {
 628+ throw new MWException( "Unable to create destination directory." );
 629+ }
 630+
 631+ file_put_contents( "{$this->dest}/dumpHTML.version", self::VERSION );
 632+ $this->copyDirectory( "$IP/skins/vector", "{$this->dest}/skins/vector" );
 633+ $this->copyDirectory( "$IP/skins/monobook", "{$this->dest}/skins/monobook" );
 634+ $this->copyDirectory( "$IP/skins/common", "{$this->dest}/skins/common" );
 635+ $this->copyDirectory( "{$this->extdir}/skin", "{$this->dest}/skins/offline" );
 636+ }
 637+
 638+ /** Create a file repo group which is a proxy of an old one */
 639+ function newRepoGroup( $old ) {
 640+ return new DumpHTML_ProxyRepoGroup( $this, $old );
 641+ }
 642+
 643+ /** Set up globals required for parsing */
 644+ function setupGlobals( $currentDepth = NULL ) {
 645+ global $wgUser, $wgStylePath, $wgArticlePath, $wgMathPath;
 646+ global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgScriptPath;
 647+ global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
 648+ global $wgEnableParserCache, $wgHooks, $wgServer;
 649+ global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon, $wgEnableSidebarCache;
 650+ global $wgGenerateThumbnailOnParse, $wgValidSkinNames, $wgFavicon;
 651+ global $wgDisableCounters;
 652+
 653+ if ( !$this->setupDone ) {
 654+ $wgHooks['GetLocalURL'][] =& $this;
 655+ $wgHooks['GetFullURL'][] =& $this;
 656+ $wgHooks['SiteNoticeBefore'][] =& $this;
 657+ $wgHooks['SiteNoticeAfter'][] =& $this;
 658+ $this->oldArticlePath = $wgServer . $wgArticlePath;
 659+ $this->oldLogo = $wgLogo;
 660+ $this->oldRepoGroup = RepoGroup::singleton();
 661+ $this->oldCopyrightIcon = $wgCopyrightIcon;
 662+ $this->oldScriptPath = $wgScriptPath;
 663+ $this->oldFavicon = $wgFavicon;
 664+ $wgValidSkinNames['offline'] = 'Offline';
 665+ }
 666+
 667+ if ( is_null( $currentDepth ) ) {
 668+ $currentDepth = $this->depth;
 669+ }
 670+
 671+ if ( $this->alternateScriptPath ) {
 672+ if ( $currentDepth == 0 ) {
 673+ $wgScriptPath = '.';
 674+ } else {
 675+ $wgScriptPath = '../..' . str_repeat( '/..', $currentDepth - 1 );
 676+ }
 677+ } else {
 678+ if ( $currentDepth == 0 ) {
 679+ $wgScriptPath = '..' . str_repeat( '/..', $currentDepth );
 680+ } else {
 681+ $wgScriptPath = '../..' . str_repeat( '/..', $currentDepth );
 682+ }
 683+ }
 684+
 685+ if ( $currentDepth == 0 ) {
 686+ $wgArticlePath = '$1';
 687+ $this->articleBaseUrl = '.';
 688+ } else {
 689+ $this->articleBaseUrl = '..' . str_repeat( '/..', $currentDepth );
 690+ $wgArticlePath = str_repeat( '../', $currentDepth + 1 ) . '$1';
 691+ }
 692+
 693+ $uploadBits = explode( '/', str_replace( '\\', '/', $wgUploadPath ) );
 694+ $this->imageRel = $uploadBits[count($uploadBits) - 1];
 695+ if ( !in_array( $this->imageRel, array( 'images', 'upload' ) ) ) {
 696+ $this->imageRel = 'images';
 697+ }
 698+
 699+ $wgStylePath = "{$this->articleBaseUrl}/skins";
 700+
 701+
 702+ if ( $this->makeSnapshot ) {
 703+ $this->destUploadUrl = "{$this->articleBaseUrl}/{$this->imageRel}";
 704+ } else {
 705+ $this->destUploadUrl = "$wgScriptPath/{$this->imageRel}";
 706+ }
 707+ $wgUploadPath = $this->destUploadUrl; // For BC
 708+ $wgMaxCredits = -1;
 709+ $wgHideInterlanguageLinks = !$this->interwiki;
 710+ $wgThumbnailScriptPath = false;
 711+ $wgEnableParserCache = false;
 712+ $wgMathPath = "$wgScriptPath/math";
 713+ $wgEnableSidebarCache = false;
 714+ $wgGenerateThumbnailOnParse = true;
 715+ $wgDisableCounters = true;
 716+
 717+ if ( !empty( $wgRightsText ) ) {
 718+ $wgRightsUrl = "$wgScriptPath/COPYING.html";
 719+ }
 720+
 721+ $wgUser = User::newFromName( '__dumpHTML', false );
 722+ $wgUser->setOption( 'skin', $this->skin );
 723+ $wgUser->setOption( 'editsection', 0 );
 724+ if ( $this->group ) {
 725+ $groups = explode( ',', $this->group );
 726+ foreach ( $groups as $group ) {
 727+ $wgUser->addGroup( $group );
 728+ }
 729+ if ( !$wgUser->isAllowed( 'read' ) ) {
 730+ print "The specified user group is not allowed to read\n";
 731+ exit( 1 );
 732+ }
 733+ } elseif ( !$wgUser->isAllowed( 'read' ) ) {
 734+ print "Default users are not allowed to read, please specify a --group option, e.g. --group=sysop\n";
 735+ exit( 1 );
 736+ }
 737+
 738+ if ( $this->makeSnapshot ) {
 739+ $this->destUploadDirectory = "{$this->dest}/{$this->imageRel}";
 740+ if ( realpath( $this->destUploadDirectory ) == realpath( $wgUploadDirectory ) ) {
 741+ print "Disabling image snapshot because the destination is the same as the source\n";
 742+ $this->makeSnapshot = false;
 743+ $this->destUploadDirectory = false;
 744+ }
 745+ } else {
 746+ $this->destUploadDirectory = false;
 747+ }
 748+
 749+ $newRepoGroup = $this->newRepoGroup( $this->oldRepoGroup );
 750+ RepoGroup::setSingleton( $newRepoGroup );
 751+
 752+ # Make a snapshot of the logo image and copyright icon
 753+ $wgLogo = $this->makeUrlSnapshot( $this->oldLogo );
 754+ if ( preg_match( '/<img [^>]*src="([^"]*)"/', $this->oldCopyrightIcon, $m ) ) {
 755+ $urlText = $m[1];
 756+ $url = Sanitizer::decodeCharReferences( $urlText );
 757+ $url = $this->makeUrlSnapshot( $url );
 758+ $wgCopyrightIcon = str_replace( $urlText, htmlspecialchars( $url ), $this->oldCopyrightIcon);
 759+ }
 760+
 761+ # Make a snapshot of the favicon
 762+ $wgFavicon = $this->makeUrlSnapshot( $this->oldFavicon );
 763+
 764+ $this->setupDone = true;
 765+ }
 766+
 767+ /**
 768+ * Make a copy of a URL in the destination directory, and return the new relative URL
 769+ */
 770+ function makeUrlSnapshot( $url ) {
 771+ global $wgServer;
 772+ $this->mkdir( "{$this->dest}/misc" );
 773+ $destName = urldecode( basename( $url ) );
 774+ $destPath = "{$this->dest}/misc/$destName";
 775+ if ( !file_exists( $destPath ) ) {
 776+ if ( !preg_match( '/^https?:/', $url ) ) {
 777+ $url = $wgServer . $url;
 778+ }
 779+ $contents = Http::get( $url );
 780+ file_put_contents( $destPath, $contents );
 781+ }
 782+ return "{$this->articleBaseUrl}/misc/" . urlencode( $destName );
 783+ }
 784+
 785+ /** Reads the content of a title object, executes the skin and captures the result */
 786+ function getArticleHTML( $title ) {
 787+ global $wgOut, $wgTitle, $wgArticle, $wgUser;
 788+
 789+ $linkCache =& LinkCache::singleton();
 790+ $linkCache->clear();
 791+ $wgTitle = $title;
 792+ if ( is_null( $wgTitle ) ) {
 793+ return false;
 794+ }
 795+
 796+ $ns = $wgTitle->getNamespace();
 797+ if ( $ns == NS_SPECIAL ) {
 798+ $wgOut = new OutputPage;
 799+ $wgOut->setParserOptions( new ParserOptions );
 800+ SpecialPage::executePath( $wgTitle );
 801+ } else {
 802+ /** @todo merge with Wiki.php code */
 803+ if ( $ns == NS_IMAGE ) {
 804+ $wgArticle = new ImagePage( $wgTitle );
 805+ } elseif ( $ns == NS_CATEGORY ) {
 806+ $wgArticle = new CategoryPage( $wgTitle );
 807+ } else {
 808+ $wgArticle = new Article( $wgTitle );
 809+ }
 810+ $rt = Title::newFromRedirect( $wgArticle->fetchContent() );
 811+ if ( $rt != NULL ) {
 812+ return $this->getRedirect( $rt );
 813+ } else {
 814+ $wgOut = new OutputPage;
 815+ $wgOut->setParserOptions( new ParserOptions );
 816+
 817+ $wgArticle->view();
 818+ }
 819+ }
 820+
 821+
 822+ $sk =& $wgUser->getSkin();
 823+ ob_start();
 824+ $sk->outputPage( $wgOut );
 825+ $text = ob_get_contents();
 826+ ob_end_clean();
 827+
 828+ return $text;
 829+ }
 830+
 831+ function getRedirect( $rt ) {
 832+ $url = $rt->escapeLocalURL();
 833+ $text = $rt->getPrefixedText();
 834+ return <<<ENDTEXT
 835+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 836+<html xmlns="http://www.w3.org/1999/xhtml">
 837+<head>
 838+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
 839+ <meta http-equiv="Refresh" content="0;url=$url" />
 840+</head>
 841+<body>
 842+ <p>Redirecting to <a href="$url">$text</a></p>
 843+</body>
 844+</html>
 845+ENDTEXT;
 846+ }
 847+
 848+ /** Returns image paths used in an XHTML document */
 849+ function findImages( $text ) {
 850+ global $wgOutputEncoding, $wgDumpImages;
 851+ $parser = xml_parser_create( $wgOutputEncoding );
 852+ xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );
 853+
 854+ $wgDumpImages = array();
 855+ xml_parse( $parser, $text );
 856+ xml_parser_free( $parser );
 857+
 858+ return $wgDumpImages;
 859+ }
 860+
 861+ /**
 862+ * Returns true if the path exists, false otherwise
 863+ * PHP's file_exists() returns false for broken symlinks, this returns true.
 864+ */
 865+ function pathExists( $path ) {
 866+ wfSuppressWarnings();
 867+ $exists = (bool)lstat( $path );
 868+ wfRestoreWarnings();
 869+ return $exists;
 870+ }
 871+
 872+ /**
 873+ * Copy a file specified by a URL to a given directory
 874+ *
 875+ * @param string $srcPath The source URL
 876+ * @param string $srcPathBase The base directory of the source URL
 877+ * @param string $srcDirBase The base filesystem directory of the source URL
 878+ * @param string $destDirBase The base filesystem directory of the destination URL
 879+ */
 880+ function relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) {
 881+ $rel = substr( $srcPath, strlen( $srcPathBase ) + 1 ); // +1 for slash
 882+ $sourceLoc = "$srcDirBase/$rel";
 883+ $destLoc = "$destDirBase/$rel";
 884+ #print "Copying $sourceLoc to $destLoc\n";
 885+ if ( !$this->pathExists( $destLoc ) ) {
 886+ $this->mkdir( dirname( $destLoc ), 0755 );
 887+ if ( function_exists( 'symlink' ) && !$this->forceCopy ) {
 888+ if ( !symlink( $sourceLoc, $destLoc ) ) {
 889+ print "Warning: unable to create symlink at $destLoc\n";
 890+ }
 891+ } else {
 892+ if ( !copy( $sourceLoc, $destLoc ) ) {
 893+ print "Warning: unable to copy $sourceLoc to $destLoc\n";
 894+ }
 895+ }
 896+ }
 897+ }
 898+
 899+ /**
 900+ * Copy an image, and if it is a thumbnail, copy its parent image too
 901+ */
 902+ function copyImage( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) {
 903+ $this->relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase );
 904+ if ( substr( $srcPath, strlen( $srcPathBase ) + 1, 6 ) == 'thumb/' ) {
 905+ # The image was a thumbnail
 906+ # Copy the source image as well
 907+ $rel = substr( $srcPath, strlen( $srcPathBase ) + 1 );
 908+ $parts = explode( '/', $rel );
 909+ $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}";
 910+ $newSrc = "$srcPathBase/$rel";
 911+ $this->relativeCopy( $newSrc, $srcPathBase, $srcDirBase, $destDirBase );
 912+ }
 913+ }
 914+
 915+ /**
 916+ * Copy images (or create symlinks) from commons to a static directory.
 917+ * This is necessary even if you intend to distribute all of commons, because
 918+ * the directory contents are used to work out which image description pages
 919+ * are needed.
 920+ *
 921+ * Also copies math images, and full-sized images if the makeSnapshot option
 922+ * is specified.
 923+ *
 924+ */
 925+ function copyImages( $images ) {
 926+ global $wgUploadPath, $wgUploadDirectory, $wgMathPath, $wgMathDirectory;
 927+ # Find shared uploads and copy them into the static directory
 928+ $mathPathLength = strlen( $wgMathPath );
 929+ $uploadPathLength = strlen( $wgUploadPath );
 930+ foreach ( $images as $escapedImage => $dummy ) {
 931+ $image = urldecode( $escapedImage );
 932+
 933+ if ( substr( $image, 0, $mathPathLength ) == $wgMathPath ) {
 934+ $this->relativeCopy( $image, $wgMathPath, $wgMathDirectory, "{$this->dest}/math" );
 935+ } elseif ( $this->makeSnapshot && substr( $image, 0, $uploadPathLength ) == $wgUploadPath ) {
 936+ $this->copyImage( $image, $wgUploadPath, $wgUploadDirectory, $this->destUploadDirectory );
 937+ }
 938+ }
 939+ }
 940+
 941+ function onGetFullURL( &$title, &$url, $query ) {
 942+ global $wgContLang, $wgArticlePath;
 943+
 944+ $iw = $title->getInterwiki();
 945+ if ( $title->isExternal() && $wgContLang->getLanguageName( $iw ) ) {
 946+ if ( $title->getDBkey() == '' ) {
 947+ $url = str_replace( '$1', "../$iw/index.html", $wgArticlePath );
 948+ } else {
 949+ $url = str_replace( '$1', "../$iw/" . wfUrlencode( $this->getHashedFilename( $title ) ),
 950+ $wgArticlePath );
 951+ }
 952+ $url .= $this->compress ? ".gz" : "";
 953+ return false;
 954+ } else {
 955+ return true;
 956+ }
 957+ }
 958+
 959+ function onGetLocalURL( &$title, &$url, $query ) {
 960+ global $wgArticlePath;
 961+
 962+ if ( $title->isExternal() ) {
 963+ # Default is fine for interwiki
 964+ return true;
 965+ }
 966+
 967+ $url = false;
 968+ if ( $query != '' ) {
 969+ $params = array();
 970+ parse_str( $query, $params );
 971+ if ( isset($params['action']) && $params['action'] == 'raw' ) {
 972+ if ( $params['gen'] == 'css' || $params['gen'] == 'js' ) {
 973+ $file = 'gen.' . $params['gen'];
 974+ } else {
 975+ $file = $this->getFriendlyName( $title->getPrefixedDBkey() );
 976+ // Clean up Monobook.css etc.
 977+ $matches = array();
 978+ if ( preg_match( '/^(.*)\.(css|js)_[0-9a-f]{4}$/', $file, $matches ) ) {
 979+ $file = $matches[1] . '.' . $matches[2];
 980+ }
 981+ }
 982+ $this->rawPages[$file] = array( $file, $title, $params );
 983+ $url = str_replace( '$1', "raw/" . wfUrlencode( $file ), $wgArticlePath );
 984+ }
 985+ }
 986+ if ( $url === false ) {
 987+ $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath );
 988+ }
 989+ $url .= $this->compress ? ".gz" : "";
 990+ return false;
 991+ }
 992+
 993+ function getHashedFilename( &$title ) {
 994+ if ( !$title ) {
 995+ throw new MWException( 'Invalid $title parameter to '.__METHOD__ );
 996+ }
 997+ if ( '' != $title->mInterwiki ) {
 998+ $dbkey = $title->getDBkey();
 999+ } else {
 1000+ $dbkey = $title->getPrefixedDBkey();
 1001+ }
 1002+
 1003+ $mainPage = Title::newMainPage();
 1004+ if ( $mainPage->getPrefixedDBkey() == $dbkey ) {
 1005+ return 'index.html';
 1006+ }
 1007+
 1008+ return 'articles/' . $this->getHashedDirectory( $title ) . '/' .
 1009+ $this->getFriendlyName( $dbkey ) . '.html';
 1010+ }
 1011+
 1012+ function getFriendlyName( $name ) {
 1013+ global $wgLang;
 1014+ # Replace illegal characters for Windows paths with underscores
 1015+ $friendlyName = strtr( $name, '/\\*?"<>|~', '_________' );
 1016+
 1017+ # Work out lower case form. We assume we're on a system with case-insensitive
 1018+ # filenames, so unless the case is of a special form, we have to disambiguate
 1019+ if ( function_exists( 'mb_strtolower' ) ) {
 1020+ $lowerCase = $wgLang->ucfirst( mb_strtolower( $name ) );
 1021+ } else {
 1022+ $lowerCase = ucfirst( strtolower( $name ) );
 1023+ }
 1024+
 1025+ # Make it mostly unique
 1026+ if ( $lowerCase != $friendlyName ) {
 1027+ $friendlyName .= '_' . substr(md5( $name ), 0, 4);
 1028+ }
 1029+ # Handle colon specially by replacing it with tilde
 1030+ # Thus we reduce the number of paths with hashes appended
 1031+ $friendlyName = str_replace( ':', '~', $friendlyName );
 1032+
 1033+ return $friendlyName;
 1034+ }
 1035+
 1036+ /**
 1037+ * Get a relative directory for putting a title into
 1038+ */
 1039+ function getHashedDirectory( &$title ) {
 1040+ if ( '' != $title->getInterwiki() ) {
 1041+ $pdbk = $title->getDBkey();
 1042+ } else {
 1043+ $pdbk = $title->getPrefixedDBkey();
 1044+ }
 1045+
 1046+ # Find the first colon if there is one, use characters after it
 1047+ $p = strpos( $pdbk, ':' );
 1048+ if ( $p !== false ) {
 1049+ $dbk = substr( $pdbk, $p + 1 );
 1050+ $dbk = substr( $dbk, strspn( $dbk, '_' ) );
 1051+ } else {
 1052+ $dbk = $pdbk;
 1053+ }
 1054+
 1055+ # Split into characters
 1056+ $m = array();
 1057+ preg_match_all( '/./us', $dbk, $m );
 1058+
 1059+ $chars = $m[0];
 1060+ $length = count( $chars );
 1061+ $dir = '';
 1062+
 1063+ for ( $i = 0; $i < $this->depth; $i++ ) {
 1064+ if ( $i ) {
 1065+ $dir .= '/';
 1066+ }
 1067+ if ( $i >= $length ) {
 1068+ $dir .= '_';
 1069+ } else {
 1070+ $c = $chars[$i];
 1071+ if ( ord( $c ) >= 128 || preg_match( '/[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/', $c ) ) {
 1072+ if ( function_exists( 'mb_strtolower' ) ) {
 1073+ $dir .= mb_strtolower( $c );
 1074+ } else {
 1075+ $dir .= strtolower( $c );
 1076+ }
 1077+ } else {
 1078+ $dir .= sprintf( "%02X", ord( $c ) );
 1079+ }
 1080+ }
 1081+ }
 1082+ return $dir;
 1083+ }
 1084+
 1085+ /**
 1086+ * Calculate the start and end of a job based on the current slice
 1087+ * @param integer $start
 1088+ * @param integer $end
 1089+ * @return array of integers
 1090+ */
 1091+ function sliceRange( $start, $end ) {
 1092+ $count = $end - $start + 1;
 1093+ $each = $count / $this->sliceDenominator;
 1094+ $sliceStart = $start + intval( $each * ( $this->sliceNumerator - 1 ) );
 1095+ if ( $this->sliceNumerator == $this->sliceDenominator ) {
 1096+ $sliceEnd = $end;
 1097+ } else {
 1098+ $sliceEnd = $start + intval( $each * $this->sliceNumerator ) - 1;
 1099+ }
 1100+ return array( $sliceStart, $sliceEnd );
 1101+ }
 1102+
 1103+ /**
 1104+ * Adjust a start point so that it belongs to the current slice, where slices are defined by integer modulo
 1105+ * @param integer $start
 1106+ * @param integer $base The true start of the range; the minimum start
 1107+ */
 1108+ function modSliceStart( $start, $base = 1 ) {
 1109+ return ( $start - $base ) - ( ( $start - $base ) % $this->sliceDenominator ) + $this->sliceNumerator - 1 + $base;
 1110+ }
 1111+
 1112+ /**
 1113+ * Determine whether a string belongs to the current slice, based on hash
 1114+ */
 1115+ function sliceFilter( $s ) {
 1116+ return crc32( $s ) % $this->sliceDenominator == $this->sliceNumerator - 1;
 1117+ }
 1118+
 1119+ /**
 1120+ * No site notice
 1121+ */
 1122+ function onSiteNoticeBefore( &$text ) {
 1123+ $text = '';
 1124+ return false;
 1125+ }
 1126+ function onSiteNoticeAfter( &$text ) {
 1127+ $text = '';
 1128+ return false;
 1129+ }
 1130+
 1131+ function getMaxPageID() {
 1132+ if ( $this->maxPageID === false ) {
 1133+ $dbr = wfGetDB( DB_SLAVE );
 1134+ $this->maxPageID = $dbr->selectField( 'page', 'max(page_id)', false, __METHOD__ );
 1135+ }
 1136+ return $this->maxPageID;
 1137+ }
 1138+
 1139+ function profile() {
 1140+ global $wgProfiler, $wgRequestTime, $wgRUstart;
 1141+
 1142+ if ( !$this->udpProfile ) {
 1143+ return;
 1144+ }
 1145+ if ( !$this->udpProfileInit ) {
 1146+ $this->udpProfileInit = true;
 1147+ $this->udpProfileCounter = 0;
 1148+ } elseif ( $this->udpProfileCounter == 1 % $this->udpProfile ) {
 1149+ wfProfileOut( '-total' );
 1150+ $wgProfiler->getFunctionReport();
 1151+ $wgProfiler = new DumpHTML_ProfilerStub;
 1152+ }
 1153+ if ( $this->udpProfileCounter == 0 ) {
 1154+ $wgProfiler = new ProfilerSimpleUDP;
 1155+ $wgProfiler->setProfileID( 'dumpHTML' );
 1156+ $wgRequestTime = microtime( true );
 1157+ $wgRUstart = getrusage();
 1158+ wfProfileIn( '-total' );
 1159+ }
 1160+ $this->udpProfileCounter = ( $this->udpProfileCounter + 1 ) % $this->udpProfile;
 1161+ }
 1162+
 1163+ function debug( $text ) {
 1164+ print "$text\n";
 1165+ }
 1166+
 1167+ function mkdir( $dir ) {
 1168+ //if ( wfIsWindows() ) {
 1169+ return wfMkdirParents( $dir, 0755 );
 1170+ /*} else {
 1171+ $dir = escapeshellarg( $dir );
 1172+ `mkdir -p -- $dir`;
 1173+ return true;
 1174+ }*/
 1175+ }
 1176+}
 1177+
 1178+class DumpHTML_ProfilerStub {
 1179+ function profileIn() {}
 1180+ function profileOut() {}
 1181+ function getOutput() {}
 1182+ function close() {}
 1183+ function getFunctionReport() {}
 1184+ function getCurrentSection() { return '';}
 1185+}
 1186+
 1187+class DumpHTML_ProxyRepoGroup extends RepoGroup {
 1188+ var $dump, $backendRG;
 1189+
 1190+ function __construct( $dump, $backendRG ) {
 1191+ $this->dump = $dump;
 1192+ $this->backendRG = $backendRG;
 1193+ $backendRG->initialiseRepos();
 1194+
 1195+ if ( count( $backendRG->foreignRepos ) ) {
 1196+ $localDest = "{$this->dump->destUploadDirectory}/local";
 1197+ $localUrl = "{$this->dump->destUploadUrl}/local";
 1198+ } else {
 1199+ $localDest = $this->dump->destUploadDirectory;
 1200+ $localUrl = $this->dump->destUploadUrl;
 1201+ }
 1202+ if ( !$dump->makeSnapshot ) {
 1203+ $localDest = false;
 1204+ }
 1205+ $this->reposInitialised = true;
 1206+ $this->localRepo = new DumpHTML_ProxyRepo( $backendRG->getLocalRepo(), $dump, $localDest, $localUrl );
 1207+ $this->foreignRepos = array();
 1208+ foreach ( $backendRG->foreignRepos as $index => $repo ) {
 1209+ $friendlyName = strtr( $repo->getName(), array( '/. ', '___' ) );
 1210+ if ( !$dump->makeSnapshot ) {
 1211+ $foreignDest = false;
 1212+ } else {
 1213+ $foreignDest = "{$dump->destUploadDirectory}/$friendlyName";
 1214+ }
 1215+ $this->foreignRepos[] = new DumpHTML_ProxyRepo( $repo, $dump, $foreignDest,
 1216+ $dump->destUploadUrl . '/' . urlencode( $friendlyName ) );
 1217+ }
 1218+ }
 1219+}
 1220+
 1221+class DumpHTML_ProxyRepo {
 1222+ function __construct( $backend, $dump, $directory, $url ) {
 1223+ $this->backend = $backend;
 1224+ $this->dump = $dump;
 1225+ $this->directory = $directory;
 1226+ $this->url = $url;
 1227+ $this->name = $backend->getName();
 1228+ $this->backend->thumbScriptUrl = false;
 1229+ $this->backend->transformVia404 = false;
 1230+ $this->backendUrl = $backend->getZoneUrl( 'public' );
 1231+ }
 1232+
 1233+ function __call( $name, $args ) {
 1234+ return call_user_func_array( array( $this->backend, $name ), $args );
 1235+ }
 1236+
 1237+ function newFile( $title, $time = false) {
 1238+ $file = $this->backend->newFile( $title, $time );
 1239+ if ( $file ) {
 1240+ $file = new DumpHTML_ProxyFile( $file, $this );
 1241+ $file->copyToDump();
 1242+ }
 1243+ return $file;
 1244+ }
 1245+
 1246+ function findFile( $title, $time = false ) {
 1247+ $file = $this->backend->findFile( $title, $time );
 1248+ if ( $file ) {
 1249+ $file = new DumpHTML_ProxyFile( $file, $this );
 1250+ $file->copyToDump();
 1251+ }
 1252+ return $file;
 1253+ }
 1254+
 1255+ function copyToDump( $rel ) {
 1256+ if ( !$this->dump->makeSnapshot ) {
 1257+ return;
 1258+ }
 1259+
 1260+ if ( is_callable( array( $this->backend, 'getZonePath' ) ) ) {
 1261+ $sourceBase = $this->backend->getZonePath( 'public' );
 1262+ } elseif ( is_callable( array( $this->backend, 'getZoneUrl' ) ) ) {
 1263+ $sourceBase = false;
 1264+ $sourceBaseUrl = $this->backend->getZoneUrl( 'public' );
 1265+ } else {
 1266+ $sourceBase = false;
 1267+ $sourceBaseUrl = false;
 1268+ }
 1269+
 1270+ $dest = "{$this->directory}/$rel";
 1271+
 1272+ if ( $this->dump->pathExists( $dest ) ) {
 1273+ return;
 1274+ }
 1275+
 1276+ if ( $sourceBase !== false ) {
 1277+ $source = "$sourceBase/$rel";
 1278+ if ( !file_exists( $source ) ) {
 1279+ // Hopefully we'll get another go at it later
 1280+ return;
 1281+ }
 1282+ if ( !is_dir( dirname( $dest ) ) ) {
 1283+ $this->dump->mkdir( dirname( $dest ) );
 1284+ }
 1285+
 1286+ #$this->dump->debug( "Copying $source to $dest" );
 1287+ if ( function_exists( 'symlink' ) && !$this->dump->forceCopy ) {
 1288+ if ( !symlink( $source, $dest ) ) {
 1289+ $this->dump->debug( "Warning: unable to create symlink at $dest" );
 1290+ }
 1291+ } else {
 1292+ if ( !copy( $source, $dest ) ) {
 1293+ $this->dump->debug( "Warning: unable to copy $source to $dest" );
 1294+ }
 1295+ }
 1296+ } elseif ( $sourceBaseUrl !== false ) {
 1297+ $urlRel = implode( '/', array_map( 'rawurlencode', explode( '/', $rel ) ) );
 1298+ $sourceUrl = $sourceBaseUrl . '/' . $urlRel;
 1299+ $contents = Http::get( $sourceUrl );
 1300+ if ( $contents === false ) {
 1301+ $this->dump->debug( "Unable to get contents of file from $sourceUrl" );
 1302+ } else {
 1303+ if ( !file_put_contents( $dest, $contents ) ) {
 1304+ $this->debug( "Unable to write to $dest" );
 1305+ }
 1306+ }
 1307+ } // else give up
 1308+ }
 1309+}
 1310+
 1311+class DumpHTML_ProxyFile {
 1312+ function __construct( $backend, $repo ) {
 1313+ $this->backend = $backend;
 1314+ $this->repo = $repo;
 1315+ $this->dump = $repo->dump;
 1316+ }
 1317+
 1318+ function __call( $name, $args ) {
 1319+ $callback = array( $this->backend, $name );
 1320+ if ( !is_callable( $callback ) ) {
 1321+ throw new MWException( "Attempt to call invalid function LocalFile::$name\n" );
 1322+ }
 1323+ $result = call_user_func_array( array( $this->backend, $name ), $args );
 1324+ if ( is_string( $result ) ) {
 1325+ $result = $this->fixURL( $result );
 1326+ } elseif ( $result instanceof MediaTransformOutput ) {
 1327+ $result = $this->fixMTO( $result );
 1328+ }
 1329+ return $result;
 1330+ }
 1331+
 1332+ function getUrl() {
 1333+ return $this->repo->url . '/' . $this->backend->getUrlRel();
 1334+ }
 1335+
 1336+ public function getFullURL() {
 1337+ return $this->getUrl();
 1338+ }
 1339+
 1340+ function fixURL( $url ) {
 1341+ if ( is_string( $url ) && substr( $url, 0, strlen( $this->repo->backendUrl ) ) == $this->repo->backendUrl ) {
 1342+ $rel = substr( $url, strlen( $this->repo->backendUrl ) + 1 );
 1343+ $rel = implode('/', array_map( 'rawurldecode', explode( '/', $rel ) ) );
 1344+ $this->repo->copyToDump( $rel );
 1345+ $newUrl = $this->repo->url . '/' . $rel;
 1346+ $url = $newUrl;
 1347+ }
 1348+ return $url;
 1349+ }
 1350+
 1351+ function fixMTO( $thumb ) {
 1352+ // FIXME: accessing private members, needs MTO::setUrl()
 1353+ if ( isset( $thumb->url ) ) {
 1354+ $thumb->url = $this->fixURL( $thumb->url );
 1355+ }
 1356+ return $thumb;
 1357+ }
 1358+
 1359+ function copyToDump() {
 1360+ if ( !$this->dump->makeSnapshot ) {
 1361+ return;
 1362+ }
 1363+
 1364+ $source = $this->backend->getPath();
 1365+ $dest = $this->repo->directory . '/' . $this->backend->getRel();
 1366+
 1367+ if ( $this->dump->pathExists( $dest ) ) {
 1368+ return;
 1369+ }
 1370+
 1371+ #$this->dump->debug ( "Copying $source to $dest\n" );
 1372+ if ( $source === false ) {
 1373+ $sourceUrl = $this->backend->getUrl();
 1374+ $contents = Http::get( $sourceUrl );
 1375+ if ( $contents === false ) {
 1376+ $this->dump->debug( "Unable to get contents of file from $sourceUrl" );
 1377+ } else {
 1378+ if ( !file_put_contents( $dest, $contents ) ) {
 1379+ $this->debug( "Unable to write to $dest" );
 1380+ }
 1381+ }
 1382+ } else {
 1383+ if ( !is_dir( dirname( $dest ) ) ) {
 1384+ $this->dump->mkdir( dirname( $dest ) );
 1385+ }
 1386+
 1387+ if ( function_exists( 'symlink' ) && !$this->dump->forceCopy ) {
 1388+ if ( !symlink( $source, $dest ) ) {
 1389+ $this->dump->debug( "Warning: unable to create symlink at $dest" );
 1390+ }
 1391+ } else {
 1392+ if ( !copy( $source, $dest ) ) {
 1393+ $this->dump->debug( "Warning: unable to copy $source to $dest" );
 1394+ }
 1395+ }
 1396+ }
 1397+ }
 1398+}
 1399+
 1400+
 1401+/** XML parser callback */
 1402+function wfDumpStartTagHandler( $parser, $name, $attribs ) {
 1403+ global $wgDumpImages;
 1404+
 1405+ if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) {
 1406+ $wgDumpImages[$attribs['SRC']] = true;
 1407+ }
 1408+}
 1409+
 1410+/** XML parser callback */
 1411+function wfDumpEndTagHandler( $parser, $name ) {}
 1412+
 1413+# vim: syn=php
Property changes on: branches/wmf-deployment/extensions/DumpHTML/dumpHTML.inc
___________________________________________________________________
Name: svn:keywords
11414 + Author Date Id Revision
Name: svn:eol-style
21415 + native
Index: branches/wmf-deployment/extensions/DumpHTML/rename-hack.vbs
@@ -0,0 +1,30 @@
 2+' Script to move a file to a multibyte filename
 3+' Workaround for PHP 5's inadequacy
 4+dim dirsToCreate(20)
 5+
 6+source = Unescape( WScript.Arguments.Item( 0 ) )
 7+dest = Unescape( WScript.Arguments.Item( 1 ) )
 8+Set fso = CreateObject("Scripting.FileSystemObject")
 9+
 10+' Create the destination directory
 11+destDir = fso.GetParentFolderName( fso.GetAbsolutePathName( dest ) )
 12+parent = destDir
 13+numDirs = 0
 14+While parent <> "" and not fso.FolderExists(parent)
 15+ dirsToCreate(numDirs) = parent
 16+ numDirs = numDirs + 1
 17+ parent = fso.GetParentFolderName( parent )
 18+Wend
 19+
 20+For i = numDirs - 1 to 0 step -1
 21+ fso.CreateFolder( dirsToCreate( i ) )
 22+Next
 23+
 24+' Remove the destination file if it exists already
 25+if fso.FileExists( dest ) then
 26+ fso.DeleteFile( dest )
 27+end if
 28+
 29+' Move the temporary file to its destination
 30+fso.MoveFile source, dest
 31+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/rename-hack.vbs
___________________________________________________________________
Name: svn:eol-style
132 + native
Index: branches/wmf-deployment/extensions/DumpHTML/skin/md5.js
@@ -0,0 +1,256 @@
 2+/*
 3+ * A JavaScript implementation of the RSA Data Security, Inc. MD5 Message
 4+ * Digest Algorithm, as defined in RFC 1321.
 5+ * Version 2.1 Copyright (C) Paul Johnston 1999 - 2002.
 6+ * Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
 7+ * Distributed under the BSD License
 8+ * See http://pajhome.org.uk/crypt/md5 for more info.
 9+ */
 10+
 11+/*
 12+ * Configurable variables. You may need to tweak these to be compatible with
 13+ * the server-side, but the defaults work in most cases.
 14+ */
 15+var hexcase = 0; /* hex output format. 0 - lowercase; 1 - uppercase */
 16+var b64pad = ""; /* base-64 pad character. "=" for strict RFC compliance */
 17+var chrsz = 8; /* bits per input character. 8 - ASCII; 16 - Unicode */
 18+
 19+/*
 20+ * These are the functions you'll usually want to call
 21+ * They take string arguments and return either hex or base-64 encoded strings
 22+ */
 23+function hex_md5(s){ return binl2hex(core_md5(str2binl(s), s.length * chrsz));}
 24+function b64_md5(s){ return binl2b64(core_md5(str2binl(s), s.length * chrsz));}
 25+function str_md5(s){ return binl2str(core_md5(str2binl(s), s.length * chrsz));}
 26+function hex_hmac_md5(key, data) { return binl2hex(core_hmac_md5(key, data)); }
 27+function b64_hmac_md5(key, data) { return binl2b64(core_hmac_md5(key, data)); }
 28+function str_hmac_md5(key, data) { return binl2str(core_hmac_md5(key, data)); }
 29+
 30+/*
 31+ * Perform a simple self-test to see if the VM is working
 32+ */
 33+function md5_vm_test()
 34+{
 35+ return hex_md5("abc") == "900150983cd24fb0d6963f7d28e17f72";
 36+}
 37+
 38+/*
 39+ * Calculate the MD5 of an array of little-endian words, and a bit length
 40+ */
 /*
  * x   - array of little-endian 32-bit words; it is modified in place,
  *       because the padding and the length are written into it
  * len - length of the message in bits
  * Returns the digest as an array of four words.
  */
 function core_md5(x, len)
 {
  /* append padding */
  /* a single 1 bit directly after the message ... */
  x[len >> 5] |= 0x80 << ((len) % 32);
  /* ... and the bit length in word 14 of the final 16-word block */
  x[(((len + 64) >>> 9) << 4) + 14] = len;

  /* initial values of the four state words */
  var a = 1732584193;
  var b = -271733879;
  var c = -1732584194;
  var d = 271733878;

  /* process the message in blocks of 16 words */
  for(var i = 0; i < x.length; i += 16)
  {
  /* remember the state so it can be added back in after the block */
  var olda = a;
  var oldb = b;
  var oldc = c;
  var oldd = d;

  /* round 1 */
  a = md5_ff(a, b, c, d, x[i+ 0], 7 , -680876936);
  d = md5_ff(d, a, b, c, x[i+ 1], 12, -389564586);
  c = md5_ff(c, d, a, b, x[i+ 2], 17, 606105819);
  b = md5_ff(b, c, d, a, x[i+ 3], 22, -1044525330);
  a = md5_ff(a, b, c, d, x[i+ 4], 7 , -176418897);
  d = md5_ff(d, a, b, c, x[i+ 5], 12, 1200080426);
  c = md5_ff(c, d, a, b, x[i+ 6], 17, -1473231341);
  b = md5_ff(b, c, d, a, x[i+ 7], 22, -45705983);
  a = md5_ff(a, b, c, d, x[i+ 8], 7 , 1770035416);
  d = md5_ff(d, a, b, c, x[i+ 9], 12, -1958414417);
  c = md5_ff(c, d, a, b, x[i+10], 17, -42063);
  b = md5_ff(b, c, d, a, x[i+11], 22, -1990404162);
  a = md5_ff(a, b, c, d, x[i+12], 7 , 1804603682);
  d = md5_ff(d, a, b, c, x[i+13], 12, -40341101);
  c = md5_ff(c, d, a, b, x[i+14], 17, -1502002290);
  b = md5_ff(b, c, d, a, x[i+15], 22, 1236535329);

  /* round 2 */
  a = md5_gg(a, b, c, d, x[i+ 1], 5 , -165796510);
  d = md5_gg(d, a, b, c, x[i+ 6], 9 , -1069501632);
  c = md5_gg(c, d, a, b, x[i+11], 14, 643717713);
  b = md5_gg(b, c, d, a, x[i+ 0], 20, -373897302);
  a = md5_gg(a, b, c, d, x[i+ 5], 5 , -701558691);
  d = md5_gg(d, a, b, c, x[i+10], 9 , 38016083);
  c = md5_gg(c, d, a, b, x[i+15], 14, -660478335);
  b = md5_gg(b, c, d, a, x[i+ 4], 20, -405537848);
  a = md5_gg(a, b, c, d, x[i+ 9], 5 , 568446438);
  d = md5_gg(d, a, b, c, x[i+14], 9 , -1019803690);
  c = md5_gg(c, d, a, b, x[i+ 3], 14, -187363961);
  b = md5_gg(b, c, d, a, x[i+ 8], 20, 1163531501);
  a = md5_gg(a, b, c, d, x[i+13], 5 , -1444681467);
  d = md5_gg(d, a, b, c, x[i+ 2], 9 , -51403784);
  c = md5_gg(c, d, a, b, x[i+ 7], 14, 1735328473);
  b = md5_gg(b, c, d, a, x[i+12], 20, -1926607734);

  /* round 3 */
  a = md5_hh(a, b, c, d, x[i+ 5], 4 , -378558);
  d = md5_hh(d, a, b, c, x[i+ 8], 11, -2022574463);
  c = md5_hh(c, d, a, b, x[i+11], 16, 1839030562);
  b = md5_hh(b, c, d, a, x[i+14], 23, -35309556);
  a = md5_hh(a, b, c, d, x[i+ 1], 4 , -1530992060);
  d = md5_hh(d, a, b, c, x[i+ 4], 11, 1272893353);
  c = md5_hh(c, d, a, b, x[i+ 7], 16, -155497632);
  b = md5_hh(b, c, d, a, x[i+10], 23, -1094730640);
  a = md5_hh(a, b, c, d, x[i+13], 4 , 681279174);
  d = md5_hh(d, a, b, c, x[i+ 0], 11, -358537222);
  c = md5_hh(c, d, a, b, x[i+ 3], 16, -722521979);
  b = md5_hh(b, c, d, a, x[i+ 6], 23, 76029189);
  a = md5_hh(a, b, c, d, x[i+ 9], 4 , -640364487);
  d = md5_hh(d, a, b, c, x[i+12], 11, -421815835);
  c = md5_hh(c, d, a, b, x[i+15], 16, 530742520);
  b = md5_hh(b, c, d, a, x[i+ 2], 23, -995338651);

  /* round 4 */
  a = md5_ii(a, b, c, d, x[i+ 0], 6 , -198630844);
  d = md5_ii(d, a, b, c, x[i+ 7], 10, 1126891415);
  c = md5_ii(c, d, a, b, x[i+14], 15, -1416354905);
  b = md5_ii(b, c, d, a, x[i+ 5], 21, -57434055);
  a = md5_ii(a, b, c, d, x[i+12], 6 , 1700485571);
  d = md5_ii(d, a, b, c, x[i+ 3], 10, -1894986606);
  c = md5_ii(c, d, a, b, x[i+10], 15, -1051523);
  b = md5_ii(b, c, d, a, x[i+ 1], 21, -2054922799);
  a = md5_ii(a, b, c, d, x[i+ 8], 6 , 1873313359);
  d = md5_ii(d, a, b, c, x[i+15], 10, -30611744);
  c = md5_ii(c, d, a, b, x[i+ 6], 15, -1560198380);
  b = md5_ii(b, c, d, a, x[i+13], 21, 1309151649);
  a = md5_ii(a, b, c, d, x[i+ 4], 6 , -145523070);
  d = md5_ii(d, a, b, c, x[i+11], 10, -1120210379);
  c = md5_ii(c, d, a, b, x[i+ 2], 15, 718787259);
  b = md5_ii(b, c, d, a, x[i+ 9], 21, -343485551);

  /* add this block's result to the state from before the block */
  a = safe_add(a, olda);
  b = safe_add(b, oldb);
  c = safe_add(c, oldc);
  d = safe_add(d, oldd);
  }
  return Array(a, b, c, d);

 }
 135+
 136+/*
 137+ * These functions implement the four basic operations the algorithm uses.
 138+ */
 139+function md5_cmn(q, a, b, x, s, t)
 140+{
 141+ return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s),b);
 142+}
 143+function md5_ff(a, b, c, d, x, s, t)
 144+{
 145+ return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t);
 146+}
 147+function md5_gg(a, b, c, d, x, s, t)
 148+{
 149+ return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t);
 150+}
 151+function md5_hh(a, b, c, d, x, s, t)
 152+{
 153+ return md5_cmn(b ^ c ^ d, a, b, x, s, t);
 154+}
 155+function md5_ii(a, b, c, d, x, s, t)
 156+{
 157+ return md5_cmn(c ^ (b | (~d)), a, b, x, s, t);
 158+}
 159+
 160+/*
 161+ * Calculate the HMAC-MD5, of a key and some data
 162+ */
 163+function core_hmac_md5(key, data)
 164+{
 165+ var bkey = str2binl(key);
 166+ if(bkey.length > 16) bkey = core_md5(bkey, key.length * chrsz);
 167+
 168+ var ipad = Array(16), opad = Array(16);
 169+ for(var i = 0; i < 16; i++)
 170+ {
 171+ ipad[i] = bkey[i] ^ 0x36363636;
 172+ opad[i] = bkey[i] ^ 0x5C5C5C5C;
 173+ }
 174+
 175+ var hash = core_md5(ipad.concat(str2binl(data)), 512 + data.length * chrsz);
 176+ return core_md5(opad.concat(hash), 512 + 128);
 177+}
 178+
 179+/*
 180+ * Add integers, wrapping at 2^32. This uses 16-bit operations internally
 181+ * to work around bugs in some JS interpreters.
 182+ */
 183+function safe_add(x, y)
 184+{
 185+ var lsw = (x & 0xFFFF) + (y & 0xFFFF);
 186+ var msw = (x >> 16) + (y >> 16) + (lsw >> 16);
 187+ return (msw << 16) | (lsw & 0xFFFF);
 188+}
 189+
 190+/*
 191+ * Bitwise rotate a 32-bit number to the left.
 192+ */
 193+function bit_rol(num, cnt)
 194+{
 195+ return (num << cnt) | (num >>> (32 - cnt));
 196+}
 197+
 198+/*
 199+ * Convert a string to an array of little-endian words
 200+ * If chrsz is ASCII, characters >255 have their hi-byte silently ignored.
 201+ */
 202+function str2binl(str)
 203+{
 204+ var bin = Array();
 205+ var mask = (1 << chrsz) - 1;
 206+ for(var i = 0; i < str.length * chrsz; i += chrsz)
 207+ bin[i>>5] |= (str.charCodeAt(i / chrsz) & mask) << (i%32);
 208+ return bin;
 209+}
 210+
 211+/*
 212+ * Convert an array of little-endian words to a string
 213+ */
 214+function binl2str(bin)
 215+{
 216+ var str = "";
 217+ var mask = (1 << chrsz) - 1;
 218+ for(var i = 0; i < bin.length * 32; i += chrsz)
 219+ str += String.fromCharCode((bin[i>>5] >>> (i % 32)) & mask);
 220+ return str;
 221+}
 222+
 223+/*
 224+ * Convert an array of little-endian words to a hex string.
 225+ */
 226+function binl2hex(binarray)
 227+{
 228+ var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
 229+ var str = "";
 230+ for(var i = 0; i < binarray.length * 4; i++)
 231+ {
 232+ str += hex_tab.charAt((binarray[i>>2] >> ((i%4)*8+4)) & 0xF) +
 233+ hex_tab.charAt((binarray[i>>2] >> ((i%4)*8 )) & 0xF);
 234+ }
 235+ return str;
 236+}
 237+
 238+/*
 239+ * Convert an array of little-endian words to a base-64 string
 240+ */
 241+function binl2b64(binarray)
 242+{
 243+ var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 244+ var str = "";
 245+ for(var i = 0; i < binarray.length * 4; i += 3)
 246+ {
 247+ var triplet = (((binarray[i >> 2] >> 8 * ( i %4)) & 0xFF) << 16)
 248+ | (((binarray[i+1 >> 2] >> 8 * ((i+1)%4)) & 0xFF) << 8 )
 249+ | ((binarray[i+2 >> 2] >> 8 * ((i+2)%4)) & 0xFF);
 250+ for(var j = 0; j < 4; j++)
 251+ {
 252+ if(i * 8 + j * 6 > binarray.length * 32) str += b64pad;
 253+ else str += tab.charAt((triplet >> 6*(3-j)) & 0x3F);
 254+ }
 255+ }
 256+ return str;
 257+}
Property changes on: branches/wmf-deployment/extensions/DumpHTML/skin/md5.js
___________________________________________________________________
Name: svn:keywords
1258 + Author Date Id Revision
Name: svn:eol-style
2259 + native
Index: branches/wmf-deployment/extensions/DumpHTML/skin/utf8.js
@@ -0,0 +1,72 @@
 2+/**
 3+ * Obtained from http://homepage3.nifty.com/aokura/jscript/index.html
 4+ * The webpage says, among other things:
 5+ * * ソースコードの全てあるいは一部を使用したことにより生じた損害に関しては一切責任を負いません。
 6+ * * ソースコードの使用、配布に制限はありません。ご自由にお使いください。
 7+ * * 動作チェックが不充分な場合もありますので、注意してください。
 8+ *
 9+ * Which, loosely translated, means:
 10+ * * The author takes no responsibility for damage which occurs due to the use of this code.
 11+ * * There is no restriction on the use and distribution of the source code. Please use freely.
 12+ * * Please be careful, testing may have been insufficient.
 13+ */
 14+
 15+
 16+/**********************************************************************
 17+ *
 18+ * Unicode ⇔ UTF-8
 19+ *
 20+ * Copyright (c) 2005 AOK <soft@aokura.com>
 21+ *
 22+ **********************************************************************/
 23+
/**
 * Encode a JavaScript (UTF-16) string as UTF-8, returning a string in
 * which every character holds one byte value (0..255).
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * code point and written as one 4-byte sequence. Unpaired surrogates are
 * written as 3-byte sequences, as before.
 *
 * @param s string to encode
 * @return string of byte-valued characters
 */
function _to_utf8(s) {
	var c, low, d = "";
	for (var i = 0; i < s.length; i++) {
		c = s.charCodeAt(i);

		// Combine a surrogate pair into the code point it represents.
		if (c >= 0xd800 && c <= 0xdbff && i + 1 < s.length) {
			low = s.charCodeAt(i + 1);
			if (low >= 0xdc00 && low <= 0xdfff) {
				c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
				i++;
			}
		}

		if (c <= 0x7f) {
			d += String.fromCharCode(c);
		} else if (c <= 0x7ff) {
			d += String.fromCharCode(((c >> 6) & 0x1f) | 0xc0);
			d += String.fromCharCode((c & 0x3f) | 0x80);
		} else if (c <= 0xffff) {
			d += String.fromCharCode((c >> 12) | 0xe0);
			d += String.fromCharCode(((c >> 6) & 0x3f) | 0x80);
			d += String.fromCharCode((c & 0x3f) | 0x80);
		} else {
			d += String.fromCharCode((c >> 18) | 0xf0);
			d += String.fromCharCode(((c >> 12) & 0x3f) | 0x80);
			d += String.fromCharCode(((c >> 6) & 0x3f) | 0x80);
			d += String.fromCharCode((c & 0x3f) | 0x80);
		}
	}
	return d;
}
 41+
/**
 * Decode a string of byte-valued characters holding UTF-8 into a normal
 * JavaScript string.
 *
 * Handles 1- to 4-byte sequences; a 4-byte sequence becomes a surrogate
 * pair. As before, continuation bytes are not validated and a stray
 * continuation byte outside a sequence is dropped.
 *
 * @param s string of byte-valued characters
 * @return decoded string
 */
function _from_utf8(s) {
	var c, d = "", pending = 0, cp = 0;
	for (var i = 0; i < s.length; i++) {
		c = s.charCodeAt(i);
		if (pending == 0) {
			if ((c & 0xf8) == 0xf0) {
				// 4-byte lead: 3 continuation bytes follow
				pending = 3;
				cp = (c & 0x07) << 18;
			} else if ((c & 0xe0) == 0xe0) {
				// 3-byte lead
				pending = 2;
				cp = (c & 0x0f) << 12;
			} else if ((c & 0xc0) == 0xc0) {
				// 2-byte lead
				pending = 1;
				cp = (c & 0x1f) << 6;
			} else if ((c & 0x80) == 0) {
				d += s.charAt(i);
			}
			// otherwise: stray continuation byte, ignored
		} else {
			// Each continuation byte supplies the next 6 bits.
			pending--;
			cp |= (c & 0x3f) << (6 * pending);
			if (pending == 0) {
				if (cp > 0x10ffff) {
					// Beyond the Unicode range
					d += "\ufffd";
				} else if (cp > 0xffff) {
					cp -= 0x10000;
					d += String.fromCharCode(0xd800 | (cp >> 10), 0xdc00 | (cp & 0x3ff));
				} else {
					d += String.fromCharCode(cp);
				}
			}
		}
	}
	return d;
}
 73+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/skin/utf8.js
___________________________________________________________________
Name: svn:keywords
174 + Author Date Id Revision
Name: svn:eol-style
275 + native
Index: branches/wmf-deployment/extensions/DumpHTML/skin/lookup.js
@@ -0,0 +1,93 @@
/**
 * "Go" handler for the static HTML dump: navigate to the page named in
 * the searchInput field, or alert the user when no URL can be built.
 */
function goToStatic(depth) {
	var typed = document.getElementById("searchInput").value;
	var target = getStaticURL(typed, depth);
	if (target == "") {
		alert("Invalid title");
		return;
	}
	location = target;
}
 13+
/**
 * Work out the relative path of the article for a non-canonical title.
 * Returns "" when the title normalises to nothing.
 */
function getStaticURL(text, depth) {
	var pdbk = getPDBK(text);
	if (pdbk == "") {
		return "";
	}

	var path = getHashedDirectory(pdbk, depth) + "/" + getFriendlyName(pdbk) + ".html";

	// When the current URL ends in index.html or "/", link down into articles/
	if (/(index\.html|\/)$/.exec(location)) {
		return "articles/" + path;
	}

	// Otherwise climb one directory per hash level first.
	for (var level = 0; level < depth; level++) {
		path = "../" + path;
	}
	return path;
}
 34+
/**
 * Normalise typed text: spaces become underscores, leading and trailing
 * underscores are stripped, and the first letter is capitalised.
 */
function getPDBK(text) {
	var key = text
		.replace(/ /g, "_")    // spaces to underscores
		.replace(/^_+/g, "")   // trim leading space
		.replace(/_+$/g, "");  // trim trailing space
	return ucfirst(key);
}
 46+
/**
 * Build the hashed directory path ("a/b/c") for a title.
 *
 * One path component is produced per level up to depth, taken from
 * successive characters of the title after its first colon (if any).
 * Missing characters become "_"; characters outside the allowed set are
 * written as two upper-case hex digits.
 *
 * @param pdbk  normalised title
 * @param depth number of directory levels
 * @return directory path without leading or trailing slash
 */
function getHashedDirectory(pdbk, depth) {
	// Find the first colon if there is one, use characters after it
	var dbk = pdbk.replace(/^[^:]*:_*(.*)$/, "$1");
	// cc is declared here; it was previously an implicit global
	var i, c, cc, dir = "";

	for (i = 0; i < depth; i++) {
		if (i) {
			dir += "/";
		}
		if (i >= dbk.length) {
			dir += "_";
		} else {
			c = dbk.charAt(i);
			cc = dbk.charCodeAt(i);

			if (cc >= 128 || /[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/.exec(c)) {
				dir += c.toLowerCase();
			} else {
				dir += binl2hex([cc]).substr(0,2).toUpperCase();
			}
		}
	}
	return dir;
}
 71+
/**
 * Return s with its first character upper-cased.
 */
function ucfirst(s) {
	var head = s.substring(0, 1);
	var tail = s.substring(1);
	return head.toUpperCase() + tail;
}
 75+
/**
 * Turn a normalised title into a file name.
 *
 * Characters that are illegal in Windows paths become underscores. When
 * that result differs from the lower-cased, first-letter-capitalised
 * title, the first four hex digits of the MD5 of the UTF-8 title are
 * appended. Finally every colon is replaced with a tilde.
 *
 * NOTE(review): presumably this has to match the file names the dump
 * generator writes; verify against the server-side implementation.
 *
 * @param name normalised title
 * @return file name without extension
 */
function getFriendlyName(name) {
	// Replace illegal characters for Windows paths with underscores
	var friendlyName = name.replace(/[\/\\*?"<>|~]/g, "_");

	// Work out lower case form. We assume we're on a system with case-insensitive
	// filenames, so unless the case is of a special form, we have to disambiguate
	var lowerCase = ucfirst(name.toLowerCase());

	// Make it mostly unique
	if (lowerCase != friendlyName) {
		friendlyName += "_" + hex_md5(_to_utf8(name)).substring(0, 4);
	}

	// Handle colon specially by replacing it with tilde
	// Thus we reduce the number of paths with hashes appended.
	// A global regex is needed: a string pattern replaces only the first match.
	friendlyName = friendlyName.replace(/:/g, "~");

	return friendlyName;
}
 94+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/skin/lookup.js
___________________________________________________________________
Name: svn:keywords
195 + Author Date Id Revision
Name: svn:eol-style
296 + native
Index: branches/wmf-deployment/extensions/DumpHTML/skin/main.css
@@ -0,0 +1,9 @@
 2+@import "../monobook/main.css";
 3+
 4+#footer li {
 5+ display: block;
 6+}
 7+head:first-child + body #footer li { white-space: normal; }
 8+.usermessage { display: none; }
 9+.editsection { display: none; }
 10+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/skin/main.css
___________________________________________________________________
Name: svn:keywords
111 + Author Date Id Revision
Name: svn:eol-style
212 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/finish-lang
@@ -0,0 +1,34 @@
#!/bin/bash
#
# finish-lang <lang> <edition> <checkpoint>
#
# Compress the finished HTML dump of one language by running
# compress-html. A marker file "compressed" in the language directory
# records that compression has been started, so a repeated run only
# writes "everything=done" to the checkpoint file.
#
# All expansions are quoted so that paths containing whitespace
# (notably the checkpoint path) are not split.

if [ "X$3" == "X" ];then
	echo "Usage: finish <lang> <edition> <checkpoint>"
	exit
fi

. "`dirname "$0"`/config.sh"

bindir=$base/scripts
lang=$1
edition=$2
checkpoint=$3
site=wikipedia
langdir=$base/new/$site/$lang

if [ -e "$langdir/compressed" ]; then
	echo "Already compressed $lang"
	echo "everything=done" > "$checkpoint"
	exit
fi
if [ ! -e "$langdir/index.html" ]; then
	echo "$lang directory is broken, missing index.html, skipping."
	exit
fi

# Write the marker before compressing
date > "$langdir/compressed"

echo "$lang: Compressing HTML..."
"$bindir/compress-html" "$lang" "$edition" "$checkpoint"
#echo "$lang: Making image tarball..."
#ssh albert tar -C /mnt/static -cf /a/upload_snapshot/$edition/downloads/wikipedia-$lang-images.tar -h $lang/upload
#ln -sf /mnt/upload_snapshot/$edition/downloads/wikipedia-$lang-images.tar /mnt/static/downloads/$edition/$lang/wikipedia-$lang-images.tar
echo "$lang: Done."
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/finish-lang
___________________________________________________________________
Name: svn:eol-style
136 + native
Name: svn:executable
237 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/index.inc
@@ -0,0 +1,154 @@
<?php
	// Resolve the "current" edition symlink and derive a display name from it.
	// The link's base name with "-01" appended is parsed as a date, so it
	// presumably has the form YYYY-MM -- TODO confirm.
	$currentDir = readlink( '/a/static/downloads/current' );
	$currentBase = basename( $currentDir );
	$currentText = htmlspecialchars( date( 'F Y', strtotime( "{$currentBase}-01" ) ) );
	$encCurrentBase = htmlspecialchars( urlencode( $currentBase ) );

	// The in_progress link is optional; $inProgressText is false without it.
	$inProgressDir = @readlink( '/a/static/downloads/in_progress' );
	if ( $inProgressDir ) {
		$inProgressText = htmlspecialchars(
			date( 'F Y', strtotime( basename( $inProgressDir ) . '-01' ) )
		);
	} else {
		$inProgressText = false;
	}
?>
 11+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 12+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
 13+ <head>
 14+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 15+ <title>Wikipedia Static HTML Dumps</title>
 16+ <style type="text/css" media="screen">/*<![CDATA[*/
 17+body {
 18+ font: x-small sans-serif;
 19+ background: #f9f9f9 url(http://en.wikipedia.org/skins-1.5/monobook/headbg.jpg) 0 0 no-repeat;
 20+ color: black;
 21+ margin: 0;
 22+ padding: 0;
 23+}
 24+/* scale back up to a sane default */
 25+#globalWrapper {
 26+ font-size: 127%;
 27+ width: 100%;
 28+ margin: 0;
 29+ padding: 0;
 30+}
 31+#content {
 32+ margin: 2em 2em 0 2em;
 33+ padding: 0 1em 1.5em 1em;
 34+ background: white;
 35+ color: black;
 36+ border: 1px solid #aaa;
 37+ line-height: 1.5em;
 38+ position: relative;
 39+ z-index: 2;
 40+}
 41+h1, h2, h3, h4, h5, h6 {
 42+ color: black;
 43+ background: none;
 44+ font-weight: normal;
 45+ margin: 0;
 46+ padding-top: .5em;
 47+ padding-bottom: .17em;
 48+ border-bottom: 1px solid #aaa;
 49+}
 50+h1 {
 51+ font-size: 188%;
 52+ margin-bottom: .6em;
 53+}
 54+h2 {
 55+ font-size: 150%;
 56+ margin-bottom: .6em;
 57+}
 58+h3, h4, h5, h6 {
 59+ border-bottom: none;
 60+ font-weight: bold;
 61+ margin-bottom: .3em;
 62+}
 63+
 64+ul {
 65+ line-height: 1.5em;
 66+ list-style-type: square;
 67+ margin: .3em 0 0 1.5em;
 68+ padding: 0;
 69+ list-style-image: url(bullet.gif);
 70+}
 71+ol {
 72+ line-height: 1.5em;
 73+ margin: .3em 0 0 3.2em;
 74+ padding: 0;
 75+ list-style-image: none;
 76+}
 77+li {
 78+ margin-bottom: .1em;
 79+}
 80+dt {
 81+ font-weight: bold;
 82+ margin-bottom: .05em;
 83+}
 84+dl {
 85+ margin-top: .2em;
 86+ margin-bottom: .5em;
 87+}
 88+dd {
 89+ line-height: 1.5em;
 90+ margin-left: 2em;
 91+ margin-bottom: .5em;
 92+}
 93+a {
 94+ text-decoration: none;
 95+ color: #002bb8;
 96+ background: none;
 97+}
 98+a:visited {
 99+ color: #5a3696;
 100+}
 101+a:active {
 102+ color: #faa700;
 103+}
 104+a:hover {
 105+ text-decoration: underline;
 106+}
 107+
 108+.visualClear {
 109+ clear: both;
 110+}
 111+
 112+/*]]>*/</style>
 113+
 114+</head>
 115+<body>
 116+ <div id="globalWrapper">
 117+ <div id="content">
 118+ <h1>Wikipedia Static HTML Dumps</h1>
 119+<p>This is a set of static HTML dumps of Wikipedia. Note that putting one of these dumps on the web unmodified will constitute a trademark violation. They are intended for private viewing in an intranet or desktop installation.</p>
 120+
 121+<p>The current dump is the <?php echo $currentText; ?> edition. This dump has no image snapshot, it's just HTML. Due to performance problems when compressing millions of files with <a href="http://7-zip.org">7-zip</a>, the archives are now packaged as a 7-zipped tar file.</p>
 122+
 123+<?php if ( $inProgressText ): ?>
 124+<p>A <?php echo $inProgressText; ?> dump is in progress.</p>
 125+<?php endif; ?>
 126+
 127+<ul>
 128+ <li><strong><a href="https://www.mediawiki.org/downloads/<?php echo $encCurrentBase; ?>">Downloads</a></strong></li>
 129+</ul>
 130+<h2>Browse</h2>
 131+<p>Try before you download, click on a language code below.</p>
 132+
 133+<p>
<?php
// Print one link per public Wikipedia, separated by " - ".
//
// Database names come from wikipedia.dblist minus private.dblist. The
// last four characters of each name are dropped to get the language
// code; underscores are shown as hyphens in the link text.
$wikipediaList = file( '/home/wikipedia/common/wikipedia.dblist' );
$privateList = file( '/home/wikipedia/common/private.dblist' );

if ( $wikipediaList === false || $privateList === false ) {
	// If either list cannot be read, list nothing. In particular, never
	// fall back to an empty private list, which would expose private wikis.
	$wikipedias = array();
} else {
	$wikipedias = array_diff(
		array_map( 'trim', $wikipediaList ),
		array_map( 'trim', $privateList )
	);
}

$first = true;
foreach ( $wikipedias as $db ) {
	$db = trim( $db );
	if ( $db === '' ) {
		// Blank line in the dblist
		continue;
	}
	$langWithUnderscores = substr( $db, 0, strlen( $db ) - 4 );
	$lang = str_replace( '_', '-', $langWithUnderscores );
	if ( $first ) {
		$first = false;
	} else {
		echo " - ";
	}
	// Escape for HTML, as the rest of this page does
	$encPath = htmlspecialchars( $langWithUnderscores );
	$encLang = htmlspecialchars( $lang );
	echo "<a href=\"new/wikipedia/$encPath/index.html\">$encLang</a>\n";
}
?>
 151+</p>
 152+ <div class="visualClear"></div>
 153+ </div>
 154+</div>
 155+</body></html>
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/index.inc
___________________________________________________________________
Name: svn:eol-style
1156 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/queueSlave
@@ -0,0 +1,191 @@
 2+#!/usr/bin/python
 3+
 4+import sys, os, os.path, signal, socket, re, time
 5+
 6+def redirectOutput(fileName):
 7+ os.close(1)
 8+ os.close(2)
 9+ fd = os.open(fileName, os.O_WRONLY|os.O_CREAT|os.O_APPEND, 0666)
 10+ os.dup2(fd, 1)
 11+ os.dup2(fd,2)
 12+
def dumpHTML(outputFile, *params):
	"""Run dumpHTML.php in a child process and wait for it.

	The child's output is redirected to outputFile. The parent polls every
	five seconds; if this process's own parent goes away while the child is
	still running, the child is killed.
	"""
	# First element is argv[0] for the exec'd "nice"
	command = ["nice", "nice", "-n15", "php", "-n", "dumpHTML.php"] + list(params)
	msg(" ".join(command))

	childPid = os.fork()
	if childPid == 0:
		redirectOutput(outputFile)
		os.execvp("nice", command)
		sys.exit(1)

	# Wait for the child to exit (or the parent)
	stillRunning = (0,0)
	result = os.waitpid(childPid, os.WNOHANG)
	while result == stillRunning and os.getppid() > 1:
		time.sleep(5)
		result = os.waitpid(childPid, os.WNOHANG)

	if result == stillRunning:
		# The parent exited first, so kill the child
		os.kill(childPid, signal.SIGKILL)
	elif os.WIFSIGNALED(result[1]):
		msg("Process exited on signal %d" % os.WTERMSIG(result[1]))
 34+
def finishWiki(outputFile, lang, checkpoint):
	"""Run the finish-lang script for one language.

	Both stdout and stderr of the script are appended to outputFile. The
	file redirection must come before "2>&1": the shell applies
	redirections left to right, so the previous order ("2>&1 >> file")
	left stderr on the caller's stdout instead of in the log.
	"""
	global edition, siteDir, baseDir
	msg("Finishing language "+lang)
	cmd = "%(baseDir)s/scripts/finish-lang %(lang)s %(edition)s %(checkpoint)s >> %(outputFile)s 2>&1" % {
		'baseDir' : baseDir, 'lang' : lang, 'edition' : edition, 'outputFile': outputFile,
		'checkpoint': checkpoint }
	msg(cmd)
	os.system(cmd)
 43+
def writeStatus(jobID, status):
	"""Write the status file for jobID.

	Line one is "<hostname> <pid>" of this process, line two is status.
	"""
	global jobDir
	statusFile = open(jobDir+"/"+jobID, "w")
	statusFile.write("%s %s\n" % (socket.gethostname(), os.getpid()))
	statusFile.write("%s\n" % (status,))
	statusFile.close()
 50+
def isStatusMine(jobID):
	"""Return True if the status file of jobID names this host and pid.

	Returns False, after logging a message, when the file cannot be opened
	or its first line does not have exactly two fields.
	"""
	global jobDir
	try:
		statusFile = open(jobDir+"/"+jobID, "r")
	except:
		msg("Status file is missing")
		return False

	owner = statusFile.readline().split()
	statusFile.close()
	if len(owner) != 2:
		msg("Warning: invalid status file")
		return False

	ownerHost, ownerPid = owner
	return ownerHost == socket.gethostname() and ownerPid == str(os.getpid())
 69+
def isDone(checkpoint, jobType):
	"""Return True if the checkpoint file has a "<jobType>=done" line.

	Trailing whitespace on each line is ignored. A file that cannot be
	opened counts as not done.
	"""
	wanted = jobType+'=done'
	try:
		handle = open(checkpoint, "r")
	except:
		return False

	found = False
	try:
		for entry in handle:
			if entry.rstrip() == wanted:
				found = True
				break
	finally:
		handle.close()
	return found
 82+
 83+
def writeStatusIfMine(jobID, status):
	"""Update the status of jobID, but only if its status file is ours."""
	if not isStatusMine(jobID):
		msg("Not overwriting status file, it doesn't belong to me.")
		return
	writeStatus(jobID, status)
 89+
def msg(*params):
	"""Print the given strings on one line, space separated, and flush stdout."""
	sys.stdout.write(" ".join(params) + "\n")
	sys.stdout.flush()
 93+
 94+#---------------------------------------------------------------------------------
 95+
hostname = socket.gethostname()
myPid = os.getpid()

msg("queueSlave on %s %d" % (hostname, myPid))

# Command line: <queue host> <queue port> <base dir> <edition>
queueHost = sys.argv[1]
queuePort = int(sys.argv[2])
baseDir = sys.argv[3]
edition = sys.argv[4]

siteDir = baseDir+"/new/wikipedia"
logDir = baseDir+"/var/logs"
jobDir = baseDir+"/var/jobs"
checkpointDir = baseDir+"/var/checkpoints"
downloadsDir = baseDir + "/downloads/" + edition

# Create the working directories, ignoring any error
for workDir in (logDir, jobDir, checkpointDir):
	try: os.makedirs(workDir)
	except: pass

queueSock = socket.socket()
queueSock.connect((queueHost, queuePort))
queueFile = queueSock.makefile()

os.chdir("/home/wikipedia/common/php-1.5/extensions/DumpHTML")
waiting = False

# Queue reply: "data <jobID> <wiki> <type> <n/m>"
dataRegex = re.compile("data (\w+) ([a-z_-]+) (\w+) (\d+/\d+)")

def reportOutcome(jobID, checkpoint, doneKey):
	"""Record 'done' or 'terminated' depending on the checkpoint file."""
	if isDone(checkpoint, doneKey):
		msg("Done")
		writeStatusIfMine(jobID, 'done')
	else:
		msg("Terminated, unfinished")
		writeStatusIfMine(jobID, 'terminated')

# Loop until the parent exits
while os.getppid() > 1:
	queueFile.write("deq\n")
	queueFile.flush()
	reply = queueFile.readline()
	job = dataRegex.match(reply)

	if job is None:
		# No job available; announce the wait once, then poll every second
		if not waiting:
			msg("Waiting...")
			waiting = True
		time.sleep(1)
		continue

	waiting = False
	jobID, wiki, jobType, jobSlice = job.groups()
	lang = wiki.replace( 'wiki', '' )
	dest = siteDir+"/"+lang
	jobString = wiki+"_" + jobType + "_" + jobSlice.replace( '/', '_' )
	outputFile = logDir+"/"+jobString
	checkpoint = checkpointDir+"/"+jobString

	if jobType == "articles" or jobType == "shared":
		# The two dump job types differ only in these three values
		if jobType == "articles":
			descFlag, oomAdj, doneKey = "--no-shared-desc", "6", 'everything'
		else:
			descFlag, oomAdj, doneKey = "--shared-desc", "4", 'shared image'

		writeStatus(jobID, 'running')
		msg(wiki + ' ' + jobType + ' ' + jobSlice)
		dumpHTML(outputFile, wiki, descFlag, "--image-snapshot",
			"--interlang", "-d", dest, "--slice", jobSlice,
			"--udp-profile", "50",
			"--oom-adj", oomAdj,
			#"--show-titles",
			"--checkpoint", checkpoint, "--no-overwrite")
		reportOutcome(jobID, checkpoint, doneKey)

	elif jobType == "finish":
		writeStatus(jobID, 'running')
		finishWiki(outputFile, lang, checkpoint)
		reportOutcome(jobID, checkpoint, 'everything')
 191+
 192+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/queueSlave
___________________________________________________________________
Name: svn:eol-style
1193 + native
Name: svn:executable
2194 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/netqueue.py
@@ -0,0 +1,72 @@
 2+#!/usr/bin/python
 3+# vim: set ts=4 sw=4 :
 4+import SocketServer, sys, signal, os, threading, Queue
 5+
class QueueServer(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
	"""Threaded TCP server that owns a single unbounded job queue."""

	queue = Queue.Queue(0)
	allow_reuse_address = True

	def enqueue(self, value):
		"""Append value to the queue."""
		self.queue.put(value)

	def dequeue(self):
		"""Return the next queued value, or None if the queue is empty."""
		try:
			return self.queue.get_nowait()
		except Queue.Empty:
			return None

	def blockingDequeue(self, file):
		"""Wait for a value and return it.

		The file argument is not used; code that would requeue the value
		for a closed file is disabled.
		"""
		#if file.closed:
		#	File doesn't want it, requeue it
		#	self.queue.put(value)
		#	value = None
		return self.queue.get()

	def clearQueue(self):
		"""Replace the queue with a fresh, empty one."""
		self.queue = Queue.Queue(0)
 30+
 31+
class QueueRequestHandler(SocketServer.StreamRequestHandler):
	"""Line-based protocol handler for QueueServer.

	Commands, one per line:
	  enq <value>  enqueue a value, replies "ok"
	  deq          replies "data <value>" or "empty"
	  bdeq         like deq, but waits for a value
	  size         replies "size <n>"
	  clear        empties the queue, replies "ok"
	Anything else gets "invalid command".
	"""

	def handle(self):
		try:
			for line in self.rfile:
				cmd = line.strip()
				if cmd[:4] == "enq ":
					self.server.enqueue(cmd[4:])
					self.wfile.write("ok\n")
				elif cmd == "deq":
					self.replyWithValue(self.server.dequeue())
				elif cmd == "bdeq":
					self.replyWithValue(self.server.blockingDequeue(self.wfile))
				elif cmd == "size":
					self.wfile.write("size " + str(self.server.queue.qsize()) + "\n")
				elif cmd == "clear":
					self.server.clearQueue()
					self.wfile.write("ok\n")
				else:
					self.wfile.write("invalid command\n")
		except:
			# getpeername() returns a tuple, so it has to be converted before
			# concatenation; it can also fail on a dead socket. Neither may
			# raise from inside this handler.
			try:
				peer = str(self.request.getpeername())
			except:
				peer = "(unknown peer)"
			sys.stdout.write("netqueue: Error processing socket " + peer + "\n")

	def replyWithValue(self, value):
		"""Send "data <value>", or "empty" when value is None."""
		if value is None:
			self.wfile.write("empty\n")
		else:
			self.wfile.write("data " + value + "\n")
 61+
 62+
 63+if __name__ == '__main__':
 64+ server = QueueServer(('127.0.0.1', 8200), QueueRequestHandler)
 65+ try:
 66+ server.serve_forever()
 67+ except KeyboardInterrupt:
 68+ print "Caught KeyboardInterrupt"
 69+ os.kill(os.getpid(), signal.SIGKILL)
 70+ sys.exit(0)
 71+
 72+
 73+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/netqueue.py
___________________________________________________________________
Name: svn:eol-style
174 + native
Name: svn:executable
275 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/compress-volumes
@@ -0,0 +1,32 @@
#!/bin/bash
#
# compress-volumes <language> <edition>
#
# Split the language's html.lst into list files of one million lines each
# and build one 7z archive per list file. Volume 0 also gets the files
# listed in skins.lst.

set -e

if [ -z "$2" ]; then
	echo "Usage: $0 <language> <edition>"
	exit 1
fi

. "`dirname "$0"`/config.sh"

lang=$1
edition=$2
dest=$base/downloads/$edition/$lang
sitebase=$base/wikipedia
bindir=$base/scripts

cd "$dest"
# -f: under "set -e" a plain rm would abort the script when no list files
# from a previous run exist.
rm -f html.lst.*
split -a1 -d -l1000000 html.lst html.lst.
cd "$sitebase"
for f in "$dest"/html.lst.* ; do
	vol=${f#"$dest"/html.lst.}
	if [ "$vol" == 0 ];then
		response_files="@$f @$dest/skins.lst"
	else
		response_files="@$f"
	fi
	# $response_files is left unquoted on purpose: it may hold two arguments
	"$bindir/7za-readdir-hack" -l -ms8m a "$dest/wikipedia-$lang-html.$vol.7z" $response_files
done
 32+
 33+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/compress-volumes
___________________________________________________________________
Name: svn:eol-style
134 + native
Name: svn:executable
235 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/start-edition
@@ -0,0 +1,16 @@
#!/bin/bash
#
# start-edition: reset the job control state for a new edition by
# emptying the checkpoints, jobs and logs directories under $base/var.

. "`dirname "$0"`/config.sh"

# Never run the rm -rf commands below with an empty base directory
if [ -z "$base" ]; then
	echo "No base directory"
	exit 1
fi

# mkdir -p so that a missing $base/var is created as well
rm -rf "$base/var/checkpoints"
mkdir -p "$base/var/checkpoints"
rm -rf "$base/var/jobs"
mkdir -p "$base/var/jobs"
rm -rf "$base/var/logs"
mkdir -p "$base/var/logs"
 17+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/start-edition
___________________________________________________________________
Name: svn:eol-style
118 + native
Name: svn:executable
219 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/.htaccess
@@ -0,0 +1 @@
 2+Deny from all
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/.htaccess
___________________________________________________________________
Name: svn:eol-style
13 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/do-edition
@@ -0,0 +1,96 @@
 2+#!/usr/bin/python
 3+import sys, os, socket, signal, time, stat
 4+
 5+base = "/a/static"
 6+scripts = base + "/scripts"
 7+#host = socket.gethostbyname();
 8+host = "localhost"
 9+
 10+if len(sys.argv) < 2:
 11+ print "Usage: do-edition <edition>"
 12+ sys.exit(1)
 13+
 14+edition = sys.argv[1]
 15+
 16+threads = {
 17+ "localhost": 24,
 18+}
 19+
 20+# Create some directories
 21+try: os.makedirs(base + "/var/logs")
 22+except: pass
 23+try: os.makedirs(base + "/var/checkpoints")
 24+except: pass
 25+
 26+# Set up in_progress symlink
 27+try: os.unlink(base+'/downloads/in_progress')
 28+except: pass
 29+os.symlink(base+'/downloads/'+edition, base+'/downloads/in_progress')
 30+
 31+# Start queue server
 32+print "Starting queue server"
 33+queueServer = os.fork()
 34+if 0 == queueServer:
 35+ # Run it in a new group so that its precious finishlang children don't get hurt
 36+ os.close(1)
 37+ os.close(2)
 38+ os.setsid()
 39+ fd = os.open(base+"/var/logs/netqueue.out", os.O_WRONLY|os.O_CREAT|os.O_APPEND, 0666)
 40+ os.dup2(fd, 1)
 41+ os.dup2(fd, 2)
 42+ os.execlp("python", "python", scripts+"/netqueue.py")
 43+ sys.exit(1)
 44+
 45+# Wait for it to start up
 46+queueSock = socket.socket()
 47+while queueSock.connect_ex(("localhost", 8200)):
 48+ time.sleep(0.1)
 49+
 50+
 51+# Start slave threads
 52+slaves = []
 53+for host, number in threads.iteritems():
 54+ for i in range(number):
 55+ print "Starting thread %d on host %s" % (i, host)
 56+ pid = os.fork()
 57+ if pid == 0:
 58+ # Redirect stdout
 59+ os.close(1)
 60+ fd = os.open("%s/var/logs/%s-%d.out" % (base, host, i), os.O_WRONLY|os.O_CREAT|os.O_APPEND, 0666)
 61+ os.dup2(fd, 1)
 62+
 63+ # Redirect stderr
 64+ os.close(2)
 65+ fd = os.open("%s/var/logs/%s-%d.err" % (base, host, i), os.O_WRONLY|os.O_CREAT|os.O_APPEND, 0666)
 66+ os.dup2(fd, 2)
 67+
 68+ if host == "localhost":
 69+ os.execlp("python", "python", scripts+"/queueSlave", host, "8200", base, edition)
 70+ sys.exit(1)
 71+ else:
 72+ os.execlp("ssh", "ssh", host, "python", scripts+"/queueSlave", host, "8200", base, edition)
 73+ sys.exit(1)
 74+ slaves.append(pid)
 75+
 76+# Start controller, wait for it to exit
 77+print "Starting controller"
 78+try:
 79+ status = os.spawnlp(os.P_WAIT, "php", "php", "-n", "queueController.php")
 80+except KeyboardInterrupt:
 81+ status = "interrupted"
 82+
 83+# Kill queue server
 84+os.kill(queueServer, signal.SIGKILL)
 85+
 86+if status == 0:
 87+ print "Controller has exited, all done\n"
 88+
 89+ # Set up current symlink
 90+ try: os.unlink(base+'/downloads/in_progress')
 91+ except: pass
 92+ try: os.unlink(base+'/downloads/current')
 93+ except: pass
 94+ os.symlink(base+'/downloads/'+edition, base+'/downloads/current')
 95+else:
 96+ print "Exited with status: " + str(status)
 97+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/do-edition
___________________________________________________________________
Name: svn:eol-style
198 + native
Name: svn:executable
299 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/start-lang
@@ -0,0 +1,27 @@
#!/bin/bash
#
# start-lang <lang>
#
# Create the destination tree for a language's dump if it does not exist
# yet: symlinks to the images, upload and skins directories, plus a copy
# of COPYING.html.

# Quoted, so that an argument containing whitespace cannot break the test
if [ -z "$1" ];then
	echo "Usage: start-lang <lang>"
	exit 1
fi

lang=$1
shift
dest=/mnt/static/wikipedia/$lang-new

if [ ! -d "$dest" ];then
	mkdir -p "$dest"
	#rm -rf /mnt/upload3/wikipedia/$lang/shared
	ln -s /home/wikipedia/htdocs/wikipedia.org/images "$dest/images"

	# Upload snapshot disabled, not enough space
	#[ -d /mnt/upload_snapshot/new/$lang ] || mkdir -p /mnt/upload_snapshot/new/$lang
	#ln -s /mnt/upload_snapshot/new/$lang $dest/upload

	# Link every entry of the upload directory, then replace the "shared"
	# link with an empty real directory
	mkdir "$dest/upload"
	ln -s "/mnt/upload3/wikipedia/$lang"/* "$dest/upload/"
	rm -f "$dest/upload/shared"
	mkdir "$dest/upload/shared"
	ln -s /home/wikipedia/common/php-1.5/skins "$dest/skins"
	cp /mnt/static/COPYING.html "$dest/COPYING.html"
fi
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/start-lang
___________________________________________________________________
Name: svn:eol-style
129 + native
Name: svn:executable
230 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/README
@@ -0,0 +1 @@
 2+This directory contains the job control system used to run DumpHTML on Wikimedia. It is Wikimedia-specific and will require some tweaking to make it work in other environments.
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/README
___________________________________________________________________
Name: svn:eol-style
13 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/compress-html
@@ -0,0 +1,52 @@
#!/bin/bash
#
# compress-html <language> <edition> <checkpoint>
#
# Build file lists for one language's static dump, pack the HTML and skin
# files into a 7-zipped tar archive under the edition's download directory
# and finally write "everything=done" to the checkpoint file.
#
# Expansions are quoted so paths containing whitespace (the caller passes
# the checkpoint path quoted) are not split.

if [ "X$3" == X ]; then
	echo "Usage: compress-html <language> <edition> <checkpoint>"
	exit
fi

. "`dirname "$0"`/config.sh"

lang=$1
edition=$2
checkpoint=$3
site=wikipedia

sitebase=$base/new/$site
dest=$base/downloads/$edition/$lang
p7zip=$base/scripts/7za-readdir-hack

mkdir -p "$dest"

# From here on, stop at the first failing command
set -e

echo Finding files...
cd "$sitebase"
find "$lang/" -name \*.html > "$dest/html.lst"

find "$lang/skins" "$lang/raw" "$lang/misc" -type f > "$dest/skins.lst"
echo "$lang/dumpHTML.version" >> "$dest/skins.lst"

[ -e "$lang/images" ] && find "$lang/images" -not -type d > "$dest/images.lst"

echo Found `wc -l < "$dest/html.lst"` files

echo Creating HTML archive...
rm -f "$dest/wikipedia-$lang-html.tar.7z"


# Set chunk size to 8MB for faster random access
#$p7zip -l -ms8m a $dest/wikipedia-$lang-html.7z @$dest/html.lst @$dest/skins.lst

#fileCount=`wc -l $base/downloads/$edition/$lang/html.lst | awk '{print $1}'`
#if [ $fileCount -gt 2000000 ]; then
#	echo "Creating split archives"
#	$base/scripts/compress-volumes "$lang" "$edition"
#fi


tar -c -T "$dest/html.lst" -T "$dest/skins.lst" | "$p7zip" a "$dest/wikipedia-$lang-html.tar.7z" -si -bd

echo "everything=done" > "$checkpoint"
 53+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/compress-html
___________________________________________________________________
Name: svn:eol-style
154 + native
Name: svn:executable
255 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/config.sh
@@ -0,0 +1 @@
 2+base=/a/static
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/config.sh
___________________________________________________________________
Name: svn:eol-style
13 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/finish
@@ -0,0 +1,11 @@
 2+#!/bin/bash
 3+
 4+if [ "X$2" == "X" ];then
 5+ echo "Usage: finish <edition> <lang>"
 6+ exit
 7+fi
 8+export bindir=/var/static/scripts
 9+export edition=$1
 10+. $bindir/functions
 11+
 12+finishlang $2
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/finish
___________________________________________________________________
Name: svn:eol-style
113 + native
Name: svn:executable
214 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/compress-volumes
@@ -0,0 +1,25 @@
 2+#!/bin/bash
 3+
 4+if [ "asdf$1" == asdf ];then
 5+ echo "Usage: compress-volumes <language>"
 6+ exit
 7+fi
 8+
 9+basedir=/var/zwinger/htdocs/static
 10+htmldir=$basedir/$1
 11+listdir=$basedir/downloads/$1/listfiles
 12+destdir=$basedir/downloads/$1/volumes
 13+
 14+cd $basedir
 15+
 16+for listfile in $listdir/vol* ;do
 17+ vol=`basename $listfile`
 18+ destfile=$destdir/$vol.7z
 19+
 20+ if [ -e $destfile ];then
 21+ echo "$destfile already done"
 22+ else
 23+ echo 7z a $destdir/$vol.7z @$listfile
 24+ 7z a $destdir/$vol.7z @$listfile
 25+ fi
 26+done
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/compress-volumes
___________________________________________________________________
Name: svn:eol-style
127 + native
Name: svn:executable
228 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread1
@@ -0,0 +1,57 @@
 2+#!/bin/bash
 3+. $bindir/functions
 4+
 5+for lang in \
 6+aa \
 7+ab \
 8+af \
 9+ak \
 10+als \
 11+am \
 12+ang \
 13+an \
 14+arc \
 15+ar \
 16+ast \
 17+as \
 18+av \
 19+ay \
 20+az \
 21+ba \
 22+be \
 23+bg \
 24+bh \
 25+bi \
 26+bm \
 27+bn \
 28+bo \
 29+br \
 30+bs \
 31+ca \
 32+ceb \
 33+ce \
 34+cho \
 35+chr \
 36+ch \
 37+chy \
 38+co \
 39+cr \
 40+csb \
 41+cs \
 42+cv \
 43+cy \
 44+da \
 45+de \
 46+dk \
 47+dv \
 48+dz \
 49+ee \
 50+el
 51+do
 52+ dolang $lang
 53+ finishlang $lang
 54+done
 55+
 56+dolang en -e 694697
 57+
 58+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread1
___________________________________________________________________
Name: svn:eol-style
159 + native
Name: svn:executable
260 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread2
@@ -0,0 +1,6 @@
 2+#!/bin/bash
 3+. $bindir/functions
 4+
 5+dolang en -s 694697 -e 2918581
 6+
 7+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread2
___________________________________________________________________
Name: svn:eol-style
18 + native
Name: svn:executable
29 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread3
@@ -0,0 +1,54 @@
 2+#!/bin/bash
 3+. $bindir/functions
 4+
 5+dolang en -s 2918581
 6+
 7+for lang in \
 8+eo \
 9+es \
 10+et \
 11+eu \
 12+fa \
 13+ff \
 14+fiu-vro \
 15+'fi' \
 16+fj \
 17+fo \
 18+fr \
 19+fur \
 20+fy \
 21+ga \
 22+gd \
 23+gl \
 24+gn \
 25+got \
 26+gu \
 27+gv \
 28+ha \
 29+haw \
 30+he \
 31+hi \
 32+ho \
 33+hr \
 34+ht \
 35+hu \
 36+hy \
 37+hz \
 38+ia \
 39+id \
 40+ie \
 41+ig \
 42+ii \
 43+ik \
 44+ilo \
 45+io \
 46+is \
 47+it \
 48+iu
 49+do
 50+ dolang $lang
 51+ finishlang $lang
 52+done
 53+
 54+dolang ja -e 323460
 55+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread3
___________________________________________________________________
Name: svn:eol-style
156 + native
Name: svn:executable
257 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/ar-fixup
@@ -0,0 +1,18 @@
 2+#!/bin/bash
 3+
 4+if [ "X$1" == X ];then
 5+ echo "Usage: ar-fixup <edition>"
 6+ exit
 7+fi
 8+
 9+cd /var/static
 10+dest=/var/static/downloads/$edition/ar
 11+if ! grep -q ar/upload/2/26/Arabisc1.png $dest/skins.lst;then
 12+ echo ar/upload/2/26/Arabisc1.png >> $dest/skins.lst
 13+fi
 14+sed -i~ 's!http://upload\.wikimedia\.org/wikipedia/ar/2/26/Arabisc1\.png!../upload/2/26/Arabisc1.png!' ar/raw/ميدياويكي~Monobook.css
 15+
 16+rm -f $dest/wikipedia-ar-html.7z
 17+echo Compressing...
 18+7z -l a $dest/wikipedia-ar-html.7z @$dest/html.lst @$dest/skins.lst > /dev/null
 19+echo Done
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/ar-fixup
___________________________________________________________________
Name: svn:eol-style
120 + native
Name: svn:executable
221 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread4
@@ -0,0 +1,134 @@
 2+#!/bin/bash
 3+. $bindir/functions
 4+
 5+dolang ja -s 323460
 6+
 7+for lang in \
 8+jbo \
 9+jv \
 10+ka \
 11+kg \
 12+ki \
 13+kj \
 14+kk \
 15+kl \
 16+km \
 17+kn \
 18+ko \
 19+kr \
 20+ks \
 21+ku \
 22+kv \
 23+kw \
 24+ky \
 25+lad \
 26+la \
 27+lb \
 28+lg \
 29+li \
 30+ln \
 31+lo \
 32+lt \
 33+lv \
 34+mg \
 35+mh \
 36+mi \
 37+mk \
 38+ml \
 39+mn \
 40+mo \
 41+mr \
 42+ms \
 43+mt \
 44+mus \
 45+my \
 46+nah \
 47+nap \
 48+na \
 49+nds \
 50+ne \
 51+ng \
 52+nl \
 53+nn \
 54+no \
 55+nv \
 56+ny \
 57+oc \
 58+om \
 59+or \
 60+os \
 61+pam \
 62+pa \
 63+pi \
 64+pl \
 65+ps \
 66+pt \
 67+qu \
 68+rm \
 69+rn \
 70+roa-rup \
 71+ro \
 72+ru \
 73+rw \
 74+sa \
 75+scn \
 76+sco \
 77+sc \
 78+sd \
 79+se \
 80+sg \
 81+sh \
 82+simple \
 83+si \
 84+sk \
 85+sl \
 86+sm \
 87+sn \
 88+so \
 89+sq \
 90+sr \
 91+ss \
 92+st \
 93+su \
 94+sv \
 95+sw \
 96+ta \
 97+te \
 98+tg \
 99+th \
 100+ti \
 101+tk \
 102+tlh \
 103+tl \
 104+tn \
 105+to \
 106+tpi \
 107+tr \
 108+ts \
 109+tt \
 110+tum \
 111+tw \
 112+ty \
 113+ug \
 114+uk \
 115+ur \
 116+uz \
 117+ve \
 118+vi \
 119+vo \
 120+war \
 121+wa \
 122+wo \
 123+xh \
 124+yi \
 125+yo \
 126+za \
 127+zh-min-nan \
 128+zh \
 129+zu
 130+do
 131+ dolang $lang
 132+ finishlang $lang
 133+done
 134+
 135+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/thread4
___________________________________________________________________
Name: svn:eol-style
1136 + native
Name: svn:executable
2137 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/index++
@@ -0,0 +1,9 @@
 2+#!/usr/bin/perl
 3+
 4+for ($i=24; $i>=0; $i--) {
 5+ $oldname = sprintf("vol%02d", $i);
 6+ $newname = sprintf("vol%02d", $i+1);
 7+ rename($oldname, $newname) or die "Error moving file $oldname to $newname";
 8+
 9+}
 10+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/index++
___________________________________________________________________
Name: svn:eol-style
111 + native
Name: svn:executable
212 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/dumpHTML.sh
@@ -0,0 +1,19 @@
 2+#!/bin/bash
 3+
 4+lang=$1
 5+shift
 6+dest=/var/static/$lang-new
 7+
 8+if [ ! -d $dest ];then
 9+ rm -rf /mnt/upload3/wikipedia/$lang/shared
 10+ mkdir $dest
 11+ ln -s /home/wikipedia/htdocs/wikipedia.org/images $dest/images
 12+ ln -s /mnt/wikipedia/htdocs/wikipedia.org/upload/$lang $dest/upload
 13+ ln -s /home/wikipedia/common/php-1.5/skins $dest/skins
 14+ cp /var/static/COPYING.html $dest/COPYING.html
 15+fi
 16+
 17+cd /home/wikipedia/common/php-1.5/maintenance
 18+#php dumpHTML.php $lang'wiki' --interlang --force-copy -d $dest "$@"
 19+php dumpHTML.php $lang'wiki' --interlang -d $dest "$@"
 20+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/dumpHTML.sh
___________________________________________________________________
Name: svn:eol-style
121 + native
Name: svn:executable
222 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/compress-volumes2
@@ -0,0 +1,25 @@
 2+#!/bin/bash
 3+
 4+if [ "asdf$1" == asdf ];then
 5+ echo "Usage: compress-volumes <language>"
 6+ exit
 7+fi
 8+
 9+basedir=/mnt/static
 10+htmldir=$basedir/$1
 11+destdir=$basedir/downloads/$1/volumes
 12+shift
 13+
 14+cd $basedir
 15+
 16+for listfile in "$@" ;do
 17+ vol=`basename $listfile`
 18+ destfile=$destdir/$vol.7z
 19+
 20+ if [ -e $destfile ];then
 21+ echo "$destfile already done"
 22+ else
 23+ echo 7z a $destdir/$vol.7z @$listfile
 24+ 7z a $destdir/$vol.7z @$listfile
 25+ fi
 26+done
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/compress-volumes2
___________________________________________________________________
Name: svn:eol-style
127 + native
Name: svn:executable
228 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/queueSlave.php
@@ -0,0 +1,47 @@
 2+<?php
 3+
 4+$queueHost = $argv[1];
 5+$queuePort = $argv[2];
 6+$baseDir = $argv[3];
 7+
 8+$queueSock = fsockopen( $queueHost, $queuePort );
 9+if ( !$queueSock ) {
 10+ echo "Unable to connect to queue server\n";
 11+ die( 1 );
 12+}
 13+
 14+chdir( "/home/wikipedia/common/php-1.5/maintenance" );
 15+$waiting = false;
 16+while ( 1 ) {
 17+ if ( !fwrite( $queueSock, "deq\n" ) ) {
 18+ echo "Unable to write to queue server\n";
 19+ die( 1 );
 20+ }
 21+ $s = fgets( $queueSock );
 22+ if ( $s === false ) {
 23+ echo "Unable to read from queue server\n";
 24+ die( 1 );
 25+ }
 26+ if ( preg_match( '!^data ([a-z_-]+) (\d+/\d+)!', $s, $m ) ) {
 27+ $waiting = false;
 28+ $wiki = $m[1];
 29+ $slice = $m[2];
 30+ echo "-------------------------------------------------------------------\n";
 31+ echo "$wiki $slice\n";
 32+ echo "-------------------------------------------------------------------\n";
 33+ $checkpoint = "$baseDir/checkpoints/{$wiki}_" . str_replace( '/', '_', $slice );
 34+ $lang = str_replace( 'wiki', '', $wiki );
 35+ $dest = "$baseDir/$lang-new";
 36+
 37+ passthru( "php -n dumpHTML.php $wiki --force-copy --image-snapshot --interlang -d $dest --slice $slice --checkpoint $checkpoint" );
 38+ } else {
 39+ # Wait for jobs
 40+ if ( !$waiting ) {
 41+ print "Waiting...\n";
 42+ $waiting = true;
 43+ }
 44+ sleep( 5 );
 45+ }
 46+}
 47+
 48+?>
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/queueSlave.php
___________________________________________________________________
Name: svn:eol-style
149 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/throttle
@@ -0,0 +1,47 @@
 2+#!/usr/bin/perl -w
 3+
 4+if ($#ARGV != 0) {
 5+ print "Usage: throttle <pid>\n";
 6+ exit 1;
 7+}
 8+
 9+$pid = $ARGV[0];
 10+$threshold = 1800;
 11+
 12+# Get status
 13+open STAT, "/proc/$pid/stat" or die "No such process $pid\n";
 14+@bits = split(/ /, <STAT>);
 15+$state = $bits[2];
 16+close STAT;
 17+
 18+print "Found process, state=$state";
 19+
 20+if ($state eq 'T') {
 21+ print " (not running)\n";
 22+ $running = 0;
 23+} else {
 24+ print " (running)\n";
 25+ $running = 1;
 26+}
 27+
 28+# Monitor albert's NFS traffic stats and continue when the calls per second drops below $threshold
 29+$alive = 1;
 30+while ($alive) {
 31+ $traffic = `ganglia-fetch -h albert -p 8662 albert.wikimedia.org nfs_server_calls`;
 32+ chomp($traffic);
 33+
 34+ if ($running) {
 35+ if ($traffic > $threshold) {
 36+ print "nfs_server_calls = $traffic, stopping\n";
 37+ $alive = kill SIGSTOP, $pid;
 38+ $running = 0;
 39+ }
 40+ } else {
 41+ if ($traffic < $threshold) {
 42+ print "nfs_server_calls = $traffic, starting\n";
 43+ $alive = kill SIGCONT, $pid;
 44+ $running = 1;
 45+ }
 46+ }
 47+ sleep 10;
 48+}
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/throttle
___________________________________________________________________
Name: svn:eol-style
149 + native
Name: svn:executable
250 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/functions
@@ -0,0 +1,21 @@
 2+#!/bin/bash
 3+
 4+dolang() {
 5+ echo -------------------------------------------------
 6+ echo $1
 7+ echo -------------------------------------------------
 8+ cd /home/wikipedia/common/php-1.5/maintenance
 9+ $bindir/dumpHTML.sh "$@"
 10+}
 11+
 12+finishlang() {
 13+ if [ -d /mnt/static/$1 ];then
 14+ mv /mnt/static/$1 /mnt/static/$1-old
 15+ fi
 16+ mv /mnt/static/$1-new /mnt/static/$1
 17+
 18+ echo "Compressing $1..."
 19+ $bindir/compress-html $1 $edition 2>&1 >/dev/null
 20+ echo "Done."
 21+}
 22+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/functions
___________________________________________________________________
Name: svn:eol-style
123 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/copy-en
@@ -0,0 +1,25 @@
 2+#!/usr/bin/perl -w
 3+use File::Copy;
 4+use File::Path;
 5+use File::Basename;
 6+
 7+while (<>) {
 8+ chomp($_);
 9+ $source = "/mnt/wikipedia/htdocs/static/$_";
 10+ $dest = "/var/static/$_";
 11+ if (!-e $dest) {
 12+ if (!-d dirname($dest)) {
 13+ mkpath(dirname($dest));
 14+ }
 15+ print "$_ ";
 16+ $result = copy($source, $dest);
 17+ if ( $result ) {
 18+ print "OK\n";
 19+ } else {
 20+ print "failed\n";
 21+ }
 22+ sleep 0.5
 23+ } else {
 24+ print "$_ already copied\n";
 25+ }
 26+}
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/copy-en
___________________________________________________________________
Name: svn:eol-style
127 + native
Name: svn:executable
228 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/filterNamespaces.php
@@ -0,0 +1,35 @@
 2+<?php
 3+
 4+$oldDir = getcwd();
 5+require_once( '/home/wikipedia/common/php/maintenance/commandLine.inc' );
 6+chdir( $oldDir );
 7+
 8+$filename = $args[0];
 9+if ( !$filename ) {
 10+ die("No filename specified\n");
 11+}
 12+
 13+$lines = file( $filename );
 14+if ( !$lines ) {
 15+ die( "Unable to open file $filename\n" );
 16+}
 17+
 18+foreach ( $lines as $line ) {
 19+ $base = basename( trim( $line ) );
 20+ $tildePos = strpos( $base, '~' );
 21+ $printIt = true;
 22+ if ( $tildePos !== false ) {
 23+ $ns = substr( $base, 0, $tildePos );
 24+ $nsi = $wgLang->getNsIndex( $ns );
 25+ if ( $nsi !== false ) {
 26+ if ( !in_array( $nsi, array( NS_IMAGE, NS_PROJECT, NS_HELP, NS_CATEGORY ) ) ) {
 27+ $printIt = false;
 28+ }
 29+ }
 30+ }
 31+ if ( $printIt ) {
 32+ print $line;
 33+ }
 34+}
 35+
 36+?>
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/filterNamespaces.php
___________________________________________________________________
Name: svn:eol-style
137 + native
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/do4thread
@@ -0,0 +1,16 @@
 2+#!/bin/bash
 3+
 4+if [ "X$1" == "X" ];then
 5+ echo "Usage: do4thread <edition>"
 6+ exit
 7+fi
 8+export bindir=/var/static/scripts
 9+export edition=$1
 10+
 11+
 12+cd /home/wikipedia/common/php-1.5/maintenance
 13+$bindir/thread1 $1 >> /var/static/thread1.log 2>&1 &
 14+$bindir/thread2 $1 >> /var/static/thread2.log 2>&1 &
 15+$bindir/thread3 $1 >> /var/static/thread3.log 2>&1 &
 16+$bindir/thread4 $1 >> /var/static/thread4.log 2>&1 &
 17+
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/old/do4thread
___________________________________________________________________
Name: svn:eol-style
118 + native
Name: svn:executable
219 + *
Index: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/queueController.php
@@ -0,0 +1,236 @@
 2+<?php
 3+
 4+$baseDir = '/a/static';
 5+
 6+$wgNoDBParam = true;
 7+require_once( '/home/wikipedia/common/php/maintenance/commandLine.inc' );
 8+
 9+$wikiList = array_map( 'trim', file( '/home/wikipedia/common/wikipedia.dblist' ) );
 10+$private = array_map( 'trim', file( '/home/wikipedia/common/private.dblist' ) );
 11+$closed = array_map( 'trim', file( '/home/wikipedia/common/closed.dblist' ) );
 12+$wikiList = array_diff( $wikiList, $private, $closed );
 13+
 14+$targetQueueSize = 20;
 15+$maxArticlesPerJob = 10000;
 16+$jobTimeout = 86400;
 17+
 18+$queueSock = fsockopen( 'localhost', 8200 );
 19+if ( !$queueSock ) {
 20+ echo "Unable to connect to queue server\n";
 21+ die(1);
 22+}
 23+
 24+# Flush the queue
 25+fwrite( $queueSock, "clear\n" );
 26+fgets( $queueSock );
 27+
 28+# Fetch wiki stats
 29+$wikiSizes = @file_get_contents( "$baseDir/var/checkpoints/wikiSizes" );
 30+if ( $wikiSizes ) {
 31+ $wikiSizes = unserialize( $wikiSizes );
 32+} else {
 33+ $wikiSizes = array();
 34+ foreach ( $wikiList as $wiki ) {
 35+ $lb = wfGetLB( $wiki );
 36+ $db = $lb->getConnection( DB_SLAVE, array(), $wiki );
 37+ $wikiSizes[$wiki] = $db->selectField( "`$wiki`.site_stats", 'ss_total_pages' );
 38+ $lb->reuseConnection( $db );
 39+ }
 40+ file_put_contents( "$baseDir/var/checkpoints/wikiSizes", serialize( $wikiSizes ) );
 41+}
 42+
 43+# Update the cached wikiSizes as per the current dblists
 44+foreach ( $wikiSizes as $wiki => $size ) {
 45+ if ( !in_array( $wiki, $wikiList ) ) {
 46+ unset( $wikiSizes[$wiki] );
 47+ }
 48+}
 49+
 50+# Compute job array
 51+$jobs = array();
 52+$gates = array(
 53+ 'everything' => count( $wikiSizes ),
 54+);
 55+
 56+foreach ( $wikiSizes as $wiki => $size ) {
 57+ # Article jobs
 58+ $numJobs = intval( ceil( $size / $maxArticlesPerJob ) );
 59+ $jobsRemainingPerWiki[$wiki] = $numJobs;
 60+ $trigger = "$wiki articles";
 61+ $gates[$trigger] = $numJobs;
 62+
 63+ for ( $i = 1; $i <= $numJobs; $i++ ) {
 64+ $jobID = count( $jobs );
 65+ $jobs[] = array(
 66+ 'id' => $jobID,
 67+ 'cmd' => "$jobID $wiki articles $i/$numJobs",
 68+ 'wiki'=> $wiki,
 69+ 'trigger' => $trigger
 70+ );
 71+ }
 72+
 73+ # Shared description page jobs
 74+ $numSharedJobs = min( $numJobs, 256 );
 75+ $trigger = "$wiki shared";
 76+ $gates[$trigger] = $numSharedJobs;
 77+
 78+ for ( $i = 1; $i <= $numSharedJobs; $i++ ) {
 79+ $jobID = count( $jobs );
 80+ $jobs[] = array(
 81+ 'id' => $jobID,
 82+ 'gate' => "$wiki articles",
 83+ 'cmd' => "$jobID $wiki shared $i/$numSharedJobs",
 84+ 'wiki' => $wiki,
 85+ 'trigger' => $trigger
 86+ );
 87+ }
 88+
 89+ # Compression job
 90+ $jobID = count( $jobs );
 91+ $jobs[] = array(
 92+ 'id' => $jobID,
 93+ 'gate' => "$wiki shared",
 94+ 'cmd' => "$jobID $wiki finish 1/1",
 95+ 'wiki' => $wiki,
 96+ 'trigger' => 'everything',
 97+ );
 98+}
 99+
 100+# Write job list
 101+if ( !is_dir( "$baseDir/var/jobs" ) ) {
 102+ mkdir( "$baseDir/var/jobs", true );
 103+}
 104+$file = fopen( "$baseDir/var/jobs/list", 'w' );
 105+if ( !$file ) {
 106+ print "Unable to open $baseDir/var/jobs/list for writing\n";
 107+ exit( 1 );
 108+}
 109+foreach ( $jobs as $job ) {
 110+ fwrite( $file, $job['cmd']."\n" );
 111+}
 112+fclose( $file );
 113+
 114+$doneCount = 0;
 115+$start = 0;
 116+$queued = 0;
 117+$jobCount = count( $jobs );
 118+$queueTimes = array();
 119+$initialisedWikis = array();
 120+
 121+print "$jobCount jobs to do\n";
 122+
 123+while ( $gates['everything'] ) {
 124+ for ( $i = $start; $i < $jobCount && getQueueSize() < $targetQueueSize; $i++ ) {
 125+ if ( !isset( $jobs[$i] ) ) {
 126+ # Already done and removed
 127+ continue;
 128+ }
 129+ $job = $jobs[$i];
 130+
 131+ if ( isset( $job['gate'] ) && $gates[$job['gate']] ) {
 132+ # Job is waiting for a gate
 133+ continue;
 134+ }
 135+
 136+ $queueing = false;
 137+ if ( isDone( $job ) ) {
 138+ $doneCount++;
 139+ print "Job $i done: {$job['cmd']} ($doneCount of $jobCount)\n";
 140+
 141+ # Handle any triggers for this job
 142+ if ( isset( $job['trigger'] ) && $gates[$job['trigger']] ) {
 143+ --$gates[$job['trigger']];
 144+ }
 145+ # Remove the job from the job list
 146+ unset( $jobs[$i] );
 147+ # Advance the start pointer
 148+ while ( !isset( $jobs[$start] ) && $start < $jobCount ) {
 149+ $start++;
 150+ }
 151+ } elseif ( !isset( $queueTimes[$i] ) ) {
 152+ print "Queueing job $i: {$job['cmd']}\n";
 153+ $queueing = true;
 154+ } elseif ( time() > $queueTimes[$i] + $jobTimeout ) {
 155+ print "Timeout, requeueing job $i: {$job['cmd']}\n";
 156+ $queueing = true;
 157+ } elseif ( isTerminated( $job ) ) {
 158+ print "Job $i died, requeueing: {$job['cmd']}\n";
 159+ removeJobStatus( $job );
 160+ $queueing = true;
 161+ } else {
 162+ $queueing = false;
 163+ }
 164+ if ( $queueing ) {
 165+ $wiki = $job['wiki'];
 166+ if ( !isset( $initialisedWikis[$wiki] ) ) {
 167+ startWiki( $wiki );
 168+ $initialisedWikis[$wiki] = true;
 169+ }
 170+ enqueue( $job );
 171+ $queueTimes[$i] = time();
 172+ }
 173+ }
 174+ sleep(10);
 175+}
 176+
 177+//------------------------------------------------------------
 178+
 179+function getQueueSize() {
 180+ global $queueSock;
 181+ if ( fwrite( $queueSock, "size\n" ) === false ) {
 182+ die( "Unable to write to queue server\n" );
 183+ }
 184+
 185+ $response = fgets( $queueSock );
 186+ if ( $response === false ) {
 187+ die( "Unable to read from queue server\n" );
 188+ }
 189+ if ( !preg_match( "/^size (\d*)/", $response, $m ) ) {
 190+ die( "Invalid response to size request\n" );
 191+ }
 192+ return $m[1];
 193+}
 194+
 195+function getJobStatus( $job ) {
 196+ global $baseDir;
 197+ $jobStatusFile = "$baseDir/var/jobs/{$job['id']}";
 198+ $lines = @file( $jobStatusFile );
 199+
 200+ if ( !isset( $lines[1] ) ) {
 201+ return false;
 202+ } else {
 203+ return trim( $lines[1] );
 204+ }
 205+}
 206+
 207+function removeJobStatus( $job ) {
 208+ global $baseDir;
 209+ $jobStatusFile = "$baseDir/var/jobs/{$job['id']}";
 210+ @unlink( $jobStatusFile );
 211+}
 212+
 213+function isDone( $job ) {
 214+ return getJobStatus( $job ) == 'done';
 215+}
 216+
 217+function isTerminated( $job ) {
 218+ return getJobStatus( $job ) == 'terminated';
 219+}
 220+
 221+function enqueue( $job ) {
 222+ global $queueSock;
 223+ if ( false === fwrite( $queueSock, "enq {$job['cmd']}\n" ) ) {
 224+ die( "Unable to write to queue server\n" );
 225+ }
 226+
 227+ # Read and throw away response
 228+ $response = fgets( $queueSock );
 229+}
 230+
 231+function startWiki( $wiki ) {
 232+ global $baseDir;
 233+ $lang = str_replace( 'wiki', '', $wiki );
 234+ print "Starting language $lang\n";
 235+}
 236+
 237+?>
Property changes on: branches/wmf-deployment/extensions/DumpHTML/wm-scripts/queueController.php
___________________________________________________________________
Name: svn:eol-style
1238 + native
Index: branches/wmf-deployment/extensions/DumpHTML/README
@@ -0,0 +1 @@
 2+Work in progress on a replacement for maintenance/dumpHTML.php, to work with MW 1.11+.
Property changes on: branches/wmf-deployment/extensions/DumpHTML/README
___________________________________________________________________
Name: svn:eol-style
13 + native
Index: branches/wmf-deployment/extensions/DumpHTML/SkinOffline.php
@@ -0,0 +1,244 @@
 2+<?php
 3+
 4+/**
 5+ * Default skin for HTML dumps, based on MonoBook.php
 6+ */
 7+
 8+if( !defined( 'MEDIAWIKI' ) )
 9+ die( 1 );
 10+
 11+/**
 12+ * Inherit main code from SkinTemplate, set the CSS and template filter.
 13+ * @todo document
 14+ * @addtogroup Skins
 15+ */
 16+class SkinOffline extends SkinTemplate {
 17+ /** Using monobook. */
 18+ function initPage( &$out ) {
 19+ global $wgStylePath;
 20+ SkinTemplate::initPage( $out );
 21+ $this->template = 'SkinOfflineTemplate';
 22+ $this->skinpath = "$wgStylePath/offline";
 23+ }
 24+
 25+ function setupTemplate( $className, $repository = false, $cache_dir = false ) {
 26+ global $wgFavicon;
 27+ $tpl = parent::setupTemplate( $className, $repository, $cache_dir );
 28+ $tpl->set( 'skinpath', $this->skinpath );
 29+ $tpl->set( 'favicon', $wgFavicon );
 30+ return $tpl;
 31+ }
 32+
 33+ function buildSidebar() {
 34+ $sections = parent::buildSidebar();
 35+ $badMessages = array( 'recentchanges-url', 'randompage-url' );
 36+ $badUrls = array();
 37+ foreach ( $badMessages as $msg ) {
 38+ $badUrls[] = self::makeInternalOrExternalUrl( wfMsgForContent( $msg ) );
 39+ }
 40+
 41+ foreach ( $sections as $heading => $section ) {
 42+ foreach ( $section as $index => $link ) {
 43+ if ( in_array( $link['href'], $badUrls ) ) {
 44+ unset( $sections[$heading][$index] );
 45+ }
 46+ }
 47+ }
 48+ return $sections;
 49+ }
 50+
 51+ function buildContentActionUrls() {
 52+ global $wgHTMLDump;
 53+
 54+ $content_actions = array();
 55+ $nskey = $this->getNameSpaceKey();
 56+ $content_actions[$nskey] = $this->tabAction(
 57+ $this->mTitle->getSubjectPage(),
 58+ $nskey,
 59+ !$this->mTitle->isTalkPage() );
 60+
 61+ $content_actions['talk'] = $this->tabAction(
 62+ $this->mTitle->getTalkPage(),
 63+ 'talk',
 64+ $this->mTitle->isTalkPage(),
 65+ '',
 66+ true);
 67+
 68+ if ( isset( $wgHTMLDump ) ) {
 69+ $content_actions['current'] = array(
 70+ 'text' => wfMsg( 'currentrev' ),
 71+ 'href' => str_replace( '$1', wfUrlencode( $this->mTitle->getPrefixedDBkey() ),
 72+ $wgHTMLDump->oldArticlePath ),
 73+ 'class' => false
 74+ );
 75+ }
 76+ return $content_actions;
 77+ }
 78+
 79+ function makeBrokenLinkObj( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
 80+ if ( !isset( $nt ) ) {
 81+ return "<!-- ERROR -->{$prefix}{$text}{$trail}";
 82+ }
 83+
 84+ if ( $nt->getNamespace() == NS_CATEGORY ) {
 85+ # Determine if the category has any articles in it
 86+ $dbr = wfGetDB( DB_SLAVE );
 87+ $hasMembers = $dbr->selectField( 'categorylinks', '1',
 88+ array( 'cl_to' => $nt->getDBkey() ), __METHOD__ );
 89+ if ( $hasMembers ) {
 90+ return $this->makeKnownLinkObj( $nt, $text, $query, $trail, $prefix );
 91+ }
 92+ }
 93+
 94+ if ( $text == '' ) {
 95+ $text = $nt->getPrefixedText();
 96+ }
 97+ return $prefix . $text . $trail;
 98+ }
 99+
 100+ function printSource() {
 101+ return '';
 102+ }
 103+}
 104+
 105+/**
 106+ * @todo document
 107+ * @addtogroup Skins
 108+ */
 109+class SkinOfflineTemplate extends QuickTemplate {
 110+ /**
 111+ * Template filter callback for MonoBook skin.
 112+ * Takes an associative array of data set from a SkinTemplate-based
 113+ * class, and a wrapper for MediaWiki's localization database, and
 114+ * outputs a formatted page.
 115+ *
 116+ * @private
 117+ */
 118+ function execute() {
 119+ wfSuppressWarnings();
 120+?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 121+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="<?php $this->text('lang') ?>" lang="<?php $this->text('lang') ?>" dir="<?php $this->text('dir') ?>">
 122+ <head>
 123+ <meta http-equiv="Content-Type" content="<?php $this->text('mimetype') ?>; charset=<?php $this->text('charset') ?>" />
 124+ <!-- headlinks removed -->
 125+ <link rel="shortcut icon" href="<?php $this->text('favicon'); ?>"/>
 126+ <title><?php $this->text('pagetitle') ?></title>
 127+ <style type="text/css">/*<![CDATA[*/ @import "<?php $this->text('skinpath') ?>/main.css"; /*]]>*/</style>
 128+ <link rel="stylesheet" type="text/css" media="print" href="<?php $this->text('stylepath') ?>/common/commonPrint.css" />
 129+ <!--[if lt IE 5.5000]><style type="text/css">@import "<?php $this->text('stylepath') ?>/<?php $this->text('stylename') ?>/IE50Fixes.css";</style><![endif]-->
 130+ <!--[if IE 5.5000]><style type="text/css">@import "<?php $this->text('stylepath') ?>/<?php $this->text('stylename') ?>/IE55Fixes.css";</style><![endif]-->
 131+ <!--[if IE 6]><style type="text/css">@import "<?php $this->text('stylepath') ?>/<?php $this->text('stylename') ?>/IE60Fixes.css";</style><![endif]-->
 132+ <!--[if IE]><script type="<?php $this->text('jsmimetype') ?>" src="<?php $this->text('stylepath') ?>/common/IEFixes.js"></script>
 133+ <meta http-equiv="imagetoolbar" content="no" /><![endif]-->
 134+ <script type="<?php $this->text('jsmimetype') ?>" src="<?php $this->text('stylepath' ) ?>/common/wikibits.js"></script>
 135+ <script type="<?php $this->text('jsmimetype') ?>" src="<?php $this->text('skinpath' ) ?>/md5.js"></script>
 136+ <script type="<?php $this->text('jsmimetype') ?>" src="<?php $this->text('skinpath' ) ?>/utf8.js"></script>
 137+ <script type="<?php $this->text('jsmimetype') ?>" src="<?php $this->text('skinpath' ) ?>/lookup.js"></script>
 138+ <?php if($this->data['jsvarurl' ]) { ?><script type="<?php $this->text('jsmimetype') ?>" src="<?php $this->text('jsvarurl' ) ?>"></script><?php } ?>
 139+ <?php if($this->data['pagecss' ]) { ?><style type="text/css"><?php $this->html('pagecss' ) ?></style><?php } ?>
 140+ <?php if($this->data['usercss' ]) { ?><style type="text/css"><?php $this->html('usercss' ) ?></style><?php } ?>
 141+ <?php if($this->data['userjs' ]) { ?><script type="<?php $this->text('jsmimetype') ?>" src="<?php $this->text('userjs' ) ?>"></script><?php } ?>
 142+ <?php if($this->data['userjsprev']) { ?><script type="<?php $this->text('jsmimetype') ?>"><?php $this->html('userjsprev') ?></script><?php } ?>
 143+ </head>
 144+ <body
 145+ <?php if($this->data['pageclass']) { ?>class="<?php $this->text('pageclass') ?>"<?php } ?>>
 146+ <div id="globalWrapper">
 147+ <div id="column-content">
 148+ <div id="content">
 149+ <a name="top" id="contentTop"></a>
 150+ <?php if($this->data['sitenotice']) { ?><div id="siteNotice"><?php $this->html('sitenotice') ?></div><?php } ?>
 151+ <h1 class="firstHeading"><?php $this->data['displaytitle']!=""?$this->html('title'):$this->text('title') ?></h1>
 152+ <div id="bodyContent">
 153+ <h3 id="siteSub"><?php $this->msg('tagline') ?></h3>
 154+ <div id="contentSub"><?php $this->html('subtitle') ?></div>
 155+ <?php if($this->data['undelete']) { ?><div id="contentSub"><?php $this->html('undelete') ?></div><?php } ?>
 156+ <?php if($this->data['newtalk'] ) { ?><div class="usermessage"><?php $this->html('newtalk') ?></div><?php } ?>
 157+ <!-- start content -->
 158+ <?php $this->html('bodytext') ?>
 159+ <?php if($this->data['catlinks']) { ?><div id="catlinks"><?php $this->html('catlinks') ?></div><?php } ?>
 160+ <!-- end content -->
 161+ <div class="visualClear"></div>
 162+ </div>
 163+ </div>
 164+ </div>
 165+ <div id="column-one">
 166+ <div id="p-cactions" class="portlet">
 167+ <h5>Views</h5>
 168+ <ul>
 169+ <?php foreach($this->data['content_actions'] as $key => $action) {
 170+ ?><li id="ca-<?php echo htmlspecialchars($key) ?>"
 171+ <?php if($action['class']) { ?>class="<?php echo htmlspecialchars($action['class']) ?>"<?php } ?>
 172+ ><a href="<?php echo htmlspecialchars($action['href']) ?>"><?php
 173+ echo htmlspecialchars($action['text']) ?></a></li><?php
 174+ } ?>
 175+ </ul>
 176+ </div>
 177+ <div class="portlet" id="p-logo">
 178+ <a style="background-image: url(<?php $this->text('logopath') ?>);"
 179+ href="<?php echo htmlspecialchars($this->data['nav_urls']['mainpage']['href'])?>"
 180+ title="<?php $this->msg('mainpage') ?>"></a>
 181+ </div>
 182+ <script type="<?php $this->text('jsmimetype') ?>"> if (window.isMSIE55) fixalpha(); </script>
 183+ <?php foreach ($this->data['sidebar'] as $bar => $cont) { ?>
 184+ <div class='portlet' id='p-<?php echo htmlspecialchars($bar) ?>'>
 185+ <h5><?php $out = wfMsg( $bar ); if (wfEmptyMsg($bar, $out)) echo $bar; else echo $out; ?></h5>
 186+ <div class='pBody'>
 187+ <ul>
 188+ <?php foreach($cont as $key => $val) { ?>
 189+ <li id="<?php echo htmlspecialchars($val['id']) ?>"><a href="<?php echo htmlspecialchars($val['href']) ?>"><?php echo htmlspecialchars($val['text'])?></a></li>
 190+ <?php } ?>
 191+ </ul>
 192+ </div>
 193+ </div>
 194+ <?php } ?>
 195+ <div id="p-search" class="portlet">
 196+ <h5><label for="searchInput"><?php $this->msg('search') ?></label></h5>
 197+ <div id="searchBody" class="pBody">
 198+ <form action="javascript:goToStatic(3)" id="searchform"><div>
 199+ <input id="searchInput" name="search" type="text"
 200+ <?php if($this->haveMsg('accesskey-search')) {
 201+ ?>accesskey="<?php $this->msg('accesskey-search') ?>"<?php }
 202+ if( isset( $this->data['search'] ) ) {
 203+ ?> value="<?php $this->text('search') ?>"<?php } ?> />
 204+ <input type='submit' name="go" class="searchButton" id="searchGoButton"
 205+ value="<?php $this->msg('go') ?>" />
 206+ </div></form>
 207+ </div>
 208+ </div>
 209+ <?php if( $this->data['language_urls'] ) { ?><div id="p-lang" class="portlet">
 210+ <h5><?php $this->msg('otherlanguages') ?></h5>
 211+ <div class="pBody">
 212+ <ul>
 213+ <?php foreach($this->data['language_urls'] as $langlink) { ?>
 214+ <li>
 215+ <a href="<?php echo htmlspecialchars($langlink['href'])
 216+ ?>"><?php echo $langlink['text'] ?></a>
 217+ </li>
 218+ <?php } ?>
 219+ </ul>
 220+ </div>
 221+ </div>
 222+ <?php } ?>
 223+ </div><!-- end of the left (by default at least) column -->
 224+ <div class="visualClear"></div>
 225+ <div id="footer">
 226+ <?php if($this->data['poweredbyico']) { ?><div id="f-poweredbyico"><?php $this->html('poweredbyico') ?></div><?php } ?>
 227+ <?php if($this->data['copyrightico']) { ?><div id="f-copyrightico"><?php $this->html('copyrightico') ?></div><?php } ?>
 228+ <ul id="f-list">
 229+ <?php if($this->data['lastmod' ]) { ?><li id="f-lastmod"><?php $this->html('lastmod') ?></li><?php } ?>
 230+ <?php if($this->data['numberofwatchingusers' ]) { ?><li id="f-numberofwatchingusers"><?php $this->html('numberofwatchingusers') ?></li><?php } ?>
 231+ <?php if($this->data['credits' ]) { ?><li id="f-credits"><?php $this->html('credits') ?></li><?php } ?>
 232+ <?php if($this->data['copyright' ]) { ?><li id="f-copyright"><?php $this->html('copyright') ?></li><?php } ?>
 233+ <?php if($this->data['about' ]) { ?><li id="f-about"><?php $this->html('about') ?></li><?php } ?>
 234+ <?php if($this->data['disclaimer']) { ?><li id="f-disclaimer"><?php $this->html('disclaimer') ?></li><?php } ?>
 235+ <?php if($this->data['tagline']) { ?><li id="f-tagline"><?php echo $this->data['tagline'] ?></li><?php } ?>
 236+ </ul>
 237+ </div>
 238+ </div>
 239+ </body>
 240+</html>
 241+<?php
 242+ wfRestoreWarnings();
 243+ }
 244+}
 245+?>
Property changes on: branches/wmf-deployment/extensions/DumpHTML/SkinOffline.php
___________________________________________________________________
Name: svn:keywords
1246 + Author Date Id Revision
Name: svn:eol-style
2247 + native

Status & tagging log