Index: trunk/tools/dumpHTML/queueSlave
===================================================================
@@ -0,0 +1,56 @@
+#!/usr/bin/python
+
+import sys, os, signal, socket, re, time
+
+queueHost = sys.argv[1]
+queuePort = int(sys.argv[2])
+baseDir = sys.argv[3]
+siteDir = baseDir + "/wikipedia"
+
+queueSock = socket.socket()
+queueSock.connect((queueHost, queuePort))
+queueFile = queueSock.makefile()
+
+os.chdir("/home/wikipedia/common/php-1.5/maintenance")
+waiting = False
+
+dataRegex = re.compile(r"data ([a-z_-]+) (\d+/\d+)")
+
+# Loop until the parent exits
+while os.getppid() > 1:
+    queueFile.write("deq\n")
+    queueFile.flush()
+    s = queueFile.readline()
+    m = dataRegex.match(s)
+    if m is not None:
+        waiting = False
+        wiki = m.group(1)
+        slice = m.group(2)
+        print "-------------------------------------------------------------------"
+        print wiki + ' ' + slice
+        print "-------------------------------------------------------------------"
+        checkpoint = baseDir + "/checkpoints/" + wiki + "_" + slice.replace('/', '_')
+        lang = wiki.replace('wiki', '')
+        dest = siteDir + "/" + lang + "-new"
+        pid = os.spawnlp(os.P_NOWAIT, "nice", "nice", "-n15",
+            "php", "-n", "dumpHTML.php", wiki,  # "--force-copy",
+            "--image-snapshot", "--interlang", "-d", dest, "--slice", slice,
+            "--checkpoint", checkpoint, "--no-overwrite")
+
+        # Wait for the child to exit (or the parent)
+        status = os.waitpid(pid, os.WNOHANG)
+        while status == (0, 0) and os.getppid() > 1:
+            time.sleep(5)
+            status = os.waitpid(pid, os.WNOHANG)
+
+        # If the parent exited, then kill the child
+        if status == (0, 0):
+            os.kill(pid, signal.SIGKILL)
+
+    else:
+        if not waiting:
+            print "Waiting..."
+            waiting = True
+        time.sleep(5)
+
+
Property changes on: trunk/tools/dumpHTML/queueSlave
___________________________________________________________________
Added: svn:executable
   + *
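
A note on the wire format before the server code below: each job is one
line of the form "<wiki> <n>/<m>", and the slave derives the checkpoint
filename from it by replacing the slash. A minimal sketch of that mapping
(the wiki name and slice are hypothetical example values):

    import re

    dataRegex = re.compile(r"data ([a-z_-]+) (\d+/\d+)")
    m = dataRegex.match("data enwiki 3/25")
    wiki, slice = m.group(1), m.group(2)
    checkpoint = "/mnt/static/checkpoints/" + wiki + "_" + slice.replace('/', '_')
    # -> /mnt/static/checkpoints/enwiki_3_25
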
Index: trunk/tools/dumpHTML/netqueue.py
===================================================================
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+
+import SocketServer, sys, signal, os, threading, Queue
+
+class QueueServer(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
+    queue = Queue.Queue(0)
+    allow_reuse_address = True
+
+    def enqueue(self, value):
+        self.queue.put(value)
+
+    def dequeue(self):
+        try:
+            value = self.queue.get_nowait()
+        except Queue.Empty:
+            value = None
+        return value
+
+    def blockingDequeue(self, file):
+        value = self.queue.get()
+        #if file.closed:
+        #    # File doesn't want it, requeue it
+        #    self.queue.put(value)
+        #    value = None
+        return value
+
+    def clearQueue(self):
+        self.queue = Queue.Queue(0)
+
+
+class QueueRequestHandler(SocketServer.StreamRequestHandler):
+    def handle(self):
+        for line in self.rfile:
+            cmd = line.strip()
+            if cmd[:4] == "enq ":
+                self.server.enqueue(cmd[4:])
+                self.wfile.write("ok\n")
+            elif cmd == "deq":
+                value = self.server.dequeue()
+                if value is None:
+                    self.wfile.write("empty\n")
+                else:
+                    self.wfile.write("data " + value + "\n")
+            elif cmd == "bdeq":
+                value = self.server.blockingDequeue(self.wfile)
+                if value is None:
+                    self.wfile.write("empty\n")
+                else:
+                    self.wfile.write("data " + value + "\n")
+            elif cmd == "size":
+                self.wfile.write("size " + str(self.server.queue.qsize()) + "\n")
+            elif cmd == "clear":
+                self.server.clearQueue()
+                self.wfile.write("ok\n")
+            else:
+                self.wfile.write("invalid command\n")
+
+
+if __name__ == '__main__':
+    server = QueueServer(('', 8200), QueueRequestHandler)
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        print "Caught KeyboardInterrupt"
+        os.kill(os.getpid(), signal.SIGKILL)  # take handler threads down hard
+        sys.exit(0)
+
+
+
Property changes on: trunk/tools/dumpHTML/netqueue.py
___________________________________________________________________
Added: svn:executable
   + *
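
Because the protocol is line-oriented, the server is easy to exercise by
hand. A minimal client session (a sketch; assumes netqueue.py is already
listening on localhost:8200, and "somewiki 1/2" is a hypothetical job):

    import socket

    sock = socket.socket()
    sock.connect(("localhost", 8200))
    f = sock.makefile()

    def send(cmd):
        # One command per line; every command gets a one-line reply
        f.write(cmd + "\n")
        f.flush()
        return f.readline().strip()

    print send("enq somewiki 1/2")  # -> ok
    print send("size")              # -> size 1
    print send("deq")               # -> data somewiki 1/2
    print send("deq")               # -> empty
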
Index: trunk/tools/dumpHTML/do-edition
===================================================================
@@ -0,0 +1,81 @@
+#!/usr/bin/python
+import sys, os, socket, signal, time, stat
+
+base = "/mnt/static"
+scripts = base + "/scripts"
+
+if len(sys.argv) < 2:
+    print "Usage: do-edition <edition>"
+    sys.exit(1)
+
+edition = sys.argv[1]
+
+threads = {
+    "localhost": 1
+    #"srv42": 4,
+    #"srv122": 2,
+    #"srv123": 2,
+    #"srv124": 2,
+    #"srv125": 2
+}
+
+# Create some directories
+try: os.makedirs(base + "/logs")
+except OSError: pass
+try: os.makedirs(base + "/checkpoints")
+except OSError: pass
+
+# Start queue server
+print "Starting queue server"
+queueServer = os.fork()
+if queueServer == 0:
+    # Run it in a new session so that its precious finishlang children don't get hurt
+    os.setsid()
+    os.execlp("python", "python", scripts + "/netqueue.py")
+    sys.exit(1)
+
+# Wait for it to start up
+queueSock = socket.socket()
+while queueSock.connect_ex(("localhost", 8200)):
+    time.sleep(0.1)
+
+
+# Start slave threads
+slaves = []
+for host, number in threads.iteritems():
+    for i in range(number):
+        print "Starting thread %d on host %s" % (i, host)
+        pid = os.fork()
+        if pid == 0:
+            # Redirect stdout
+            os.close(1)
+            fd = os.open("%s/logs/%s-%d.out" % (base, host, i), os.O_WRONLY|os.O_CREAT|os.O_APPEND, 0666)
+            os.dup2(fd, 1)
+
+            # Redirect stderr
+            os.close(2)
+            fd = os.open("%s/logs/%s-%d.err" % (base, host, i), os.O_WRONLY|os.O_CREAT|os.O_APPEND, 0666)
+            os.dup2(fd, 2)
+
+            if host == "localhost":
+                #os.execlp("php", "php", "-n", scripts+"/queueSlave.php", socket.gethostname(), "8200", base)
+                os.execlp("python", "python", scripts + "/queueSlave", socket.gethostname(), "8200", base)
+                sys.exit(1)
+            else:
+                #os.execlp("ssh", "ssh", host, "php", "-n", scripts+"/queueSlave.php", socket.gethostname(), "8200", base)
+                os.execlp("ssh", "ssh", host, "python", scripts + "/queueSlave", socket.gethostname(), "8200", base)
+                sys.exit(1)
+        slaves.append(pid)
+
+# Start controller, wait for it to exit
+print "Starting controller"
+try:
+    os.spawnlp(os.P_WAIT, "php", "php", "-n", "queueController.php", edition)
+    print "Controller has exited, all done\n"
+except KeyboardInterrupt:
+    pass
+
+
+# All done, kill queue server
+os.kill(queueServer, signal.SIGKILL)
+
Property changes on: trunk/tools/dumpHTML/do-edition
___________________________________________________________________
Added: svn:executable
   + *
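
do-edition never signals the slaves directly: when it exits, each orphaned
queueSlave is adopted by init (pid 1), its os.getppid() > 1 loop condition
goes false, and it shuts itself and its dumpHTML child down. The pattern in
isolation (a minimal, self-contained sketch):

    import os, time

    pid = os.fork()
    if pid == 0:
        # Child: run until orphaned, i.e. until the parent dies and
        # init (pid 1) adopts this process
        while os.getppid() > 1:
            time.sleep(1)
        os._exit(0)
    time.sleep(3)
    # Parent exits here; the child notices within about a second
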
Index: trunk/tools/dumpHTML/compress-html
===================================================================
@@ -5,30 +5,37 @@
 	exit
 fi
 
-cd /var/static
 
 lang=$1
 edition=$2
+site=wikipedia
 
-dest=/var/static/downloads/$edition/$lang
+base=/mnt/static
+sitebase=$base/$site
+dest=$base/downloads/$edition/$lang
+
 mkdir -p $dest
 
 echo Finding files...
+cd $sitebase
 find $lang -name \*.html > $dest/html.lst
 
 #echo Filtering...
 #php scripts/filterNamespaces.php $lang wikipedia $dest/html.lst > $dest/reduced.lst
 
 find $lang/skins $lang/images -follow -type f \
-	-not \( -name \*.php -or -name \*.xcf -or -name .\* -or -name \*~ -or -path \*/CVS/\* \) \
+	-not \( -name \*.php -or -name \*.xcf -or -name .\* -or -name \*~ -or \
+	-path \*/CVS/\* -or -path \*/.svn\* -or -path \*/wikimania\* \) \
 	> $dest/skins.lst
 find $lang/raw -type f >> $dest/skins.lst
 echo $lang/upload/b/bc/Wiki.png >> $dest/skins.lst
 
 echo Creating HTML archive...
 rm -f $dest/wikipedia-$lang-html.7z
-7z -l a $dest/wikipedia-$lang-html.7z @$dest/html.lst @$dest/skins.lst
 
+# Set the solid block size to 8MB for faster random access
+7z -l -ms8m a $dest/wikipedia-$lang-html.7z @$dest/html.lst @$dest/skins.lst
+
 #if [ ! -e $dest/wikipedia-$lang-reduced.7z ];then
 #	echo
 #	echo Creating reduced archive...
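
The point of the smaller solid blocks: extracting one page decompresses at
most one 8MB block instead of the whole archive. A sketch of a single-file
extraction (archive and member paths are hypothetical; 7z's "e ... -so"
writes the member to stdout):

    import subprocess

    subprocess.call(["7z", "e", "-so",
                     "/mnt/static/downloads/2006-06/en/wikipedia-en-html.7z",
                     "en/articles/f/o/o/Foo.html"])
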
Index: trunk/tools/dumpHTML/finish-lang
===================================================================
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+if [ "X$2" == "X" ];then
+	echo "Usage: finish-lang <lang> <edition>"
+	exit
+fi
+bindir=/mnt/static/scripts
+lang=$1
+edition=$2
+site=wikipedia
+
+if [ ! -d /mnt/static/$site/$lang-new ]; then
+	echo "Already compressed $lang"
+	exit
+fi
+if [ ! -e /mnt/static/$site/$lang-new/index.html ]; then
+	echo "$lang-new directory is broken, missing index.html, skipping."
+	exit
+fi
+
+if [ -d /mnt/static/$site/$lang ];then
+	if [ -d /mnt/static/$site/$lang-old ];then
+		rm -rf /mnt/static/$site/$lang-old
+	fi
+	mv /mnt/static/$site/$lang /mnt/static/$site/$lang-old
+fi
+mv /mnt/static/$site/$lang-new /mnt/static/$site/$lang
+
+echo "$lang: Compressing HTML..."
+$bindir/compress-html $lang $edition 2>&1 >/dev/null
+#echo "$lang: Making image tarball..."
+#ssh albert tar -C /mnt/static -cf /a/upload_snapshot/$edition/downloads/wikipedia-$lang-images.tar -h $lang/upload
+#ln -sf /mnt/upload_snapshot/$edition/downloads/wikipedia-$lang-images.tar /mnt/static/downloads/$edition/$lang/wikipedia-$lang-images.tar
+echo "$lang: Done."
Property changes on: trunk/tools/dumpHTML/finish-lang
___________________________________________________________________
Added: svn:executable
   + *
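
The swap above keeps exactly one previous generation: <lang>-new is
promoted to live, the old live tree becomes <lang>-old, and any earlier
-old is discarded first. The same rotation in Python (a sketch; the
sitebase argument is hypothetical):

    import os, shutil

    def rotate(sitebase, lang):
        # Promote <lang>-new to live, keeping one generation as <lang>-old
        live = os.path.join(sitebase, lang)
        if os.path.isdir(live):
            old = live + "-old"
            if os.path.isdir(old):
                shutil.rmtree(old)
            os.rename(live, old)
        os.rename(live + "-new", live)
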
Index: trunk/tools/dumpHTML/queueSlave.php
===================================================================
@@ -0,0 +1,47 @@
+<?php
+
+$queueHost = $argv[1];
+$queuePort = $argv[2];
+$baseDir = $argv[3];
+
+$queueSock = fsockopen( $queueHost, $queuePort );
+if ( !$queueSock ) {
+	echo "Unable to connect to queue server\n";
+	die( 1 );
+}
+
+chdir( "/home/wikipedia/common/php-1.5/maintenance" );
+$waiting = false;
+while ( 1 ) {
+	if ( !fwrite( $queueSock, "deq\n" ) ) {
+		echo "Unable to write to queue server\n";
+		die( 1 );
+	}
+	$s = fgets( $queueSock );
+	if ( $s === false ) {
+		echo "Unable to read from queue server\n";
+		die( 1 );
+	}
+	if ( preg_match( '!^data ([a-z_-]+) (\d+/\d+)!', $s, $m ) ) {
+		$waiting = false;
+		$wiki = $m[1];
+		$slice = $m[2];
+		echo "-------------------------------------------------------------------\n";
+		echo "$wiki $slice\n";
+		echo "-------------------------------------------------------------------\n";
+		$checkpoint = "$baseDir/checkpoints/{$wiki}_" . str_replace( '/', '_', $slice );
+		$lang = str_replace( 'wiki', '', $wiki );
+		$dest = "$baseDir/$lang-new";
+
+		passthru( "php -n dumpHTML.php $wiki --force-copy --image-snapshot --interlang -d $dest --slice $slice --checkpoint $checkpoint" );
+	} else {
+		# Wait for jobs
+		if ( !$waiting ) {
+			print "Waiting...\n";
+			$waiting = true;
+		}
+		sleep( 5 );
+	}
+}
+
+?>
Index: trunk/tools/dumpHTML/compress-volumes2
===================================================================
@@ -5,7 +5,7 @@
 	exit
 fi
 
-basedir=/var/static
+basedir=/mnt/static
 htmldir=$basedir/$1
 destdir=$basedir/downloads/$1/volumes
 shift
Index: trunk/tools/dumpHTML/functions
===================================================================
@@ -9,12 +9,12 @@
 }
 
 finishlang() {
-	if [ -d /var/static/$1 ];then
-		mv /var/static/$1 /var/static/$1-old
+	if [ -d /mnt/static/$1 ];then
+		mv /mnt/static/$1 /mnt/static/$1-old
 	fi
-	mv /var/static/$1-new /var/static/$1
+	mv /mnt/static/$1-new /mnt/static/$1
 
-	echo "Compressing..."
+	echo "Compressing $1..."
 	$bindir/compress-html $1 $edition 2>&1 >/dev/null
 	echo "Done."
 }
Index: trunk/tools/dumpHTML/start-lang
===================================================================
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+if [ -z "$1" ];then
+	echo "Usage: start-lang <lang>"
+	exit 1
+fi
+
+lang=$1
+shift
+dest=/mnt/static/wikipedia/$lang-new
+
+if [ ! -d $dest ];then
+	mkdir -p $dest
+	#rm -rf /mnt/upload3/wikipedia/$lang/shared
+	ln -s /home/wikipedia/htdocs/wikipedia.org/images $dest/images
+
+	# Upload snapshot disabled, not enough space
+	#[ -d /mnt/upload_snapshot/new/$lang ] || mkdir -p /mnt/upload_snapshot/new/$lang
+	#ln -s /mnt/upload_snapshot/new/$lang $dest/upload
+
+	mkdir $dest/upload
+	ln -s /mnt/upload3/wikipedia/$lang/* $dest/upload/
+	rm -f $dest/upload/shared
+	mkdir $dest/upload/shared
+	ln -s /home/wikipedia/common/php-1.5/skins $dest/skins
+	cp /mnt/static/COPYING.html $dest/COPYING.html
+fi
Property changes on: trunk/tools/dumpHTML/start-lang
___________________________________________________________________
Added: svn:executable
   + *
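
For reference, the staging tree start-lang builds looks roughly like this
(a sketch for a hypothetical lang=en; "->" marks a symlink):

    /mnt/static/wikipedia/en-new/
        images        -> /home/wikipedia/htdocs/wikipedia.org/images
        skins         -> /home/wikipedia/common/php-1.5/skins
        upload/
            *         -> /mnt/upload3/wikipedia/en/*   (per-entry links)
            shared/      (real directory, replacing the shared link)
        COPYING.html     (copied, not linked)
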
Index: trunk/tools/dumpHTML/queueController.php
===================================================================
@@ -0,0 +1,183 @@
+<?php
+
+$basedir = '/mnt/static';
+
+$wgNoDBParam = true;
+require_once( '/home/wikipedia/common/php/maintenance/commandLine.inc' );
+
+if ( !isset( $args[0] ) ) {
+	echo "Usage: queueController.php <edition>\n";
+	die( 1 );
+}
+
+$wikiList = array_map( 'trim', file( '/home/wikipedia/common/wikipedia.dblist' ) );
+$yaseo = array_map( 'trim', file( '/home/wikipedia/common/yaseo.dblist' ) );
+$wikiList = array_diff( $wikiList, $yaseo );
+
+$targetQueueSize = 20;
+$maxArticlesPerJob = 10000;
+$jobTimeout = 86400;
+$edition = $args[0];
+
+$queueSock = fsockopen( 'localhost', 8200 );
+if ( !$queueSock ) {
+	echo "Unable to connect to queue server\n";
+	die( 1 );
+}
+
+# Flush the queue
+fwrite( $queueSock, "clear\n" );
+fgets( $queueSock );
+
+# Fetch wiki stats
+$wikiSizes = @file_get_contents( "$basedir/checkpoints/wikiSizes" );
+if ( $wikiSizes ) {
+	$wikiSizes = unserialize( $wikiSizes );
+} else {
+	$wikiSizes = array();
+	foreach ( $wikiList as $wiki ) {
+		if ( !empty( $wgAlternateMaster[$wiki] ) ) {
+			$db = new Database( $wgAlternateMaster[$wiki], $wgDBuser, $wgDBpassword, $wiki );
+		} else {
+			$db = wfGetDB( DB_SLAVE );
+		}
+
+		$wikiSizes[$wiki] = $db->selectField( "`$wiki`.site_stats", 'ss_total_pages' );
+	}
+	file_put_contents( "$basedir/checkpoints/wikiSizes", serialize( $wikiSizes ) );
+}
+
+# Compute job array
+$jobs = array();
+$jobsRemainingPerWiki = array();
+foreach ( $wikiSizes as $wiki => $size ) {
+	if ( in_array( $wiki, $yaseo ) ) {
+		continue;
+	}
+	$numJobs = intval( ceil( $size / $maxArticlesPerJob ) );
+	$jobsRemainingPerWiki[$wiki] = $numJobs;
+	for ( $i = 1; $i <= $numJobs; $i++ ) {
+		$jobs[] = "$wiki $i/$numJobs";
+	}
+}
+
+$start = 0;
+$doneCount = 0;
+$queued = 0;
+$jobCount = count( $jobs );
+$queueTimes = array();
+$initialisedWikis = array();
+
+print "$jobCount jobs to do\n";
+
+while ( $doneCount < $jobCount ) {
+	for ( $i = $start; $i < $jobCount && getQueueSize() < $targetQueueSize; $i++ ) {
+		if ( !isset( $jobs[$i] ) ) {
+			# Already done and removed
+			continue;
+		}
+		$job = $jobs[$i];
+		list( $wiki ) = explode( ' ', $job );
+		if ( !$wiki ) {
+			die( "Invalid job: $job\n" );
+		}
+		$queueing = false;
+		if ( isDone( $job ) ) {
+			$doneCount++;
+			print "Job $i done: $job ($doneCount of $jobCount)\n";
+			$remaining = --$jobsRemainingPerWiki[$wiki];
+			if ( !$remaining ) {
+				finishWiki( $wiki );
+			} else {
+				print "$remaining jobs remaining for $wiki\n";
+			}
+
+			unset( $jobs[$i] );
+			while ( !isset( $jobs[$start] ) && $start < $jobCount ) {
+				$start++;
+			}
+		} elseif ( !isset( $queueTimes[$i] ) ) {
+			print "Queueing job $i: $job\n";
+			$queueing = true;
+		} elseif ( time() > $queueTimes[$i] + $jobTimeout ) {
+			print "Timeout, requeueing job $i: $job\n";
+			$queueing = true;
+		} else {
+			$queueing = false;
+		}
+		if ( $queueing ) {
+			if ( !isset( $initialisedWikis[$wiki] ) ) {
+				startWiki( $wiki );
+				$initialisedWikis[$wiki] = true;
+			}
+			enqueue( $job );
+			$queueTimes[$i] = time();
+		}
+	}
+	sleep( 10 );
+}
+
+//------------------------------------------------------------
+
+function getQueueSize() {
+	global $queueSock;
+	if ( fwrite( $queueSock, "size\n" ) === false ) {
+		die( "Unable to write to queue server\n" );
+	}
+
+	$response = fgets( $queueSock );
+	if ( $response === false ) {
+		die( "Unable to read from queue server\n" );
+	}
+	if ( !preg_match( "/^size (\d*)/", $response, $m ) ) {
+		die( "Invalid response to size request\n" );
+	}
+	return $m[1];
+}
+
+function isDone( $job ) {
+	global $basedir;
+	$jobCpFile = "$basedir/checkpoints/" . strtr( $job, ' /', '__' );
+	$lines = @file( $jobCpFile );
+	if ( $lines === false ) {
+		return false;
+	}
+	$test = 'everything=done';
+	foreach ( $lines as $line ) {
+		if ( substr( $line, 0, strlen( $test ) ) == $test ) {
+			return true;
+		}
+	}
+	return false;
+}
+
+function enqueue( $job ) {
+	global $queueSock;
+	if ( false === fwrite( $queueSock, "enq $job\n" ) ) {
+		die( "Unable to write to queue server\n" );
+	}
+
+	# Read and throw away response
+	$response = fgets( $queueSock );
+}
+
+function startWiki( $wiki ) {
+	global $basedir;
+	$lang = str_replace( 'wiki', '', $wiki );
+	print "Starting language $lang\n";
+	passthru( "$basedir/scripts/start-lang $lang" );
+}
+
+function finishWiki( $wiki ) {
+	global $edition, $basedir;
+	$lang = str_replace( 'wiki', '', $wiki );
+	if ( !is_dir( "$basedir/wikipedia/$lang-new" ) ) {
+		# Already compressed
+		print "Already compressed $lang\n";
+		return;
+	}
+	print "Finishing language $lang\n";
+	passthru( "$basedir/scripts/finish-lang $lang $edition >> $basedir/logs/finish.log 2>&1 &" );
+}
+
+?>
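
Job completion is tracked entirely through the checkpoint files written by
dumpHTML.php: isDone() just scans the job's checkpoint for a line beginning
with "everything=done". The equivalent check in Python (a sketch; the
example path is hypothetical):

    # e.g. is_done("/mnt/static/checkpoints/enwiki_3_25")
    def is_done(path):
        try:
            f = open(path)
        except IOError:
            return False
        for line in f:
            if line.startswith("everything=done"):
                return True
        return False
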