r96314 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r96313‎ | r96314 | r96315 >
Date:08:04, 6 September 2011
Author:ariel
Status:deferred
Tags:
Comment:
Script which creates a list of the n most recent successful XML dumps per project, for mirroring
Modified paths:
  • /branches/ariel/xmldumps-backup/create-rsync-list.sh (added) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/create-rsync-list.sh
@@ -0,0 +1,189 @@
 2+#!/bin/bash
 3+
 4+# This script generates a list of the last n sets of XML dump files
 5+# per project that were successful, adding failed dumps to the list if there
 6+# are not n successful dumps available.
 7+
 8+# Options:
 9+# dumpsnumber -- number of dumps to list
 10+# outputfile -- path to file in which to write the list
 11+# configfile -- path to config file used to generate dumps
 12+
# Print the command-line help text and terminate the script.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; exits the shell with status 1
usage() {
  cat <<EOF
Usage: $0 --dumpsnumber n --outputfile filename [--configfile filename] [--rsyncprefix path]

  dumpsnumber   number of dumps to list
  outputfile    name of file to which we will write the list of dump files
  configfile    name of configuration file for dump generation
                (default value: wikidump.conf)
  rsyncprefix   path to substitute in place of the public path supplied
                in the configuration file, if needed

For example:
  $0 --dumpsnumber 5 --outputfile /data/dumps/public/dumpsfiles_for_rsync.txt --configfile wikidump.conf.testing
EOF
  exit 1
}
 27+
# Validate the option values collected by the command-line parser.
# Globals:   dumpsnumber (read; rewritten in canonical decimal form),
#            outputfile (read), configfile (read)
# Arguments: none
# Outputs:   a diagnostic on stdout when a value is rejected
# Returns:   0 when everything is valid; otherwise calls usage (which exits)
#            or exits with status 1 when the config file is not a regular file
check_args() {
  # The regex test runs first so the arithmetic test only ever sees digits.
  # The 10# prefix forces base 10: without it a zero-padded value such as
  # "08" would be treated as (invalid) octal by shell arithmetic.
  if ! [[ "$dumpsnumber" =~ ^[0-9]+$ ]] || (( 10#$dumpsnumber < 1 )); then
    echo "$0: dumpsnumber must be an integer greater than 0"
    usage
  fi
  # Store the canonical decimal form so later arithmetic on it is safe.
  dumpsnumber=$(( 10#$dumpsnumber ))

  if [ -z "$outputfile" ]; then
    echo "No value was given for outputfile option."
    usage
  fi
  if [ -z "$configfile" ]; then
    echo "No value was given for configfile option."
    usage
  fi
  if [ ! -f "$configfile" ]; then
    echo "$0: can't open configuration file $configfile, exiting..."
    exit 1
  fi
}
 54+
 55+
# Classify the dated dump directories of one project as complete or failed.
# Globals:   publicdir (read), p (read) - directories are $publicdir/$p/20*;
#            complete_dumps, failed_dumps (arrays, appended to)
# Arguments: none
# Outputs:   none
# Returns:   0
#
# Entries are appended newest-first (reverse name order). A directory whose
# status.html has no "Dump complete" line (or no status.html) is added to
# neither array.
listdumpsforproject() {
  local -a candidates
  local day status
  local -i i

  # cannot rely on timestamp. sometimes we have rerun a phase in
  # some earlier dump and have it completed later than a later dump,
  # or we may have two en pedia runs going at once in different
  # phases. So order by directory name instead.
  # Globbing (rather than parsing ls output) keeps paths with whitespace
  # intact; an unmatched pattern stays literal and fails the -d test below.
  candidates=("$publicdir/$p"/20*)

  # The glob yields ascending name order; walk it backwards for newest-first.
  for (( i = ${#candidates[@]} - 1; i >= 0; i-- )); do
    day=${candidates[i]}
    # tools, mw, static...
    [ -d "$day" ] || continue
    status="$day/status.html"

    # grep errors (e.g. missing status.html) are deliberately silenced:
    # such a directory simply counts as neither complete nor failed.
    if grep "Dump complete" "$status" 2>/dev/null | grep -qv "failed"; then
      complete_dumps+=("$day")
    fi
    if grep "Dump complete" "$status" 2>/dev/null | grep -q "failed"; then
      failed_dumps+=("$day")
    fi
  done
}
 77+
# Append the paths of the publishable files in one dump directory to the
# temporary output list.
# Globals:   d (read) - dump directory to scan
#            outputfile (read) - the list is appended to "$outputfile.tmp"
#            publicdir (read) - leading path to be replaced, when requested
#            rsyncprefix (read) - replacement for publicdir; the literal
#                                 string "false" means "leave paths alone"
# Arguments: none
# Outputs:   one path per line appended to $outputfile.tmp, grouped by
#            extension in the order gz, bz2, 7z, html, txt
# Returns:   status of the final redirection
list_files_in_dir() {
  local ext path

  for ext in gz bz2 7z html txt; do
    for path in "$d"/*."$ext"; do
      # An unmatched glob is left as the literal pattern; skip it.
      [ -e "$path" ] || continue
      # Plain prefix replacement instead of sed, so characters that are
      # special in a regex or a sed replacement ('.', '&', '|', '\') in
      # either path are treated literally.
      if [ "$rsyncprefix" != "false" ] && [[ "$path" == "$publicdir"* ]]; then
        path=$rsyncprefix${path#"$publicdir"}
      fi
      printf '%s\n' "$path"
    done
  done >> "$outputfile.tmp"   # also creates the file when nothing matched
}
 96+
# List the files of up to $dumpsnumber dumps of the current project:
# complete dumps first, topped up with failed dumps when there are not
# enough complete ones.
# Globals:   dumpsnumber (read) - how many dumps to list
#            d (set for each chosen dump directory, read by list_files_in_dir)
# Arguments: none
# Outputs:   whatever list_files_in_dir writes
# Returns:   status of the last list_files_in_dir call, or 0 if none ran
get_list_of_files() {
  # Function-local arrays; listdumpsforproject fills them in through
  # bash's dynamic scoping.
  declare -a complete_dumps=()
  declare -a failed_dumps=()
  local -a chosen
  local -i wanted missing
  local d

  listdumpsforproject

  # Force base 10 so a zero-padded count such as "08" is not read as octal.
  wanted=$(( 10#$dumpsnumber ))

  # Keep the selection as an array so directory names survive intact even
  # if they contain whitespace.
  chosen=("${complete_dumps[@]:0:wanted}")
  missing=$(( wanted - ${#chosen[@]} ))
  if (( missing > 0 )); then
    chosen+=("${failed_dumps[@]:0:missing}")
  fi

  for d in "${chosen[@]}"; do
    list_files_in_dir
  done
}
 124+
# --- Command-line parsing -------------------------------------------------
# Two options are mandatory and two optional, each taking one value, so a
# valid command line has between 4 and 8 words.
if [ "$#" -lt 4 ] || [ "$#" -gt 8 ]; then
  usage
fi

# Defaults; check_args rejects the ones that are still empty afterwards.
dumpsnumber=""
outputfile=""
configfile="wikidump.conf"
rsyncprefix="false"   # the literal "false" means: do not rewrite paths

while [ "$#" -gt 0 ]; do
  case "$1" in
    --dumpsnumber | --outputfile | --configfile | --rsyncprefix)
      ;;
    *)
      echo "$0: Unknown option $1"
      usage
      ;;
  esac

  # Every option takes a value; refuse a trailing option that has none
  # instead of silently treating the value as empty.
  if [ "$#" -lt 2 ]; then
    echo "$0: option $1 requires a value"
    usage
  fi

  case "$1" in
    --dumpsnumber) dumpsnumber="$2" ;;
    --outputfile) outputfile="$2" ;;
    --configfile) configfile="$2" ;;
    --rsyncprefix) rsyncprefix="$2" ;;
  esac
  shift 2
done

check_args
 151+
# --- Main -----------------------------------------------------------------
# Scratch directory: value of the "temp=" line in the config file, else /tmp.
tempdir=$(sed -n 's/^temp=//p' "$configfile")
if [ -z "$tempdir" ]; then
  tempdir="/tmp"
fi

dblist="${tempdir}/all.dblist"

# -N only re-downloads when the remote copy is newer than the local one.
wget -P "$tempdir" -N -q 'http://noc.wikimedia.org/conf/all.dblist'

if [ ! -f "$dblist" ]; then
  echo "$0: failed to retrieve list of valid projects that are dumped, exiting."
  exit 1
fi

# Root of the published dumps: value of the "public=" line in the config
# file, else /dumps/public.
publicdir=$(sed -n 's/^public=//p' "$configfile")
if [ -z "$publicdir" ]; then
  publicdir="/dumps/public"
fi

# The list is built by appending to $outputfile.tmp, so a leftover from an
# earlier aborted run must go first or its entries would end up in the
# new list as well.
rm -f -- "$outputfile.tmp"

# One project name per line. Read on fd 3 so nothing called inside the loop
# can consume the list through stdin; the "|| [ -n ... ]" part handles a
# final line that lacks a trailing newline.
while read -r p <&3 || [ -n "$p" ]; do
  [ -n "$p" ] || continue
  get_list_of_files
done 3< "$dblist"

# do this last so that if someone is using the file in the meantime, they aren't
# interrupted
if [ -f "$outputfile" ]; then
  mv "$outputfile" "$outputfile.old"
fi
if [ -f "$outputfile.tmp" ]; then
  mv "$outputfile.tmp" "$outputfile"
  exit 0
else
  echo "$0: no output file created. Something is wrong."
  exit 1
fi
 189+
 190+
Property changes on: branches/ariel/xmldumps-backup/create-rsync-list.sh
___________________________________________________________________
Added: svn:eol-style
1191 + native
Added: svn:executable
2192 + *

Status & tagging log