r97361 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r97360 | r97361 | r97362 >
Date:02:30, 17 September 2011
Author:ezachte
Status:deferred
Tags:
Comment:
extra validation checks + be smarter when to run reporting + rsync updated files
Modified paths:
  • /trunk/wikistats/dumps/report.sh (modified) (history)

Diff [purge]

Index: trunk/wikistats/dumps/report.sh
@@ -1,27 +1,144 @@
22 #!/bin/bash
 3+
 4+# Update English reports for project $1 whenever input csv files are newer than html reports
 5+# Update reports for other 25+ languages at most once a month, to economize processing time
 6+# Whenever English reports have been updated run archive job
 7+
 8+interval=30 # only update non-English reports once per 'interval' days
 9+force_run_report=1
 10+
 11+function echo2 {
 12+ echo $1
 13+ echo $1 >> log_report_sh.txt
 14+}
 15+
 16+clear
 17+echo2 "---------------"
 18+echo2 "Start report.sh $1 $2"
 19+date >> log_report_sh.txt
 20+
 21+# Validate project code
 22+if [ "$1" == "" ] ; then
 23+ echo2 "Project code missing! Specify as 1st argument one of wb,wk,wn,wp,wq,ws,wv,wx"
 24+ exit
 25+fi
 26+
 27+# Abort when 2nd argument specifies a threshold in days, which is not met
 28+# This prevents costly reporting step when new month has just started and most counting still needs to be done
329 abort_before=$2
430 day_of_month=$(date +"%d")
5 -if [ $day_of_month -lt ${abort_before:=0} ]
6 -then
7 - echo report.sh: day of month $day_of_month lt $abort_before - exit
 31+if [ $day_of_month -lt ${abort_before:=0} ] ; then
 32+ echo2 "report.sh: day of month $day_of_month lt $abort_before - exit"
833 exit
934 fi
1035
11 -echo day of month $day_of_month le $abort_before - continue
 36+if [ "$abort_before" != "" ] ; then
 37+ echo2 "Day of month $day_of_month le $abort_before - continue"
 38+fi
1239
13 -echo "\nStart report.sh $1" >> report.txt
14 -date >> report.txt
15 -
 40+# Once in a while update and cache language names in so many target languages
 41+# Sources are TranslateWiki and interwiki links on English Wikipedia
1642 ./sync_language_files.sh
1743
18 -for x in en ast bg br ca cs da de eo es fr he hu id it ja nl nn pl pt ro ru sk sl sr sv wa zh ;
19 -#for x in en ;
20 -do perl WikiReports.pl -m $1 -l $x -i /a/wikistats/csv_$1/ -o /a/out/out_$1 ;
 44+do_zip=0 # trigger archive step ?
 45+
 46+case "$1" in
 47+ wb) project='Wikibooks' ; dir='wikibooks' ;;
 48+ wk) project='Wiktionaries' ; dir='wiktionary' ;;
 49+ wn) project='Wikinews' ; dir='wikinews' ;;
 50+ wp) project='Wikipedias' ; dir='.' ;;
 51+ wq) project='Wikiquotes' ; dir='wikiquote' ;;
 52+ ws) project='Wikisources' ; dir='wikisource' ;;
 53+ wv) project='Wikiversities' ; dir='wikiversity' ;;
 54+ wx) project='Wikispecial' ; dir='wikispecial' ;;
 55+ *) project='unknown' ; dir='...' ;;
 56+esac
 57+echo2 "Generate and publish reports for project $project"
 58+
 59+for x in en bg br ca cs da de eo es fr he hu id it ja nl nn pl pt ro ru sk sl sr sv wa zh ;
 60+do
 61+
 62+ echo2 ""
 63+ echo2 "Language code $x"
 64+
 65+ # Get timestamp last reports for language x
 66+ x_upper=$( echo "$x" | tr '[:lower:]' '[:upper:]' )
 67+ file="/a/out/out_$1/$x_upper/#index.html"
 68+ now=`date +%s`
 69+ prevrun=`stat -c %Y $file`
 70+ let secs_out="$now - $prevrun"
 71+ let days_out="$secs_out/86400"
 72+ echo2 "File $file generated $days_out days ago"
 73+
 74+ # Get timestamp for most recent csv files
 75+ file="/a/wikistats/csv_$1/StatisticsLog.csv"
 76+ now=`date +%s`
 77+ prevrun=`stat -c %Y $file`
 78+ let secs_csv="$now - $prevrun"
 79+ let days_csv="$secs_csv/86400"
 80+ echo2 "File $file generated $days_csv days ago"
 81+
 82+ # Set source and destination paths for publishing reports
 83+ out=/a/out/out_$1/$x_upper/
 84+ htdocs=/mnt/htdocs/$dir/$x_upper
 85+
 86+ # Check if reports need to be run now for language x
 87+ run_report=0
 88+ if [ $force_run_report -ne 0 ] ; then
 89+ run_report=1
 90+ do_zip=1
 91+ else
 92+ if [ "$secs_csv" -eq "$secs_out" ] ; then
 93+ if [ "$force_run_report" -eq 0 ] ; then
 94+ echo2 "Forced run of reports"
 95+ else
 96+ echo2 "Csv files are newer than reports ... "
 97+ fi
 98+
 99+ if [ "$x" == "en" ] ; then
 100+ do_zip=1
 101+ run_report=1
 102+ else
 103+ if [ $days_out -gt $interval ] ; then
 104+ run_report=1
 105+ else
 106+ if [ "$force_run_report" -ne 0 ] ; then
 107+ echo2 "Skip reporting for non-English languages, only update these once every $interval days"
 108+ fi
 109+ fi
 110+ fi
 111+ else
 112+ echo2 "Reports for language code '$x' are up to date -> skip reporting"
 113+ fi
 114+ fi
 115+
 116+ # If reporting needed now, do it now
 117+ if [ $run_report -eq 1 ] ; then
 118+ echo2 "Run reporting for language $x"
 119+ perl WikiReports.pl -m $1 -l $x -i /a/wikistats/csv_$1/ -o /a/out/out_$1
 120+
 121+ echo2 ""
 122+ echo2 "Copy new and updated files from $out -> $htdocs"
 123+ rsync -a $out/* $htdocs/ >> log_report_sh.txt
 124+ echo2 "List files from target folder older than a day"
 125+ rsync -a $out/* $htdocs/ >> log_report_sh.txt
 126+ find $htdocs/ -mtime +1 | xargs ls -l # rather than 'ls -l $htdocs'
 127+ fi
 128+
21129 done;
22130
 131+# Generate category overviews (deactivated, reports became too large)
23132 # perl WikiReports.pl -c -m $1 -l en -i /a/wikistats/csv_$1/ -o /a/out/out_$1
24133
25 -./zip_out.sh $1
 134+echo2 ""
26135
27 -echo "Ready" >> report.txt
28 -date >> report.txt
 136+# Archive English reports
 137+if [ $do_zip -eq 1 ] ; then
 138+ echo2 "Archive new English reports"
 139+ ./zip_out.sh $1
 140+else
 141+ echo2 "No English reports built. Skip zip phase"
 142+fi
 143+
 144+echo2 ""
 145+echo2 "Ready"

Status & tagging log