Index: trunk/wikistats/dumps/report.sh |
— | — | @@ -1,27 +1,144 @@ |
2 | 2 | #!/bin/bash |
| 3 | + |
| 4 | +# Update English reports for project $1 whenever input csv files are newer than html reports |
| 5 | +# Update reports for other 25+ languages at most once a month, to economize processing time |
| 6 | +# Whenever English reports have been updated run archive job |
| 7 | + |
| 8 | +interval=30 # only update non-English reports once per 'interval' days |
| 9 | +force_run_report=1 |
| 10 | + |
| 11 | +function echo2 { # print message $1 to stdout and append the same line to log_report_sh.txt |
| 12 | + echo "$1" |
| 13 | + echo "$1" >> log_report_sh.txt |
| 14 | +} |
| 15 | + |
| 16 | +clear |
| 17 | +echo2 "---------------" |
| 18 | +echo2 "Start report.sh $1 $2" |
| 19 | +date >> log_report_sh.txt |
| 20 | + |
| 21 | +# Validate project code |
| 22 | +if [ "$1" == "" ] ; then |
| 23 | + echo2 "Project code missing! Specify as 1st argument one of wb,wk,wn,wp,wq,ws,wv,wx" |
| 24 | + exit |
| 25 | +fi |
| 26 | + |
| 27 | +# Abort when the 2nd argument specifies a minimum day of the month that has not been reached yet |
| 28 | +# This prevents costly reporting step when new month has just started and most counting still needs to be done |
3 | 29 | abort_before=$2 |
4 | 30 | day_of_month=$(date +"%d") |
5 | | -if [ $day_of_month -lt ${abort_before:=0} ] |
6 | | -then |
7 | | - echo report.sh: day of month $day_of_month lt $abort_before - exit |
| 31 | +if [ $day_of_month -lt ${abort_before:=0} ] ; then |
| 32 | + echo2 "report.sh: day of month $day_of_month lt $abort_before - exit" |
8 | 33 | exit |
9 | 34 | fi |
10 | 35 | |
11 | | -echo day of month $day_of_month le $abort_before - continue |
| 36 | +if [ "$2" != "" ] ; then # test the raw argument: abort_before has already been defaulted to 0 by the ':=' expansion, so it is never empty here |
| 37 | + echo2 "Day of month $day_of_month ge $abort_before - continue" |
| 38 | +fi |
12 | 39 | |
13 | | -echo "\nStart report.sh $1" >> report.txt |
14 | | -date >> report.txt |
15 | | - |
| 40 | +# Once in a while update and cache language names in so many target languages |
| 41 | +# Sources are TranslateWiki and interwiki links on English Wikipedia |
16 | 42 | ./sync_language_files.sh |
17 | 43 | |
18 | | -for x in en ast bg br ca cs da de eo es fr he hu id it ja nl nn pl pt ro ru sk sl sr sv wa zh ; |
19 | | -#for x in en ; |
20 | | -do perl WikiReports.pl -m $1 -l $x -i /a/wikistats/csv_$1/ -o /a/out/out_$1 ; |
| 44 | +do_zip=0 # trigger archive step ? |
| 45 | + |
| 46 | +case "$1" in |
| 47 | + wb) project='Wikibooks' ; dir='wikibooks' ;; |
| 48 | + wk) project='Wiktionaries' ; dir='wiktionary' ;; |
| 49 | + wn) project='Wikinews' ; dir='wikinews' ;; |
| 50 | + wp) project='Wikipedias' ; dir='.' ;; |
| 51 | + wq) project='Wikiquotes' ; dir='wikiquote' ;; |
| 52 | + ws) project='Wikisources' ; dir='wikisource' ;; |
| 53 | + wv) project='Wikiversities' ; dir='wikiversity' ;; |
| 54 | + wx) project='Wikispecial' ; dir='wikispecial' ;; |
| 55 | + *) project='unknown' ; dir='...' ;; |
| 56 | +esac |
| 57 | +echo2 "Generate and publish reports for project $project" |
| 58 | + |
| 59 | +for x in en bg br ca cs da de eo es fr he hu id it ja nl nn pl pt ro ru sk sl sr sv wa zh ; |
| 60 | +do |
| 61 | + |
| 62 | + echo2 "" |
| 63 | + echo2 "Language code $x" |
| 64 | + |
| 65 | + # Get timestamp last reports for language x |
| 66 | + x_upper=$( echo "$x" | tr '[:lower:]' '[:upper:]' ) |
| 67 | + file="/a/out/out_$1/$x_upper/#index.html" |
| 68 | + now=`date +%s` |
| 69 | + prevrun=`stat -c %Y $file` |
| 70 | + let secs_out="$now - $prevrun" |
| 71 | + let days_out="$secs_out/86400" |
| 72 | + echo2 "File $file generated $days_out days ago" |
| 73 | + |
| 74 | + # Get timestamp for most recent csv files |
| 75 | + file="/a/wikistats/csv_$1/StatisticsLog.csv" |
| 76 | + now=`date +%s` |
| 77 | + prevrun=`stat -c %Y $file` |
| 78 | + let secs_csv="$now - $prevrun" |
| 79 | + let days_csv="$secs_csv/86400" |
| 80 | + echo2 "File $file generated $days_csv days ago" |
| 81 | + |
| 82 | + # Set source and destination paths for publishing reports |
| 83 | + out=/a/out/out_$1/$x_upper/ |
| 84 | + htdocs=/mnt/htdocs/$dir/$x_upper |
| 85 | + |
| 86 | + # Decide whether reports for language x must be rebuilt now: always when force_run_report is set, otherwise only when the csv input is newer than the existing html output (non-English additionally limited to once per 'interval' days) |
| 87 | + run_report=0 |
| 88 | + if [ "$force_run_report" -ne 0 ] ; then |
| 89 | + run_report=1 |
| 90 | + do_zip=1 |
| 91 | + else |
| 92 | + if [ "$secs_csv" -lt "$secs_out" ] ; then # both values are ages in seconds: a smaller age means the csv was modified after the report |
| 93 | + if [ "$force_run_report" -ne 0 ] ; then |
| 94 | + echo2 "Forced run of reports" |
| 95 | + else |
| 96 | + echo2 "Csv files are newer than reports ... " |
| 97 | + fi |
| 98 | + |
| 99 | + if [ "$x" == "en" ] ; then |
| 100 | + do_zip=1 |
| 101 | + run_report=1 |
| 102 | + else |
| 103 | + if [ "$days_out" -gt "$interval" ] ; then |
| 104 | + run_report=1 |
| 105 | + else |
| 106 | + if [ "$force_run_report" -eq 0 ] ; then |
| 107 | + echo2 "Skip reporting for non-English languages, only update these once every $interval days" |
| 108 | + fi |
| 109 | + fi |
| 110 | + fi |
| 111 | + else |
| 112 | + echo2 "Reports for language code '$x' are up to date -> skip reporting" |
| 113 | + fi |
| 114 | + fi |
| 115 | + |
| 116 | + # When run_report is set, generate the reports for language x and copy them from $out to the web folder $htdocs |
| 117 | + if [ "$run_report" -eq 1 ] ; then |
| 118 | + echo2 "Run reporting for language $x" |
| 119 | + perl WikiReports.pl -m "$1" -l "$x" -i "/a/wikistats/csv_$1/" -o "/a/out/out_$1" |
| 120 | + |
| 121 | + echo2 "" |
| 122 | + echo2 "Copy new and updated files from $out -> $htdocs" |
| 123 | + rsync -a "$out"/* "$htdocs/" >> log_report_sh.txt |
| 124 | + echo2 "List files from target folder older than a day" |
| 125 | + # -exec instead of 'xargs ls -l': with no matches xargs would still run a bare 'ls -l' and list the current directory |
| 126 | + find "$htdocs/" -mtime +1 -exec ls -l {} + # rather than 'ls -l $htdocs' |
| 127 | + fi |
| 128 | + |
21 | 129 | done; |
22 | 130 | |
| 131 | +# Generate category overviews (deactivated, reports became too large) |
23 | 132 | # perl WikiReports.pl -c -m $1 -l en -i /a/wikistats/csv_$1/ -o /a/out/out_$1 |
24 | 133 | |
25 | | -./zip_out.sh $1 |
| 134 | +echo2 "" |
26 | 135 | |
27 | | -echo "Ready" >> report.txt |
28 | | -date >> report.txt |
| 136 | +# Archive English reports |
| 137 | +if [ $do_zip -eq 1 ] ; then |
| 138 | + echo2 "Archive new English reports" |
| 139 | + ./zip_out.sh $1 |
| 140 | +else |
| 141 | + echo2 "No English reports built. Skip zip phase" |
| 142 | +fi |
| 143 | + |
| 144 | +echo2 "" |
| 145 | +echo2 "Ready" |