r98520 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r98520
Date: 14:42, 30 September 2011
Author: ezachte
Status: deferred
Tags:
Comment:
added comments, removed test code
Modified paths:
  • /trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl (modified) (history)
  • /trunk/wikistats/analytics/_readme.txt (modified) (history)
  • /trunk/wikistats/analytics/analytics_generate_csv_files.sh (modified) (history)
  • /trunk/wikistats/analytics/analytics_upd.sh (modified) (history)

Diff

Index: trunk/wikistats/analytics/analytics_generate_csv_files.sh
@@ -1,21 +1,47 @@
 #!/bin/sh
+
+# Prepare several csv files, ready for importing into the analytics database
+# All generated files have _in_ in their name, signalling that they contain data ready for importing into the database
+# One input record corresponds to one database record
+
 ulimit -v 8000000

 clear
 cd /a/analytics

-perl AnalyticsPrepBinariesData.pl -i /a/wikistats/ -o /a/analytics/test/

-# add or replace data from newest comScore csv files (last 14 months) into master files (full history)
-# and generate database input csv file from it
+# AnalyticsPrepBinariesData.pl reads counts for binaries which were generated by wikistats
+# and which reside in /a/wikistats/csv_[project code]/StatisticsPerBinariesExtension.csv
+# It filters and reorganizes the data and produces analytics_in_binaries.csv
+# The output csv contains: project code, language, month, extension name, count

+perl AnalyticsPrepBinariesData.pl -i /a/wikistats/ -o /a/analytics/
+
+# AnalyticsPrepComscoreData.pl scans /a/analytics/comscore for the newest comScore csv files (with data for the last 14 months),
+# parses those csv files, adds/replaces their data in the master files (containing the full history)
+# and generates the input csv file analytics_in_comscore.csv, ready for importing into the database
+#
+# note: these csv files were manually downloaded from http://mymetrix.comscore.com/app/report.aspx
+# and given more descriptive names; the script finds the newest files based on a partial name search
+#
 # -r replace (default is add only)
-# -i input folder, contains manually downloaded csv files from comScore (or xls, converted to csv)
+# -i input folder, contains manually downloaded csv files from comScore (or xls files manually converted to csv)
 # -m master files with full history
-# -o output csv file, with reach and UV's per region and UV's per top web property, ready for import into database
+# -o output csv file, with reach per region, UV's per region and UV's per top web property, ready for import into the database
+
 perl AnalyticsPrepComscoreData.pl -r -i /a/analytics/comscore -m /a/analytics -o /a/analytics

+# AnalyticsPrepWikiCountsOutput.pl reads a plethora of fields from several csv files from the wikistats process
+# It filters and reorganizes the data and produces analytics_in_wikistats.csv, ready for import into the analytics database
+
 perl AnalyticsPrepWikiCountsOutput.pl -i /a/wikistats/ -o /a/analytics

+# analytics_in_page_views.csv is written daily by WikiCountsSummarizeProjectCounts.pl
+# (part of the /home/ezachte/pageviews_monthly.sh job),
+# which processes hourly projectcounts files (per-wiki page view totals for one hour) from http://dammit.lt/wikistats
+# and generates several files at different aggregation levels
+# The only action here is to copy the data to this folder, to have everything in one place
+# note: despite what the folder name suggests, this file contains stats for all projects
+
 cp /a/wikistats/csv_wp/analytics_in_page_views.csv .
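For illustration, the reshaping step that AnalyticsPrepBinariesData.pl performs could look roughly like the sketch below. Only the output columns (project code, language, month, extension name, count) and the input path come from the comments above; the input column layout, the example project code and the loop structure are assumptions, not the actual script.

    #!/usr/bin/perl
    # Minimal sketch, not the real AnalyticsPrepBinariesData.pl:
    # reshape one wikistats binaries file into analytics_in_binaries.csv rows.
    use strict;
    use warnings;

    my $project = 'wp';   # assumed example project code
    open my $in,  '<', '/a/wikistats/csv_wp/StatisticsPerBinariesExtension.csv' or die $!;
    open my $out, '>', '/a/analytics/analytics_in_binaries.csv' or die $!;
    while (my $line = <$in>)
    {
      chomp $line;
      # assumed input layout: language, month, extension name, count
      my ($language, $month, $ext, $count) = split /,/, $line;
      print {$out} "$project,$language,$month,$ext,$count\n";
    }
    close $in;
    close $out;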
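The "newest files based on a partial name search" step mentioned for AnalyticsPrepComscoreData.pl can be done with a glob plus a sort on modification time. A minimal sketch; the name fragment 'reach' is an assumption, only the folder comes from the comments above:

    #!/usr/bin/perl
    # Hypothetical sketch: find the newest comScore csv file whose name
    # contains a given fragment, newest by file modification time.
    use strict;
    use warnings;

    sub newest_matching_file
    {
      my ($dir, $fragment) = @_;
      my @candidates = glob ("$dir/*$fragment*.csv");
      # sort by mtime, oldest first, so the newest file ends up last
      my @by_mtime = sort { (stat $a)[9] <=> (stat $b)[9] } @candidates;
      return $by_mtime[-1];  # undef when nothing matches
    }

    my $newest = newest_matching_file ("/a/analytics/comscore", "reach");
    print "newest: $newest\n" if defined $newest;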
Index: trunk/wikistats/analytics/analytics_upd.sh
@@ -3,4 +3,4 @@
 
 ./analytics_generate_csv_files.sh
 
-mysql -u analytics -h project2.wikimedia.org -preport < analytics_refresh_from_csv.txt
\ No newline at end of file
+mysql -u analytics -h project2.wikimedia.org -preport < analytics_refresh_from_csv.txt
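analytics_refresh_from_csv.txt itself is not part of this diff, but per the readme it empties and reloads the tables from the csv files in this folder. A rough Perl/DBI equivalent of one such empty/reload cycle is sketched below; the database name, table name and csv layout are assumptions (the real job simply pipes SQL into the mysql client as shown above):

    #!/usr/bin/perl
    # Hypothetical sketch of one empty/reload cycle; not the actual
    # analytics_refresh_from_csv.txt, whose table names are not shown here.
    use strict;
    use warnings;
    use DBI;

    my $dbh = DBI->connect (
      'DBI:mysql:database=analytics;host=project2.wikimedia.org;mysql_local_infile=1',
      'analytics', 'report', { RaiseError => 1 });

    $dbh->do ("TRUNCATE TABLE comscore");   # assumed table name
    $dbh->do ("LOAD DATA LOCAL INFILE '/a/analytics/analytics_in_comscore.csv'
               INTO TABLE comscore
               FIELDS TERMINATED BY ','");
    $dbh->disconnect;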
Index: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl
@@ -47,6 +47,7 @@
 
 sub ParseArguments
 {
+  print "ParseArguments\n" ;
   my (@options, $arguments) ;
 
   getopt ("io", \%options) ;
@@ -80,6 +81,7 @@
 
 sub ReadStatisticsMonthly
 {
+  print "ReadStatisticsMonthly\n" ;
   &ReadStatisticsMonthlyForProject ("wb") ;
   &ReadStatisticsMonthlyForProject ("wk") ;
   &ReadStatisticsMonthlyForProject ("wn") ;
@@ -252,6 +254,7 @@
 
 sub FindLargestWikis
 {
+  print "FindLargestWikis\n" ;
   print "Largest projects (most accumulated very active editors):\n";
   @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ;
   $rank = 0 ;
@@ -269,18 +272,18 @@
   foreach $project_language (keys %largest_projects)
   {
     ($project,$language) = split (',', $project_language) ;
-    if ($data2 {"$project,$language,$yyyymm"} eq '')
+     if ($data2 {"$project,$language,$yyyymm"} eq '')
     {
       print "No data yet for large wiki $project_language for $yyyymm -> skip month $yyyymm\n" ;
       $months {$yyyymm} = 0 ;
     }
   }
  }
-  exit ;
 }
 
 sub WriteMonthlyData
 {
+  print "WriteMonthlyData\n" ;
   my $file_csv_out = "$path_out/$file_csv_analytics_in" ;
   open CSV_OUT, '>', $file_csv_out ;
   foreach $project_wiki_month (sort keys %data1)
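The prints added above give a bare progress trace for each phase of this long-running script. A timestamped variant (hypothetical, not part of this commit) would make the job log easier to correlate with runtimes:

    # hypothetical helper; the commit itself only adds plain print statements
    sub trace
    {
      my ($phase) = @_ ;
      my ($sec, $min, $hour) = (localtime)[0..2] ;
      printf "%02d:%02d:%02d %s\n", $hour, $min, $sec, $phase ;
    }

    # usage, mirroring the prints added in this revision:
    # &trace ("WriteMonthlyData") ;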
Index: trunk/wikistats/analytics/_readme.txt
@@ -6,7 +6,7 @@
 
 == analytics_upd.sh ==
 Prepares new csv files (delegated to analytics_generate_csv_files.sh),
-and empties and reloads all tables for which csv files are in this folder.
+and empties/reloads all tables for which csv files are in this folder.
 It executes SQL from analytics_refresh_from_csv.txt
 
 == CSV files ==
