Index: trunk/wikistats/analytics/analytics_generate_csv_files.sh |
— | — | @@ -1,21 +1,47 @@ |
2 | 2 | #!/bin/sh |
| 3 | + |
| 4 | +# Prepare several csv files, ready for importing into the analytics database |
| 5 | +# All generated files have _in_ in their name, signalling that they contain data ready for importing into the database |
| 6 | +# One input record corresponds to one database record |
| 7 | + |
3 | 8 | ulimit -v 8000000 |
4 | 9 | |
5 | 10 | clear |
6 | 11 | cd /a/analytics |
7 | 12 | |
8 | | -perl AnalyticsPrepBinariesData.pl -i /a/wikistats/ -o /a/analytics/test/ |
9 | 13 | |
10 | | -# add or replace data from newest comScore csv files (last 14 months) into master files (full history) |
11 | | -# and generate database input csv file from it |
| 14 | +# AnalyticsPrepBinariesData.pl reads counts for binaries which were generated by wikistats |
| 15 | +# and which reside in /a/wikistats/csv_[project code]/StatisticsPerBinariesExtension.csv |
| 16 | +# It filters and reorganizes data and produces analytics_in_binaries.csv |
| 17 | +# Output csv contains: project code, language, month, extension name, count |
12 | 18 | |
| 19 | +perl AnalyticsPrepBinariesData.pl -i /a/wikistats/ -o /a/analytics/ |
| 20 | + |
| 21 | +# AnalyticsPrepComscoreData.pl scans /a/analytics/comscore for the newest comScore csv files (with data for the last 14 months), |
| 22 | +# parses those csv files, adds/replaces their data in the master files (containing the full history) |
| 23 | +# and generates the input csv file analytics_in_comscore.csv, ready for importing into the database |
| 24 | +# |
| 25 | +# note: these csv files were manually downloaded from http://mymetrix.comscore.com/app/report.aspx |
| 26 | +# and given more descriptive names; the script finds the newest files based on a partial name search (sketched below, after this diff) |
| 27 | +# |
13 | 28 | # -r replace (default is add only) |
14 | | -# -i input folder, contains manually downloaded csv files from comScore (or xls, converted to csv) |
| 29 | +# -i input folder, contains manually downloaded csv files from comScore (or xls files manually converted to csv) |
15 | 30 | # -m master files with full history |
16 | | -# -o output csv file, with reach and UV's per region and UV's per top web property, ready for import into database |
| 31 | +# -o output csv file, with reach per region, UV's per region and UV's per top web property, ready for import into the database |
| 32 | + |
17 | 33 | perl AnalyticsPrepComscoreData.pl -r -i /a/analytics/comscore -m /a/analytics -o /a/analytics |
18 | 34 | |
| 35 | +# AnalyticsPrepWikiCountsOutput.pl reads a plethora of fields from several csv files produced by the wikistats process |
| 36 | +# It filters and reorganizes data and produces analytics_in_wikistats.csv, ready for import into the analytics database |
| 37 | + |
19 | 38 | perl AnalyticsPrepWikiCountsOutput.pl -i /a/wikistats/ -o /a/analytics |
20 | 39 | |
| 40 | +# analytics_in_page_views.csv is written daily by WikiCountsSummarizeProjectCounts.pl |
| 41 | +# (part of the /home/ezachte/pageviews_monthly.sh job), |
| 42 | +# which processes hourly projectcounts files (per-wiki page view totals for one hour) from http://dammit.lt/wikistats |
| 43 | +# and generates several files on different aggregation levels |
| 44 | +# The only action here is to copy the file to this folder, to have everything in one place |
| 45 | +# note: although the folder name csv_wp suggests otherwise, this file contains stats for all projects |
| 46 | + |
21 | 47 | cp /a/wikistats/csv_wp/analytics_in_page_views.csv . |
22 | 48 | |
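Note on the partial name search mentioned in the comScore comments above: a minimal Perl sketch of how the newest-file lookup could work, assuming the script matches on a name fragment and picks the most recently modified file. The directory and fragment are illustrative; this is one plausible reading of "newest", not the actual AnalyticsPrepComscoreData.pl code, which may instead parse a date embedded in the descriptive file names.

use strict ;
use warnings ;

# Hypothetical helper: return the newest file in $dir whose name contains $fragment
sub NewestMatchingFile
{
  my ($dir, $fragment) = @_ ;
  opendir my $dh, $dir or die "Cannot open $dir: $!" ;
  my @matches = grep { /\Q$fragment\E/ && -f "$dir/$_" } readdir $dh ;
  closedir $dh ;
  # -M gives file age in days, so an ascending sort puts the newest file first
  my ($newest) = sort { -M "$dir/$a" <=> -M "$dir/$b" } @matches ;
  return defined ($newest) ? "$dir/$newest" : undef ;
}

# e.g. my $file = NewestMatchingFile ('/a/analytics/comscore', 'Reach') ;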
Index: trunk/wikistats/analytics/analytics_upd.sh |
— | — | @@ -3,4 +3,4 @@ |
4 | 4 | |
5 | 5 | ./analytics_generate_csv_files.sh |
6 | 6 | |
7 | | -mysql -u analytics -h project2.wikimedia.org -preport < analytics_refresh_from_csv.txt |
\ No newline at end of file |
| 7 | +mysql -u analytics -h project2.wikimedia.org -preport < analytics_refresh_from_csv.txt |
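Per the readme below, analytics_refresh_from_csv.txt empties and reloads every table for which a csv file exists in this folder. A minimal Perl/DBI sketch of that empty-and-reload pattern follows; the database name, table name and csv file are assumptions (the actual statements live in analytics_refresh_from_csv.txt, executed by the mysql call above).

use strict ;
use warnings ;
use DBI ;

# Credentials mirror the mysql call above; the database name is an assumption
# mysql_local_infile=1 is required for LOAD DATA LOCAL INFILE
my $dbh = DBI->connect ("DBI:mysql:database=analytics;host=project2.wikimedia.org;mysql_local_infile=1",
                        "analytics", "report", { RaiseError => 1 }) ;

# Hypothetical table/file pair; the real sql file repeats this for each csv file
$dbh->do ("TRUNCATE TABLE comscore") ;
$dbh->do ("LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv' INTO TABLE comscore FIELDS TERMINATED BY ','") ;

$dbh->disconnect ;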
Index: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl |
— | — | @@ -47,6 +47,7 @@ |
48 | 48 | |
49 | 49 | sub ParseArguments |
50 | 50 | { |
| 51 | + print "ParseArguments\n" ; |
51 | 52 | my (@options, $arguments) ; |
52 | 53 | |
53 | 54 | getopt ("io", \%options) ; |
— | — | @@ -80,6 +81,7 @@ |
81 | 82 | |
82 | 83 | sub ReadStatisticsMonthly |
83 | 84 | { |
| 85 | + print "ReadStatisticsMonthly\n" ; |
84 | 86 | &ReadStatisticsMonthlyForProject ("wb") ; |
85 | 87 | &ReadStatisticsMonthlyForProject ("wk") ; |
86 | 88 | &ReadStatisticsMonthlyForProject ("wn") ; |
— | — | @@ -252,6 +254,7 @@ |
253 | 255 | |
254 | 256 | sub FindLargestWikis |
255 | 257 | { |
| 258 | + print "FindLargestWikis\n" ; |
256 | 259 | print "Largest projects (most accumulated very active editors):\n"; |
257 | 260 | @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ; |
258 | 261 | $rank = 0 ; |
— | — | @@ -269,18 +272,18 @@ |
270 | 273 | foreach $project_language (keys %largest_projects) |
271 | 274 | { |
272 | 275 | ($project,$language) = split (',', $project_language) ; |
273 | | - if ($data2 {"$project,$language,$yyyymm"} eq '') |
| 276 | + if ($data2 {"$project,$language,$yyyymm"} eq '') |
274 | 277 | { |
275 | 278 | print "No data yet for large wiki $project_language for $yyyymm-> skip month $yyyymm\n" ; |
276 | 279 | $months {$yyyymm} = 0 ; |
277 | 280 | } |
278 | 281 | } |
279 | 282 | } |
280 | | - exit ; |
281 | 283 | } |
282 | 284 | |
283 | 285 | sub WriteMonthlyData |
284 | 286 | { |
| 287 | + print "WriteMonthlyData\n" ; |
285 | 288 | my $file_csv_out = "$path_out/$file_csv_analytics_in" ; |
286 | 289 | open CSV_OUT, '>', $file_csv_out ; |
287 | 290 | foreach $project_wiki_month (sort keys %data1) |
Index: trunk/wikistats/analytics/_readme.txt |
— | — | @@ -6,7 +6,7 @@ |
7 | 7 | |
8 | 8 | == analytics_upd.sh == |
9 | 9 | Prepares new csv files (delegated to analytics_generate_csv_files.sh), |
10 | | -and empties and reloads all tables for which csv files are in this folder. |
| 10 | +and empties/reloads all tables for which csv files are in this folder. |
11 | 11 | It executes SQL from analytics_refresh_from_csv.txt |
12 | 12 | |
13 | 13 | == CSV files == |