r113066 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r113065‎ | r113066 | r113067 >
Date:19:25, 5 March 2012
Author:rfaulk
Status:deferred
Tags:
Comment:
modularized the main visualization implementation in order to more easily modify parameters
Modified paths:
  • /trunk/tools/wsor/message_templates/R/template_analysis.R (modified) (history)
  • /trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R (modified) (history)

Diff [purge]

Index: trunk/tools/wsor/message_templates/R/template_analysis.R
@@ -47,7 +47,7 @@
4848 # GLOBALS assumed to exist: warn_test, warn_control, blocks_test, blocks_control, edits_test, edits_control
4949 #
5050
51 -process.data.frames <- function(min_edits_before=0, min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf) {
 51+process.data.frames <- function(min_edits_before=0, min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, min_revisions_after = 0, registered=TRUE) {
5252
5353 # MERGE THE METRICS AND ADD TEMPLATE COLS
5454
@@ -69,16 +69,18 @@
7070
7171 maximum_warns_before <- 0
7272
73 - IP_regex <- "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
74 - IP_regex_not <- '.*[a-zA-z].*'
 73+ if (!registered)
 74+ IP_regex <- "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
 75+ else
 76+ IP_regex <- '.*[a-zA-z].*'
7577
7678 condition_1 <- TRUE # merged_test$blocks_before > 0
7779 condition_2 <- merged_test$blocks_after == 0
7880 condition_3 <- merged_test$ns_0_revisions_before >= min_edits_before & merged_test$ns_0_revisions_before <= max_edits_before
7981 condition_4 <- merged_test$ns_0_revisions_deleted_before >= min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <= max_deleted_edits_before
8082 condition_5 <- merged_test$warns_before <= maximum_warns_before
81 - condition_6 <- filter.list.by.regex(IP_regex_not, merged_test$recipient_name)
82 - condition_7 <- merged_test$ns_0_revisions_after_0_3 > 0
 83+ condition_6 <- filter.list.by.regex(IP_regex, merged_test$recipient_name)
 84+ condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_revisions_after
8385
8486 indices <- condition_1 & condition_2 & condition_3 & condition_4 & condition_5 & condition_6 & condition_7
8587 merged_test <<- merged_test[indices,]
@@ -88,8 +90,8 @@
8991 condition_3 <- merged_control$ns_0_revisions_before >= min_edits_before & merged_control$ns_0_revisions_before <= max_edits_before
9092 condition_4 <- merged_control$ns_0_revisions_deleted_before >= min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <= max_deleted_edits_before
9193 condition_5 <- merged_control$warns_before <= maximum_warns_before
92 - condition_6 <- filter.list.by.regex(IP_regex_not, merged_control$recipient_name)
93 - condition_7 <- merged_control$ns_0_revisions_after_0_3 > 0
 94+ condition_6 <- filter.list.by.regex(IP_regex, merged_control$recipient_name)
 95+ condition_7 <- merged_control$ns_0_revisions_after_0_3 >= min_revisions_after
9496
9597 indices <- condition_1 & condition_2 & condition_3 & condition_4 & condition_5 & condition_6 & condition_7
9698 merged_control <<- merged_control[indices,]
@@ -149,13 +151,13 @@
150152 # A pseudo main method to allow the script to be executed as a batch
151153 #
152154
153 -execute.main <- function(test_samples, control_samples) {
 155+execute.main <- function() {
154156
155157 # IMPORT DATA
156158
157 - template_indices_control <- c(78,81) # c(84, 0) # c(107,109,111,113,115) # c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76)
158 - template_indices_test <- c(79,82) # c(86, 0) # c(108,110,114,116) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77)
159 - fname_first_part <- paste(home_dir,"output/metrics_1109_1209_z",sep="") # paste(home_dir,"output/metrics_1108_1202_z",sep="") # paste(home_dir,"output/metrics_1122_1222_z",sep="") # paste(home_dir,"output/metrics_pt_z",sep="") # paste(home_dir,"output/metrics_1018_1119_z",sep="") # "/home/rfaulk/WSOR/message_templates/output/metrics_pt_z"
 159+ template_indices_control <- c(60,62,66,76) # c(107,109,111,113,115) # c(78,81) # c(84, 0) # c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76)
 160+ template_indices_test <- c(61,63,67,77) # c(108,110,114,116) # c(79,82) # c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77)
 161+ fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="") # paste(home_dir,"output/metrics_1122_1222_z",sep="") # paste(home_dir,"output/metrics_1109_1209_z",sep="") # paste(home_dir,"output/metrics_1108_1202_z",sep="") # paste(home_dir,"output/metrics_pt_z",sep="") # paste(home_dir,"output/metrics_1018_1119_z",sep="")
160162
161163 # import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part)
162164
@@ -165,7 +167,8 @@
166168
167169 # print("")
168170 # print("Processing data frames.")
169 - process.data.frames(1,0,Inf,Inf)
 171+ registered = TRUE
 172+ process.data.frames(3,0,Inf,Inf,registered)
170173
171174
172175
@@ -177,7 +180,7 @@
178181
179182 # LOGISTIC REGRESSION MODELLING:
180183
181 - all_data <- append.data.frames(merged_test, merged_control)
 184+ all_data <<- append.data.frames(merged_test, merged_control)
182185 # summary(glm(template ~ edits_decrease, data=all_data, family=binomial(link="logit")))
183186 # summary(glm(template ~ edits_del_decrease, data=all_data, family=binomial(link="logit")))
184187
Index: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
@@ -13,6 +13,8 @@
1414 #
1515 # Basic plotting for test vs. control
1616 #
 17+# e.g. call :: plot.control.vs.test("Huggle Short 2 Experiment (reduced) - Decrease in Editor Activity", "Minimum Edits before Template Posting", "Mean % Decrease in Edit Activity", edit_decrease_means_test, edit_decrease_means_control)
 18+#
1719
1820 plot.control.vs.test <- function(title, x_label, y_label, test_samples, control_samples) {
1921
@@ -62,44 +64,68 @@
6365
6466 # IMPORT DATA
6567
66 -# c(84, 0) c(107,109,111,113,115) c(78,81) c(1,4) c(84,99,101,103,105)
67 -template_indices_control <- c(60,62,64,66,68,70,72,74,76)
68 -
69 -# c(85, 0) c(108,110,114,116) c(79,82) c(2,3) c(85,86,100,102,104,106)
70 -template_indices_test <- c(61,63,65,67,69,71,73,75,77)
71 -
72 -# paste(home_dir,"output/metrics_1108_1202_z",sep="") paste(home_dir,"output/metrics_1122_1222_z",sep="") paste(home_dir,"output/metrics_1109_1209_z",sep="") paste(home_dir,"output/metrics_pt_z",sep="") "/home/rfaulk/WSOR/message_templates/output/metrics_pt_z"
73 -fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="")
74 -
75 -import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part)
76 -
77 -
78 -
79 -# PROCESS DATA
80 -
81 -edit_count_before_filter <- 1:10
82 -
83 -edit_decrease_means_test <- c()
84 -edit_decrease_means_control <- c()
85 -
86 -for (i in edit_count_before_filter)
 68+line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper = 10, import_metrics = FALSE, save_plot = TRUE, registered = TRUE, error_bars = FALSE)
8769 {
88 - process.data.frames(i,0,Inf,Inf)
 70+ # c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76) c(60,62,66,76) c(107,109,111,113,115) c(84,99,101,103,105)
 71+ template_indices_control <- c(84, 0)
8972
90 - edit_decrease_means_test <- c(edit_decrease_means_test, mean(merged_test$edits_decrease))
91 - edit_decrease_means_control <- c(edit_decrease_means_control, mean(merged_control$edits_decrease))
 73+ # c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77) c(61,63,67,77) c(108,110,114,116) c(85,86,100,102,104,106)
 74+ template_indices_test <- c(85, 0)
 75+
 76+ # paste(home_dir,"output/metrics_1109_1209_z",sep="") paste(home_dir,"output/metrics_pt_z",sep="") paste(home_dir,"output/metrics_1018_1119_z",sep="") paste(home_dir,"output/metrics_1122_1222_z",sep="")
 77+ fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="")
 78+
 79+ if (import_metrics)
 80+ import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part)
 81+
 82+
 83+
 84+ # PROCESS DATA
 85+
 86+ edit_count_before_filter <- edit_count_min_lower : edit_count_min_upper
 87+
 88+ data_counts_test <<- c()
 89+ data_counts_control <<- c()
 90+
 91+ edit_decrease_means_test <<- c()
 92+ edit_decrease_means_control <<- c()
 93+
 94+ edit_decrease_sd_test <<- c()
 95+ edit_decrease_sd_control <<- c()
 96+
 97+
 98+ if (registered)
 99+ reg_str = 'registered'
 100+ else
 101+ reg_str = 'non_registered'
 102+
 103+ for (i in edit_count_before_filter)
 104+ {
 105+ process.data.frames(i,0,Inf,Inf,registered=registered,min_revisions_after=0)
 106+
 107+ edit_decrease_means_test <<- c(edit_decrease_means_test, mean(merged_test$edits_decrease) * 100)
 108+ edit_decrease_means_control <<- c(edit_decrease_means_control, mean(merged_control$edits_decrease) * 100)
 109+
 110+ edit_decrease_sd_test <<- c(edit_decrease_sd_test, sd(merged_test$edits_decrease * 100))
 111+ edit_decrease_sd_control <<- c(edit_decrease_sd_control, sd(merged_control$edits_decrease * 100))
 112+
 113+ data_counts_test <<- c(data_counts_test, length(merged_test$edits_decrease))
 114+ data_counts_control <<- c(data_counts_control, length(merged_control$edits_decrease))
 115+ }
 116+
 117+ # PLOT DATA
 118+
 119+ plot_title = paste("Huggle Short 1 & 2 Experiment (", reg_str, ") - Decrease in Editor Activity", sep="")
 120+
 121+ df <- data.frame(x=1:length(edit_decrease_means_test), y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control, y_test_sd=edit_decrease_sd_test, y_ctrl_sd=edit_decrease_sd_control)
 122+ p <- ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + geom_line(aes(y=y_ctrl,colour="Control"))
 123+
 124+ if (error_bars)
 125+ p <- p + geom_errorbar(aes(ymin = y_test - y_test_sd, ymax = y_test + y_test_sd, colour="Test"), width=0.2) + geom_errorbar(aes(ymin = y_ctrl - y_ctrl_sd, ymax = y_ctrl + y_ctrl_sd, colour="Control"), width=0.2)
 126+
 127+ p <- p + scale_x_continuous('Minimum Edits before Template Posting') + scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = plot_title, legend.title = theme_blank())
 128+
 129+ if (save_plot)
 130+ ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_short_1_2_',reg_str,'.png',sep=""),width=8)
92131 }
93132
94 -# PLOT DATA
95 -
96 -# plot.control.vs.test("Huggle Short 2 Experiment - Decrease in Editor Activity", "Minimum Edits before Template Posting", "Mean % Decrease in Edit Activity", edit_decrease_means_test, edit_decrease_means_control)
97 -
98 -# ggplot
99 -
100 -plot_title = "Huggle 3 - % decrease of Edit Actitivity after Posting"
101 -
102 -df <- data.frame(x=1:length(edit_decrease_means_test), y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control)
103 -p = ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + geom_line(aes(y=y_ctrl,colour="Control"))
104 -p + scale_x_continuous('Minimum Edits before Template Posting') + scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = plot_title, legend.title = theme_blank())
105 -ggsave('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_3.png',width=8)
106 -