Index: trunk/tools/wsor/message_templates/R/template_analysis.R |
— | — | @@ -47,7 +47,7 @@ |
48 | 48 | # GLOBALS assumed to exist: warn_test, warn_control, blocks_test, blocks_control, edits_test, edits_control |
49 | 49 | # |
50 | 50 | |
51 | | -process.data.frames <- function(min_edits_before=0, min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf) { |
| 51 | +process.data.frames <- function(min_edits_before=0, min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, min_revisions_after = 0, registered=TRUE) { |
52 | 52 | |
53 | 53 | # MERGE THE METRICS AND ADD TEMPLATE COLS |
54 | 54 | |
— | — | @@ -69,16 +69,18 @@ |
70 | 70 | |
71 | 71 | maximum_warns_before <- 0 |
72 | 72 | |
73 | | - IP_regex <- "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" |
74 | | - IP_regex_not <- '.*[a-zA-z].*' |
| 73 | + if (!registered) |
| 74 | + IP_regex <- "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" |
| 75 | + else |
| 76 | + IP_regex <- '.*[a-zA-z].*' |
75 | 77 | |
76 | 78 | condition_1 <- TRUE # merged_test$blocks_before > 0 |
77 | 79 | condition_2 <- merged_test$blocks_after == 0 |
78 | 80 | condition_3 <- merged_test$ns_0_revisions_before >= min_edits_before & merged_test$ns_0_revisions_before <= max_edits_before |
79 | 81 | condition_4 <- merged_test$ns_0_revisions_deleted_before >= min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <= max_deleted_edits_before |
80 | 82 | condition_5 <- merged_test$warns_before <= maximum_warns_before |
81 | | - condition_6 <- filter.list.by.regex(IP_regex_not, merged_test$recipient_name) |
82 | | - condition_7 <- merged_test$ns_0_revisions_after_0_3 > 0 |
| 83 | + condition_6 <- filter.list.by.regex(IP_regex, merged_test$recipient_name) |
| 84 | + condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_revisions_after |
83 | 85 | |
84 | 86 | indices <- condition_1 & condition_2 & condition_3 & condition_4 & condition_5 & condition_6 & condition_7 |
85 | 87 | merged_test <<- merged_test[indices,] |
— | — | @@ -88,8 +90,8 @@ |
89 | 91 | condition_3 <- merged_control$ns_0_revisions_before >= min_edits_before & merged_control$ns_0_revisions_before <= max_edits_before |
90 | 92 | condition_4 <- merged_control$ns_0_revisions_deleted_before >= min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <= max_deleted_edits_before |
91 | 93 | condition_5 <- merged_control$warns_before <= maximum_warns_before |
92 | | - condition_6 <- filter.list.by.regex(IP_regex_not, merged_control$recipient_name) |
93 | | - condition_7 <- merged_control$ns_0_revisions_after_0_3 > 0 |
| 94 | + condition_6 <- filter.list.by.regex(IP_regex, merged_control$recipient_name) |
| 95 | + condition_7 <- merged_control$ns_0_revisions_after_0_3 >= min_revisions_after |
94 | 96 | |
95 | 97 | indices <- condition_1 & condition_2 & condition_3 & condition_4 & condition_5 & condition_6 & condition_7 |
96 | 98 | merged_control <<- merged_control[indices,] |
— | — | @@ -149,13 +151,13 @@ |
150 | 152 | # A pseudo main method to allow the script to be executed as a batch |
151 | 153 | # |
152 | 154 | |
153 | | -execute.main <- function(test_samples, control_samples) { |
| 155 | +execute.main <- function() { |
154 | 156 | |
155 | 157 | # IMPORT DATA |
156 | 158 | |
157 | | - template_indices_control <- c(78,81) # c(84, 0) # c(107,109,111,113,115) # c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) |
158 | | - template_indices_test <- c(79,82) # c(86, 0) # c(108,110,114,116) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) |
159 | | - fname_first_part <- paste(home_dir,"output/metrics_1109_1209_z",sep="") # paste(home_dir,"output/metrics_1108_1202_z",sep="") # paste(home_dir,"output/metrics_1122_1222_z",sep="") # paste(home_dir,"output/metrics_pt_z",sep="") # paste(home_dir,"output/metrics_1018_1119_z",sep="") # "/home/rfaulk/WSOR/message_templates/output/metrics_pt_z" |
| 159 | + template_indices_control <- c(60,62,66,76) # c(107,109,111,113,115) # c(78,81) # c(84, 0) # c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) |
| 160 | + template_indices_test <- c(61,63,67,77) # c(108,110,114,116) # c(79,82) # c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) |
| 161 | + fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="") # paste(home_dir,"output/metrics_1122_1222_z",sep="") # paste(home_dir,"output/metrics_1109_1209_z",sep="") # paste(home_dir,"output/metrics_1108_1202_z",sep="") # paste(home_dir,"output/metrics_pt_z",sep="") # paste(home_dir,"output/metrics_1018_1119_z",sep="") |
160 | 162 | |
161 | 163 | # import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part) |
162 | 164 | |
— | — | @@ -165,7 +167,8 @@ |
166 | 168 | |
167 | 169 | # print("") |
168 | 170 | # print("Processing data frames.") |
169 | | - process.data.frames(1,0,Inf,Inf) |
| 171 | + registered = TRUE |
| 172 | + process.data.frames(3,0,Inf,Inf,registered) |
170 | 173 | |
171 | 174 | |
172 | 175 | |
— | — | @@ -177,7 +180,7 @@ |
178 | 181 | |
179 | 182 | # LOGISTIC REGRESSION MODELLING: |
180 | 183 | |
181 | | - all_data <- append.data.frames(merged_test, merged_control) |
| 184 | + all_data <<- append.data.frames(merged_test, merged_control) |
182 | 185 | # summary(glm(template ~ edits_decrease, data=all_data, family=binomial(link="logit"))) |
183 | 186 | # summary(glm(template ~ edits_del_decrease, data=all_data, family=binomial(link="logit"))) |
184 | 187 | |
Index: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R |
— | — | @@ -13,6 +13,8 @@ |
14 | 14 | # |
15 | 15 | # Basic plotting for test vs. control
16 | 16 | # |
| 17 | +# e.g. call :: plot.control.vs.test("Huggle Short 2 Experiment (reduced) - Decrease in Editor Activity", "Minimum Edits before Template Posting", "Mean % Decrease in Edit Activity", edit_decrease_means_test, edit_decrease_means_control) |
| 18 | +# |
17 | 19 | |
18 | 20 | plot.control.vs.test <- function(title, x_label, y_label, test_samples, control_samples) { |
19 | 21 | |
— | — | @@ -62,44 +64,68 @@ |
63 | 65 | |
64 | 66 | # IMPORT DATA |
65 | 67 | |
66 | | -# c(84, 0) c(107,109,111,113,115) c(78,81) c(1,4) c(84,99,101,103,105) |
67 | | -template_indices_control <- c(60,62,64,66,68,70,72,74,76) |
68 | | - |
69 | | -# c(85, 0) c(108,110,114,116) c(79,82) c(2,3) c(85,86,100,102,104,106) |
70 | | -template_indices_test <- c(61,63,65,67,69,71,73,75,77) |
71 | | - |
72 | | -# paste(home_dir,"output/metrics_1108_1202_z",sep="") paste(home_dir,"output/metrics_1122_1222_z",sep="") paste(home_dir,"output/metrics_1109_1209_z",sep="") paste(home_dir,"output/metrics_pt_z",sep="") "/home/rfaulk/WSOR/message_templates/output/metrics_pt_z" |
73 | | -fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="") |
74 | | - |
75 | | -import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part) |
76 | | - |
77 | | - |
78 | | - |
79 | | -# PROCESS DATA |
80 | | - |
81 | | -edit_count_before_filter <- 1:10 |
82 | | - |
83 | | -edit_decrease_means_test <- c() |
84 | | -edit_decrease_means_control <- c() |
85 | | - |
86 | | -for (i in edit_count_before_filter) |
| 68 | +line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper = 10, import_metrics = FALSE, save_plot = TRUE, registered = TRUE, error_bars = FALSE) |
87 | 69 | { |
88 | | - process.data.frames(i,0,Inf,Inf) |
| 70 | + # c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76) c(60,62,66,76) c(107,109,111,113,115) c(84,99,101,103,105) |
| 71 | + template_indices_control <- c(84, 0) |
89 | 72 | |
90 | | - edit_decrease_means_test <- c(edit_decrease_means_test, mean(merged_test$edits_decrease)) |
91 | | - edit_decrease_means_control <- c(edit_decrease_means_control, mean(merged_control$edits_decrease)) |
| 73 | + # c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77) c(61,63,67,77) c(108,110,114,116) c(85,86,100,102,104,106) |
| 74 | + template_indices_test <- c(85, 0) |
| 75 | + |
| 76 | + # paste(home_dir,"output/metrics_1109_1209_z",sep="") paste(home_dir,"output/metrics_pt_z",sep="") paste(home_dir,"output/metrics_1018_1119_z",sep="") paste(home_dir,"output/metrics_1122_1222_z",sep="") |
| 77 | + fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="") |
| 78 | + |
| 79 | + if (import_metrics) |
| 80 | + import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part) |
| 81 | + |
| 82 | + |
| 83 | + |
| 84 | + # PROCESS DATA |
| 85 | + |
| 86 | + edit_count_before_filter <- edit_count_min_lower : edit_count_min_upper |
| 87 | + |
| 88 | + data_counts_test <<- c() |
| 89 | + data_counts_control <<- c() |
| 90 | + |
| 91 | + edit_decrease_means_test <<- c() |
| 92 | + edit_decrease_means_control <<- c() |
| 93 | + |
| 94 | + edit_decrease_sd_test <<- c() |
| 95 | + edit_decrease_sd_control <<- c() |
| 96 | + |
| 97 | + |
| 98 | + if (registered) |
| 99 | + reg_str = 'registered' |
| 100 | + else |
| 101 | + reg_str = 'non_registered' |
| 102 | + |
| 103 | + for (i in edit_count_before_filter) |
| 104 | + { |
| 105 | + process.data.frames(i,0,Inf,Inf,registered=registered,min_revisions_after=0) |
| 106 | + |
| 107 | + edit_decrease_means_test <<- c(edit_decrease_means_test, mean(merged_test$edits_decrease) * 100) |
| 108 | + edit_decrease_means_control <<- c(edit_decrease_means_control, mean(merged_control$edits_decrease) * 100) |
| 109 | + |
| 110 | + edit_decrease_sd_test <<- c(edit_decrease_sd_test, sd(merged_test$edits_decrease * 100)) |
| 111 | + edit_decrease_sd_control <<- c(edit_decrease_sd_control, sd(merged_control$edits_decrease * 100)) |
| 112 | + |
| 113 | + data_counts_test <<- c(data_counts_test, length(merged_test$edits_decrease)) |
| 114 | + data_counts_control <<- c(data_counts_control, length(merged_control$edits_decrease)) |
| 115 | + } |
| 116 | + |
| 117 | + # PLOT DATA |
| 118 | + |
| 119 | + plot_title = paste("Huggle Short 1 & 2 Experiment (", reg_str, ") - Decrease in Editor Activity", sep="") |
| 120 | + |
| 121 | + df <- data.frame(x=1:length(edit_decrease_means_test), y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control, y_test_sd=edit_decrease_sd_test, y_ctrl_sd=edit_decrease_sd_control) |
| 122 | + p <- ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + geom_line(aes(y=y_ctrl,colour="Control")) |
| 123 | + |
| 124 | + if (error_bars) |
| 125 | + p <- p + geom_errorbar(aes(ymin = y_test - y_test_sd, ymax = y_test + y_test_sd, colour="Test"), width=0.2) + geom_errorbar(aes(ymin = y_ctrl - y_ctrl_sd, ymax = y_ctrl + y_ctrl_sd, colour="Control"), width=0.2) |
| 126 | + |
| 127 | + p <- p + scale_x_continuous('Minimum Edits before Template Posting') + scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = plot_title, legend.title = theme_blank()) |
| 128 | + |
| 129 | + if (save_plot) |
| 130 | + ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_short_1_2_',reg_str,'.png',sep=""),width=8) |
92 | 131 | } |
93 | 132 | |
94 | | -# PLOT DATA |
95 | | - |
96 | | -# plot.control.vs.test("Huggle Short 2 Experiment - Decrease in Editor Activity", "Minimum Edits before Template Posting", "Mean % Decrease in Edit Activity", edit_decrease_means_test, edit_decrease_means_control) |
97 | | - |
98 | | -# ggplot |
99 | | - |
100 | | -plot_title = "Huggle 3 - % decrease of Edit Actitivity after Posting" |
101 | | - |
102 | | -df <- data.frame(x=1:length(edit_decrease_means_test), y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control) |
103 | | -p = ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + geom_line(aes(y=y_ctrl,colour="Control")) |
104 | | -p + scale_x_continuous('Minimum Edits before Template Posting') + scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = plot_title, legend.title = theme_blank()) |
105 | | -ggsave('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_3.png',width=8) |
106 | | - |