Index: trunk/tools/wsor/message_templates/R/template_analysis.R |
— | — | @@ -7,7 +7,7 @@ |
8 | 8 | |
9 | 9 | # Import helper methods - GLOBAL |
10 | 10 | |
11 | | -home_dir <- "/home/rfaulkner/trunk/projects/WSOR/message_templates/" |
| 11 | +home_dir <<- "/home/rfaulkner/trunk/projects/WSOR/message_templates/" |
12 | 12 | # home_dir <- "/home/rfaulk/trunk/projects/WSOR/message_templates/" |
13 | 13 | |
14 | 14 | helper_import <- paste(home_dir,"R/R_helper_functions.R",sep="") |
— | — | @@ -26,15 +26,15 @@ |
27 | 27 | fname_last_part_edits <- "_editcounts.tsv" |
28 | 28 | fname_last_part_blocks <- "_blocks.tsv" |
29 | 29 | fname_last_part_warn <- "_warnings.tsv" |
30 | | - |
31 | | - warn_test <<- build.data.frames(template_indices_test, fname_first_part, fname_last_part_warn, string_frames=c(1)) |
32 | | - warn_control <<- build.data.frames(template_indices_control, fname_first_part, fname_last_part_warn, string_frames=c(1)) |
| 30 | + |
| 31 | + warn_test <<- build.data.frames(template_indices_test, fname_first_part, fname_last_part_warn, home_dir, string_frames=c(1)) |
| 32 | + warn_control <<- build.data.frames(template_indices_control, fname_first_part, fname_last_part_warn, home_dir, string_frames=c(1)) |
33 | 33 | |
34 | | - blocks_test <<- build.data.frames(template_indices_test, fname_first_part, fname_last_part_blocks, string_frames=c(1)) |
35 | | - blocks_control <<- build.data.frames(template_indices_control, fname_first_part, fname_last_part_blocks, string_frames=c(1)) |
| 34 | + blocks_test <<- build.data.frames(template_indices_test, fname_first_part, fname_last_part_blocks, home_dir, string_frames=c(1)) |
| 35 | + blocks_control <<- build.data.frames(template_indices_control, fname_first_part, fname_last_part_blocks, home_dir, string_frames=c(1)) |
36 | 36 | |
37 | | - edits_test <<- build.data.frames(template_indices_test, fname_first_part, fname_last_part_edits, string_frames=c(1)) |
38 | | - edits_control <<- build.data.frames(template_indices_control, fname_first_part, fname_last_part_edits, string_frames=c(1)) |
| 37 | + edits_test <<- build.data.frames(template_indices_test, fname_first_part, fname_last_part_edits, home_dir, string_frames=c(1)) |
| 38 | + edits_control <<- build.data.frames(template_indices_control, fname_first_part, fname_last_part_edits, home_dir, string_frames=c(1)) |
39 | 39 | |
40 | 40 | } |
41 | 41 | |
— | — | @@ -47,7 +47,7 @@ |
48 | 48 | # GLOBALS assumed to exist: warn_test, warn_control, blocks_test, blocks_control, edits_test, edits_control |
49 | 49 | # |
50 | 50 | |
51 | | -process.data.frames <- function(min_edits_before=0, min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, min_revisions_after = 0, registered=TRUE) { |
| 51 | +process.data.frames <- function(min_edits_before=0, min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, min_edits_after = 0, registered=TRUE) { |
52 | 52 | |
53 | 53 | # MERGE THE METRICS AND ADD TEMPLATE COLS |
54 | 54 | |
— | — | @@ -80,7 +80,7 @@ |
81 | 81 | condition_4 <- merged_test$ns_0_revisions_deleted_before >= min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <= max_deleted_edits_before |
82 | 82 | condition_5 <- merged_test$warns_before <= maximum_warns_before |
83 | 83 | condition_6 <- filter.list.by.regex(IP_regex, merged_test$recipient_name) |
84 | | - condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_revisions_after |
| 84 | + condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_edits_after |
85 | 85 | |
86 | 86 | indices <- condition_1 & condition_2 & condition_3 & condition_4 & condition_5 & condition_6 & condition_7 |
87 | 87 | merged_test <<- merged_test[indices,] |
— | — | @@ -91,7 +91,7 @@ |
92 | 92 | condition_4 <- merged_control$ns_0_revisions_deleted_before >= min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <= max_deleted_edits_before |
93 | 93 | condition_5 <- merged_control$warns_before <= maximum_warns_before |
94 | 94 | condition_6 <- filter.list.by.regex(IP_regex, merged_control$recipient_name) |
95 | | - condition_7 <- merged_control$ns_0_revisions_after_0_3 >= min_revisions_after |
| 95 | + condition_7 <- merged_control$ns_0_revisions_after_0_3 >= min_edits_after |
96 | 96 | |
97 | 97 | indices <- condition_1 & condition_2 & condition_3 & condition_4 & condition_5 & condition_6 & condition_7 |
98 | 98 | merged_control <<- merged_control[indices,] |
— | — | @@ -101,12 +101,20 @@ |
102 | 102 | |
103 | 103 | # print("Add derived columns..") |
104 | 104 | |
105 | | - merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before - merged_test$ns_0_revisions_after_0_3) / (merged_test$ns_0_revisions_before) |
| 105 | + merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before - (merged_test$ns_0_revisions_after_0_3)) / (merged_test$ns_0_revisions_before) |
106 | 106 | merged_control$edits_decrease <<- (merged_control$ns_0_revisions_before - merged_control$ns_0_revisions_after_0_3) / (merged_control$ns_0_revisions_before) |
107 | 107 | |
108 | | - # merged_test$edits_del_decrease <<- (merged_test$ns_0_revisions_deleted_before - (merged_test$ns_0_revisions_deleted_after_0_3)) / (merged_test$ns_0_revisions_deleted_before) |
109 | | - # merged_control$edits_del_decrease <<- (merged_control$ns_0_revisions_deleted_before - (merged_control$ns_0_revisions_deleted_after_0_3)) / (merged_control$ns_0_revisions_deleted_before) |
| 108 | + merged_test$edit_counts_0_3 <<- merged_test$ns_0_revisions_after_0_3 |
| 109 | + merged_control$edit_counts_0_3 <<- merged_control$ns_0_revisions_after_0_3 |
110 | 110 | |
| 111 | + merged_test$edits_del_decrease <<- (merged_test$ns_0_revisions_deleted_before - (merged_test$ns_0_revisions_deleted_after_0_3)) / (merged_test$ns_0_revisions_deleted_before) |
| 112 | + merged_control$edits_del_decrease <<- (merged_control$ns_0_revisions_deleted_before - (merged_control$ns_0_revisions_deleted_after_0_3)) / (merged_control$ns_0_revisions_deleted_before) |
| 113 | + |
| 114 | + merged_test$edit_del_counts_0_3 <<- merged_test$ns_0_revisions_deleted_after_0_3 |
| 115 | + merged_control$edit_del_counts_0_3 <<- merged_control$ns_0_revisions_deleted_after_0_3 |
| 116 | + |
| 117 | + merged_test$edit_del_counts <<- ceiling(merged_test$ns_0_revisions_deleted_after_0_3 / max(merged_test$ns_0_revisions_deleted_after_0_3)) |
| 118 | + merged_control$edit_del_counts <<- ceiling(merged_control$ns_0_revisions_deleted_after_0_3 / max(merged_control$ns_0_revisions_deleted_after_0_3)) |
111 | 119 | } |
112 | 120 | |
113 | 121 | # FUNCTION :: execute.chi.square.test |
— | — | @@ -151,27 +159,32 @@ |
152 | 160 | # A pseudo main method to allow the script to be executed as a batch |
153 | 161 | # |
154 | 162 | |
155 | | -execute.main <- function() { |
| 163 | +execute.main <- function(min_edits_before = 0, max_edits_before = Inf, min_edits_after = 0, min_deleted_edits_before = 0, max_deleted_edits_before = Inf, |
| 164 | +load_metrics = FALSE, load_file = "", import_metrics = FALSE, registered = FALSE) { |
156 | 165 | |
157 | 166 | # IMPORT DATA |
158 | 167 | |
159 | | - template_indices_control <- c(60,62,66,76) # c(107,109,111,113,115) # c(78,81) # c(84, 0) # c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) |
160 | | - template_indices_test <- c(61,63,67,77) # c(108,110,114,116) # c(79,82) # c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) |
161 | | - fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="") # paste(home_dir,"output/metrics_1122_1222_z",sep="") # paste(home_dir,"output/metrics_1109_1209_z",sep="") # paste(home_dir,"output/metrics_1108_1202_z",sep="") # paste(home_dir,"output/metrics_pt_z",sep="") # paste(home_dir,"output/metrics_1018_1119_z",sep="") |
| 168 | + # c(60,62,66,76) # c(107,109,111,113,115) # TWINKLE c(78,81) # c(84, 0) # c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) # CORENSEARCH c(118, 120, 122, 124, 126, 128) # IMAGETAG c(132, 133, 135, 136, 138, 139, 141, 142) |
| 169 | + # c(61,63,67,77) # c(108,110,114,116) # TWINKLE c(79,82) # c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) # CORENSEARCH c(117, 119, 121, 123, 125, 127) # IMAGETAG c(131, 134, 137, 140) |
| 170 | + # paste(home_dir,"output/metrics_1018_1119_z",sep="") # paste(home_dir,"output/metrics_1122_1222_z",sep="") # paste(home_dir,"output/metrics_1109_1209_z",sep="") |
| 171 | + # paste(home_dir,"output/metrics_1108_1202_z",sep="") # paste(home_dir,"output/metrics_pt_z",sep="") # paste(home_dir,"output/metrics_1018_1119_z",sep="") # paste(home_dir,"output/metrics_z",sep="") |
162 | 172 | |
163 | | - # import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part) |
| 173 | + template_indices_control <- c(81,0) |
| 174 | + template_indices_test <- c(82,0) |
| 175 | + fname_first_part <- "output/metrics_1109_1209_z" |
164 | 176 | |
| 177 | + if (import_metrics) |
| 178 | + import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part) |
165 | 179 | |
| 180 | + if (load_metrics) |
| 181 | + load(load_file) |
166 | 182 | |
167 | | - # PROCESS DATA |
168 | 183 | |
169 | | - # print("") |
170 | | - # print("Processing data frames.") |
171 | | - registered = TRUE |
172 | | - process.data.frames(3,0,Inf,Inf,registered) |
| 184 | + # PROCESS DATA |
| 185 | + process.data.frames(min_edits_before = min_edits_before, max_edits_before = max_edits_before, min_edits_after = min_edits_after, |
| 186 | + min_deleted_edits_before = min_deleted_edits_before, max_deleted_edits_before = max_deleted_edits_before, registered = registered) |
173 | 187 | |
174 | 188 | |
175 | | - |
176 | 189 | # HYPOTHESIS TESTING |
177 | 190 | |
178 | 191 | # t_result <- t.test(x=merged_test$edits_decrease, y=merged_control$edits_decrease, alternative = "two.sided", paired = FALSE, var.equal = FALSE, conf.level = 0.95) |
— | — | @@ -181,7 +194,10 @@ |
182 | 195 | # LOGISTIC REGRESSION MODELLING: |
183 | 196 | |
184 | 197 | all_data <<- append.data.frames(merged_test, merged_control) |
| 198 | + |
185 | 199 | # summary(glm(template ~ edits_decrease, data=all_data, family=binomial(link="logit"))) |
| 200 | + # summary(glm(template ~ edit_counts_0_3, data=all_data, family=binomial(link="logit"))) |
186 | 201 | # summary(glm(template ~ edits_del_decrease, data=all_data, family=binomial(link="logit"))) |
187 | | - |
| 202 | + # summary(glm(template ~ edit_del_counts_0_3, data=all_data, family=binomial(link="logit"))) |
| 203 | + # summary(glm(template ~ edit_del_counts, data=all_data, family=binomial(link="logit"))) |
188 | 204 | } |
Index: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R |
— | — | @@ -64,23 +64,28 @@ |
65 | 65 | # save_plot - saves plot if TRUE |
66 | 66 | # registered - look at registered editors if TRUE (non-registered otherwise) |
67 | 67 | # error_bars - display error bars if TRUE |
| 68 | +# plot_samples - plots the sample sizes used for each data point if TRUE |
68 | 69 | # |
69 | 70 | |
70 | | -line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper = 10, import_metrics = FALSE, save_plot = TRUE, filename = 'ggplot_out_', registered = TRUE, error_bars = FALSE) |
| 71 | +line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper = 10, rev_count_after_min = 0, import_metrics = FALSE, plot_width = 10, |
| 72 | +save_plot = FALSE, filename = 'ggplot_out_', registered = FALSE, error_bars = FALSE, plot_title = "Huggle Experiments", load_metrics = FALSE, load_file = "", plot_samples = FALSE, |
| 73 | +x_scale = "Minimum Edits before Template Posting", y_scale = "Sample Size", plot_title_metric = "Metric Description") |
71 | 74 | { |
72 | 75 | # IMPORT DATA |
73 | 76 | |
74 | | - # c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76) c(60,62,66,76) c(107,109,111,113,115) c(84,99,101,103,105) |
75 | | - # c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77) c(61,63,67,77) c(108,110,114,116) c(85,86,100,102,104,106) |
76 | | - # paste(home_dir,"output/metrics_1109_1209_z",sep="") paste(home_dir,"output/metrics_pt_z",sep="") paste(home_dir,"output/metrics_1018_1119_z",sep="") paste(home_dir,"output/metrics_1122_1222_z",sep="") |
| 77 | + # c(84, 0) c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76) c(60,62,66,76) c(107,109,111,113,115) c(84,99,101,103,105) |
| 78 | + # c(85, 0) c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77) c(61,63,67,77) c(108,110,114,116) c(85,86,100,102,104,106) |
| 79 | + # paste(home_dir,"output/metrics_1108_1202_z",sep="") paste(home_dir,"output/metrics_1109_1209_z",sep="") paste(home_dir,"output/metrics_pt_z",sep="") paste(home_dir,"output/metrics_1018_1119_z",sep="") paste(home_dir,"output/metrics_1122_1222_z",sep="") |
77 | 80 | |
78 | | - template_indices_control <- c(84, 0) |
79 | | - template_indices_test <- c(85, 0) |
| 81 | + template_indices_control <- c(84, 0) |
| 82 | + template_indices_test <- c(85, 0) |
80 | 83 | fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="") |
81 | 84 | |
82 | 85 | if (import_metrics) |
83 | 86 | import.experimental.metrics.data(template_indices_test, template_indices_control, fname_first_part) |
84 | 87 | |
| 88 | + if (load_metrics) |
| 89 | + load(load_file) |
85 | 90 | |
86 | 91 | |
87 | 92 | # PROCESS DATA |
— | — | @@ -90,11 +95,11 @@ |
91 | 96 | data_counts_test <<- c() |
92 | 97 | data_counts_control <<- c() |
93 | 98 | |
94 | | - edit_decrease_means_test <<- c() |
95 | | - edit_decrease_means_control <<- c() |
| 99 | + means_test <<- c() |
| 100 | + means_control <<- c() |
96 | 101 | |
97 | | - edit_decrease_sd_test <<- c() |
98 | | - edit_decrease_sd_control <<- c() |
| 102 | + sd_test <<- c() |
| 103 | + sd_control <<- c() |
99 | 104 | |
100 | 105 | |
101 | 106 | if (registered) |
— | — | @@ -104,31 +109,58 @@ |
105 | 110 | |
106 | 111 | for (i in edit_count_before_filter) |
107 | 112 | { |
108 | | - process.data.frames(i,0,Inf,Inf,registered=registered,min_revisions_after=0) |
| 113 | + process.data.frames(min_deleted_edits_before = i, max_deleted_edits_before = Inf, registered=registered, min_edits_after=rev_count_after_min) |
| 114 | + |
| 115 | + means_test <<- c(means_test, mean(merged_test$edit_del_counts_0_3)) |
| 116 | + means_control <<- c(means_control, mean(merged_control$edit_del_counts_0_3)) |
109 | 117 | |
110 | | - edit_decrease_means_test <<- c(edit_decrease_means_test, mean(merged_test$edits_decrease) * 100) |
111 | | - edit_decrease_means_control <<- c(edit_decrease_means_control, mean(merged_control$edits_decrease) * 100) |
| 118 | + sd_test <<- c(sd_test, sd(merged_test$edit_del_counts_0_3)) |
| 119 | + sd_control <<- c(sd_control, sd(merged_control$edit_del_counts_0_3)) |
112 | 120 | |
113 | | - edit_decrease_sd_test <<- c(edit_decrease_sd_test, sd(merged_test$edits_decrease * 100)) |
114 | | - edit_decrease_sd_control <<- c(edit_decrease_sd_control, sd(merged_control$edits_decrease * 100)) |
115 | | - |
116 | | - data_counts_test <<- c(data_counts_test, length(merged_test$edits_decrease)) |
117 | | - data_counts_control <<- c(data_counts_control, length(merged_control$edits_decrease)) |
| 121 | + data_counts_test <<- c(data_counts_test, length(merged_test$edit_del_counts_0_3)) |
| 122 | + data_counts_control <<- c(data_counts_control, length(merged_control$edit_del_counts_0_3)) |
118 | 123 | } |
119 | 124 | |
120 | | - # PLOT DATA |
121 | 125 | |
122 | | - plot_title = paste("Huggle Short 1 & 2 Experiment (", reg_str, ") - Decrease in Editor Activity", sep="") |
| 126 | + # PLOT - Mean of the selected metric (test vs. control) |
123 | 127 | |
124 | | - df <- data.frame(x=1:length(edit_decrease_means_test), y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control, y_test_sd=edit_decrease_sd_test, y_ctrl_sd=edit_decrease_sd_control) |
| 128 | + # plot_title_full = paste(plot_title, "(", reg_str, ") - Decrease in Editor Activity", sep="") |
| 129 | + plot_title_full = paste(plot_title, "(", reg_str, ") - ", plot_title_metric, sep="") |
| 130 | + |
| 131 | + df <- data.frame(x=1:length(means_test), y_test=means_test, y_ctrl=means_control, y_test_sd=sd_test, y_ctrl_sd=sd_control) |
125 | 132 | p <- ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + geom_line(aes(y=y_ctrl,colour="Control")) |
126 | 133 | |
127 | 134 | if (error_bars) |
128 | 135 | p <- p + geom_errorbar(aes(ymin = y_test - y_test_sd, ymax = y_test + y_test_sd, colour="Test"), width=0.2) + geom_errorbar(aes(ymin = y_ctrl - y_ctrl_sd, ymax = y_ctrl + y_ctrl_sd, colour="Control"), width=0.2) |
129 | 136 | |
130 | | - p <- p + scale_x_continuous('Minimum Edits before Template Posting') + scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = plot_title, legend.title = theme_blank()) |
| 137 | + # Add axes labels and titles |
| 138 | + p <- p + scale_x_continuous(x_scale) + scale_y_continuous(y_scale) + opts(title = plot_title_full, legend.title = theme_blank()) |
131 | 139 | |
132 | 140 | if (save_plot) |
133 | | - ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,reg_str,'.png',sep=""),width=8) |
| 141 | + ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,reg_str,'.png',sep=""), width=plot_width) |
| 142 | + |
| 143 | + |
| 144 | + |
| 145 | + # PLOT - Sample Sizes |
| 146 | + |
| 147 | + if (plot_samples) |
| 148 | + { |
| 149 | + plot_title_full = paste(plot_title, "(", reg_str, ") - Sample Sizes", sep="") |
| 150 | + bins <- 1:length(data_counts_test) |
| 151 | + |
| 152 | + test_samples <- counts.to.samples(bins, data_counts_test) |
| 153 | + control_samples <- counts.to.samples(bins, data_counts_control) |
| 154 | + |
| 155 | + labels <- c(test_samples * 0, control_samples / control_samples) |
| 156 | + labels[labels == 0] = "Test" |
| 157 | + labels[labels == 1] = "Control" |
| 158 | + |
| 159 | + df <- data.frame(x=c(test_samples, control_samples), labels=labels) |
| 160 | + p <- ggplot(df, aes(x, fill=labels)) + geom_bar(binwidth=0.4, position="dodge") |
| 161 | + p <- p + scale_x_continuous(x_scale) + scale_y_continuous('Sample Size') + opts(title = plot_title_full, legend.title = theme_blank()) |
| 162 | + |
| 163 | + if (save_plot) |
| 164 | + ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,"samples_",reg_str,'.png',sep=""), width=plot_width) |
| 165 | + } |
134 | 166 | } |
135 | 167 | |