Index: trunk/tools/wsor/message_templates/R/huggle3_analysis.R |
— | — | @@ -8,49 +8,45 @@ |
9 | 9 | source('/home/rfaulk/WSOR/message_templates/R/R_helper_functions.R') |
10 | 10 | |
11 | 11 | |
# Read aggregated results for the template

# Indices of the per-template metric files: one set for the control group,
# one set for the test group.
template_indices_control <- c(60, 62, 64, 66, 68, 70, 72, 74, 76)
template_indices_test <- c(61, 63, 65, 67, 69, 71, 73, 75, 77)

# Each input file is named <fname_first_part><template index><fname_last_part>
fname_first_part <- "/home/rfaulk/WSOR/message_templates/output/metrics_1018_1119_z"
fname_last_part <- "_editcounts.tsv"


# MAIN EXECUTION
# ==============

# BUILD THE DATA FRAMES
# (build.data.frames comes from the sourced R_helper_functions.R)

metrics_test <- build.data.frames(template_indices_test, fname_first_part, fname_last_part)
metrics_control <- build.data.frames(template_indices_control, fname_first_part, fname_last_part)


# Compute the relative change in edits around the template -- default to namespace 0
#
# A user does not necessarily have edits before the template; such rows are
# omitted because the ratio is undefined (it could be the case that templates
# stimulate edits from previously inactive users, but that should be tested
# separately).  Rows whose 'before' count is missing (\N in the input) are
# omitted as well instead of aborting the script.
#
# NOTE(review): the ratio is (before - after) / before, i.e. a positive value
# means FEWER edits after the template.  Confirm this orientation is intended.

z_test <- with(metrics_test, {
  keep <- !is.na(ns_0_revisions_before) & ns_0_revisions_before != 0
  ((ns_0_revisions_before - ns_0_revisions_after) / ns_0_revisions_before)[keep]
})

z_control <- with(metrics_control, {
  keep <- !is.na(ns_0_revisions_before) & ns_0_revisions_before != 0
  ((ns_0_revisions_before - ns_0_revisions_after) / ns_0_revisions_before)[keep]
})


# Generate results:

# Two-sided Welch t-test (unpaired, unequal variances) of test vs. control
t_result <- t.test(x = z_test, y = z_control, alternative = "two.sided", paired = FALSE, var.equal = FALSE, conf.level = 0.95)

# Disabled: z64_ns3 / z65_ns3 are no longer computed by this script.
# t_result_ns3 = t.test(x=z64_ns3, y=z65_ns3, alternative = "two.sided", paired = FALSE, var.equal = FALSE, conf.level = 0.95)
Index: trunk/tools/wsor/message_templates/R/huggle3_analysis_chi_sq.R |
— | — | @@ -7,14 +7,22 @@ |
8 | 8 | |
9 | 9 | source('/home/rfaulk/WSOR/message_templates/R/R_helper_functions.R') |
10 | 10 | |
# Locate the aggregated per-template metric files

# Control group: indices 60, 62, ..., 76.  Test group: indices 61, 63, ..., 77.
template_indices_control <- seq(60, 76, by = 2)
template_indices_test <- seq(61, 77, by = 2)

# A file name is assembled as <fname_first_part><index><fname_last_part>
fname_last_part <- "_editcounts.tsv"
fname_first_part <- "/home/rfaulk/WSOR/message_templates/output/metrics_1018_1119_z"


# MAIN EXECUTION
# ==============

# Load and concatenate the files of each group into a single data frame

metrics_test <- build.data.frames(template_indices_test, fname_first_part, fname_last_part)
metrics_control <- build.data.frames(template_indices_control, fname_first_part, fname_last_part)
19 | 27 | |
20 | 28 | |
21 | 29 | # Compute the change in edits after the template |
Index: trunk/tools/wsor/message_templates/R/R_helper_functions.R |
— | — | @@ -17,7 +17,8 @@ |
18 | 18 | |
19 | 19 | sample_sd <- sd(data) |
20 | 20 | sample_mean <- mean(data) |
21 | | - |
| 21 | + df <- length(data) - 1 |
| 22 | + |
22 | 23 | # vector to store bucket probabilities |
23 | 24 | probs <- c() |
24 | 25 | num_bins <- length(bins) |
— | — | @@ -39,7 +40,8 @@ |
40 | 41 | lower <- bins[i] - ((bins[i] - bins[im1]) / 2) |
41 | 42 | } |
42 | 43 | |
43 | | - p = pnorm(upper, mean = sample_mean, sd = sample_sd, log = FALSE) - pnorm(lower, mean = sample_mean, sd = sample_sd, log = FALSE) |
| 44 | + # p = pnorm(upper, mean = sample_mean, sd = sample_sd, log = FALSE) - pnorm(lower, mean = sample_mean, sd = sample_sd, log = FALSE) |
| 45 | + p = pt(upper - sample_mean, df) - pnorm(lower - sample_mean, df) |
44 | 46 | probs <- c(probs, p) |
45 | 47 | } |
46 | 48 | |
— | — | @@ -146,4 +148,76 @@ |
147 | 149 | } |
148 | 150 | |
149 | 151 | data.frame(values=new_values, counts=new_counts) |
| 152 | +} |
| 153 | + |
| 154 | + |
# FUNCTION :: append.data.frames
#
# Given two data frames, append the rows of the second to the rows of the first.
#
# Arguments:
#   df_1 - data frame whose rows come first; its column names and column order
#          define the columns of the result
#   df_2 - data frame whose rows are appended; it must contain every column
#          named in df_1 (columns are matched by name, extra columns are ignored)
#
# Returns: a data frame with nrow(df_1) + nrow(df_2) rows, the columns of df_1,
#          and row names renumbered 1..n
#
# Assumes: the two data frames have the same column names
#

append.data.frames <- function(df_1, df_2) {

  stopifnot(is.data.frame(df_1), is.data.frame(df_2))

  # Selecting df_2's columns by df_1's names puts them in matching order and
  # fails loudly if one is missing.
  shared_cols <- colnames(df_1)

  # rbind keeps each column's type (numeric, character, factor) and copes with
  # zero-row inputs, unlike copying cell by cell into an NA-filled frame.
  df_return <- rbind(df_1, df_2[shared_cols])

  # Plain 1..n row names, independent of the row names of the inputs
  rownames(df_return) <- NULL

  df_return
}
| 191 | + |
| 192 | + |
# FUNCTION :: build.data.frames
#
# Constructs a concatenated data.frame from files
#
# Arguments:
#   template_indices - vector of template indices; one file is read per index,
#                      in the order given
#   fname_first_part - file name text placed before the index
#   fname_last_part  - file name text placed after the index
#
# Each file <fname_first_part><index><fname_last_part> is read as a
# tab-separated table with a header row ("\N" marks missing values) and the
# tables are stacked with append.data.frames.
#
# Returns: one data frame holding the rows of all files
#

build.data.frames <- function(template_indices, fname_first_part, fname_last_part) {

  if (length(template_indices) == 0)
    stop("build.data.frames: template_indices is empty")

  # Announce and read the metrics file that belongs to one template index
  read.metrics <- function(index) {
    filename <- paste(fname_first_part, index, fname_last_part, sep="")

    output <- paste("Processing data from",filename,"....")
    print(output)

    read.table(filename, na.strings="\\N", sep="\t", comment.char="", quote="", header=TRUE)
  }

  # Initialize the data frame

  metrics <- read.metrics(template_indices[1])

  # Extend the data frame with the remaining files.  Iterating over the
  # argument itself (not a global vector) also handles a single index.

  for (index in template_indices[-1])
    metrics <- append.data.frames(metrics, read.metrics(index))

  metrics
}
\ No newline at end of file |