Index: trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R |
— | — | @@ -0,0 +1,377 @@ |
| 2 | +source("loader/load_huggling_codings_mk2.R") |
| 3 | +library(doBy) |
| 4 | +hugglings = load_huggling_codings_mk2() |
| 5 | + |
| 6 | +#hugglingCounts = summaryBy( |
| 7 | +# recipient ~ recipient, |
| 8 | +# data = hugglings, |
| 9 | +# FUN=length |
| 10 | +#) |
| 11 | +#hugglingCounts$count = hugglingCounts$recipient.length |
| 12 | +#hugglingCounts$recipient.length = NULL |
| 13 | +# |
| 14 | +#hugglings = merge(hugglings, hugglingCounts, by=c("recipient")) |
| 15 | + |
| 16 | +#huggling_codings = load_huggling_codings(reload=T) |
| 17 | +#messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),] |
| 18 | +ifNA = function(val, naThen){ |
| 19 | + if(is.na(val)){ |
| 20 | + naThen |
| 21 | + }else{ |
| 22 | + val |
| 23 | + } |
| 24 | +} |
| 25 | + |
| 26 | +hugglings$contact = with( |
| 27 | + hugglings, |
| 28 | + mapply( |
| 29 | + ifNA, |
| 30 | + responds_hugglers_talk | |
| 31 | + responds_own_talk | |
| 32 | + responds_elsewhere | |
| 33 | + retaliates, |
| 34 | + F |
| 35 | + ) |
| 36 | +) |
| 37 | +hugglings$good_contact = mapply( |
| 38 | + function(contact, retaliates){ |
| 39 | + if(!contact){ |
| 40 | + NA |
| 41 | + }else{ |
| 42 | + !retaliates |
| 43 | + } |
| 44 | + }, |
| 45 | + hugglings$contact, |
| 46 | + hugglings$retaliates |
| 47 | +) |
| 48 | +hugglings$stay = !is.na(hugglings$after_rating) |
| 49 | +hugglings$improves = hugglings$after_rating > hugglings$before_rating |
| 50 | +hugglings$talk_edits_before_msg = with( |
| 51 | + hugglings, |
| 52 | + user_talk_edits_after_msg + article_talk_edits_before_msg |
| 53 | +) |
| 54 | +# Can't do it |
| 55 | +#messaged_codings$ntalk_edits_before_msg = with( |
| 56 | +# messaged_codings, |
| 57 | +# edits_before_msg - talk_edits_before_msg |
| 58 | +#) |
| 59 | + |
| 60 | +hugglings$good_outcome = with( |
| 61 | + hugglings, |
| 62 | + ( #Suckas leave or get better |
| 63 | + before_rating <= 2 & |
| 64 | + ( |
| 65 | + is.na(after_rating) | |
| 66 | + after_rating > 2 |
| 67 | + ) |
| 68 | + ) | |
| 69 | + ( #Good contact was made |
| 70 | + !is.na(good_contact) & |
| 71 | + good_contact |
| 72 | + ) | |
| 73 | + ( #Edits are good after receiving message |
| 74 | + !is.na(after_rating) & |
| 75 | + after_rating > 3 |
| 76 | + ) |
| 77 | +) |
| 78 | + |
| 79 | +## |
| 80 | +# Groups |
| 81 | +# |
| 82 | +# - <= 1 "vandal": We all agreed that the editor was a blatant vandal |
| 83 | +# - > 1 & <= 2 "bad faith": We all agreed that the editor was bad faith |
| 84 | +# - > 2 & < 4 "test": A test edit, but not good faith |
| 85 | +# - >= 4 "good faith": Good faith to excellent |
| 86 | +# |
| 87 | +# For each group: |
| 88 | +# - contact |
| 89 | +# - contact huggler + retaliate |
| 90 | +# - talk? (wait for staeiou) |
| 91 | +# - continue editing |
| 92 | +# - did they actually |
| 93 | +# - quality |
| 94 | +# - improve |
| 95 | +# - was it good |
| 96 | +# - degrade |
| 97 | +# |
| 98 | +# |
| 99 | +# Predictors: |
| 100 | +# - number of edilts before message |
| 101 | +# - number deleted |
| 102 | +# - makes edits to talk (before/after) |
| 103 | + |
| 104 | +hugglings$group = as.factor(sapply( |
| 105 | + hugglings$before_rating, |
| 106 | + function(rating){ |
| 107 | + if(is.na(rating)){ |
| 108 | + NA |
| 109 | + }else if(rating <= 1){ |
| 110 | + "vandal" |
| 111 | + }else if(rating > 1 & rating <= 2){ |
| 112 | + "bad faith" |
| 113 | + }else if(rating > 2 & rating < 4){ |
| 114 | + "test" |
| 115 | + }else if(rating >= 4){ |
| 116 | + "good faith" |
| 117 | + }else{ |
| 118 | + NA |
| 119 | + } |
| 120 | + } |
| 121 | +)) |
| 122 | +formatNum = function(num){ |
| 123 | + if(num >= 0){ |
| 124 | + paste(" ", format(round(num, 3), nsmall=3), sep="") |
| 125 | + }else{ |
| 126 | + format(round(num, 3), nsmall=3) |
| 127 | + } |
| 128 | +} |
| 129 | + |
| 130 | +for(group in c("vandal", "bad faith", "test", "good faith")){ |
| 131 | + group_codings = hugglings[hugglings$group == group,] |
| 132 | + |
| 133 | + |
| 134 | + cat("Result's for ", length(group_codings$group), " '", group, "' editors:\n", sep='') |
| 135 | + cat("============================================================\n") |
| 136 | + |
| 137 | + control = group_codings[group_codings$def,] |
| 138 | + personal = group_codings[group_codings$personal,] |
| 139 | + nodirectives = group_codings[group_codings$nodirectives,] |
| 140 | + |
| 141 | + experiments = list( |
| 142 | + list(name="Personal ", data=personal), |
| 143 | + list(name="No Directives", data=nodirectives) |
| 144 | + ) |
| 145 | + |
| 146 | + outcomes = list( |
| 147 | + list(name="Good outcome", field="good_outcome"), |
| 148 | + list(name="Improves", field="improves"), |
| 149 | + list(name="Contact", field="contact"), |
| 150 | + list(name="Stays", field="stay"), |
| 151 | + list(name="Good contact", field="good_contact") |
| 152 | + ) |
| 153 | + for(outcome in outcomes){ |
| 154 | + cat(outcome$name, ": \n", sep="") |
| 155 | + |
| 156 | + controlLen = length(na.omit(control[[outcome$field]])) |
| 157 | + controlSuccess = sum(control[[outcome$field]], na.rm=T) |
| 158 | + cat( |
| 159 | + "\tControl ", |
| 160 | + ": prop=", formatNum(controlSuccess/controlLen), |
| 161 | + ", n=", controlLen, "\n", |
| 162 | + sep="" |
| 163 | + ) |
| 164 | + for(experiment in experiments){ |
| 165 | + expSuccess = sum(experiment$data[[outcome$field]], na.rm=T) |
| 166 | + expLen = length(na.omit(experiment$data[[outcome$field]])) |
| 167 | + t = prop.test( |
| 168 | + c( |
| 169 | + expSuccess, |
| 170 | + controlSuccess |
| 171 | + ), |
| 172 | + c( |
| 173 | + expLen, |
| 174 | + controlLen |
| 175 | + ) |
| 176 | + ) |
| 177 | + |
| 178 | + propDiff = mean(experiment$data[[outcome$field]], na.rm=T)-mean(control[[outcome$field]], na.rm=T) |
| 179 | + cat( |
| 180 | + "\t", experiment$name, |
| 181 | + ": prop=", formatNum(expSuccess/expLen), |
| 182 | + ", diff=", formatNum(propDiff), |
| 183 | + ", p-value=", formatNum(t$p.value), |
| 184 | + ", conf.int=(", formatNum(t$conf.int[1]), ", ", formatNum(t$conf.int[2]), ")", |
| 185 | + ", n=", expLen, "\n", |
| 186 | + sep="" |
| 187 | + ) |
| 188 | + } |
| 189 | + cat("\n") |
| 190 | + } |
| 191 | + |
| 192 | + |
| 193 | + cat("\n\n\n") |
| 194 | +} |
| 195 | + |
| 196 | +meanNoNA = function(x){ |
| 197 | + mean(x, na.rm=T) |
| 198 | +} |
| 199 | +lengthNoNA = function(x){ |
| 200 | + length(na.omit(x)) |
| 201 | +} |
| 202 | + |
| 203 | +library(lattice) |
| 204 | +outcomeNames = list( |
| 205 | + good_outcome = "with a \"good outcome\"", |
| 206 | + improves = "who show improvement", |
| 207 | + contact = "who contact the reverting editor", |
| 208 | + good_contact = "who contact the reverting editor nicely", |
| 209 | + stay = "who make at least one edit after reading the message" |
| 210 | +) |
| 211 | +for(outcomeName in c("good_outcome", "improves", "contact", "good_contact", "stay")){ |
| 212 | + f = with( |
| 213 | + summaryBy( |
| 214 | + outcome ~ group + teaching + personal, |
| 215 | + data = data.frame( |
| 216 | + outcome = messaged_codings[[outcomeName]], |
| 217 | + teaching = messaged_codings$teaching, |
| 218 | + personal = messaged_codings$personal, |
| 219 | + group = messaged_codings$group |
| 220 | + ), |
| 221 | + FUN=c(meanNoNA, lengthNoNA) |
| 222 | + ), |
| 223 | + data.frame( |
| 224 | + group = group, |
| 225 | + message = mapply( |
| 226 | + function(personal, teaching){ |
| 227 | + if(personal & teaching){ |
| 228 | + "personal & teaching" |
| 229 | + }else if(personal){ |
| 230 | + "personal" |
| 231 | + }else if(teaching){ |
| 232 | + "teaching" |
| 233 | + }else{ |
| 234 | + "control" |
| 235 | + } |
| 236 | + }, |
| 237 | + personal, |
| 238 | + teaching |
| 239 | + ), |
| 240 | + #teaching = teaching, |
| 241 | + #personal = personal, |
| 242 | + prop = outcome.meanNoNA, |
| 243 | + n = outcome.lengthNoNA |
| 244 | + ) |
| 245 | + ) |
| 246 | + cat(outcomeName, "\n") |
| 247 | + cat(f$prop, "\n\n") |
| 248 | + svg(paste("plots/outcome", outcomeName, "all_groups.svg", sep="."), height=4, width=8) |
| 249 | + print(barchart( |
| 250 | + prop ~ group | message, |
| 251 | + data = f, |
| 252 | + layout=c(4,1), |
| 253 | + xlab="Pre-message rating", |
| 254 | + lab="Proportion of editors", |
| 255 | + main=paste("Proportion of editors", outcomeNames[[outcomeName]]) |
| 256 | + )) |
| 257 | + dev.off() |
| 258 | +} |
| 259 | + |
| 260 | +messaged_codings$default = !messaged_codings$personal & !messaged_codings$teaching |
| 261 | +messaged_codings$teaching_only = messaged_codings$teaching & !messaged_codings$personal |
| 262 | +messaged_codings$personal_only = !messaged_codings$teaching & messaged_codings$personal |
| 263 | +messaged_codings$teaching_and_personal = messaged_codings$teaching & messaged_codings$personal |
| 264 | + |
| 265 | +s = scale |
| 266 | + |
| 267 | +for(condition in c("teaching_only", "personal_only", "teaching_and_personal")){ |
| 268 | + cat("-----------------------------------------------------------\n") |
| 269 | + cat("-----------", condition, "\n") |
| 270 | + cat("-----------------------------------------------------------\n") |
| 271 | + exp_codings = messaged_codings[ |
| 272 | + messaged_codings[[condition]] | |
| 273 | + messaged_codings$default, |
| 274 | + ] |
| 275 | + |
| 276 | + exp_codings$condition = exp_codings[[condition]] |
| 277 | + |
| 278 | + print(summary(glm( |
| 279 | + good_outcome ~ |
| 280 | + anon + |
| 281 | + s(ntalk_edits_before_msg) + |
| 282 | + s(talk_edits_before_msg) + |
| 283 | + s(before_rating) * |
| 284 | + condition, |
| 285 | + data = exp_codings[exp_codings$image,] |
| 286 | + ))) |
| 287 | + print(summary(glm( |
| 288 | + good_outcome ~ |
| 289 | + anon + |
| 290 | + s(ntalk_edits_before_msg) + |
| 291 | + s(talk_edits_before_msg) + |
| 292 | + s(before_rating) * |
| 293 | + condition, |
| 294 | + data = exp_codings[!exp_codings$image,] |
| 295 | + ))) |
| 296 | + |
| 297 | + |
| 298 | + print(summary(glm( |
| 299 | + improves ~ |
| 300 | + anon + |
| 301 | + s(ntalk_edits_before_msg) + |
| 302 | + s(talk_edits_before_msg) + |
| 303 | + s(before_rating) * |
| 304 | + condition, |
| 305 | + data = exp_codings[exp_codings$image,] |
| 306 | + ))) |
| 307 | + print(summary(glm( |
| 308 | + improves ~ |
| 309 | + anon + |
| 310 | + s(ntalk_edits_before_msg) + |
| 311 | + s(talk_edits_before_msg) + |
| 312 | + s(before_rating) * |
| 313 | + condition, |
| 314 | + data = exp_codings[!exp_codings$image,] |
| 315 | + ))) |
| 316 | + |
| 317 | + |
| 318 | + print(summary(glm( |
| 319 | + contact ~ |
| 320 | + anon + |
| 321 | + s(ntalk_edits_before_msg) + |
| 322 | + s(talk_edits_before_msg) + |
| 323 | + s(before_rating) * |
| 324 | + condition, |
| 325 | + data = exp_codings[exp_codings$image,] |
| 326 | + ))) |
| 327 | + print(summary(glm( |
| 328 | + contact ~ |
| 329 | + anon + |
| 330 | + s(ntalk_edits_before_msg) + |
| 331 | + s(talk_edits_before_msg) + |
| 332 | + s(before_rating) * |
| 333 | + condition, |
| 334 | + data = exp_codings[!exp_codings$image,] |
| 335 | + ))) |
| 336 | + |
| 337 | + |
| 338 | + print(summary(glm( |
| 339 | + good_contact ~ |
| 340 | + anon + |
| 341 | + s(ntalk_edits_before_msg) + |
| 342 | + s(talk_edits_before_msg) + |
| 343 | + s(before_rating) * |
| 344 | + condition, |
| 345 | + data = exp_codings[exp_codings$image,] |
| 346 | + ))) |
| 347 | + print(summary(glm( |
| 348 | + good_contact ~ |
| 349 | + anon + |
| 350 | + s(ntalk_edits_before_msg) + |
| 351 | + s(talk_edits_before_msg) + |
| 352 | + s(before_rating) * |
| 353 | + condition, |
| 354 | + data = exp_codings[!exp_codings$image,] |
| 355 | + ))) |
| 356 | + |
| 357 | + |
| 358 | + print(summary(glm( |
| 359 | + stay ~ |
| 360 | + anon + |
| 361 | + s(ntalk_edits_before_msg) + |
| 362 | + s(talk_edits_before_msg) + |
| 363 | + s(before_rating) * |
| 364 | + condition, |
| 365 | + data = exp_codings[exp_codings$image,] |
| 366 | + ))) |
| 367 | + print(summary(glm( |
| 368 | + stay ~ |
| 369 | + anon + |
| 370 | + s(ntalk_edits_before_msg) + |
| 371 | + s(talk_edits_before_msg) + |
| 372 | + s(before_rating) * |
| 373 | + condition, |
| 374 | + data = exp_codings[!exp_codings$image,] |
| 375 | + ))) |
| 376 | +} |
| 377 | + |
| 378 | + |
Index: trunk/tools/wsor/newbie_warnings/R/chi2_tests.R |
— | — | @@ -0,0 +1,188 @@ |
| 2 | +source("loader/load_hugglings.R") |
| 3 | +source("loader/load_huggling_codings.R") |
| 4 | +library(doBy) |
| 5 | +hugglings = load_hugglings() |
| 6 | + |
| 7 | +hugglingCounts = summaryBy( |
| 8 | + recipient ~ recipient, |
| 9 | + data = hugglings, |
| 10 | + FUN=length |
| 11 | +) |
| 12 | +hugglingCounts$count = hugglingCounts$recipient.length |
| 13 | +hugglingCounts$recipient.length = NULL |
| 14 | + |
| 15 | +hugglings = merge(hugglings, hugglingCounts, by=c("recipient")) |
| 16 | + |
| 17 | +huggling_codings = load_huggling_codings(reload=T) |
| 18 | +messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),] |
| 19 | + |
| 20 | +messaged_codings$retailates = messaged_codings$retaliates > 0 |
| 21 | +messaged_codings$contact = !is.na(messaged_codings$contacts_huggler) & (messaged_codings$contacts_huggler > 0 | messaged_codings$retaliates > 0) |
| 22 | +messaged_codings$quality_work = messaged_codings$after_rating >= 3.0 |
| 23 | +messaged_codings$stay = !is.na(messaged_codings$after_rating) |
| 24 | +messaged_codings$improves = messaged_codings$after_rating > messaged_codings$before_rating |
| 25 | +messaged_codings$anon = messaged_codings$is_anon > 0 |
| 26 | +messaged_codings$talk_edits_before_msg = with( |
| 27 | + messaged_codings, |
| 28 | + user_talk_edits_after_msg + article_talk_edits_before_msg |
| 29 | +) |
| 30 | +messaged_codings$ntalk_edits_before_msg = with( |
| 31 | + messaged_codings, |
| 32 | + edits_before_msg - talk_edits_before_msg |
| 33 | +) |
| 34 | +messaged_codings$good_contact = mapply( |
| 35 | + function(contact, retaliates){ |
| 36 | + if(!is.na(contact) & contact){ |
| 37 | + retaliates <= 0 |
| 38 | + }else{ |
| 39 | + NA |
| 40 | + } |
| 41 | + }, |
| 42 | + messaged_codings$contact, |
| 43 | + messaged_codings$retaliates |
| 44 | +) |
| 45 | +messaged_codings$good_outcome = with( |
| 46 | + messaged_codings, |
| 47 | + ( |
| 48 | + before_rating <= 2 & |
| 49 | + ( |
| 50 | + is.na(after_rating) | |
| 51 | + after_rating > 2 |
| 52 | + ) |
| 53 | + ) | |
| 54 | + ( |
| 55 | + !is.na(good_contact) & |
| 56 | + good_contact |
| 57 | + ) | |
| 58 | + ( |
| 59 | + !is.na(quality_work) & |
| 60 | + quality_work |
| 61 | + ) |
| 62 | +) |
| 63 | + |
| 64 | +## |
| 65 | +# Groups |
| 66 | +# |
| 67 | +# - < 2 at least one of us thought "no hope" |
| 68 | +# - >= 2 & <= 3 possibles |
| 69 | +# - > 3 at least one of us thought "golden" |
| 70 | +# |
| 71 | +# For each group: |
| 72 | +# - contact |
| 73 | +# - contact huggler + retaliate |
| 74 | +# - talk? (wait for staeiou) |
| 75 | +# - continue editing |
| 76 | +# - did they actually |
| 77 | +# - quality |
| 78 | +# - improve |
| 79 | +# - was it good |
| 80 | +# - degrade |
| 81 | +# |
| 82 | +# |
| 83 | +# Predictors: |
| 84 | +# - number of edilts before message |
| 85 | +# - number deleted |
| 86 | +# - makes edits to talk (before/after) |
| 87 | + |
| 88 | +messaged_codings$group = as.factor(sapply( |
| 89 | + messaged_codings$before_rating, |
| 90 | + function(rating){ |
| 91 | + if(is.na(rating)){ |
| 92 | + NA |
| 93 | + }else if(rating < 2){ |
| 94 | + "unlikely" |
| 95 | + }else if(rating <= 3){ |
| 96 | + "possible" |
| 97 | + }else{ |
| 98 | + "golden" |
| 99 | + } |
| 100 | + } |
| 101 | +)) |
| 102 | + |
| 103 | +formatNum = function(num){ |
| 104 | + if(!is.numeric(num) | is.nan(num)){ |
| 105 | + " --- " |
| 106 | + } |
| 107 | + else if(num >= 0){ |
| 108 | + paste(" ", format(round(num, 3), nsmall=3), sep="") |
| 109 | + }else{ |
| 110 | + format(round(num, 3), nsmall=3) |
| 111 | + } |
| 112 | +} |
| 113 | + |
| 114 | +for(group in c("unlikely", "possible", "golden")){ |
| 115 | + group_codings = messaged_codings[messaged_codings$group == group,] |
| 116 | + |
| 117 | + |
| 118 | + cat("Result's for ", length(group_codings$group), " '", group, "' editors:\n", sep='') |
| 119 | + cat("============================================================\n") |
| 120 | + |
| 121 | + control = group_codings[!group_codings$personal & !group_codings$teaching,] |
| 122 | + personal = group_codings[group_codings$personal & !group_codings$teaching,] |
| 123 | + teaching = group_codings[group_codings$teaching & !group_codings$personal,] |
| 124 | + both = group_codings[group_codings$teaching & group_codings$personal,] |
| 125 | + |
| 126 | + experiments = list( |
| 127 | + list(name="Personal ", data=personal), |
| 128 | + list(name="Teaching ", data=teaching), |
| 129 | + list(name="Personal & Teaching", data=teaching) |
| 130 | + ) |
| 131 | + |
| 132 | + outcomes = list( |
| 133 | + list(name="Good outcome", field="good_outcome"), |
| 134 | + list(name="Improves", field="improves"), |
| 135 | + list(name="Contact", field="contact"), |
| 136 | + list(name="Stays", field="stay"), |
| 137 | + list(name="Good contact", field="good_contact") |
| 138 | + ) |
| 139 | + |
| 140 | + for(outcome in outcomes){ |
| 141 | + cat(outcome$name, ": \n", sep="") |
| 142 | + |
| 143 | + controlLen = length(na.omit(control[[outcome$field]])) |
| 144 | + controlSuccess = sum(control[[outcome$field]], na.rm=T) |
| 145 | + cat( |
| 146 | + "\tControl ", |
| 147 | + ": prop=", formatNum(controlSuccess/controlLen), |
| 148 | + ", n=", controlLen, "\n", |
| 149 | + sep="" |
| 150 | + ) |
| 151 | + for(experiment in experiments){ |
| 152 | + expSuccess = sum(experiment$data[[outcome$field]], na.rm=T) |
| 153 | + expLen = length(na.omit(experiment$data[[outcome$field]])) |
| 154 | + if(controlLen > 0 & expLen > 0){ |
| 155 | + t = prop.test( |
| 156 | + c( |
| 157 | + expSuccess, |
| 158 | + controlSuccess |
| 159 | + ), |
| 160 | + c( |
| 161 | + expLen, |
| 162 | + controlLen |
| 163 | + ) |
| 164 | + ) |
| 165 | + }else{ |
| 166 | + t = list( |
| 167 | + p.value=NA, |
| 168 | + conf.int=c(NA, NA) |
| 169 | + ) |
| 170 | + } |
| 171 | + |
| 172 | + propDiff = mean(experiment$data[[outcome$field]], na.rm=T)-mean(control[[outcome$field]], na.rm=T) |
| 173 | + cat( |
| 174 | + "\t", experiment$name, |
| 175 | + ": prop=", formatNum(expSuccess/expLen), |
| 176 | + ", diff=", formatNum(propDiff), |
| 177 | + ", p-value=", formatNum(t$p.value), |
| 178 | + ", conf.int=(", formatNum(t$conf.int[1]), ", ", formatNum(t$conf.int[2]), ")", |
| 179 | + ", n=", expLen, "\n", |
| 180 | + sep="" |
| 181 | + ) |
| 182 | + } |
| 183 | + cat("\n") |
| 184 | + } |
| 185 | + |
| 186 | + |
| 187 | + cat("\n\n\n") |
| 188 | +} |
| 189 | + |