r106108 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r106107‎ \| r106108 \| r106109 >
Date:	21:53, 13 December 2011
Author:	halfak
Status:	deferred
Tags:
Comment:	Added CHI^2 tests to hugglings
Modified paths:	/trunk/tools/wsor/newbie_warnings/R/chi2_tests.R (added) (history) /trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R (added) (history)

Diff [purge]

Index: trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R
—	—	@@ -0,0 +1,377 @@
	2	+source("loader/load_huggling_codings_mk2.R")
	3	+library(doBy)
	4	+hugglings = load_huggling_codings_mk2()
	5	+
	6	+#hugglingCounts = summaryBy(
	7	+# recipient ~ recipient,
	8	+# data = hugglings,
	9	+# FUN=length
	10	+#)
	11	+#hugglingCounts$count = hugglingCounts$recipient.length
	12	+#hugglingCounts$recipient.length = NULL
	13	+#
	14	+#hugglings = merge(hugglings, hugglingCounts, by=c("recipient"))
	15	+
	16	+#huggling_codings = load_huggling_codings(reload=T)
	17	+#messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),]
	18	+ifNA = function(val, naThen){
	19	+ if(is.na(val)){
	20	+ naThen
	21	+ }else{
	22	+ val
	23	+ }
	24	+}
	25	+
	26	+hugglings$contact = with(
	27	+ hugglings,
	28	+ mapply(
	29	+ ifNA,
	30	+ responds_hugglers_talk \|
	31	+ responds_own_talk \|
	32	+ responds_elsewhere \|
	33	+ retaliates,
	34	+ F
	35	+ )
	36	+)
	37	+hugglings$good_contact = mapply(
	38	+ function(contact, retaliates){
	39	+ if(!contact){
	40	+ NA
	41	+ }else{
	42	+ !retaliates
	43	+ }
	44	+ },
	45	+ hugglings$contact,
	46	+ hugglings$retaliates
	47	+)
	48	+hugglings$stay = !is.na(hugglings$after_rating)
	49	+hugglings$improves = hugglings$after_rating > hugglings$before_rating
	50	+hugglings$talk_edits_before_msg = with(
	51	+ hugglings,
	52	+ user_talk_edits_after_msg + article_talk_edits_before_msg
	53	+)
	54	+# Can't do it
	55	+#messaged_codings$ntalk_edits_before_msg = with(
	56	+# messaged_codings,
	57	+# edits_before_msg - talk_edits_before_msg
	58	+#)
	59	+
	60	+hugglings$good_outcome = with(
	61	+ hugglings,
	62	+ ( #Suckas leave or get better
	63	+ before_rating <= 2 &
	64	+ (
	65	+ is.na(after_rating) \|
	66	+ after_rating > 2
	67	+ )
	68	+ ) \|
	69	+ ( #Good contact was made
	70	+ !is.na(good_contact) &
	71	+ good_contact
	72	+ ) \|
	73	+ ( #Edits are good after receiving message
	74	+ !is.na(after_rating) &
	75	+ after_rating > 3
	76	+ )
	77	+)
	78	+
	79	+##
	80	+# Groups
	81	+#
	82	+# - <= 1 "vandal": We all agreed that the editor was a blatant vandal
	83	+# - > 1 & <= 2 "bad faith": We all agreed that the editor was bad faith
	84	+# - > 2 & < 4 "test": A test edit, but not good faith
	85	+# - >= 4 "good faith": Good faith to excellent
	86	+#
	87	+# For each group:
	88	+# - contact
	89	+# - contact huggler + retaliate
	90	+# - talk? (wait for staeiou)
	91	+# - continue editing
	92	+# - did they actually
	93	+# - quality
	94	+# - improve
	95	+# - was it good
	96	+# - degrade
	97	+#
	98	+#
	99	+# Predictors:
	100	+# - number of edilts before message
	101	+# - number deleted
	102	+# - makes edits to talk (before/after)
	103	+
	104	+hugglings$group = as.factor(sapply(
	105	+ hugglings$before_rating,
	106	+ function(rating){
	107	+ if(is.na(rating)){
	108	+ NA
	109	+ }else if(rating <= 1){
	110	+ "vandal"
	111	+ }else if(rating > 1 & rating <= 2){
	112	+ "bad faith"
	113	+ }else if(rating > 2 & rating < 4){
	114	+ "test"
	115	+ }else if(rating >= 4){
	116	+ "good faith"
	117	+ }else{
	118	+ NA
	119	+ }
	120	+ }
	121	+))
	122	+formatNum = function(num){
	123	+ if(num >= 0){
	124	+ paste(" ", format(round(num, 3), nsmall=3), sep="")
	125	+ }else{
	126	+ format(round(num, 3), nsmall=3)
	127	+ }
	128	+}
	129	+
	130	+for(group in c("vandal", "bad faith", "test", "good faith")){
	131	+ group_codings = hugglings[hugglings$group == group,]
	132	+
	133	+
	134	+ cat("Result's for ", length(group_codings$group), " '", group, "' editors:\n", sep='')
	135	+ cat("============================================================\n")
	136	+
	137	+ control = group_codings[group_codings$def,]
	138	+ personal = group_codings[group_codings$personal,]
	139	+ nodirectives = group_codings[group_codings$nodirectives,]
	140	+
	141	+ experiments = list(
	142	+ list(name="Personal ", data=personal),
	143	+ list(name="No Directives", data=nodirectives)
	144	+ )
	145	+
	146	+ outcomes = list(
	147	+ list(name="Good outcome", field="good_outcome"),
	148	+ list(name="Improves", field="improves"),
	149	+ list(name="Contact", field="contact"),
	150	+ list(name="Stays", field="stay"),
	151	+ list(name="Good contact", field="good_contact")
	152	+ )
	153	+ for(outcome in outcomes){
	154	+ cat(outcome$name, ": \n", sep="")
	155	+
	156	+ controlLen = length(na.omit(control[[outcome$field]]))
	157	+ controlSuccess = sum(control[[outcome$field]], na.rm=T)
	158	+ cat(
	159	+ "\tControl ",
	160	+ ": prop=", formatNum(controlSuccess/controlLen),
	161	+ ", n=", controlLen, "\n",
	162	+ sep=""
	163	+ )
	164	+ for(experiment in experiments){
	165	+ expSuccess = sum(experiment$data[[outcome$field]], na.rm=T)
	166	+ expLen = length(na.omit(experiment$data[[outcome$field]]))
	167	+ t = prop.test(
	168	+ c(
	169	+ expSuccess,
	170	+ controlSuccess
	171	+ ),
	172	+ c(
	173	+ expLen,
	174	+ controlLen
	175	+ )
	176	+ )
	177	+
	178	+ propDiff = mean(experiment$data[[outcome$field]], na.rm=T)-mean(control[[outcome$field]], na.rm=T)
	179	+ cat(
	180	+ "\t", experiment$name,
	181	+ ": prop=", formatNum(expSuccess/expLen),
	182	+ ", diff=", formatNum(propDiff),
	183	+ ", p-value=", formatNum(t$p.value),
	184	+ ", conf.int=(", formatNum(t$conf.int[1]), ", ", formatNum(t$conf.int[2]), ")",
	185	+ ", n=", expLen, "\n",
	186	+ sep=""
	187	+ )
	188	+ }
	189	+ cat("\n")
	190	+ }
	191	+
	192	+
	193	+ cat("\n\n\n")
	194	+}
	195	+
	196	+meanNoNA = function(x){
	197	+ mean(x, na.rm=T)
	198	+}
	199	+lengthNoNA = function(x){
	200	+ length(na.omit(x))
	201	+}
	202	+
	203	+library(lattice)
	204	+outcomeNames = list(
	205	+ good_outcome = "with a \"good outcome\"",
	206	+ improves = "who show improvement",
	207	+ contact = "who contact the reverting editor",
	208	+ good_contact = "who contact the reverting editor nicely",
	209	+ stay = "who make at least one edit after reading the message"
	210	+)
	211	+for(outcomeName in c("good_outcome", "improves", "contact", "good_contact", "stay")){
	212	+ f = with(
	213	+ summaryBy(
	214	+ outcome ~ group + teaching + personal,
	215	+ data = data.frame(
	216	+ outcome = messaged_codings[[outcomeName]],
	217	+ teaching = messaged_codings$teaching,
	218	+ personal = messaged_codings$personal,
	219	+ group = messaged_codings$group
	220	+ ),
	221	+ FUN=c(meanNoNA, lengthNoNA)
	222	+ ),
	223	+ data.frame(
	224	+ group = group,
	225	+ message = mapply(
	226	+ function(personal, teaching){
	227	+ if(personal & teaching){
	228	+ "personal & teaching"
	229	+ }else if(personal){
	230	+ "personal"
	231	+ }else if(teaching){
	232	+ "teaching"
	233	+ }else{
	234	+ "control"
	235	+ }
	236	+ },
	237	+ personal,
	238	+ teaching
	239	+ ),
	240	+ #teaching = teaching,
	241	+ #personal = personal,
	242	+ prop = outcome.meanNoNA,
	243	+ n = outcome.lengthNoNA
	244	+ )
	245	+ )
	246	+ cat(outcomeName, "\n")
	247	+ cat(f$prop, "\n\n")
	248	+ svg(paste("plots/outcome", outcomeName, "all_groups.svg", sep="."), height=4, width=8)
	249	+ print(barchart(
	250	+ prop ~ group \| message,
	251	+ data = f,
	252	+ layout=c(4,1),
	253	+ xlab="Pre-message rating",
	254	+ lab="Proportion of editors",
	255	+ main=paste("Proportion of editors", outcomeNames[[outcomeName]])
	256	+ ))
	257	+ dev.off()
	258	+}
	259	+
	260	+messaged_codings$default = !messaged_codings$personal & !messaged_codings$teaching
	261	+messaged_codings$teaching_only = messaged_codings$teaching & !messaged_codings$personal
	262	+messaged_codings$personal_only = !messaged_codings$teaching & messaged_codings$personal
	263	+messaged_codings$teaching_and_personal = messaged_codings$teaching & messaged_codings$personal
	264	+
	265	+s = scale
	266	+
	267	+for(condition in c("teaching_only", "personal_only", "teaching_and_personal")){
	268	+ cat("-----------------------------------------------------------\n")
	269	+ cat("-----------", condition, "\n")
	270	+ cat("-----------------------------------------------------------\n")
	271	+ exp_codings = messaged_codings[
	272	+ messaged_codings[[condition]] \|
	273	+ messaged_codings$default,
	274	+ ]
	275	+
	276	+ exp_codings$condition = exp_codings[[condition]]
	277	+
	278	+ print(summary(glm(
	279	+ good_outcome ~
	280	+ anon +
	281	+ s(ntalk_edits_before_msg) +
	282	+ s(talk_edits_before_msg) +
	283	+ s(before_rating) *
	284	+ condition,
	285	+ data = exp_codings[exp_codings$image,]
	286	+ )))
	287	+ print(summary(glm(
	288	+ good_outcome ~
	289	+ anon +
	290	+ s(ntalk_edits_before_msg) +
	291	+ s(talk_edits_before_msg) +
	292	+ s(before_rating) *
	293	+ condition,
	294	+ data = exp_codings[!exp_codings$image,]
	295	+ )))
	296	+
	297	+
	298	+ print(summary(glm(
	299	+ improves ~
	300	+ anon +
	301	+ s(ntalk_edits_before_msg) +
	302	+ s(talk_edits_before_msg) +
	303	+ s(before_rating) *
	304	+ condition,
	305	+ data = exp_codings[exp_codings$image,]
	306	+ )))
	307	+ print(summary(glm(
	308	+ improves ~
	309	+ anon +
	310	+ s(ntalk_edits_before_msg) +
	311	+ s(talk_edits_before_msg) +
	312	+ s(before_rating) *
	313	+ condition,
	314	+ data = exp_codings[!exp_codings$image,]
	315	+ )))
	316	+
	317	+
	318	+ print(summary(glm(
	319	+ contact ~
	320	+ anon +
	321	+ s(ntalk_edits_before_msg) +
	322	+ s(talk_edits_before_msg) +
	323	+ s(before_rating) *
	324	+ condition,
	325	+ data = exp_codings[exp_codings$image,]
	326	+ )))
	327	+ print(summary(glm(
	328	+ contact ~
	329	+ anon +
	330	+ s(ntalk_edits_before_msg) +
	331	+ s(talk_edits_before_msg) +
	332	+ s(before_rating) *
	333	+ condition,
	334	+ data = exp_codings[!exp_codings$image,]
	335	+ )))
	336	+
	337	+
	338	+ print(summary(glm(
	339	+ good_contact ~
	340	+ anon +
	341	+ s(ntalk_edits_before_msg) +
	342	+ s(talk_edits_before_msg) +
	343	+ s(before_rating) *
	344	+ condition,
	345	+ data = exp_codings[exp_codings$image,]
	346	+ )))
	347	+ print(summary(glm(
	348	+ good_contact ~
	349	+ anon +
	350	+ s(ntalk_edits_before_msg) +
	351	+ s(talk_edits_before_msg) +
	352	+ s(before_rating) *
	353	+ condition,
	354	+ data = exp_codings[!exp_codings$image,]
	355	+ )))
	356	+
	357	+
	358	+ print(summary(glm(
	359	+ stay ~
	360	+ anon +
	361	+ s(ntalk_edits_before_msg) +
	362	+ s(talk_edits_before_msg) +
	363	+ s(before_rating) *
	364	+ condition,
	365	+ data = exp_codings[exp_codings$image,]
	366	+ )))
	367	+ print(summary(glm(
	368	+ stay ~
	369	+ anon +
	370	+ s(ntalk_edits_before_msg) +
	371	+ s(talk_edits_before_msg) +
	372	+ s(before_rating) *
	373	+ condition,
	374	+ data = exp_codings[!exp_codings$image,]
	375	+ )))
	376	+}
	377	+
	378	+
Index: trunk/tools/wsor/newbie_warnings/R/chi2_tests.R
—	—	@@ -0,0 +1,188 @@
	2	+source("loader/load_hugglings.R")
	3	+source("loader/load_huggling_codings.R")
	4	+library(doBy)
	5	+hugglings = load_hugglings()
	6	+
	7	+hugglingCounts = summaryBy(
	8	+ recipient ~ recipient,
	9	+ data = hugglings,
	10	+ FUN=length
	11	+)
	12	+hugglingCounts$count = hugglingCounts$recipient.length
	13	+hugglingCounts$recipient.length = NULL
	14	+
	15	+hugglings = merge(hugglings, hugglingCounts, by=c("recipient"))
	16	+
	17	+huggling_codings = load_huggling_codings(reload=T)
	18	+messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),]
	19	+
	20	+messaged_codings$retailates = messaged_codings$retaliates > 0
	21	+messaged_codings$contact = !is.na(messaged_codings$contacts_huggler) & (messaged_codings$contacts_huggler > 0 \| messaged_codings$retaliates > 0)
	22	+messaged_codings$quality_work = messaged_codings$after_rating >= 3.0
	23	+messaged_codings$stay = !is.na(messaged_codings$after_rating)
	24	+messaged_codings$improves = messaged_codings$after_rating > messaged_codings$before_rating
	25	+messaged_codings$anon = messaged_codings$is_anon > 0
	26	+messaged_codings$talk_edits_before_msg = with(
	27	+ messaged_codings,
	28	+ user_talk_edits_after_msg + article_talk_edits_before_msg
	29	+)
	30	+messaged_codings$ntalk_edits_before_msg = with(
	31	+ messaged_codings,
	32	+ edits_before_msg - talk_edits_before_msg
	33	+)
	34	+messaged_codings$good_contact = mapply(
	35	+ function(contact, retaliates){
	36	+ if(!is.na(contact) & contact){
	37	+ retaliates <= 0
	38	+ }else{
	39	+ NA
	40	+ }
	41	+ },
	42	+ messaged_codings$contact,
	43	+ messaged_codings$retaliates
	44	+)
	45	+messaged_codings$good_outcome = with(
	46	+ messaged_codings,
	47	+ (
	48	+ before_rating <= 2 &
	49	+ (
	50	+ is.na(after_rating) \|
	51	+ after_rating > 2
	52	+ )
	53	+ ) \|
	54	+ (
	55	+ !is.na(good_contact) &
	56	+ good_contact
	57	+ ) \|
	58	+ (
	59	+ !is.na(quality_work) &
	60	+ quality_work
	61	+ )
	62	+)
	63	+
	64	+##
	65	+# Groups
	66	+#
	67	+# - < 2 at least one of us thought "no hope"
	68	+# - >= 2 & <= 3 possibles
	69	+# - > 3 at least one of us thought "golden"
	70	+#
	71	+# For each group:
	72	+# - contact
	73	+# - contact huggler + retaliate
	74	+# - talk? (wait for staeiou)
	75	+# - continue editing
	76	+# - did they actually
	77	+# - quality
	78	+# - improve
	79	+# - was it good
	80	+# - degrade
	81	+#
	82	+#
	83	+# Predictors:
	84	+# - number of edilts before message
	85	+# - number deleted
	86	+# - makes edits to talk (before/after)
	87	+
	88	+messaged_codings$group = as.factor(sapply(
	89	+ messaged_codings$before_rating,
	90	+ function(rating){
	91	+ if(is.na(rating)){
	92	+ NA
	93	+ }else if(rating < 2){
	94	+ "unlikely"
	95	+ }else if(rating <= 3){
	96	+ "possible"
	97	+ }else{
	98	+ "golden"
	99	+ }
	100	+ }
	101	+))
	102	+
	103	+formatNum = function(num){
	104	+ if(!is.numeric(num) \| is.nan(num)){
	105	+ " --- "
	106	+ }
	107	+ else if(num >= 0){
	108	+ paste(" ", format(round(num, 3), nsmall=3), sep="")
	109	+ }else{
	110	+ format(round(num, 3), nsmall=3)
	111	+ }
	112	+}
	113	+
	114	+for(group in c("unlikely", "possible", "golden")){
	115	+ group_codings = messaged_codings[messaged_codings$group == group,]
	116	+
	117	+
	118	+ cat("Result's for ", length(group_codings$group), " '", group, "' editors:\n", sep='')
	119	+ cat("============================================================\n")
	120	+
	121	+ control = group_codings[!group_codings$personal & !group_codings$teaching,]
	122	+ personal = group_codings[group_codings$personal & !group_codings$teaching,]
	123	+ teaching = group_codings[group_codings$teaching & !group_codings$personal,]
	124	+ both = group_codings[group_codings$teaching & group_codings$personal,]
	125	+
	126	+ experiments = list(
	127	+ list(name="Personal ", data=personal),
	128	+ list(name="Teaching ", data=teaching),
	129	+ list(name="Personal & Teaching", data=teaching)
	130	+ )
	131	+
	132	+ outcomes = list(
	133	+ list(name="Good outcome", field="good_outcome"),
	134	+ list(name="Improves", field="improves"),
	135	+ list(name="Contact", field="contact"),
	136	+ list(name="Stays", field="stay"),
	137	+ list(name="Good contact", field="good_contact")
	138	+ )
	139	+
	140	+ for(outcome in outcomes){
	141	+ cat(outcome$name, ": \n", sep="")
	142	+
	143	+ controlLen = length(na.omit(control[[outcome$field]]))
	144	+ controlSuccess = sum(control[[outcome$field]], na.rm=T)
	145	+ cat(
	146	+ "\tControl ",
	147	+ ": prop=", formatNum(controlSuccess/controlLen),
	148	+ ", n=", controlLen, "\n",
	149	+ sep=""
	150	+ )
	151	+ for(experiment in experiments){
	152	+ expSuccess = sum(experiment$data[[outcome$field]], na.rm=T)
	153	+ expLen = length(na.omit(experiment$data[[outcome$field]]))
	154	+ if(controlLen > 0 & expLen > 0){
	155	+ t = prop.test(
	156	+ c(
	157	+ expSuccess,
	158	+ controlSuccess
	159	+ ),
	160	+ c(
	161	+ expLen,
	162	+ controlLen
	163	+ )
	164	+ )
	165	+ }else{
	166	+ t = list(
	167	+ p.value=NA,
	168	+ conf.int=c(NA, NA)
	169	+ )
	170	+ }
	171	+
	172	+ propDiff = mean(experiment$data[[outcome$field]], na.rm=T)-mean(control[[outcome$field]], na.rm=T)
	173	+ cat(
	174	+ "\t", experiment$name,
	175	+ ": prop=", formatNum(expSuccess/expLen),
	176	+ ", diff=", formatNum(propDiff),
	177	+ ", p-value=", formatNum(t$p.value),
	178	+ ", conf.int=(", formatNum(t$conf.int[1]), ", ", formatNum(t$conf.int[2]), ")",
	179	+ ", n=", expLen, "\n",
	180	+ sep=""
	181	+ )
	182	+ }
	183	+ cat("\n")
	184	+ }
	185	+
	186	+
	187	+ cat("\n\n\n")
	188	+}
	189	+

Status & tagging log

14:56, 16 December 2011 😂 (talk | contribs) changed the status of r106108 [removed: new added: deferred]