Index: trunk/tools/wsor/wikimedia/setup.py |
— | — | @@ -2,7 +2,7 @@ |
3 | 3 | from setuptools import setup, find_packages |
4 | 4 | |
5 | 5 | setup( |
6 | | - name='util', |
| 6 | + name='wmf', |
7 | 7 | version='1.0', |
8 | 8 | description="WMF utilities", |
9 | 9 | long_description=""" |
Index: trunk/tools/wsor/wikimedia/wmf/util.py |
— | — | @@ -234,3 +234,5 @@ |
235 | 235 | return True; |
236 | 236 | |
237 | 237 | return False; |
| 238 | + |
| 239 | + |
Index: trunk/tools/wsor/overworked/R/loader/load_reverter_months.R |
— | — | @@ -0,0 +1,23 @@ |
| 2 | +source("util/env.R") |
| 3 | + |
| 4 | + |
| 5 | + |
| 6 | +load_reverter_months = function(verbose=T, reload=F){ |
| 7 | + filename = paste(DATA_DIR, "en.reverter_months.20110115.tsv", sep="/") |
| 8 | + if(is.null(REVERTER_MONTHS) | reload){ |
| 9 | + REVERTER_MONTHS <<- NULL |
| 10 | + } |
| 11 | + if(is.null(REVERTER_MONTHS)){ |
| 12 | + if(verbose){cat("Loading reverter months from", filename, "...")} |
| 13 | + REVERTER_MONTHS <<- read.table( |
| 14 | + filename, |
| 15 | + header=T, sep="\t", |
| 16 | + quote="'\"", comment.char="", |
| 17 | + na.strings="\\N", |
| 18 | + ) |
| 19 | + if(verbose){cat("DONE!\n")} |
| 20 | + } |
| 21 | + REVERTER_MONTHS |
| 22 | +} |
| 23 | + |
| 24 | + |
Index: trunk/tools/wsor/overworked/R/revert_distributions.R |
— | — | @@ -0,0 +1,199 @@ |
| 2 | +source("loader/load_patroller_days.R") |
| 3 | + |
| 4 | +patroller_days = load_patroller_days() |
| 5 | +patroller_days = patroller_days[!grepl("bot( |$)", patroller_days$username, ignore.case=T),] |
| 6 | +patroller_days = patroller_days[!grepl("DASHBot", patroller_days$username, ignore.case=T),] |
| 7 | + |
| 8 | +library(lattice) |
| 9 | +library(doBy) |
| 10 | + |
| 11 | + |
| 12 | +patroller_years = with( |
| 13 | + summaryBy( |
| 14 | + count ~ year + user_id + username, |
| 15 | + data=patroller_days, |
| 16 | + FUN=sum |
| 17 | + ), |
| 18 | + data.frame( |
| 19 | + year = year, |
| 20 | + user_id = user_id, |
| 21 | + username = username, |
| 22 | + count = count.sum |
| 23 | + ) |
| 24 | +) |
| 25 | + |
| 26 | +patroller_years = patroller_years[order(patroller_years$count),] |
| 27 | +patroller_years$count_bucket = 2^round(log(patroller_years$count, base=2)) |
| 28 | + |
| 29 | +patroller_years.count_dist = with( |
| 30 | + summaryBy( |
| 31 | + user_id ~ year + count, |
| 32 | + data = patroller_years, |
| 33 | + FUN=length |
| 34 | + ), |
| 35 | + data.frame( |
| 36 | + year = year, |
| 37 | + count = count, |
| 38 | + freq = user_id.length |
| 39 | + ) |
| 40 | +) |
| 41 | + |
| 42 | +png('plots/dist.patroller_years_activity.png', height=768, width=1024) |
| 43 | +xyplot( |
| 44 | + freq ~ count | as.character(year), |
| 45 | + data = patroller_years.count_dist, |
| 46 | + panel = function(x, y, subscripts, group, ...){ |
| 47 | + panel.xyplot(x, y) |
| 48 | + panel.lines(x, y) |
| 49 | + }, |
| 50 | + main="Distribution of activity level among editors", |
| 51 | + ylab="Frequency", |
| 52 | + xlab="Activity level", |
| 53 | + #scales=list( |
| 54 | + # x=list( |
| 55 | + # log=2, |
| 56 | + # at=2^(1:max(patroller_years.count_dist$count)), |
| 57 | + # labels=2^(1:max(patroller_years.count_dist$count)) |
| 58 | + # ) |
| 59 | + #), |
| 60 | + layout=c(length(unique(patroller_years.count_dist$year)), 1) |
| 61 | +) |
| 62 | +dev.off() |
| 63 | + |
| 64 | + |
| 65 | + |
| 66 | +for(year in sort(unique(patroller_years$year))){ |
| 67 | + p_year = patroller_years[patroller_years$year==year,] |
| 68 | + p_year = p_year[order(p_year$count, decreasing=T),] |
| 69 | + png(paste('plots/bars.patroller_years_activity', year, 'png', sep="."), height=768, width=1024) |
| 70 | + print(barchart( |
| 71 | + reorder(substring(as.character(username),1,30), count) ~ count, |
| 72 | + data=p_year[1:50,], |
| 73 | + horizontal=T, |
| 74 | + xlim=c(0, 110000), |
| 75 | + xlab="Patrolled pages" |
| 76 | + )) |
| 77 | + dev.off() |
| 78 | + cat(year, "\n") |
| 79 | + print(summary(p_year$count)) |
| 80 | +} |
| 81 | + |
| 82 | + |
| 83 | +patroller_months = with( |
| 84 | + summaryBy( |
| 85 | + count ~ year + month + user_id + username, |
| 86 | + data=patroller_days, |
| 87 | + FUN=sum |
| 88 | + ), |
| 89 | + data.frame( |
| 90 | + year = year, |
| 91 | + month = month, |
| 92 | + user_id = user_id, |
| 93 | + username = username, |
| 94 | + count = count.sum |
| 95 | + ) |
| 96 | +) |
| 97 | + |
| 98 | +nNoNA = function(x){ |
| 99 | + length(subset(x, !is.na(x))) |
| 100 | +} |
| 101 | +sdNoNA = function(x){ |
| 102 | +    sd(x, na.rm=T) |
| 103 | +} |
| 104 | +meanNoNA = function(x){ |
| 105 | + mean(x, na.rm=T) |
| 106 | +} |
| 107 | + |
| 108 | +patrol_months.per_user = with( |
| 109 | + summaryBy( |
| 110 | + count ~ year + month, |
| 111 | + data=patroller_months, |
| 112 | + FUN=c(meanNoNA, sdNoNA, nNoNA) |
| 113 | + ), |
| 114 | + data.frame( |
| 115 | + year = year, |
| 116 | + month = month, |
| 117 | + year.month = year + month/100, |
| 118 | + count.mean = count.meanNoNA, |
| 119 | + count.sd = count.sdNoNA, |
| 120 | + count.n = count.nNoNA |
| 121 | + ) |
| 122 | +) |
| 123 | + |
| 124 | +model = lm( |
| 125 | + count.mean ~ as.numeric(factor(year.month)), |
| 126 | + data=patrol_months.per_user[patrol_months.per_user$year.month <= 2011.05,] |
| 127 | +) |
| 128 | +summary(model) |
| 129 | +monthLine = function(x){ |
| 130 | + model$coefficients[['(Intercept)']] + model$coefficients[['as.numeric(factor(year.month))']]*x |
| 131 | +} |
| 132 | + |
| 133 | +png("plots/patrol_months.per_user.png", height=768, width=1024) |
| 134 | +print(xyplot( |
| 135 | + count.mean ~ as.factor(year.month), |
| 136 | + data = patrol_months.per_user[patrol_months.per_user$year.month <= 2011.05,], |
| 137 | + panel = function(x, y, subscripts, ...){ |
| 138 | + f = patrol_months.per_user[patrol_months.per_user$year.month <= 2011.05,][subscripts,] |
| 139 | + panel.xyplot(x, y, col="#000000", ...) |
| 140 | + se = f$count.sd/sqrt(f$count.n) |
| 141 | + panel.arrows(x, y+se, x, y-se, ends="both", angle=90, col="#000000", length=0.05, ...) |
| 142 | + panel.lines(x[order(x)], y[order(x)], lwd=2, ...) |
| 143 | + panel.lines(x[order(x)], monthLine(as.numeric(x[order(x)])), lwd=2, col="#000000") |
| 144 | + }, |
| 145 | + #main="Average Patroller workload by month", |
| 146 | + ylab="Mean patrolled pages per user", |
| 147 | + xlab="Month", |
| 148 | + scales=list(x=list(rot=45)) |
| 149 | +)) |
| 150 | +dev.off() |
| 151 | + |
| 152 | +patrol_years.per_user = with( |
| 153 | + summaryBy( |
| 154 | + count ~ year, |
| 155 | + data=patroller_years, |
| 156 | + FUN=c(meanNoNA, sdNoNA, nNoNA) |
| 157 | + ), |
| 158 | + data.frame( |
| 159 | + year = year, |
| 160 | + count.mean = count.meanNoNA, |
| 161 | + count.sd = count.sdNoNA, |
| 162 | + count.n = count.nNoNA |
| 163 | + ) |
| 164 | +) |
| 165 | + |
| 166 | +model = lm( |
| 167 | + count.mean ~ year, |
| 168 | + data=patrol_years.per_user[patrol_years.per_user$year <= 2010,] |
| 169 | +) |
| 170 | +summary(model) |
| 171 | + |
| 172 | +model = lm( |
| 173 | + count.mean ~ log(year-2006, base=2), |
| 174 | + data=patrol_years.per_user[patrol_years.per_user$year <= 2010,] |
| 175 | +) |
| 176 | +summary(model) |
| 177 | +yearCurve=function(x){ |
| 178 | + model$coefficients[['(Intercept)']] + log(x-2006, base=2)*model$coefficients[['log(year - 2006, base = 2)']] |
| 179 | +} |
| 180 | +png("plots/patrol_years.per_user.png", height=768, width=1024) |
| 181 | +print(xyplot( |
| 182 | + count.mean ~ year-2006, |
| 183 | + data = patrol_years.per_user[patrol_years.per_user$year <= 2010,], |
| 184 | + panel = function(x, y, subscripts, ...){ |
| 185 | +        f = patrol_years.per_user[patrol_years.per_user$year <= 2010,][subscripts,] |
| 186 | + panel.xyplot(x, y, col="#000000", ...) |
| 187 | + se = f$count.sd/sqrt(f$count.n) |
| 188 | + panel.arrows(x, y+se, x, y-se, ends="both", angle=90, col="#000000", length=0.05, ...) |
| 189 | + #panel.lines(x[order(x)], y[order(x)], lwd=2, ...) |
| 190 | + #panel.curve(myCurve, 2006, 2011, col="#000000") |
| 191 | + panel.lines(seq(0, 5, .1), yearCurve(seq(2006, 2011, .1)), lwd=2, col="#000000") |
| 192 | + }, |
| 193 | + #main="Average Patroller workload by year", |
| 194 | + ylab="Mean patrolled pages per user", |
| 195 | + xlab="Year (log scaled)", |
| 196 | + pch=20, |
| 197 | +    scales=list(x=list(at=1:4, labels=2007:2010)) |
| 198 | +)) |
| 199 | +dev.off() |
| 200 | + |
Index: trunk/tools/wsor/overworked/R/Rplots.pdf |
— | — | (binary change: R Graphics Output PDF regenerated; CreationDate/ModDate updated from D:20110610225803 to D:20110627164459; PDF stream diff omitted) |
Index: trunk/tools/wsor/overworked/convert_reverts.py |
— | — | @@ -0,0 +1,48 @@ |
| 2 | +import argparse, ast, sys, os |
| 3 | + |
| 4 | + |
| 5 | + |
| 6 | +def main(args): |
| 7 | + files = { |
| 8 | + 'revert': args.revert, |
| 9 | + 'reverted': args.reverted |
| 10 | + } |
| 11 | + |
| 12 | + for line in args.input: |
| 13 | + ty = ast.literal_eval(line.strip().split("\t")[0])  # first field is a repr()'d type tag; parse it without eval |
| 14 | + files[ty].write(line.split("\t", 1)[1]) |
| 15 | + |
| 16 | + if ty == "revert": |
| 17 | + sys.stderr.write("<") |
| 18 | + elif ty == "reverted": |
| 19 | + sys.stderr.write("|") |
| 20 | + |
| 21 | + sys.stderr.write("\n") |
| 22 | + |
| 23 | + |
| 24 | +if __name__ == "__main__": |
| 25 | + parser = argparse.ArgumentParser( |
| 26 | + description='Cleans revert data from a dump map process.' |
| 27 | + ) |
| 28 | + parser.add_argument( |
| 29 | + '-i', '--input', |
| 30 | + metavar="<path>", |
| 31 | + type=lambda fn:open(fn, "r"), |
| 32 | + help='the path of the file to filter (defaults to stdin)', |
| 33 | + default=sys.stdin |
| 34 | + ) |
| 35 | + parser.add_argument( |
| 36 | + '--reverted', |
| 37 | + metavar="<path>", |
| 38 | + type=lambda fn:open(os.path.expanduser(fn), "w"), |
| 39 | + help='the path to a file to produce representing the reverted revisions' |
| 40 | + ) |
| 41 | + parser.add_argument( |
| 42 | + '--revert', |
| 43 | + metavar="<path>", |
| 44 | + type=lambda fn:open(os.path.expanduser(fn), "w"), |
| 45 | + help='the path to a file to produce representing the reverting revisions' |
| 46 | + ) |
| 47 | + args = parser.parse_args() |
| 48 | + main(args) |
| 49 | + |
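A note on the row format convert_reverts.py expects: the dump map process emits rows whose first tab-separated field is a repr()'d type tag ('revert' or 'reverted'), followed by the payload columns, which is why the tag is parsed back into a plain string before the row is routed to the matching output file. A minimal sketch of that parsing step; the sample row is hypothetical:

    import ast

    # Hypothetical row from the map output: a quoted type tag, then the payload columns.
    line = "'revert'\t12345\t12300\tTrue\t3\n"

    ty = ast.literal_eval(line.strip().split("\t")[0])   # -> 'revert'
    payload = line.split("\t", 1)[1]                     # everything after the tag, still tab-separated

    assert ty in ("revert", "reverted")
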
Index: trunk/tools/wsor/overworked/testing.sql |
— | — | @@ -1,52 +1,52 @@ |
2 | 2 | +------------+-------------+ |
3 | | -| log_action | log_type | |
| 3 | +| log_action log_type |
4 | 4 | +------------+-------------+ |
5 | | -| delete | delete | |
6 | | -| upload | upload | |
7 | | -| protect | protect | |
8 | | -| block | block | |
9 | | -| unblock | block | |
10 | | -| restore | delete | |
11 | | -| unprotect | protect | |
12 | | -| rights | rights | |
13 | | -| move | move | |
14 | | -| move_redir | move | |
15 | | -| | | |
16 | | -| renameuser | renameuser | |
17 | | -| newusers | newusers | |
18 | | -| create | newusers | |
19 | | -| create2 | newusers | |
20 | | -| modify | protect | |
21 | | -| overwrite | upload | |
22 | | -| upload | import | |
23 | | -| patrol | patrol | |
24 | | -| delete | suppress | |
25 | | -| autocreate | newusers | |
26 | | -| delete | globalauth | |
27 | | -| whitelist | gblblock | |
28 | | -| dwhitelist | gblblock | |
29 | | -| move_prot | protect | |
30 | | -| reblock | block | |
31 | | -| event | suppress | |
32 | | -| event | delete | |
33 | | -| revision | delete | |
34 | | -| revision | suppress | |
35 | | -| reblock | suppress | |
36 | | -| modify | abusefilter | |
37 | | -| block | suppress | |
38 | | -| usergroups | gblrights | |
39 | | -| interwiki | import | |
40 | | -| groupprms2 | gblrights | |
41 | | -| config | stable | |
42 | | -| approve-ia | review | |
43 | | -| approve-a | review | |
44 | | -| unapprove | review | |
45 | | -| approve | review | |
46 | | -| reset | stable | |
47 | | -| modify | stable | |
48 | | -| approve-i | review | |
49 | | -| hide-afl | suppress | |
50 | | -| unhide-afl | suppress | |
| 5 | +| delete delete |
| 6 | +| upload upload |
| 7 | +| protect protect |
| 8 | +| block block |
| 9 | +| unblock block |
| 10 | +| restore delete |
| 11 | +| unprotect protect |
| 12 | +| rights rights |
| 13 | +| move move |
| 14 | +| move_redir move |
| 15 | +| |
| 16 | +| renameuser renameuser |
| 17 | +| newusers newusers |
| 18 | +| create newusers |
| 19 | +| create2 newusers |
| 20 | +| modify protect |
| 21 | +| overwrite upload |
| 22 | +| upload import |
| 23 | +| patrol patrol |
| 24 | +| delete suppress |
| 25 | +| autocreate newusers |
| 26 | +| delete globalauth |
| 27 | +| whitelist gblblock |
| 28 | +| dwhitelist gblblock |
| 29 | +| move_prot protect |
| 30 | +| reblock block |
| 31 | +| event suppress |
| 32 | +| event delete |
| 33 | +| revision delete |
| 34 | +| revision suppress |
| 35 | +| reblock suppress |
| 36 | +| modify abusefilter |
| 37 | +| block suppress |
| 38 | +| usergroups gblrights |
| 39 | +| interwiki import |
| 40 | +| groupprms2 gblrights |
| 41 | +| config stable |
| 42 | +| approve-ia review |
| 43 | +| approve-a review |
| 44 | +| unapprove review |
| 45 | +| approve review |
| 46 | +| reset stable |
| 47 | +| modify stable |
| 48 | +| approve-i review |
| 49 | +| hide-afl suppress |
| 50 | +| unhide-afl suppress |
51 | 51 | +------------+-------------+ |
52 | 52 | 46 rows in set (2 min 28.77 sec) |
53 | 53 | |
— | — | @@ -90,6 +90,103 @@ |
91 | 91 | ) |
92 | 92 | |
93 | 93 | |
| 94 | +CREATE TABLE halfak.revert_pre_20110115( |
| 95 | + revision_id INT, |
| 96 | + rvtd_to_id INT, |
| 97 | + revs_reverted INT |
| 98 | +); |
| 99 | + |
| 100 | +CREATE TABLE halfak.reverted_pre_20110115( |
| 101 | + revision_id INT, |
| 102 | + rvtg_id INT, |
| 103 | + rvtd_to_id INT, |
| 104 | + revs_reverted INT |
| 105 | +); |
| 106 | + |
| 107 | + |
| 108 | + |
| 109 | +CREATE TABLE halfak.reverted_20110115( |
| 110 | + revision_id INT, |
| 111 | + username VARBINARY(255), |
| 112 | + user_id INT, |
| 113 | + comment VARBINARY(255), |
| 114 | + rvtg_id INT, |
| 115 | + rvtg_username VARBINARY(255), |
| 116 | + rvtg_user_id INT, |
| 117 | + rvtg_comment VARBINARY(255), |
| 118 | + rvtto_id INT, |
| 119 | + rvtto_username VARBINARY(255), |
| 120 | + rvtto_user_id INT, |
| 121 | + rvtto_comment VARBINARY(255), |
| 122 | + is_vandalism BOOL, |
| 123 | + revs_reverted INT |
| 124 | +); |
| 125 | +INSERT INTO halfak.reverted_20110115 |
| 126 | +SELECT |
| 127 | + reverted.rev_id, |
| 128 | + reverted.rev_user_text, |
| 129 | + reverted.rev_user, |
| 130 | + reverted.rev_comment, |
| 131 | + reverting.rev_id, |
| 132 | + reverting.rev_user_text, |
| 133 | + reverting.rev_user, |
| 134 | + reverting.rev_comment, |
| 135 | + reverted_to.rev_id, |
| 136 | + reverted_to.rev_user_text, |
| 137 | + reverted_to.rev_user, |
| 138 | + reverted_to.rev_comment, |
| 139 | + CONVERT(reverting.rev_comment USING utf8) REGEXP "(^revert\\ to.+using)|(^reverted\\ edits\\ by.+using)|(^reverted\\ edits\\ by.+to\\ last\\ version\\ by)|(^bot\\ -\\ rv.+to\\ last\\ version\\ by)|(-assisted\\ reversion)|(^(revert(ed)?|rv).+to\\ last)|(^undo\\ revision.+by)" OR |
| 140 | + CONVERT(reverting.rev_comment USING utf8) REGEXP "(\\brvv)|(\\brv[/ ]v)|(vandal(!proof|bot))|(\\b(rv|rev(ert)?|rm)\\b.*(blank|spam|nonsense|porn|mass\\sdelet|vand))", |
| 141 | + r.revs_reverted |
| 142 | +FROM |
| 143 | + halfak.reverted_pre_20110115 r |
| 144 | +INNER JOIN revision reverted |
| 145 | + ON r.revision_id = reverted.rev_id |
| 146 | +INNER JOIN revision reverting |
| 147 | +    ON r.rvtg_id = reverting.rev_id |
| 148 | +INNER JOIN revision reverted_to |
| 149 | +    ON r.rvtd_to_id = reverted_to.rev_id; |
| 150 | +CREATE INDEX rev_id_idx ON halfak.reverted_20110115 (revision_id); |
| 151 | +CREATE INDEX rvtg_id_idx ON halfak.reverted_20110115 (rvtg_id); |
| 152 | + |
| 153 | + |
| 154 | +CREATE TABLE halfak.revert_20110115( |
| 155 | + revision_id INT, |
| 156 | + rvtto_id INT, |
| 157 | + is_vandalism BOOL, |
| 158 | + revs_reverted INT |
| 159 | +); |
| 160 | +INSERT INTO halfak.revert_20110115 |
| 161 | +SELECT |
| 162 | + rvt.revision_id, |
| 163 | + rvt.rvtd_to_id, |
| 164 | + bit_or(rvtd.is_vandalism), |
| 165 | + rvt.revs_reverted |
| 166 | +FROM halfak.revert_pre_20110115 rvt |
| 167 | +INNER JOIN halfak.reverted_20110115 rvtd |
| 168 | + ON rvt.revision_id = rvtd.rvtg_id |
| 169 | +GROUP BY rvt.revision_id, rvt.rvtd_to_id, rvt.revs_reverted; |
| 170 | +CREATE INDEX rev_id_idx ON halfak.revert_20110115 (revision_id); |
| 171 | +CREATE INDEX is_vandalism ON halfak.revert_20110115 (is_vandalism); |
| 172 | + |
| 173 | + |
| 174 | + |
| 175 | + |
94 | 176 | --SELECT * FROM revision WHERE rev_comment LIKE "Requesting speedy deletion%" |
95 | 177 | |
96 | 178 | |
| 179 | +SELECT |
| 180 | + SUBSTRING(rev_timestamp, 1,4) as year, |
| 181 | + rev_user as user_id, |
| 182 | + u.user_name as username, |
| 183 | + COUNT(*) as revisions, |
| 184 | + SUM(rvt.revision_id IS NOT NULL) as reverts, |
| 185 | + SUM(rvt.revision_id IS NOT NULL AND rvt.is_vandalism) as vandal_reverts |
| 186 | +FROM revision r |
| 187 | +LEFT JOIN halfak.revert_20110115 rvt |
| 188 | + ON r.rev_id = rvt.revision_id |
| 189 | +INNER JOIN user u |
| 190 | + ON r.rev_user = u.user_id |
| 191 | +WHERE rev_timestamp < "20110000000000" |
| 192 | +GROUP BY SUBSTRING(rev_timestamp, 1,4), rev_user, u.user_name |
| 193 | + |
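The is_vandalism flag filled in above comes from two MySQL REGEXP tests applied to the reverting edit's comment. For illustration, here is a rough Python re-expression of that comment heuristic. The patterns are simplified adaptations of the SQL above, not the actual wmf.isVandalismByComment implementation, and looks_like_vandalism_revert is a name invented for this sketch:

    import re

    # Simplified adaptations of the two REGEXP patterns used in the INSERT above.
    REVERT_RE = re.compile(
        r"(^revert to.+using)|(^reverted edits by.+using)"
        r"|(^reverted edits by.+to last version by)"
        r"|(^bot - rv.+to last version by)|(-assisted reversion)"
        r"|(^(revert(ed)?|rv).+to last)|(^undo revision.+by)",
        re.IGNORECASE)
    VANDAL_RE = re.compile(
        r"(\brvv)|(\brv[/ ]v)|(vandal(proof|bot))"
        r"|(\b(rv|rev(ert)?|rm)\b.*(blank|spam|nonsense|porn|mass\sdelet|vand))",
        re.IGNORECASE)

    def looks_like_vandalism_revert(comment):
        # Mirrors the OR of the two REGEXP tests that populate is_vandalism.
        return bool(REVERT_RE.search(comment) or VANDAL_RE.search(comment))

    print(looks_like_vandalism_revert("Reverted edits by 127.0.0.1 to last version by Example"))
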
Index: trunk/tools/wsor/scripts/revision_meta.py |
— | — | @@ -0,0 +1,114 @@ |
| 2 | +import sys, subprocess, os, errno, re, argparse, logging, hashlib, types
|
| 3 | +from difflib import SequenceMatcher
|
| 4 | +from gl.containers import LimitedDictLists
|
| 5 | +import wmf
|
| 6 | +
|
| 7 | +from text import STOP_WORDS, MARKUP
|
| 8 | +
|
| 9 | +
|
| 10 | +def tokenize(text):
|
| 11 | + return re.findall(
|
| 12 | + r"[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|.",
|
| 13 | + text
|
| 14 | + )
|
| 15 | +
|
| 16 | +def simpleDiff(a, b):
|
| 17 | + sm = SequenceMatcher(None, a, b)
|
| 18 | + added = []
|
| 19 | + removed = []
|
| 20 | + for (tag, i1, i2, j1, j2) in sm.get_opcodes():
|
| 21 | + if tag == 'replace':
|
| 22 | + removed.extend(a[i1:i2])
|
| 23 | + added.extend(b[j1:j2])
|
| 24 | + elif tag == 'delete':
|
| 25 | + removed.extend(a[i1:i2])
|
| 26 | + elif tag == 'insert':
|
| 27 | +            added.extend(b[j1:j2])
|
| 28 | +
|
| 29 | + return (added, removed)
|
| 30 | +
|
| 31 | +
|
| 32 | +
|
| 33 | +def process(dump, page):
|
| 34 | + recentRevs = LimitedDictLists(maxsize=15)
|
| 35 | + lastTokens = []
|
| 36 | + metaHeaders = [
|
| 37 | + 'rev_id',
|
| 38 | + 'checksum',
|
| 39 | + 'tokens',
|
| 40 | + 'cs_added',
|
| 41 | + 'cs_removed',
|
| 42 | + 'ts_added',
|
| 43 | + 'ts_removed',
|
| 44 | + 'ws_added',
|
| 45 | + 'ws_removed',
|
| 46 | + 'ms_added',
|
| 47 | + 'ms_removed'
|
| 48 | + ]
|
| 49 | + for revision in page.readRevisions():
|
| 50 | + checksum = hashlib.md5(revision.getText().encode("utf-8")).hexdigest()
|
| 51 | + if checksum in recentRevs:
|
| 52 | + #found a revert
|
| 53 | + revertedToRev = recentRevs[checksum]
|
| 54 | +
|
| 55 | + #get the revisions that were reverted
|
| 56 | + revertedRevs = [r for (c,r) in reversed(recentRevs.getQueue()) if r.getId() > revertedToRev.getId()]
|
| 57 | +
|
| 58 | + isVandalism = wmf.isVandalismByComment(revision.getComment())
|
| 59 | +
|
| 60 | + #write revert row
|
| 61 | + yield (
|
| 62 | + 'revert',
|
| 63 | + revision.getId(),
|
| 64 | + revertedToRev.getId(),
|
| 65 | + isVandalism,
|
| 66 | + len(revertedRevs)
|
| 67 | + )
|
| 68 | +
|
| 69 | + for rev in revertedRevs:
|
| 70 | + yield (
|
| 71 | + 'reverted',
|
| 72 | + rev.getId(),
|
| 73 | + revision.getId(),
|
| 74 | + revertedToRev.getId(),
|
| 75 | + isVandalism,
|
| 76 | + len(revertedRevs)
|
| 77 | + )
|
| 78 | + else:
|
| 79 | + pass
|
| 80 | +
|
| 81 | + """tokens = tokenize(revision.getText())
|
| 82 | +
|
| 83 | + tokensAdded, tokensRemoved = simpleDiff(lastTokens, tokens)
|
| 84 | +
|
| 85 | + row = {
|
| 86 | + 'rev_id': revision.getId(),
|
| 87 | + 'checksum': checksum,
|
| 88 | + 'tokens': len(revision.getText()),
|
| 89 | + 'cs_added': 0,
|
| 90 | + 'cs_removed': 0,
|
| 91 | + 'ts_added': 0,
|
| 92 | + 'ts_removed': 0,
|
| 93 | + 'ws_added': 0,
|
| 94 | + 'ws_removed': 0,
|
| 95 | + 'ms_added': 0,
|
| 96 | + 'ms_removed': 0
|
| 97 | + }
|
| 98 | + for token in tokensAdded:
|
| 99 | + row['ts_added'] += 1
|
| 100 | + row['cs_added'] += len(token)
|
| 101 | + if token.strip() == '': pass
|
| 102 | + if token in MARKUP: row['ms_added'] += 1
|
| 103 | + elif token not in STOP_WORDS: row['ws_added'] += 1
|
| 104 | + for token in tokensRemoved:
|
| 105 | + row['ts_removed'] += 1
|
| 106 | + row['cs_removed'] += len(token)
|
| 107 | + if token.strip() == '': pass
|
| 108 | + if token in MARKUP: row['ms_removed'] += 1
|
| 109 | + elif token not in STOP_WORDS: row['ws_removed'] += 1
|
| 110 | +
|
| 111 | +
|
| 112 | + yield tuple(['meta']+[row[h] for h in metaHeaders])
|
| 113 | +
|
| 114 | + lastTokens = tokens"""
|
| 115 | + recentRevs.insert(checksum, revision)
|
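process() above flags identity reverts by MD5-checksumming each revision's text and asking whether the same checksum already appeared among the last 15 revisions of the page; the matching earlier revision becomes the reverted-to revision and everything after it is counted as reverted. A self-contained sketch of that idea, using a plain deque in place of LimitedDictLists and made-up revision texts:

    import hashlib
    from collections import deque

    # Toy revision history: the 4th revision restores the text of the 1st.
    texts = ["stable text", "vandalized!", "more vandalism", "stable text"]

    window = deque(maxlen=15)   # (checksum, revision index) pairs for recent revisions
    for i, text in enumerate(texts):
        checksum = hashlib.md5(text.encode("utf-8")).hexdigest()
        match = next((j for c, j in reversed(window) if c == checksum), None)
        if match is not None:
            reverted = [j for c, j in window if j > match]
            print("revision %d reverts to revision %d, undoing %r" % (i, match, reverted))
        window.append((checksum, i))
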
Index: trunk/tools/wsor/ts_samples/total_talk_edits_staeiou.py |
— | — | @@ -0,0 +1,359 @@ |
| 2 | +# |
| 3 | +# Sample talk page postings to newbie's talk pages in various languages. |
| 4 | +# |
| 5 | +# This script is intended to be run on one of the toolserver machines. |
| 6 | +# |
| 7 | +# run python total_talk_edits_staeiou.py --help for command line parameters. |
| 8 | +# |
| 9 | +import os, sys, logging, argparse, MySQLdb, datetime |
| 10 | + |
| 11 | +def clean(v): |
| 12 | + if v == None: |
| 13 | +        return "\\N" |
| 14 | + else: |
| 15 | + return str(v).replace("\\", "\\\\").replace("\t", "\\t").replace("\n", "\\n") |
| 16 | + |
| 17 | + |
| 18 | +def main(args): |
| 19 | + LOGGING_STREAM = sys.stderr |
| 20 | + logging.basicConfig( |
| 21 | + level=logging.DEBUG, |
| 22 | + stream=LOGGING_STREAM, |
| 23 | + format='%(asctime)s %(levelname)-8s %(message)s', |
| 24 | + datefmt='%b-%d %H:%M:%S' |
| 25 | + ) |
| 26 | + |
| 27 | + logging.info("Connecting to %s:%s using %s." % (args.host, args.db, args.cnf)) |
| 28 | + conn = MySQLdb.connect( |
| 29 | + host=args.host, |
| 30 | + db=args.db, |
| 31 | + read_default_file=args.cnf |
| 32 | + ) |
| 33 | + fetchConn = MySQLdb.connect( |
| 34 | + host=args.host, |
| 35 | + db=args.db, |
| 36 | + read_default_file=args.cnf |
| 37 | + ) |
| 38 | + |
| 39 | + #Printing headers |
| 40 | + print( |
| 41 | + "\t".join([ |
| 42 | + 'user_id', |
| 43 | + 'username', |
| 44 | + 'registration', |
| 45 | + 'first_edit', |
| 46 | + 'end of newbie', |
| 47 | + 'last user rev_id', |
| 48 | + 'last utalk rev_id', |
| 49 | + 'Main edits', |
| 50 | + 'Talk edits', |
| 51 | + 'User edits', |
| 52 | + 'User_talk edits', |
| 53 | + 'Wikipedia edits', |
| 54 | + 'Wikipedia_talk edits', |
| 55 | + 'Image edits', |
| 56 | + 'Image_talk edits', |
| 57 | + 'MediaWiki edits', |
| 58 | + 'MediaWiki_talk edits', |
| 59 | + 'Template edits', |
| 60 | + 'Template_talk edits', |
| 61 | + 'Help edits', |
| 62 | + 'Help_talk edits', |
| 63 | + 'Category edits', |
| 64 | + 'Category_talk edits', |
| 65 | + 'blocks' |
| 66 | + ]) |
| 67 | + ) |
| 68 | + for year in args.year: |
| 69 | +        for semStart, semEnd in [('000000', '069999'), ('070000', '129999')]: |
| 70 | + logging.info("Processing %s:%s" % (year, semStart)) |
| 71 | + start = str(year) + semStart + "000000" |
| 72 | + end = str(year) + semEnd + "999999" |
| 73 | + count = 0 |
| 74 | + for user in getUsers(fetchConn, start, end): |
| 75 | + # |
| 76 | + # The following lines take a user's first_edit, |
| 77 | +            # convert it to a date, add 30 days and convert |
| 78 | + # it back to a string. I am syntax fu. |
| 79 | + # |
| 80 | + endOfNoob = ( |
| 81 | + datetime.date( |
| 82 | + int(user['first_edit'][0:4]), |
| 83 | + int(user['first_edit'][4:6]), |
| 84 | + int(user['first_edit'][6:8]) |
| 85 | + )+datetime.timedelta(days=30) |
| 86 | + ).strftime("%Y%m%d") + user['first_edit'][8:] |
| 87 | + |
| 88 | + LOGGING_STREAM.write(":") |
| 89 | + talkRevs = list(getPostsToTalkPage( |
| 90 | + conn, |
| 91 | + user['user_id'], |
| 92 | + user['user_name'], |
| 93 | + user['first_edit'], |
| 94 | + endOfNoob |
| 95 | + )) |
| 96 | + newbieRevs = {} |
| 97 | + |
| 98 | + LOGGING_STREAM.write(":") |
| 99 | + for rev in getUserRevs(conn, user['user_id'], user['first_edit'], endOfNoob): |
| 100 | + newbieRevs[rev['page_namespace']] = newbieRevs.get(rev['page_namespace'], 0)+1 |
| 101 | + |
| 102 | + |
| 103 | + LOGGING_STREAM.write(":") |
| 104 | + blocks = '\n'.join( |
| 105 | + [ |
| 106 | + "%(action)s: %(comment)s - %(params)s" % b for b in |
| 107 | + getBlockEvents(conn, user['user_name'], user['first_edit'], endOfNoob) |
| 108 | + ] |
| 109 | + ) |
| 110 | + |
| 111 | + LOGGING_STREAM.write(":") |
| 112 | + userPageRev = getLastPostToUserPage( |
| 113 | + conn, |
| 114 | + user['user_id'], |
| 115 | + user['user_name'], |
| 116 | + user['first_edit'], |
| 117 | + endOfNoob |
| 118 | + ) |
| 119 | + if userPageRev == None: |
| 120 | + userPageRevId = None |
| 121 | + else: |
| 122 | + userPageRevId = userPageRev['rev_id'] |
| 123 | + |
| 124 | + if len(talkRevs) != 0: |
| 125 | + print( |
| 126 | + "\t".join(clean(v) for v in [ |
| 127 | + user['user_id'], |
| 128 | + user['user_name'], |
| 129 | + user['user_registration'], |
| 130 | + user['first_edit'], |
| 131 | + endOfNoob, |
| 132 | + userPageRevId, |
| 133 | + talkRevs[-1]['rev_id'], |
| 134 | + newbieRevs.get(0, 0), |
| 135 | + newbieRevs.get(1, 0), |
| 136 | + newbieRevs.get(2, 0), |
| 137 | + newbieRevs.get(3, 0), |
| 138 | + newbieRevs.get(4, 0), |
| 139 | + newbieRevs.get(5, 0), |
| 140 | + newbieRevs.get(6, 0), |
| 141 | + newbieRevs.get(7, 0), |
| 142 | + newbieRevs.get(8, 0), |
| 143 | + newbieRevs.get(9, 0), |
| 144 | + newbieRevs.get(10, 0), |
| 145 | + newbieRevs.get(11, 0), |
| 146 | + newbieRevs.get(12, 0), |
| 147 | + newbieRevs.get(13, 0), |
| 148 | + newbieRevs.get(14, 0), |
| 149 | + newbieRevs.get(15, 0), |
| 150 | + blocks |
| 151 | + ]) |
| 152 | + ) |
| 153 | + LOGGING_STREAM.write(".") |
| 154 | + count += 1 |
| 155 | + if count >= args.n: |
| 156 | + break |
| 157 | + else: |
| 158 | + LOGGING_STREAM.write("s") |
| 159 | + |
| 160 | + LOGGING_STREAM.write("\n") |
| 161 | + |
| 162 | + |
| 163 | + |
| 164 | + |
| 165 | +def getUsers(conn, start, end): |
| 166 | + cursor = conn.cursor(MySQLdb.cursors.SSCursor) |
| 167 | + cursor.execute(""" |
| 168 | + SELECT |
| 169 | + u.user_id, |
| 170 | + u.user_name, |
| 171 | + u.user_registration, |
| 172 | + um.first_edit, |
| 173 | + um.last_edit |
| 174 | + FROM user u |
| 175 | + INNER JOIN halfak.user_meta um |
| 176 | + ON u.user_id = um.user_id |
| 177 | + WHERE um.first_edit BETWEEN %(start)s AND %(end)s |
| 178 | + ORDER BY RAND() |
| 179 | + """, |
| 180 | + { |
| 181 | + 'start': start, |
| 182 | + 'end': end |
| 183 | + } |
| 184 | + ) |
| 185 | + for row in cursor: |
| 186 | + yield dict( |
| 187 | + zip( |
| 188 | + (d[0] for d in cursor.description), |
| 189 | + row |
| 190 | + ) |
| 191 | + ) |
| 192 | + |
| 193 | + |
| 194 | + |
| 195 | + |
| 196 | +def getUserRevs(conn, userId, start, end): |
| 197 | + user_id = int(userId) |
| 198 | + cursor = conn.cursor() |
| 199 | + cursor.execute(""" |
| 200 | + SELECT |
| 201 | + r.*, |
| 202 | + p.page_namespace |
| 203 | + FROM revision r |
| 204 | + INNER JOIN page p |
| 205 | + ON r.rev_page = p.page_id |
| 206 | + WHERE rev_user = %(user_id)s |
| 207 | + AND rev_timestamp BETWEEN %(start)s AND %(end)s |
| 208 | + ORDER BY rev_timestamp ASC |
| 209 | + """, |
| 210 | + { |
| 211 | + 'user_id': userId, |
| 212 | + 'start': start, |
| 213 | + 'end': end |
| 214 | + } |
| 215 | + ) |
| 216 | + for row in cursor: |
| 217 | + yield dict( |
| 218 | + zip( |
| 219 | + (d[0] for d in cursor.description), |
| 220 | + row |
| 221 | + ) |
| 222 | + ) |
| 223 | + |
| 224 | + |
| 225 | +def getBlockEvents(conn, username, start, end): |
| 226 | + cursor = conn.cursor() |
| 227 | + cursor.execute(""" |
| 228 | + SELECT |
| 229 | + log_action as action, |
| 230 | + log_comment as comment, |
| 231 | + log_params as params |
| 232 | + FROM logging |
| 233 | + WHERE log_title = %(username)s |
| 234 | + AND log_type = "block" |
| 235 | + AND log_timestamp BETWEEN %(start)s AND %(end)s |
| 236 | + ORDER BY log_timestamp ASC |
| 237 | + """, |
| 238 | + { |
| 239 | + 'username': username, |
| 240 | + 'start': start, |
| 241 | + 'end': end |
| 242 | + } |
| 243 | + ) |
| 244 | + for row in cursor: |
| 245 | + yield dict( |
| 246 | + zip( |
| 247 | + (d[0] for d in cursor.description), |
| 248 | + row |
| 249 | + ) |
| 250 | + ) |
| 251 | + |
| 252 | +def getLastPostToUserPage(conn, userId, username, start, end): |
| 253 | + pageId = getPageId(conn, username, 2) |
| 254 | + if pageId != None: |
| 255 | + cursor = conn.cursor() |
| 256 | + cursor.execute(""" |
| 257 | + SELECT * FROM revision |
| 258 | + WHERE rev_page = %(page_id)s |
| 259 | + AND rev_timestamp BETWEEN %(start)s AND %(end)s |
| 260 | + ORDER BY rev_timestamp DESC |
| 261 | + LIMIT 1 |
| 262 | + """, |
| 263 | + { |
| 264 | + 'page_id': pageId, |
| 265 | + 'user_id': userId, |
| 266 | + 'start': start, |
| 267 | + 'end': end |
| 268 | + } |
| 269 | + ) |
| 270 | + for rev in cursor: |
| 271 | + return dict( |
| 272 | + zip( |
| 273 | + (d[0] for d in cursor.description), |
| 274 | + rev |
| 275 | + ) |
| 276 | + ) |
| 277 | + |
| 278 | + return None |
| 279 | + |
| 280 | + |
| 281 | +def getPageId(conn, title, namespace): |
| 282 | + cursor = conn.cursor() |
| 283 | + cursor.execute(""" |
| 284 | + SELECT page_id FROM page |
| 285 | + WHERE page_title = %(title)s |
| 286 | + AND page_namespace = %(namespace)s |
| 287 | + """, |
| 288 | + { |
| 289 | + 'title': title, |
| 290 | + 'namespace': namespace |
| 291 | + } |
| 292 | + ) |
| 293 | + for page in cursor: |
| 294 | + return page[0] |
| 295 | + |
| 296 | + return None |
| 297 | + |
| 298 | +def getPostsToTalkPage(conn, userId, username, start, end): |
| 299 | + pageId = getPageId(conn, username, 3) |
| 300 | + if pageId != None: |
| 301 | + cursor = conn.cursor() |
| 302 | + cursor.execute(""" |
| 303 | + SELECT * FROM revision |
| 304 | + WHERE rev_page = %(page_id)s |
| 305 | + AND rev_timestamp BETWEEN %(start)s AND %(end)s |
| 306 | + ORDER BY rev_id |
| 307 | + """, |
| 308 | + { |
| 309 | + 'page_id': pageId, |
| 310 | + 'user_id': userId, |
| 311 | + 'start': start, |
| 312 | + 'end': end |
| 313 | + } |
| 314 | + ) |
| 315 | + for rev in cursor: |
| 316 | + yield dict( |
| 317 | + zip( |
| 318 | + (d[0] for d in cursor.description), |
| 319 | + rev |
| 320 | + ) |
| 321 | + ) |
| 322 | + |
| 323 | + |
| 324 | +if __name__ == "__main__": |
| 325 | + parser = argparse.ArgumentParser( |
| 326 | + description= |
| 327 | + 'Samples editors by the year they made their first edit.' |
| 328 | + ) |
| 329 | + parser.add_argument( |
| 330 | + 'n', |
| 331 | + type=int, |
| 332 | + help='the number of editors to sample from each year' |
| 333 | + ) |
| 334 | + parser.add_argument( |
| 335 | + 'year', |
| 336 | + type=int, |
| 337 | + help='year(s) to sample from', |
| 338 | + nargs="+" |
| 339 | + ) |
| 340 | + parser.add_argument( |
| 341 | + '-c', '--cnf', |
| 342 | + metavar="<path>", |
| 343 | + type=str, |
| 344 | + help='the path to MySQL config info (defaults to ~/.my.cnf)', |
| 345 | + default=os.path.expanduser("~/.my.cnf") |
| 346 | + ) |
| 347 | + parser.add_argument( |
| 348 | + '-s', '--host', |
| 349 | + type=str, |
| 350 | + help='the database host to connect to (defaults to localhost)', |
| 351 | + default="localhost" |
| 352 | + ) |
| 353 | + parser.add_argument( |
| 354 | + '-d', '--db', |
| 355 | + type=str, |
| 356 | + help='the language db to run the query in (defaults to enwiki)', |
| 357 | + default="enwiki" |
| 358 | + ) |
| 359 | + args = parser.parse_args() |
| 360 | + main(args) |
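The endOfNoob computation in the script above slices a 14-character MediaWiki timestamp (YYYYMMDDHHMMSS), converts the date part to a datetime.date, adds 30 days, and re-attaches the original time-of-day suffix. A standalone worked example with a made-up first_edit value:

    import datetime

    first_edit = "20070615123456"   # hypothetical timestamp: 2007-06-15 12:34:56

    end_of_noob = (
        datetime.date(
            int(first_edit[0:4]),
            int(first_edit[4:6]),
            int(first_edit[6:8])
        ) + datetime.timedelta(days=30)
    ).strftime("%Y%m%d") + first_edit[8:]

    print(end_of_noob)   # -> 20070715123456
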
Index: trunk/tools/wsor/utilities/limited_dict_lists.py |
— | — | @@ -0,0 +1,144 @@ |
| 2 | +from .limited_queue import LimitedQueue |
| 3 | + |
| 4 | +__docformat__ = "restructuredtext en" |
| 5 | + |
| 6 | +class LimitedDictLists(dict): |
| 7 | + """ |
| 8 | + Represents a limited size dictionary. When the dictionary is full and |
| 9 | + another item is added to it, the oldest item in the dictionary is thrown |
| 10 | + away. |
| 11 | + """ |
| 12 | + |
| 13 | + def __init__(self, pairs=[], maxsize=None): |
| 14 | + """ |
| 15 | +        Constructs a new LimitedDictLists. If `maxsize` is not specified, |
| 16 | + it will be assumed to be len(pairs). |
| 17 | + |
| 18 | + :Parameters: |
| 19 | + pairs : dict | iterable pairs |
| 20 | + A dict of (key->value) pairs or an iterator over pairs. |
| 21 | + maxsize : int |
| 22 | + The number of values that will be remembered |
| 23 | + |
| 24 | + """ |
| 25 | + #Create a clean iterator of pairs |
| 26 | + try: |
| 27 | + pairs = [(k,v) for (k, v) in pairs] |
| 28 | + except AttributeError: |
| 29 | + pairs = [(k,v) for (k, v) in pairs.iteritems()] |
| 30 | + |
| 31 | + #Determine how large the maxsize should be |
| 32 | + if maxsize != None: |
| 33 | + self.__maxsize = maxsize |
| 34 | + else: |
| 35 | + self.__maxsize = len(pairs) |
| 36 | + |
| 37 | + #Prime the queue |
| 38 | + self.__queue = LimitedQueue(maxlen=self.__maxsize) |
| 39 | + |
| 40 | + #Load in the pairs |
| 41 | + for k, v in pairs: |
| 42 | + self.__setitem__(k, v) |
| 43 | + |
| 44 | + |
| 45 | + |
| 46 | + def __getitem__(self, key): |
| 47 | + if dict.__contains__(self, key) and len(dict.__getitem__(self, key)) > 0: |
| 48 | + return dict.__getitem__(self, key)[0] |
| 49 | + else: |
| 50 | + raise KeyError(key) |
| 51 | + |
| 52 | + |
| 53 | + def __delitem__(self, key): |
| 54 | + if key in self and len(dict.__getitem__(self, key)) > 0: |
| 55 | + dict.__delitem__(self, key) |
| 56 | + else: |
| 57 | + raise KeyError(key) |
| 58 | + |
| 59 | + def __iter__(self): |
| 60 | + for pair in self.__queue: |
| 61 | + yield pair |
| 62 | + |
| 63 | + def __len__(self): |
| 64 | + return len(self.__queue) |
| 65 | + |
| 66 | + |
| 67 | + def __str__(self): |
| 68 | + return self.__repr__() |
| 69 | + |
| 70 | + |
| 71 | + def __repr__(self): |
| 72 | + return "%s(%r,%r)" % ( |
| 73 | + self.__class__.__name__, |
| 74 | + self.__maxsize, |
| 75 | + [pair for pair in self] |
| 76 | + ) |
| 77 | + |
| 78 | + def getMaxSize(self): return self.__maxsize |
| 79 | + def getQueue(self): return self.__queue |
| 80 | + |
| 81 | + def insert(self, key, value): |
| 82 | + """ |
| 83 | + Inserts a new key-value pair into the dictionary. If the |
| 84 | + dictionary is full, this function will return an expectorate; |
| 85 | + otherwise, it returns None. |
| 86 | + |
| 87 | + :Parameters: |
| 88 | + key : hashable |
| 89 | + key to reference value |
| 90 | + value |
| 91 | + value to store |
| 92 | + |
| 93 | + :Return: |
| 94 | + A (key, value) pair expectorate if the dictionary is |
| 95 | + full or None if it is not. |
| 96 | + """ |
| 97 | + return self.__setitem__(key, value) |
| 98 | + |
| 99 | + |
| 100 | + def __setitem__(self, key, value): |
| 101 | + """ |
| 102 | + Inserts a new key-value pair into the dictionary. If the |
| 103 | + dictionary is full, this function will return an expectorate; |
| 104 | + otherwise, it returns None. |
| 105 | + |
| 106 | + :Parameters: |
| 107 | + key : hashable |
| 108 | + key to reference value |
| 109 | + value |
| 110 | + value to store |
| 111 | + |
| 112 | + :Return: |
| 113 | + A (key, value) pair expectorate if the dictionary is |
| 114 | + full or None if it is not. |
| 115 | + """ |
| 116 | +        #The queue will return something interesting if it is full |
| 117 | +        expectorate = self.__queue.append((key,value)) |
| 118 | +        retVal = None |
| 119 | +        if expectorate != None and expectorate[0] in self: |
| 120 | +            #Something got spit out of the queue |
| 121 | +            #Take it out of the map |
| 122 | +            if dict.__getitem__(self, expectorate[0])[-1] == expectorate[1]: |
| 123 | +                retVal = dict.__getitem__(self, expectorate[0]).pop() |
| 124 | + |
| 125 | +                if len(dict.__getitem__(self, expectorate[0])) == 0: |
| 126 | +                    dict.__delitem__(self, expectorate[0]) |
| 127 | + |
| 128 | +        #Insert the new pair even when an old value was expelled above |
| 129 | +        if key in self: |
| 130 | +            dict.__getitem__(self, key).insert(0, value) |
| 131 | +        else: |
| 132 | +            dict.__setitem__(self, key, [value]) |
| 133 | +        return retVal |
| 134 | + |
| 135 | + |
| 136 | + |
| 137 | + def getByIndex(self, index): |
| 138 | + """ |
| 139 | + Gets a pair based on the order it was added where 0 is the oldest and |
| 140 | + self.getLimit()-1 was most recently added. |
| 141 | + """ |
| 142 | + return self.__queue.get(index)[1] |
| 143 | + |
| 144 | + |
| 145 | + |
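A small usage sketch of LimitedDictLists, showing the eviction behaviour described in the class docstring. It assumes the utilities directory is importable as a package (the import path below is a guess, since limited_dict_lists.py relatively imports limited_queue), and the keys and maxsize are arbitrary:

    from utilities.limited_dict_lists import LimitedDictLists

    recent = LimitedDictLists(maxsize=3)
    recent.insert("a", 1)
    recent.insert("b", 2)
    recent.insert("c", 3)
    print("a" in recent)               # True: still inside the window
    expelled = recent.insert("d", 4)   # the oldest stored value should be expelled
    print("a" in recent)               # False once "a" has been pushed out
    print(expelled)
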
Index: trunk/tools/wsor/utilities/limited_queue.py |
— | — | @@ -0,0 +1,212 @@ |
| 2 | + |
| 3 | +__docformat__ = "restructuredtext en" |
| 4 | + |
| 5 | +class LimitedQueue: |
| 6 | + """ |
| 7 | + A limited size queue, instances of this class will have a limited |
| 8 | + amount of spaces to store items. When the queue is full and a |
| 9 | + subsequent item is added, an expectorate will be returned. |
| 10 | + |
| 11 | + This class's internal representation is a sized "circular" queue. |
| 12 | + Essentially, no matter what is added, removed or popped from this |
| 13 | +    queue, its internal structure does not change in size from its |
| 14 | +    creation. For the most part, this detail can be ignored, but it |
| 15 | +    may be important when considering performance. |
| 16 | + |
| 17 | +    This should eventually be deprecated in favor of collections.deque. |
| 18 | + """ |
| 19 | + |
| 20 | + def __init__(self, iterable=[], maxlen=None): |
| 21 | + """ |
| 22 | + Constructor |
| 23 | + |
| 24 | + :Parameters: |
| 25 | + iterable : iterable |
| 26 | + An iterator of elements to load into the queue |
| 27 | + maxlen : int |
| 28 | + The size of the queue, i.e. the perimeter of the circle. |
| 29 | + """ |
| 30 | + self.__circle = list(iterable) |
| 31 | + if maxlen != None: |
| 32 | + self.__maxlen = maxlen |
| 33 | + self.__circle = self.__circle[0:maxlen] |
| 34 | + else: |
| 35 | + self.__maxlen = len(self.__circle) |
| 36 | + |
| 37 | + self.__counter = 0 |
| 38 | + self.__length = 0 |
| 39 | + |
| 40 | + for i in range(len(self.__circle), self.__maxlen): |
| 41 | + self.__circle.append(None) |
| 42 | + |
| 43 | + |
| 44 | + def __iter__(self): |
| 45 | + for i in range(0, self.__length): |
| 46 | + if self.get(i) != None: |
| 47 | + yield self.get(i) |
| 48 | + |
| 49 | + def __reversed__(self): |
| 50 | + for i in range(self.__length-1, -1, -1): |
| 51 | + if self.get(i) != None: |
| 52 | + yield self.get(i) |
| 53 | + |
| 54 | + |
| 55 | + |
| 56 | + |
| 57 | + def __len__(self): |
| 58 | + return self.__length |
| 59 | + |
| 60 | + |
| 61 | + def index(self, item, j=0, k=None): |
| 62 | + """ |
| 63 | + Find the index of an item in the queue |
| 64 | + """ |
| 65 | + |
| 66 | + if k == None: |
| 67 | + k = len(self) |
| 68 | + |
| 69 | + for i in range(j, k): |
| 70 | + if item == self.__getitem__(i): |
| 71 | + return i |
| 72 | + |
| 73 | + raise ValueError("%s not in queue" % item) |
| 74 | + |
| 75 | + |
| 76 | + |
| 77 | + def pop(self, index=0): |
| 78 | + """ |
| 79 | + Removes an item from the queue and returns it. |
| 80 | + |
| 81 | + :Parameters: |
| 82 | + index : int |
| 83 | + Index of the item to remove and return |
| 84 | + |
| 85 | + :Return: |
| 86 | + The item removed. |
| 87 | + """ |
| 88 | + if index >= 0 and index < self.__length: |
| 89 | + #Remove the item from the correct location |
| 90 | + value = self.__circle.pop(self.__getInternalIndex(index)) |
| 91 | + |
| 92 | + #Add a None to the end to make up for the removed item |
| 93 | + self.__circle.insert(self.__getInternalIndex(0), None) |
| 94 | + |
| 95 | + #Decrement length |
| 96 | + self.__length -= 1 |
| 97 | + |
| 98 | + return value |
| 99 | + else: |
| 100 | + raise IndexError("Index %s does not exist in the queue and cannot be popped." % index) |
| 101 | + |
| 102 | + def __getitem__(self, index): |
| 103 | + return self.get(index) |
| 104 | + |
| 105 | + |
| 106 | + def __delitem__(self, index): |
| 107 | + self.pop(index) |
| 108 | + |
| 109 | + |
| 110 | + def __str__(self): |
| 111 | + return self.__repr__() |
| 112 | + |
| 113 | + |
| 114 | + def __repr__(self): |
| 115 | + return "%s(%r, %r)" % ( |
| 116 | + self.__class__.__name__, |
| 117 | + self.__maxlen, |
| 118 | + [item for item in self] |
| 119 | + ) |
| 120 | + |
| 121 | + |
| 122 | + |
| 123 | + def append(self, item): |
| 124 | + """ |
| 125 | + Adds a new object into the queue. The return value depends on the |
| 126 | + state of the queue. If the queue is full, the value returned will be |
| 127 | + the object that falls off of the end of the queue. If the queue is |
| 128 | + |
| 129 | + :Parameters: |
| 130 | + item |
| 131 | + The item to append to the queue |
| 132 | + |
| 133 | + :Return: |
| 134 | + The expectorate. None if nothing if there was room for |
| 135 | + item or the oldest item in the queue if the queue was |
| 136 | + full. |
| 137 | + """ |
| 138 | + oldItem = self.__circle[self.__getNextInternalIndex()] |
| 139 | + |
| 140 | + self.__circle[self.__getNextInternalIndex()] = item |
| 141 | + self.__counter += 1 |
| 142 | + if self.__length < self.__maxlen: |
| 143 | + self.__length += 1 |
| 144 | + |
| 145 | + return oldItem |
| 146 | + |
| 147 | + |
| 148 | + def get(self, index): |
| 149 | + """ |
| 150 | + Gets a value from the queue. This method will throw an IndexError if |
| 151 | + the index is not in the queue. |
| 152 | + |
| 153 | + :Parameters: |
| 154 | + index : int |
| 155 | + The index of the item to retrieve |
| 156 | + |
| 157 | + :Return: |
| 158 | + The item |
| 159 | + """ |
| 160 | + if index >= self.__length*-1 and index < self.__length: |
| 161 | + if index < 0: |
| 162 | + posIndex = self.__length+index |
| 163 | + else: |
| 164 | + posIndex = index |
| 165 | + |
| 166 | + return self.__circle[self.__getInternalIndex(posIndex)] |
| 167 | + else: |
| 168 | + raise IndexError("Index %s out of range(%s-%s)" % (index, self.__length*-1, self.__length)) |
| 169 | + |
| 170 | + |
| 171 | + def __getNextInternalIndex(self): |
| 172 | + """ |
| 173 | + Gets the next location in self.__circle where new values should be |
| 174 | + stored. |
| 175 | + """ |
| 176 | + return self.__counter % self.__maxlen |
| 177 | + |
| 178 | + |
| 179 | + |
| 180 | + def __getInternalIndex(self, index): |
| 181 | + """ |
| 182 | + Generates the actual internal index based off of an abstract |
| 183 | + external index. Essentially, this function turns what someone |
| 184 | + using this class thinks is an index to the right one for |
| 185 | + self.__circle. |
| 186 | + |
| 187 | + :Parameters: |
| 188 | + index : int |
| 189 | + the index to convert |
| 190 | + |
| 191 | + :Return: |
| 192 | + Internal index |
| 193 | + """ |
| 194 | + if index >= 0 and index < self.__maxlen: |
| 195 | + if self.__counter >= self.__maxlen: |
| 196 | + return ((self.__counter)+index) % self.__maxlen |
| 197 | + else: |
| 198 | + return index |
| 199 | + |
| 200 | + else: |
| 201 | + raise IndexError("Index %s out of range" % index) |
| 202 | + |
| 203 | + |
| 204 | + def clear(self): |
| 205 | + """ |
| 206 | + Resets the queue to its empty state. |
| 207 | + """ |
| 208 | +        self.__counter = 0 |
| 209 | +        self.__length = 0 |
| 210 | +        self.__circle = [] |
| 211 | + for i in range(0,self.__maxlen): |
| 212 | + self.__circle.append(None) |
| 213 | + |
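A matching sketch for LimitedQueue itself, illustrating the "expectorate" return value once the circular buffer is full; the same caveat about the guessed import path applies:

    from utilities.limited_queue import LimitedQueue

    q = LimitedQueue(maxlen=3)
    print(q.append("r1"))   # None: there was room
    print(q.append("r2"))   # None
    print(q.append("r3"))   # None
    print(q.append("r4"))   # "r1": the oldest item falls off the end
    print("%d items remain: %r" % (len(q), list(q)))   # oldest first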