Index: trunk/tools/wsor/cohorts/queries.sql |
— | — | @@ -1,4 +1,43 @@ |
2 | | -CREATE TABLE halfak.rev_len_changed_namespace ( |
-- One row per user (from the `user` table) who has at least one revision in
-- `revision` (live) or `archive` (deleted): the timestamps of their first and
-- last edits, plus the year and month of the first edit.
CREATE TABLE halfak.user_cohort
SELECT
    user_id,
    user_name,
    MIN(first_edit)        AS first_edit,
    YEAR(MIN(first_edit))  AS first_edit_year,
    MONTH(MIN(first_edit)) AS first_edit_month,
    -- Latest edit across both sources: aggregate last_edit, not first_edit.
    MAX(last_edit)         AS last_edit
FROM
(
    -- Edit bounds from live revisions.
    SELECT
        u.user_id,
        u.user_name,
        MIN(r.rev_timestamp) AS first_edit,
        MAX(r.rev_timestamp) AS last_edit
    FROM revision r
    INNER JOIN user u
        ON u.user_id = r.rev_user
    GROUP BY u.user_id, u.user_name
    -- UNION ALL: the outer MIN/MAX make de-duplication unnecessary.
    UNION ALL
    -- Edit bounds from deleted (archived) revisions.
    SELECT
        u.user_id,
        u.user_name,
        MIN(a.ar_timestamp) AS first_edit,
        MAX(a.ar_timestamp) AS last_edit
    FROM archive a
    INNER JOIN user u
        ON u.user_id = a.ar_user
    GROUP BY u.user_id, u.user_name
) AS edit_bounds
GROUP BY user_id, user_name;
| 37 | + |
| 38 | + |
| 39 | + |
| 40 | + |
| 41 | +CREATE TABLE halfak.rev_len_changed ( |
3 | 42 | rev_id INT UNSIGNED, |
4 | 43 | rev_timestamp VARBINARY(14), |
5 | 44 | rev_year INT UNSIGNED, |
— | — | @@ -12,7 +51,7 @@ |
13 | 52 | len_change INT |
14 | 53 | ); |
15 | 54 | --mysqlimport --local -h db42 halfak rev_len_changed_namespace |
16 | | -CREATE TABLE halfak.rev_len_changed_namespace_day |
| 55 | +CREATE TABLE halfak.rev_len_changed |
17 | 56 | SELECT |
18 | 57 | c.rev_id, |
19 | 58 | c.rev_timestamp, |
— | — | @@ -31,12 +70,32 @@ |
32 | 71 | ON c.rev_parent_id = p.rev_id |
33 | 72 | INNER JOIN page cp |
34 | 73 | ON c.rev_page = cp.page_id; |
35 | | - |
-- Indexes on the per-revision length-change table. The table is named
-- halfak.rev_len_changed in this script; halfak.rev_len_changed_namespace_day
-- is no longer created here.
CREATE UNIQUE INDEX rev_idx ON halfak.rev_len_changed (rev_id);
CREATE INDEX rev_user_namespace_year_month_day ON halfak.rev_len_changed (user_id, namespace, rev_year, rev_month, rev_day);
38 | 76 | |
39 | 77 | |
-- update: select the rows for revisions newer than the newest rev_id already
-- stored in halfak.rev_len_changed, in the same column layout as that table.
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
SELECT
    c.rev_id,
    c.rev_timestamp,
    YEAR(c.rev_timestamp)  AS rev_year,
    MONTH(c.rev_timestamp) AS rev_month,
    DAY(c.rev_timestamp)   AS rev_day,
    c.rev_len,
    c.rev_user             AS user_id,
    c.rev_user_text        AS user_text,
    c.rev_page             AS page_id,
    cp.page_namespace      AS namespace,
    c.rev_parent_id        AS parent_id,
    -- A revision with no matching parent row counts its whole length as change.
    c.rev_len - IFNULL(p.rev_len, 0) AS len_change
FROM revision c
LEFT JOIN revision p
    ON c.rev_parent_id = p.rev_id
INNER JOIN page cp
    ON c.rev_page = cp.page_id
-- IFNULL(..., 0): with an empty target table MAX() is NULL and the bare
-- comparison would select nothing instead of everything.
WHERE c.rev_id > (SELECT IFNULL(MAX(rev_id), 0) FROM halfak.rev_len_changed);
| 98 | + |
| 99 | +SELECT |
41 | 100 | user_id, |
42 | 101 | u.first_edit, |
43 | 102 | YEAR(u.first_edit) AS first_edit_year, |
— | — | @@ -216,3 +275,64 @@ |
217 | 276 | ORDER BY SUM(IF(rvt.revision_id IS NULL AND len_change > 0, len_change, 0)) DESC |
218 | 277 | LIMIT 10; |
219 | 278 | |
| 279 | + |
-- Per-revision length-change table, loaded from a TSV export (see the
-- mysqlimport note below). IF NOT EXISTS because this script also creates
-- halfak.rev_len_changed earlier; re-running the plain statement would fail.
CREATE TABLE IF NOT EXISTS halfak.rev_len_changed(
    rev_id        INT UNSIGNED,
    rev_timestamp VARBINARY(14),
    rev_year      INT UNSIGNED,
    rev_month     INT UNSIGNED,
    rev_day       INT UNSIGNED,
    rev_len       INT UNSIGNED,
    user_id       INT UNSIGNED,
    user_text     VARBINARY(255),
    page_id       INT UNSIGNED,
    namespace     INT UNSIGNED,
    parent_id     INT UNSIGNED,
    -- Signed: rev_len minus parent rev_len can be negative.
    len_change    INT
);
-- mysqlimport --local -h db1047 halfak ~/data/rev_len_changed.tsv
| 295 | + |
| 296 | + |
| 297 | + |
-- Daily activity per user and namespace up to 2011-01-15: counts of reverting,
-- no-op, adding and removing edits, and the total length added / removed by
-- non-reverting edits, together with the user's cohort (first-edit) fields.
CREATE TABLE halfak.user_namespace_day(
    user_id          INT UNSIGNED,
    namespace        INT UNSIGNED,
    rev_year         INT UNSIGNED,
    rev_month        INT UNSIGNED,
    rev_day          INT UNSIGNED,
    first_edit       VARBINARY(14),
    first_edit_year  INT UNSIGNED,
    first_edit_month INT UNSIGNED,
    reverting_edits  INT UNSIGNED,
    noop_edits       INT UNSIGNED,
    add_edits        INT UNSIGNED,
    remove_edits     INT UNSIGNED,
    len_added        INT UNSIGNED,
    -- Sum of negative len_change values, so it is <= 0 and must be signed.
    len_removed      INT
);

-- Populate the table declared above. (A second CREATE TABLE ... SELECT on the
-- same name would fail because the table already exists.)
INSERT INTO halfak.user_namespace_day (
    user_id,
    namespace,
    rev_year,
    rev_month,
    rev_day,
    first_edit,
    first_edit_year,
    first_edit_month,
    reverting_edits,
    noop_edits,
    add_edits,
    remove_edits,
    len_added,
    len_removed
)
SELECT
    rlc.user_id,
    rlc.namespace,
    rlc.rev_year,
    rlc.rev_month,
    rlc.rev_day,
    uc.first_edit,
    uc.first_edit_year,
    uc.first_edit_month,
    -- A revision with a row in the revert table is counted as a reverting edit;
    -- every other bucket only counts revisions without one.
    SUM(rvt.revision_id IS NOT NULL)                                           AS reverting_edits,
    SUM(rvt.revision_id IS NULL AND rlc.len_change = 0)                        AS noop_edits,
    SUM(rvt.revision_id IS NULL AND rlc.len_change > 0)                        AS add_edits,
    SUM(rvt.revision_id IS NULL AND rlc.len_change < 0)                        AS remove_edits,
    SUM(IF(rvt.revision_id IS NULL AND rlc.len_change > 0, rlc.len_change, 0)) AS len_added,
    SUM(IF(rvt.revision_id IS NULL AND rlc.len_change < 0, rlc.len_change, 0)) AS len_removed
FROM halfak.rev_len_changed rlc
INNER JOIN halfak.user_cohort uc USING(user_id)
LEFT JOIN halfak.revert_20110115 rvt
    ON rlc.rev_id = rvt.revision_id
WHERE rlc.rev_timestamp <= '20110115000000'
GROUP BY
    rlc.user_id,
    rlc.namespace,
    rlc.rev_year,
    rlc.rev_month,
    rlc.rev_day,
    -- Cohort fields are constant per user_id; listed so the query is also
    -- valid under ONLY_FULL_GROUP_BY.
    uc.first_edit,
    uc.first_edit_year,
    uc.first_edit_month;
Index: trunk/tools/wsor/newbie_warnings/R/exploration.R |
— | — | @@ -1,4 +1,5 @@ |
2 | 2 | source("loader/load_hugglings.R") |
| 3 | +source("loader/load_huggling_codings.R") |
3 | 4 | library(doBy) |
4 | 5 | hugglings = load_hugglings() |
5 | 6 | |
— | — | @@ -11,3 +12,209 @@ |
12 | 13 | hugglingCounts$recipient.length = NULL |
13 | 14 | |
14 | 15 | hugglings = merge(hugglings, hugglingCounts, by=c("recipient")) |
| 16 | + |
huggling_codings = load_huggling_codings()

# TRUE when the recipient retaliated at least once. (The column name keeps its
# existing spelling because the rest of this script refers to it.)
huggling_codings$retailates_bool = huggling_codings$retaliates > 0

# Logistic regression of retaliation on the full three-way interaction of the
# message codings.
model = glm(
    retailates_bool ~ personal*teaching*image,
    huggling_codings,
    family=binomial(link="logit")
)
summary(model)
# Output recorded from an earlier run. The "...True" coefficient names show the
# predictors were "True"/"False" factors at that time; with logical predictors
# the same terms are named "...TRUE".
#Coefficients:
#                                    Estimate Std. Error z value Pr(>|z|)
#(Intercept)                         -2.56495    0.59914  -4.281 1.86e-05 ***
#personalTrue                        -1.28520    1.17480  -1.094   0.2740
#teachingTrue                        -0.07411    0.84624  -0.088   0.9302
#imageTrue                           -0.69315    0.93713  -0.740   0.4595
#personalTrue:teachingTrue            1.97835    1.42233   1.391   0.1642
#personalTrue:imageTrue               2.50022    1.43554   1.742   0.0816 .
#teachingTrue:imageTrue              -0.61904    1.50146  -0.412   0.6801
#personalTrue:teachingTrue:imageTrue -1.26651    1.97089  -0.643   0.5205

# Same outcome without the image term.
model = glm(
    retailates_bool ~ personal*teaching,
    huggling_codings,
    family=binomial(link="logit")
)
summary(model)

# Retaliation among messages coded personal AND teaching AND image.
# as.logical(as.character(x)) works whether the loader returned logical columns
# or "True"/"False" factors; comparing a logical column with == "True" is
# always FALSE and would select no rows.
summary(
    huggling_codings[
        as.logical(as.character(huggling_codings$personal)) &
        as.logical(as.character(huggling_codings$teaching)) &
        as.logical(as.character(huggling_codings$image)),
    ]$retailates_bool
)
| 52 | + |
# TRUE when the recipient contacted the huggler at least once.
huggling_codings$contacts_huggler_bool = huggling_codings$contacts_huggler > 0

model = glm(
    contacts_huggler_bool ~ personal*teaching,
    huggling_codings,
    family=binomial(link="logit")
)
summary(model)

# Summary for messages coded personal AND teaching.
# as.logical(as.character(x)) works whether the loader returned logical columns
# or "True"/"False" factors; comparing a logical column with == "True" is
# always FALSE and would select no rows.
# NOTE(review): this summarises retailates_bool directly after the
# contacts_huggler_bool model -- confirm contacts_huggler_bool was not intended.
summary(
    huggling_codings[
        as.logical(as.character(huggling_codings$personal)) &
        as.logical(as.character(huggling_codings$teaching)),
    ]$retailates_bool
)
| 68 | + |
# "Good contact": contacted the huggler at least once and never retaliated.
huggling_codings$good_contact = with(
    huggling_codings,
    contacts_huggler > 0 & retaliates == 0
)

model = glm(
    formula = good_contact ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings
)
summary(model)

# "Good outcome": post-message rating of at least 3. Rows without an
# after_rating stay NA here (the alternative of coding them FALSE via
# !is.na(after_rating) was tried and left disabled).
huggling_codings$good_outcome = with(huggling_codings, after_rating >= 3.0)

# All coded recipients.
model = glm(
    formula = good_outcome ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings
)
summary(model)

# Four bands of pre-message rating: < 1.5, [1.5, 2.5), [2.5, 3.5), >= 3.5.
model = glm(
    formula = good_outcome ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating < 1.5,]
)
summary(model)

model = glm(
    formula = good_outcome ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating >= 1.5 & huggling_codings$before_rating < 2.5,]
)
summary(model)

model = glm(
    formula = good_outcome ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating >= 2.5 & huggling_codings$before_rating < 3.5,]
)
summary(model)

model = glm(
    formula = good_outcome ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating >= 3.5,]
)
summary(model)

# Two-way split of pre-message rating at 2.5.
model = glm(
    formula = good_outcome ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating < 2.5,]
)
summary(model)

model = glm(
    formula = good_outcome ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating >= 2.5,]
)
summary(model)

# "Improves": post-message rating is at least the pre-message rating.
huggling_codings$improves = with(
    huggling_codings,
    after_rating >= before_rating
)

model = glm(
    formula = improves ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings
)
summary(model)
| 143 | + |
| 144 | + |
#####
#### Low-rated editors (before_rating < 1.5) go away
####

# Success for a low-rated editor: no after_rating at all, or an after_rating
# of at least 1.5, or a good contact.
huggling_codings$fail_success = (
    huggling_codings$before_rating < 1.5 &
    (
        is.na(huggling_codings$after_rating) |
        huggling_codings$after_rating >= 1.5 |
        huggling_codings$good_contact
    )
)
summary(huggling_codings$fail_success[huggling_codings$before_rating < 1.5])

# NOTE(review): only the personal:teaching interaction is modelled here, while
# the other models in this script use personal*teaching -- confirm intent.
model = glm(
    formula = fail_success ~ personal:teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating < 1.5,]
)
summary(model)
| 166 | + |
| 167 | + |
| 168 | + |
#####
#### Golden stick around (and do good work)
####

# Success for a highly rated editor (before_rating >= 3): they have an
# after_rating and either rate above 3 afterwards or made a good contact.
huggling_codings$golden_good = (
    huggling_codings$before_rating >= 3 &
    !is.na(huggling_codings$after_rating) &
    (
        huggling_codings$after_rating > 3 |
        huggling_codings$good_contact
    )
)
summary(huggling_codings$golden_good[huggling_codings$before_rating >= 3])


model = glm(
    formula = golden_good ~ personal*teaching,
    family  = binomial(link="logit"),
    data    = huggling_codings[huggling_codings$before_rating >= 3,]
)
summary(model)
| 192 | + |
| 193 | + |
#####
#### Median conversion
####

# Success for a middling editor (2 <= before_rating <= 3): no after_rating, or
# an after_rating above the before_rating, or a good contact.
# NOTE(review): a missing after_rating counts as success here, as it does for
# the low-rated group, but not for the golden group -- confirm that is intended.
huggling_codings$median_good = with(
    huggling_codings,
    before_rating >= 2 &
    before_rating <= 3 & (
        is.na(after_rating) |
        (
            after_rating > before_rating |
            good_contact
        )
    )
)
summary(huggling_codings[
    huggling_codings$before_rating >= 2 &
    huggling_codings$before_rating <= 3,
]$median_good)


# Model the median outcome on the median group. This section previously refit
# golden_good on before_rating >= 3, repeating the golden model.
model = glm(
    median_good ~ personal*teaching,
    huggling_codings[
        huggling_codings$before_rating >= 2 &
        huggling_codings$before_rating <= 3,
    ],
    family=binomial(link="logit")
)
summary(model)
Index: trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings.R |
— | — | @@ -0,0 +1,27 @@ |
| 2 | +source("util/env.R") |
| 3 | + |
# Load the huggling codings TSV (DATA_DIR/huggling_codings.tsv) and cache it in
# the global HUGGLING_CODINGS.
#
# verbose: print progress messages while loading.
# reload:  discard any cached copy and read the file again.
#
# Returns the data frame with `personal`, `teaching` and `image` as logical
# columns, both on the first load and on later cached calls.
load_huggling_codings = function(verbose=TRUE, reload=FALSE){
    filename = file.path(DATA_DIR, "huggling_codings.tsv")

    # Text codings are TRUE only for the exact string "True". A column that
    # read.table has already parsed to logical is kept as is, because
    # as.factor() on it yields "TRUE"/"FALSE" levels that never equal "True".
    as_flag = function(x){
        if(is.logical(x)){
            x
        }else{
            as.character(x) == "True"
        }
    }

    if(reload || !exists("HUGGLING_CODINGS") || is.null(HUGGLING_CODINGS)){
        # Drop any stale copy first so a failed read does not leave it behind.
        HUGGLING_CODINGS <<- NULL
        if(verbose){cat("Loading ", filename, "...")}
        codings = read.table(
            filename,
            header=TRUE, sep="\t",
            quote="", comment.char="",
            na.strings="NULL"
        )
        codings$personal = as_flag(codings$personal)
        codings$teaching = as_flag(codings$teaching)
        codings$image    = as_flag(codings$image)

        # Store the converted frame in the cache. Assigning the columns with
        # `HUGGLING_CODINGS$col = ...` inside the function would only modify a
        # local copy and leave the cached global unconverted.
        HUGGLING_CODINGS <<- codings
        if(verbose){cat("DONE!\n")}
    }
    HUGGLING_CODINGS
}
| 28 | + |
Index: trunk/tools/wsor/newbie_warnings/R/outcomes.R |
— | — | @@ -0,0 +1,205 @@ |
| 2 | +source("loader/load_hugglings.R") |
| 3 | +source("loader/load_huggling_codings.R") |
| 4 | +library(doBy) |
| 5 | +hugglings = load_hugglings() |
| 6 | + |
# Number of hugglings per recipient, merged back onto every huggling row as
# the column `count`.
hugglingCounts = summaryBy(
    recipient ~ recipient,
    data = hugglings,
    FUN = length
)
names(hugglingCounts)[names(hugglingCounts) == "recipient.length"] = "count"

hugglings = merge(hugglings, hugglingCounts, by="recipient")

# Always re-read the codings; keep only rows that have a before_rating.
huggling_codings = load_huggling_codings(reload=T)
messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),]
| 19 | + |
# Derived outcome and predictor columns for the messaged recipients.

# Retaliated at least once (the column name keeps its existing spelling).
messaged_codings$retailates = messaged_codings$retaliates > 0
# Any contact with the huggler, friendly or retaliatory.
messaged_codings$contact = messaged_codings$contacts_huggler > 0 | messaged_codings$retaliates > 0
# Post-message rating of at least 3 (NA when there is no after_rating).
messaged_codings$quality_work = messaged_codings$after_rating >= 3.0
# Has an after_rating at all.
messaged_codings$stay = !is.na(messaged_codings$after_rating)
# Post-message rating strictly above the pre-message rating.
messaged_codings$improves = messaged_codings$after_rating > messaged_codings$before_rating
messaged_codings$anon = messaged_codings$is_anon > 0

# NOTE(review): this "before_msg" total adds user_talk_edits_AFTER_msg to
# article_talk_edits_before_msg. It looks like a copy-paste slip for a
# before-message user-talk count, but the TSV schema is not visible here --
# confirm the column name before changing it.
messaged_codings$talk_edits_before_msg = with(
    messaged_codings,
    user_talk_edits_after_msg + article_talk_edits_before_msg
)
messaged_codings$ntalk_edits_before_msg = with(
    messaged_codings,
    edits_before_msg - talk_edits_before_msg
)

# Among recipients who made contact: TRUE when they did not retaliate.
# NA for recipients who made no contact (or whose contact is unknown).
messaged_codings$good_contact = ifelse(
    !is.na(messaged_codings$contact) & messaged_codings$contact,
    messaged_codings$retaliates <= 0,
    NA
)

# Overall good outcome, any of:
#  - rated <= 2 before and either has no after_rating or is rated > 2 after,
#  - made a good contact,
#  - did quality work afterwards.
messaged_codings$good_outcome = with(
    messaged_codings,
    (
        before_rating <= 2 &
        (
            is.na(after_rating) |
            after_rating > 2
        )
    ) |
    (
        !is.na(good_contact) &
        good_contact
    ) |
    (
        !is.na(quality_work) &
        quality_work
    )
)
| 63 | + |
##
# Groups
#
# - < 2          at least one of us thought "no hope"
# - >= 2 & <= 3  possibles
# - > 3          at least one of us thought "golden"
#
# For each group:
# - contact
#   - contact huggler + retaliate
#   - talk? (wait for staeiou)
# - continue editing
#   - did they actually
#   - quality
#     - improve
#     - was it good
#     - degrade
#
#
# Predictors:
# - number of edits before message
# - number deleted
# - makes edits to talk (before/after)

# Classify each recipient by pre-message rating. Vectorised nested ifelse()
# gives the same labels as a per-element if/else chain and leaves NA ratings
# as NA.
messaged_codings$group = as.factor(ifelse(
    messaged_codings$before_rating < 2,
    "unlikely",
    ifelse(
        messaged_codings$before_rating <= 3,
        "possible",
        "golden"
    )
))
| 102 | + |
| 103 | +# |
| 104 | +# Try removing teaching*personal. |
| 105 | +# |
| 106 | + |
# For each rating group, print model summaries for every outcome using the
# same predictors: anonymity, non-talk and talk edit counts before the message,
# and teaching * personal.
# NOTE(review): glm() is called without `family`, so these use glm's default
# family rather than the binomial logit used elsewhere in this changeset --
# confirm that is intended for these logical outcomes.
for(group in c("unlikely", "possible", "golden")){
    group_codings = messaged_codings[messaged_codings$group == group, ]

    cat("Result's for ", nrow(group_codings), " '", group, "' editors:\n", sep='')
    cat("============================================================\n")

    print(summary(glm(
        formula = good_outcome ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
        data = group_codings
    )))

    print(summary(glm(
        formula = improves ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
        data = group_codings
    )))

    print(summary(glm(
        formula = contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
        data = group_codings
    )))

    print(summary(glm(
        formula = good_contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
        data = group_codings
    )))

    print(summary(glm(
        formula = stay ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
        data = group_codings
    )))

    cat("\n\n\n")
}
| 143 | + |
# Mean of the non-missing values of x.
meanNoNA = function(x){
    present = x[!is.na(x)]
    mean(present)
}
# Number of non-missing values in x.
lengthNoNA = function(x){
    sum(!is.na(x))
}
| 150 | + |
| 151 | +library(lattice) |
# Human-readable phrase for each outcome column, used in the plot titles.
outcomeNames = list(
    good_outcome = "with a \"good outcome\"",
    improves = "who show improvement",
    contact = "who contact the reverting editor",
    good_contact = "who contact the reverting editor nicely",
    stay = "who make at least one edit after reading the message"
)

# For every outcome: compute the proportion (and non-NA count) of editors with
# that outcome per rating group and message type, print the proportions, and
# write a bar chart to plots/outcome.<name>.all_groups.svg.
for(outcomeName in names(outcomeNames)){
    f = with(
        summaryBy(
            outcome ~ group + teaching + personal,
            data = data.frame(
                outcome = messaged_codings[[outcomeName]],
                teaching = messaged_codings$teaching,
                personal = messaged_codings$personal,
                group = messaged_codings$group
            ),
            FUN=c(meanNoNA, lengthNoNA)
        ),
        data.frame(
            group = group,
            # Message type from the two codings (vectorised).
            message = ifelse(
                personal & teaching,
                "personal & teaching",
                ifelse(
                    personal,
                    "personal",
                    ifelse(teaching, "teaching", "control")
                )
            ),
            prop = outcome.meanNoNA,
            n = outcome.lengthNoNA
        )
    )
    cat(outcomeName, "\n")
    cat(f$prop, "\n\n")
    svg(paste("plots/outcome", outcomeName, "all_groups.svg", sep="."), height=4, width=8)
    print(barchart(
        prop ~ group | message,
        data = f,
        layout=c(4,1),
        xlab="Pre-message rating",
        # ylab, not lab: lattice has no `lab` argument, so the axis label was
        # never applied.
        ylab="Proportion of editors",
        main=paste("Proportion of editors", outcomeNames[[outcomeName]])
    ))
    dev.off()
}
Index: trunk/tools/wsor/scripts/reverts.py |
— | — | @@ -0,0 +1,72 @@ |
| 2 | +from wmf import dump |
| 3 | +from wmf.dump.processors import reverts |
| 4 | +from multiprocessing import cpu_count |
| 5 | +import time, types, argparse, sys, logging |
| 6 | + |
def encode(v):
    """Encode one value as a TSV field.

    None becomes the two-character marker backslash-N; ints and longs are
    written as decimal digits; unicode strings are UTF-8 encoded and then
    'string-escape'd; anything else is str()'d and 'string-escape'd.
    """
    # Identity test: `== None` would invoke a custom __eq__ on v.
    if v is None:
        return "\\N"
    elif type(v) in (types.LongType, types.IntType):
        return str(int(v))
    elif type(v) == types.UnicodeType:
        return v.encode('utf-8').encode('string-escape')
    else:
        return str(v).encode('string-escape')
| 16 | + |
| 17 | + |
def main():
    """Process XML dump file(s) with reverts.process and write the rows.

    Rows tagged 'revert' go to <output_prefix>revert.tsv and rows tagged
    'reverted' go to <output_prefix>reverted.tsv, one tab-separated line per
    row. A progress tick is written to stderr for every row ('|' for a
    revert, '.' for a reverted revision).
    """
    parser = argparse.ArgumentParser(
        description='Writes revert and reverted rows extracted from XML ' +
                    'dump file(s) to TSV files'
    )
    parser.add_argument(
        '-t', '--threads',
        type=int,
        help='the number of parallel threads of processing to start',
        default=max(1, cpu_count() - 1)
    )
    parser.add_argument(
        '-p', '--output_prefix',
        type=str,
        help='the prefix to prepend to output file names',
        default=str(int(time.time()))
    )
    parser.add_argument(
        'dump',
        type=str,
        help='the XML dump file(s) to process',
        nargs="+"
    )
    args = parser.parse_args()

    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.INFO,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )
    logging.info("Starting %s run..." % args.output_prefix)

    # The prefix is prepended as-is; no separator is inserted.
    revertPath = args.output_prefix + "revert.tsv"
    revertedPath = args.output_prefix + "reverted.tsv"
    logging.info("Creating output file: %s" % revertPath)
    logging.info("Creating output file: %s" % revertedPath)

    # Log the inputs rather than printing them to stdout.
    logging.info("Dump file(s): %s" % ", ".join(args.dump))

    # `with` closes both files even if processing raises part-way through.
    with open(revertPath, "w") as revertFile, \
         open(revertedPath, "w") as revertedFile:
        logging.info("Processing...")
        for out in dump.map(args.dump, reverts.process, threads=args.threads):
            # out[0] is the row tag; the remaining items are the row's fields.
            if out[0] == 'revert':
                revertFile.write("\t".join(encode(v) for v in out[1:]) + "\n")
                LOGGING_STREAM.write("|")
            elif out[0] == 'reverted':
                revertedFile.write("\t".join(encode(v) for v in out[1:]) + "\n")
                LOGGING_STREAM.write(".")
| 70 | + |
| 71 | + |
| 72 | +if __name__ == "__main__": |
| 73 | + main() |
Index: trunk/tools/wsor/scripts/revision_meta.py |
— | — | @@ -2,49 +2,10 @@ |
3 | 3 | from difflib import SequenceMatcher
|
4 | 4 | from gl.containers import LimitedDictLists
|
5 | 5 | import wmf
|
| 6 | +from wmf import dump
|
6 | 7 |
|
7 | | -from text import STOP_WORDS, MARKUP
|
8 | | -
|
9 | | -
|
10 | | -def tokenize(text):
|
11 | | - return re.findall(
|
12 | | - r"[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|.",
|
13 | | - text
|
14 | | - )
|
15 | | -
|
16 | | -def simpleDiff(a, b):
|
17 | | - sm = SequenceMatcher(None, a, b)
|
18 | | - added = []
|
19 | | - removed = []
|
20 | | - for (tag, i1, i2, j1, j2) in sm.get_opcodes():
|
21 | | - if tag == 'replace':
|
22 | | - removed.extend(a[i1:i2])
|
23 | | - added.extend(b[j1:j2])
|
24 | | - elif tag == 'delete':
|
25 | | - removed.extend(a[i1:i2])
|
26 | | - elif tag == 'insert':
|
27 | | - added.extend(b[i1:i2])
|
28 | | -
|
29 | | - return (added, removed)
|
30 | | -
|
31 | | -
|
32 | | -
|
33 | 8 | def process(dump, page):
|
34 | 9 | recentRevs = LimitedDictLists(maxsize=15)
|
35 | | - lastTokens = []
|
36 | | - metaHeaders = [
|
37 | | - 'rev_id',
|
38 | | - 'checksum',
|
39 | | - 'tokens',
|
40 | | - 'cs_added',
|
41 | | - 'cs_removed',
|
42 | | - 'ts_added',
|
43 | | - 'ts_removed',
|
44 | | - 'ws_added',
|
45 | | - 'ws_removed',
|
46 | | - 'ms_added',
|
47 | | - 'ms_removed'
|
48 | | - ]
|
49 | 10 | for revision in page.readRevisions():
|
50 | 11 | checksum = hashlib.md5(revision.getText().encode("utf-8")).hexdigest()
|
51 | 12 | if checksum in recentRevs:
|
— | — | @@ -77,38 +38,8 @@ |
78 | 39 | else:
|
79 | 40 | pass
|
80 | 41 |
|
81 | | - """tokens = tokenize(revision.getText())
|
82 | 42 |
|
83 | | - tokensAdded, tokensRemoved = simpleDiff(lastTokens, tokens)
|
84 | | -
|
85 | | - row = {
|
86 | | - 'rev_id': revision.getId(),
|
87 | | - 'checksum': checksum,
|
88 | | - 'tokens': len(revision.getText()),
|
89 | | - 'cs_added': 0,
|
90 | | - 'cs_removed': 0,
|
91 | | - 'ts_added': 0,
|
92 | | - 'ts_removed': 0,
|
93 | | - 'ws_added': 0,
|
94 | | - 'ws_removed': 0,
|
95 | | - 'ms_added': 0,
|
96 | | - 'ms_removed': 0
|
97 | | - }
|
98 | | - for token in tokensAdded:
|
99 | | - row['ts_added'] += 1
|
100 | | - row['cs_added'] += len(token)
|
101 | | - if token.strip() == '': pass
|
102 | | - if token in MARKUP: row['ms_added'] += 1
|
103 | | - elif token not in STOP_WORDS: row['ws_added'] += 1
|
104 | | - for token in tokensRemoved:
|
105 | | - row['ts_removed'] += 1
|
106 | | - row['cs_removed'] += len(token)
|
107 | | - if token.strip() == '': pass
|
108 | | - if token in MARKUP: row['ms_removed'] += 1
|
109 | | - elif token not in STOP_WORDS: row['ws_removed'] += 1
|
110 | | -
|
111 | | -
|
112 | | - yield tuple(['meta']+[row[h] for h in metaHeaders])
|
113 | | -
|
114 | | - lastTokens = tokens"""
|
115 | 43 | recentRevs.insert(checksum, revision)
|
| 44 | +
|
| 45 | +
|
| 46 | +
|
Index: trunk/tools/wsor/scripts/fix_reg_date.dumb.py |
— | — | @@ -0,0 +1,107 @@ |
| 2 | +import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types |
| 3 | +import wmf |
| 4 | + |
def encode(v):
    """Encode one value as a TSV field.

    None becomes the two-character marker backslash-N. Longs are converted to
    int and unicode strings to UTF-8 bytes before the value is str()'d and
    'string-escape'd.
    """
    # "\\N" spells the backslash explicitly; a bare "\N" is an invalid escape
    # (and a syntax error where \N{...} is recognised in string literals).
    if v is None: return "\\N"

    if type(v) == types.LongType: v = int(v)
    elif type(v) == types.UnicodeType: v = v.encode('utf-8')

    return str(v).encode("string-escape")
| 12 | + |
| 13 | + |
def main():
    """Print approximate registration dates for users that have none.

    Walks the users returned by Database.getUsersBefore(date) (ordered by
    user_id descending), tracking the lowest registration date seen so far.
    Every user whose user_registration is NULL is assigned that lowest date
    and printed to stdout as "user_id<TAB>user_registration". A progress tick
    is written to stderr per user ('!' for a filled-in date, '.' otherwise).
    """
    parser = argparse.ArgumentParser(
        description='Gathers approximate registration date by walking ' +
                    'backwards through the user table and guessing at registration ' +
                    'dates based on user_id.  Assumes user_id is ordered.'
    )
    parser.add_argument(
        'date',
        type=str,
        help='the date to start querying for users with dumb registration dates'
    )
    parser.add_argument(
        '-c', '--cnf',
        metavar="<path>",
        type=str,
        help='the path to MySQL config info (defaults to ~/.my.cnf)',
        default=os.path.expanduser("~/.my.cnf")
    )
    parser.add_argument(
        '-s', '--host',
        type=str,
        help='the database host to connect to (defaults to localhost)',
        default="localhost"
    )
    parser.add_argument(
        '-d', '--db',
        type=str,
        help='the language db to run the query in (defaults to enwiki)',
        default="enwiki"
    )
    args = parser.parse_args()

    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.DEBUG,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info("Connecting to %s:%s using %s." % (args.host, args.db, args.cnf))
    db = Database(
        host=args.host,
        db=args.db,
        read_default_file=args.cnf
    )
    headers = [
        'user_id',
        'user_registration'
    ]

    # Lowest registration date seen so far; starts at the requested date.
    lowestDate = args.date
    logging.info("Querying users registered on or before %s..." % args.date)
    for user in db.getUsersBefore(args.date):
        if user['user_registration'] is None:
            # No recorded date: approximate it with the lowest date seen among
            # the (higher) user_ids processed so far.
            LOGGING_STREAM.write("!")
            user['user_registration'] = lowestDate
            print("\t".join(str(user[h]) for h in headers))
        else:
            LOGGING_STREAM.write(".")

        lowestDate = min(user['user_registration'], lowestDate)

    LOGGING_STREAM.write("\n")
| 78 | + |
| 79 | + |
| 80 | + |
| 81 | + |
class Database:
    """Thin wrapper around one MySQLdb connection used to query the user table."""

    def __init__(self, *args, **kwargs):
        """Connect with MySQLdb.connect(*args, **kwargs); the arguments are kept on the instance."""
        self.args = args
        self.kwargs = kwargs
        self.usersConn = MySQLdb.connect(*args, **kwargs)

    def getUsersBefore(self, date):
        """Yield {'user_id', 'user_registration'} rows, highest user_id first.

        Selects users whose user_registration is <= `date` or is NULL. Rows
        come from an SSDictCursor, so each row is a dict.
        """
        cursor = self.usersConn.cursor(MySQLdb.cursors.SSDictCursor)
        try:
            cursor.execute(
                """
                SELECT
                    user_id,
                    user_registration
                FROM user
                WHERE user_registration <= %(date)s
                OR user_registration IS NULL
                ORDER BY user_id DESC
                """,
                {
                    'date': date
                }
            )
            for row in cursor:
                yield row
        finally:
            # Release the cursor even if the consumer stops iterating early
            # or the query fails.
            cursor.close()
| 107 | + |
| 108 | +if __name__ == "__main__": main() |
Index: trunk/tools/wsor/first_session/R/first_session_characteristics.R |
— | — | @@ -1,9 +1,15 @@ |
2 | 2 | source("loader/user_sessions.R") |
| 3 | +source("loader/user_survival.R") |
3 | 4 | |
4 | 5 | library(lattice) |
5 | 6 | library(doBy) |
6 | 7 | |
7 | 8 | user_sessions = load_user_sessions() |
| 9 | +user_survival = load_user_survival() |
| 10 | +user_sessions = merge( |
| 11 | + user_sessions, |
| 12 | + user_survival |
| 13 | +) |
8 | 14 | user_sessions$year = strftime(user_sessions$first_edit, format="%Y") |
9 | 15 | user_sessions$early_survival = user_sessions$last_edit - user_sessions$es_0_end >= 30 |
10 | 16 | |
— | — | @@ -246,7 +252,7 @@ |
247 | 253 | |
248 | 254 | surviving_editors = with( |
249 | 255 | summaryBy( |
250 | | - early_survival ~ |
| 256 | + surviving ~ |
251 | 257 | year, |
252 | 258 | data=user_sessions[!is.na(user_sessions$year),], |
253 | 259 | FUN=c(sum, length) |
— | — | @@ -254,9 +260,9 @@ |
255 | 261 | rbind( |
256 | 262 | data.frame( |
257 | 263 | year = as.numeric(as.character(year)), |
258 | | - surviving = early_survival.sum, |
259 | | - n = early_survival.length, |
260 | | - prop = early_survival.sum/early_survival.length |
| 264 | + surviving = surviving.sum, |
| 265 | + n = surviving.length, |
| 266 | + prop = surviving.sum/surviving.length |
261 | 267 | ) |
262 | 268 | ) |
263 | 269 | ) |
— | — | @@ -298,7 +304,7 @@ |
299 | 305 | |
300 | 306 | combined = rbind( |
301 | 307 | with( |
302 | | - three_es.rejection, |
| 308 | + three_es.rejection[three_es.rejection$es == 0,], |
303 | 309 | data.frame( |
304 | 310 | x = year, |
305 | 311 | y = mean, |
— | — | @@ -327,7 +333,7 @@ |
328 | 334 | lty=2 |
329 | 335 | ), |
330 | 336 | "edit session 0"=list( |
331 | | - col="#0000FF", |
| 337 | + col="#BB0000", |
332 | 338 | pch=1, |
333 | 339 | lty=1 |
334 | 340 | )#, |
— | — | @@ -339,7 +345,7 @@ |
340 | 346 | # "edit session 2"=list( |
341 | 347 | # col="#00FF00", |
342 | 348 | # pch=3, |
343 | | -3 lty=1 |
| 349 | +# lty=1 |
344 | 350 | # ) |
345 | 351 | ) |
346 | 352 | xyplot( |
Index: trunk/tools/wsor/first_session/R/loader/user_sessions.R |
— | — | @@ -1,7 +1,7 @@ |
2 | 2 | source("util/env.R") |
3 | 3 | |
4 | 4 | load_user_sessions = function(verbose=T, reload=F){ |
5 | | - filename = paste(DATA_DIR, "user_sessions.5.tsv", sep="/") |
| 5 | + filename = paste(DATA_DIR, "user_sessions.3.tsv", sep="/") |
6 | 6 | if(!exists("USER_SESSIONS")){ |
7 | 7 | USER_SESSIONS <<- NULL |
8 | 8 | } |
— | — | @@ -27,13 +27,13 @@ |
28 | 28 | USER_SESSIONS$es_0_start = as.POSIXct(USER_SESSIONS$es_0_start, origin="1970-01-01") |
29 | 29 | USER_SESSIONS$es_1_start = as.POSIXct(USER_SESSIONS$es_1_start, origin="1970-01-01") |
30 | 30 | USER_SESSIONS$es_2_start = as.POSIXct(USER_SESSIONS$es_2_start, origin="1970-01-01") |
31 | | - USER_SESSIONS$es_3_start = as.POSIXct(USER_SESSIONS$es_3_start, origin="1970-01-01") |
32 | | - USER_SESSIONS$es_4_start = as.POSIXct(USER_SESSIONS$es_4_start, origin="1970-01-01") |
| 31 | + #USER_SESSIONS$es_3_start = as.POSIXct(USER_SESSIONS$es_3_start, origin="1970-01-01") |
| 32 | + #USER_SESSIONS$es_4_start = as.POSIXct(USER_SESSIONS$es_4_start, origin="1970-01-01") |
33 | 33 | USER_SESSIONS$es_0_end = as.POSIXct(USER_SESSIONS$es_0_end, origin="1970-01-01") |
34 | 34 | USER_SESSIONS$es_1_end = as.POSIXct(USER_SESSIONS$es_1_end, origin="1970-01-01") |
35 | 35 | USER_SESSIONS$es_2_end = as.POSIXct(USER_SESSIONS$es_2_end, origin="1970-01-01") |
36 | | - USER_SESSIONS$es_3_end = as.POSIXct(USER_SESSIONS$es_3_end, origin="1970-01-01") |
37 | | - USER_SESSIONS$es_4_end = as.POSIXct(USER_SESSIONS$es_4_end, origin="1970-01-01") |
| 36 | + #USER_SESSIONS$es_3_end = as.POSIXct(USER_SESSIONS$es_3_end, origin="1970-01-01") |
| 37 | + #USER_SESSIONS$es_4_end = as.POSIXct(USER_SESSIONS$es_4_end, origin="1970-01-01") |
38 | 38 | if(verbose){cat("DONE!\n")} |
39 | 39 | } |
40 | 40 | USER_SESSIONS |
Index: trunk/tools/wsor/first_session/R/loader/user_survival.R |
— | — | @@ -0,0 +1,25 @@ |
| 2 | +source("util/env.R") |
| 3 | + |
# Loads the per-user survival table (user_survival.tsv under DATA_DIR) and
# caches it in the global variable USER_SURVIVAL.
#
# verbose: when TRUE, progress messages are written with cat().
# reload:  when TRUE, any cached table is discarded and the file is re-read.
#
# Returns the cached data frame; its `surviving` column is logical.
load_user_survival = function(verbose=TRUE, reload=FALSE){
    filename = paste(DATA_DIR, "user_survival.tsv", sep="/")

    # Reset the cache when it has never been created or a reload was requested.
    if(!exists("USER_SURVIVAL") || reload){
        USER_SURVIVAL <<- NULL
    }

    if(is.null(USER_SURVIVAL)){
        if(verbose){cat("Loading ", filename, "...")}
        survival = read.table(
            filename,
            header=TRUE, sep="\t",
            quote="", comment.char="",
            na.strings="\\N"
        )
        # The file stores the flag as the text "True"/"False".  read.table may
        # already have parsed it as logical (as.character() then yields
        # "TRUE"), so both spellings are accepted.  NA stays NA.
        flag = as.character(survival$surviving)
        survival$surviving = flag == "True" | flag == "TRUE"

        # Convert BEFORE publishing to the global cache: a `$<-` assignment on
        # USER_SURVIVAL inside this function would only modify a local copy and
        # leave the cached table unconverted for later calls.
        USER_SURVIVAL <<- survival
        if(verbose){cat("DONE!\n")}
    }
    USER_SURVIVAL
}
| 25 | + |
| 26 | + |
Index: trunk/tools/wsor/first_session/get_survival.py |
— | — | @@ -0,0 +1,174 @@ |
| 2 | +import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types |
| 3 | +import wmf |
| 4 | + |
def encode(v):
    """Serialize a single value as one TSV field.

    None is written as the two-character marker backslash-N.  Long integers
    are converted to int and unicode strings are encoded as UTF-8 before the
    value is passed through str() and the "string-escape" codec.
    """
    # "\\N" is spelled with an explicit escaped backslash; the bare "\N" form
    # only works in Python 2 byte strings.
    if v is None:
        return "\\N"

    if type(v) == types.LongType:
        v = int(v)
    elif type(v) == types.UnicodeType:
        v = v.encode('utf-8')

    return str(v).encode("string-escape")
| 12 | + |
def decode(v):
    """Inverse of encode() for one TSV field.

    The two-character marker backslash-N maps back to None; any other value
    is un-escaped with the "string-escape" codec.
    """
    # Explicitly escaped backslash: identical to "\N" in a Python 2 byte
    # string, but not dependent on that lenient parsing.
    if v == "\\N":
        return None
    return v.decode("string-escape")
| 16 | + |
| 17 | + |
def main():
    """Command-line entry point.

    Reads user rows from the input file, asks the database whether each user
    edited again within the [after, sunset] window following the end of their
    first session (es_0_end), and writes a TSV of user_id, user_name and
    surviving to the output file.  Progress ("s" for surviving, "." otherwise)
    is written to stderr.
    """
    parser = argparse.ArgumentParser(
        description='Gathers editor data for first and last session'
    )
    parser.add_argument(
        'after',
        type=int,
        help='the minimum time (in seconds) after the first session an editor must edit to be considered "surviving"'
    )
    parser.add_argument(
        'sunset',
        type=int,
        help='the time (in seconds) of an artificial sunset beyond which to stop looking for surviving editors'
    )
    parser.add_argument(
        '-c', '--cnf',
        metavar="<path>",
        type=str,
        help='the path to MySQL config info (defaults to ~/.my.cnf)',
        default=os.path.expanduser("~/.my.cnf")
    )
    parser.add_argument(
        '-s', '--host',
        type=str,
        help='the database host to connect to (defaults to localhost)',
        default="localhost"
    )
    parser.add_argument(
        '-d', '--db',
        type=str,
        help='the language db to run the query in (defaults to enwiki)',
        default="enwiki"
    )
    parser.add_argument(
        '-i', '--input',
        type=lambda fn:open(fn, 'r'),
        help='an old input file from get_first_n_sessions (defaults to stdin)',
        default=sys.stdin
    )
    parser.add_argument(
        '-o', '--out',
        type=lambda fn:open(fn, 'w'),
        help='an output file to write to (defaults to stdout)',
        default=sys.stdout
    )
    args = parser.parse_args()
    # Refuse to read interactively from a terminal.
    assert not args.input.isatty(), "An input file must be specified"

    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.DEBUG,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info("Connecting to %s:%s using %s." % (args.host, args.db, args.cnf))
    db = Database(
        host=args.host,
        db=args.db,
        read_default_file=args.cnf
    )
    headers = [
        'user_id',
        'user_name',
        'surviving'
    ]

    # The header row goes to the same stream as the data rows so that a file
    # given with -o/--out is a complete TSV.
    args.out.write("\t".join(headers) + "\n")

    logging.info("Processing users...")
    for user in usersFromFile(args.input):

        user['surviving'] = db.getSurviving(user['user_id'], user['es_0_end'], args.after, args.sunset)
        if user['surviving']:
            LOGGING_STREAM.write("s")
        else:
            LOGGING_STREAM.write(".")

        args.out.write("\t".join(encode(user.get(h)) for h in headers) + "\n")

    LOGGING_STREAM.write("\n")
| 101 | + |
| 102 | + |
| 103 | + |
| 104 | + |
def usersFromFile(f):
    """Yield one dict per data line of a tab-separated file.

    The first line of f supplies the column names; every later line is
    stripped, split on tabs, decoded field by field and zipped with those
    names.
    """
    columns = [decode(name) for name in f.readline().strip().split("\t")]
    for record in f:
        fields = [decode(field) for field in record.strip().split("\t")]
        yield dict(zip(columns, fields))
| 114 | + |
| 115 | + |
class Database:
    """Small wrapper around a MySQLdb connection used to test editor survival."""

    def __init__(self, *args, **kwargs):
        """Open the connection; all arguments are passed to MySQLdb.connect()."""
        self.args = args
        self.kwargs = kwargs
        self.conn = MySQLdb.connect(*args, **kwargs)

    def getSurviving(self, userId, endOfSession, after, before):
        """Report whether a user edited again after their first session.

        userId       -- id of the user (anything int() accepts)
        endOfSession -- end of the first session in unix seconds, or None
        after        -- seconds after endOfSession at which the window opens
        before       -- seconds after endOfSession at which the window closes

        Returns True when the revision or the archive table holds at least one
        row for the user whose timestamp lies between endOfSession + after and
        endOfSession + before; False otherwise, and False when endOfSession is
        None.
        """
        userId = int(userId)
        after = int(after)
        before = int(before)
        if endOfSession is None:
            return False
        endOfSession = int(endOfSession)

        # Both tables are searched over the SAME absolute window, measured
        # from the end of the first session.
        window = {
            'user_id': userId,
            'after': endOfSession + after,
            'before': endOfSession + before
        }

        # NOTE(review): FROM_UNIXTIME() yields a DATETIME; if the timestamp
        # columns are stored as 14-character strings this comparison relies on
        # MySQL's implicit conversion -- confirm against the schema.
        revisionQuery = """
            SELECT 1
            FROM revision
            WHERE rev_user = %(user_id)s
            AND rev_timestamp BETWEEN FROM_UNIXTIME(%(after)s) AND FROM_UNIXTIME(%(before)s)
            LIMIT 1;
            """
        archiveQuery = """
            SELECT
                1
            FROM archive
            WHERE ar_user = %(user_id)s
            AND ar_timestamp BETWEEN FROM_UNIXTIME(%(after)s) AND FROM_UNIXTIME(%(before)s)
            LIMIT 1;
            """

        # Only the existence of a row matters, so the default cursor suffices.
        cursor = self.conn.cursor()
        try:
            for query in (revisionQuery, archiveQuery):
                cursor.execute(query, window)
                if cursor.fetchone() is not None:
                    return True
        finally:
            cursor.close()

        return False
| 173 | + |
| 174 | + |
| 175 | +if __name__ == "__main__": main() |
Index: trunk/tools/wsor/diffs/example.py |
— | — | @@ -26,8 +26,8 @@ |
27 | 27 | r"|\|\}" + #Closing table |
28 | 28 | r"|\|\-" + #Table row |
29 | 29 | r"|.", #Misc character |
30 | | - content |
31 | | - ) |
| 30 | + content |
| 31 | + ) |
32 | 32 | |
33 | 33 | def hashTokens(tokens, hash2Token=[], token2Hash={}): |
34 | 34 | hashBuffer = StringIO() |
— | — | @@ -94,6 +94,13 @@ |
95 | 95 | print("Rev: id=%s\n\t%r\n\t%r" % (rev['rev_id'], rev['content'], content)) |
96 | 96 | lastRev = rev |
97 | 97 | |
| 98 | + content1 = open("content.2.txt", "r").read() |
| 99 | + hashes1, h2t, t2h = hashTokens(tokenize(content)) |
| 100 | + print(len(hashes1)) |
| 101 | + |
| 102 | + content = open("content.txt", "r").read() |
| 103 | + hashes2, h2t, t2h = hashTokens(tokenize(content), h2t, t2h) |
| 104 | + print(len(hashes2)) |
98 | 105 | |
99 | 106 | |
100 | 107 | |
Index: trunk/tools/wsor/diffs/revision_differ.py |
— | — | @@ -158,13 +158,18 @@ |
159 | 159 | repr(userId), |
160 | 160 | repr(userName) |
161 | 161 | ] |
| 162 | + try: |
| 163 | + for d in simpleDiff(lastRev.getText(), revision.getText(), report=[-1,1]): |
| 164 | + row.append(":".join(repr(v) for v in d)) |
| 165 | + |
| 166 | + print("\t".join(row)) |
| 167 | + sys.stderr.write('reporter:counter:SkippingTaskCounters,MapProcessedRecords,1\n') |
| 168 | + except Exception as e: |
| 169 | + row.extend(["diff_fail", str(e).encode('string-escape')]) |
| 170 | + print("\t".join(row)) |
| 171 | + raise e |
162 | 172 | |
163 | | - for d in simpleDiff(lastRev.getText(), revision.getText(), report=[-1,1]): |
164 | | - row.append(":".join(repr(v) for v in d)) |
165 | 173 | |
166 | | - print("\t".join(row)) |
167 | | - sys.stderr.write('reporter:counter:SkippingTaskCounters,MapProcessedRecords,1\n') |
168 | | - |
169 | 174 | except Exception as e: |
170 | 175 | sys.stderr.write('%s - while processing revId=%s\n' % (e, currRevId)) |
171 | 176 | traceback.print_exc(file=sys.stderr) |
Index: trunk/tools/wsor/diffs/page_sample.xml |
— | — | @@ -51,3 +51,445 @@ |
52 | 52 | <text xml:space="preserve">baz</text> |
53 | 53 | </revision> |
54 | 54 | </page> |
| 55 | + <page> |
| 56 | + <title>TestPage or something</title> |
| 57 | + <id>9001</id> |
| 58 | + <revision> |
| 59 | + <id>100</id> |
| 60 | + <timestamp>2009-04-12T17:03:02Z</timestamp> |
| 61 | + <contributor deleted="deleted" /> |
| 62 | + <text xml:space="preserve">{| style="float: right; clear: right; background-color: transparent" |
| 63 | +| {{Infobox Military Conflict |
| 64 | +|conflict=Sinai and Palestine Campaign |
| 65 | +|partof=[[Middle Eastern theatre of World War I|Middle Eastern theatre]] ([[World War I]]) |
| 66 | +|image=[[Image:Anzacsoldierandhorseinsinaiandpalestinecampaign.JPG|200px]] |
| 67 | +|caption=A model of a typical [[ANZAC]] soldier and his horse during the campaign |
| 68 | +|date=28 January 1915 - 28 October 1918 |
| 69 | +|place=[[Sinai Peninsula]], [[Palestine]], and [[Syria]] |
| 70 | +|result=Allied Victory |
| 71 | +|territory=[[Partitioning of the Ottoman Empire]] |
| 72 | +|combatant1={{flagicon|United Kingdom}} [[British Empire]]<br> |
| 73 | +*{{flagicon|United Kingdom}} [[united Kingdom of Great Britain and Ireland|United Kingdom]] |
| 74 | +*{{flagicon|Australia}} [[Military history of Australia during World War I|Australia]] |
| 75 | +*{{flagicon|New Zealand}} [[Dominion of New Zealand|New Zealand]] |
| 76 | +*{{flagicon|India|British}} [[British Raj|India]] |
| 77 | +{{flag|France}}<br>{{flagicon|Italy|1861}} [[Kingdom of Italy (1861-1946)|Kingdom of Italy]] |
| 78 | +|combatant2={{flag|Ottoman Empire}}<br>{{flag|German Empire}} |
| 79 | +|commander1={{flagicon|United Kingdom}} [[John Maxwell (British Army officer)|Sir John Maxwell]]<br>{{flagicon|United Kingdom}} [[Sir Archibald Murray]]<br>{{flagicon|United Kingdom}} [[Philip Chetwode]]<br>{{flagicon|United Kingdom}} [[Charles Dobell]]<br>{{flagicon|United Kingdom}} [[Edmund Allenby]]<br>{{flagicon|Australia}} [[Henry George Chauvel]]<br>{{flagicon|United Kingdom}} [[Edward Bulfin]] |
| 80 | +|commander2={{flagicon|Ottoman Empire}} [[Ahmed Djemal|Djemal Pasha]]<br>{{flagicon|Ottoman Empire}} [[Jadir Bey]]<br>{{flagicon|Ottoman Empire}} [[Tala Bey]]<br>{{flagicon|German Empire}} [[Friedrich Freiherr Kress von Kressenstein]]<br>{{flagicon|German Empire}} [[Erich von Falkenhayn]]<br>{{flagicon|German Empire}} [[Otto Liman von Sanders]] |
| 81 | +|strength1= |
| 82 | +|strength2= |
| 83 | +|casualties1= |
| 84 | +|casualties2= |
| 85 | +|notes= |
| 86 | +}} |
| 87 | +|- |
| 88 | +|{{Campaignbox Sinai and Palestine}}{{WWITheatre}} |
| 89 | +|} |
| 90 | +The '''Sinai and Palestine Campaign''' during the [[Middle Eastern Theatre of World War I]] was a series of battles which took place in the [[Sinai Peninsula]], [[Ottoman Palestine]], and [[Syria]] between 28 January, 1915 and 28 October, 1918. [[United Kingdom|British]], [[British Indian Army|Indian]], [[Australia]]n, and [[New Zealand]] forces opposed the [[German Empire|German]] and [[Ottoman Empire|Turkish]] forces. |
| 91 | + |
| 92 | +As a result of several victories in Egypt in the late 19th Century, Britain gained control of that country and established a British protectorate there, soon after the beginning of the First World War. The Ottoman Empire also started to take an interest in Egypt quite early on in the war, possibly at the behest of Germany. The Suez Canal was their prime concern but unrest was also fomented by the Sanussi to the west of Cairo and to the south in Sudan. |
| 93 | + |
| 94 | +The Commander–in–Chief of the British Protectorate of Egypt, Major–General Sir John Maxwell [had fought in Egypt in the 1882 Battle of Tel el Kebir and in the Sudan in 1885 and 1898] describes his appointment and the situation in Egypt when he arrived – |
| 95 | + |
| 96 | +'On August 29th, 1914 I was at the Headquarters of Marshal Joffre, at Vitry le Francois, where I received orders from Field–Marshall Earl Kitchener to proceed at once to Egypt and take over the command there. Somewhat disconcerted, I complied and arrived September 8th in that country. |
| 97 | + |
| 98 | +When I left France the French and British armies were in full retreat to the line of the Marne. Our little Army, after magnificent and strenuous resistance, had suffered terribly, and the question of reinforcements was paramount. It was, therefore, no surprise when, on my arrival in Egypt, I received orders to send every British soldier at once to England. I was informed that large forces were expected to be passing through the Suez Canal en route to Europe, and that a Territorial Division would be sent as soon as possible. The situation I found was by no means a pleasant one. The Turks were sitting on the fence, the Khedive Abbas was in Constantinople intriguing against us. The population of Egypt was some 12 millions, the great majority Moslems, in sympathy with their co–religionists the Turks; of the European population, the majority was Italian, Greek, German and Austrian, with a good proportion of Turks and Turko–Egyptians, Syrians and Armenians. The British and French were in a decided minority.' <ref>Powles, C. Guy, 'The New Zealanders in Sinai and Palestine' Volume III 'Official History New Zealand's Effort in the Great War' (Auckland, Christchurch, Dunedin and Wellington: Whitcombe & Tombs Ltd, 1922) p. vii</ref> |
| 99 | + |
| 100 | +==Defence of Egypt – Eastern Frontier; Defence of Suez Canal== |
| 101 | + |
| 102 | +The Suez Canal very quickly became of great importance to both sides. To the Ottoman Empire the canal represented the closest and weakest link in British communications, being located in an erstwhile part of the Ottoman Empire. At the beginning of the war Egypt was still linked to the Ottoman Empire by its head of state which subsisted until the British Protectorate was declared. |
| 103 | + |
| 104 | +To the British the Suez Canal was of vital strategic importance. Instead of having to travel around the Cape of Good Hope, the Suez Canal cut the traveling time from Britain to India, New Zealand and Australia and was therefore vital, to the supporting of the British war effort in the European sector by the Colonies and Dominions. |
| 105 | + |
| 106 | +However at the beginning of the war, its defence posed a number of problems. There was no road to the canal, only one railway track crossed the thirty miles of desert from Cairo to Ismailia; thence north to Port Said and south to Suez. With Ismailia near the main gates and sluices captured the vital Nile fresh water these towns relied on would make their continued habitation very difficult and their strategic importance virtually nil. |
| 107 | + |
| 108 | +The Sinai was policed by a token defence force which very quickly evacuated the area in November 1914 leaving only very few troops on the eastern side of the Suez Canal. The 30,000 strong defenders were made up of two Indian infantry divisions and one Indian mounted brigade supported by Indian mountain artillery. They were the 10th and 11th Divisions and the Imperial Service Cavalry Brigade and they mounted their main defences on the Cairo side of the canal. The Ottoman Army very quickly advanced across Sinai and by February 1915 had staged attacks against all three towns on the canal with the major effort being in the centre at Ismailia. This force could rely on their being Allied shipping in the canal which could turn their ship's guns to their support and likely some observation balloons. |
| 109 | + |
| 110 | +===Ottoman advance towards the Suez Canal=== |
| 111 | +[[Image:MapSinaiWWI.jpg|thumb|left|<center>Map of north and central [[Sinai]], 1917</center>]] |
| 112 | +The [[Ottoman Empire]], at the urging of their German ally, chose to attack British and Egyptian forces in Egypt and shut the [[Suez Canal]] in the [[First Suez Offensive]]. The Ottoman Fourth Army, under the command of the Turkish Minister of Marine, [[Ahmed Djemal|Djemal Pasha]], was based in [[Jerusalem]]. At this time, the Sinai was an almost empty desert and very hard for an army to cross as there were neither roads nor water sources. The chief of staff for the Ottoman Fourth Army was the Bavarian Colonel [[Friedrich Freiherr Kress von Kressenstein|Kress von Kressenstein]], who organized the attack and managed to get supplies for the army as it crossed the desert. |
| 113 | + |
| 114 | +Under the leadership of Kress von Kressenstein, the Ottoman Army force began to move towards the Canal in mid January 1915 from their 'Princiapl Desert Base at Hafir el Auja in three echelons. [See Library of Congress's American Colony (Jerusalem) 1914-1917 Photo album Call Number LOT 13833; Photo Number 41 of 243; Photo Album 13709; external link below need to click 'next group' to group 37 to 48] The northern group moved via Magdhaba to El Arish and thence along the northern route towards Port Said. From Auja, the central group also the largest, moved via the water cisterns at Moiya Harab and the wells at Wady um Muksheib and Jifjafa towards Ismailia where the main gates and sluices vital for the pumping of Nile fresh water to the three towns on the canal were located. Without this water the towns would be very difficult to maintain and defend. Along with their artillery and supplies, this group brought with them flat bottomed boats in which troops could cross the canal. The third smaller group moved from Auja via Nekl towards Suez in the south. There were approximately 3,000 in the north and south columns and 6,000 in the central column, but there are no reliable German or Turkish sources for the numbers of enemy troops involved. |
| 115 | + |
| 116 | +[[File:Map 3 Sinai detail Keogh p.26.jpeg|thumb|Map 3 Sinai detail Keogh p. 26]] |
| 117 | + |
| 118 | +===First Suez Offensive=== |
| 119 | +{{Main|First Suez Offensive}} |
| 120 | +The Ottoman Suez Expeditionary Force arrived at the canal on 2 February, 1915. The attack failed to achieve surprise as the British and Egyptians were aware of the Ottoman army's approach. In fighting that lasted for two days the Ottomans were beaten, losing some 2000 men. Allied losses were minimal.{{Citation needed|date=November 2009}} |
| 121 | + |
| 122 | +===1915 Actions on the Suez Canal 26 January to 4 February=== |
| 123 | + |
| 124 | +The Defence of the Suez Canal campaign began on 26 February 1915 when subsidiary attacks were made near Kantara in the north and Suez in the south by Kress von Kressenstein's minor columns. The Battles Nomenclature Committee assigned the name 'Actions on the Suez Canal' to these operations which, according to the Committee ended with the rout of the enemy following the Battle of Romani on 12 August 1916. <ref>Battles Nomenclature Committee, Army. 'The Official Names of the Battles and Other Engagements Fought by the Military Forces of the British Empire during the Great War, 1914-1919, and the third Afghan War, 1919: Report of the Battles Nomenclature Committee as Approved by The Army Council Presented to Parliament by Command of His Majesty' (London, 1922), p. 31</ref> The major attack on the center about Ismailia by the main force early on the morning of 3 February 1915 when the enemy was successful in crossing the canal. However the attack failed to surprise the Indian defenders who kept the enemy from establishing itself on the Canal at a cost of about 700 casualties and 700 prisoners with the Indian Army losing about 150 men. The enemy quickly retreated to the El Arish, Magdhaba, Aujah area from which position Kress von Kressenstein maintained a virtually continuous series of raids and attacks on the Canal endeavoring to disrupt traffic on the Suez Canal. |
| 125 | + |
| 126 | +Because the Suez Canal was vital to the Allied war effort, this failed attack caused the British to leave far more soldiers protecting the canal than they had planned on, resulting in a smaller force for the [[Battle of Gallipoli|Gallipoli Campaign]]. The British forced the colonial Egyptian Army and Egyptian Navy to be enlarged to help defend Egypt. However, most Egyptians were poorly-armed and poorly-trained.{{Citation needed|date=November 2009}} |
| 127 | + |
| 128 | +===Improvements to Suez Canal Defences=== |
| 129 | +In November 1915 Lord Kitchener had identified the weakness of basing the defence of Egypt on the Suez Canal and Kress von Kressenstein's raiding parties confirmed it. However it was not until towards the end of 1915 as the Gallipoli campaign was drawing to its conclusion that the War Cabinet in London authorised new positions to be established about 10,000 yards east of the Canal in the desert to make the canal safe from long range guns and to provide additional troops to man them. |
| 130 | + |
| 131 | +Port Said became Headquarters with Kantara Advanced Headquarters of three sectors of the Canal defences – |
| 132 | +No. 1 (Southern) Suez to Kabrit HQ Suez |
| 133 | +No. 2 (Central) Kabrit to Ferdan HQ Ismailia |
| 134 | +No. 3 (Northern) Ferdan to Port Said |
| 135 | + |
| 136 | +===1916 Forward Defence of Suez Canal=== |
| 137 | +When these new defences were established and troops provided to man them, it was decided that the oasis area which stretched westwards towards the Canal from Bir el Abd to Romani and Katia along the ancient silk road needed to be denied to the enemy. Kress von Kressenstein and his forces had made use of this area of reliable drinking water during the previous fighting. |
| 138 | + |
| 139 | +In order to carry out this plan it was necessary to build a pipeline for the fresh Nile water to be pumped to the troops as they moved out eastwards. A railway was also required to provide supplies and move troops quickly and the laying of rails and sleepers by Egyptian Labour Force soon moved out past the new canal defences making it necessary to send out a brigade to protect the workers and the infrastructure they were building. |
| 140 | + |
| 141 | +===Operations to destroy the water on the central road across Sinai=== |
| 142 | +As long as the water cistern and wells on the central road remained intact, the enemy could move across the Sinai Peninsula to threaten the Canal at any time. The decision was taken in March 1916 for these water sources to be destroyed and the 8th Light Horse Regiment and Birkani Camel Corps were sent to Wady um Muksheib and Moya Harab on 21 March while the 9th Light Horse Regiment, camels and supporting engineers, etc. destroyed the water wells and their pumping equipment on 11 April at Jifjafa. |
| 143 | + |
| 144 | +===Affair of Katia=== |
| 145 | +{{Main|Affair of Katia}} |
| 146 | + |
| 147 | +This attack by the Ottoman Army on St George's Day 23 April 1916, was possibly a response to the increased presence of the Allies, some distance eastward from the Suez Canal. The 5th Mounted Yeomanry Brigade was spread out at Katia, Bir el Mageibra, Bir el Hamisah and Oghratina where they were surprised and overwhelmed by the enemy. <ref>Wavell, pp. 43–5</ref> All these places are in the vicinity of Romani and played a part in that Battle. |
| 148 | + |
| 149 | +===Battle of Romani=== |
| 150 | +{{Main|Battle of Romani}} |
| 151 | +More than a year passed with the British troops content to guard the Suez Canal, and the Ottomans busy fighting the Russians in the Caucasus and the British at Gallipoli and in Mesopotamia. Then in July 1916, the Ottoman army tried another offensive against the Suez Canal. Again, the Ottomans advanced with an over-sized division. Again they ran into a well prepared Allied force, this time at Romani. Again, they retreated after two days of fighting from 3 August to 5 August, 1916. |
| 152 | + |
| 153 | +Following this victory, the Allied forces sought to prevent the Turkish Canal Expeditionary Force threatening the Suez Canal, by removing them from Bir el Abd. On 9 August, 1916, an indecisive action was fought at Bir el Abd, leading to the Turkish withdrawal to El Arish while leaving a rear guard force at Bir el Mazar. |
| 154 | + |
| 155 | +==British advance across the Sinai== |
| 156 | +This attack convinced the British to push their defence of the Canal further out, into the Sinai, and so starting in October, the British under Lieutenant General Sir [[Charles Dobell]] began operations into the Sinai desert and on to the border of Palestine. Initial efforts were limited to building a railway and a waterline across the Sinai. After several months building up supplies and troops, the British were ready for an attack. The first battle was the capture of a fortified position at [[Battle of Magdhaba|Magdhaba]] on 23 December, 1916. |
| 157 | + |
| 158 | +On 8 January, 1917, the [[Anzac Mounted Division]] attacked the fortified town of [[Battle of Rafa|Rafa]]. The attack was successful and the majority of the Turkish garrison was captured. The British had accomplished their objective of protecting the Suez Canal from Turkish attacks, but the new government of [[David Lloyd George]] wanted more. |
| 159 | + |
| 160 | +==Palestine campaign==<!-- This section is linked from [[Edmund Allenby, 1st Viscount Allenby]] --> |
| 161 | +{{Unreferenced section|date=November 2009}} |
| 162 | +[[File:Turkish trenches at Dead Sea2.jpg|right|thumb|Turkish trenches at the shores of the [[Dead Sea]], 1917.]] |
| 163 | +The British army in Egypt was ordered to go on the offensive against the [[Ottoman Turks]] in Palestine. In part this was to support the [[Arab revolt]] which had started early in 1916, but also to accomplish something positive after the years of fruitless battles on the [[Western Front (World War I)|Western Front]]. The British commander in Egypt, Sir [[Archibald Murray]], suggested that he needed more troops and ships, but this request was refused. |
| 164 | + |
| 165 | +[[Image:Sinai-WW1-1.jpg|thumb|300px|left|Assault on [[Gaza]], 1917]] |
| 166 | +The Ottoman forces were holding a rough line from the fort at [[Gaza]], on the shore of the [[Mediterranean Sea]], to the town of [[Beersheba]], which was the terminus of the Ottoman railway that extended north to Damascus. The British commander in the field, Dobell, chose to attack Gaza, using a short hook move on 26 March, 1917. |
| 167 | + |
| 168 | +===First Battle of Gaza=== |
| 169 | +{{Main|First Battle of Gaza}} |
| 170 | +The British attack was essentially a failure. Due to miscommunication, some units retreated when they should have held onto their gains and so the fortress was not taken. |
| 171 | + |
| 172 | +The government in London believed the reports from the field which indicated a substantial victory had been won and ordered General Murray to move on and capture [[Jerusalem]]. The British were in no position to attack Jerusalem as they had yet to break through the Ottoman defensive positions. These positions were rapidly improved and credit for the Turkish defence is given to the German chief-of-staff [[Friedrich Freiherr Kress von Kressenstein|Baron Kress von Kressenstein]]. |
| 173 | + |
| 174 | +===Second Battle of Gaza=== |
| 175 | +{{Main|Second Battle of Gaza}} |
| 176 | +A second attack on the fort of Gaza was launched one month later on 17 April, 1917. This attack, supported by naval gunfire, chlorine gas and even a few early [[Mark I (tank)|tanks]], was also a failure. It was essentially a frontal assault on a fortified position, and its failure was due more to inflexibility in operations than to faults in planning; yet it cost some 6,000 British casualties. As a result both General Dobell and General Murray were removed from command. The new man put in charge was General Sir [[Edmund Allenby]] and his orders were clear: take [[Jerusalem]] by Christmas. |
| 177 | + |
| 178 | +After personally reviewing the Ottoman defensive positions, Allenby requested reinforcements: three more infantry divisions, aircraft, and artillery. This request was granted and by October, 1917, the British were ready for their next attack. |
| 179 | + |
| 180 | +The Ottoman army had three active fronts at this time: [[Mesopotamian Campaign|Mesopotamia]], Arabia, and the Gaza front. They also had substantial forces deployed around [[Constantinople]] and in the (now quiet) Caucasus front. Given all these demands, the army in Gaza was only about 35,000 strong, led by the Ottoman General [[Kustafa]] and concentrated in three main defensive locations: Gaza, Tel Es Sheria, and Beersheba. Allenby's army was now much larger, with some 88,000 troops in good condition and well-equipped. |
| 181 | + |
| 182 | +===Battle of El Buggar Ridge=== |
| 183 | +{{Main|Battle of El Buggar Ridge}} |
| 184 | +The occupation of Karm by the Allies on 22 October, 1917 created a major point for supply and water for the troops in the immediate area. For the Ottoman forces, the establishment of a railway station at Karm placed the defensive positions known as the Hureira Redoubt and Rushdie System which formed a powerful bulwark against any Allied action under threat. |
| 185 | + |
| 186 | +To forestall this threat, General Erich von Falkenhayn, the Commander of the Yildirim Group, proposed a two phase attack. The plan called for a reconnaissance in force from Beersheba on 27 October, to be followed by an all out attack launched by the 8th Army from Hureira. This second phase was ironically scheduled to occur on the morning of 31 October, 1917, the day when the Battle of Beersheba began. |
| 187 | + |
| 188 | +===Battle of Beersheba=== |
| 189 | +{{Main|Battle of Beersheba (1917)}} |
| 190 | +A key feature of the British plan was to convince the Turks (and their German leaders) that once again, Gaza was to be attacked. This deception campaign was extremely thorough and convincing. The [[Battle of El Buggar Ridge]], initiated by the Turks, completed the deception. When the Allies launched their attack on Beersheba, the Turks were taken by surprise. In one of the most remarkable feats of planning and execution, the Allies were able to move some 40,000 men and a similar number of horses over hostile and inhospitable terrain without being detected by the Turks. The climax of the battle was one of the last successful cavalry charges of modern warfare, when two Australian Light Horse regiments (4th and 12th) charged across open ground just before dusk and captured the town. |
| 191 | + |
| 192 | +The Turkish defeat at Beersheba on 31 October was not a complete rout. The Turks retreated into the hills and prepared defensive positions to the north of Beersheba. For the Allies, the following days were spent fighting a difficult and bloody battle at Tel el Khuweilifeh, to the north east of Beersheba. |
| 193 | + |
| 194 | +[[Image:Palestine-WW1-2.jpg|thumb|300px|right|Allenby's Offensive, November-December 1917]] |
| 195 | +To break through the Turkish defensive line, the Allied forces attacked the Ottoman positions at Tel Es Sheria on 6 November, and followed this up with a further attack at Huj the following day. With the imminent collapse of Gaza at the same time, the Turks quickly retreated to a new line of defence. |
| 196 | + |
| 197 | +===Third Battle of Gaza=== |
| 198 | +{{Main|Third Battle of Gaza}} |
| 199 | +On 7 November, the British attacked Gaza for the third time. The Turks, worried about being cut off, retreated in the face of the British assault. Gaza had finally been captured. |
| 200 | + |
| 201 | +The Turkish defensive position was shattered, the Ottoman army was retreating in some disarray, and General Allenby ordered his army to pursue the enemy. The British followed closely on the heels of the retreating Ottoman forces. An attempt by the Turks to form a defence of a place called Junction Station (Wadi Sarar) was foiled by a British attack on 13 November. General Falkenhayn next tried to form a new defensive line from [[Bethlehem]] to Jerusalem to [[Jaffa]]. The first British attack on Jerusalem failed but with a short rest and the gathering of more infantry divisions, Allenby tried again and on 9 December, 1917, Jerusalem was captured. This was a major political event for the British government of David Lloyd George, one of the few real successes the British could point to after three long bloody years of war. |
| 202 | + |
| 203 | +On the Turkish side, this defeat marked the exit of Djemal Pasha, who returned to [[Istanbul]]. Djemal had delegated the actual command of his army to German officers such as von Kressenstein and von Falkenhayn more than a year earlier, but now, defeated as [[Enver Pasha]] had been at the [[Battle of Sarikamis]], he gave up even nominal command and returned to the capital. Less than a year remained before he was forced out of the government. General Falkenhayn was also replaced, in March 1918. |
| 204 | + |
| 205 | +== The final year: Palestine and Syria == |
| 206 | +[[Image:Palestine-WW1-3.jpg|thumb|230px|left|Allenby's Final Attack, September 1918]] |
| 207 | +The British government had hopes that the Ottoman Empire could be defeated early in the coming year with successful campaigns in Palestine and Mesopotamia but the [[Spring Offensive]] by the Germans on the Western Front delayed the expected attack on Syria for nine full months. General Allenby's army was largely redeployed to France and most of his divisions were rebuilt with units recently recruited in India. His forces spent much of the summer of 1918 training and reorganising. |
| 208 | + |
| 209 | +Because the British achieved complete control of the air with their new [[Sopwith Camel|fighter planes]], the Turks, and their new German commander, General [[Otto Liman von Sanders|Liman von Sanders]], had no clear idea where the British were going to attack. Compounding the problems, the Turks, at the direction of their [[War Minister]] [[Enver Pasha]] withdrew their best troops during the summer for the creation of Enver's [[Ottoman Army of Islam|Army of Islam]], leaving behind poor quality, dispirited soldiers. During this time, the Turks were distracted by raids against their open desert (eastern) flank by forces of the Arab Revolt commanded by the [[Faisal I of Iraq|Emir Feisal]] and coordinated by [[T. E. Lawrence]] and other British liaison officers, which tied down thousands of soldiers in garrisons throughout Palestine, [[Jordan]], and Syria. |
| 210 | + |
| 211 | +===Battle of Megiddo=== |
| 212 | +{{Main|Battle of Megiddo (1918)}} |
| 213 | +General Allenby finally launched his long-delayed attack on 19 September, 1918. The campaign has been called the Battle of Megiddo (which is a transliteration of the Hebrew name of an ancient town known in the west as [[Armageddon]]). Again, the British made major efforts to deceive the Turks as to their actual intended target of operations. This effort was, again, successful and the Turks were taken by surprise when the British attacked Megiddo in a sudden storm. The Turkish troops started a full scale retreat; the British bombed the fleeing columns of men from the air, and within a week the Turkish army in Palestine had ceased to exist as a military force. |
| 214 | + |
| 215 | +The ultimate goal of Allenby's and Feisal's armies was [[Damascus]]. Two separate Allied columns marched towards Damascus. The first, composed mainly of Australian and Indian cavalry, approached from Galilee, while the other column, consisting of Indian cavalry and the ''ad hoc'' militia following T.E. Lawrence, travelled northwards along the [[Hejaz Railway]]. Australian Light Horse troops marched unopposed into Damascus on 1 October, 1918, despite the presence of some 12,000 Turkish soldiers at Baramke Barracks. Major Olden of the Australian 10th Light Horse Regiment received the Official Surrender of the City at 7 am at the Serai. Later that day, Lawrence's irregulars entered Damascus to claim full credit for its capture. |
| 216 | + |
| 217 | +The war in Palestine was over but in Syria lasted for a further month. The Turkish government was quite prepared to sacrifice these non-Turkish provinces without surrendering. Indeed, while this battle was raging, the Turks sent an expeditionary force into Russia to enlarge the ethnic Turkish elements of the empire. It was only after the surrender of Bulgaria, which put Turkey into a vulnerable position for invasion, that the Turkish government was compelled to sign an armistice on 30 October, 1918, and surrendered outright two days later. Six hundred years of Ottoman rule over the [[Middle East]] had come to an end. |
| 218 | + |
| 219 | +== In popular media == |
| 220 | +This campaign has been depicted in several films. The most famous is ''[[Lawrence of Arabia (film)|Lawrence of Arabia]]'' (1962), though it focused primarily on T.E. Lawrence and the Arab Revolt. Other films dealing with this topic include ''[[Forty Thousand Horsemen]]'' (1941), and ''[[The Lighthorsemen (film)|The Lighthorsemen]]'' (1987), with [[Peter Phelps]] and [[Nick Waters]], both of which focused on the role of the ANZAC forces during the campaign. |
| 221 | + |
| 222 | +==Summary== |
| 223 | +The British suffered a total of 550,000 casualties: more than 90% of these were not battle losses but instead attributable to disease, heat and other secondary causes. Total Turkish losses are unknown but almost certainly larger: an entire army was lost in the fighting and the Turks poured a vast number of troops into the front over the three years of combat. |
| 224 | + |
| 225 | +Despite the uncertainty of casualty counts, the historical consequences of this campaign are hard to overestimate. The British conquest of Palestine led directly to the [[British mandate]] over Palestine and the [[Trans-Jordan]] which, in turn, paved the way for the creation of the states of [[Israel]], [[Jordan]], [[Lebanon]], and [[Syria]]. |
| 226 | + |
| 227 | +==References== |
| 228 | +{{Reflist}} |
| 229 | + |
| 230 | +==See also== |
| 231 | +{{portal|World War I}} |
| 232 | +*[[Bund der Asienkämpfer]] |
| 233 | +{{Commonscat-inline|Sinai and Palestine Campaign}} |
| 234 | + |
| 235 | +==External links== |
| 236 | +* First World War.com. [http://www.firstworldwar.com/battles/suez.htm Defence of the Suez Canal, 1915]. Retrieved 19 December, 2005. |
| 237 | +* [http://alh-research.tripod.com/Light_Horse/ Australian Light Horse Studies Centre] |
| 238 | +* [http://www.turkeyswar.com/campaigns/palestine1.htm Palestine pages of 'Turkey in WW1' web site] |
| 239 | +* [http://www.nzhistory.net.nz/node/13507 Sinai campaign (NZHistory.net.nz)] |
| 240 | +* [http://www.nzhistory.net.nz/node/14256 Palestine campaign (NZHistory.net.nz)] |
| 241 | +* [http://www.ottomanpalestine.com/GALLERY_1.htm The Photographs of Palestine Campaign] |
| 242 | +* [http://hdl.loc.gov/loc.pnp/ppmsca.13709 Library of Congress's American Colony in Jerusalem's Photo Album] |
| 243 | + |
| 244 | +==Sources== |
| 245 | +* Battles Nomenclature Committee, Army. ''The Official Names of the Battles and Other Engagements Fought by the Military Forces of the British Empire during the Great War, 1914-1919, and the third Afghan War, 1919: Report of the Battles Nomenclature Committee as Approved by The Army Council Presented to Parliament by Command of His Majesty'' (London, 1922). |
| 246 | +* Jean Bou, ''A History of Australia's Mounted Arm'' Series: Australian Army History Series (Port Melbourne: Cambridge University Press, 2009). |
| 247 | +* Bruce, Anthony (2002). ''The Last Crusade: The Palestinian Campaign in the First World War''. John Murray. |
| 248 | +* Field Marshal Lord Carver, ''The National Army Museum Book of The Turkish Front 1914-1918 The Campaigns at Gallipoli, in Mesopotamia and in Palestine'' (London: Pan Macmillan, 2003). |
| 249 | +* R. M. Downes, ''The Campaign in Sinai and Palestine'' Part II in Volume 1 ''Gallipoli, Palestine and New Guinea'' of A. G. Butler, ''Official History of the Australian Army Medical Services, 1914–1918'' (2nd edition 1938) p. 553. On line at Australian War Memorial; Official Histories. |
| 250 | +* Erickson, Edward J., ''Ordered to Die: A History of the Ottoman Army in the First World War'', Foreword by General Hüseyin Kivrikoglu, Contributions in Military Studies, No. 201 (Westport, Connecticut: Greenwood Press, 2001). |
| 251 | +* Esposito, Vincent (ed.) (1959). ''The West Point Atlas of American Wars - Vol. 2''. Frederick Praeger Press. |
| 252 | +* Fromkin, David (1989). ''A Peace to End All Peace''. Avon Books. |
| 253 | +* Grainger, John D. (2006) ''The Battle for Palestine: 1917'' Boydell Press. ISBN 1 84383 263 1 |
| 254 | +* Keegan, John (1998). ''The First World War''. Random House Press. |
| 255 | +* E.G. Keogh, ''Suez to Aleppo'' (Melbourne: Directorate of Military Training, 1955). |
| 256 | +* Preston, Lieutenant-Colonel Richard Martin (1921) ''The Desert Mounted Corps: An Account of the Cavalry Operations in Palestine and Syria 1914 to 1918''. Houghton Mifflin Company. [http://books.google.com/books?id=LHg5xNCFDGsC Google Books Search] |
| 257 | +* Powles, C. Guy, ''The New Zealanders in Sinai and Palestine'' Volume III ''Official History New Zealand's Effort in the Great War'' (Auckland, Christchurch, Dunedin and Wellington: Whitcombe & Tombs Ltd, 1922). |
| 258 | +* War Diaries of 1st, 2nd and 3rd Light Horse Brigades. [available on the Australian War Memorial's web site] |
| 259 | +* Field Marshal Earl Wavell, ''The Palestine Campaigns'' 3rd Edition thirteenth Printing; Series: A Short History of the British Army 4th Edition by Major E.W. Sheppard (London: Constable & Co. 1968). |
| 260 | +* Woodward, David R (2006). ''Forgotten Soldiers of the First World War - Lost Voices from the Middle Eastern Front''. Tempus Publishing. |
| 261 | + |
| 262 | +{{World War I}} |
| 263 | + |
| 264 | +[[Category:Ottoman Empire and World War I]] |
| 265 | +[[Category:Middle Eastern theatre of World War I| ]] |
| 266 | +[[Category:Campaigns and theatres of World War I|Sinai and Palestine]] |
| 267 | +[[Category:Military campaigns and theatres of World War I involving Australia]] |
| 268 | + |
| 269 | +[[es:Campaña del Sinaí y Palestina]] |
| 270 | +[[he:המערכה על סיני וארץ ישראל במלחמת העולם הראשונה]] |
| 271 | +[[hu:Palesztin front (első világháború)]] |
| 272 | +[[pt:Campanha do Sinai e Palestina]] |
| 273 | +[[ru:Синайско-Палестинская кампания]] |
| 274 | +[[sr:Синајски и палестински поход]] |
| 275 | +[[tr:Sina ve Filistin Cephesi]]</text> |
| 276 | + </revision> |
| 277 | + <revision> |
| 278 | + <id>200</id> |
| 279 | + <timestamp>2009-04-12T17:03:02Z</timestamp> |
| 280 | + <contributor deleted="deleted" /> |
| 281 | + <text xml:space="preserve">{| style="float: right; clear: right; background-color: transparent" |
| 282 | +| {{Infobox Military Conflict |
| 283 | +|conflict=Sinai and Palestine Campaign |
| 284 | +|partof=[[Middle Eastern theatre of World War I|Middle Eastern theatre]] ([[World War I]]) |
| 285 | +|image=[[Image:Anzacsoldierandhorseinsinaiandpalestinecampaign.JPG|200px]] |
| 286 | +|caption=A model of a typical [[ANZAC]] soldier and his horse during the campaign |
| 287 | +|date=28 January 1915 - 28 October 1918 |
| 288 | +|place=[[Sinai Peninsula]], [[Palestine]], and [[Syria]] |
| 289 | +|result=Allied Victory |
| 290 | +|territory=[[Partitioning of the Ottoman Empire]] |
| 291 | +|combatant1={{flagicon|United Kingdom}} [[British Empire]]<br> |
| 292 | +*{{flagicon|United Kingdom}} [[united Kingdom of Great Britain and Ireland|United Kingdom]] |
| 293 | +*{{flagicon|Australia}} [[Military history of Australia during World War I|Australia]] |
| 294 | +*{{flagicon|New Zealand}} [[Dominion of New Zealand|New Zealand]] |
| 295 | +*{{flagicon|India|British}} [[British Raj|India]] |
| 296 | +{{flag|France}}<br>{{flagicon|Italy|1861}} [[Kingdom of Italy (1861-1946)|Kingdom of Italy]] |
| 297 | +|combatant2={{flag|Ottoman Empire}}<br>{{flag|German Empire}} |
| 298 | +|commander1={{flagicon|United Kingdom}} [[John Maxwell (British Army officer)|Sir John Maxwell]]<br>{{flagicon|United Kingdom}} [[Sir Archibald Murray]]<br>{{flagicon|United Kingdom}} [[Philip Chetwode]]<br>{{flagicon|United Kingdom}} [[Charles Dobell]]<br>{{flagicon|United Kingdom}} [[Edmund Allenby]]<br>{{flagicon|Australia}} [[Henry George Chauvel]]<br>{{flagicon|United Kingdom}} [[Edward Bulfin]] |
| 299 | +|commander2={{flagicon|Ottoman Empire}} [[Ahmed Djemal|Djemal Pasha]]<br>{{flagicon|Ottoman Empire}} [[Jadir Bey]]<br>{{flagicon|Ottoman Empire}} [[Tala Bey]]<br>{{flagicon|German Empire}} [[Friedrich Freiherr Kress von Kressenstein]]<br>{{flagicon|German Empire}} [[Erich von Falkenhayn]]<br>{{flagicon|German Empire}} [[Otto Liman von Sanders]] |
| 300 | +|strength1= |
| 301 | +|strength2= |
| 302 | +|casualties1= |
| 303 | +|casualties2= |
| 304 | +|notes= |
| 305 | +}} |
| 306 | +|- |
| 307 | +|{{Campaignbox Sinai and Palestine}}{{WWITheatre}} |
| 308 | +|} |
| 309 | +The '''Sinai and Palestine Campaign''' during the [[Middle Eastern Theatre of World War I]] was a series of battles which took place in the [[Sinai Peninsula]], [[Ottoman Palestine]], and [[Syria]] between 28 January, 1915 and 28 October, 1918. [[United Kingdom|British]], [[British Indian Army|Indian]], [[Australia]]n, and [[New Zealand]] forces opposed the [[German Empire|German]] and [[Ottoman Empire|Turkish]] forces. |
| 310 | + |
| 311 | +As a result of several victories in Egypt in the late 19th Century, Britain gained control of that country and established a British protectorate there, soon after the beginning of the First World War. The Ottoman Empire also started to take an interest in Egypt quite early on in the war, possibly at the behest of Germany. The Suez Canal was their prime concern but unrest was also fomented by the Sanussi to the west of Cairo and to the south in Sudan. |
| 312 | + |
| 313 | +The Commander–in–Chief of the British Protectorate of Egypt, Major–General Sir John Maxwell [had fought in Egypt in the 1882 Battle of Tel el Kebir and in the Sudan in 1885 and 1898] describes his appointment and the situation in Egypt when he arrived – |
| 314 | + |
| 315 | +'On August 29th, 1914 I was at the Headquarters of Marshal Joffre, at Vitry le Francois, where I received orders from Field–Marshall Earl Kitchener to proceed at once to Egypt and take over the command there. Somewhat disconcerted, I complied and arrived September 8th in that country. |
| 316 | + |
| 317 | +When I left France the French and British armies were in full retreat to the line of the Marne. Our little Army, after magnificent and strenuous resistance, had suffered terribly, and the question of reinforcements was paramount. It was, therefore, no surprise when, on my arrival in Egypt, I received orders to send every British soldier at once to England. I was informed that large forces were expected to be passing through the Suez Canal en route to Europe, and that a Territorial Division would be sent as soon as possible. The situation I found was by no means a pleasant one. The Turks were sitting on the fence, the Khedive Abbas was in Constantinople intriguing against us. The population of Egypt was some 12 millions, the great majority Moslems, in sympathy with their co–religionists the Turks; of the European population, the majority was Italian, Greek, German and Austrian, with a good proportion of Turks and Turko–Egyptians, Syrians and Armenians. The British and French were in a decided minority.' <ref>Powles, C. Guy, 'The New Zealanders in Sinai and Palestine' Volume III 'Official History New Zealand's Effort in the Great War' (Auckland, Christchurch, Dunedin and Wellington: Whitcombe & Tombs Ltd, 1922) p. vii</ref> |
| 318 | + |
| 319 | +==Defence of Egypt – Eastern Frontier; Defence of Suez Canal== |
| 320 | + |
| 321 | +The Suez Canal very quickly became of great importance to both sides. To the Ottoman Empire the canal represented the closest and weakest link in British communications, being located in an erstwhile part of the Ottoman Empire. At the beginning of the war Egypt was still linked to the Ottoman Empire by its head of state which subsisted until the British Protectorate was declared. |
| 322 | + |
| 323 | +To the British the Suez Canal was of vital strategic importance. By sparing ships the voyage around the Cape of Good Hope, the Suez Canal cut the traveling time from Britain to India, New Zealand and Australia, and was therefore vital to the support given by the Colonies and Dominions to the British war effort in the European sector. |
| 324 | + |
| 325 | +However, at the beginning of the war, its defence posed a number of problems. There was no road to the canal; only one railway track crossed the thirty miles of desert from Cairo to Ismailia, and thence ran north to Port Said and south to Suez. If Ismailia, near the main gates and sluices, were captured, the loss of the vital Nile fresh water these towns relied on would make their continued habitation very difficult and their strategic importance virtually nil. |
| 326 | + |
| 327 | +The Sinai was policed by a token defence force which very quickly evacuated the area in November 1914 leaving only very few troops on the eastern side of the Suez Canal. The 30,000 strong defenders were made up of two Indian infantry divisions and one Indian mounted brigade supported by Indian mountain artillery. They were the 10th and 11th Divisions and the Imperial Service Cavalry Brigade and they mounted their main defences on the Cairo side of the canal. The Ottoman Army very quickly advanced across Sinai and by February 1915 had staged attacks against all three towns on the canal with the major effort being in the centre at Ismailia. The defending force could rely on there being Allied shipping in the canal, which could turn its guns to their support, and likely on some observation balloons. |
| 328 | + |
| 329 | +===Ottoman advance towards the Suez Canal=== |
| 330 | +[[Image:MapSinaiWWI.jpg|thumb|left|<center>Map of north and central [[Sinai]], 1917</center>]] |
| 331 | +The [[Ottoman Empire]], at the urging of their German ally, chose to attack British and Egyptian forces in Egypt and shut the [[Suez Canal]] in the [[First Suez Offensive]]. The Ottoman Fourth Army, under the command of the Turkish Minister of Marine, [[Ahmed Djemal|Djemal Pasha]], was based in [[Jerusalem]]. At this time, the Sinai was an almost empty desert and very hard for an army to cross as there were neither roads nor water sources. The chief of staff for the Ottoman Fourth Army was the Bavarian Colonel [[Friedrich Freiherr Kress von Kressenstein|Kress von Kressenstein]], who organized the attack and managed to get supplies for the army as it crossed the desert. |
| 332 | + |
| 333 | +Under the leadership of Kress von Kressenstein, the Ottoman Army force began to move towards the Canal in mid January 1915 from their 'Principal Desert Base' at Hafir el Auja in three echelons. [See Library of Congress's American Colony (Jerusalem) 1914-1917 Photo album Call Number LOT 13833; Photo Number 41 of 243; Photo Album 13709; external link below need to click 'next group' to group 37 to 48] The northern group moved via Magdhaba to El Arish and thence along the northern route towards Port Said. From Auja, the central group, also the largest, moved via the water cisterns at Moiya Harab and the wells at Wady um Muksheib and Jifjafa towards Ismailia where the main gates and sluices vital for the pumping of Nile fresh water to the three towns on the canal were located. Without this water the towns would be very difficult to maintain and defend. Along with their artillery and supplies, this group brought with them flat bottomed boats in which troops could cross the canal. The third smaller group moved from Auja via Nekl towards Suez in the south. There were approximately 3,000 in the north and south columns and 6,000 in the central column, but there are no reliable German or Turkish sources for the numbers of enemy troops involved. |
| 334 | + |
| 335 | +[[File:Map 3 Sinai detail Keogh p.26.jpeg|thumb|Map 3 Sinai detail Keogh p. 26]] |
| 336 | + |
| 337 | +===First Suez Offensive=== |
| 338 | +{{Main|First Suez Offensive}} |
| 339 | +The Ottoman Suez Expeditionary Force arrived at the canal on 2 February, 1915. The attack failed to achieve surprise as the British and Egyptians were aware of the Ottoman army's approach. In fighting that lasted for two days the Ottomans were beaten, losing some 2000 men. Allied losses were minimal.{{Citation needed|date=November 2009}} |
| 340 | + |
| 341 | +===1915 Actions on the Suez Canal 26 January to 4 February=== |
| 342 | + |
| 343 | +The Defence of the Suez Canal campaign began on 26 January 1915 when subsidiary attacks were made near Kantara in the north and Suez in the south by Kress von Kressenstein's minor columns. The Battles Nomenclature Committee assigned the name 'Actions on the Suez Canal' to these operations which, according to the Committee, ended with the rout of the enemy following the Battle of Romani on 12 August 1916. <ref>Battles Nomenclature Committee, Army. 'The Official Names of the Battles and Other Engagements Fought by the Military Forces of the British Empire during the Great War, 1914-1919, and the third Afghan War, 1919: Report of the Battles Nomenclature Committee as Approved by The Army Council Presented to Parliament by Command of His Majesty' (London, 1922), p. 31</ref> The major attack on the center about Ismailia was made by the main force early on the morning of 3 February 1915, when the enemy was successful in crossing the canal. However, the attack failed to surprise the Indian defenders, who kept the enemy from establishing itself on the Canal at a cost of about 700 casualties and 700 prisoners, with the Indian Army losing about 150 men. The enemy quickly retreated to the El Arish, Magdhaba, Aujah area, from which position Kress von Kressenstein maintained a virtually continuous series of raids and attacks on the Canal, endeavoring to disrupt traffic on the Suez Canal. |
| 344 | + |
| 345 | +Because the Suez Canal was vital to the Allied war effort, this failed attack caused the British to leave far more soldiers protecting the canal than they had planned on, resulting in a smaller force for the [[Battle of Gallipoli|Gallipoli Campaign]]. The British forced the colonial Egyptian Army and Egyptian Navy to be enlarged to help defend Egypt. However, most Egyptians were poorly-armed and poorly-trained.{{Citation needed|date=November 2009}} |
| 346 | + |
| 347 | +===Improvements to Suez Canal Defences=== |
| 348 | +In November 1915 Lord Kitchener had identified the weakness of basing the defence of Egypt on the Suez Canal and Kress von Kressenstein's raiding parties confirmed it. However, it was not until towards the end of 1915, as the Gallipoli campaign was drawing to its conclusion, that the War Cabinet in London authorised new positions to be established about 10,000 yards east of the Canal in the desert, to make the canal safe from long range guns, and additional troops to be provided to man them. |
| 349 | + |
| 350 | +Port Said became Headquarters with Kantara Advanced Headquarters of three sectors of the Canal defences – |
| 351 | +No. 1 (Southern) Suez to Kabrit HQ Suez |
| 352 | +No. 2 (Central) Kabrit to Ferdan HQ Ismailia |
| 353 | +No. 3 (Northern) Ferdan to Port Said |
| 354 | + |
| 355 | +===1916 Forward Defence of Suez Canal=== |
| 356 | +When these new defences were established and troops provided to man them, it was decided that the oasis area which stretched westwards towards the Canal from Bir el Abd to Romani and Katia along the ancient silk road needed to be denied to the enemy. Kress von Kressenstein and his forces had made use of this area of reliable drinking water during the previous fighting. |
| 357 | + |
| 358 | +In order to carry out this plan it was necessary to build a pipeline for the fresh Nile water to be pumped to the troops as they moved out eastwards. A railway was also required to provide supplies and move troops quickly and the laying of rails and sleepers by Egyptian Labour Force soon moved out past the new canal defences making it necessary to send out a brigade to protect the workers and the infrastructure they were building. |
| 359 | + |
| 360 | +===Operations to destroy the water on the central road across Sinai=== |
| 361 | +As long as the water cistern and wells on the central road remained intact, the enemy could move across the Sinai Peninsula to threaten the Canal at any time. The decision was taken in March 1916 for these water sources to be destroyed and the 8th Light Horse Regiment and Birkani Camel Corps were sent to Wady um Muksheib and Moya Harab on 21 March while the 9th Light Horse Regiment, camels and supporting engineers, and, according to the 3rd Light Horse Brigade's War Diary, 30 Light Horsemen armed as Lancers, destroyed the water wells and their pumping equipment on 11 April at Jifjafa. |
| 362 | + |
| 363 | +===Affair of Katia=== |
| 364 | +{{Main|Affair of Katia}} |
| 365 | + |
| 366 | +This attack by the Ottoman Army on St George's Day 23 April 1916, was possibly a response to the increased presence of the Allies, some distance eastward from the Suez Canal. The 5th Mounted Yeomanry Brigade was spread out at Katia, Bir el Mageibra, Bir el Hamisah and Oghratina where they were surprised and overwhelmed by the enemy. <ref>Wavell, pp. 43–5</ref> All these places are in the vicinity of Romani and played a part in that Battle. |
| 367 | + |
| 368 | +===Battle of Romani=== |
| 369 | +{{Main|Battle of Romani}} |
| 370 | +More than a year passed with the British troops content to guard the Suez Canal, and the Ottomans busy fighting the Russians in the Caucasus and the British at Gallipoli and in Mesopotamia. Then in July 1916, the Ottoman army tried another offensive against the Suez Canal. Again, the Ottomans advanced with an over-sized division. Again they ran into a well prepared Allied force, this time at Romani. Again, they retreated after two days of fighting from 3 August to 5 August, 1916. |
| 371 | + |
| 372 | +Following this victory, the Allied forces sought to prevent the Turkish Canal Expeditionary Force threatening the Suez Canal, by removing them from Bir el Abd. On 9 August, 1916, an indecisive action was fought at Bir el Abd, leading to the Turkish withdrawal to El Arish while leaving a rear guard force at Bir el Mazar. |
| 373 | + |
| 374 | +==British advance across the Sinai== |
| 375 | +This attack convinced the British to push their defence of the Canal further out, into the Sinai, and so starting in October, the British under Lieutenant General Sir [[Charles Dobell]] began operations into the Sinai desert and on to the border of Palestine. Initial efforts were limited to building a railway and a waterline across the Sinai. After several months building up supplies and troops, the British were ready for an attack. The first battle was the capture of a fortified position at [[Battle of Magdhaba|Magdhaba]] on 23 December, 1916. |
| 376 | + |
| 377 | +On 8 January, 1917, the [[Anzac Mounted Division]] attacked the fortified town of [[Battle of Rafa|Rafa]]. The attack was successful and the majority of the Turkish garrison was captured. The British had accomplished their objective of protecting the Suez Canal from Turkish attacks, but the new government of [[David Lloyd George]] wanted more. |
| 378 | + |
| 379 | +==Palestine campaign==<!-- This section is linked from [[Edmund Allenby, 1st Viscount Allenby]] --> |
| 380 | +{{Unreferenced section|date=November 2009}} |
| 381 | +[[File:Turkish trenches at Dead Sea2.jpg|right|thumb|Turkish trenches at the shores of the [[Dead Sea]], 1917.]] |
| 382 | +The British army in Egypt was ordered to go on the offensive against the [[Ottoman Turks]] in Palestine. In part this was to support the [[Arab revolt]] which had started early in 1916, but also to accomplish something positive after the years of fruitless battles on the [[Western Front (World War I)|Western Front]]. The British commander in Egypt, Sir [[Archibald Murray]], suggested that he needed more troops and ships, but this request was refused. |
| 383 | + |
| 384 | +[[Image:Sinai-WW1-1.jpg|thumb|300px|left|Assault on [[Gaza]], 1917]] |
| 385 | +The Ottoman forces were holding a rough line from the fort at [[Gaza]], on the shore of the [[Mediterranean Sea]], to the town of [[Beersheba]], which was the terminus of the Ottoman railway that extended north to Damascus. The British commander in the field, Dobell, chose to attack Gaza, using a short hook move on 26 March, 1917. |
| 386 | + |
| 387 | +===First Battle of Gaza=== |
| 388 | +{{Main|First Battle of Gaza}} |
| 389 | +The British attack was essentially a failure. Due to miscommunication, some units retreated when they should have held onto their gains and so the fortress was not taken. |
| 390 | + |
| 391 | +The government in London believed the reports from the field which indicated a substantial victory had been won and ordered General Murray to move on and capture [[Jerusalem]]. The British were in no position to attack Jerusalem as they had yet to break through the Ottoman defensive positions. These positions were rapidly improved and credit for the Turkish defence is given to the German chief-of-staff [[Friedrich Freiherr Kress von Kressenstein|Baron Kress von Kressenstein]]. |
| 392 | + |
| 393 | +===Second Battle of Gaza=== |
| 394 | +{{Main|Second Battle of Gaza}} |
| 395 | +A second attack on the fort of Gaza was launched one month later on 17 April, 1917. This attack, supported by naval gunfire, chlorine gas and even a few early [[Mark I (tank)|tanks]], was also a failure. It was essentially a frontal assault on a fortified position, and its failure was due more to inflexibility in operations than to faults in planning; yet it cost some 6,000 British casualties. As a result both General Dobell and General Murray were removed from command. The new man put in charge was General Sir [[Edmund Allenby]] and his orders were clear: take [[Jerusalem]] by Christmas. |
| 396 | + |
| 397 | +After personally reviewing the Ottoman defensive positions, Allenby requested reinforcements: three more infantry divisions, aircraft, and artillery. This request was granted and by October, 1917, the British were ready for their next attack. |
| 398 | + |
| 399 | +The Ottoman army had three active fronts at this time: [[Mesopotamian Campaign|Mesopotamia]], Arabia, and the Gaza front. They also had substantial forces deployed around [[Constantinople]] and in the (now quiet) Caucasus front. Given all these demands, the army in Gaza was only about 35,000 strong, led by the Ottoman General [[Kustafa]] and concentrated in three main defensive locations: Gaza, Tel Es Sheria, and Beersheba. Allenby's army was now much larger, with some 88,000 troops in good condition and well-equipped. |
| 400 | + |
| 401 | +===Battle of El Buggar Ridge=== |
| 402 | +{{Main|Battle of El Buggar Ridge}} |
| 403 | +The occupation of Karm by the Allies on 22 October, 1917 created a major point for supply and water for the troops in the immediate area. For the Ottoman forces, the establishment of a railway station at Karm placed the defensive positions known as the Hureira Redoubt and Rushdie System, which formed a powerful bulwark against any Allied action, under threat. |
| 404 | + |
| 405 | +To forestall this threat, General Erich von Falkenhayn, the Commander of the Yildirim Group, proposed a two phase attack. The plan called for a reconnaissance in force from Beersheba on 27 October, to be followed by an all out attack launched by the 8th Army from Hureira. This second phase was ironically scheduled to occur on the morning of 31 October, 1917, the day when the Battle of Beersheba began. |
| 406 | + |
| 407 | +===Battle of Beersheba=== |
| 408 | +{{Main|Battle of Beersheba (1917)}} |
| 409 | +A key feature of the British plan was to convince the Turks (and their German leaders) that once again, Gaza was to be attacked. This deception campaign was extremely thorough and convincing. The [[Battle of El Buggar Ridge]], initiated by the Turks, completed the deception. When the Allies launched their attack on Beersheba, the Turks were taken by surprise. In one of the most remarkable feats of planning and execution, the Allies were able to move some 40,000 men and a similar number of horses over hostile and inhospitable terrain without being detected by the Turks. The climax of the battle was one of the last successful cavalry charges of modern warfare, when two Australian Light Horse regiments (4th and 12th) charged across open ground just before dusk and captured the town. |
| 410 | + |
| 411 | +The Turkish defeat at Beersheba on 31 October was not a complete rout. The Turks retreated into the hills and prepared defensive positions to the north of Beersheba. For the Allies, the following days were spent fighting a difficult and bloody battle at Tel el Khuweilifeh, to the north east of Beersheba. |
| 412 | + |
| 413 | +[[Image:Palestine-WW1-2.jpg|thumb|300px|right|Allenby's Offensive, November-December 1917]] |
| 414 | +To break through the Turkish defensive line, the Allied forces attacked the Ottoman positions at Tel Es Sheria on 6 November, and followed this up with a further attack at Huj the following day. With the imminent collapse of Gaza at the same time, the Turks quickly retreated to a new line of defence. |
| 415 | + |
| 416 | +===Third Battle of Gaza=== |
| 417 | +{{Main|Third Battle of Gaza}} |
| 418 | +On 7 November, the British attacked Gaza for the third time. The Turks, worried about being cut off, retreated in the face of the British assault. Gaza had finally been captured. |
| 419 | + |
| 420 | +The Turkish defensive position was shattered, the Ottoman army was retreating in some disarray, and General Allenby ordered his army to pursue the enemy. The British followed closely on the heels of the retreating Ottoman forces. An attempt by the Turks to form a defence of a place called Junction Station (Wadi Sarar) was foiled by a British attack on 13 November. General Falkenhayn next tried to form a new defensive line from [[Bethlehem]] to Jerusalem to [[Jaffa]]. The first British attack on Jerusalem failed but with a short rest and the gathering of more infantry divisions, Allenby tried again and on 9 December, 1917, Jerusalem was captured. This was a major political event for the British government of David Lloyd George, one of the few real successes the British could point to after three long bloody years of war. |
| 421 | + |
| 422 | +On the Turkish side, this defeat marked the exit of Djemal Pasha, who returned to [[Istanbul]]. Djemal had delegated the actual command of his army to German officers such as von Kressenstein and von Falkenhayn more than a year earlier, but now, defeated as [[Enver Pasha]] had been at the [[Battle of Sarikamis]], he gave up even nominal command and returned to the capital. Less than a year remained before he was forced out of the government. General Falkenhayn was also replaced, in March 1918. |
| 423 | + |
| 424 | +== The final year: Palestine and Syria == |
| 425 | +[[Image:Palestine-WW1-3.jpg|thumb|230px|left|Allenby's Final Attack, September 1918]] |
| 426 | +The British government had hopes that the Ottoman Empire could be defeated early in the coming year with successful campaigns in Palestine and Mesopotamia but the [[Spring Offensive]] by the Germans on the Western Front delayed the expected attack on Syria for nine full months. General Allenby's army was largely redeployed to France and most of his divisions were rebuilt with units recently recruited in India. His forces spent much of the summer of 1918 training and reorganising. |
| 427 | + |
| 428 | +Because the British achieved complete control of the air with their new [[Sopwith Camel|fighter planes]], the Turks, and their new German commander, General [[Otto Liman von Sanders|Liman von Sanders]], had no clear idea where the British were going to attack. Compounding the problems, the Turks, at the direction of their [[War Minister]] [[Enver Pasha]] withdrew their best troops during the summer for the creation of Enver's [[Ottoman Army of Islam|Army of Islam]], leaving behind poor quality, dispirited soldiers. During this time, the Turks were distracted by raids against their open desert (eastern) flank by forces of the Arab Revolt commanded by the [[Faisal I of Iraq|Emir Feisal]] and coordinated by [[T. E. Lawrence]] and other British liaison officers, which tied down thousands of soldiers in garrisons throughout Palestine, [[Jordan]], and Syria. |
| 429 | + |
| 430 | +===Battle of Megiddo=== |
| 431 | +{{Main|Battle of Megiddo (1918)}} |
| 432 | +General Allenby finally launched his long-delayed attack on 19 September, 1918. The campaign has been called the Battle of Megiddo (which is a transliteration of the Hebrew name of an ancient town known in the west as [[Armageddon]]). Again, the British made major efforts to deceive the Turks as to their actual intended target of operations. This effort was, again, successful and the Turks were taken by surprise when the British attacked Megiddo in a sudden storm. The Turkish troops started a full scale retreat, the British bombed the fleeing columns of men from the air and within a week, the Turkish army in Palestine had ceased to exist as a military force. |
| 433 | + |
| 434 | +The ultimate goal of Allenby's and Feisal's armies was [[Damascus]]. Two separate Allied columns marched towards Damascus. The first, composed mainly of Australian and Indian cavalry, approached from Galilee, while the other column, consisting of Indian cavalry and the ''ad hoc'' militia following T.E. Lawrence, travelled northwards along the [[Hejaz Railway]]. Australian Light Horse troops marched unopposed into Damascus on 1 October, 1918, despite the presence of some 12,000 Turkish soldiers at Baramke Barracks. Major Olden of the Australian 10th Light Horse Regiment received the Official Surrender of the City at 7 am at the Serai. Later that day, Lawrence's irregulars entered Damascus to claim full credit for its capture. |
| 435 | + |
| 436 | +The war in Palestine was over but in Syria lasted for a further month. The Turkish government was quite prepared to sacrifice these non-Turkish provinces without surrendering. Indeed, while this battle was raging, the Turks sent an expeditionary force into Russia to enlarge the ethnic Turkish elements of the empire. It was only after the surrender of Bulgaria, which put Turkey into a vulnerable position for invasion, that the Turkish government was compelled to sign an armistice on 30 October, 1918, and surrendered outright two days later. Six hundred years of Ottoman rule over the [[Middle East]] had come to an end. |
| 437 | + |
| 438 | +== In popular media == |
| 439 | +This campaign has been depicted in several films. The most famous is ''[[Lawrence of Arabia (film)|Lawrence of Arabia]]'' (1962), though it focused primarily on T.E. Lawrence and the Arab Revolt. Other films dealing with this topic include ''[[Forty Thousand Horsemen]]'' (1941), and ''[[The Lighthorsemen (film)|The Lighthorsemen]]'' (1987), with [[Peter Phelps]] and [[Nick Waters]], both of which focused on the role of the ANZAC forces during the campaign. |
| 440 | + |
| 441 | +==Summary== |
| 442 | +The British suffered a total of 550,000 casualties: more than 90% of these were not battle losses but instead attributable to disease, heat and other secondary causes. Total Turkish losses are unknown but almost certainly larger: an entire army was lost in the fighting and the Turks poured a vast number of troops into the front over the three years of combat. |
| 443 | + |
| 444 | +Despite the uncertainty of casualty counts, the historical consequences of this campaign are hard to overestimate. The British conquest of Palestine led directly to the [[British mandate]] over Palestine and the [[Trans-Jordan]] which, in turn, paved the way for the creation of the states of [[Israel]], [[Jordan]], [[Lebanon]], and [[Syria]]. |
| 445 | + |
| 446 | +==References== |
| 447 | +{{Reflist}} |
| 448 | + |
| 449 | +==See also== |
| 450 | +{{portal|World War I}} |
| 451 | +*[[Bund der Asienkämpfer]] |
| 452 | +{{Commonscat-inline|Sinai and Palestine Campaign}} |
| 453 | + |
| 454 | +==External links== |
| 455 | +* First World War.com. [http://www.firstworldwar.com/battles/suez.htm Defence of the Suez Canal, 1915]. Retrieved 19 December, 2005. |
| 456 | +* [http://alh-research.tripod.com/Light_Horse/ Australian Light Horse Studies Centre] |
| 457 | +* [http://www.turkeyswar.com/campaigns/palestine1.htm Palestine pages of 'Turkey in WW1' web site] |
| 458 | +* [http://www.nzhistory.net.nz/node/13507 Sinai campaign (NZHistory.net.nz)] |
| 459 | +* [http://www.nzhistory.net.nz/node/14256 Palestine campaign (NZHistory.net.nz)] |
| 460 | +* [http://www.ottomanpalestine.com/GALLERY_1.htm The Photographs of Palestine Campaign] |
| 461 | +* [http://hdl.loc.gov/loc.pnp/ppmsca.13709 Library of Congress's American Colony in Jerusalem's Photo Album] |
| 462 | + |
| 463 | +==Sources== |
| 464 | +* Battles Nomenclature Committee, Army. ''The Official Names of the Battles and Other Engagements Fought by the Military Forces of the British Empire during the Great War, 1914-1919, and the third Afghan War, 1919: Report of the Battles Nomenclature Committee as Approved by The Army Council Presented to Parliament by Command of His Majesty'' (London, 1922). |
| 465 | +* Jean Bou, ''A History of Australia's Mounted Arm'' Series: Australian Army History Series (Port Melbourne: Cambridge University Press, 2009). |
| 466 | +* Bruce, Anthony (2002). ''The Last Crusade: The Palestinian Campaign in the First World War''. John Murray. |
| 467 | +* Field Marshal Lord Carver, ''The National Army Museum Book of The Turkish Front 1914-1918 The Campaigns at Gallipoli, in Mesopotamia and in Palestine'' (London: Pan Macmillan, 2003). |
| 468 | +* R. M. Downes, ''The Campaign in Sinai and Palestine'' Part II in Volume 1 ''Gallipoli, Palestine and New Guinea'' of A. G. Butler, ''Official History of the Australian Army Medical Services, 1914–1918'' (2nd edition 1938) p. 553. On line at Australian War Memorial; Official Histories. |
| 469 | +* Erickson, Edward J., ''Ordered to Die A History of the Ottoman Army in the First World War'' Foreword by General Hüseyin Kivrikoglu Contributions in Military Studies, No. 201 (Westport Connecticut: Greenwood Press, 2001). |
| 470 | +* Esposito, Vincent (ed.) (1959). ''The West Point Atlas of American Wars - Vol. 2''. Frederick Praeger Press. |
| 471 | +* Fromkin, David (1989). ''A Peace to End All Peace''. Avon Books. |
| 472 | +* Grainger, John D. (2006) ''The Battle for Palestine: 1917'' Boydell Press. ISBN 1 84383 263 1 |
| 473 | +* Keegan, John (1998). ''The First World War''. Random House Press. |
| 474 | +* E.G. Keogh, ''Suez to Aleppo'' (Melbourne: Directorate of Military Training, 1955). |
| 475 | +* Preston, Lieutenant-Colonel Richard Martin (1921) ''The Desert Mounted Corps: An Account of the Cavalry Operations in Palestine and Syria 1914 to 1918''. Houghton Mifflin Company. [http://books.google.com/books?id=LHg5xNCFDGsC Google Books Search] |
| 476 | +* Powles, C. Guy, ''The New Zealanders in Sinai and Palestine'' Volume III ''Official History New Zealand's Effort in the Great War'' (Auckland, Christchurch, Dunedin and Wellington: Whitcombe & Tombs Ltd, 1922). |
| 477 | +* War Diaries of 1st, 2nd and 3rd Light Horse Brigades. [available on the Australian War Memorial's web site] |
| 478 | +* Field Marshal Earl Wavell, ''The Palestine Campaigns'' 3rd Edition thirteenth Printing; Series: A Short History of the British Army 4th Edition by Major E.W. Sheppard (London: Constable & Co. 1968). |
| 479 | +* Woodward, David R (2006). ''Forgotten Soldiers of the First World War - Lost Voices from the Middle Eastern Front''. Tempus Publishing. |
| 480 | + |
| 481 | +{{World War I}} |
| 482 | + |
| 483 | +[[Category:Ottoman Empire and World War I]] |
| 484 | +[[Category:Middle Eastern theatre of World War I| ]] |
| 485 | +[[Category:Campaigns and theatres of World War I|Sinai and Palestine]] |
| 486 | +[[Category:Military campaigns and theatres of World War I involving Australia]] |
| 487 | + |
| 488 | +[[es:Campaña del Sinaí y Palestina]] |
| 489 | +[[he:המערכה על סיני וארץ ישראל במלחמת העולם הראשונה]] |
| 490 | +[[hu:Palesztin front (első világháború)]] |
| 491 | +[[pt:Campanha do Sinai e Palestina]] |
| 492 | +[[ru:Синайско-Палестинская кампания]] |
| 493 | +[[sr:Синајски и палестински поход]] |
| 494 | +[[tr:Sina ve Filistin Cephesi]]</text> |
| 495 | + </revision> |
| 496 | + </page> |
Index: trunk/tools/wsor/ts_samples/get_active_user_times.py |
— | — | @@ -0,0 +1,232 @@ |
| 2 | +import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types |
| 3 | +import wmf |
| 4 | + |
def encode(v):
    """Render one value as a single field of a tab-separated output row.

    :Parameters:
        v
            The value to render. ``None`` becomes the two-character marker
            ``\\N`` (backslash, capital N); ``long`` values are narrowed with
            ``int()``; ``unicode`` values are encoded as UTF-8 bytes.

    :Returns:
        A byte string. Everything except ``None`` is passed through
        ``str()`` and the ``string-escape`` codec, so tabs, newlines and
        backslashes inside a value cannot break the row layout.
    """
    # The backslash is escaped explicitly: a bare "\N" only works because
    # Python 2 byte strings ignore the unknown escape, and it is a syntax
    # error on Python 3.
    if v is None:
        return "\\N"

    if isinstance(v, types.LongType):
        v = int(v)
    elif isinstance(v, types.UnicodeType):
        v = v.encode('utf-8')

    return str(v).encode("string-escape")
| 12 | + |
| 13 | + |
def _tally_flags(revisions):
    """Count the reverted, vandalism and deleted revisions in `revisions`.

    Each revision is a dict with 'is_reverted', 'is_vandalism' and 'deleted'
    keys; a value counts when it is truthy (so NULL/None does not count).

    :Returns:
        A ``(reverted, vandalism, deleted)`` tuple of ints.
    """
    reverted = 0
    vandalism = 0
    deleted = 0
    for rev in revisions:
        if rev['is_reverted']: reverted += 1
        if rev['is_vandalism']: vandalism += 1
        if rev['deleted']: deleted += 1

    return reverted, vandalism, deleted


def main():
    """Print one tab-separated row of first-session / last-ten-edit statistics
    for every user returned by ``db.getUsers(minimumEdits=min_edits)``.

    The header row and one row per user go to stdout; progress is logged to
    stderr. Users for which no edits can be found are skipped with a warning
    instead of aborting the run with an IndexError.
    """
    parser = argparse.ArgumentParser(
        description='Gathers editor data for first and last session'
    )
    parser.add_argument(
        'min_edits',
        type=int,
        help='the minimum number of edits that editors must have perfomed to be included'
    )
    parser.add_argument(
        '-c', '--cnf',
        metavar="<path>",
        type=str,
        help='the path to MySQL config info (defaults to ~/.my.cnf)',
        default=os.path.expanduser("~/.my.cnf")
    )
    parser.add_argument(
        '-s', '--host',
        type=str,
        help='the database host to connect to (defaults to localhost)',
        default="localhost"
    )
    parser.add_argument(
        '-d', '--db',
        type=str,
        help='the language db to run the query in (defaults to enwiki)',
        default="enwiki"
    )
    # The session loop below reads args.session, which was never defined and
    # raised AttributeError on the first user with two edits.
    # NOTE(review): the default assumes wmf.wp2Timestamp returns seconds, making
    # this a one-hour cutoff -- confirm the unit and the intended value.
    parser.add_argument(
        '--session',
        metavar="<gap>",
        type=int,
        help='the largest gap between two consecutive edits of one session, in the units of wmf.wp2Timestamp (defaults to 3600)',
        default=3600
    )
    args = parser.parse_args()

    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.DEBUG,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info("Connecting to %s:%s using %s." % (args.host, args.db, args.cnf))
    db = Database(
        host=args.host,
        db=args.db,
        read_default_file=args.cnf
    )
    headers = [
        'user_id',
        'user_name',
        'first_edit',
        'last_edit',
        'fes_edits',
        'fes_reverted',
        'fes_vandalism',
        'fes_deleted',
        'last10_edits',
        'last10_reverted',
        'last10_vandalism',
        'last10_deleted'
    ]
    print(
        "\t".join(headers)
    )

    logging.info("Processing users:")
    for user in db.getUsers(minimumEdits=args.min_edits):
        # The first session is the leading run of edits in which every edit
        # follows its predecessor by less than args.session.
        firstSession = []
        last = None
        #logging.debug("Getting first edits for %s" % user['user_name'])
        for rev in db.getFirstEdits(user['user_id'], maximum=100):
            if last is not None:
                diff = wmf.wp2Timestamp(rev['rev_timestamp']) - wmf.wp2Timestamp(last['rev_timestamp'])
                assert diff >= 0
                if diff >= args.session:
                    break

            firstSession.append(rev)
            last = rev

        #logging.debug("Getting last edits for %s" % user['user_name'])
        last10 = list(db.getLastEdits(user['user_id'], maximum=10))
        logging.debug("%s(%s): %s %s" % (user['user_name'], user['user_id'], len(firstSession)*">", len(last10)*"<"))

        if len(firstSession) == 0 or len(last10) == 0:
            # Indexing [0] below would raise IndexError for such a user.
            logging.warning("Skipping %s(%s): no edits found." % (user['user_name'], user['user_id']))
            continue

        user['first_edit'] = firstSession[0]['rev_timestamp']
        # getLastEdits yields newest first, so element 0 is the latest edit.
        user['last_edit'] = last10[0]['rev_timestamp']

        user['fes_edits'] = len(firstSession)
        (
            user['fes_reverted'],
            user['fes_vandalism'],
            user['fes_deleted']
        ) = _tally_flags(firstSession)

        user['last10_edits'] = len(last10)
        (
            user['last10_reverted'],
            user['last10_vandalism'],
            user['last10_deleted']
        ) = _tally_flags(last10)

        print("\t".join(encode(user[h]) for h in headers))
| 119 | + |
| 120 | + |
class Database:
    """Read access to the user, revision and archive tables of a wiki database.

    All queries use server-side (``SSDictCursor``) cursors so rows are
    streamed rather than loaded into memory. A connection cannot run a second
    query while such a result set is still being read, and the user stream
    stays open while each user's revision and archive streams are merged, so
    one connection is opened per stream.

    The user query used to live in ``getHundredthEdit(self)``, which read an
    unbound ``minimumEdits`` and could never run; it is now ``getUsers``.
    """

    def __init__(self, *args, **kwargs):
        """Open the connections; all arguments go straight to ``MySQLdb.connect``."""
        self.args = args
        self.kwargs = kwargs
        self.usersConn = MySQLdb.connect(*args, **kwargs)
        self.revsConn = MySQLdb.connect(*args, **kwargs)
        self.archConn = MySQLdb.connect(*args, **kwargs)
        # Kept so that code reading the original `conn` attribute still works.
        self.conn = self.usersConn

    def getUsers(self, minimumEdits):
        """Yield a dict (user_id, user_name, editcount) for every user whose
        ``user_editcount`` is at least `minimumEdits`.
        """
        minimumEdits = int(minimumEdits)
        cursor = self.usersConn.cursor(MySQLdb.cursors.SSDictCursor)
        try:
            cursor.execute(
                """
                SELECT
                    u.user_id,
                    u.user_name,
                    u.user_editcount as editcount
                FROM user u
                WHERE u.user_editcount >= %(minimum_edits)s
                """,
                {
                    'minimum_edits': minimumEdits
                }
            )
            for row in cursor:
                yield row
        finally:
            # Also runs when the consumer stops iterating early.
            cursor.close()

    def getEdits(self, userId, maximum=10000, chronologically=True):
        """Yield up to `maximum` edits of `userId`, merged from `revision`
        (live edits, ``deleted`` False) and `archive` (deleted edits,
        ``deleted`` True) in timestamp order.

        :Parameters:
            userId
                The user whose edits are wanted; coerced with ``int()``.
            maximum
                Row limit applied to each query and to the merged output.
            chronologically
                ``True`` for oldest first, ``False`` for newest first.

        Each yielded dict has the keys rev_id, rev_timestamp, is_reverted,
        is_vandalism and deleted. On equal timestamps the archive row is
        yielded first.
        """
        userId = int(userId)
        maximum = int(maximum)

        # `direction` is one of two fixed literals, never caller input, so
        # splicing it into the SQL text is safe.
        if chronologically:
            direction = "ASC"
            order = lambda t1, t2: t1 < t2
        else:
            direction = "DESC"
            order = lambda t1, t2: t1 > t2

        params = {
            'user_id': userId,
            'maximum': maximum
        }
        revisionCursor = self.revsConn.cursor(MySQLdb.cursors.SSDictCursor)
        archiveCursor = self.archConn.cursor(MySQLdb.cursors.SSDictCursor)
        try:
            revisionCursor.execute(
                """
                SELECT
                    r.rev_id,
                    r.rev_timestamp,
                    rvtd.revision_id IS NOT NULL AS is_reverted,
                    rvtd.is_vandalism IS NOT NULL AND rvtd.is_vandalism = TRUE AS is_vandalism,
                    False AS deleted
                FROM revision r
                LEFT JOIN halfak.reverted_20110115 rvtd
                    ON r.rev_id = rvtd.revision_id
                WHERE rev_user = %(user_id)s
                ORDER BY r.rev_timestamp """ + direction + """
                LIMIT %(maximum)s;
                """,
                params
            )
            archiveCursor.execute(
                """
                SELECT
                    ar_rev_id AS rev_id,
                    ar_timestamp AS rev_timestamp,
                    NULL AS is_reverted,
                    NULL AS is_vandalism,
                    True AS deleted
                FROM archive
                WHERE ar_user = %(user_id)s
                ORDER BY ar_timestamp """ + direction + """
                LIMIT %(maximum)s;
                """,
                params
            )

            # Two-way merge of the two already-sorted streams.
            revPointer = revisionCursor.fetchone()
            archPointer = archiveCursor.fetchone()
            count = 0
            while revPointer is not None or archPointer is not None: #still something to output
                takeRevision = archPointer is None or (
                    revPointer is not None and
                    order(revPointer['rev_timestamp'], archPointer['rev_timestamp'])
                )
                if takeRevision:
                    yield revPointer
                    revPointer = revisionCursor.fetchone()
                else:
                    yield archPointer
                    archPointer = archiveCursor.fetchone()

                count += 1
                if count >= maximum: break
        finally:
            # Runs on normal exhaustion, on the `maximum` cut-off and when the
            # consumer abandons the generator, so the connections are always
            # released for the next query.
            revisionCursor.close()
            archiveCursor.close()

    def getFirstEdits(self, userId, maximum=10000):
        """Yield up to `maximum` of the user's edits, oldest first."""
        return self.getEdits(userId, maximum, chronologically=True)

    def getLastEdits(self, userId, maximum=10000):
        """Yield up to `maximum` of the user's edits, newest first."""
        return self.getEdits(userId, maximum, chronologically=False)
| 231 | + |
| 232 | + |
| 233 | +if __name__ == "__main__": main() |
Index: trunk/tools/wsor/vandal_conversion/R/loader/load_editor_edit_count.R |
— | — | @@ -0,0 +1,26 @@ |
| 2 | +source("util/env.R") |
| 3 | + |
| 4 | + |
| 5 | + |
# Returns the editor edit-count table, reading it from
# DATA_DIR/en.editor_edit_count.20.fixed.tsv on first use and caching it in
# the global EDITOR_EDIT_COUNT afterwards.
#
# Arguments:
#   verbose - when TRUE, prints a progress message while the file is read.
#   reload  - when TRUE, discards the cached table and reads the file again.
#
# Returns:
#   The data frame produced by read.table (tab-separated, with header, no
#   quote or comment characters, "\N" fields read as NA).
load_editor_edit_count = function(verbose=TRUE, reload=FALSE){
	filename = paste(DATA_DIR, "en.editor_edit_count.20.fixed.tsv", sep="/")

	# Reset the cache when it was never created or a reload is requested.
	# `||` is used because this is a scalar condition, and TRUE/FALSE replace
	# T/F, which are ordinary variables that can be reassigned.
	if(!exists("EDITOR_EDIT_COUNT") || reload){
		EDITOR_EDIT_COUNT <<- NULL
	}

	if(is.null(EDITOR_EDIT_COUNT)){
		if(verbose){cat("Loading ", filename, "...")}
		EDITOR_EDIT_COUNT <<- read.table(
			filename,
			header=TRUE, sep="\t",
			quote="", comment.char="",
			na.strings="\\N"
		)
		if(verbose){cat("DONE!\n")}
	}
	EDITOR_EDIT_COUNT
}
| 26 | + |
| 27 | + |