Index: trunk/tools/editor_trends/etl/variables.py |
— | — | @@ -19,8 +19,11 @@ |
20 | 20 | __version__ = '0.1' |
21 | 21 | |
22 | 22 | import hashlib |
| 23 | +import re |
23 | 24 | from xml.etree.cElementTree import dump |
24 | 25 | |
| 26 | +RE_DEL_ARTICLE = re.compile('/GA[\d]{1,2}') |
| 27 | +RE_SPEEDY_DELETION = re.compile('\{\{db\-[a-z\d]*\}\}') #http://en.wikipedia.org/wiki/Wikipedia:Criteria_for_speedy_deletion |
25 | 28 | |
26 | 29 | def validate_hostname(address): |
27 | 30 | ''' |
— | — | @@ -68,6 +71,14 @@ |
69 | 72 | return title.text |
70 | 73 | |
71 | 74 | |
| 75 | +def detect_speedy_deletion(revision_text): |
| 76 | + spds = re.findall(RE_SPEEDY_DELETION, revision_text) |
| 77 | + templates = {} |
| 78 | + for spd in spds: |
| 79 | + templates[spd] = 1 |
| 80 | + return templates |
| 81 | + |
| 82 | + |
72 | 83 | def parse_title_meta_data(title, ns, namespaces): |
73 | 84 | ''' |
74 | 85 | This function categorizes an article to assist the Wikimedia Taxonomy |
— | — | @@ -75,6 +86,7 @@ |
76 | 87 | http://meta.wikimedia.org/wiki/Contribution_Taxonomy_Project/Research_Questions |
77 | 88 | ''' |
78 | 89 | title_meta = {} |
| 90 | + re_ga = re.compile('/GA[\d]') |
79 | 91 | if not ns: |
80 | 92 | return title_meta |
81 | 93 | namespace = '%s:' % namespaces[ns] |
— | — | @@ -83,10 +95,11 @@ |
84 | 96 | title_meta['ns'] = ns |
85 | 97 | if title.startswith('List of'): |
86 | 98 | title_meta['category'] = 'List' |
| 99 | + elif ns == 1: |
| 100 | + if re.search(RE_DEL_ARTICLE, title.find): |
| 101 | + title_meta['category'] = 'Good Article' |
87 | 102 | elif ns == 4 or ns == 5: |
88 | | - if title.find('Articles for deletion') > -1: |
89 | | - title_meta['category'] = 'Deletion' |
90 | | - elif title.find('Arbitration') > -1: |
| 103 | + if title.find('Arbitration') > -1: |
91 | 104 | title_meta['category'] = 'Arbitration' |
92 | 105 | elif title.find('Good Article') > -1: |
93 | 106 | title_meta['category'] = 'Good Article' |
— | — | @@ -108,6 +121,8 @@ |
109 | 122 | title_meta['category'] = 'Featured Portal' |
110 | 123 | elif title.find('Featured topic candidates') > -1: |
111 | 124 | title_meta['category'] = 'Featured Topic' |
| 125 | + elif title.find('Articles for deletion') > -1 and title.find('Articles for deletion/Log/') > -1: |
| 126 | + title_meta['category'] = 'Deletion' |
112 | 127 | |
113 | 128 | #print title_meta |
114 | 129 | return title_meta |