r78582 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r78581 | r78582 | r78583 >
Date: 22:10, 18 December 2010
Author: diederik
Status: deferred
Tags:
Comment:
Thanks to Nimish for giving me a number of suggestions to reduce the processing time. This commit is primarily focused on improving efficiency, for example by merging the split and extract phases into a single step.
Modified paths:
  • /trunk/tools/editor_trends/analyses/file_size_reduction.py (modified) (history)
  • /trunk/tools/editor_trends/bots/bots.py (modified) (history)
  • /trunk/tools/editor_trends/configuration.py (modified) (history)
  • /trunk/tools/editor_trends/etl/chunker.py (modified) (history)
  • /trunk/tools/editor_trends/etl/exporter.py (modified) (history)
  • /trunk/tools/editor_trends/etl/extract.py (modified) (history)
  • /trunk/tools/editor_trends/etl/extracter.py (added) (history)
  • /trunk/tools/editor_trends/etl/models.py (modified) (history)
  • /trunk/tools/editor_trends/manage.py (modified) (history)
  • /trunk/tools/editor_trends/wikitree/parser.py (added) (history)
  • /trunk/tools/editor_trends/wikitree/xml.py (deleted) (history)

Diff

Index: trunk/tools/editor_trends/analyses/file_size_reduction.py
@@ -69,6 +69,7 @@
 def calculate_filesize_overhead(location, filename):
     counter = None
     ds = DumpStatistics()
+    filename = os.path.join(location, filename)
     context = cElementTree.iterparse(filename, events=('start', 'end'))
     context = iter(context)
     event, root = context.next() #get the root element of the XML doc
@@ -80,20 +81,20 @@
             root.clear() # when done parsing a section clear the tree to release memory
     except SyntaxError:
         pass
-    utils.store_object(ds, settings.binary_location, 'ds')
+    utils.store_object(ds, settings.binary_location, 'ds')
     xml_size = ds.total_size_xml()
     text_size = ds.total_size_text()
     print text_size, xml_size
     print ds.tags
-
 
+
 def output_dumpstatistics():
     ds = utils.load_object(settings.binary_location, 'ds.bin')
-
+
     for key in ds.tags:
         print '%s\t%s' % (key, ds.tags[key])
-
+
 if __name__ == '__main__':
+    input = os.path.join(settings.input_location, 'en', 'wiki')
+    calculate_filesize_overhead(input, 'enwiki-latest-stub-meta-history.xml')
     output_dumpstatistics()
-    #calculate_filesize_overhead(settings.input_location, settings.input_filename)
-
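
The change above wires calculate_filesize_overhead to a concrete dump file; the function itself relies on cElementTree.iterparse plus root.clear() to keep memory flat while walking a multi-gigabyte XML dump. A minimal, self-contained sketch of that pattern (the filename and tag name are placeholders; this targets the Python 2 used by the repository):

import xml.etree.cElementTree as cElementTree

def iterate_elements(filename, tag='page'):
    # Stream the document: iterparse builds elements incrementally and
    # emits (event, element) pairs instead of loading the whole tree.
    context = cElementTree.iterparse(filename, events=('start', 'end'))
    context = iter(context)
    event, root = context.next()  # the first event yields the root element
    for event, elem in context:
        if event == 'end' and elem.tag.endswith(tag):
            yield elem
            root.clear()  # drop already-processed children to release memory
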
Index: trunk/tools/editor_trends/manage.py
@@ -182,9 +182,10 @@
     filename = kwargs.get('filename')
     extension = kwargs.get('extension')
     location = kwargs.get('location')
+    full_project = kwargs.get('full_project')
     pbar = get_value(args, 'progress')
     domain = settings.wp_dump_location
-    path = '/%s/latest/' % project
+    path = '/%s/latest/' % full_project
     extension = utils.determine_file_extension(filename)
     filemode = utils.determine_file_mode(extension)
     dump_downloader.download_wiki_file(domain, path, filename, location, filemode, pbar)
@@ -322,7 +323,7 @@
         ignore = ignore + ',extract'
 
     functions = ordered_dict.OrderedDict(((dump_downloader_launcher, 'download'),
-                                          (chunker_launcher, 'split'),
+                                          #(chunker_launcher, 'split'),
                                           (extract_launcher, 'extract'),
                                           (sort_launcher, 'sort'),
                                           (store_launcher, 'store'),
@@ -407,10 +408,9 @@
     parser_download = subparsers.add_parser('download', help='The download sub command allows you to download a Wikipedia dump file.')
     parser_download.set_defaults(func=dump_downloader_launcher)
 
-    parser_split = subparsers.add_parser('split', help='The split sub command splits the downloaded file in smaller chunks to parallelize extracting information.')
+    #parser_split = subparsers.add_parser('split', help='The split sub command splits the downloaded file in smaller chunks to parallelize extracting information.')
+    #parser_split.set_defaults(func=chunker_launcher)
 
-    parser_split.set_defaults(func=chunker_launcher)
-
     parser_create = subparsers.add_parser('extract', help='The store sub command parsers the XML chunk files, extracts the information and stores it in a MongoDB.')
     parser_create.set_defaults(func=extract_launcher)
Index: trunk/tools/editor_trends/wikitree/xml.py
@@ -1,55 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-'''
-Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License version 2
-as published by the Free Software Foundation.
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-See the GNU General Public License for more details, at
-http://www.fsf.org/licenses/gpl.html
-'''
-
-__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
-__author__email = 'dvanliere at gmail dot com'
-__date__ = '2010-10-21'
-__version__ = '0.1'
-
-from utils import utils
-import configuration
-settings = configuration.Settings()
-
-
-def convert_html_entities(text):
-    return utils.unescape(text)
-
-
-def extract_text(elem, **kwargs):
-    if elem != None and elem.text != None:
-        #try:
-        return elem.text #.decode(settings.encoding)
-        #except UnicodeDecodeError:
-        #return None
-
-
-def retrieve_xml_node(xml_nodes, name):
-    for xml_node in xml_nodes:
-        if xml_node.tag == name:
-            return xml_node
-    return None #maybe this should be replaced with an NotFoundError
-
-
-def read_input(file):
-    lines = []
-    for line in file:
-        lines.append(line)
-        if line.find('</page>') > -1:
-            yield lines
-            '''
-            #This looks counter intuitive but Python continues with this call
-            after it has finished the yield statement
-            '''
-            lines = []
-    file.close()
Index: trunk/tools/editor_trends/wikitree/parser.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License version 2
+as published by the Free Software Foundation.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details, at
+http://www.fsf.org/licenses/gpl.html
+'''
+
+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
+__author__email = 'dvanliere at gmail dot com'
+__date__ = '2010-10-21'
+__version__ = '0.1'
+
+import xml.etree.cElementTree as cElementTree
+
+import configuration
+settings = configuration.Settings()
+from utils import utils
+
+def convert_html_entities(text):
+    return utils.unescape(text)
+
+
+def extract_text(elem, **kwargs):
+    if elem != None and elem.text != None:
+        #try:
+        return elem.text #.decode(settings.encoding)
+        #except UnicodeDecodeError:
+        #return None
+
+
+def retrieve_xml_node(xml_nodes, name):
+    for xml_node in xml_nodes:
+        if xml_node.tag == name:
+            return xml_node
+    return None #maybe this should be replaced with a NotFoundError
+
+def determine_element(line):
+    pos = line.find(' ')
+    elem = line[:pos] + '>'
+
+
+def read_input(file):
+    lines = []
+    start_parsing = False
+    for line in file:
+        if line == '\n':
+            continue
+        if start_parsing == False and line.find('<page>') > -1:
+            start_parsing = True
+        if start_parsing:
+            lines.append(line.strip())
+            if line.find('</page>') > -1:
+                #print lines
+                lines = '\n'.join(lines)
+                lines = lines.encode(settings.encoding)
+                xml_string = cElementTree.XML(lines)
+                yield xml_string
+                '''
+                #This looks counter intuitive but Python continues with this call
+                after it has finished the yield statement
+                '''
+                lines = []
+    file.close()
Property changes on: trunk/tools/editor_trends/wikitree/parser.py
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:mime-type
   + text/plain
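
read_input() replaces the old line-buffering generator in wikitree/xml.py: instead of yielding raw lines, it joins each <page>...</page> block and parses it with cElementTree.XML, so callers receive ready-to-query elements. A usage sketch, assuming the package is on sys.path and substituting a tiny in-memory fragment (without the MediaWiki XML namespace, which the literal <page> match expects to be absent) for a real dump file:

import StringIO
import wikitree.parser

fragment = StringIO.StringIO(
    '<page>\n'
    '  <title>Example</title>\n'
    '  <id>12</id>\n'
    '</page>\n'
)

# Each iteration yields one parsed <page> element, so a full dump can be
# processed page by page without building the whole tree in memory.
for page in wikitree.parser.read_input(fragment):
    print page.find('title').text  # Example
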
Index: trunk/tools/editor_trends/etl/exporter.py
@@ -213,16 +213,12 @@
             break
         obs = editors.find_one({'editor': id}, {'first_edit': 1, 'final_edit': 1})
 
-        #for editor in tasks:
-        #    obs = tasks[editor]
         first_edit = obs['first_edit']
         last_edit = obs['final_edit']
         editor_dt = relativedelta(last_edit, first_edit)
         editor_dt = (editor_dt.years * 12) + editor_dt.months
         edits = []
         for year in xrange(2001, datetime.datetime.now().year + 1):
-            #if year == 2009 and editor == '2':
-            #    print 'debug'
             if first_edit.year > year or last_edit.year < year:
                 continue
             window_end = datetime.datetime(year, 12, 31)
@@ -251,11 +247,67 @@
 
 
 
-def generate_cohort_dataset(tasks, dbname, collection, **kwargs):
+def generate_cohort_dataset_forward(tasks, dbname, collection, **kwargs):
     mongo = db.init_mongo_db(dbname)
     editors = mongo[collection + '_dataset']
     windows = create_windows()
     data = shaper.create_datacontainer('dict')
+    final_year = datetime.datetime.now().year + 1
+    m1 = [1, 2, 3, 4, 5, 6]
+    m2 = [7, 8, 9, 10, 11, 12]
+    frames = [m1, m2]
+    while True:
+        id = tasks.get(block=False)
+        if id == None:
+            break
+        obs = editors.find_one({'editor': id}, {'new_wikipedian': 1, 'monthly_edits': 1, 'final_edit':1})
+        new_wikipedian = obs['new_wikipedian']
+        last_edit = obs['final_edit']
+        start_year = new_wikipedian.year
+        last_year = last_edit.year + 1
+        if new_wikipedian.month != 1:
+            continue
+        for year in xrange(start_year, last_year):
+            if year not in data[start_year]:
+                data[start_year][year] = {}
+            for x, frame in enumerate(frames):
+                if x not in data[start_year][year]:
+                    data[start_year][year][x] = 0
+                if 'n' not in data[start_year][year]:
+                    data[start_year][year]['n'] = 0
+
+                active = sum([obs['monthly_edits'][str(year)][str(m)] for m in frame])
+                data[start_year][year]['n'] += 1
+                if active > 0:
+                    data[start_year][year][x] += 1
+    filename = '%s_cohort_forward.csv' % dbname
+    fh = utils.create_txt_filehandle(settings.dataset_location, filename, 'w', settings.encoding)
+    frames.append('n')
+    headers = ["%s_%s" % (year, frame[0]) for year in xrange(2001, final_year) for frame in enumerate(frames)]
+    headers.insert(0, '\t')
+    utils.write_list_to_csv(headers, fh)
+
+    for obs_year in data:
+        obs = '%s\t' % obs_year
+        for year in xrange(2001, final_year):
+            values = data[obs_year].get(year, None)
+            if values != None:
+                for value in values:
+                    obs = '%s\t%s\t' % (obs, values[value])
+            else:
+                obs = '%s\t.\t.\t.\t' % obs
+
+        obs = '%s\n' % obs
+        fh.write(obs)
+    fh.close()
+
+
+
+def generate_cohort_dataset_backward(tasks, dbname, collection, **kwargs):
+    mongo = db.init_mongo_db(dbname)
+    editors = mongo[collection + '_dataset']
+    windows = create_windows()
+    data = shaper.create_datacontainer('dict')
     data = shaper.add_windows_to_datacontainer(data, windows)
 
     while True:
@@ -360,10 +412,12 @@
         '13':{'first_edit': datetime.datetime(2007, 2, 1), 'final_edit': datetime.datetime(2009, 4, 30)},
         }
     generate_cohort_dataset(editors, dbname, collection)
+
+
 if __name__ == '__main__':
     dbname = 'enwiki'
     collection = 'editors'
     #debug(dbname, collection)
-    dataset_launcher(dbname, collection, generate_cohort_dataset_howie)
+    dataset_launcher(dbname, collection, generate_cohort_dataset_forward)
     #dataset_launcher(dbname, collection, generate_long_editor_dataset)
     #dataset_launcher(dbname, collection, generate_wide_editor_dataset)
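
The first hunk removes debug leftovers around the tenure computation, which expresses an editor's active period in whole months via dateutil's relativedelta. A worked sketch of that calculation, reusing the made-up dates from the debug fixture above:

import datetime
from dateutil.relativedelta import relativedelta

first_edit = datetime.datetime(2007, 2, 1)
last_edit = datetime.datetime(2009, 4, 30)

# relativedelta decomposes the gap into calendar units; folding years
# into months gives the tenure figure used by the dataset generators.
editor_dt = relativedelta(last_edit, first_edit)
editor_dt = (editor_dt.years * 12) + editor_dt.months
print editor_dt  # 26
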
Index: trunk/tools/editor_trends/etl/extract.py
@@ -39,7 +39,7 @@
 from database import db_settings
 from database import db
 from database import cache
-from wikitree import xml
+import wikitree.parser
 from bots import bots
 from etl import models
 #from utils import process_constructor as pc
Index: trunk/tools/editor_trends/etl/chunker.py
@@ -34,7 +34,7 @@
 
 from utils import utils
 import extract
-from wikitree import xml
+import wikitree.parser
 from bots import bots
 
 
Index: trunk/tools/editor_trends/etl/extracter.py
@@ -0,0 +1,283 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License version 2
+as published by the Free Software Foundation.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details, at
+http://www.fsf.org/licenses/gpl.html
+'''
+
+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
+__author__email = 'dvanliere at gmail dot com'
+__date__ = '2010-12-13'
+__version__ = '0.1'
+
+import sys
+import re
+import json
+import os
+import xml.etree.cElementTree as cElementTree
+
+sys.path.append('..')
+import configuration
+settings = configuration.Settings()
+
+import wikitree.parser
+from bots import bots
+from utils import utils
+
+try:
+    import psyco
+    psyco.full()
+except ImportError:
+    pass
+
+
+RE_NUMERIC_CHARACTER = re.compile('&#(\d+);')
+
+
+def remove_numeric_character_references(text):
+    return re.sub(RE_NUMERIC_CHARACTER, lenient_deccharref, text).encode('utf-8')
+
+
+def lenient_deccharref(m):
+    try:
+        return unichr(int(m.group(1)))
+    except ValueError:
+        '''
+        There are a few articles that raise a Value Error here, the reason is
+        that I am using a narrow Python build (UCS2) instead of a wide build
+        (UCS4). The quick fix is to return an empty string...
+        Real solution is to rebuild Python with UCS4 support.....
+        '''
+        return ''
+
+
+def remove_namespace(element, namespace):
+    '''Remove namespace from the XML document.'''
+    ns = u'{%s}' % namespace
+    nsl = len(ns)
+    for elem in element.getiterator():
+        if elem.tag.startswith(ns):
+            elem.tag = elem.tag[nsl:]
+    return element
+
+
+def load_namespace(language):
+    file = '%s_ns.json' % language
+    fh = utils.create_txt_filehandle(settings.namespace_location, file, 'r', settings.encoding)
+    ns = json.load(fh)
+    fh.close()
+    ns = ns['query']['namespaces']
+    return ns
+
+
+def build_namespaces_locale(namespaces, include=['0']):
+    '''
+    @include is a list of namespace keys that should not be ignored, the default
+    setting is to ignore all namespaces except the main namespace.
+    '''
+    ns = []
+    for namespace in namespaces:
+        if namespace not in include:
+            value = namespaces[namespace].get(u'*', None)
+            ns.append(value)
+    return ns
+
+
+def parse_comments(revisions, function):
+    for revision in revisions:
+        comment = revision.find('{%s}comment' % settings.xml_namespace)
+        #timestamp = revision.find('{%s}timestamp' % settings.xml_namespace).text
+        if comment != None and comment.text != None:
+            comment.text = function(comment.text)
+    return revisions
+
+
+def is_article_main_namespace(elem, namespace):
+    '''
+    checks whether the article belongs to the main namespace
+    '''
+    title = elem.text
+    for ns in namespace:
+        if title.startswith(ns):
+            return False
+    return True
+
+def validate_hostname(address):
+    '''
+    This is not a foolproof solution at all. The problem is that it's really hard
+    to determine whether a string is a hostname or not **reliably**. This is a
+    very fast rule of thumb. Will lead to false positives, but that's life :)
+    '''
+    parts = address.split(".")
+    if len(parts) > 2:
+        return True
+    else:
+        return False
+
+
+def validate_ip(address):
+    parts = address.split(".")
+    if len(parts) != 4:
+        return False
+    parts = parts[:3]
+    for item in parts:
+        try:
+            if not 0 <= int(item) <= 255:
+                return False
+        except ValueError:
+            return False
+    return True
+
+
+def determine_username_is_bot(contributor, **kwargs):
+    '''
+    #contributor is an xml element containing the id of the contributor
+    @bots should have a dict with all the bot ids and bot names
+    @Return False if username id is not in bot dict id or True if username id
+    is a bot id.
+    '''
+    bots = kwargs.get('bots')
+    username = contributor.find('username')
+    if username == None:
+        return 0
+    else:
+        if username in bots:
+            return 1
+        else:
+            return 0
+
+
+def extract_username(contributor, **kwargs):
+    contributor = contributor.find('username')
+    if contributor != None:
+        return contributor.text
+    else:
+        return None
+
+
+def extract_contributor_id(contributor, **kwargs):
+    '''
+    @contributor is the xml contributor node containing a number of attributes
+    Currently, we are only interested in registered contributors, hence we
+    ignore anonymous editors.
+    '''
+    if contributor.get('deleted'):
+        return None # ASK: Not sure if this is the best way to code deleted contributors.
+    elem = contributor.find('id')
+    if elem != None:
+        return {'id':elem.text}
+    else:
+        elem = contributor.find('ip')
+        if elem != None and elem.text != None and validate_ip(elem.text) == False and validate_hostname(elem.text) == False:
+            return {'username':elem.text, 'id': elem.text}
+        else:
+            return None
+
+
+def output_editor_information(revisions, page, bots):
+    '''
+    @elem is an XML element containing 1 revision from a page
+    @output is where to store the data, a filehandle
+    @**kwargs contains extra information
+
+    the variable tags determines which attributes are being parsed, the values in
+    this dictionary are the functions used to extract the data.
+    '''
+    headers = ['id', 'date', 'article', 'username']
+    tags = {'contributor': {'id': extract_contributor_id,
+                            'bot': determine_username_is_bot,
+                            'username': extract_username,
+                            },
+            'timestamp': {'date': wikitree.parser.extract_text},
+            }
+    vars = {}
+    flat = []
+
+    for x, revision in enumerate(revisions):
+        #print len(revision.getchildren())
+        vars[x] = {}
+        vars[x]['article'] = page
+        for tag in tags:
+            el = revision.find('%s' % tag)
+            if el == None:
+                #print cElementTree.tostring(revision, settings.encoding)
+                del vars[x]
+                break
+            for function in tags[tag].keys():
+                f = tags[tag][function]
+                value = f(el, bots=bots)
+                if type(value) == type({}):
+                    for kw in value:
+                        vars[x][kw] = value[kw]
+                else:
+                    vars[x][function] = value
+
+    '''
+    This loop determines for each observation whether it should be stored or not.
+    '''
+    for x in vars:
+        if vars[x]['bot'] == 1 or vars[x]['id'] == None or vars[x]['username'] == None:
+            continue
+        else:
+            f = []
+            for head in headers:
+                f.append(vars[x][head])
+            flat.append(f)
+
+    return flat
+
+
+def parse_dumpfile(project, language_code, namespaces=['0']):
+    bot_ids = bots.retrieve_bots(language_code)
+    ns = load_namespace(language_code)
+    ns = build_namespaces_locale(ns, namespaces)
+
+    location = os.path.join(settings.input_location, language_code, project)
+    fh = utils.create_txt_filehandle(location, 'enwiki-latest-stub-meta-history.xml', 'r', settings.encoding)
+    for page in wikitree.parser.read_input(fh):
+        title = page.find('title')
+        if is_article_main_namespace(title, ns):
+            #cElementTree.dump(page)
+            article_id = page.find('id').text
+            revisions = page.findall('revision')
+            revisions = parse_comments(revisions, remove_numeric_character_references)
+            output = output_editor_information(revisions, article_id, bot_ids)
+            write_output(output, project, language_code)
+        page.clear()
+    fh.close()
+
+
+def write_output(output, project, language_code):
+    location = os.path.join(settings.input_location, language_code, project, 'txt')
+    for o in output:
+        file = '%s.csv' % hash(o[0])
+        try:
+            fh = utils.create_txt_filehandle(location, file, 'a', settings.encoding)
+            utils.write_list_to_csv(o, fh)
+            fh.close()
+        except Exception, error:
+            print error
+
+
+def hash(id):
+    '''
+    A very simple hash function based on modulo. The except clause has been
+    added because there are instances where the username is stored in the userid
+    tag and hence that's a string and not an integer.
+    '''
+    try:
+        return int(id) % 500
+    except:
+        return sum([ord(i) for i in id]) % 500
+
+if __name__ == '__main__':
+    project = 'wiki'
+    language_code = 'en'
+    parse_dumpfile(project, language_code)
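
Among the helpers above, remove_numeric_character_references() pairs the RE_NUMERIC_CHARACTER pattern with lenient_deccharref() to turn decimal entities such as &#233; back into characters before comments are stored. A standalone sketch of just that pair on Python 2 (which has unichr), with a made-up comment string:

# -*- coding: utf-8 -*-
import re

RE_NUMERIC_CHARACTER = re.compile('&#(\d+);')

def lenient_deccharref(m):
    # Decode one decimal character reference; fall back to an empty string
    # for code points a narrow (UCS2) build cannot represent.
    try:
        return unichr(int(m.group(1)))
    except ValueError:
        return ''

comment = u'reverted edit to caf&#233; article'
print re.sub(RE_NUMERIC_CHARACTER, lenient_deccharref, comment)
# reverted edit to café article
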
Index: trunk/tools/editor_trends/etl/models.py
@@ -27,7 +27,7 @@
 
 from utils import models
 from utils import utils
-from wikitree import xml
+import wikitree
 
 class TXTFile(object):
 
Index: trunk/tools/editor_trends/configuration.py
@@ -57,7 +57,7 @@
         #Change this to match your computers configuration (RAM / CPU)
         self.minimum_python_version = (2, 6)
         self.wp_dump_location = 'http://download.wikimedia.org'
-        self.xml_namespace = 'http://www.mediawiki.org/xml/export-0.3/'
+        self.xml_namespace = 'http://www.mediawiki.org/xml/export-0.4/'
         self.ascii_extensions = ['txt', 'csv', 'xml', 'sql', 'json']
         self.windows_register = {'7z.exe': 'Software\\7-Zip', }
         #Extensions of ascii files, this is used to determine the filemode to use
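
The namespace bump from export-0.3 to export-0.4 matters because ElementTree folds the default XML namespace into every tag, so lookups like the '{%s}comment' search in etl/extracter.py silently return None when the constant is stale. A small sketch of the mechanics:

import xml.etree.cElementTree as cElementTree

xml_namespace = 'http://www.mediawiki.org/xml/export-0.4/'
revision = cElementTree.XML(
    '<revision xmlns="%s"><comment>fix typo</comment></revision>' % xml_namespace
)

# Tags carry the namespace as a {uri} prefix, so a bare find('comment')
# returns None while the qualified lookup succeeds.
print revision.find('comment')
print revision.find('{%s}comment' % xml_namespace).text  # fix typo
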
Index: trunk/tools/editor_trends/bots/bots.py
@@ -28,7 +28,7 @@
 
 import configuration
 settings = configuration.Settings()
-from wikitree import xml
+import wikitree
 from database import db
 from utils import utils
 #from etl import extract

Follow-up revisions

Revision	Commit summary	Author	Date
r78583	Followup r78582, svn:eol-style native	reedy	22:19, 18 December 2010

Status & tagging log