r76834 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r76833‎ | r76834 | r76835 >
Date:20:47, 16 November 2010
Author:diederik
Status:deferred
Tags:
Comment:
Refactored settings module.
Modified paths:
  • /trunk/tools/editor_trends/configuration.py (added) (history)

Diff [purge]

Index: trunk/tools/editor_trends/configuration.py
@@ -0,0 +1,168 @@
 2+#!/usr/bin/python
 3+# -*- coding: utf-8 -*-
 4+'''
 5+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
 6+This program is free software; you can redistribute it and/or
 7+modify it under the terms of the GNU General Public License version 2
 8+as published by the Free Software Foundation.
 9+This program is distributed in the hope that it will be useful,
 10+but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 12+See the GNU General Public License for more details, at
 13+http://www.fsf.org/licenses/gpl.html
 14+'''
 15+
 16+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
 17+__author__email = 'dvanliere at gmail dot com'
 18+__date__ = '2010-10-21'
 19+__version__ = '0.1'
 20+
 21+'''
 22+This file contains settings that are used for constructing and analyzing
 23+the datasets as part of the Editor Dynamics and Anti-Vandalism projects.
 24+'''
 25+
 26+from multiprocessing import cpu_count
 27+import os
 28+import sys
 29+import platform
 30+
 31+try:
 32+ from _winreg import *
 33+ from pywin import win32file
 34+ '''increase the maximum number of open files on Windows to 1024'''
 35+ win32file._setmaxstdio(1024)
 36+except ImportError:
 37+ pass
 38+
 39+try:
 40+ import resource
 41+except ImportError:
 42+ pass
 43+
class Settings(object):
    '''
    Central configuration object for the editor_trends tools.

    Instantiating the class inspects the running environment (operating
    system, CPU count, current working directory, open-file limit) and stores
    the results, together with a number of fixed project constants, as
    instance attributes. As a side effect the immediate subdirectories of the
    working directory are appended to sys.path.
    '''

    def __init__(self, debug=True, process_multiplier=1):
        '''
        Populate all settings.

        debug -- stored as self.debug.
        process_multiplier -- factor applied to the number of CPUs to obtain
        self.number_of_processes.
        '''
        self.debug = debug
        self.progressbar = True
        self.encoding = 'utf-8'
        self.date_format = '%Y-%m-%d' #Date format as used by Erik Zachte
        # Timestamp format as generated by the MediaWiki dumps
        self.timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

        # == 64Mb, see
        # http://hadoop.apache.org/common/docs/r0.20.0/hdfs_design.html#Large+Data+Sets
        # for the reason
        self.max_settings_xmlfile_size = 67108864
        #Change this to match your computers configuration (RAM / CPU)
        self.number_of_processes = cpu_count() * process_multiplier
        self.minimum_python_version = (2, 6)
        self.wp_dump_location = 'http://download.wikimedia.org'
        self.xml_namespace = 'http://www.mediawiki.org/xml/export-0.4/'
        #Extensions of ascii files, this is used to determine the filemode to use
        self.ascii_extensions = ['txt', 'csv', 'xml', 'sql', 'json']
        self.compression_extensions = ['gz', 'bz2', '7z']

        # The order below matters: the working directory must be known before
        # the python path is updated, and platform / root / working directory
        # must be known before the file locations are derived from them.
        self.platform = self.determine_platform()
        self.architecture = platform.machine()
        self.working_directory = self.determine_working_directory()
        self.update_python_path()

        self.root = '/' if self.platform != 'Windows' else 'c:\\'
        self.ziptool = self.determine_ziptool()
        # set_file_locations() stores its results as attributes and returns
        # None, so file_locations is always None; the attribute is kept for
        # backward compatibility.
        self.file_locations = self.set_file_locations()
        self.max_filehandles = self.determine_max_filehandles_open()

        # Registry sub keys (below HKEY_CURRENT_USER) of supported programs.
        self.windows_register = {'7zip': 'Software\\7-Zip',}

        self.projects = {'commons': 'commonswiki',
                         'wikibooks': 'wikibooks',
                         'wikinews': 'wikinews',
                         'wikiquote': 'wikiquote',
                         'wikisource': 'wikisource',
                         'wikiversity': 'wikiversity',
                         'wiktionary': 'wiktionary',
                         'metawiki': 'metawiki',
                         'wikispecies': 'specieswiki',
                         'incubator': 'incubatorwiki',
                         'foundation': 'foundationwiki',
                         'mediawiki': 'mediawikiwiki',
                         'outreach': 'outreachwiki',
                         'strategic planning': 'strategywiki',
                         'usability initiative': 'usabilitywiki',
                         'multilingual wikisource': None
                         }

    def determine_working_directory(self):
        '''
        Return the current working directory cut off right after its first
        'editor_trends' component (including the path separator that follows
        it, if any).

        When the current working directory does not contain 'editor_trends'
        it is returned unchanged; previously the failed lookup silently
        truncated the path to its first 13 characters.
        '''
        marker = 'editor_trends'
        cwd = os.getcwd()
        if cwd.endswith('%s%s' % (marker, os.sep)):
            return cwd
        pos = cwd.find(marker)
        if pos == -1:
            return cwd
        # + 1 keeps the separator that follows the marker.
        return cwd[:pos + len(marker) + 1]

    def determine_platform(self):
        '''
        Return 'Windows', 'Linux' or 'OSX', or None for any other system.

        platform.system() is used instead of probing win32_ver(),
        linux_distribution() and mac_ver(): win32_ver() returns a 4-tuple, so
        comparing it with the empty 3-tuples made the Windows probe look
        successful on every operating system.
        '''
        systems = {'Windows': 'Windows',
                   'Linux': 'Linux',
                   'Darwin': 'OSX'}
        return systems.get(platform.system())

    def verify_environment(self, directories):
        '''
        Make sure every path in directories exists, creating missing ones
        (including missing parent directories).

        Raises OSError when a directory cannot be created.
        '''
        for directory in directories:
            if os.path.exists(directory):
                continue
            try:
                os.makedirs(directory)
            except OSError as error:
                # Not an error if the directory appeared in the meantime.
                if not os.path.isdir(directory):
                    raise OSError('Configuration Error, could not create '
                                  'directory %s (%s).' % (directory, error))

    def detect_windows_program(self, program):
        '''
        Look up the installation path of program in the Windows registry.

        program must be a key of self.windows_register. Returns the 'Path'
        value stored under the matching HKEY_CURRENT_USER sub key, or None
        when the key or the value cannot be read. Relies on the _winreg names
        imported at module level, so it can only be called on Windows.
        '''
        entry = self.windows_register[program]
        try:
            key = OpenKey(HKEY_CURRENT_USER, entry, 0, KEY_READ)
        except WindowsError:
            return None
        try:
            return QueryValueEx(key, 'Path')[0]
        except WindowsError:
            return None
        finally:
            # Release the registry handle; it used to be leaked.
            CloseKey(key)

    def detect_installed_program(self, program):
        '''
        Return the installation path of program, or None if it is not found.

        Only implemented for Windows; raises NotImplementedError elsewhere.
        '''
        if self.platform != 'Windows':
            raise NotImplementedError
        return self.detect_windows_program(program)

    def determine_max_filehandles_open(self):
        '''
        Return the maximum number of files this process may have open.

        On 32-bit Windows the value is asked from win32file, on non-Windows
        systems the soft RLIMIT_NOFILE limit is used. 500 is returned in all
        other cases, including when the required optional module could not be
        imported.
        '''
        fallback = 500
        if self.platform == 'Windows':
            if self.architecture != 'i386':
                return fallback
            try:
                return win32file._getmaxstdio()
            except NameError:
                # The optional module-level pywin import failed.
                return fallback
        try:
            import resource
        except ImportError:
            return fallback
        return resource.getrlimit(resource.RLIMIT_NOFILE)[0]

    def update_python_path(self):
        '''
        Append every immediate subdirectory of the working directory to
        sys.path, skipping hidden directories and those in IGNORE_DIRS.
        Directories already on sys.path are not added a second time.
        '''
        IGNORE_DIRS = ['wikistats', 'zips']
        for name in os.listdir(self.working_directory):
            if name.startswith('.') or name in IGNORE_DIRS:
                continue
            path = os.path.join(self.working_directory, name)
            if os.path.isdir(path) and path not in sys.path:
                sys.path.append(path)

    def determine_ziptool(self):
        '''
        Return the name of the zip executable for the current platform, or
        None when no tool is configured (or the platform is unknown).
        '''
        tools = {'OSX': None,
                 'Windows': '7z.exe',
                 'Linux': None}
        return tools.get(self.platform)

    def set_file_locations(self):
        '''
        Derive the input, log and data locations from self.root and
        self.working_directory and store them as attributes. Returns None.
        '''
        self.input_location = os.path.join(self.root, 'wikimedia')
        # Default Input file
        self.input_filename = os.path.join(self.input_location, 'en',
            'wiki', 'enwiki-20100916-stub-meta-history.xml')
        # This is the place where error messages are stored for debugging
        # purposes
        self.log_location = os.path.join(self.working_directory, 'logs')
        self.csv_location = os.path.join(self.working_directory,
            'data', 'csv')
        self.dataset_location = os.path.join(self.working_directory,
            'datasets')
        self.binary_location = os.path.join(self.working_directory,
            'data', 'objects')
        self.namespace_location = os.path.join(self.working_directory,
            'namespaces')
Property changes on: trunk/tools/editor_trends/configuration.py
___________________________________________________________________
Added: svn:eol-style
1170 + native
Added: svn:mime-type
2171 + text/plain

Status & tagging log