r110799 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r110798 | r110799 | r110800 >
Date:22:53, 6 February 2012
Author:rfaulk
Status:deferred
Tags:nodeploy 
Comment:
Added functionality to process posting revisions from a .tsv
Modified paths:
  • /trunk/tools/wsor/message_templates/umetrics/postings.py (modified) (history)

Diff [purge]

Index: trunk/tools/wsor/message_templates/umetrics/postings.py
@@ -99,7 +99,7 @@
100100 '-a', '--api_uri',
101101 type=str,
102102 help='the mediawiki API to connect to in order to retrieve message content (defaults to http://en.wikipedia.org/w/api.php)',
103 - default="http://en.wikipedia.org/w/api.php"
 103+ default="http://pt.wikipedia.org/w/api.php"
104104 )
105105 parser.add_argument(
106106 '--start',
@@ -145,6 +145,13 @@
146146 help='the output file name.',
147147 default=''
148148 )
 149+ parser.add_argument(
 150+ '--use_in_file',
 151+ type=str,
 152+ help='indicates that revisions should be read from a file. Name is to be specified.',
 153+ default=''
 154+ )
 155+
149156 args = parser.parse_args()
150157
151158 LOGGING_STREAM = sys.stderr
@@ -185,17 +192,45 @@
186193 logging.info("Querying for matching revisions:")
187194 revs = []
188195 count = 0
189 - for rev in db.getPostings(args.start, args.end, args.user_name, args.comment):
190 - count += 1
191 - revs.append(rev)
192 - if count % 100 == 0: LOGGING_STREAM.write("|")
193196
 197+ # Process input from file if args.use_in_file is not an empty string - otherwise use the enwiki slave
 198+ if cmp(args.use_in_file,'') != 0:
 199+ in_file = open(args.use_in_file, 'rb')
 200+
 201+ line = in_file.readline()
 202+ cols = line.split('\t')
 203+ cols[len(cols) - 1] = cols[len(cols) - 1][:-1]
 204+
 205+ line = in_file.readline()
 206+ while(line):
 207+ entry = dict()
 208+
 209+ elems = line.split('\t')
 210+ elems[len(cols) - 1] = elems[len(cols) - 1][:-1]
 211+ index = 0
 212+ try:
 213+ for col_name in cols:
 214+ entry[col_name] = elems[index]
 215+ index = index + 1
 216+
 217+ revs.append(entry)
 218+ except:
 219+ logging.info('Could not add row: %s' % str(elems))
 220+
 221+ line = in_file.readline()
 222+ else:
 223+ for rev in db.getPostings(args.start, args.end, args.user_name, args.comment):
 224+ count += 1
 225+ revs.append(rev)
 226+ if count % 100 == 0: LOGGING_STREAM.write("|")
 227+
194228 LOGGING_STREAM.write("\n")
195 -
 229+
196230 logging.info("Checking for message templates")
197231 count = {"matched": 0, "missed": 0}
198232 for rev in revs:
199233 logging.debug("Matching revision %(rev_id)s peformed by %(poster_name)s @ %(timestamp)s: %(rev_comment)s" % rev)
 234+
200235 message = api.getAdded(rev['rev_id'])
201236
202237 match = args.message.search(message)

Status & tagging log