r110264 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r110263‎ | r110264 | r110265 >
Date:03:38, 30 January 2012
Author:rfaulk
Status:deferred
Tags:
Comment:
Cleaned up the implementation to reference templates by index, which makes them easier to process
Modified paths:
  • /trunk/tools/wsor/message_templates/run_postings_and_metrics.py (modified) (history)

Diff [purge]

Index: trunk/tools/wsor/message_templates/run_postings_and_metrics.py
@@ -30,52 +30,59 @@
3131 Execution body of main
3232 """
3333 def main(args):
 34+
 35+ # Set flags for template indices to process
 36+ template_indices = {1 : False, 2 : False, 3 : False,
 37+ 60 : False, 61 : False, 62 : False, 63 : False, 64 : False, 65 : False, 66 : False, 67 : False, 68 : False, 69 : False,
 38+ 70 : False, 71 : False, 72 : False, 73 : False, 74 : False, 75 : False, 76 : False, 77 : False,
 39+ 84 : False, 85 : False, 86 : False, 99 : False, 100 : False, 101 : False, 102 : False, 103 : False, 104 : False,
 40+ 105 : False, 106 : False, 107 : False, 108 : False, 109 : False, 110 : False, 111 : False, 112 : False, 113 : False,
 41+ 114 : False, 115 : False, 116 : False, 78 : False, 79 : False, 81 : False, 82 : False, 87 : False, 88 : False,
 42+ 89 : True, 90 : True, 91 : True, 92 : True, 93 : True, 94 : True, 95 : True, 96 : True, 97 : True, 98 : True
 43+ }
3444
35 - test_keys = ['Huggle_3_z60', 'Huggle_3_z61', 'Huggle_3_z62', 'Huggle_3_z63', 'Huggle_3_z64', 'Huggle_3_z65', 'Huggle_3_z66', \
36 - 'Huggle_3_z67', 'Huggle_3_z68', 'Huggle_3_z69', 'Huggle_3_z70', 'Huggle_3_z71', 'Huggle_3_z72', 'Huggle_3_z73', \
37 - 'Huggle_3_z74', 'Huggle_3_z75', 'Huggle_3_z76', \
38 - 'Huggle_3_z77', 'Huggle_3_z1', 'Huggle_3_z2', 'Huggle_3_z3'] # "Huggle 1 Portuguese"
39 -
40 - gather_data = {'Huggle_3_z60' : False, 'Huggle_3_z61' : False, 'Huggle_3_z62' : False, 'Huggle_3_z63' : False, 'Huggle_3_z64' : False, 'Huggle_3_z65' : False, 'Huggle_3_z66' : False, 'Huggle_3_z67' : False, \
41 - 'Huggle_3_z68' : False, 'Huggle_3_z69' : False, 'Huggle_3_z70' : False, 'Huggle_3_z71' : False, 'Huggle_3_z72' : False, 'Huggle_3_z73' : False, 'Huggle_3_z74' : False, 'Huggle_3_z75' : False, \
42 - 'Huggle_3_z76' : False, 'Huggle_3_z77' : False, \
43 - 'Huggle_3_z1' : True, 'Huggle_3_z2' : True, 'Huggle_3_z3' : True}
44 -
45 - templates = {'Huggle_3_z60' : 'z60', 'Huggle_3_z61' : 'z61', 'Huggle_3_z62' : 'z62', 'Huggle_3_z63' : 'z63', 'Huggle_3_z64' : 'z64', 'Huggle_3_z65' : 'z65', 'Huggle_3_z66' : 'z66', 'Huggle_3_z67' : 'z67', \
46 - 'Huggle_3_z68' : 'z68', 'Huggle_3_z69' : 'z69', 'Huggle_3_z70' : 'z70', 'Huggle_3_z71' : 'z71', 'Huggle_3_z72' : 'z72', 'Huggle_3_z73' : 'z73', 'Huggle_3_z74' : 'z74', 'Huggle_3_z75' : 'z75', \
47 - 'Huggle_3_z76' : 'z76', 'Huggle_3_z77' : 'z77', \
48 - 'Huggle_3_z1' : 'z1', 'Huggle_3_z2' : 'z2', 'Huggle_3_z3' : 'z3'}
49 -
50 - start_times = {'Huggle_3_z60' : '20111018000000', 'Huggle_3_z61' : '20111018000000', 'Huggle_3_z62' : '20111018000000', 'Huggle_3_z63' : '20111018000000', 'Huggle_3_z64' : '20111018000000', 'Huggle_3_z65' : '20111018000000', \
51 - 'Huggle_3_z66' : '20111018000000', 'Huggle_3_z67' : '20111018000000', 'Huggle_3_z68' : '20111018000000', 'Huggle_3_z69' : '20111018000000', 'Huggle_3_z70' : '20111018000000', 'Huggle_3_z71' : '20111018000000', \
52 - 'Huggle_3_z72' : '20111018000000', 'Huggle_3_z73' : '20111018000000', 'Huggle_3_z74' : '20111018000000', 'Huggle_3_z75' : '20111018000000', 'Huggle_3_z76' : '20111018000000', 'Huggle_3_z77' : '20111018000000', \
53 - 'Huggle_3_z1' : '20111027000000', 'Huggle_3_z2' : '20111027000000', 'Huggle_3_z3' : '20111027000000'}
54 -
55 - end_times = {'Huggle_3_z60' : '20111119000000', 'Huggle_3_z61' : '20111119000000', 'Huggle_3_z62' : '20111119000000', 'Huggle_3_z63' : '20111119000000', 'Huggle_3_z64' : '20111119000000', 'Huggle_3_z65' : '20111119000000', \
56 - 'Huggle_3_z66' : '20111119000000', 'Huggle_3_z67' : '20111119000000', 'Huggle_3_z68' : '20111119000000', 'Huggle_3_z69' : '20111119000000', 'Huggle_3_z70' : '20111119000000', 'Huggle_3_z71' : '20111119000000', \
57 - 'Huggle_3_z72' : '20111119000000', 'Huggle_3_z73' : '20111119000000', 'Huggle_3_z74' : '20111119000000', 'Huggle_3_z75' : '20111119000000', 'Huggle_3_z76' : '20111119000000', 'Huggle_3_z77' : '20111119000000',
58 - 'Huggle_3_z1' : '20111128000000', 'Huggle_3_z2' : '20111128000000', 'Huggle_3_z3' : '20111128000000'}
59 -
6045 # Run postings and metrics
6146
62 - generator = 'editcounts'
 47+ generator = 'editcount'
6348 postings_cmd = './postings -h db1047 --start=%(start_time)s --end=%(end_time)s --comment="\(\[\[WP:HG\|HG\]\]\)" --message="{{%(template)s}}" --outfilename postings_%(file_name)s.tsv'
64 - metrics_cmd = 'cat ./output/postings_%(file_name)s.tsv | ./metrics -h db1047 --header --outfilename metrics_%(file_name)s.tsv %(generator)s'
 49+ metrics_cmd = 'cat ./output/postings_%(file_name)s.tsv | ./metrics -h db1047 --header --outfilename metrics_%(file_name)s_%(fname_generator)s.tsv %(generator)s'
6550
66 - for key in test_keys:
67 -
68 - if gather_data[key]:
69 - logging.info('Generating postings for %s' % key)
70 - filename_part = start_times[key][4:8] + '_' + end_times[key][4:8] + '_' + templates[key]
 51+ for key in template_indices:
 52+
 53+ name, start_ts, end_ts = get_experiment(key)
 54+ template_name = 'z' + str(key)
 55+
 56+ if template_indices[key]:
 57+
 58+ logging.info('Generating postings for %s' % template_name)
 59+ filename_part = start_ts[4:8] + '_' + end_ts[4:8] + '_' + template_name
7160
72 - # os.system(postings_cmd % {'start_time' : start_times[key], 'end_time' : end_times[key], 'template' : templates[key], 'file_name' : filename_part})
73 - os.system(metrics_cmd % {'file_name' : filename_part, 'file_name' : filename_part, 'generator' : generator})
 61+ os.system(postings_cmd % {'start_time' : start_ts, 'end_time' : end_ts, 'template' : template_name, 'file_name' : filename_part})
 62+ # os.system(metrics_cmd % {'file_name' : filename_part, 'file_name' : filename_part, 'generator' : generator, 'fname_generator' : generator})
7463 else:
75 - logging.info('Skipping postings for %s' % key)
 64+ logging.info('Skipping postings for %s' % template_name)
7665
7766 return 0
7867
7968 """
 69+ Returns the experiment name and the start and end timestamps corresponding to the given template index
 70+"""
 71+def get_experiment(index):
 72+
 73+ if index >= 60 and index <= 77:
 74+ return 'Huggle_3', '20111018000000', '20111119000000'
 75+ elif index >= 1 and index <= 3:
 76+ return 'Huggle_1_Portuguese', '20111027000000', '20111128000000'
 77+ elif (index >= 84 and index <= 86) or (index >= 99 and index <= 106):
 78+ return 'Huggle_Short_1_and_2', '20111108000000', '20111202000000'
 79+ elif index >= 107 and index <= 116:
 80+ return 'Huggle_Short_2', '20111122000000', '20111222000000'
 81+ elif (index >= 77 and index <= 79) or (index >= 81 and index <= 82):
 82+ return 'Twinkle_1', '20111109000000', '20111209000000'
 83+ elif index >= 87 and index <= 98:
 84+ return 'XLinkBot', '20111117000000', '20111217000000'
 85+
 86+"""
8087 Call main, exit when execution is complete
8188
8289 """

Status & tagging log