Index: trunk/tools/wsor/message_templates/umetrics/postings.py |
— | — | @@ -99,7 +99,7 @@ |
100 | 100 | '-a', '--api_uri', |
101 | 101 | type=str, |
102 | 102 | help='the mediawiki API to connect to in order to retrieve message content (defaults to http://en.wikipedia.org/w/api.php)', |
103 | | - default="http://en.wikipedia.org/w/api.php" |
| 103 | + default="http://pt.wikipedia.org/w/api.php" |
104 | 104 | ) |
105 | 105 | parser.add_argument( |
106 | 106 | '--start', |
— | — | @@ -145,6 +145,13 @@ |
146 | 146 | help='the output file name.', |
147 | 147 | default='' |
148 | 148 | ) |
| 149 | + parser.add_argument( |
| 150 | + '--use_in_file', |
| 151 | + type=str, |
| 152 | + help='indicates that revisions should be read from a file. Name is to be specified.', |
| 153 | + default='' |
| 154 | + ) |
| 155 | + |
149 | 156 | args = parser.parse_args() |
150 | 157 | |
151 | 158 | LOGGING_STREAM = sys.stderr |
— | — | @@ -185,17 +192,45 @@ |
186 | 193 | logging.info("Querying for matching revisions:") |
187 | 194 | revs = [] |
188 | 195 | count = 0 |
189 | | - for rev in db.getPostings(args.start, args.end, args.user_name, args.comment): |
190 | | - count += 1 |
191 | | - revs.append(rev) |
192 | | - if count % 100 == 0: LOGGING_STREAM.write("|") |
193 | 196 | |
| 197 | + # If args.use_in_file is non-empty, read revisions from that tab-separated file (its first line names the columns) - otherwise use the enwiki slave |
| 198 | + if cmp(args.use_in_file,'') != 0: |
| 199 | + in_file = open(args.use_in_file, 'rb') |
| 200 | + |
| 201 | + line = in_file.readline() |
| 202 | + cols = line.split('\t') |
| 203 | + cols[len(cols) - 1] = cols[len(cols) - 1][:-1] |
| 204 | + |
| 205 | + line = in_file.readline() |
| 206 | + while(line): |
| 207 | + entry = dict() |
| 208 | + |
| 209 | + elems = line.split('\t') |
| 210 | + elems[len(cols) - 1] = elems[len(cols) - 1][:-1] |
| 211 | + index = 0 |
| 212 | + try: |
| 213 | + for col_name in cols: |
| 214 | + entry[col_name] = elems[index] |
| 215 | + index = index + 1 |
| 216 | + |
| 217 | + revs.append(entry) |
| 218 | + except: |
| 219 | + logging.info('Could not add row: %s' % str(elems)) |
| 220 | + |
| 221 | + line = in_file.readline() |
| 222 | + else: |
| 223 | + for rev in db.getPostings(args.start, args.end, args.user_name, args.comment): |
| 224 | + count += 1 |
| 225 | + revs.append(rev) |
| 226 | + if count % 100 == 0: LOGGING_STREAM.write("|") |
| 227 | + |
194 | 228 | LOGGING_STREAM.write("\n") |
195 | | - |
| 229 | + |
196 | 230 | logging.info("Checking for message templates") |
197 | 231 | count = {"matched": 0, "missed": 0} |
198 | 232 | for rev in revs: |
199 | 233 | logging.debug("Matching revision %(rev_id)s peformed by %(poster_name)s @ %(timestamp)s: %(rev_comment)s" % rev) |
| 234 | + |
200 | 235 | message = api.getAdded(rev['rev_id']) |
201 | 236 | |
202 | 237 | match = args.message.search(message) |