r102811 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r102810 | r102811 | r102812 >
Date:20:07, 11 November 2011
Author:khorn
Status:ok
Tags:
Comment:
After confirming with metrics, made changes that filter out a whole lot of garbage.
r102746
Modified paths:
  • /trunk/udplog/filters/lp-filter.c (modified) (history)

Diff [purge]

Index: trunk/udplog/filters/lp-filter.c
@@ -2,20 +2,23 @@
33 #include <stdio.h>
44 #include <string.h>
55
6 -char url1[] ="wikimediafoundation.org/";
7 -char url2[] ="donate.wikimedia.org/";
 6+char * urls[] = {
 7+ "wikimediafoundation.org/",
 8+ "donate.wikimedia.org/",
 9+};
810
9 -
1011 main() {
1112 char line[65534];
12 - char title[65534];
1313 char *t = 0;
14 - char *u = 0;
1514
16 - //to cut down on the processing time: Guess first.
17 - //longest filter is 25 characters.
18 - //I'm allowing for one heck of a subdomain, here.
19 - int search_length = 75;
 15+ int i = 0;
 16+
 17+ //calculate these up-front.
 18+ int filtercount = sizeof(urls)/sizeof(*urls);
 19+ int url_length[filtercount];
 20+ for (i=0; i<filtercount; ++i){
 21+ url_length[i] = strlen(urls[i]);
 22+ }
2023
2124 while (!feof(stdin)) {
2225 char *r;
@@ -24,7 +27,7 @@
2528 int pos=0;
2629 t = line;
2730
28 - while(pos++<8) {
 31+ while(pos++<7) {
2932 if (!t)
3033 continue;
3134 t = strstr(t, " ");
@@ -34,25 +37,22 @@
3538 }
3639 if (!t)
3740 continue;
38 -
39 - strncpy(title, t, search_length);
40 - title[search_length]=0;
41 -
42 - if (strstr(title, url1) || strstr(title, url2) ){
43 - u = strstr(title, " ");
44 -
45 - if (!u){ //no spaces, just do it.
46 - printf("%s", line);
47 - } else {
48 - //make sure it was before the first space.
49 - t = strstr(title, url1);
50 - if (!t)
51 - t = strstr(title, url2);
52 - if ( (t) && t < u ) {
53 - printf("%s", line);
54 - }
55 - }
 41+
 42+ t = strstr(t, "://");
 43+ if (!t)
 44+ continue;
 45+ t += 3;
 46+
 47+ int found = 0;
 48+ for (i = 0; i < filtercount; ++i) {
 49+ if (strncmp(t, urls[i], url_length[i]) == 0) {
 50+ found = 1;
 51+ break;
 52+ }
5653 }
 54+ if (found) {
 55+ printf("%s", line);
 56+ }
5757
5858 }
5959

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r102746 | Adding donate.wikimedia.org to the landing page filters (and slightly improve... | khorn | 03:25, 11 November 2011

Status & tagging log