Index: trunk/udplog/filters/lp-filter.c |
— | — | @@ -2,20 +2,23 @@ |
3 | 3 | #include <stdio.h> |
4 | 4 | #include <string.h> |
5 | 5 | |
6 | | -char url1[] ="wikimediafoundation.org/"; |
7 | | -char url2[] ="donate.wikimedia.org/"; |
| 6 | +char * urls[] = { |
| 7 | + "wikimediafoundation.org/", |
| 8 | + "donate.wikimedia.org/", |
| 9 | +}; |
8 | 10 | |
9 | | - |
10 | 11 | main() { |
11 | 12 | char line[65534]; |
12 | | - char title[65534]; |
13 | 13 | char *t = 0; |
14 | | - char *u = 0; |
15 | 14 | |
16 | | - //to cut down on the processing time: Guess first. |
17 | | - //longest filter is 25 characters. |
18 | | - //I'm allowing for one heck of a subdomain, here. |
19 | | - int search_length = 75; |
| 15 | + int i = 0; |
| 16 | + |
| 17 | + //calculate these up-front. |
| 18 | + int filtercount = sizeof(urls)/sizeof(*urls); |
| 19 | + int url_length[filtercount]; |
| 20 | + for (i=0; i<filtercount; ++i){ |
| 21 | + url_length[i] = strlen(urls[i]); |
| 22 | + } |
20 | 23 | |
21 | 24 | while (!feof(stdin)) { |
22 | 25 | char *r; |
— | — | @@ -24,7 +27,7 @@ |
25 | 28 | int pos=0; |
26 | 29 | t = line; |
27 | 30 | |
28 | | - while(pos++<8) { |
| 31 | + while(pos++<7) { |
29 | 32 | if (!t) |
30 | 33 | continue; |
31 | 34 | t = strstr(t, " "); |
— | — | @@ -34,25 +37,22 @@ |
35 | 38 | } |
36 | 39 | if (!t) |
37 | 40 | continue; |
38 | | - |
39 | | - strncpy(title, t, search_length); |
40 | | - title[search_length]=0; |
41 | | - |
42 | | - if (strstr(title, url1) || strstr(title, url2) ){ |
43 | | - u = strstr(title, " "); |
44 | | - |
45 | | - if (!u){ //no spaces, just do it. |
46 | | - printf("%s", line); |
47 | | - } else { |
48 | | - //make sure it was before the first space. |
49 | | - t = strstr(title, url1); |
50 | | - if (!t) |
51 | | - t = strstr(title, url2); |
52 | | - if ( (t) && t < u ) { |
53 | | - printf("%s", line); |
54 | | - } |
55 | | - } |
| 41 | + |
| 42 | + t = strstr(t, "://"); |
| 43 | + if (!t) |
| 44 | + continue; |
| 45 | + t += 3; |
| 46 | + |
| 47 | + int found = 0; |
| 48 | + for (i = 0; i < filtercount; ++i) { |
| 49 | + if (strncmp(t, urls[i], url_length[i]) == 0) { |
| 50 | + found = 1; |
| 51 | + break; |
| 52 | + } |
56 | 53 | } |
| 54 | + if (found) { |
| 55 | + printf("%s", line); |
| 56 | + } |
57 | 57 | |
58 | 58 | } |
59 | 59 | |