Index: trunk/udplog/filters/lp-filter.c |
— | — | @@ -2,18 +2,24 @@ |
3 | 3 | #include <stdio.h> |
4 | 4 | #include <string.h> |
5 | 5 | |
6 | | -char burl[]="wikimediafoundation.org/"; |
| 6 | +char url1[] ="wikimediafoundation.org/"; |
| 7 | +char url2[] ="donate.wikimedia.org/"; |
7 | 8 | |
8 | 9 | |
9 | 10 | main() { |
10 | | - char line[10240]; |
11 | | - char title[10240]; |
12 | | - char *urlstart, *urlend; |
| 11 | + char line[65534]; |
| 12 | + char title[65534]; |
13 | 13 | char *t = 0; |
| 14 | + char *u = 0; |
| 15 | + |
| 16 | + //To cut down on processing time, only the first search_length characters of the field are searched. |
| 17 | + //The longest filter is 24 characters (25 counting the terminating NUL). |
| 18 | + //I'm allowing for one heck of a subdomain, here. |
| 19 | + int search_length = 75; |
14 | 20 | |
15 | 21 | while (!feof(stdin)) { |
16 | 22 | char *r; |
17 | | - r=fgets(line, 10000, stdin); |
| 23 | + r=fgets(line, 65534, stdin); |
18 | 24 | |
19 | 25 | int pos=0; |
20 | 26 | t = line; |
— | — | @@ -28,13 +34,26 @@ |
29 | 35 | } |
30 | 36 | if (!t) |
31 | 37 | continue; |
32 | | - urlstart = t; |
33 | | - urlend = strstr(urlstart, " "); |
34 | | - strncpy(title, urlstart, urlend-urlstart); |
35 | | - title[urlend-urlstart]=0; |
36 | | - if (strstr(title, burl) ) |
37 | | - printf("%s", line); |
38 | 38 | |
| 39 | + strncpy(title, t, search_length); |
| 40 | + title[search_length]=0; |
| 41 | + |
| 42 | + if (strstr(title, url1) || strstr(title, url2) ){ |
| 43 | + u = strstr(title, " "); |
| 44 | + |
| 45 | + if (!u){ //no spaces, just do it. |
| 46 | + printf("%s", line); |
| 47 | + } else { |
| 48 | + //make sure it was before the first space. |
| 49 | + t = strstr(title, url1); |
| 50 | + if (!t) |
| 51 | + t = strstr(title, url2); |
| 52 | + if ( (t) && t < u ) { |
| 53 | + printf("%s", line); |
| 54 | + } |
| 55 | + } |
| 56 | + } |
| 57 | + |
39 | 58 | } |
40 | 59 | |
41 | 60 | } |