Index: branches/ariel/xmldumps-backup/writeuptopageid.c |
— | — | @@ -11,10 +11,11 @@ |
12 | 12 | #define MAXHEADERLEN 524289 |
13 | 13 | |
14 | 14 | void usage(char *me) { |
15 | | - fprintf(stderr,"Usage: %s pageID\n",me); |
16 | | - fprintf(stderr,"Copies the contents of an XML file up to but not including\n"); |
17 | | - fprintf(stderr,"the specified pageID. This program is used in processing XML\n"); |
18 | | - fprintf(stderr,"dump files that were only partially written.\n"); |
| 15 | + fprintf(stderr, "Usage: %s startPageID endPageID\n", me); /* me: program name; main passes argv[0] */ |
| 16 | + fputs("Copies the contents of an XML file starting with and including startPageID\n", stderr); |
| 17 | + fputs("and up to but not including endPageID. This program is used in processing XML\n", stderr); |
| 18 | + fputs("dump files that were only partially written, as well as in writing partial\n", stderr); |
| 19 | + fputs("stub files for reruns of those dump files.\n", stderr); |
19 | 20 | } |
20 | 21 | |
21 | 22 | /* note that even if we have only read a partial line |
— | — | @@ -24,7 +25,7 @@ |
25 | 26 | in the page text. |
26 | 27 | |
27 | 28 | returns new state */ |
28 | | -States setState (char *line, States currentState, int endPageID) { |
| 29 | +States setState (char *line, States currentState, int startPageID, int endPageID) { |
29 | 30 | int pageID = 0; |
30 | 31 | |
31 | 32 | if (!strncmp(line,"<mediawiki",10)) { |
— | — | @@ -37,18 +38,28 @@ |
38 | 39 | else if (currentState == StartPage && (!strncmp(line, "<id>", 4))) { |
39 | 40 | /* dig the id out, format is <id>num</id> */ |
40 | 41 | pageID = atoi(line+4); |
41 | | - if (pageID == endPageID) { |
| 42 | + if (pageID >= endPageID) { |
42 | 43 | return(AtLastPageID); |
43 | 44 | } |
44 | | - else { |
| 45 | + else if (pageID >= startPageID) { |
45 | 46 | return(WriteMem); |
46 | 47 | } |
| 48 | + else { |
| 49 | + /* we don't write anything */ |
| 50 | + return(None); |
| 51 | + } |
47 | 52 | } |
48 | 53 | else if (currentState == WriteMem) { |
49 | 54 | return(Write); |
50 | 55 | } |
51 | 56 | else if (!strncmp(line, "</page>", 6)) { |
52 | | - return(EndPage); |
| 57 | + if (currentState == Write) { |
| 58 | + return(EndPage); |
| 59 | + } |
| 60 | + else { |
| 61 | + /* don't write anything */ |
| 62 | + return(None); |
| 63 | + } |
53 | 64 | } |
54 | 65 | return(currentState); |
55 | 66 | } |
— | — | @@ -59,11 +70,17 @@ |
60 | 71 | |
61 | 72 | if (state == WriteMem) { |
62 | 73 | res = fwrite(mem,strlen(mem),1,stdout); |
63 | | - mem[0]='\0'; |
64 | 74 | return(res); |
65 | 75 | } |
66 | 76 | } |
67 | 77 | |
| 78 | +/* Empties mem (sets it to "") when state is WriteMem or None; any other state leaves it untouched. */ |
| 79 | +void clearMemoryIfNeeded(char *mem, States state) { |
| 80 | + if (state == None || state == WriteMem) { |
| 81 | + mem[0] = '\0'; |
| 82 | + } |
| 83 | +} |
| 84 | + |
68 | 85 | /* returns 1 on success, 0 on error */ |
69 | 86 | int writeIfNeeded(char *line, States state) { |
70 | 87 | if (state == StartHeader || state == WriteMem || state == Write || state == EndPage) { |
— | — | @@ -86,7 +103,8 @@ |
87 | 104 | } |
88 | 105 | |
89 | 106 | int main(int argc,char **argv) { |
90 | | - long int pageID = 0; |
| 107 | + long int startPageID = 0; |
| 108 | + long int endPageID = 0; |
91 | 109 | char *nonNumeric = 0; |
92 | 110 | States state = None; |
93 | 111 | char *text; |
— | — | @@ -97,27 +115,36 @@ |
98 | 116 | length of time. */ |
99 | 117 | char mem[MAXHEADERLEN]; |
100 | 118 | |
101 | | - if (argc != 2) { |
| 119 | + if (argc != 3) { |
102 | 120 | usage(argv[0]); |
103 | 121 | exit(-1); |
104 | 122 | } |
105 | 123 | |
106 | 124 | errno = 0; |
107 | | - pageID = strtol(argv[1], &nonNumeric, 10); |
108 | | - if (pageID == 0 || |
| 125 | + startPageID = strtol(argv[1], &nonNumeric, 10); |
| 126 | + if (startPageID == 0 || |
109 | 127 | *nonNumeric != 0 || |
110 | | - nonNumeric == (char *) &pageID || |
| 128 | + nonNumeric == (char *) &startPageID || |
111 | 129 | errno != 0) { |
112 | | - fprintf (stderr,"The value you entered for pageID must be a positive integer.\n"); |
| 130 | + fprintf (stderr,"The value you entered for startPageID must be a positive integer.\n"); |
113 | 131 | usage(argv[0]); |
114 | 132 | exit(-1); |
115 | 133 | } |
| 134 | + endPageID = strtol(argv[2], &nonNumeric, 10); |
| 135 | + if (endPageID == 0 || |
| 136 | + *nonNumeric != 0 || |
| 137 | + nonNumeric == (char *) &endPageID || |
| 138 | + errno != 0) { |
| 139 | + fprintf (stderr,"The value you entered for endPageID must be a positive integer.\n"); |
| 140 | + usage(argv[0]); |
| 141 | + exit(-1); |
| 142 | + } |
116 | 143 | |
117 | 144 | while (fgets(line, sizeof(line)-1, stdin) != NULL) { |
118 | 145 | text=line; |
119 | 146 | while (*text && isspace(*text)) |
120 | 147 | text++; |
121 | | - state = setState(text, state, pageID); |
| 148 | + state = setState(text, state, startPageID, endPageID); |
122 | 149 | if (!saveInMemIfNeeded(mem,line,state)) { |
123 | 150 | fprintf(stderr,"failed to save text in temp memory, bailing\n"); |
124 | 151 | exit(-1); |
— | — | @@ -126,6 +153,7 @@ |
127 | 154 | fprintf(stderr,"failed to write text from memory, bailing\n"); |
128 | 155 | exit(-1); |
129 | 156 | } |
| 157 | + clearMemoryIfNeeded(mem,state); |
130 | 158 | if (!writeIfNeeded(line,state)) { |
131 | 159 | fprintf(stderr,"failed to write text, bailing\n"); |
132 | 160 | exit(-1); |