Index: branches/ariel/xmldumps-backup/writeuptopageid.c |
— | — | @@ -4,7 +4,7 @@ |
5 | 5 | #include <errno.h> |
6 | 6 | #include <string.h> |
7 | 7 | |
8 | | -typedef enum { None, StartHeader, StartPage, AtPageID, WriteMem, Write, EndPage, AtLastPageID } States; |
| 8 | +typedef enum { None, StartHeader, EndHeader, StartPage, AtPageID, WriteMem, Write, EndPage, AtLastPageID } States; |
9 | 9 | |
10 | 10 | /* assume the header is never going to be longer than 1000 x 80 4-byte characters... how many |
11 | 11 | namespaces will one project want? */ |
— | — | @@ -29,9 +29,20 @@ |
30 | 30 | States setState (char *line, States currentState, int startPageID, int endPageID) { |
31 | 31 | int pageID = 0; |
32 | 32 | |
| 33 | + if (currentState == EndHeader) { |
| 34 | + /* if there is junk after the header (a line that does not start with <page>), we don't write it. |
| 35 | + Commands like dumpbz2filefromoffset can produce such streams. */ |
| 36 | + if (strncmp(line,"<page>",6)) { |
| 37 | + return(None); |
| 38 | + } |
| 39 | + } |
| 40 | + |
33 | 41 | if (!strncmp(line,"<mediawiki",10)) { |
34 | 42 | return(StartHeader); |
35 | 43 | } |
| 44 | + else if (!strncmp(line,"</siteinfo>",11)) { |
| 45 | + return(EndHeader); |
| 46 | + } |
36 | 47 | else if (!strncmp(line,"<page>",6)) { |
37 | 48 | return(StartPage); |
38 | 49 | } |
— | — | @@ -87,7 +98,7 @@ |
88 | 99 | |
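89 | 100 | /* returns 1 on success, 0 on error. NOTE(review): no explicit return value appears to be given when the state requires no write -- confirm against callers */ |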
90 | 101 | int writeIfNeeded(char *line, States state) { |
91 | | - if (state == StartHeader || state == WriteMem || state == Write || state == EndPage) { |
| 102 | + if (state == StartHeader || state == EndHeader || state == WriteMem || state == Write || state == EndPage) { |
92 | 103 | return(fwrite(line,strlen(line),1,stdout)); |
93 | 104 | } |
94 | 105 | } |