r83019 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r83018‎ | r83019 | r83020 >
Date:15:35, 1 March 2011
Author:ariel
Status:deferred
Tags:
Comment:
Allow writing the XML file from an arbitrary starting point.
Modified paths:
  • /branches/ariel/xmldumps-backup/writeuptopageid.c (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/writeuptopageid.c
@@ -11,10 +11,11 @@
1212 #define MAXHEADERLEN 524289
1313
1414 void usage(char *me) {
15 - fprintf(stderr,"Usage: %s pageID\n",me);
16 - fprintf(stderr,"Copies the contents of an XML file up to but not including\n");
17 - fprintf(stderr,"the specified pageID. This program is used in processing XML\n");
18 - fprintf(stderr,"dump files that were only partially written.\n");
 15+ fprintf(stderr,"Usage: %s startPageID endPageID\n",me);
 16+ fprintf(stderr,"Copies the contents of an XML file starting with and including startPageID\n");
 17+ fprintf(stderr,"and up to but not including endPageID. This program is used in processing XML\n");
 18+ fprintf(stderr,"dump files that were only partially written, as well as in writing partial\n");
 19+ fprintf(stderr,"stub files for reruns of those dump files.\n");
1920 }
2021
2122 /* note that even if we have only read a partial line
@@ -24,7 +25,7 @@
2526 in the page text.
2627
2728 returns new state */
28 -States setState (char *line, States currentState, int endPageID) {
 29+States setState (char *line, States currentState, int startPageID, int endPageID) {
2930 int pageID = 0;
3031
3132 if (!strncmp(line,"<mediawiki",10)) {
@@ -37,18 +38,28 @@
3839 else if (currentState == StartPage && (!strncmp(line, "<id>", 4))) {
3940 /* dig the id out, format is <id>num</id> */
4041 pageID = atoi(line+4);
41 - if (pageID == endPageID) {
 42+ if (pageID >= endPageID) {
4243 return(AtLastPageID);
4344 }
44 - else {
 45+ else if (pageID >= startPageID) {
4546 return(WriteMem);
4647 }
 48+ else {
 49+ /* we don't write anything */
 50+ return(None);
 51+ }
4752 }
4853 else if (currentState == WriteMem) {
4954 return(Write);
5055 }
5156 else if (!strncmp(line, "</page>", 6)) {
52 - return(EndPage);
 57+ if (currentState == Write) {
 58+ return(EndPage);
 59+ }
 60+ else {
 61+ /* don't write anything */
 62+ return(None);
 63+ }
5364 }
5465 return(currentState);
5566 }
@@ -59,11 +70,17 @@
6071
6172 if (state == WriteMem) {
6273 res = fwrite(mem,strlen(mem),1,stdout);
63 - mem[0]='\0';
6474 return(res);
6575 }
6676 }
6777
 78+void clearMemoryIfNeeded(char *mem, States state) {
 79+ if (state == WriteMem || state == None) {
 80+ mem[0]='\0';
 81+ }
 82+ return;
 83+}
 84+
6885 /* returns 1 on success, 0 on error */
6986 int writeIfNeeded(char *line, States state) {
7087 if (state == StartHeader || state == WriteMem || state == Write || state == EndPage) {
@@ -86,7 +103,8 @@
87104 }
88105
89106 int main(int argc,char **argv) {
90 - long int pageID = 0;
 107+ long int startPageID = 0;
 108+ long int endPageID = 0;
91109 char *nonNumeric = 0;
92110 States state = None;
93111 char *text;
@@ -97,27 +115,36 @@
98116 length of time. */
99117 char mem[MAXHEADERLEN];
100118
101 - if (argc != 2) {
 119+ if (argc != 3) {
102120 usage(argv[0]);
103121 exit(-1);
104122 }
105123
106124 errno = 0;
107 - pageID = strtol(argv[1], &nonNumeric, 10);
108 - if (pageID == 0 ||
 125+ startPageID = strtol(argv[1], &nonNumeric, 10);
 126+ if (startPageID == 0 ||
109127 *nonNumeric != 0 ||
110 - nonNumeric == (char *) &pageID ||
 128+ nonNumeric == (char *) &startPageID ||
111129 errno != 0) {
112 - fprintf (stderr,"The value you entered for pageID must be a positive integer.\n");
 130+ fprintf (stderr,"The value you entered for startPageID must be a positive integer.\n");
113131 usage(argv[0]);
114132 exit(-1);
115133 }
 134+ endPageID = strtol(argv[2], &nonNumeric, 10);
 135+ if (endPageID == 0 ||
 136+ *nonNumeric != 0 ||
 137+ nonNumeric == (char *) &endPageID ||
 138+ errno != 0) {
 139+ fprintf (stderr,"The value you entered for endPageID must be a positive integer.\n");
 140+ usage(argv[0]);
 141+ exit(-1);
 142+ }
116143
117144 while (fgets(line, sizeof(line)-1, stdin) != NULL) {
118145 text=line;
119146 while (*text && isspace(*text))
120147 text++;
121 - state = setState(text, state, pageID);
 148+ state = setState(text, state, startPageID, endPageID);
122149 if (!saveInMemIfNeeded(mem,line,state)) {
123150 fprintf(stderr,"failed to save text in temp memory, bailing\n");
124151 exit(-1);
@@ -126,6 +153,7 @@
127154 fprintf(stderr,"failed to write text from memory, bailing\n");
128155 exit(-1);
129156 }
 157+ clearMemoryIfNeeded(mem,state);
130158 if (!writeIfNeeded(line,state)) {
131159 fprintf(stderr,"failed to write text, bailing\n");
132160 exit(-1);

Status & tagging log