r91640 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r91639 | r91640 | r91641 >
Date:14:26, 7 July 2011
Author:ariel
Status:deferred
Tags:
Comment:
off_t for all lseeks and file positions
Modified paths:
  • /branches/ariel/xmldumps-backup/mwbzutils/dumpbz2filefromoffset.c (modified) (history)
  • /branches/ariel/xmldumps-backup/mwbzutils/dumplastbz2block.c (modified) (history)
  • /branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c (modified) (history)
  • /branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c (modified) (history)
  • /branches/ariel/xmldumps-backup/mwbzutils/mwbzutils.h (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/mwbzutils/dumpbz2filefromoffset.c
@@ -38,7 +38,7 @@
3939
4040 b = init_buffer(length);
4141 bfile.bytes_read = 0;
42 - bfile.position = 0;
 42+ bfile.position = (off_t)0;
4343
4444 while ((get_buffer_of_uncompressed_data(b, fin, &bfile, FORWARD)>=0) && (! bfile.eof) && (!done)) {
4545 /* fixme either we don't check the return code right or we don't notice no bytes read or we don't clear the bytes read */
@@ -124,7 +124,7 @@
125125 0 on success,
126126 -1 on error
127127 */
128 -int dump_from_first_page_id_after_offset(int fin, int position) {
 128+int dump_from_first_page_id_after_offset(int fin, off_t position) {
129129 int res;
130130 regmatch_t *match_page;
131131 regex_t compiled_page;
@@ -229,7 +229,8 @@
230230 BZ_OK on success, various BZ_ errors otherwise.
231231 */
232232 int main(int argc, char **argv) {
233 - int fin, position, res;
 233+ int fin, res;
 234+ off_t position;
234235
235236 if (argc != 3) {
236237 fprintf(stderr,"usage: %s infile position\n", argv[0]);
@@ -242,8 +243,8 @@
243244 exit(-1);
244245 }
245246
246 - position = atoi(argv[2]);
247 - if (position <0) {
 247+ position = atoll(argv[2]);
 248+ if (position <(off_t)0) {
248249 fprintf(stderr,"please specify a position >= 0.\n");
249250 fprintf(stderr,"usage: %s infile position\n", argv[0]);
250251 exit(-1);
Index: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
@@ -8,6 +8,7 @@
99 #include <errno.h>
1010 #include <sys/types.h>
1111 #include <regex.h>
 12+#include <inttypes.h>
1213 #include "mwbzutils.h"
1314
1415
@@ -42,7 +43,7 @@
4344 0 if no pageid found,
4445 -1 on error
4546 */
46 -int get_first_page_id_after_offset(int fin, int position, page_info_t *pinfo) {
 47+int get_first_page_id_after_offset(int fin, off_t position, page_info_t *pinfo) {
4748 int res;
4849 regmatch_t *match_page, *match_page_id;
4950 regex_t compiled_page, compiled_page_id;
@@ -64,12 +65,12 @@
6566 b = init_buffer(length);
6667
6768 pinfo->bits_shifted = -1;
68 - pinfo->position = -1;
 69+ pinfo->position = (off_t)-1;
6970 pinfo->page_id = -1;
7071
7172 bfile.bytes_read = 0;
7273
73 - if (find_first_bz2_block_from_offset(&bfile, fin, position, FORWARD) <= 0) {
 74+ if (find_first_bz2_block_from_offset(&bfile, fin, position, FORWARD) <= (off_t)0) {
7475 /* fprintf(stderr,"failed to find block in bz2file (1)\n"); */
7576 return(-1);
7677 }
@@ -162,21 +163,21 @@
163164 */
164165 int do_iteration(iter_info_t *iinfo, int fin, page_info_t *pinfo) {
165166 int res;
166 - int new_position;
167 - int interval;
 167+ off_t new_position;
 168+ off_t interval;
168169
169170 /*
170171 last_position is somewhere in the interval, perhaps at an end
171172 last_value is the value we had at that position
172173 */
173174
174 - interval = (iinfo->right_end - iinfo->left_end)/2;
175 - if (interval == 0) {
176 - interval = 1;
 175+ interval = (iinfo->right_end - iinfo->left_end)/(off_t)2;
 176+ if (interval == (off_t)0) {
 177+ interval = (off_t)1;
177178 }
178 - /* fprintf(stderr,"interval size is %ld, left end %ld, right end %ld, last val %d\n",interval, iinfo->left_end, iinfo->right_end, iinfo->last_value); */
 179+ /* fprintf(stderr,"interval size is %"PRId64", left end %"PRId64", right end %"PRId64", last val %d\n",interval, iinfo->left_end, iinfo->right_end, iinfo->last_value); */
179180 /* if we're this close, we'll check this value and be done with it */
180 - if (iinfo->right_end -iinfo->left_end < 2) {
 181+ if (iinfo->right_end -iinfo->left_end < (off_t)2) {
181182 new_position = iinfo->left_end;
182183 iinfo->right_end = iinfo->left_end;
183184 }
@@ -231,7 +232,8 @@
232233 returns: 0 on success, -1 on error
233234 */
234235 int main(int argc, char **argv) {
235 - int fin, position, res, interval, page_id, oldmarker, file_size;
 236+ int fin, res, page_id;
 237+ off_t position, interval, file_size;
236238 page_info_t pinfo;
237239 iter_info_t iinfo;
238240
@@ -256,28 +258,27 @@
257259 file_size = get_file_size(fin);
258260
259261 interval = file_size;
260 - position = 0;
261 - oldmarker = -1;
 262+ position = (off_t)0;
262263 pinfo.bits_shifted = -1;
263 - pinfo.position = -1;
 264+ pinfo.position = (off_t)-1;
264265 pinfo.page_id = -1;
265266
266 - iinfo.left_end = 0;
 267+ iinfo.left_end = (off_t)0;
267268 file_size = get_file_size(fin);
268269 iinfo.right_end = file_size;
269270 iinfo.value_wanted = page_id;
270271
271 - res = get_first_page_id_after_offset(fin, 0, &pinfo);
 272+ res = get_first_page_id_after_offset(fin, (off_t)0, &pinfo);
272273 if (res > 0) {
273274 iinfo.last_value = pinfo.page_id;
274 - iinfo.last_position = 0;
 275+ iinfo.last_position = (off_t)0;
275276 }
276277 else {
277278 fprintf(stderr,"failed to get anything useful from the beginning of the file even, bailing.\n");
278279 exit(1);
279280 }
280281 if (pinfo.page_id == page_id) {
281 - fprintf(stdout,"position:%d page_id:%d\n",pinfo.position, pinfo.page_id);
 282+ fprintf(stdout,"position:%"PRId64" page_id:%d\n",pinfo.position, pinfo.page_id);
282283 exit(0);
283284 }
284285
@@ -285,7 +286,7 @@
286287 res = do_iteration(&iinfo, fin, &pinfo);
287288 /* things to check: bad return? interval is 0 bytes long? */
288289 if (iinfo.left_end == iinfo.right_end) {
289 - fprintf(stdout,"position:%d page_id:%d\n",pinfo.position, pinfo.page_id);
 290+ fprintf(stdout,"position:%"PRId64" page_id:%d\n",pinfo.position, pinfo.page_id);
290291 exit(0);
291292 }
292293 else if (res < 0) {
Index: branches/ariel/xmldumps-backup/mwbzutils/dumplastbz2block.c
@@ -6,6 +6,7 @@
77 #include <fcntl.h>
88 #include <stdlib.h>
99 #include <errno.h>
 10+#include <inttypes.h>
1011 #include "mwbzutils.h"
1112
1213
@@ -54,9 +55,9 @@
5556 bfile.position = bfile.file_size;
5657 }
5758 else {
58 - bfile.position = bfile.file_size - 11; /* size of footer, perhaps with 1 byte extra */
 59+ bfile.position = bfile.file_size - (off_t)11; /* size of footer, perhaps with 1 byte extra */
5960 }
60 - bfile.position -=6; /* size of marker */
 61+ bfile.position -=(off_t)6; /* size of marker */
6162 bfile.initialized = 0;
6263 b = init_buffer(length);
6364 bfile.bytes_read = 0;
@@ -64,11 +65,11 @@
6566 /* init_bz2_file(&bfile, fin, BACKWARD); */
6667 firstblock = 1;
6768
68 - if (find_first_bz2_block_from_offset(&bfile, fin, bfile.position, BACKWARD) <= 0) {
 69+ if (find_first_bz2_block_from_offset(&bfile, fin, bfile.position, BACKWARD) <= (off_t)0) {
6970 fprintf(stderr,"failed to find block in bz2file\n");
7071 exit(-1);
7172 }
72 - while ((get_buffer_of_uncompressed_data(b, fin, &bfile, FORWARD)>=0) && (! bfile.eof) && (! bfile.position ==0)) {
 73+ while ((get_buffer_of_uncompressed_data(b, fin, &bfile, FORWARD)>=0) && (! bfile.eof) && (! bfile.position == (off_t)0)) {
7374 if (bfile.bytes_read) {
7475 fwrite(b->next_to_read,b->bytes_avail,1,stdout);
7576 b->next_to_read = b->end;
Index: branches/ariel/xmldumps-backup/mwbzutils/mwbzutils.h
@@ -7,7 +7,7 @@
88 typedef struct {
99 int page_id; /* first id in the block */
1010 int bits_shifted; /* block is right shifted this many bits */
11 - int position; /* position in file of block */
 11+ off_t position; /* position in file of block */
1212 } page_info_t;
1313
1414 #define BUFINSIZE 5000
@@ -27,7 +27,7 @@
2828
2929 int initialized; /* whether bz2file has been initialized (header processed, seek to
3030 some bz2 block in the file and input buffer filled) */
31 - int block_start; /* position of bz2 block in file from which we started to read (we
 31+ off_t block_start; /* position of bz2 block in file from which we started to read (we
3232 read a sequence of bz2 blocks from a given position, this is
3333 the offset to the first one) */
3434
@@ -44,12 +44,12 @@
4545 unsigned char **footer; /* bzip2 end of stream footer, plus bit-shifted versions of it for
4646 locating the footer in a stream of compressed data */
4747
48 - int position; /* current offset into file from start of file */
 48+ off_t position; /* current offset into file from start of file */
4949
5050 int bytes_read; /* number of bytes of compressed data read from file (per read) */
5151 int bytes_written; /* number of bytes of decompressed data written into output buffer (per decompress) */
5252 int eof; /* nonzero if eof reached */
53 - int file_size; /* length of file, so we don't search past it for blocks */
 53+ off_t file_size; /* length of file, so we don't search past it for blocks */
5454 } bz_info_t;
5555
5656 #define MASKLEFT 0
@@ -76,11 +76,11 @@
7777 position and checking the first pageid (if any) contained in it.
7878 */
7979 typedef struct {
80 - int left_end; /* left end of interval to search (bytes from start of file) */
81 - int right_end; /* right end of interval to search */
 80+ off_t left_end; /* left end of interval to search (bytes from start of file) */
 81+ off_t right_end; /* right end of interval to search */
8282 int value_wanted; /* pageid desired */
8383 int last_value; /* pageid we found in last iteration */
84 - int last_position; /* position in file for last iteration */
 84+ off_t last_position; /* position in file for last iteration */
8585 } iter_info_t;
8686
8787 int bit_mask(int numbits, int end);
@@ -114,7 +114,7 @@
115115
116116 int buffer_is_full(buf_info_t *b);
117117
118 -int get_file_size(int fin);
 118+off_t get_file_size(int fin);
119119
120120 int init_bz2_file(bz_info_t *bfile, int fin, int direction);
121121
@@ -134,6 +134,6 @@
135135
136136 void clear_buffer(unsigned char *buf, int length);
137137
138 -int find_first_bz2_block_from_offset(bz_info_t *bfile, int fin, int position, int direction);
 138+off_t find_first_bz2_block_from_offset(bz_info_t *bfile, int fin, off_t position, int direction);
139139
140140 #endif
Index: branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c
@@ -8,11 +8,11 @@
99 #include <errno.h>
1010 #include <sys/types.h>
1111 #include <regex.h>
 12+#include <inttypes.h>
1213 #include "bzlib.h"
1314 #include "mwbzutils.h"
1415
1516
16 -
1717 /* return n ones either at left or right end */
1818 int bit_mask(int numbits, int end) {
1919 if (end == MASKRIGHT) {
@@ -130,11 +130,12 @@
131131
132132 /* return: 1 if found, 0 if not, -1 on error */
133133 int find_next_bz2_block_marker(int fin, bz_info_t *bfile, int direction) {
134 - int result;
 134+ off_t seekresult;
 135+ int res;
135136
136137 bfile->bits_shifted = -1;
137 - result = read(fin, bfile->marker_buffer, 7);
138 - if (result == -1) {
 138+ res = read(fin, bfile->marker_buffer, 7);
 139+ if (res == -1) {
139140 fprintf(stderr,"read of file failed\n");
140141 return(-1);
141142 }
@@ -149,13 +150,13 @@
150151 else {
151152 bfile->position--;
152153 }
153 - result = lseek(fin, (bfile->position), SEEK_SET);
154 - if (result == -1) {
155 - fprintf(stderr,"lseek of file to %ld failed (2)\n",(long int) bfile->position);
 154+ seekresult = lseek(fin, bfile->position, SEEK_SET);
 155+ if (seekresult == (off_t)-1) {
 156+ fprintf(stderr,"lseek of file to %"PRId64" failed (2)\n",bfile->position);
156157 return(-1);
157158 }
158 - result = read(fin, bfile->marker_buffer, 7);
159 - if (result < 7) {
 159+ res = read(fin, bfile->marker_buffer, 7);
 160+ if (res < 7) {
160161 /* fprintf(stderr,"read of file failed\n"); */
161162 return(-1);
162163 }
@@ -194,15 +195,14 @@
195196 }
196197
197198 /* FIXME do this right. whatever. */
198 -int get_file_size(int fin) {
199 - int res;
 199+off_t get_file_size(int fin) {
 200+ off_t seekresult;
200201
201 - res = lseek(fin, 0, SEEK_END);
202 - if (res == -1) {
 202+ seekresult = lseek(fin, (off_t)0, SEEK_END);
 203+ if (seekresult == (off_t)-1) {
203204 fprintf(stderr,"lseek of file to 0 failed (6)\n");
204 - return(-1);
205205 }
206 - return(res);
 206+ return(seekresult);
207207 }
208208
209209 /*
@@ -217,10 +217,11 @@
218218 various BZ_ errors or -1 on failure (see bzlib.h)
219219 */
220220 int decompress_header(int fin, bz_info_t *bfile) {
221 - int ret, res;
 221+ int res;
 222+ off_t seekresult;
222223
223 - res = lseek(fin,0,SEEK_SET);
224 - if (res == -1) {
 224+ seekresult = lseek(fin,(off_t)0,SEEK_SET);
 225+ if (seekresult == (off_t)-1) {
225226 fprintf(stderr,"lseek of file to 0 failed (3)\n");
226227 return(-1);
227228 }
@@ -232,12 +233,12 @@
233234 bfile->strm.next_in = (char *)bfile->header_buffer;
234235 bfile->strm.avail_in = 4;
235236
236 - ret = BZ2_bzDecompress_mine ( &(bfile->strm) );
237 - if (BZ_OK != ret && BZ_STREAM_END != ret) {
 237+ res = BZ2_bzDecompress_mine ( &(bfile->strm) );
 238+ if (BZ_OK != res && BZ_STREAM_END != res) {
238239 fprintf(stderr,"Corrupt bzip2 header\n");
239240 return(-1);
240241 }
241 - return(ret);
 242+ return(res);
242243 }
243244
244245 /*
@@ -256,19 +257,19 @@
257258 -1 on error
258259 */
259260 int setup_first_buffer_to_decompress(int fin, bz_info_t *bfile) {
260 - int res;
 261+ off_t seekresult;
261262
262263 if (bfile->bits_shifted == 0) {
263 - res = lseek(fin,bfile->position+1,SEEK_SET);
264 - if (res == -1) {
265 - fprintf(stderr,"lseek of file to %ld failed (4)\n",(long int) bfile->position+1);
 264+ seekresult = lseek(fin,bfile->position+(off_t)1,SEEK_SET);
 265+ if (seekresult == -1) {
 266+ fprintf(stderr,"lseek of file to %"PRId64" failed (4)\n",bfile->position+(off_t)1);
266267 return(-1);
267268 }
268269 }
269270 else {
270 - res = lseek(fin,bfile->position,SEEK_SET);
271 - if (res == -1) {
272 - fprintf(stderr,"lseek of file to %ld failed (5)\n",(long int) bfile->position);
 271+ seekresult = lseek(fin,bfile->position,SEEK_SET);
 272+ if (seekresult == -1) {
 273+ fprintf(stderr,"lseek of file to %"PRId64" failed (5)\n",bfile->position);
273274 return(-1);
274275 }
275276 }
@@ -294,7 +295,7 @@
295296 -1 if no marker or other error, position of next read if ok
296297 */
297298 int init_bz2_file(bz_info_t *bfile, int fin, int direction) {
298 - int res;
 299+ off_t seekresult;
299300
300301 bfile->bufin_size = BUFINSIZE;
301302 bfile->marker = init_marker();
@@ -309,9 +310,9 @@
310311 fprintf(stderr,"asked for position past end of file\n");
311312 return(-1);
312313 }
313 - res = lseek(fin, bfile->position, SEEK_SET);
314 - if (res == -1) {
315 - fprintf(stderr,"lseek of file to %ld failed (7)\n",(long int) bfile->position);
 314+ seekresult = lseek(fin, bfile->position, SEEK_SET);
 315+ if (seekresult == (off_t)-1) {
 316+ fprintf(stderr,"lseek of file to %"PRId64" failed (7)\n",bfile->position);
316317 return(-1);
317318 }
318319
@@ -451,7 +452,7 @@
452453 bfile->eof++;
453454 /* should we actually change the file position?
454455 bfile->position = bfile->filesize;
455 - lseek(fin,0,SEEK_END);
 456+ lseek(fin,(off_t)0,SEEK_END);
456457 */
457458 }
458459 return(0);
@@ -559,10 +560,11 @@
560561 }
561562
562563 int read_footer(unsigned char *buffer, int fin) {
 564+ off_t seekresult;
563565 int res;
564566
565 - res = lseek(fin, -11, SEEK_END);
566 - if (res == -1) {
 567+ seekresult = lseek(fin, (off_t)-11, SEEK_END);
 568+ if (seekresult == (off_t)-1) {
567569 fprintf(stderr,"lseek of file failed\n");
568570 return(-1);
569571 }
@@ -621,13 +623,14 @@
622624 0 if no marker
623625 -1 on error
624626 */
625 -int find_first_bz2_block_from_offset(bz_info_t *bfile, int fin, int position, int direction) {
 627+off_t find_first_bz2_block_from_offset(bz_info_t *bfile, int fin, off_t position, int direction) {
 628+ off_t seekresult;
626629 int res;
627630
628631 bfile->bufin_size = BUFINSIZE;
629632 bfile->marker = init_marker();
630633 bfile->position = position;
631 - bfile->block_start = -1;
 634+ bfile->block_start = (off_t)-1;
632635 bfile->bytes_read = 0;
633636 bfile->bytes_written = 0;
634637 bfile->eof = 0;
@@ -639,9 +642,9 @@
640643 if (bfile->position > bfile->file_size) {
641644 return(0);
642645 }
643 - res = lseek(fin, bfile->position, SEEK_SET);
644 - if (res < 0) {
645 - fprintf(stderr,"lseek of file to %ld failed (7)\n",(long int) bfile->position);
 646+ seekresult = lseek(fin, bfile->position, SEEK_SET);
 647+ if (seekresult == (off_t)-1) {
 648+ fprintf(stderr,"lseek of file to %"PRId64" failed (7)\n",bfile->position);
646649 return(-1);
647650 }
648651 res = find_next_bz2_block_marker(fin, bfile,direction);
@@ -663,19 +666,19 @@
664667 bfile->bytes_written = 0;
665668 bfile->eof = 0;
666669 /* leave the file at the right position */
667 - res = lseek(fin, bfile->block_start, SEEK_SET);
668 - if (res < 0) {
669 - fprintf(stderr,"lseek of file to %ld failed (7)\n",(long int) bfile->position);
 670+ seekresult = lseek(fin, bfile->block_start, SEEK_SET);
 671+ if (seekresult == (off_t)-1) {
 672+ fprintf(stderr,"lseek of file to %"PRId64" failed (7)\n",bfile->position);
670673 return(-1);
671674 }
672 - bfile->position = res;
 675+ bfile->position = seekresult;
673676 return(bfile->position);
674677 }
675678 /* right bytes, but there by chance, skip and try again */
676679 else {
677 - bfile->position+=6;
 680+ bfile->position+=(off_t)6;
678681 bfile->bits_shifted = -1;
679 - bfile->block_start = -1;
 682+ bfile->block_start = (off_t)-1;
680683 }
681684 }
682685 else {

Status & tagging log