comparison ezBAMQC/src/htslib/hfile.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400 (2016-03-24)
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfa3745e5fd8
1 /* hfile.c -- buffered low-level input/output streams.
2
3 Copyright (C) 2013-2015 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <errno.h>
29
30 #include "htslib/hfile.h"
31 #include "hfile_internal.h"
32
/* hFILE fields are used as follows:

   char *buffer;     // Pointer to the start of the I/O buffer
   char *begin;      // First not-yet-read character / unused position
   char *end;        // First unfilled/unfillable position
   char *limit;      // Pointer to the first position past the buffer

   const hFILE_backend *backend;  // Methods to refill/flush I/O buffer

   off_t offset;     // Offset within the stream of buffer position 0
   int at_eof:1;     // For reading, whether EOF has been seen
   int has_errno;    // Error number from the last failure on this stream

For reading, begin is the first unread character in the buffer and end is the
first unfilled position:

   -----------ABCDEFGHIJKLMNO---------------
   ^buffer    ^begin         ^end           ^limit

For writing, begin is the first unused position and end is unused so remains
equal to buffer:

   ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
   ^buffer                   ^begin         ^limit
   ^end

Thus if begin > end then there is a non-empty write buffer, if begin < end
then there is a non-empty read buffer, and if begin == end then both buffers
are empty.  In all cases, the stream's file position indicator corresponds
to the position pointed to by begin.  */
63
64 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
65 {
66 hFILE *fp = (hFILE *) malloc(struct_size);
67 if (fp == NULL) goto error;
68
69 if (capacity == 0) capacity = 32768;
70 // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
71 if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
72
73 fp->buffer = (char *) malloc(capacity);
74 if (fp->buffer == NULL) goto error;
75
76 fp->begin = fp->end = fp->buffer;
77 fp->limit = &fp->buffer[capacity];
78
79 fp->offset = 0;
80 fp->at_eof = 0;
81 fp->has_errno = 0;
82 return fp;
83
84 error:
85 hfile_destroy(fp);
86 return NULL;
87 }
88
89 void hfile_destroy(hFILE *fp)
90 {
91 int save = errno;
92 if (fp) free(fp->buffer);
93 free(fp);
94 errno = save;
95 }
96
97 static inline int writebuffer_is_nonempty(hFILE *fp)
98 {
99 return fp->begin > fp->end;
100 }
101
102 /* Refills the read buffer from the backend (once, so may only partially
103 fill the buffer), returning the number of additional characters read
104 (which might be 0), or negative when an error occurred. */
105 static ssize_t refill_buffer(hFILE *fp)
106 {
107 ssize_t n;
108
109 // Move any unread characters to the start of the buffer
110 if (fp->begin > fp->buffer) {
111 fp->offset += fp->begin - fp->buffer;
112 memmove(fp->buffer, fp->begin, fp->end - fp->begin);
113 fp->end = &fp->buffer[fp->end - fp->begin];
114 fp->begin = fp->buffer;
115 }
116
117 // Read into the available buffer space at fp->[end,limit)
118 if (fp->at_eof || fp->end == fp->limit) n = 0;
119 else {
120 n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
121 if (n < 0) { fp->has_errno = errno; return n; }
122 else if (n == 0) fp->at_eof = 1;
123 }
124
125 fp->end += n;
126 return n;
127 }
128
129 /* Called only from hgetc(), when our buffer is empty. */
130 int hgetc2(hFILE *fp)
131 {
132 return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
133 }
134
135 ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
136 {
137 size_t n = fp->end - fp->begin;
138 while (n < nbytes) {
139 ssize_t ret = refill_buffer(fp);
140 if (ret < 0) return ret;
141 else if (ret == 0) break;
142 else n += ret;
143 }
144
145 if (n > nbytes) n = nbytes;
146 memcpy(buffer, fp->begin, n);
147 return n;
148 }
149
150 /* Called only from hread(); when called, our buffer is empty and nread bytes
151 have already been placed in the destination buffer. */
152 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
153 {
154 const size_t capacity = fp->limit - fp->buffer;
155 char *dest = (char *) destv;
156 dest += nread, nbytes -= nread;
157
158 // Read large requests directly into the destination buffer
159 while (nbytes * 2 >= capacity && !fp->at_eof) {
160 ssize_t n = fp->backend->read(fp, dest, nbytes);
161 if (n < 0) { fp->has_errno = errno; return n; }
162 else if (n == 0) fp->at_eof = 1;
163 fp->offset += n;
164 dest += n, nbytes -= n;
165 nread += n;
166 }
167
168 while (nbytes > 0 && !fp->at_eof) {
169 size_t n;
170 ssize_t ret = refill_buffer(fp);
171 if (ret < 0) return ret;
172
173 n = fp->end - fp->begin;
174 if (n > nbytes) n = nbytes;
175 memcpy(dest, fp->begin, n);
176 fp->begin += n;
177 dest += n, nbytes -= n;
178 nread += n;
179 }
180
181 return nread;
182 }
183
184 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
185 returning 0 on success or negative if an error occurred. */
186 static ssize_t flush_buffer(hFILE *fp)
187 {
188 const char *buffer = fp->buffer;
189 while (buffer < fp->begin) {
190 ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
191 if (n < 0) { fp->has_errno = errno; return n; }
192 buffer += n;
193 fp->offset += n;
194 }
195
196 fp->begin = fp->buffer; // Leave the buffer empty
197 return 0;
198 }
199
200 int hflush(hFILE *fp)
201 {
202 if (flush_buffer(fp) < 0) return EOF;
203 if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; }
204 return 0;
205 }
206
207 /* Called only from hputc(), when our buffer is already full. */
208 int hputc2(int c, hFILE *fp)
209 {
210 if (flush_buffer(fp) < 0) return EOF;
211 *(fp->begin++) = c;
212 return c;
213 }
214
215 /* Called only from hwrite() and hputs2(); when called, our buffer is full and
216 ncopied bytes from the source have already been copied to our buffer. */
217 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
218 {
219 const char *src = (const char *) srcv;
220 ssize_t ret;
221 const size_t capacity = fp->limit - fp->buffer;
222 size_t remaining = totalbytes - ncopied;
223 src += ncopied;
224
225 ret = flush_buffer(fp);
226 if (ret < 0) return ret;
227
228 // Write large blocks out directly from the source buffer
229 while (remaining * 2 >= capacity) {
230 ssize_t n = fp->backend->write(fp, src, remaining);
231 if (n < 0) { fp->has_errno = errno; return n; }
232 fp->offset += n;
233 src += n, remaining -= n;
234 }
235
236 // Just buffer any remaining characters
237 memcpy(fp->begin, src, remaining);
238 fp->begin += remaining;
239
240 return totalbytes;
241 }
242
243 /* Called only from hputs(), when our buffer is already full. */
244 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
245 {
246 return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
247 }
248
249 off_t hseek(hFILE *fp, off_t offset, int whence)
250 {
251 off_t pos;
252
253 if (writebuffer_is_nonempty(fp)) {
254 int ret = flush_buffer(fp);
255 if (ret < 0) return ret;
256 }
257 else {
258 // Convert relative offsets from being relative to the hFILE's stream
259 // position (at begin) to being relative to the backend's physical
260 // stream position (at end, due to the buffering read-ahead).
261 if (whence == SEEK_CUR) offset -= fp->end - fp->begin;
262 }
263
264 pos = fp->backend->seek(fp, offset, whence);
265 if (pos < 0) { fp->has_errno = errno; return pos; }
266
267 // Seeking succeeded, so discard any non-empty read buffer
268 fp->begin = fp->end = fp->buffer;
269 fp->at_eof = 0;
270
271 fp->offset = pos;
272 return pos;
273 }
274
275 int hclose(hFILE *fp)
276 {
277 int err = fp->has_errno;
278
279 if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno;
280 if (fp->backend->close(fp) < 0) err = errno;
281 hfile_destroy(fp);
282
283 if (err) {
284 errno = err;
285 return EOF;
286 }
287 else return 0;
288 }
289
290 void hclose_abruptly(hFILE *fp)
291 {
292 int save = errno;
293 if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ }
294 hfile_destroy(fp);
295 errno = save;
296 }
297
298
299 /***************************
300 * File descriptor backend *
301 ***************************/
302
303 #include <sys/socket.h>
304 #include <sys/stat.h>
305 #include <fcntl.h>
306 #include <unistd.h>
307
308 #ifdef _WIN32
309 #define HAVE_CLOSESOCKET
310 #endif
311
312 /* For Unix, it doesn't matter whether a file descriptor is a socket.
313 However Windows insists on send()/recv() and its own closesocket()
314 being used when fd happens to be a socket. */
315
316 typedef struct {
317 hFILE base;
318 int fd;
319 int is_socket:1;
320 } hFILE_fd;
321
322 static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
323 {
324 hFILE_fd *fp = (hFILE_fd *) fpv;
325 ssize_t n;
326 do {
327 n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0)
328 : read(fp->fd, buffer, nbytes);
329 } while (n < 0 && errno == EINTR);
330 return n;
331 }
332
333 static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
334 {
335 hFILE_fd *fp = (hFILE_fd *) fpv;
336 ssize_t n;
337 do {
338 n = fp->is_socket? send(fp->fd, buffer, nbytes, 0)
339 : write(fp->fd, buffer, nbytes);
340 } while (n < 0 && errno == EINTR);
341 return n;
342 }
343
344 static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
345 {
346 hFILE_fd *fp = (hFILE_fd *) fpv;
347 return lseek(fp->fd, offset, whence);
348 }
349
350 static int fd_flush(hFILE *fpv)
351 {
352 hFILE_fd *fp = (hFILE_fd *) fpv;
353 int ret;
354 do {
355 #ifdef HAVE_FDATASYNC
356 ret = fdatasync(fp->fd);
357 #else
358 ret = fsync(fp->fd);
359 #endif
360 // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
361 // and operation-not-supported errors (Mac OS X)
362 if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0;
363 } while (ret < 0 && errno == EINTR);
364 return ret;
365 }
366
367 static int fd_close(hFILE *fpv)
368 {
369 hFILE_fd *fp = (hFILE_fd *) fpv;
370 int ret;
371 do {
372 #ifdef HAVE_CLOSESOCKET
373 ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
374 #else
375 ret = close(fp->fd);
376 #endif
377 } while (ret < 0 && errno == EINTR);
378 return ret;
379 }
380
381 static const struct hFILE_backend fd_backend =
382 {
383 fd_read, fd_write, fd_seek, fd_flush, fd_close
384 };
385
/* Returns the st_blksize that fstat() reports for fd, or 0 if fstat()
   fails.  Callers pass the result to hfile_init() as the buffer capacity,
   where 0 selects the default size. */
static size_t blksize(int fd)
{
    struct stat sbuf;

    return (fstat(fd, &sbuf) == 0)? sbuf.st_blksize : 0;
}
392
393 static hFILE *hopen_fd(const char *filename, const char *mode)
394 {
395 hFILE_fd *fp = NULL;
396 int fd = open(filename, hfile_oflags(mode), 0666);
397 if (fd < 0) goto error;
398
399 fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
400 if (fp == NULL) goto error;
401
402 fp->fd = fd;
403 fp->is_socket = 0;
404 fp->base.backend = &fd_backend;
405 return &fp->base;
406
407 error:
408 if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
409 hfile_destroy((hFILE *) fp);
410 return NULL;
411 }
412
413 hFILE *hdopen(int fd, const char *mode)
414 {
415 hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
416 if (fp == NULL) return NULL;
417
418 fp->fd = fd;
419 fp->is_socket = (strchr(mode, 's') != NULL);
420 fp->base.backend = &fd_backend;
421 return &fp->base;
422 }
423
424 static hFILE *hopen_fd_stdinout(const char *mode)
425 {
426 int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
427 // TODO Set binary mode (for Windows)
428 return hdopen(fd, mode);
429 }
430
/* Translates an fopen()-style mode string into open(2) flags.  Characters
   are processed left to right: 'r', 'w', 'a' and '+' each replace the access
   mode chosen so far, 'w' and 'a' additionally request creation with
   truncation or appending respectively, and all other characters are
   ignored.  O_BINARY is added where the platform defines it. */
int hfile_oflags(const char *mode)
{
    int accmode = 0, extra = 0;

    for (const char *s = mode; *s != '\0'; s++) {
        const char c = *s;

        if (c == 'r') {
            accmode = O_RDONLY;
        }
        else if (c == 'w') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (c == 'a') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (c == '+') {
            accmode = O_RDWR;
        }
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return accmode | extra;
}
450
451
452 /*********************
453 * In-memory backend *
454 *********************/
455
456 typedef struct {
457 hFILE base;
458 const char *buffer;
459 size_t length, pos;
460 } hFILE_mem;
461
462 static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
463 {
464 hFILE_mem *fp = (hFILE_mem *) fpv;
465 size_t avail = fp->length - fp->pos;
466 if (nbytes > avail) nbytes = avail;
467 memcpy(buffer, fp->buffer + fp->pos, nbytes);
468 fp->pos += nbytes;
469 return nbytes;
470 }
471
472 static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
473 {
474 hFILE_mem *fp = (hFILE_mem *) fpv;
475 size_t absoffset = (offset >= 0)? offset : -offset;
476 size_t origin;
477
478 switch (whence) {
479 case SEEK_SET: origin = 0; break;
480 case SEEK_CUR: origin = fp->pos; break;
481 case SEEK_END: origin = fp->length; break;
482 default: errno = EINVAL; return -1;
483 }
484
485 if ((offset < 0 && absoffset > origin) ||
486 (offset >= 0 && absoffset > fp->length - origin)) {
487 errno = EINVAL;
488 return -1;
489 }
490
491 fp->pos = origin + offset;
492 return fp->pos;
493 }
494
495 static int mem_close(hFILE *fpv)
496 {
497 return 0;
498 }
499
500 static const struct hFILE_backend mem_backend =
501 {
502 mem_read, NULL, mem_seek, NULL, mem_close
503 };
504
505 static hFILE *hopen_mem(const char *data, const char *mode)
506 {
507 // TODO Implement write modes, which will require memory allocation
508 if (strchr(mode, 'r') == NULL) { errno = EINVAL; return NULL; }
509
510 hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0);
511 if (fp == NULL) return NULL;
512
513 fp->buffer = data;
514 fp->length = strlen(data);
515 fp->pos = 0;
516 fp->base.backend = &mem_backend;
517 return &fp->base;
518 }
519
520
521 /******************************
522 * hopen() backend dispatcher *
523 ******************************/
524
525 hFILE *hopen(const char *fname, const char *mode)
526 {
527 if (strncmp(fname, "http://", 7) == 0 ||
528 strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode);
529 #ifdef HAVE_IRODS
530 else if (strncmp(fname, "irods:", 6) == 0) return hopen_irods(fname, mode);
531 #endif
532 else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode);
533 else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
534 else return hopen_fd(fname, mode);
535 }
536
/* Returns 1 if fname begins with "http://", "https://" or "ftp://" (or with
   "irods:" when HAVE_IRODS is defined), and 0 otherwise. */
int hisremote(const char *fname)
{
    // FIXME Make a new backend entry to return this
    int remote = strncmp(fname, "http://", 7) == 0
              || strncmp(fname, "https://", 8) == 0
              || strncmp(fname, "ftp://", 6) == 0;

#ifdef HAVE_IRODS
    remote = remote || strncmp(fname, "irods:", 6) == 0;
#endif

    return remote;
}