annotate ezBAMQC/src/htslib/cram/zfio.c @ 20:9de3bbec2479 draft default tip

Uploaded
author youngkim
date Thu, 31 Mar 2016 10:10:37 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 Copyright (c) 2009-2013 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Author: James Bonfield <jkb@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Redistribution and use in source and binary forms, with or without
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 modification, are permitted provided that the following conditions are met:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 1. Redistributions of source code must retain the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 this list of conditions and the following disclaimer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 2. Redistributions in binary form must reproduce the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 this list of conditions and the following disclaimer in the documentation
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 and/or other materials provided with the distribution.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 Institute nor the names of its contributors may be used to endorse or promote
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 products derived from this software without specific prior written permission.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 #ifdef HAVE_CONFIG_H
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 #include "io_lib_config.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 #include <stdlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #include <unistd.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 #include "cram/os.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 #include "cram/zfio.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 /* ------------------------------------------------------------------------ */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 /* Some wrappers around FILE * vs gzFile *, allowing for either */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 * gzopen() works on both compressed and uncompressed data, but it has
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 * a significant performance hit even for uncompressed data (tested as
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 * 25s using FILE* to 46s via gzOpen and 66s via gzOpen when gzipped).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 * Hence we use our own wrapper 'zfp' which is a FILE* when uncompressed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 * and gzFile* when compressed. This also means we could hide bzopen in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 * there too if desired.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 off_t zftello(zfp *zf) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 return zf->fp ? ftello(zf->fp) : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 int zfseeko(zfp *zf, off_t offset, int whence) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 return zf->fp ? fseeko(zf->fp, offset, whence) : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 * A wrapper for either fgets or gzgets depending on what has been
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 * opened.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 char *zfgets(char *line, int size, zfp *zf) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 if (zf->fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 return fgets(line, size, zf->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 return gzgets(zf->gz, line, size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 * A wrapper for either fputs or gzputs depending on what has been
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 * opened.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 int zfputs(char *line, zfp *zf) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 if (zf->fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 return fputs(line, zf->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 return gzputs(zf->gz, line) ? 0 : EOF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 * Peeks at and returns the next character without consuming it from the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 * input. (Ie a combination of getc and ungetc).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 int zfpeek(zfp *zf) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 int c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 if (zf->fp) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 c = getc(zf->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 if (c != EOF)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 ungetc(c, zf->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 c = gzgetc(zf->gz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 if (c != EOF)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 gzungetc(c, zf->gz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 return c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 /* A replacement for either feof of gzeof */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106 int zfeof(zfp *zf) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 return zf->fp ? feof(zf->fp) : gzeof(zf->gz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 /* A replacement for either fopen or gzopen */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 zfp *zfopen(const char *path, const char *mode) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 char path2[1024];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 zfp *zf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 if (!(zf = (zfp *)malloc(sizeof(*zf))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 zf->fp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 zf->gz = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 /* Try normal fopen */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 if (mode[0] != 'z' && mode[1] != 'z' &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 NULL != (zf->fp = fopen(path, mode))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 unsigned char magic[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 if (2 != fread(magic, 1, 2, zf->fp)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 free(zf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 if (!(magic[0] == 0x1f &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 magic[1] == 0x8b)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 fseeko(zf->fp, 0, SEEK_SET);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131 return zf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 fclose(zf->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 zf->fp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 #ifdef HAVE_POPEN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 * I've no idea why, by gzgets is VERY slow, maybe because it handles
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 * arbitrary seeks.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 * popen to gzip -cd is 3 times faster though.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 if (*mode == 'w') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 if (access(path, R_OK) == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 sprintf(path2, "gzip -cd < %.*s", 1000, path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 if (NULL != (zf->fp = popen(path2, "r")))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 return zf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152 sprintf(path2, "gzip -cd < %.*s.gz", 1000, path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 if (NULL != (zf->fp = popen(path2, "r")))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 return zf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 printf("Failed on %s\n", path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 sprintf(path2, "gzip > %.*s", 1000, path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 if (NULL != (zf->fp = popen(path2, "w")))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160 return zf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 printf("Failed on %s\n", path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166 /* Gzopen instead */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167 if ((zf->gz = gzopen(path, mode)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 return zf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170 sprintf(path2, "%.*s.gz", 1020, path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 if ((zf->gz = gzopen(path2, mode)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 return zf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 perror(path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177 free(zf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 int zfclose(zfp *zf) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182 int r = (zf->fp) ? fclose(zf->fp) : gzclose(zf->gz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183 free(zf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 }