0
|
1 /*
|
|
2 Copyright (c) 2009-2013 Genome Research Ltd.
|
|
3 Author: James Bonfield <jkb@sanger.ac.uk>
|
|
4
|
|
5 Redistribution and use in source and binary forms, with or without
|
|
6 modification, are permitted provided that the following conditions are met:
|
|
7
|
|
8 1. Redistributions of source code must retain the above copyright notice,
|
|
9 this list of conditions and the following disclaimer.
|
|
10
|
|
11 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12 this list of conditions and the following disclaimer in the documentation
|
|
13 and/or other materials provided with the distribution.
|
|
14
|
|
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
|
|
16 Institute nor the names of its contributors may be used to endorse or promote
|
|
17 products derived from this software without specific prior written permission.
|
|
18
|
|
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
|
|
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
|
|
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29 */
|
|
30
|
|
31 #ifdef HAVE_CONFIG_H
|
|
32 #include "io_lib_config.h"
|
|
33 #endif
|
|
34
|
|
35 #include <stdlib.h>
|
|
36 #include <unistd.h>
|
|
37
|
|
38 #include "cram/os.h"
|
|
39 #include "cram/zfio.h"
|
|
40
|
|
41 /* ------------------------------------------------------------------------ */
|
|
42 /* Some wrappers around FILE * vs gzFile *, allowing for either */
|
|
43
|
|
44 /*
|
|
45 * gzopen() works on both compressed and uncompressed data, but it has
|
|
46 * a significant performance hit even for uncompressed data (tested as
|
|
47 * 25s using FILE* to 46s via gzOpen and 66s via gzOpen when gzipped).
|
|
48 *
|
|
49 * Hence we use our own wrapper 'zfp' which is a FILE* when uncompressed
|
|
50 * and gzFile* when compressed. This also means we could hide bzopen in
|
|
51 * there too if desired.
|
|
52 */
|
|
53
|
|
54 off_t zftello(zfp *zf) {
|
|
55 return zf->fp ? ftello(zf->fp) : -1;
|
|
56 }
|
|
57
|
|
58 int zfseeko(zfp *zf, off_t offset, int whence) {
|
|
59 return zf->fp ? fseeko(zf->fp, offset, whence) : -1;
|
|
60 }
|
|
61
|
|
62
|
|
63 /*
|
|
64 * A wrapper for either fgets or gzgets depending on what has been
|
|
65 * opened.
|
|
66 */
|
|
67 char *zfgets(char *line, int size, zfp *zf) {
|
|
68 if (zf->fp)
|
|
69 return fgets(line, size, zf->fp);
|
|
70 else
|
|
71 return gzgets(zf->gz, line, size);
|
|
72 }
|
|
73
|
|
74 /*
|
|
75 * A wrapper for either fputs or gzputs depending on what has been
|
|
76 * opened.
|
|
77 */
|
|
78 int zfputs(char *line, zfp *zf) {
|
|
79 if (zf->fp)
|
|
80 return fputs(line, zf->fp);
|
|
81 else
|
|
82 return gzputs(zf->gz, line) ? 0 : EOF;
|
|
83 }
|
|
84
|
|
85 /*
|
|
86 * Peeks at and returns the next character without consuming it from the
|
|
87 * input. (Ie a combination of getc and ungetc).
|
|
88 */
|
|
89 int zfpeek(zfp *zf) {
|
|
90 int c;
|
|
91
|
|
92 if (zf->fp) {
|
|
93 c = getc(zf->fp);
|
|
94 if (c != EOF)
|
|
95 ungetc(c, zf->fp);
|
|
96 } else {
|
|
97 c = gzgetc(zf->gz);
|
|
98 if (c != EOF)
|
|
99 gzungetc(c, zf->gz);
|
|
100 }
|
|
101
|
|
102 return c;
|
|
103 }
|
|
104
|
|
105 /* A replacement for either feof of gzeof */
|
|
106 int zfeof(zfp *zf) {
|
|
107 return zf->fp ? feof(zf->fp) : gzeof(zf->gz);
|
|
108 }
|
|
109
|
|
110 /* A replacement for either fopen or gzopen */
|
|
111 zfp *zfopen(const char *path, const char *mode) {
|
|
112 char path2[1024];
|
|
113 zfp *zf;
|
|
114
|
|
115 if (!(zf = (zfp *)malloc(sizeof(*zf))))
|
|
116 return NULL;
|
|
117 zf->fp = NULL;
|
|
118 zf->gz = NULL;
|
|
119
|
|
120 /* Try normal fopen */
|
|
121 if (mode[0] != 'z' && mode[1] != 'z' &&
|
|
122 NULL != (zf->fp = fopen(path, mode))) {
|
|
123 unsigned char magic[2];
|
|
124 if (2 != fread(magic, 1, 2, zf->fp)) {
|
|
125 free(zf);
|
|
126 return NULL;
|
|
127 }
|
|
128 if (!(magic[0] == 0x1f &&
|
|
129 magic[1] == 0x8b)) {
|
|
130 fseeko(zf->fp, 0, SEEK_SET);
|
|
131 return zf;
|
|
132 }
|
|
133
|
|
134 fclose(zf->fp);
|
|
135 zf->fp = NULL;
|
|
136 }
|
|
137
|
|
138 #ifdef HAVE_POPEN
|
|
139 /*
|
|
140 * I've no idea why, by gzgets is VERY slow, maybe because it handles
|
|
141 * arbitrary seeks.
|
|
142 * popen to gzip -cd is 3 times faster though.
|
|
143 */
|
|
144 if (*mode == 'w') {
|
|
145 } else {
|
|
146 if (access(path, R_OK) == 0) {
|
|
147 sprintf(path2, "gzip -cd < %.*s", 1000, path);
|
|
148 if (NULL != (zf->fp = popen(path2, "r")))
|
|
149 return zf;
|
|
150 }
|
|
151
|
|
152 sprintf(path2, "gzip -cd < %.*s.gz", 1000, path);
|
|
153 if (NULL != (zf->fp = popen(path2, "r")))
|
|
154 return zf;
|
|
155
|
|
156 printf("Failed on %s\n", path);
|
|
157 } else {
|
|
158 sprintf(path2, "gzip > %.*s", 1000, path);
|
|
159 if (NULL != (zf->fp = popen(path2, "w")))
|
|
160 return zf;
|
|
161 }
|
|
162
|
|
163 printf("Failed on %s\n", path);
|
|
164 }
|
|
165 #else
|
|
166 /* Gzopen instead */
|
|
167 if ((zf->gz = gzopen(path, mode)))
|
|
168 return zf;
|
|
169
|
|
170 sprintf(path2, "%.*s.gz", 1020, path);
|
|
171 if ((zf->gz = gzopen(path2, mode)))
|
|
172 return zf;
|
|
173 #endif
|
|
174
|
|
175 perror(path);
|
|
176
|
|
177 free(zf);
|
|
178 return NULL;
|
|
179 }
|
|
180
|
|
181 int zfclose(zfp *zf) {
|
|
182 int r = (zf->fp) ? fclose(zf->fp) : gzclose(zf->gz);
|
|
183 free(zf);
|
|
184 return r;
|
|
185 }
|