comparison ezBAMQC/src/htslib/cram/zfio.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfa3745e5fd8
1 /*
2 Copyright (c) 2009-2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #ifdef HAVE_CONFIG_H
32 #include "io_lib_config.h"
33 #endif
34
35 #include <stdlib.h>
36 #include <unistd.h>
37
38 #include "cram/os.h"
39 #include "cram/zfio.h"
40
41 /* ------------------------------------------------------------------------ */
42 /* Some wrappers around FILE * vs gzFile *, allowing for either */
43
44 /*
45 * gzopen() works on both compressed and uncompressed data, but it has
46 * a significant performance hit even for uncompressed data (tested as
47 * 25s using FILE* to 46s via gzOpen and 66s via gzOpen when gzipped).
48 *
49 * Hence we use our own wrapper 'zfp' which is a FILE* when uncompressed
50 * and gzFile* when compressed. This also means we could hide bzopen in
51 * there too if desired.
52 */
53
54 off_t zftello(zfp *zf) {
55 return zf->fp ? ftello(zf->fp) : -1;
56 }
57
58 int zfseeko(zfp *zf, off_t offset, int whence) {
59 return zf->fp ? fseeko(zf->fp, offset, whence) : -1;
60 }
61
62
63 /*
64 * A wrapper for either fgets or gzgets depending on what has been
65 * opened.
66 */
67 char *zfgets(char *line, int size, zfp *zf) {
68 if (zf->fp)
69 return fgets(line, size, zf->fp);
70 else
71 return gzgets(zf->gz, line, size);
72 }
73
74 /*
75 * A wrapper for either fputs or gzputs depending on what has been
76 * opened.
77 */
78 int zfputs(char *line, zfp *zf) {
79 if (zf->fp)
80 return fputs(line, zf->fp);
81 else
82 return gzputs(zf->gz, line) ? 0 : EOF;
83 }
84
85 /*
86 * Peeks at and returns the next character without consuming it from the
87 * input. (Ie a combination of getc and ungetc).
88 */
89 int zfpeek(zfp *zf) {
90 int c;
91
92 if (zf->fp) {
93 c = getc(zf->fp);
94 if (c != EOF)
95 ungetc(c, zf->fp);
96 } else {
97 c = gzgetc(zf->gz);
98 if (c != EOF)
99 gzungetc(c, zf->gz);
100 }
101
102 return c;
103 }
104
105 /* A replacement for either feof of gzeof */
106 int zfeof(zfp *zf) {
107 return zf->fp ? feof(zf->fp) : gzeof(zf->gz);
108 }
109
110 /* A replacement for either fopen or gzopen */
111 zfp *zfopen(const char *path, const char *mode) {
112 char path2[1024];
113 zfp *zf;
114
115 if (!(zf = (zfp *)malloc(sizeof(*zf))))
116 return NULL;
117 zf->fp = NULL;
118 zf->gz = NULL;
119
120 /* Try normal fopen */
121 if (mode[0] != 'z' && mode[1] != 'z' &&
122 NULL != (zf->fp = fopen(path, mode))) {
123 unsigned char magic[2];
124 if (2 != fread(magic, 1, 2, zf->fp)) {
125 free(zf);
126 return NULL;
127 }
128 if (!(magic[0] == 0x1f &&
129 magic[1] == 0x8b)) {
130 fseeko(zf->fp, 0, SEEK_SET);
131 return zf;
132 }
133
134 fclose(zf->fp);
135 zf->fp = NULL;
136 }
137
138 #ifdef HAVE_POPEN
139 /*
140 * I've no idea why, by gzgets is VERY slow, maybe because it handles
141 * arbitrary seeks.
142 * popen to gzip -cd is 3 times faster though.
143 */
144 if (*mode == 'w') {
145 } else {
146 if (access(path, R_OK) == 0) {
147 sprintf(path2, "gzip -cd < %.*s", 1000, path);
148 if (NULL != (zf->fp = popen(path2, "r")))
149 return zf;
150 }
151
152 sprintf(path2, "gzip -cd < %.*s.gz", 1000, path);
153 if (NULL != (zf->fp = popen(path2, "r")))
154 return zf;
155
156 printf("Failed on %s\n", path);
157 } else {
158 sprintf(path2, "gzip > %.*s", 1000, path);
159 if (NULL != (zf->fp = popen(path2, "w")))
160 return zf;
161 }
162
163 printf("Failed on %s\n", path);
164 }
165 #else
166 /* Gzopen instead */
167 if ((zf->gz = gzopen(path, mode)))
168 return zf;
169
170 sprintf(path2, "%.*s.gz", 1020, path);
171 if ((zf->gz = gzopen(path2, mode)))
172 return zf;
173 #endif
174
175 perror(path);
176
177 free(zf);
178 return NULL;
179 }
180
181 int zfclose(zfp *zf) {
182 int r = (zf->fp) ? fclose(zf->fp) : gzclose(zf->gz);
183 free(zf);
184 return r;
185 }