Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/bgzip.c @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfa3745e5fd8 |
---|---|
1 /* bgzip.c -- Block compression/decompression utility. | |
2 | |
3 Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology | |
4 Copyright (C) 2010, 2013, 2014 Genome Research Ltd. | |
5 | |
6 Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 of this software and associated documentation files (the "Software"), to deal | |
8 in the Software without restriction, including without limitation the rights | |
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 copies of the Software, and to permit persons to whom the Software is | |
11 furnished to do so, subject to the following conditions: | |
12 | |
13 The above copyright notices and this permission notice shall be included in | |
14 all copies or substantial portions of the Software. | |
15 | |
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 THE SOFTWARE. | |
23 */ | |
24 | |
25 #include <stdlib.h> | |
26 #include <string.h> | |
27 #include <stdio.h> | |
28 #include <fcntl.h> | |
29 #include <unistd.h> | |
30 #include <errno.h> | |
31 #include <stdarg.h> | |
32 #include <getopt.h> | |
33 #include <sys/select.h> | |
34 #include <sys/stat.h> | |
35 #include "htslib/bgzf.h" | |
36 #include "htslib/hts.h" | |
37 | |
/* Size in bytes (64 KiB) of the I/O buffer used by the compress and decompress loops. */
static const int WINDOW_SIZE = 1 << 16;
39 | |
/*
 * Print a printf-style diagnostic on stderr and terminate the process
 * with EXIT_FAILURE.  Control never returns to the caller.
 *
 *   format  printf-style format string
 *   ...     arguments consumed by the format string
 */
static void error(const char *format, ...)
{
    va_list args;

    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);

    exit(EXIT_FAILURE);
}
48 | |
/*
 * Open `fn` for writing (create + truncate, mode 0666) and return the
 * file descriptor.
 *
 * When `is_forced` is zero the file is first opened exclusively; if it
 * already exists the user is asked on stderr whether to overwrite it and
 * the answer is read from stdin.  Anything other than 'y'/'Y' (including
 * a failed read) terminates the process with EXIT_FAILURE.
 *
 * If the file still cannot be opened, a message is printed and the
 * process exits, so the returned descriptor is always valid.
 */
static int write_open(const char *fn, int is_forced)
{
    const int base_flags = O_WRONLY | O_CREAT | O_TRUNC;
    int fd = -1;

    if (!is_forced) {
        fd = open(fn, base_flags | O_EXCL, 0666);
        if (fd < 0 && errno == EEXIST) {
            char answer;

            fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
            if (scanf("%c", &answer) != 1)
                answer = 'n';
            if (!(answer == 'Y' || answer == 'y')) {
                fprintf(stderr, "[bgzip] not overwritten\n");
                exit(EXIT_FAILURE);
            }
        }
    }

    /* The exclusive open succeeded: nothing more to do. */
    if (fd >= 0)
        return fd;

    /* Forced mode, confirmed overwrite, or a non-EEXIST failure above. */
    fd = open(fn, base_flags, 0666);
    if (fd < 0) {
        fprintf(stderr, "[bgzip] %s: Fail to write\n", fn);
        exit(EXIT_FAILURE);
    }
    return fd;
}
71 | |
/*
 * Write the version banner and option summary to stderr.
 * Always returns 1 so callers can `return bgzip_main_usage();` to exit
 * with a failure status.
 */
static int bgzip_main_usage(void)
{
    fputs("\n", stderr);
    fprintf(stderr, "Version: %s\n", hts_version());
    fputs("Usage: bgzip [OPTIONS] [FILE] ...\n"
          "Options:\n"
          " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\n"
          " -c, --stdout write on standard output, keep original files unchanged\n"
          " -d, --decompress decompress\n"
          " -f, --force overwrite files without asking\n"
          " -h, --help give this help\n"
          " -i, --index compress and create BGZF index\n"
          " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n"
          " -r, --reindex (re)index compressed file\n"
          " -s, --size INT decompress INT bytes (uncompressed size)\n"
          "\n",
          stderr);
    return 1;
}
90 | |
91 int main(int argc, char **argv) | |
92 { | |
93 int c, compress, pstdout, is_forced, index = 0, reindex = 0; | |
94 BGZF *fp; | |
95 void *buffer; | |
96 long start, end, size; | |
97 char *index_fname = NULL; | |
98 | |
99 static struct option loptions[] = | |
100 { | |
101 {"help",0,0,'h'}, | |
102 {"offset",1,0,'b'}, | |
103 {"stdout",0,0,'c'}, | |
104 {"decompress",0,0,'d'}, | |
105 {"force",0,0,'f'}, | |
106 {"index",0,0,'i'}, | |
107 {"index-name",1,0,'I'}, | |
108 {"reindex",0,0,'r'}, | |
109 {"size",1,0,'s'}, | |
110 {0,0,0,0} | |
111 }; | |
112 | |
113 compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; | |
114 while((c = getopt_long(argc, argv, "cdh?fb:s:iI:r",loptions,NULL)) >= 0){ | |
115 switch(c){ | |
116 case 'd': compress = 0; break; | |
117 case 'c': pstdout = 1; break; | |
118 case 'b': start = atol(optarg); compress = 0; pstdout = 1; break; | |
119 case 's': size = atol(optarg); pstdout = 1; break; | |
120 case 'f': is_forced = 1; break; | |
121 case 'i': index = 1; break; | |
122 case 'I': index_fname = optarg; break; | |
123 case 'r': reindex = 1; compress = 0; break; | |
124 case 'h': | |
125 case '?': return bgzip_main_usage(); | |
126 } | |
127 } | |
128 if (size >= 0) end = start + size; | |
129 if (end >= 0 && end < start) { | |
130 fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); | |
131 return 1; | |
132 } | |
133 if (compress == 1) { | |
134 struct stat sbuf; | |
135 int f_src = fileno(stdin); | |
136 int f_dst = fileno(stdout); | |
137 | |
138 if ( argc>optind ) | |
139 { | |
140 if ( stat(argv[optind],&sbuf)<0 ) | |
141 { | |
142 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); | |
143 return 1; | |
144 } | |
145 | |
146 if ((f_src = open(argv[optind], O_RDONLY)) < 0) { | |
147 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); | |
148 return 1; | |
149 } | |
150 | |
151 if (pstdout) | |
152 f_dst = fileno(stdout); | |
153 else | |
154 { | |
155 char *name = malloc(strlen(argv[optind]) + 5); | |
156 strcpy(name, argv[optind]); | |
157 strcat(name, ".gz"); | |
158 f_dst = write_open(name, is_forced); | |
159 free(name); | |
160 if (f_dst < 0) return 1; | |
161 } | |
162 } | |
163 else if (!pstdout && isatty(fileno((FILE *)stdout)) ) | |
164 return bgzip_main_usage(); | |
165 else if ( index && !index_fname ) | |
166 { | |
167 fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); | |
168 return 1; | |
169 } | |
170 | |
171 fp = bgzf_fdopen(f_dst, "w"); | |
172 if ( index ) bgzf_index_build_init(fp); | |
173 buffer = malloc(WINDOW_SIZE); | |
174 while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) | |
175 if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); | |
176 // f_dst will be closed here | |
177 if ( index ) | |
178 { | |
179 if ( index_fname ) bgzf_index_dump(fp, index_fname, NULL); | |
180 else bgzf_index_dump(fp, argv[optind], ".gz.gzi"); | |
181 } | |
182 if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode); | |
183 if (argc > optind && !pstdout) unlink(argv[optind]); | |
184 free(buffer); | |
185 close(f_src); | |
186 return 0; | |
187 } | |
188 else if ( reindex ) | |
189 { | |
190 if ( argc>optind ) | |
191 { | |
192 fp = bgzf_open(argv[optind], "r"); | |
193 if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]); | |
194 } | |
195 else | |
196 { | |
197 if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n"); | |
198 fp = bgzf_fdopen(fileno(stdin), "r"); | |
199 if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno)); | |
200 } | |
201 | |
202 buffer = malloc(BGZF_BLOCK_SIZE); | |
203 bgzf_index_build_init(fp); | |
204 int ret; | |
205 while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ; | |
206 free(buffer); | |
207 if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n"); | |
208 | |
209 if ( index_fname ) | |
210 bgzf_index_dump(fp, index_fname, NULL); | |
211 else | |
212 bgzf_index_dump(fp, argv[optind], ".gzi"); | |
213 | |
214 if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode); | |
215 return 0; | |
216 } | |
217 else | |
218 { | |
219 struct stat sbuf; | |
220 int f_dst; | |
221 | |
222 if ( argc>optind ) | |
223 { | |
224 if ( stat(argv[optind],&sbuf)<0 ) | |
225 { | |
226 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); | |
227 return 1; | |
228 } | |
229 char *name; | |
230 int len = strlen(argv[optind]); | |
231 if ( strcmp(argv[optind]+len-3,".gz") ) | |
232 { | |
233 fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); | |
234 return 1; | |
235 } | |
236 fp = bgzf_open(argv[optind], "r"); | |
237 if (fp == NULL) { | |
238 fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]); | |
239 return 1; | |
240 } | |
241 | |
242 if (pstdout) { | |
243 f_dst = fileno(stdout); | |
244 } | |
245 else { | |
246 name = strdup(argv[optind]); | |
247 name[strlen(name) - 3] = '\0'; | |
248 f_dst = write_open(name, is_forced); | |
249 free(name); | |
250 } | |
251 } | |
252 else if (!pstdout && isatty(fileno((FILE *)stdin)) ) | |
253 return bgzip_main_usage(); | |
254 else | |
255 { | |
256 f_dst = fileno(stdout); | |
257 fp = bgzf_fdopen(fileno(stdin), "r"); | |
258 if (fp == NULL) { | |
259 fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); | |
260 return 1; | |
261 } | |
262 } | |
263 buffer = malloc(WINDOW_SIZE); | |
264 if ( start>0 ) | |
265 { | |
266 if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]); | |
267 if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start); | |
268 } | |
269 while (1) { | |
270 if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); | |
271 else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); | |
272 if (c == 0) break; | |
273 if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode); | |
274 start += c; | |
275 if ( write(f_dst, buffer, c) != c ) error("Could not write %d bytes\n", c); | |
276 if (end >= 0 && start >= end) break; | |
277 } | |
278 free(buffer); | |
279 if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode); | |
280 if (!pstdout) unlink(argv[optind]); | |
281 return 0; | |
282 } | |
283 return 0; | |
284 } |