/* The MIT License

   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/
|
|
23
|
|
24 #include <stdlib.h>
|
|
25 #include <string.h>
|
|
26 #include <stdio.h>
|
|
27 #include <fcntl.h>
|
|
28 #include <unistd.h>
|
|
29 #include <errno.h>
|
|
30 #include <sys/select.h>
|
|
31 #include <sys/stat.h>
|
|
32 #include "bgzf.h"
|
|
33
|
|
/* Size in bytes (64 KiB) of the buffer used for each read/write chunk. */
static const int WINDOW_SIZE = 1 << 16;
|
|
35
|
|
/*
 * Print the command-line help text to stderr.
 *
 * Always returns 1.
 */
static int bgzip_main_usage()
{
    /* The whole help message as one literal; it contains no format
     * directives, so it is emitted verbatim. */
    static const char help_text[] =
        "\n"
        "Usage: bgzip [options] [file] ...\n\n"
        "Options: -c write on standard output, keep original files unchanged\n"
        " -d decompress\n"
        " -f overwrite files without asking\n"
        " -b INT decompress at virtual file pointer INT\n"
        " -s INT decompress INT bytes in the uncompressed file\n"
        " -h give this help\n"
        "\n";

    fputs(help_text, stderr);
    return 1;
}
|
|
49
|
|
/*
 * Open `fn` for writing (created with mode 0666, truncated) and return
 * its file descriptor.
 *
 * fn         path of the output file.
 * is_forced  non-zero: overwrite an existing file without asking.
 *            zero: if the file already exists, prompt on stderr and read
 *            a one-character answer from stdin; anything other than
 *            'y' or 'Y' (including EOF or a read error) prints
 *            "not overwritten" and exits with status 1.
 *
 * On success returns a non-negative descriptor. If the file cannot be
 * opened, prints a message and exits with status 1, so a negative value
 * is never returned.
 */
static int write_open(const char *fn, int is_forced)
{
    int fd = -1;

    if (!is_forced) {
        /* O_EXCL makes the open fail with EEXIST instead of clobbering. */
        fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666);
        if (fd < 0 && errno == EEXIST) {
            int answer;

            fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
            /* getchar() returns EOF on end-of-input or error, which is
             * neither 'y' nor 'Y', so a missing answer means "no"
             * rather than reading an uninitialized character. */
            answer = getchar();
            if (answer != 'Y' && answer != 'y') {
                fprintf(stderr, "[bgzip] not overwritten\n");
                exit(1);
            }
        }
    }
    if (fd < 0) {
        /* Forced mode, confirmed overwrite, or the exclusive open failed
         * for another reason: try a plain truncating open. */
        fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666);
        if (fd < 0) {
            fprintf(stderr, "[bgzip] %s: Fail to write\n", fn);
            exit(1);
        }
    }
    return fd;
}
|
|
72
|
|
73 static void fail(BGZF* fp)
|
|
74 {
|
|
75 fprintf(stderr, "Error: %s\n", fp->error);
|
|
76 exit(1);
|
|
77 }
|
|
78
|
|
79 int main(int argc, char **argv)
|
|
80 {
|
|
81 int c, compress, pstdout, is_forced;
|
|
82 BGZF *fp;
|
|
83 void *buffer;
|
|
84 long start, end, size;
|
|
85
|
|
86 compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
|
|
87 while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){
|
|
88 switch(c){
|
|
89 case 'h': return bgzip_main_usage();
|
|
90 case 'd': compress = 0; break;
|
|
91 case 'c': pstdout = 1; break;
|
|
92 case 'b': start = atol(optarg); break;
|
|
93 case 's': size = atol(optarg); break;
|
|
94 case 'f': is_forced = 1; break;
|
|
95 }
|
|
96 }
|
|
97 if (size >= 0) end = start + size;
|
|
98 if (end >= 0 && end < start) {
|
|
99 fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
|
|
100 return 1;
|
|
101 }
|
|
102 if (compress == 1) {
|
|
103 struct stat sbuf;
|
|
104 int f_src = fileno(stdin);
|
|
105 int f_dst = fileno(stdout);
|
|
106
|
|
107 if ( argc>optind )
|
|
108 {
|
|
109 if ( stat(argv[optind],&sbuf)<0 )
|
|
110 {
|
|
111 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
|
|
112 return 1;
|
|
113 }
|
|
114
|
|
115 if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
|
|
116 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
|
|
117 return 1;
|
|
118 }
|
|
119
|
|
120 if (pstdout)
|
|
121 f_dst = fileno(stdout);
|
|
122 else
|
|
123 {
|
|
124 char *name = malloc(strlen(argv[optind]) + 5);
|
|
125 strcpy(name, argv[optind]);
|
|
126 strcat(name, ".gz");
|
|
127 f_dst = write_open(name, is_forced);
|
|
128 if (f_dst < 0) return 1;
|
|
129 free(name);
|
|
130 }
|
|
131 }
|
|
132 else if (!pstdout && isatty(fileno((FILE *)stdout)) )
|
|
133 return bgzip_main_usage();
|
|
134
|
|
135 fp = bgzf_fdopen(f_dst, "w");
|
|
136 buffer = malloc(WINDOW_SIZE);
|
|
137 while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
|
|
138 if (bgzf_write(fp, buffer, c) < 0) fail(fp);
|
|
139 // f_dst will be closed here
|
|
140 if (bgzf_close(fp) < 0) fail(fp);
|
|
141 if (argc > optind && !pstdout) unlink(argv[optind]);
|
|
142 free(buffer);
|
|
143 close(f_src);
|
|
144 return 0;
|
|
145 } else {
|
|
146 struct stat sbuf;
|
|
147 int f_dst;
|
|
148
|
|
149 if ( argc>optind )
|
|
150 {
|
|
151 if ( stat(argv[optind],&sbuf)<0 )
|
|
152 {
|
|
153 fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
|
|
154 return 1;
|
|
155 }
|
|
156 char *name;
|
|
157 int len = strlen(argv[optind]);
|
|
158 if ( strcmp(argv[optind]+len-3,".gz") )
|
|
159 {
|
|
160 fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
|
|
161 return 1;
|
|
162 }
|
|
163 fp = bgzf_open(argv[optind], "r");
|
|
164 if (fp == NULL) {
|
|
165 fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
|
|
166 return 1;
|
|
167 }
|
|
168
|
|
169 if (pstdout) {
|
|
170 f_dst = fileno(stdout);
|
|
171 }
|
|
172 else {
|
|
173 name = strdup(argv[optind]);
|
|
174 name[strlen(name) - 3] = '\0';
|
|
175 f_dst = write_open(name, is_forced);
|
|
176 free(name);
|
|
177 }
|
|
178 }
|
|
179 else if (!pstdout && isatty(fileno((FILE *)stdin)) )
|
|
180 return bgzip_main_usage();
|
|
181 else
|
|
182 {
|
|
183 f_dst = fileno(stdout);
|
|
184 fp = bgzf_fdopen(fileno(stdin), "r");
|
|
185 if (fp == NULL) {
|
|
186 fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
|
|
187 return 1;
|
|
188 }
|
|
189 }
|
|
190 buffer = malloc(WINDOW_SIZE);
|
|
191 if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
|
|
192 while (1) {
|
|
193 if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
|
|
194 else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
|
|
195 if (c == 0) break;
|
|
196 if (c < 0) fail(fp);
|
|
197 start += c;
|
|
198 write(f_dst, buffer, c);
|
|
199 if (end >= 0 && start >= end) break;
|
|
200 }
|
|
201 free(buffer);
|
|
202 if (bgzf_close(fp) < 0) fail(fp);
|
|
203 if (!pstdout) unlink(argv[optind]);
|
|
204 return 0;
|
|
205 }
|
|
206 }
|