0
|
1 /* tabix.c -- Generic indexer for TAB-delimited genome position files.
|
|
2
|
|
3 Copyright (C) 2009-2011 Broad Institute.
|
|
4 Copyright (C) 2010-2012, 2014 Genome Research Ltd.
|
|
5
|
|
6 Author: Heng Li <lh3@sanger.ac.uk>
|
|
7
|
|
8 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
9 of this software and associated documentation files (the "Software"), to deal
|
|
10 in the Software without restriction, including without limitation the rights
|
|
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
12 copies of the Software, and to permit persons to whom the Software is
|
|
13 furnished to do so, subject to the following conditions:
|
|
14
|
|
15 The above copyright notice and this permission notice shall be included in
|
|
16 all copies or substantial portions of the Software.
|
|
17
|
|
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
24 DEALINGS IN THE SOFTWARE. */
|
|
25
|
|
26 #include <stdio.h>
|
|
27 #include <stdlib.h>
|
|
28 #include <unistd.h>
|
|
29 #include <string.h>
|
|
30 #include <getopt.h>
|
|
31 #include <sys/types.h>
|
|
32 #include <sys/stat.h>
|
|
33 #include <errno.h>
|
|
34 #include "htslib/tbx.h"
|
|
35 #include "htslib/sam.h"
|
|
36 #include "htslib/vcf.h"
|
|
37 #include "htslib/kseq.h"
|
|
38 #include "htslib/bgzf.h"
|
|
39 #include "htslib/hts.h"
|
|
40 #include "htslib/regidx.h"
|
|
41
|
|
42 typedef struct
|
|
43 {
|
|
44 char *regions_fname, *targets_fname;
|
|
45 int print_header, header_only;
|
|
46 }
|
|
47 args_t;
|
|
48
|
|
49 static void error(const char *format, ...)
|
|
50 {
|
|
51 va_list ap;
|
|
52 va_start(ap, format);
|
|
53 vfprintf(stderr, format, ap);
|
|
54 va_end(ap);
|
|
55 exit(EXIT_FAILURE);
|
|
56 }
|
|
57
|
|
58 #define IS_GFF (1<<0)
|
|
59 #define IS_BED (1<<1)
|
|
60 #define IS_SAM (1<<2)
|
|
61 #define IS_VCF (1<<3)
|
|
62 #define IS_BCF (1<<4)
|
|
63 #define IS_BAM (1<<5)
|
|
64 #define IS_CRAM (1<<6)
|
|
65 #define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF)
|
|
66
|
|
67 int file_type(const char *fname)
|
|
68 {
|
|
69 int l = strlen(fname);
|
|
70 int strcasecmp(const char *s1, const char *s2);
|
|
71 if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF;
|
|
72 else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED;
|
|
73 else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM;
|
|
74 else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF;
|
|
75 else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF;
|
|
76 else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM;
|
|
77 else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM;
|
|
78
|
|
79 htsFile *fp = hts_open(fname,"r");
|
|
80 enum htsExactFormat format = fp->format.format;
|
|
81 hts_close(fp);
|
|
82 if ( format == bcf ) return IS_BCF;
|
|
83 if ( format == bam ) return IS_BAM;
|
|
84 if ( format == cram ) return IS_CRAM;
|
|
85 if ( format == vcf ) return IS_VCF;
|
|
86
|
|
87 return 0;
|
|
88 }
|
|
89
|
|
90 static char **parse_regions(char *regions_fname, char **argv, int argc, int *nregs)
|
|
91 {
|
|
92 kstring_t str = {0,0,0};
|
|
93 int iseq = 0, ireg = 0;
|
|
94 char **regs = NULL;
|
|
95 *nregs = argc;
|
|
96
|
|
97 if ( regions_fname )
|
|
98 {
|
|
99 // improve me: this is a too heavy machinery for parsing regions...
|
|
100
|
|
101 regidx_t *idx = regidx_init(regions_fname, NULL, NULL, 0, NULL);
|
|
102 if ( !idx ) error("Could not read %s\n", regions_fname);
|
|
103
|
|
104 (*nregs) += regidx_nregs(idx);
|
|
105 regs = (char**) malloc(sizeof(char*)*(*nregs));
|
|
106
|
|
107 int nseq;
|
|
108 char **seqs = regidx_seq_names(idx, &nseq);
|
|
109 for (iseq=0; iseq<nseq; iseq++)
|
|
110 {
|
|
111 regitr_t itr;
|
|
112 regidx_overlap(idx, seqs[iseq], 0, UINT32_MAX, &itr);
|
|
113 while ( itr.i < itr.n )
|
|
114 {
|
|
115 str.l = 0;
|
|
116 ksprintf(&str, "%s:%d-%d", seqs[iseq], REGITR_START(itr)+1, REGITR_END(itr)+1);
|
|
117 regs[ireg++] = strdup(str.s);
|
|
118 itr.i++;
|
|
119 }
|
|
120 }
|
|
121 regidx_destroy(idx);
|
|
122 }
|
|
123 free(str.s);
|
|
124
|
|
125 if ( !ireg )
|
|
126 {
|
|
127 if ( argc )
|
|
128 regs = (char**) malloc(sizeof(char*)*argc);
|
|
129 else
|
|
130 {
|
|
131 regs = (char**) malloc(sizeof(char*));
|
|
132 regs[0] = strdup(".");
|
|
133 *nregs = 1;
|
|
134 }
|
|
135 }
|
|
136
|
|
137 for (iseq=0; iseq<argc; iseq++) regs[ireg++] = strdup(argv[iseq]);
|
|
138 return regs;
|
|
139 }
|
|
140 static int query_regions(args_t *args, char *fname, char **regs, int nregs)
|
|
141 {
|
|
142 int i;
|
|
143 htsFile *fp = hts_open(fname,"r");
|
|
144 if ( !fp ) error("Could not read %s\n", fname);
|
|
145 enum htsExactFormat format = hts_get_format(fp)->format;
|
|
146
|
|
147 regidx_t *reg_idx = NULL;
|
|
148 if ( args->targets_fname )
|
|
149 {
|
|
150 reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL);
|
|
151 if ( !reg_idx ) error("Could not read %s\n", args->targets_fname);
|
|
152 }
|
|
153
|
|
154 if ( format == bcf )
|
|
155 {
|
|
156 htsFile *out = hts_open("-","w");
|
|
157 if ( !out ) error("Could not open stdout\n", fname);
|
|
158 hts_idx_t *idx = bcf_index_load(fname);
|
|
159 if ( !idx ) error("Could not load .csi index of %s\n", fname);
|
|
160 bcf_hdr_t *hdr = bcf_hdr_read(fp);
|
|
161 if ( !hdr ) error("Could not read the header: %s\n", fname);
|
|
162 if ( args->print_header )
|
|
163 bcf_hdr_write(out,hdr);
|
|
164 if ( !args->header_only )
|
|
165 {
|
|
166 bcf1_t *rec = bcf_init();
|
|
167 for (i=0; i<nregs; i++)
|
|
168 {
|
|
169 hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]);
|
|
170 while ( bcf_itr_next(fp, itr, rec) >=0 )
|
|
171 {
|
|
172 if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
|
|
173 bcf_write(out,hdr,rec);
|
|
174 }
|
|
175 tbx_itr_destroy(itr);
|
|
176 }
|
|
177 bcf_destroy(rec);
|
|
178 }
|
|
179 if ( hts_close(out) ) error("hts_close returned non-zero status for stdout\n");
|
|
180 bcf_hdr_destroy(hdr);
|
|
181 hts_idx_destroy(idx);
|
|
182 }
|
|
183 else if ( format==vcf || format==sam || format==unknown_format )
|
|
184 {
|
|
185 tbx_t *tbx = tbx_index_load(fname);
|
|
186 if ( !tbx ) error("Could not load .tbi/.csi index of %s\n", fname);
|
|
187 kstring_t str = {0,0,0};
|
|
188 if ( args->print_header )
|
|
189 {
|
|
190 while ( hts_getline(fp, KS_SEP_LINE, &str) >= 0 )
|
|
191 {
|
|
192 if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break;
|
|
193 puts(str.s);
|
|
194 }
|
|
195 }
|
|
196 if ( !args->header_only )
|
|
197 {
|
|
198 int nseq;
|
|
199 const char **seq = NULL;
|
|
200 if ( reg_idx ) seq = tbx_seqnames(tbx, &nseq);
|
|
201 for (i=0; i<nregs; i++)
|
|
202 {
|
|
203 hts_itr_t *itr = tbx_itr_querys(tbx, regs[i]);
|
|
204 if ( !itr ) continue;
|
|
205 while (tbx_itr_next(fp, tbx, itr, &str) >= 0)
|
|
206 {
|
|
207 if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end, NULL) ) continue;
|
|
208 puts(str.s);
|
|
209 }
|
|
210 tbx_itr_destroy(itr);
|
|
211 }
|
|
212 free(seq);
|
|
213 }
|
|
214 free(str.s);
|
|
215 tbx_destroy(tbx);
|
|
216 }
|
|
217 else if ( format==bam )
|
|
218 error("Please use \"samtools view\" for querying BAM files.\n");
|
|
219
|
|
220 if ( reg_idx ) regidx_destroy(reg_idx);
|
|
221 if ( hts_close(fp) ) error("hts_close returned non-zero status: %s\n", fname);
|
|
222
|
|
223 for (i=0; i<nregs; i++) free(regs[i]);
|
|
224 free(regs);
|
|
225 return 0;
|
|
226 }
|
|
227 static int query_chroms(char *fname)
|
|
228 {
|
|
229 const char **seq;
|
|
230 int i, nseq, ftype = file_type(fname);
|
|
231 if ( ftype & IS_TXT || !ftype )
|
|
232 {
|
|
233 tbx_t *tbx = tbx_index_load(fname);
|
|
234 if ( !tbx ) error("Could not load .tbi index of %s\n", fname);
|
|
235 seq = tbx_seqnames(tbx, &nseq);
|
|
236 for (i=0; i<nseq; i++)
|
|
237 printf("%s\n", seq[i]);
|
|
238 free(seq);
|
|
239 tbx_destroy(tbx);
|
|
240 }
|
|
241 else if ( ftype==IS_BCF )
|
|
242 {
|
|
243 htsFile *fp = hts_open(fname,"r");
|
|
244 if ( !fp ) error("Could not read %s\n", fname);
|
|
245 bcf_hdr_t *hdr = bcf_hdr_read(fp);
|
|
246 if ( !hdr ) error("Could not read the header: %s\n", fname);
|
|
247 hts_close(fp);
|
|
248 hts_idx_t *idx = bcf_index_load(fname);
|
|
249 if ( !idx ) error("Could not load .csi index of %s\n", fname);
|
|
250 seq = bcf_index_seqnames(idx, hdr, &nseq);
|
|
251 for (i=0; i<nseq; i++)
|
|
252 printf("%s\n", seq[i]);
|
|
253 free(seq);
|
|
254 bcf_hdr_destroy(hdr);
|
|
255 hts_idx_destroy(idx);
|
|
256 }
|
|
257 else if ( ftype==IS_BAM ) // todo: BAM
|
|
258 error("BAM: todo\n");
|
|
259 return 0;
|
|
260 }
|
|
261
|
|
262 int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf)
|
|
263 {
|
|
264 if ( ftype & IS_TXT || !ftype )
|
|
265 {
|
|
266 BGZF *fp = bgzf_open(fname,"r");
|
|
267 if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length ) return -1;
|
|
268
|
|
269 char *buffer = fp->uncompressed_block;
|
|
270 int skip_until = 0;
|
|
271
|
|
272 // Skip the header: find out the position of the data block
|
|
273 if ( buffer[0]==conf->meta_char )
|
|
274 {
|
|
275 skip_until = 1;
|
|
276 while (1)
|
|
277 {
|
|
278 if ( buffer[skip_until]=='\n' )
|
|
279 {
|
|
280 skip_until++;
|
|
281 if ( skip_until>=fp->block_length )
|
|
282 {
|
|
283 if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname);
|
|
284 skip_until = 0;
|
|
285 }
|
|
286 // The header has finished
|
|
287 if ( buffer[skip_until]!=conf->meta_char ) break;
|
|
288 }
|
|
289 skip_until++;
|
|
290 if ( skip_until>=fp->block_length )
|
|
291 {
|
|
292 if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", fname);
|
|
293 skip_until = 0;
|
|
294 }
|
|
295 }
|
|
296 }
|
|
297
|
|
298 // Output the new header
|
|
299 FILE *hdr = fopen(header,"r");
|
|
300 if ( !hdr ) error("%s: %s", header,strerror(errno));
|
|
301 int page_size = getpagesize();
|
|
302 char *buf = valloc(page_size);
|
|
303 BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w");
|
|
304 ssize_t nread;
|
|
305 while ( (nread=fread(buf,1,page_size-1,hdr))>0 )
|
|
306 {
|
|
307 if ( nread<page_size-1 && buf[nread-1]!='\n' ) buf[nread++] = '\n';
|
|
308 if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode);
|
|
309 }
|
|
310 if ( fclose(hdr) ) error("close failed: %s\n", header);
|
|
311
|
|
312 // Output all remainig data read with the header block
|
|
313 if ( fp->block_length - skip_until > 0 )
|
|
314 {
|
|
315 if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error("Error: %d\n",fp->errcode);
|
|
316 }
|
|
317 if (bgzf_flush(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
|
|
318
|
|
319 while (1)
|
|
320 {
|
|
321 nread = bgzf_raw_read(fp, buf, page_size);
|
|
322 if ( nread<=0 ) break;
|
|
323
|
|
324 int count = bgzf_raw_write(bgzf_out, buf, nread);
|
|
325 if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
|
|
326 }
|
|
327 if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
|
|
328 if (bgzf_close(fp) < 0) error("Error: %d\n",fp->errcode);
|
|
329 }
|
|
330 else
|
|
331 error("todo: reheader BCF, BAM\n"); // BCF is difficult, records contain pointers to the header.
|
|
332 return 0;
|
|
333 }
|
|
334
|
|
335 static int usage(void)
|
|
336 {
|
|
337 fprintf(stderr, "\n");
|
|
338 fprintf(stderr, "Version: %s\n", hts_version());
|
|
339 fprintf(stderr, "Usage: tabix [OPTIONS] [FILE] [REGION [...]]\n");
|
|
340 fprintf(stderr, "\n");
|
|
341 fprintf(stderr, "Indexing Options:\n");
|
|
342 fprintf(stderr, " -0, --zero-based coordinates are zero-based\n");
|
|
343 fprintf(stderr, " -b, --begin INT column number for region start [4]\n");
|
|
344 fprintf(stderr, " -c, --comment CHAR skip comment lines starting with CHAR [null]\n");
|
|
345 fprintf(stderr, " -C, --csi generate CSI index for VCF (default is TBI)\n");
|
|
346 fprintf(stderr, " -e, --end INT column number for region end (if no end, set INT to -b) [5]\n");
|
|
347 fprintf(stderr, " -f, --force overwrite existing index without asking\n");
|
|
348 fprintf(stderr, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n");
|
|
349 fprintf(stderr, " -p, --preset STR gff, bed, sam, vcf\n");
|
|
350 fprintf(stderr, " -s, --sequence INT column number for sequence names (suppressed by -p) [1]\n");
|
|
351 fprintf(stderr, " -S, --skip-lines INT skip first INT lines [0]\n");
|
|
352 fprintf(stderr, "\n");
|
|
353 fprintf(stderr, "Querying and other options:\n");
|
|
354 fprintf(stderr, " -h, --print-header print also the header lines\n");
|
|
355 fprintf(stderr, " -H, --only-header print only the header lines\n");
|
|
356 fprintf(stderr, " -l, --list-chroms list chromosome names\n");
|
|
357 fprintf(stderr, " -r, --reheader FILE replace the header with the content of FILE\n");
|
|
358 fprintf(stderr, " -R, --regions FILE restrict to regions listed in the file\n");
|
|
359 fprintf(stderr, " -T, --targets FILE similar to -R but streams rather than index-jumps\n");
|
|
360 fprintf(stderr, "\n");
|
|
361 return 1;
|
|
362 }
|
|
363
|
|
364 int main(int argc, char *argv[])
|
|
365 {
|
|
366 int c, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0;
|
|
367 tbx_conf_t conf = tbx_conf_gff, *conf_ptr = NULL;
|
|
368 char *reheader = NULL;
|
|
369 args_t args;
|
|
370 memset(&args,0,sizeof(args_t));
|
|
371
|
|
372 static struct option loptions[] =
|
|
373 {
|
|
374 {"help",0,0,'h'},
|
|
375 {"regions",1,0,'R'},
|
|
376 {"targets",1,0,'T'},
|
|
377 {"csi",0,0,'C'},
|
|
378 {"zero-based",0,0,'0'},
|
|
379 {"print-header",0,0,'h'},
|
|
380 {"only-header",0,0,'H'},
|
|
381 {"begin",1,0,'b'},
|
|
382 {"comment",1,0,'c'},
|
|
383 {"end",1,0,'e'},
|
|
384 {"force",0,0,'f'},
|
|
385 {"preset",1,0,'p'},
|
|
386 {"sequence",1,0,'s'},
|
|
387 {"skip-lines",1,0,'S'},
|
|
388 {"list-chroms",0,0,'l'},
|
|
389 {"reheader",1,0,'r'},
|
|
390 {0,0,0,0}
|
|
391 };
|
|
392
|
|
393 while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0)
|
|
394 {
|
|
395 switch (c)
|
|
396 {
|
|
397 case 'R': args.regions_fname = optarg; break;
|
|
398 case 'T': args.targets_fname = optarg; break;
|
|
399 case 'C': do_csi = 1; break;
|
|
400 case 'r': reheader = optarg; break;
|
|
401 case 'h': args.print_header = 1; break;
|
|
402 case 'H': args.header_only = 1; break;
|
|
403 case 'l': list_chroms = 1; break;
|
|
404 case '0': conf.preset |= TBX_UCSC; break;
|
|
405 case 'b': conf.bc = atoi(optarg); break;
|
|
406 case 'e': conf.ec = atoi(optarg); break;
|
|
407 case 'c': conf.meta_char = *optarg; break;
|
|
408 case 'f': is_force = 1; break;
|
|
409 case 'm': min_shift = atoi(optarg); break;
|
|
410 case 'p':
|
|
411 if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff;
|
|
412 else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed;
|
|
413 else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;
|
|
414 else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;
|
|
415 else if (strcmp(optarg, "bcf") == 0) ; // bcf is autodetected, preset is not needed
|
|
416 else if (strcmp(optarg, "bam") == 0) ; // same as bcf
|
|
417 else error("The preset string not recognised: '%s'\n", optarg);
|
|
418 break;
|
|
419 case 's': conf.sc = atoi(optarg); break;
|
|
420 case 'S': conf.line_skip = atoi(optarg); break;
|
|
421 default: return usage();
|
|
422 }
|
|
423 }
|
|
424
|
|
425 if ( optind==argc ) return usage();
|
|
426
|
|
427 if ( list_chroms )
|
|
428 return query_chroms(argv[optind]);
|
|
429
|
|
430 if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname )
|
|
431 {
|
|
432 int nregs = 0;
|
|
433 char **regs = NULL;
|
|
434 if ( !args.header_only )
|
|
435 regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs);
|
|
436 return query_regions(&args, argv[optind], regs, nregs);
|
|
437 }
|
|
438
|
|
439 char *fname = argv[optind];
|
|
440 int ftype = file_type(fname);
|
|
441 if ( !conf_ptr ) // no preset given
|
|
442 {
|
|
443 if ( ftype==IS_GFF ) conf_ptr = &tbx_conf_gff;
|
|
444 else if ( ftype==IS_BED ) conf_ptr = &tbx_conf_bed;
|
|
445 else if ( ftype==IS_SAM ) conf_ptr = &tbx_conf_sam;
|
|
446 else if ( ftype==IS_VCF )
|
|
447 {
|
|
448 conf_ptr = &tbx_conf_vcf;
|
|
449 if ( !min_shift && do_csi ) min_shift = 14;
|
|
450 }
|
|
451 else if ( ftype==IS_BCF )
|
|
452 {
|
|
453 if ( !min_shift ) min_shift = 14;
|
|
454 }
|
|
455 else if ( ftype==IS_BAM )
|
|
456 {
|
|
457 if ( !min_shift ) min_shift = 14;
|
|
458 }
|
|
459 }
|
|
460 if ( do_csi )
|
|
461 {
|
|
462 if ( !min_shift ) min_shift = 14;
|
|
463 min_shift *= do_csi; // positive for CSIv2, negative for CSIv1
|
|
464 }
|
|
465 if ( min_shift!=0 && !do_csi ) do_csi = 1;
|
|
466
|
|
467 if ( reheader )
|
|
468 return reheader_file(fname, reheader, ftype, conf_ptr);
|
|
469
|
|
470 if ( conf_ptr )
|
|
471 conf = *conf_ptr;
|
|
472
|
|
473 char *suffix = ".tbi";
|
|
474 if ( do_csi ) suffix = ".csi";
|
|
475 else if ( ftype==IS_BAM ) suffix = ".bai";
|
|
476 else if ( ftype==IS_CRAM ) suffix = ".crai";
|
|
477
|
|
478 char *idx_fname = calloc(strlen(fname) + 5, 1);
|
|
479 strcat(strcpy(idx_fname, fname), suffix);
|
|
480
|
|
481 struct stat stat_tbi, stat_file;
|
|
482 if ( !is_force && stat(idx_fname, &stat_tbi)==0 )
|
|
483 {
|
|
484 // Before complaining about existing index, check if the VCF file isn't
|
|
485 // newer. This is a common source of errors, people tend not to notice
|
|
486 // that tabix failed
|
|
487 stat(fname, &stat_file);
|
|
488 if ( stat_file.st_mtime <= stat_tbi.st_mtime )
|
|
489 error("[tabix] the index file exists. Please use '-f' to overwrite.\n");
|
|
490 }
|
|
491 free(idx_fname);
|
|
492
|
|
493 if ( ftype==IS_CRAM )
|
|
494 {
|
|
495 if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
|
|
496 return 0;
|
|
497 }
|
|
498 else if ( do_csi )
|
|
499 {
|
|
500 if ( ftype==IS_BCF )
|
|
501 {
|
|
502 if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname);
|
|
503 return 0;
|
|
504 }
|
|
505 if ( ftype==IS_BAM )
|
|
506 {
|
|
507 if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
|
|
508 return 0;
|
|
509 }
|
|
510 if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname);
|
|
511 return 0;
|
|
512 }
|
|
513 else // TBI index
|
|
514 {
|
|
515 if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname);
|
|
516 return 0;
|
|
517 }
|
|
518 return 0;
|
|
519 }
|