annotate ezBAMQC/src/htslib/tabix.c @ 5:0c5c414c3407

Uploaded
author cshl-bsr
date Tue, 29 Mar 2016 15:33:54 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /* tabix.c -- Generic indexer for TAB-delimited genome position files.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Copyright (C) 2009-2011 Broad Institute.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4 Copyright (C) 2010-2012, 2014 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 Author: Heng Li <lh3@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 Permission is hereby granted, free of charge, to any person obtaining a copy
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 of this software and associated documentation files (the "Software"), to deal
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10 in the Software without restriction, including without limitation the rights
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 copies of the Software, and to permit persons to whom the Software is
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 furnished to do so, subject to the following conditions:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 The above copyright notice and this permission notice shall be included in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 all copies or substantial portions of the Software.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DEALINGS IN THE SOFTWARE. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 #include <stdio.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 #include <stdlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 #include <unistd.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 #include <string.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30 #include <getopt.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 #include <sys/types.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 #include <sys/stat.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 #include <errno.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 #include "htslib/tbx.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 #include "htslib/sam.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #include "htslib/vcf.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 #include "htslib/kseq.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 #include "htslib/bgzf.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 #include "htslib/hts.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 #include "htslib/regidx.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 typedef struct
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 char *regions_fname, *targets_fname;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 int print_header, header_only;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 args_t;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 static void error(const char *format, ...)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 va_list ap;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 va_start(ap, format);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 vfprintf(stderr, format, ap);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 va_end(ap);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 exit(EXIT_FAILURE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 #define IS_GFF (1<<0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 #define IS_BED (1<<1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 #define IS_SAM (1<<2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 #define IS_VCF (1<<3)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 #define IS_BCF (1<<4)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 #define IS_BAM (1<<5)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 #define IS_CRAM (1<<6)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 #define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 int file_type(const char *fname)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 int l = strlen(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 int strcasecmp(const char *s1, const char *s2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 htsFile *fp = hts_open(fname,"r");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 enum htsExactFormat format = fp->format.format;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 hts_close(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 if ( format == bcf ) return IS_BCF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 if ( format == bam ) return IS_BAM;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 if ( format == cram ) return IS_CRAM;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 if ( format == vcf ) return IS_VCF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 static char **parse_regions(char *regions_fname, char **argv, int argc, int *nregs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 kstring_t str = {0,0,0};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 int iseq = 0, ireg = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 char **regs = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 *nregs = argc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 if ( regions_fname )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 // improve me: this is a too heavy machinery for parsing regions...
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 regidx_t *idx = regidx_init(regions_fname, NULL, NULL, 0, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 if ( !idx ) error("Could not read %s\n", regions_fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 (*nregs) += regidx_nregs(idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 regs = (char**) malloc(sizeof(char*)*(*nregs));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 int nseq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 char **seqs = regidx_seq_names(idx, &nseq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 for (iseq=0; iseq<nseq; iseq++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 regitr_t itr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 regidx_overlap(idx, seqs[iseq], 0, UINT32_MAX, &itr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 while ( itr.i < itr.n )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 str.l = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 ksprintf(&str, "%s:%d-%d", seqs[iseq], REGITR_START(itr)+1, REGITR_END(itr)+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 regs[ireg++] = strdup(str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 itr.i++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 regidx_destroy(idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 free(str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 if ( !ireg )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 if ( argc )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 regs = (char**) malloc(sizeof(char*)*argc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131 regs = (char**) malloc(sizeof(char*));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 regs[0] = strdup(".");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 *nregs = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137 for (iseq=0; iseq<argc; iseq++) regs[ireg++] = strdup(argv[iseq]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 return regs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 static int query_regions(args_t *args, char *fname, char **regs, int nregs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 htsFile *fp = hts_open(fname,"r");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 if ( !fp ) error("Could not read %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 enum htsExactFormat format = hts_get_format(fp)->format;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 regidx_t *reg_idx = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 if ( args->targets_fname )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 if ( !reg_idx ) error("Could not read %s\n", args->targets_fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 if ( format == bcf )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 htsFile *out = hts_open("-","w");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 if ( !out ) error("Could not open stdout\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 hts_idx_t *idx = bcf_index_load(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 if ( !idx ) error("Could not load .csi index of %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160 bcf_hdr_t *hdr = bcf_hdr_read(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 if ( !hdr ) error("Could not read the header: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 if ( args->print_header )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 bcf_hdr_write(out,hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 if ( !args->header_only )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166 bcf1_t *rec = bcf_init();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167 for (i=0; i<nregs; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170 while ( bcf_itr_next(fp, itr, rec) >=0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173 bcf_write(out,hdr,rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 tbx_itr_destroy(itr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177 bcf_destroy(rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179 if ( hts_close(out) ) error("hts_close returned non-zero status for stdout\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180 bcf_hdr_destroy(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 hts_idx_destroy(idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183 else if ( format==vcf || format==sam || format==unknown_format )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 tbx_t *tbx = tbx_index_load(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
186 if ( !tbx ) error("Could not load .tbi/.csi index of %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
187 kstring_t str = {0,0,0};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
188 if ( args->print_header )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
189 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
190 while ( hts_getline(fp, KS_SEP_LINE, &str) >= 0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
191 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
192 if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
193 puts(str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
194 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
195 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
196 if ( !args->header_only )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
197 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
198 int nseq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199 const char **seq = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
200 if ( reg_idx ) seq = tbx_seqnames(tbx, &nseq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
201 for (i=0; i<nregs; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
202 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
203 hts_itr_t *itr = tbx_itr_querys(tbx, regs[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
204 if ( !itr ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
205 while (tbx_itr_next(fp, tbx, itr, &str) >= 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
206 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
207 if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end, NULL) ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
208 puts(str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
209 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
210 tbx_itr_destroy(itr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
211 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
212 free(seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
213 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
214 free(str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
215 tbx_destroy(tbx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
216 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
217 else if ( format==bam )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
218 error("Please use \"samtools view\" for querying BAM files.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
219
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
220 if ( reg_idx ) regidx_destroy(reg_idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
221 if ( hts_close(fp) ) error("hts_close returned non-zero status: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
222
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
223 for (i=0; i<nregs; i++) free(regs[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
224 free(regs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
225 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
226 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
227 static int query_chroms(char *fname)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
228 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
229 const char **seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
230 int i, nseq, ftype = file_type(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
231 if ( ftype & IS_TXT || !ftype )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
232 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
233 tbx_t *tbx = tbx_index_load(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234 if ( !tbx ) error("Could not load .tbi index of %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235 seq = tbx_seqnames(tbx, &nseq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
236 for (i=0; i<nseq; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
237 printf("%s\n", seq[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
238 free(seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
239 tbx_destroy(tbx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
240 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
241 else if ( ftype==IS_BCF )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
242 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
243 htsFile *fp = hts_open(fname,"r");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
244 if ( !fp ) error("Could not read %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
245 bcf_hdr_t *hdr = bcf_hdr_read(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
246 if ( !hdr ) error("Could not read the header: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
247 hts_close(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
248 hts_idx_t *idx = bcf_index_load(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
249 if ( !idx ) error("Could not load .csi index of %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
250 seq = bcf_index_seqnames(idx, hdr, &nseq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
251 for (i=0; i<nseq; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
252 printf("%s\n", seq[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
253 free(seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
254 bcf_hdr_destroy(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
255 hts_idx_destroy(idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
256 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
257 else if ( ftype==IS_BAM ) // todo: BAM
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
258 error("BAM: todo\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
259 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
260 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
261
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
262 int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
263 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
264 if ( ftype & IS_TXT || !ftype )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
265 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
266 BGZF *fp = bgzf_open(fname,"r");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
267 if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
268
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
269 char *buffer = fp->uncompressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
270 int skip_until = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
271
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
272 // Skip the header: find out the position of the data block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
273 if ( buffer[0]==conf->meta_char )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
274 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
275 skip_until = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
276 while (1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
277 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
278 if ( buffer[skip_until]=='\n' )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
279 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
280 skip_until++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
281 if ( skip_until>=fp->block_length )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
282 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
283 if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
284 skip_until = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
285 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
286 // The header has finished
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
287 if ( buffer[skip_until]!=conf->meta_char ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
288 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
289 skip_until++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
290 if ( skip_until>=fp->block_length )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
291 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
292 if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
293 skip_until = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
294 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
295 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
296 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
297
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
298 // Output the new header
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
299 FILE *hdr = fopen(header,"r");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
300 if ( !hdr ) error("%s: %s", header,strerror(errno));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
301 int page_size = getpagesize();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
302 char *buf = valloc(page_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
303 BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
304 ssize_t nread;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
305 while ( (nread=fread(buf,1,page_size-1,hdr))>0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
306 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
307 if ( nread<page_size-1 && buf[nread-1]!='\n' ) buf[nread++] = '\n';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
308 if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
309 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
310 if ( fclose(hdr) ) error("close failed: %s\n", header);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
311
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
312 // Output all remainig data read with the header block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
313 if ( fp->block_length - skip_until > 0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
314 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
315 if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error("Error: %d\n",fp->errcode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
316 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
317 if (bgzf_flush(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
318
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
319 while (1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
320 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
321 nread = bgzf_raw_read(fp, buf, page_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
322 if ( nread<=0 ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
323
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
324 int count = bgzf_raw_write(bgzf_out, buf, nread);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
325 if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
326 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
327 if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
328 if (bgzf_close(fp) < 0) error("Error: %d\n",fp->errcode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
329 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
330 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
331 error("todo: reheader BCF, BAM\n"); // BCF is difficult, records contain pointers to the header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
332 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
333 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
334
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
335 static int usage(void)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
336 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
337 fprintf(stderr, "\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
338 fprintf(stderr, "Version: %s\n", hts_version());
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
339 fprintf(stderr, "Usage: tabix [OPTIONS] [FILE] [REGION [...]]\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
340 fprintf(stderr, "\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
341 fprintf(stderr, "Indexing Options:\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
342 fprintf(stderr, " -0, --zero-based coordinates are zero-based\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
343 fprintf(stderr, " -b, --begin INT column number for region start [4]\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
344 fprintf(stderr, " -c, --comment CHAR skip comment lines starting with CHAR [null]\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
345 fprintf(stderr, " -C, --csi generate CSI index for VCF (default is TBI)\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
346 fprintf(stderr, " -e, --end INT column number for region end (if no end, set INT to -b) [5]\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
347 fprintf(stderr, " -f, --force overwrite existing index without asking\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
348 fprintf(stderr, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
349 fprintf(stderr, " -p, --preset STR gff, bed, sam, vcf\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
350 fprintf(stderr, " -s, --sequence INT column number for sequence names (suppressed by -p) [1]\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
351 fprintf(stderr, " -S, --skip-lines INT skip first INT lines [0]\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
352 fprintf(stderr, "\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
353 fprintf(stderr, "Querying and other options:\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
354 fprintf(stderr, " -h, --print-header print also the header lines\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
355 fprintf(stderr, " -H, --only-header print only the header lines\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
356 fprintf(stderr, " -l, --list-chroms list chromosome names\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
357 fprintf(stderr, " -r, --reheader FILE replace the header with the content of FILE\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
358 fprintf(stderr, " -R, --regions FILE restrict to regions listed in the file\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
359 fprintf(stderr, " -T, --targets FILE similar to -R but streams rather than index-jumps\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
360 fprintf(stderr, "\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
361 return 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
362 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
363
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
364 int main(int argc, char *argv[])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
365 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
366 int c, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
367 tbx_conf_t conf = tbx_conf_gff, *conf_ptr = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
368 char *reheader = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
369 args_t args;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
370 memset(&args,0,sizeof(args_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
371
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
372 static struct option loptions[] =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
373 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
374 {"help",0,0,'h'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
375 {"regions",1,0,'R'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
376 {"targets",1,0,'T'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
377 {"csi",0,0,'C'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
378 {"zero-based",0,0,'0'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
379 {"print-header",0,0,'h'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
380 {"only-header",0,0,'H'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
381 {"begin",1,0,'b'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
382 {"comment",1,0,'c'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
383 {"end",1,0,'e'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
384 {"force",0,0,'f'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
385 {"preset",1,0,'p'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
386 {"sequence",1,0,'s'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
387 {"skip-lines",1,0,'S'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
388 {"list-chroms",0,0,'l'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
389 {"reheader",1,0,'r'},
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
390 {0,0,0,0}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
391 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
392
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
393 while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
394 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
395 switch (c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
396 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
397 case 'R': args.regions_fname = optarg; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
398 case 'T': args.targets_fname = optarg; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
399 case 'C': do_csi = 1; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
400 case 'r': reheader = optarg; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
401 case 'h': args.print_header = 1; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
402 case 'H': args.header_only = 1; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
403 case 'l': list_chroms = 1; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
404 case '0': conf.preset |= TBX_UCSC; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
405 case 'b': conf.bc = atoi(optarg); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
406 case 'e': conf.ec = atoi(optarg); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
407 case 'c': conf.meta_char = *optarg; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
408 case 'f': is_force = 1; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
409 case 'm': min_shift = atoi(optarg); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
410 case 'p':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
411 if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
412 else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
413 else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
414 else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
415 else if (strcmp(optarg, "bcf") == 0) ; // bcf is autodetected, preset is not needed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
416 else if (strcmp(optarg, "bam") == 0) ; // same as bcf
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
417 else error("The preset string not recognised: '%s'\n", optarg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
418 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
419 case 's': conf.sc = atoi(optarg); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
420 case 'S': conf.line_skip = atoi(optarg); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
421 default: return usage();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
422 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
423 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
424
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
425 if ( optind==argc ) return usage();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
426
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
427 if ( list_chroms )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
428 return query_chroms(argv[optind]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
429
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
430 if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
431 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
432 int nregs = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
433 char **regs = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
434 if ( !args.header_only )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
435 regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
436 return query_regions(&args, argv[optind], regs, nregs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
437 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
438
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
439 char *fname = argv[optind];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
440 int ftype = file_type(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
441 if ( !conf_ptr ) // no preset given
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
442 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
443 if ( ftype==IS_GFF ) conf_ptr = &tbx_conf_gff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
444 else if ( ftype==IS_BED ) conf_ptr = &tbx_conf_bed;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
445 else if ( ftype==IS_SAM ) conf_ptr = &tbx_conf_sam;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
446 else if ( ftype==IS_VCF )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
447 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
448 conf_ptr = &tbx_conf_vcf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
449 if ( !min_shift && do_csi ) min_shift = 14;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
450 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
451 else if ( ftype==IS_BCF )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
452 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
453 if ( !min_shift ) min_shift = 14;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
454 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
455 else if ( ftype==IS_BAM )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
456 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
457 if ( !min_shift ) min_shift = 14;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
458 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
459 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
460 if ( do_csi )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
461 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
462 if ( !min_shift ) min_shift = 14;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
463 min_shift *= do_csi; // positive for CSIv2, negative for CSIv1
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
464 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
465 if ( min_shift!=0 && !do_csi ) do_csi = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
466
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
467 if ( reheader )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
468 return reheader_file(fname, reheader, ftype, conf_ptr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
469
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
470 if ( conf_ptr )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
471 conf = *conf_ptr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
472
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
473 char *suffix = ".tbi";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
474 if ( do_csi ) suffix = ".csi";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
475 else if ( ftype==IS_BAM ) suffix = ".bai";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
476 else if ( ftype==IS_CRAM ) suffix = ".crai";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
477
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
478 char *idx_fname = calloc(strlen(fname) + 5, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
479 strcat(strcpy(idx_fname, fname), suffix);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
480
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
481 struct stat stat_tbi, stat_file;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
482 if ( !is_force && stat(idx_fname, &stat_tbi)==0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
483 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
484 // Before complaining about existing index, check if the VCF file isn't
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
485 // newer. This is a common source of errors, people tend not to notice
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
486 // that tabix failed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
487 stat(fname, &stat_file);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
488 if ( stat_file.st_mtime <= stat_tbi.st_mtime )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
489 error("[tabix] the index file exists. Please use '-f' to overwrite.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
490 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
491 free(idx_fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
492
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
493 if ( ftype==IS_CRAM )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
494 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
495 if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
496 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
497 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
498 else if ( do_csi )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
499 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
500 if ( ftype==IS_BCF )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
501 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
502 if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
503 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
504 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
505 if ( ftype==IS_BAM )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
506 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
507 if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
508 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
509 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
510 if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
511 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
512 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
513 else // TBI index
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
514 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
515 if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
516 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
517 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
518 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
519 }