annotate pyPRADA_1.2/tools/samtools-0.1.16/bcftools/bcf.h @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 /* The MIT License
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 Copyright (c) 2010 Broad Institute
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 a copy of this software and associated documentation files (the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 the following conditions:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 SOFTWARE.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 /* Contact: Heng Li <lh3@live.co.uk> */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 #ifndef BCF_H
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 #define BCF_H
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 #include <stdint.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 #include <zlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 #ifndef BCF_LITE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 #include "bgzf.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 typedef BGZF *bcfFile;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 #else
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 typedef gzFile bcfFile;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 #define bgzf_open(fn, mode) gzopen(fn, mode)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 #define bgzf_fdopen(fd, mode) gzdopen(fd, mode)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 #define bgzf_close(fp) gzclose(fp)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 #define bgzf_read(fp, buf, len) gzread(fp, buf, len)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 #define bgzf_write(fp, buf, len)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 #define bgzf_flush(fp)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 /*
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 A member in the structs below is said to be "primary" if its content
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 cannot be inferred from other members in any of structs below; a
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 member is said to be "derived" if its content can be derived from
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 other members. For example, bcf1_t::str is primary as this comes from
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 the input data, while bcf1_t::info is derived as it can always be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 correctly set if we know bcf1_t::str. Derived members are for quick
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 access to the content and must be synchronized with the primary data.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 uint32_t fmt; // format of the block, set by bcf_str2int().
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 int len; // length of data for each individual
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 void *data; // concatenated data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 // derived info: fmt, len (<-bcf1_t::fmt)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 } bcf_ginfo_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 int32_t tid, pos; // refID and 0-based position
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 int32_t l_str, m_str; // length and the allocated size of ->str
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 float qual; // SNP quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 char *str; // concatenated string of variable length strings in VCF (from col.2 to col.7)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 char *ref, *alt, *flt, *info, *fmt; // they all point to ->str; no memory allocation
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 int n_gi, m_gi; // number and the allocated size of geno fields
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 bcf_ginfo_t *gi; // array of geno fields
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 int n_alleles, n_smpl; // number of alleles and samples
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 // derived info: ref, alt, flt, info, fmt (<-str), n_gi (<-fmt), n_alleles (<-alt), n_smpl (<-bcf_hdr_t::n_smpl)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 } bcf1_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 int32_t n_ref, n_smpl; // number of reference sequences and samples
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 int32_t l_nm; // length of concatenated sequence names; 0 padded
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 int32_t l_smpl; // length of concatenated sample names; 0 padded
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 int32_t l_txt; // length of header text (lines starting with ##)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 char *name, *sname, *txt; // concatenated sequence names, sample names and header text
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 char **ns, **sns; // array of sequence and sample names; point to name and sname, respectively
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 // derived info: n_ref (<-name), n_smpl (<-sname), ns (<-name), sns (<-sname)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 } bcf_hdr_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 int is_vcf; // if the file in operation is a VCF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 void *v; // auxiliary data structure for VCF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 bcfFile fp; // file handler for BCF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 } bcf_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 struct __bcf_idx_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 typedef struct __bcf_idx_t bcf_idx_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 #ifdef __cplusplus
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 extern "C" {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 // open a BCF file; for BCF file only
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 bcf_t *bcf_open(const char *fn, const char *mode);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 // close file
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 int bcf_close(bcf_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 // read one record from BCF; return -1 on end-of-file, and <-1 for errors
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 // call this function if b->str is changed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 int bcf_sync(bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 // write a BCF record
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 // read the BCF header; BCF only
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 bcf_hdr_t *bcf_hdr_read(bcf_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 // write the BCF header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 // set bcf_hdr_t::ns and bcf_hdr_t::sns
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 int bcf_hdr_sync(bcf_hdr_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 // destroy the header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 void bcf_hdr_destroy(bcf_hdr_t *h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 // destroy a record
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 int bcf_destroy(bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 // BCF->VCF conversion
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 // append more info
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 int bcf_append_info(bcf1_t *b, const char *info, int l);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 // copy
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 int bcf_cpy(bcf1_t *r, const bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 // open a VCF or BCF file if "b" is set in "mode"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 bcf_t *vcf_open(const char *fn, const char *mode);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 // close a VCF/BCF file
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 int vcf_close(bcf_t *bp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 // read the VCF/BCF header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 bcf_hdr_t *vcf_hdr_read(bcf_t *bp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 // read the sequence dictionary from a separate file; required for VCF->BCF conversion
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 // read a VCF/BCF record; return -1 on end-of-file and <-1 for errors
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 // write the VCF header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 // write a VCF record
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 // keep the first n alleles and discard the rest
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 int bcf_shrink_alt(bcf1_t *b, int n);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 // convert GL to PL
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 int bcf_gl2pl(bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 // if the site is an indel
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 int bcf_is_indel(const bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 int bcf_subsam(int n_smpl, int *list, bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 // move GT to the first FORMAT field
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 int bcf_fix_gt(bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 // update PL generated by old samtools
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 int bcf_fix_pl(bcf1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 // string hash table
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 void *bcf_build_refhash(bcf_hdr_t *h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 void bcf_str2id_destroy(void *_hash);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 void bcf_str2id_thorough_destroy(void *_hash);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 int bcf_str2id_add(void *_hash, const char *str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 int bcf_str2id(void *_hash, const char *str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 void *bcf_str2id_init();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 // indexing related functions
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 int bcf_idx_build(const char *fn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 bcf_idx_t *bcf_idx_load(const char *fn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 void bcf_idx_destroy(bcf_idx_t *idx);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 #ifdef __cplusplus
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
/*
 * Pack the leading bytes of a short tag string into a 32-bit integer.
 *
 * At most four bytes are consumed, and never more than `l`. Packing stops
 * early at the first NUL byte. The first byte ends up in the most
 * significant occupied position, e.g. "PL" -> ('P' << 8) | 'L'.
 * The comment on bcf_ginfo_t::fmt says that field is set by this function.
 *
 * str: input bytes; need not be NUL-terminated if `l` bounds the read.
 * l:   maximum number of bytes to read; values <= 0 yield 0.
 *
 * Returns the packed value (0 for an empty string).
 */
static inline uint32_t bcf_str2int(const char *str, int l)
{
	int i;
	uint32_t x = 0;
	for (i = 0; i < l && i < 4; ++i) {
		/*
		 * Go through unsigned char: plain char may be signed, and a
		 * byte >= 0x80 would otherwise be sign-extended and set every
		 * upper bit of x when OR-ed in.
		 */
		unsigned char c = (unsigned char)str[i];
		if (c == 0) return x;
		x = x<<8 | c;
	}
	return x;
}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 #endif