comparison PsiCLASS-1.0.2/samtools-0.1.19/bcftools/bcf.h @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:903fc43d6227
1 /* The MIT License
2
3 Copyright (c) 2010 Broad Institute
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 */
25
26 /* Contact: Heng Li <lh3@live.co.uk> */
27
28 #ifndef BCF_H
29 #define BCF_H
30
// Version string made available to every file that includes this header.
// NOTE(review): the value looks like a release number followed by a short commit id - confirm.
31 #define BCF_VERSION "0.1.19-44428cd"
32
33 #include <stdint.h>
34 #include <zlib.h>
35
// Select the I/O backend behind bcfFile.
// Without BCF_LITE: bgzf.h is included and bcfFile is a pointer to BGZF.
// With BCF_LITE: bcfFile is zlib's gzFile and the bgzf_* names are mapped
// onto zlib calls by the macros below.
36 #ifndef BCF_LITE
37 #include "bgzf.h"
38 typedef BGZF *bcfFile;
39 #else
40 typedef gzFile bcfFile;
41 #define bgzf_open(fn, mode) gzopen(fn, mode)
42 #define bgzf_fdopen(fd, mode) gzdopen(fd, mode)
43 #define bgzf_close(fp) gzclose(fp)
44 #define bgzf_read(fp, buf, len) gzread(fp, buf, len)
// The next two macros expand to nothing: in a BCF_LITE build a statement
// calling bgzf_write() or bgzf_flush() disappears, so nothing is written.
45 #define bgzf_write(fp, buf, len)
46 #define bgzf_flush(fp)
47 #endif
48
49 /*
50 A member in the structs below is said to be "primary" if its content
51 cannot be inferred from other members in any of the structs below; a
52 member is said to be "derived" if its content can be derived from
53 other members. For example, bcf1_t::str is primary as this comes from
54 the input data, while bcf1_t::info is derived as it can always be
55 correctly set if we know bcf1_t::str. Derived members are for quick
56 access to the content and must be synchronized with the primary data.
57 */
58
// One genotype field of a record; bcf1_t::gi is an array of these.
59 typedef struct {
60 uint32_t fmt; // format of the block as an integer, set by bcf_str2int() (which packs at most four characters)
61 int len; // length of data for each individual
62 void *data; // data of all individuals, concatenated
63 // derived info: fmt, len (<-bcf1_t::fmt)
64 } bcf_ginfo_t;
65
// One record of a BCF/VCF file.
// ->str is the primary text data; ref/alt/flt/info/fmt and the counters listed
// in the "derived info" line are derived from it. bcf_sync(), declared further
// down in this header, is the function to call after ->str has been changed.
66 typedef struct {
67 int32_t tid, pos; // refID and 0-based position
68 int32_t l_str, m_str; // length and the allocated size of ->str
69 float qual; // SNP quality
70 char *str; // concatenated string of variable length strings in VCF (from col.2 to col.7)
71 char *ref, *alt, *flt, *info, *fmt; // all five point into ->str; no memory is allocated for them
72 int n_gi, m_gi; // number and the allocated size of geno fields (->gi)
73 bcf_ginfo_t *gi; // array of geno fields
74 int n_alleles, n_smpl; // number of alleles and samples
75 // derived info: ref, alt, flt, info, fmt (<-str), n_gi (<-fmt), n_alleles (<-alt), n_smpl (<-bcf_hdr_t::n_smpl)
76 uint8_t *ploidy; // ploidy of all samples; if NULL, ploidy of 2 is assumed.
77 } bcf1_t;
78
// File header: reference sequence names, sample names and the header text.
// name, sname and txt hold the primary data; ns and sns are lookup arrays that
// point into name and sname (bcf_hdr_sync(), declared below, sets them).
79 typedef struct {
80 int32_t n_ref, n_smpl; // number of reference sequences and samples
81 int32_t l_nm; // length of concatenated sequence names; 0 padded
82 int32_t l_smpl; // length of concatenated sample names; 0 padded
83 int32_t l_txt; // length of header text (lines started with ##)
84 char *name, *sname, *txt; // concatenated sequence names, sample names and header text
85 char **ns, **sns; // arrays of sequence and sample names; they point into name and sname, respectively
86 // derived info: n_ref (<-name), n_smpl (<-sname), ns (<-name), sns (<-sname)
87 } bcf_hdr_t;
88
// Handle for a file being read or written, either BCF or VCF.
89 typedef struct {
90 int is_vcf; // if the file in operation is a VCF
91 void *v; // auxiliary data structure for VCF
92 bcfFile fp; // file handle for BCF
93 } bcf_t;
94
// Index type. Only the struct tag is declared here, so the type is opaque to
// users of this header and can be handled through pointers only.
95 struct __bcf_idx_t;
96 typedef struct __bcf_idx_t bcf_idx_t;
97
98 #ifdef __cplusplus
99 extern "C" {
100 #endif
101
// ---- BCF files, headers and records ----
// NOTE(review): return-value conventions are stated only where the original
// comments gave them; the implementations are not part of this header.

102 // open a BCF file; for BCF file only
103 bcf_t *bcf_open(const char *fn, const char *mode);
104 // close a file opened with bcf_open()
105 int bcf_close(bcf_t *b);
106 // read one record from BCF; return -1 on end-of-file, and <-1 for errors
107 int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b);
108 // call this function if b->str is changed (the derived members of bcf1_t depend on ->str)
109 int bcf_sync(bcf1_t *b);
110 // write a BCF record
111 int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b);
112 // read the BCF header; BCF only
113 bcf_hdr_t *bcf_hdr_read(bcf_t *b);
114 // write the BCF header
115 int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h);
116 // set bcf_hdr_t::ns and bcf_hdr_t::sns
117 int bcf_hdr_sync(bcf_hdr_t *b);
118 // destroy the header
119 void bcf_hdr_destroy(bcf_hdr_t *h);
120 // destroy a record
121 int bcf_destroy(bcf1_t *b);
122 // BCF->VCF conversion
123 char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b);
124 // append more info; l is presumably the length of info - TODO confirm
125 int bcf_append_info(bcf1_t *b, const char *info, int l);
126 // remove tag from string; delim is presumably the separator between tags - TODO confirm
127 int remove_tag(char *string, const char *tag, char delim);
128 // remove info tag, string is the kstring holder of bcf1_t.str
// NOTE(review): kstring_t is not declared by anything this header includes
// directly; presumably bgzf.h or the including file provides it - confirm,
// in particular for BCF_LITE builds where bgzf.h is not included.
129 void rm_info(kstring_t *string, const char *key);
130 // copy record b into r
131 int bcf_cpy(bcf1_t *r, const bcf1_t *b);
132
// ---- VCF counterparts; per the comments they also accept BCF files ----

133 // open a VCF file, or a BCF file if "b" is set in "mode"
134 bcf_t *vcf_open(const char *fn, const char *mode);
135 // close a VCF/BCF file
136 int vcf_close(bcf_t *bp);
137 // read the VCF/BCF header
138 bcf_hdr_t *vcf_hdr_read(bcf_t *bp);
139 // read the sequence dictionary from a separate file; required for VCF->BCF conversion
140 int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn);
141 // read a VCF/BCF record; return -1 on end-of-file and <-1 for errors
142 int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b);
143 // write the VCF header
144 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h);
145 // write a VCF record
146 int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b);
147
// ---- allele, sample and genotype-likelihood utilities ----

148 // keep the first n alleles and discard the rest
149 int bcf_shrink_alt(bcf1_t *b, int n);
150 // keep the masked alleles and discard the rest
151 void bcf_fit_alt(bcf1_t *b, int mask);
152 // convert GL to PL
153 int bcf_gl2pl(bcf1_t *b);
154 // if the site is an indel
155 int bcf_is_indel(const bcf1_t *b);
// NOTE(review): the next two functions have no description in this header.
// From the names and signatures they presumably restrict a header and a
// record to a subset of samples, with list carrying the selection from the
// first to the second - confirm against the implementation.
156 bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list);
157 int bcf_subsam(int n_smpl, int *list, bcf1_t *b);
158 // move GT to the first FORMAT field
159 int bcf_fix_gt(bcf1_t *b);
160 // update PL generated by old samtools
161 int bcf_fix_pl(bcf1_t *b);
162 // convert PL to GLF-like 10-likelihood GL; the result goes to the caller-provided gl
163 int bcf_gl10(const bcf1_t *b, uint8_t *gl);
164 // convert up to 4 INDEL alleles to GLF-like 10-likelihood GL
165 int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl);
166
167 // string hash table
168 void *bcf_build_refhash(bcf_hdr_t *h);
169 void bcf_str2id_destroy(void *_hash);
170 void bcf_str2id_thorough_destroy(void *_hash);
171 int bcf_str2id_add(void *_hash, const char *str);
172 int bcf_str2id(void *_hash, const char *str);
173 void *bcf_str2id_init();
174
175 // indexing related functions
// NOTE(review): none of these is described in this header; the remarks below
// are read off the names and signatures - confirm against the implementation.
// build an index for file fn
176 int bcf_idx_build(const char *fn);
// query the index for reference tid and position beg; the uint64_t result is presumably a file offset
177 uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg);
// parse region string str into *tid, *begin and *end; str2id is presumably a table from the string hash functions
178 int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end);
// load the index belonging to file fn
179 bcf_idx_t *bcf_idx_load(const char *fn);
// free an index
180 void bcf_idx_destroy(bcf_idx_t *idx);
181
182 #ifdef __cplusplus
183 }
184 #endif
185
/*
 * Pack the leading characters of str into one 32-bit integer.
 *
 * At most min(l, 4) characters are used, and packing stops early at a NUL
 * byte. The first character ends up in the most significant used byte, so
 * "PL" with l == 2 gives 0x504C.
 *
 * str: characters to pack; only the bytes actually visited are read
 * l:   number of characters available in str; l <= 0 gives 0
 *
 * Returns the packed value; 0 if no character was packed.
 */
static inline uint32_t bcf_str2int(const char *str, int l)
{
	uint32_t x = 0;
	int i;

	for (i = 0; i < l && i < 4; ++i) {
		if (str[i] == 0) return x;
		/*
		 * Convert through uint8_t first: where plain char is signed, a
		 * byte >= 0x80 would be sign-extended and overwrite the
		 * characters already packed into the upper bytes of x.
		 */
		x = x << 8 | (uint8_t)str[i];
	}
	return x;
}
196
197 #endif