Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/bcftools/bcf.h @ 0:903fc43d6227 draft default tip
Uploaded
| author | lsong10 |
|---|---|
| date | Fri, 26 Mar 2021 16:52:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:903fc43d6227 |
|---|---|
| 1 /* The MIT License | |
| 2 | |
| 3 Copyright (c) 2010 Broad Institute | |
| 4 | |
| 5 Permission is hereby granted, free of charge, to any person obtaining | |
| 6 a copy of this software and associated documentation files (the | |
| 7 "Software"), to deal in the Software without restriction, including | |
| 8 without limitation the rights to use, copy, modify, merge, publish, | |
| 9 distribute, sublicense, and/or sell copies of the Software, and to | |
| 10 permit persons to whom the Software is furnished to do so, subject to | |
| 11 the following conditions: | |
| 12 | |
| 13 The above copyright notice and this permission notice shall be | |
| 14 included in all copies or substantial portions of the Software. | |
| 15 | |
| 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
| 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
| 20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
| 21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
| 22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| 23 SOFTWARE. | |
| 24 */ | |
| 25 | |
| 26 /* Contact: Heng Li <lh3@live.co.uk> */ | |
| 27 | |
| 28 #ifndef BCF_H | |
| 29 #define BCF_H | |
| 30 | |
| 31 #define BCF_VERSION "0.1.19-44428cd" | |
| 32 | |
| 33 #include <stdint.h> | |
| 34 #include <zlib.h> | |
| 35 | |
| 36 #ifndef BCF_LITE | |
| 37 #include "bgzf.h" | |
| 38 typedef BGZF *bcfFile; | |
| 39 #else | |
| 40 typedef gzFile bcfFile; | |
| 41 #define bgzf_open(fn, mode) gzopen(fn, mode) | |
| 42 #define bgzf_fdopen(fd, mode) gzdopen(fd, mode) | |
| 43 #define bgzf_close(fp) gzclose(fp) | |
| 44 #define bgzf_read(fp, buf, len) gzread(fp, buf, len) | |
| 45 #define bgzf_write(fp, buf, len) | |
| 46 #define bgzf_flush(fp) | |
| 47 #endif | |
| 48 | |
| 49 /* | |
| 50 A member in the structs below is said to be "primary" if its content | |
| 51 cannot be inferred from other members in any of the structs below; a | |
| 52 member is said to be "derived" if its content can be derived from | |
| 53 other members. For example, bcf1_t::str is primary as this comes from | |
| 54 the input data, while bcf1_t::info is derived as it can always be | |
| 55 correctly set if we know bcf1_t::str. Derived members are for quick | |
| 56 access to the content and must be synchronized with the primary data. | |
| 57 */ | |
| 58 | |
/*
  One per-genotype data block of a record.
  Derived members: fmt and len (both follow from bcf1_t::fmt).
*/
typedef struct {
	// Format of the block, as set by bcf_str2int().
	uint32_t fmt;
	// Length of the data belonging to each individual.
	int len;
	// Data of all individuals, concatenated.
	void *data;
} bcf_ginfo_t;
| 65 | |
| 66 typedef struct { | |
| 67 int32_t tid, pos; // refID and 0-based position | |
| 68 int32_t l_str, m_str; // length and the allocated size of ->str | |
| 69 float qual; // SNP quality | |
| 70 char *str; // concatenated string of variable length strings in VCF (from col.2 to col.7) | |
| 71 char *ref, *alt, *flt, *info, *fmt; // they all point to ->str; no memory allocation | |
| 72 int n_gi, m_gi; // number and the allocated size of geno fields | |
| 73 bcf_ginfo_t *gi; // array of geno fields | |
| 74 int n_alleles, n_smpl; // number of alleles and samples | |
| 75 // derived info: ref, alt, flt, info, fmt (<-str), n_gi (<-fmt), n_alleles (<-alt), n_smpl (<-bcf_hdr_t::n_smpl) | |
| 76 uint8_t *ploidy; // ploidy of all samples; if NULL, ploidy of 2 is assumed. | |
| 77 } bcf1_t; | |
| 78 | |
/*
  BCF header.
  Derived members: n_ref (<-name), n_smpl (<-sname), ns (<-name), sns (<-sname).
*/
typedef struct {
	// Number of reference sequences
	int32_t n_ref;
	// Number of samples
	int32_t n_smpl;
	// Length of the concatenated sequence names; 0 padded
	int32_t l_nm;
	// Length of the concatenated sample names; 0 padded
	int32_t l_smpl;
	// Length of the header text (lines started with ##)
	int32_t l_txt;
	// Concatenated sequence names
	char *name;
	// Concatenated sample names
	char *sname;
	// Header text
	char *txt;
	// Array of sequence names; entries point into name
	char **ns;
	// Array of sample names; entries point into sname
	char **sns;
} bcf_hdr_t;
| 88 | |
| 89 typedef struct { | |
| 90 int is_vcf; // if the file in operation is a VCF | |
| 91 void *v; // auxillary data structure for VCF | |
| 92 bcfFile fp; // file handler for BCF | |
| 93 } bcf_t; | |
| 94 | |
// Opaque index type: only the tag is declared here, so this header's users
// can hold and pass bcf_idx_t pointers without seeing the struct's members.
typedef struct __bcf_idx_t bcf_idx_t;
| 97 | |
| 98 #ifdef __cplusplus | |
| 99 extern "C" { | |
| 100 #endif | |
| 101 | |
| 102 // open a BCF file; for BCF file only | |
| 103 bcf_t *bcf_open(const char *fn, const char *mode); | |
| 104 // close file | |
| 105 int bcf_close(bcf_t *b); | |
| 106 // read one record from BCF; return -1 on end-of-file, and <-1 for errors | |
| 107 int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b); | |
| 108 // call this function if b->str is changed | |
| 109 int bcf_sync(bcf1_t *b); | |
| 110 // write a BCF record | |
| 111 int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b); | |
| 112 // read the BCF header; BCF only | |
| 113 bcf_hdr_t *bcf_hdr_read(bcf_t *b); | |
| 114 // write the BCF header | |
| 115 int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h); | |
| 116 // set bcf_hdr_t::ns and bcf_hdr_t::sns | |
| 117 int bcf_hdr_sync(bcf_hdr_t *b); | |
| 118 // destroy the header | |
| 119 void bcf_hdr_destroy(bcf_hdr_t *h); | |
| 120 // destroy a record | |
| 121 int bcf_destroy(bcf1_t *b); | |
| 122 // BCF->VCF conversion | |
| 123 char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b); | |
| 124 // append more info | |
| 125 int bcf_append_info(bcf1_t *b, const char *info, int l); | |
| 126 // remove tag | |
| 127 int remove_tag(char *string, const char *tag, char delim); | |
| 128 // remove info tag, string is the kstring holder of bcf1_t.str | |
| 129 void rm_info(kstring_t *string, const char *key); | |
| 130 // copy | |
| 131 int bcf_cpy(bcf1_t *r, const bcf1_t *b); | |
| 132 | |
| 133 // open a VCF or BCF file if "b" is set in "mode" | |
| 134 bcf_t *vcf_open(const char *fn, const char *mode); | |
| 135 // close a VCF/BCF file | |
| 136 int vcf_close(bcf_t *bp); | |
| 137 // read the VCF/BCF header | |
| 138 bcf_hdr_t *vcf_hdr_read(bcf_t *bp); | |
| 139 // read the sequence dictionary from a separate file; required for VCF->BCF conversion | |
| 140 int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn); | |
| 141 // read a VCF/BCF record; return -1 on end-of-file and <-1 for errors | |
| 142 int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b); | |
| 143 // write the VCF header | |
| 144 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h); | |
| 145 // write a VCF record | |
| 146 int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b); | |
| 147 | |
| 148 // keep the first n alleles and discard the rest | |
| 149 int bcf_shrink_alt(bcf1_t *b, int n); | |
| 150 // keep the masked alleles and discard the rest | |
| 151 void bcf_fit_alt(bcf1_t *b, int mask); | |
| 152 // convert GL to PL | |
| 153 int bcf_gl2pl(bcf1_t *b); | |
| 154 // if the site is an indel | |
| 155 int bcf_is_indel(const bcf1_t *b); | |
| 156 bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list); | |
| 157 int bcf_subsam(int n_smpl, int *list, bcf1_t *b); | |
| 158 // move GT to the first FORMAT field | |
| 159 int bcf_fix_gt(bcf1_t *b); | |
| 160 // update PL generated by old samtools | |
| 161 int bcf_fix_pl(bcf1_t *b); | |
| 162 // convert PL to GLF-like 10-likelihood GL | |
| 163 int bcf_gl10(const bcf1_t *b, uint8_t *gl); | |
| 164 // convert up to 4 INDEL alleles to GLF-like 10-likelihood GL | |
| 165 int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl); | |
| 166 | |
| 167 // string hash table | |
| 168 void *bcf_build_refhash(bcf_hdr_t *h); | |
| 169 void bcf_str2id_destroy(void *_hash); | |
| 170 void bcf_str2id_thorough_destroy(void *_hash); | |
| 171 int bcf_str2id_add(void *_hash, const char *str); | |
| 172 int bcf_str2id(void *_hash, const char *str); | |
| 173 void *bcf_str2id_init(); | |
| 174 | |
| 175 // indexing related functions | |
| 176 int bcf_idx_build(const char *fn); | |
| 177 uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg); | |
| 178 int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end); | |
| 179 bcf_idx_t *bcf_idx_load(const char *fn); | |
| 180 void bcf_idx_destroy(bcf_idx_t *idx); | |
| 181 | |
| 182 #ifdef __cplusplus | |
| 183 } | |
| 184 #endif | |
| 185 | |
/*
  Pack the leading characters of str into a 32-bit integer.

  At most min(l, 4) characters are consumed, and packing stops early at
  the first NUL. Each consumed character shifts the accumulated value
  left by 8 bits and occupies the low byte, so "PL" yields 0x504c.
  Returns 0 if nothing is consumed (l <= 0 or str starts with NUL).
*/
static inline uint32_t bcf_str2int(const char *str, int l)
{
	uint32_t x = 0;
	int i;
	for (i = 0; i < l && i < 4; ++i) {
		if (str[i] == 0) return x;
		// Convert through uint8_t first: where plain char is signed, a byte
		// >= 0x80 would otherwise be sign-extended by the integer promotion
		// and set every higher bit of x.
		x = x<<8 | (uint8_t)str[i];
	}
	return x;
}
| 196 | |
| 197 #endif |
