annotate PsiCLASS-1.0.2/samtools-0.1.19/bcftools/bcfutils.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #include <math.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 #include <assert.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include "bcf.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include "kstring.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 #include "khash.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 KHASH_MAP_INIT_STR(str2id, int)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #ifdef _WIN32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 #define srand48(x) srand(x)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #define drand48() ((double)rand() / RAND_MAX)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 // FIXME: valgrind reports a memory leak in this function. The hash is probably never deallocated...
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 void *bcf_build_refhash(bcf_hdr_t *h)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 khash_t(str2id) *hash;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 int i, ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 hash = kh_init(str2id);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 for (i = 0; i < h->n_ref; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 k = kh_put(str2id, hash, h->ns[i], &ret); // FIXME: check ret
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 kh_val(hash, k) = i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 return hash;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 void *bcf_str2id_init()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 return kh_init(str2id);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 void bcf_str2id_destroy(void *_hash)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 if (hash) kh_destroy(str2id, hash); // Note that strings are not freed.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 void bcf_str2id_thorough_destroy(void *_hash)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 if (hash == 0) return;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 for (k = 0; k < kh_end(hash); ++k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 if (kh_exist(hash, k)) free((char*)kh_key(hash, k));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 kh_destroy(str2id, hash);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 int bcf_str2id(void *_hash, const char *str)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 if (!hash) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 k = kh_get(str2id, hash, str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 return k == kh_end(hash)? -1 : kh_val(hash, k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 int bcf_str2id_add(void *_hash, const char *str)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 int ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 if (!hash) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 k = kh_put(str2id, hash, str, &ret);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 if (ret == 0) return kh_val(hash, k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 kh_val(hash, k) = kh_size(hash) - 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 return kh_val(hash, k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 void bcf_fit_alt(bcf1_t *b, int mask)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 mask |= 1; // REF must be always present
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 int i,j,nals=0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 for (i=0; i<sizeof(int); i++)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 if ( mask&1<<i) nals++;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 if ( b->n_alleles <= nals ) return;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 // update ALT, in principle any of the alleles can be removed
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 char *p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 if ( nals>1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 char *dst, *src;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 int n=0, nalts=nals-1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 for (src=dst=p=b->alt, i=1; *p; p++)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 if ( *p!=',' ) continue;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 if ( mask&1<<i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 n++;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 if ( src!=dst )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 memmove(dst,src,p-src);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 dst += p-src;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 else dst = p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 if ( n<nalts ) { *dst=','; dst++; }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 i++;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 if ( n>=nalts ) { *dst=0; break; }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 src = p+1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 if ( n<nalts )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 memmove(dst,src,p-src);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 dst += p-src;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 *dst = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 p = dst;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 else p = b->alt, *p = '\0';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 p++;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 memmove(p, b->flt, b->str + b->l_str - b->flt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 b->l_str -= b->flt - p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 // update PL and GT
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 int ipl=-1, igt=-1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 bcf_ginfo_t *g = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 if (g->fmt == bcf_str2int("PL", 2)) ipl = i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 if (g->fmt == bcf_str2int("GT", 2)) igt = i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 // .. create mapping between old and new indexes
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 int npl = nals * (nals+1) / 2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 int *map = malloc(sizeof(int)*(npl>b->n_alleles ? npl : b->n_alleles));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 int kori=0,knew=0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 for (i=0; i<b->n_alleles; i++)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 for (j=0; j<=i; j++)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 int skip=0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 if ( i && !(mask&1<<i) ) skip=1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 if ( j && !(mask&1<<j) ) skip=1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 if ( !skip ) { map[knew++] = kori; }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 kori++;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 // .. apply to all samples
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 int n_smpl = b->n_smpl;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 bcf_ginfo_t *g = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 if (g->fmt == bcf_str2int("PL", 2))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 g->len = npl;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 uint8_t *d = (uint8_t*)g->data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 int ismpl, npl_ori = b->n_alleles * (b->n_alleles + 1) / 2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 for (knew=ismpl=0; ismpl<n_smpl; ismpl++)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 uint8_t *dl = d + ismpl * npl_ori;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 for (j=0; j<npl; j++) d[knew++] = dl[map[j]];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 } // FIXME: to add GL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 // update GTs
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 map[0] = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 for (i=1, knew=0; i<b->n_alleles; i++)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 map[i] = mask&1<<i ? ++knew : -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 for (i=0; i<n_smpl; i++)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 uint8_t gt = ((uint8_t*)b->gi[igt].data)[i];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 int a1 = (gt>>3)&7;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 int a2 = gt&7;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 assert( map[a1]>=0 && map[a2]>=0 );
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 ((uint8_t*)b->gi[igt].data)[i] = ((1<<7|1<<6)&gt) | map[a1]<<3 | map[a2];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 free(map);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 b->n_alleles = nals;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 bcf_sync(b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 int bcf_shrink_alt(bcf1_t *b, int n)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 char *p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 int i, j, k, n_smpl = b->n_smpl;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 if (b->n_alleles <= n) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 // update ALT
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 if (n > 1) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 for (p = b->alt, k = 1; *p; ++p)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 if (*p == ',' && ++k == n) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 *p = '\0';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 } else p = b->alt, *p = '\0';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 ++p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 memmove(p, b->flt, b->str + b->l_str - b->flt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 b->l_str -= b->flt - p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 // update PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 for (i = 0; i < b->n_gi; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 bcf_ginfo_t *g = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 if (g->fmt == bcf_str2int("PL", 2)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 int l, x = b->n_alleles * (b->n_alleles + 1) / 2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 uint8_t *d = (uint8_t*)g->data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 g->len = n * (n + 1) / 2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 for (l = k = 0; l < n_smpl; ++l) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 uint8_t *dl = d + l * x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 for (j = 0; j < g->len; ++j) d[k++] = dl[j];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 } // FIXME: to add GL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 b->n_alleles = n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 bcf_sync(b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 int bcf_gl2pl(bcf1_t *b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 char *p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 int i, n_smpl = b->n_smpl;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 bcf_ginfo_t *g;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 float *d0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 uint8_t *d1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 if (strstr(b->fmt, "PL")) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 if ((p = strstr(b->fmt, "GL")) == 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 *p = 'P';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 if (b->gi[i].fmt == bcf_str2int("GL", 2))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 g = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 g->fmt = bcf_str2int("PL", 2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 g->len /= 4; // 4 == sizeof(float)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 d0 = (float*)g->data; d1 = (uint8_t*)g->data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 for (i = 0; i < n_smpl * g->len; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 int x = (int)(-10. * d0[i] + .499);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 if (x > 255) x = 255;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 if (x < 0) x = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 d1[i] = x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 /* FIXME: this function will fail given AB:GTX:GT. BCFtools never
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 * produces such FMT, but others may do. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 int bcf_fix_gt(bcf1_t *b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 char *s;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 uint32_t tmp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 bcf_ginfo_t gt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 // check the presence of the GT FMT
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 if ((s = strstr(b->fmt, ":GT")) == 0) return 0; // no GT or GT is already the first
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 assert(s[3] == '\0' || s[3] == ':'); // :GTX in fact
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 tmp = bcf_str2int("GT", 2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 if (b->gi[i].fmt == tmp) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 if (i == b->n_gi) return 0; // no GT in b->gi; probably a bug...
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 gt = b->gi[i];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 // move GT to the first
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 for (; i > 0; --i) b->gi[i] = b->gi[i-1];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 b->gi[0] = gt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 if ( s[3]==0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 memmove(b->fmt + 3, b->fmt, s - b->fmt); // :GT
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 memmove(b->fmt + 3, b->fmt, s - b->fmt + 1); // :GT:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 b->fmt[0] = 'G'; b->fmt[1] = 'T'; b->fmt[2] = ':';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 int bcf_fix_pl(bcf1_t *b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 uint32_t tmp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 uint8_t *PL, *swap;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 bcf_ginfo_t *gi;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 // pinpoint PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 tmp = bcf_str2int("PL", 2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 if (b->gi[i].fmt == tmp) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 if (i == b->n_gi) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 // prepare
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 gi = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 PL = (uint8_t*)gi->data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 swap = alloca(gi->len);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 // loop through individuals
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 for (i = 0; i < b->n_smpl; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 int k, l, x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 uint8_t *PLi = PL + i * gi->len;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 memcpy(swap, PLi, gi->len);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 for (k = x = 0; k < b->n_alleles; ++k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 for (l = k; l < b->n_alleles; ++l)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 PLi[l*(l+1)/2 + k] = swap[x++];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 int bcf_smpl_covered(const bcf1_t *b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 int i, j, n = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 uint32_t tmp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 bcf_ginfo_t *gi;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 // pinpoint PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 tmp = bcf_str2int("PL", 2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 if (b->gi[i].fmt == tmp) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 if (i == b->n_gi) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 // count how many samples having PL!=[0..0]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 gi = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 for (i = 0; i < b->n_smpl; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 uint8_t *PLi = ((uint8_t*)gi->data) + i * gi->len;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 for (j = 0; j < gi->len; ++j)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 if (PLi[j]) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 if (j < gi->len) ++n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 return n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 static void *locate_field(const bcf1_t *b, const char *fmt, int l)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 uint32_t tmp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 tmp = bcf_str2int(fmt, l);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 if (b->gi[i].fmt == tmp) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 return i == b->n_gi? 0 : b->gi[i].data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 int bcf_anno_max(bcf1_t *b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 int k, max_gq, max_sp, n_het;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 kstring_t str;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 uint8_t *gt, *gq;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 int32_t *sp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 max_gq = max_sp = n_het = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 gt = locate_field(b, "GT", 2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 if (gt == 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 gq = locate_field(b, "GQ", 2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 sp = locate_field(b, "SP", 2);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 if (sp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 for (k = 0; k < b->n_smpl; ++k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 if (gt[k]&0x3f)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 max_sp = max_sp > (int)sp[k]? max_sp : sp[k];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 if (gq)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 for (k = 0; k < b->n_smpl; ++k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 if (gt[k]&0x3f)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 max_gq = max_gq > (int)gq[k]? max_gq : gq[k];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 for (k = 0; k < b->n_smpl; ++k) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 int a1, a2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 a1 = gt[k]&7; a2 = gt[k]>>3&7;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 if ((!a1 && a2) || (!a2 && a1)) { // a het
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 if (gq == 0) ++n_het;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 else if (gq[k] >= 20) ++n_het;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 if (n_het) max_sp -= (int)(4.343 * log(n_het) + .499);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 if (max_sp < 0) max_sp = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 memset(&str, 0, sizeof(kstring_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 if (*b->info) kputc(';', &str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 ksprintf(&str, "MXSP=%d;MXGQ=%d", max_sp, max_gq);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 bcf_append_info(b, str.s, str.l);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 free(str.s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 // FIXME: only data are shuffled; the header is NOT
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 int bcf_shuffle(bcf1_t *b, int seed)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 int i, j, *a;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 if (seed > 0) srand48(seed);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 a = malloc(b->n_smpl * sizeof(int));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 for (i = 0; i < b->n_smpl; ++i) a[i] = i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 for (i = b->n_smpl; i > 1; --i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 int tmp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 j = (int)(drand48() * i);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 for (j = 0; j < b->n_gi; ++j) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 bcf_ginfo_t *gi = b->gi + j;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 uint8_t *swap, *data = (uint8_t*)gi->data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 swap = malloc(gi->len * b->n_smpl);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 for (i = 0; i < b->n_smpl; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 memcpy(swap + gi->len * a[i], data + gi->len * i, gi->len);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 free(gi->data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 gi->data = swap;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 free(a);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 int i, ret, j;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 bcf_hdr_t *h;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 khash_t(str2id) *hash;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 kstring_t s;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 s.l = s.m = 0; s.s = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 hash = kh_init(str2id);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 for (i = 0; i < h0->n_smpl; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 k = kh_put(str2id, hash, h0->sns[i], &ret);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 kh_val(hash, k) = i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 for (i = j = 0; i < n; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 k = kh_get(str2id, hash, samples[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 if (k != kh_end(hash)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 list[j++] = kh_val(hash, k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 kputs(samples[i], &s); kputc('\0', &s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 if (j < n)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 fprintf(stderr, "<%s> %d samples in the list but not in BCF.", __func__, n - j);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 exit(1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 kh_destroy(str2id, hash);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 h = calloc(1, sizeof(bcf_hdr_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 *h = *h0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 h->ns = 0; h->sns = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 h->name = malloc(h->l_nm); memcpy(h->name, h0->name, h->l_nm);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 h->txt = calloc(1, h->l_txt + 1); memcpy(h->txt, h0->txt, h->l_txt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 h->l_smpl = s.l; h->sname = s.s;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 bcf_hdr_sync(h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 return h;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 int bcf_subsam(int n_smpl, int *list, bcf1_t *b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 int i, j;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 for (j = 0; j < b->n_gi; ++j) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 bcf_ginfo_t *gi = b->gi + j;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 uint8_t *swap;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 swap = malloc(gi->len * b->n_smpl);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 for (i = 0; i < n_smpl; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 memcpy(swap + i * gi->len, (uint8_t*)gi->data + list[i] * gi->len, gi->len);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 free(gi->data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 gi->data = swap;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 b->n_smpl = n_smpl;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/*
 * Lookup table from a 7-bit ASCII character to a nucleotide code:
 *   'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' -> 3,
 *   'X'/'x' -> -1, every other character -> 4.
 * The table has only 128 entries, so the index must be in the range 0..127.
 */
static const int8_t nt4_table[128] = {
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4 /*'-'*/, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  3, 4, 4, 4, -1, 4, 4, 4,  4, 4, 4, 4,
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  3, 4, 4, 4, -1, 4, 4, 4,  4, 4, 4, 4
};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 int bcf_gl10(const bcf1_t *b, uint8_t *gl)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 int a[4], k, l, map[4], k1, j, i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 const bcf_ginfo_t *PL;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 char *s;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 if (b->ref[1] != 0 || b->n_alleles > 4) return -1; // ref is not a single base or >4 alleles
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 if (i == b->n_gi) return -1; // no PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 PL = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 a[0] = nt4_table[(int)b->ref[0]];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 if (a[0] > 3 || a[0] < 0) return -1; // ref is not A/C/G/T
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 a[1] = a[2] = a[3] = -2; // -1 has a special meaning
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 if (b->alt[0] == 0) return -1; // no alternate allele
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 map[0] = map[1] = map[2] = map[3] = -2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 map[a[0]] = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 for (k = 0, s = b->alt, k1 = -1; k < 3 && *s; ++k, s += 2) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 if (s[1] != ',' && s[1] != 0) return -1; // ALT is not single base
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 a[k+1] = nt4_table[(int)*s];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 if (a[k+1] >= 0) map[a[k+1]] = k+1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 else k1 = k + 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 if (s[1] == 0) break; // the end of the ALT string
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 for (k = 0; k < 4; ++k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467 if (map[k] < 0) map[k] = k1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 for (i = 0; i < b->n_smpl; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 uint8_t *g = gl + 10 * i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 for (k = j = 0; k < 4; ++k) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 for (l = k; l < 4; ++l) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 int t, x = map[k], y = map[l];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 if (x > y) t = x, x = y, y = t; // make sure x is the smaller
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 g[j++] = p[y * (y+1) / 2 + x];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 int k, l, j, i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 const bcf_ginfo_t *PL;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 if (b->alt[0] == 0) return -1; // no alternate allele
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 for (i = 0; i < b->n_gi; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 if (i == b->n_gi) return -1; // no PL
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 PL = b->gi + i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 for (i = 0; i < b->n_smpl; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 uint8_t *g = gl + 10 * i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 for (k = j = 0; k < 4; ++k) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 for (l = k; l < 4; ++l) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 int t, x = k, y = l;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 if (x > y) t = x, x = y, y = t; // make sure x is the smaller
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 x = y * (y+1) / 2 + x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 g[j++] = x < PL->len? p[x] : 255;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 }