annotate ezBAMQC/src/htslib/vcfutils.c @ 8:82bb8c455761

Uploaded
author cshl-bsr
date Wed, 30 Mar 2016 12:03:10 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /* vcfutils.c -- allele-related utility functions.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Copyright (C) 2012-2014 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Author: Petr Danecek <pd3@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7 Permission is hereby granted, free of charge, to any person obtaining a copy
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 of this software and associated documentation files (the "Software"), to deal
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 in the Software without restriction, including without limitation the rights
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 copies of the Software, and to permit persons to whom the Software is
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 furnished to do so, subject to the following conditions:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14 The above copyright notice and this permission notice shall be included in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 all copies or substantial portions of the Software.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 DEALINGS IN THE SOFTWARE. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 #include "htslib/vcfutils.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30 for (i=0; i<line->n_allele; i++) ac[i]=0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 // Use INFO/AC,AN field only when asked
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 if ( which&BCF_UN_INFO )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 bcf_unpack(line, BCF_UN_INFO);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 int an_id = bcf_hdr_id2int(header, BCF_DT_ID, "AN");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 int ac_id = bcf_hdr_id2int(header, BCF_DT_ID, "AC");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 int i, an=-1, ac_len=0, ac_type=0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 uint8_t *ac_ptr=NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 if ( an_id>=0 && ac_id>=0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 for (i=0; i<line->n_info; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 bcf_info_t *z = &line->d.info[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 if ( z->key == an_id ) an = z->v1.i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 else if ( z->key == ac_id ) { ac_ptr = z->vptr; ac_len = z->len; ac_type = z->type; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 if ( an>=0 && ac_ptr )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 int nac = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 #define BRANCH_INT(type_t) { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 type_t *p = (type_t *) ac_ptr; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 for (i=0; i<ac_len; i++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 ac[i+1] = p[i]; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 nac += p[i]; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 switch (ac_type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 case BCF_BT_INT8: BRANCH_INT(int8_t); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 case BCF_BT_INT16: BRANCH_INT(int16_t); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 case BCF_BT_INT32: BRANCH_INT(int32_t); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 default: fprintf(stderr, "[E::%s] todo: %d at %s:%d\n", __func__, ac_type, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 #undef BRANCH_INT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 if ( an<nac )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 fprintf(stderr,"[E::%s] Incorrect AN/AC counts at %s:%d\n", __func__,header->id[BCF_DT_CTG][line->rid].key, line->pos+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 ac[0] = an - nac;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 return 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 // Split genotype fields only when asked
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 if ( which&BCF_UN_FMT )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 int i, gt_id = bcf_hdr_id2int(header,BCF_DT_ID,"GT");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 if ( gt_id<0 ) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 bcf_unpack(line, BCF_UN_FMT);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 bcf_fmt_t *fmt_gt = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 for (i=0; i<(int)line->n_fmt; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 if ( !fmt_gt ) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 #define BRANCH_INT(type_t,vector_end) { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 for (i=0; i<line->n_sample; i++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 type_t *p = (type_t*) (fmt_gt->p + i*fmt_gt->size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 int ial; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 for (ial=0; ial<fmt_gt->n; ial++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 if ( p[ial]==vector_end ) break; /* smaller ploidy */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 if ( bcf_gt_is_missing(p[ial]) ) continue; /* missing allele */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 if ( p[ial]>>1 > line->n_allele ) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 fprintf(stderr,"[E::%s] Incorrect allele (\"%d\") in %s at %s:%d\n", __func__,(p[ial]>>1)-1, header->samples[i],header->id[BCF_DT_CTG][line->rid].key, line->pos+1); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 exit(1); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 ac[(p[ial]>>1)-1]++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 switch (fmt_gt->type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106 case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 default: fprintf(stderr, "[E::%s] todo: %d at %s:%d\n", __func__, fmt_gt->type, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 #undef BRANCH_INT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 return 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 int bcf_gt_type(bcf_fmt_t *fmt_ptr, int isample, int *_ial, int *_jal)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 int i, nals = 0, has_ref = 0, has_alt = 0, ial = 0, jal = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 #define BRANCH_INT(type_t,vector_end) { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 type_t *p = (type_t*) (fmt_ptr->p + isample*fmt_ptr->size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 for (i=0; i<fmt_ptr->n; i++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 if ( p[i] == vector_end ) break; /* smaller ploidy */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 if ( bcf_gt_is_missing(p[i]) ) continue; /* missing allele */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 int tmp = p[i]>>1; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 if ( tmp>1 ) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 if ( !ial ) { ial = tmp; has_alt = 1; } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 else if ( tmp!=ial ) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 if ( tmp<ial ) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 jal = ial; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 ial = tmp; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137 else \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139 jal = tmp; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 has_alt = 2; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 else has_ref = 1; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 nals++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 switch (fmt_ptr->type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152 default: fprintf(stderr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 #undef BRANCH_INT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 if ( _ial ) *_ial = ial>0 ? ial-1 : ial;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 if ( _jal ) *_jal = jal>0 ? jal-1 : jal;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 if ( !nals ) return GT_UNKN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 if ( nals==1 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160 return has_ref ? GT_HAPL_R : GT_HAPL_A;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 if ( !has_ref )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 return has_alt==1 ? GT_HOM_AA : GT_HET_AA;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 if ( !has_alt )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 return GT_HOM_RR;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 return GT_HET_RA;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 int bcf_trim_alleles(const bcf_hdr_t *header, bcf1_t *line)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 bcf_fmt_t *gt = bcf_get_fmt(header, line, "GT");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 if ( !gt ) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174 int *ac = (int*) calloc(line->n_allele,sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176 // check if all alleles are populated
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177 #define BRANCH(type_t,vector_end) { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178 for (i=0; i<line->n_sample; i++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180 type_t *p = (type_t*) (gt->p + i*gt->size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 int ial; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182 for (ial=0; ial<gt->n; ial++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 if ( p[ial]==vector_end ) break; /* smaller ploidy */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 if ( bcf_gt_is_missing(p[ial]) ) continue; /* missing allele */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
186 if ( (p[ial]>>1)-1 >= line->n_allele ) { free(ac); return -1; } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
187 ac[(p[ial]>>1)-1]++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
188 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
189 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
190 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
191 switch (gt->type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
192 case BCF_BT_INT8: BRANCH(int8_t, bcf_int8_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
193 case BCF_BT_INT16: BRANCH(int16_t, bcf_int16_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
194 case BCF_BT_INT32: BRANCH(int32_t, bcf_int32_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
195 default: fprintf(stderr, "[E::%s] todo: %d at %s:%d\n", __func__, gt->type, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
196 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
197 #undef BRANCH
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
198
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199 int rm_als = 0, nrm = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
200 for (i=1; i<line->n_allele; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
201 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
202 if ( !ac[i] ) { rm_als |= 1<<i; nrm++; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
203 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
204 free(ac);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
205
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
206 if ( nrm ) bcf_remove_alleles(header, line, rm_als);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
207 return nrm;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
208 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
209
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
210 void bcf_remove_alleles(const bcf_hdr_t *header, bcf1_t *line, int rm_mask)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
211 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
212 int *map = (int*) calloc(line->n_allele, sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
213
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
214 // create map of indexes from old to new ALT numbering and modify ALT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
215 kstring_t str = {0,0,0};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
216 kputs(line->d.allele[0], &str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
217
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
218 int nrm = 0, i,j; // i: ori alleles, j: new alleles
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
219 for (i=1, j=1; i<line->n_allele; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
220 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
221 if ( rm_mask & 1<<i )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
222 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
223 // remove this allele
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
224 line->d.allele[i] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
225 nrm++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
226 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
227 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
228 kputc(',', &str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
229 kputs(line->d.allele[i], &str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
230 map[i] = j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
231 j++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
232 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
233 if ( !nrm ) { free(map); free(str.s); return; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235 int nR_ori = line->n_allele;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
236 int nR_new = line->n_allele-nrm;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
237 assert(nR_new > 0); // should not be able to remove reference allele
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
238 int nA_ori = nR_ori-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
239 int nA_new = nR_new-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
240
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
241 int nG_ori = nR_ori*(nR_ori + 1)/2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
242 int nG_new = nR_new*(nR_new + 1)/2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
243
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
244 bcf_update_alleles_str(header, line, str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
245
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
246 // remove from Number=G, Number=R and Number=A INFO fields.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
247 uint8_t *dat = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
248 int mdat = 0, ndat = 0, mdat_bytes = 0, nret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
249 for (i=0; i<line->n_info; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
250 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
251 bcf_info_t *info = &line->d.info[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
252 int vlen = bcf_hdr_id2length(header,BCF_HL_INFO,info->key);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
253
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
254 if ( vlen!=BCF_VL_A && vlen!=BCF_VL_G && vlen!=BCF_VL_R ) continue; // no need to change
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
255
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
256 int type = bcf_hdr_id2type(header,BCF_HL_INFO,info->key);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
257 if ( type==BCF_HT_FLAG ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
258 int size = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
259 if ( type==BCF_HT_REAL || type==BCF_HT_INT ) size = 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
260
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
261 mdat = mdat_bytes / size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
262 nret = bcf_get_info_values(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void**)&dat, &mdat, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
263 mdat_bytes = mdat * size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
264 if ( nret<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
265 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
266 fprintf(stderr,"[%s:%d %s] Could not access INFO/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
267 bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname(header,line), line->pos+1, nret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
268 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
269 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
270 if ( type==BCF_HT_STR )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
271 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
272 str.l = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
273 char *ss = (char*) dat, *se = (char*) dat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
274 if ( vlen==BCF_VL_A || vlen==BCF_VL_R )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
275 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
276 int nexp, inc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
277 if ( vlen==BCF_VL_A )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
278 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
279 nexp = nA_ori;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
280 inc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
281 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
282 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
283 nexp = nR_ori;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
284 for (j=0; j<nexp; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
285 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
286 if ( !*se ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
287 while ( *se && *se!=',' ) se++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
288 if ( rm_mask & 1<<(j+inc) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
289 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
290 if ( *se ) se++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
291 ss = se;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
292 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
293 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
294 if ( str.l ) kputc(',',&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
295 kputsn(ss,se-ss,&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
296 if ( *se ) se++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
297 ss = se;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
298 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
299 assert( j==nexp );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
300 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
301 else // Number=G, assuming diploid genotype
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
302 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
303 int k = 0, n = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
304 for (j=0; j<nR_ori; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
305 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
306 for (k=0; k<=j; k++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
307 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
308 if ( !*se ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
309 while ( *se && *se!=',' ) se++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
310 n++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
311 if ( rm_mask & 1<<j || rm_mask & 1<<k )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
312 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
313 if ( *se ) se++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
314 ss = se;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
315 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
316 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
317 if ( str.l ) kputc(',',&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
318 kputsn(ss,se-ss,&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
319 if ( *se ) se++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
320 ss = se;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
321 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
322 if ( !*se ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
323 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
324 assert( n=nG_ori );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
325 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
326
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
327 nret = bcf_update_info(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void*)str.s, str.l, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
328 if ( nret<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
329 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
330 fprintf(stderr,"[%s:%d %s] Could not update INFO/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
331 bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname(header,line), line->pos+1, nret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
332 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
333 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
334 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
335 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
336
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
337 if ( vlen==BCF_VL_A || vlen==BCF_VL_R )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
338 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
339 int inc = 0, ntop;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
340 if ( vlen==BCF_VL_A )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
341 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
342 assert( nret==nA_ori );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
343 ntop = nA_ori;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
344 ndat = nA_new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
345 inc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
346 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
347 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
348 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
349 assert( nret==nR_ori );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
350 ntop = nR_ori;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
351 ndat = nR_new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
352 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
353 int k = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
354
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
355 #define BRANCH(type_t,is_vector_end) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
356 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
357 type_t *ptr = (type_t*) dat; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
358 int size = sizeof(type_t); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
359 for (j=0; j<ntop; j++) /* j:ori, k:new */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
360 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
361 if ( is_vector_end ) { memcpy(dat+k*size, dat+j*size, size); break; } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
362 if ( rm_mask & 1<<(j+inc) ) continue; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
363 if ( j!=k ) memcpy(dat+k*size, dat+j*size, size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
364 k++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
365 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
366 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
367 switch (type)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
368 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
369 case BCF_HT_INT: BRANCH(int32_t,ptr[j]==bcf_int32_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
370 case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr[j])); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
371 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
372 #undef BRANCH
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
373 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
374 else // Number=G
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
375 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
376 assert( nret==nG_ori );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
377 int k, l_ori = -1, l_new = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
378 ndat = nG_new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
379
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
380 #define BRANCH(type_t,is_vector_end) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
381 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
382 type_t *ptr = (type_t*) dat; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
383 int size = sizeof(type_t); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
384 for (j=0; j<nR_ori; j++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
385 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
386 for (k=0; k<=j; k++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
387 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
388 l_ori++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
389 if ( is_vector_end ) { memcpy(dat+l_new*size, dat+l_ori*size, size); break; } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
390 if ( rm_mask & 1<<j || rm_mask & 1<<k ) continue; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
391 if ( l_ori!=l_new ) memcpy(dat+l_new*size, dat+l_ori*size, size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
392 l_new++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
393 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
394 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
395 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
396 switch (type)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
397 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
398 case BCF_HT_INT: BRANCH(int32_t,ptr[l_ori]==bcf_int32_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
399 case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr[l_ori])); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
400 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
401 #undef BRANCH
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
402 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
403
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
404 nret = bcf_update_info(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void*)dat, ndat, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
405 if ( nret<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
406 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
407 fprintf(stderr,"[%s:%d %s] Could not update INFO/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
408 bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname(header,line), line->pos+1, nret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
409 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
410 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
411 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
412
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
413 // Update GT fields, the allele indexes might have changed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
414 for (i=1; i<line->n_allele; i++) if ( map[i]!=i ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
415 if ( i<line->n_allele )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
416 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
417 mdat = mdat_bytes / 4; // sizeof(int32_t)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
418 nret = bcf_get_genotypes(header,line,(void**)&dat,&mdat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
419 mdat_bytes = mdat * 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
420 if ( nret>0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
421 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
422 nret /= line->n_sample;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
423 int32_t *ptr = (int32_t*) dat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
424 for (i=0; i<line->n_sample; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
425 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
426 for (j=0; j<nret; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
427 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
428 if ( bcf_gt_is_missing(ptr[j]) ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
429 if ( ptr[j]==bcf_int32_vector_end ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
430 int al = bcf_gt_allele(ptr[j]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
431 assert( al<nR_ori && map[al]>=0 );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
432 ptr[j] = (map[al]+1)<<1 | (ptr[j]&1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
433 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
434 ptr += nret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
435 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
436 bcf_update_genotypes(header, line, (void*)dat, nret*line->n_sample);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
437 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
438 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
439
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
440 // Remove from Number=G, Number=R and Number=A FORMAT fields.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
441 // Assuming haploid or diploid GTs
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
442 for (i=0; i<line->n_fmt; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
443 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
444 bcf_fmt_t *fmt = &line->d.fmt[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
445 int vlen = bcf_hdr_id2length(header,BCF_HL_FMT,fmt->id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
446
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
447 if ( vlen!=BCF_VL_A && vlen!=BCF_VL_G && vlen!=BCF_VL_R ) continue; // no need to change
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
448
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
449 int type = bcf_hdr_id2type(header,BCF_HL_FMT,fmt->id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
450 if ( type==BCF_HT_FLAG ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
451
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
452 int size = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
453 if ( type==BCF_HT_REAL || type==BCF_HT_INT ) size = 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
454
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
455 mdat = mdat_bytes / size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
456 nret = bcf_get_format_values(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void**)&dat, &mdat, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
457 mdat_bytes = mdat * size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
458 if ( nret<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
459 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
460 fprintf(stderr,"[%s:%d %s] Could not access FORMAT/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
461 bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname(header,line), line->pos+1, nret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
462 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
463 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
464
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
465 if ( type==BCF_HT_STR )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
466 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
467 int size = nret/line->n_sample; // number of bytes per sample
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
468 str.l = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
469 if ( vlen==BCF_VL_A || vlen==BCF_VL_R )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
470 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
471 int nexp, inc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
472 if ( vlen==BCF_VL_A )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
473 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
474 nexp = nA_ori;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
475 inc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
476 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
477 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
478 nexp = nR_ori;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
479 for (j=0; j<line->n_sample; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
480 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
481 char *ss = ((char*)dat) + j*size, *se = ss + size, *ptr = ss;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
482 int k_src = 0, k_dst = 0, l = str.l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
483 for (k_src=0; k_src<nexp; k_src++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
484 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
485 if ( ptr>=se || !*ptr) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
486 while ( ptr<se && *ptr && *ptr!=',' ) ptr++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
487 if ( rm_mask & 1<<(k_src+inc) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
488 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
489 ss = ++ptr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
490 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
491 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
492 if ( k_dst ) kputc(',',&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
493 kputsn(ss,ptr-ss,&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
494 ss = ++ptr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
495 k_dst++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
496 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
497 assert( k_src==nexp );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
498 l = str.l - l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
499 for (; l<size; l++) kputc(0, &str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
500 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
501 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
502 else // Number=G, diploid or haploid
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
503 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
504 for (j=0; j<line->n_sample; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
505 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
506 char *ss = ((char*)dat) + j*size, *se = ss + size, *ptr = ss;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
507 int k_src = 0, k_dst = 0, l = str.l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
508 int nexp = 0; // diploid or haploid?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
509 while ( ptr<se )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
510 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
511 if ( !*ptr ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
512 if ( *ptr==',' ) nexp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
513 ptr++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
514 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
515 if ( ptr!=ss ) nexp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
516 assert( nexp==nG_ori || nexp==nR_ori );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
517 ptr = ss;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
518 if ( nexp==nG_ori ) // diploid
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
519 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
520 int ia, ib;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
521 for (ia=0; ia<nR_ori; ia++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
522 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
523 for (ib=0; ib<=ia; ib++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
524 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
525 if ( ptr>=se || !*ptr ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
526 while ( ptr<se && *ptr && *ptr!=',' ) ptr++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
527 if ( rm_mask & 1<<ia || rm_mask & 1<<ib )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
528 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
529 ss = ++ptr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
530 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
531 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
532 if ( k_dst ) kputc(',',&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
533 kputsn(ss,ptr-ss,&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
534 ss = ++ptr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
535 k_dst++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
536 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
537 if ( ptr>=se || !*ptr ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
538 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
539 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
540 else // haploid
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
541 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
542 for (k_src=0; k_src<nR_ori; k_src++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
543 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
544 if ( ptr>=se || !*ptr ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
545 while ( ptr<se && *ptr && *ptr!=',' ) ptr++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
546 if ( rm_mask & 1<<k_src )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
547 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
548 ss = ++ptr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
549 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
550 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
551 if ( k_dst ) kputc(',',&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
552 kputsn(ss,ptr-ss,&str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
553 ss = ++ptr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
554 k_dst++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
555 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
556 assert( k_src==nR_ori );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
557 l = str.l - l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
558 for (; l<size; l++) kputc(0, &str);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
559 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
560 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
561 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
562 nret = bcf_update_format(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void*)str.s, str.l, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
563 if ( nret<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
564 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
565 fprintf(stderr,"[%s:%d %s] Could not update FORMAT/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
566 bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname(header,line), line->pos+1, nret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
567 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
568 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
569 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
570 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
571
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
572 int nori = nret / line->n_sample;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
573 if ( vlen==BCF_VL_A || vlen==BCF_VL_R || (vlen==BCF_VL_G && nori==nR_ori) ) // Number=A, R or haploid Number=G
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
574 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
575 int inc = 0, nnew;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
576 if ( vlen==BCF_VL_A )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
577 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
578 assert( nori==nA_ori ); // todo: will fail if all values are missing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
579 ndat = nA_new*line->n_sample;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
580 nnew = nA_new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
581 inc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
582 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
583 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
584 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
585 assert( nori==nR_ori ); // todo: will fail if all values are missing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
586 ndat = nR_new*line->n_sample;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
587 nnew = nR_new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
588 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
589
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
590 #define BRANCH(type_t,is_vector_end) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
591 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
592 for (j=0; j<line->n_sample; j++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
593 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
594 type_t *ptr_src = ((type_t*)dat) + j*nori; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
595 type_t *ptr_dst = ((type_t*)dat) + j*nnew; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
596 int size = sizeof(type_t); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
597 int k_src, k_dst = 0; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
598 for (k_src=0; k_src<nori; k_src++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
599 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
600 if ( is_vector_end ) { memcpy(ptr_dst+k_dst, ptr_src+k_src, size); break; } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
601 if ( rm_mask & 1<<(k_src+inc) ) continue; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
602 memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
603 k_dst++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
604 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
605 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
606 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
607 switch (type)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
608 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
609 case BCF_HT_INT: BRANCH(int32_t,ptr_src[k_src]==bcf_int32_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
610 case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr_src[k_src])); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
611 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
612 #undef BRANCH
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
613 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
614 else // Number=G, diploid or mixture of haploid+diploid
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
615 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
616 assert( nori==nG_ori );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
617 ndat = nG_new*line->n_sample;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
618
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
619 #define BRANCH(type_t,is_vector_end) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
620 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
621 for (j=0; j<line->n_sample; j++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
622 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
623 type_t *ptr_src = ((type_t*)dat) + j*nori; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
624 type_t *ptr_dst = ((type_t*)dat) + j*nG_new; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
625 int size = sizeof(type_t); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
626 int ia, ib, k_dst = 0, k_src; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
627 int nset = 0; /* haploid or diploid? */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
628 for (k_src=0; k_src<nG_ori; k_src++) { if ( is_vector_end ) break; nset++; } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
629 if ( nset==nR_ori ) /* haploid */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
630 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
631 for (k_src=0; k_src<nR_ori; k_src++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
632 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
633 if ( rm_mask & 1<<k_src ) continue; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
634 memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
635 k_dst++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
636 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
637 memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
638 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
639 else /* diploid */ \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
640 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
641 k_src = -1; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
642 for (ia=0; ia<nR_ori; ia++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
643 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
644 for (ib=0; ib<=ia; ib++) \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
645 { \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
646 k_src++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
647 if ( is_vector_end ) { memcpy(ptr_dst+k_dst, ptr_src+k_src, size); ia = nR_ori; break; } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
648 if ( rm_mask & 1<<ia || rm_mask & 1<<ib ) continue; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
649 memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
650 k_dst++; \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
651 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
652 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
653 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
654 } \
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
655 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
656 switch (type)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
657 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
658 case BCF_HT_INT: BRANCH(int32_t,ptr_src[k_src]==bcf_int32_vector_end); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
659 case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr_src[k_src])); break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
660 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
661 #undef BRANCH
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
662 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
663 nret = bcf_update_format(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void*)dat, ndat, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
664 if ( nret<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
665 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
666 fprintf(stderr,"[%s:%d %s] Could not update FORMAT/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
667 bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname(header,line), line->pos+1, nret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
668 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
669 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
670 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
671 free(dat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
672 free(str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
673 free(map);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
674 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
675