annotate ezBAMQC/src/htslib/test/test-vcf-sweep.c @ 20:9de3bbec2479 draft default tip

Uploaded
author youngkim
date Thu, 31 Mar 2016 10:10:37 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /* test/test-vcf-sweep.c -- VCF test harness.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Copyright (C) 2013 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Author: Petr Danecek <pd3@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7 Permission is hereby granted, free of charge, to any person obtaining a copy
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 of this software and associated documentation files (the "Software"), to deal
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 in the Software without restriction, including without limitation the rights
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 copies of the Software, and to permit persons to whom the Software is
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 furnished to do so, subject to the following conditions:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14 The above copyright notice and this permission notice shall be included in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 all copies or substantial portions of the Software.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 DEALINGS IN THE SOFTWARE. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 #include <stdio.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 #include <htslib/vcf_sweep.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 int main(int argc, char **argv)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30 if ( argc!=2 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 fprintf(stderr,"Usage: test-vcf-sweep <file.bcf|file.vcf>\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 return 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 // Init variables. The checksum is just for this test program to output
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 // something and verify that all sites are read in both passes - fwd and
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 // bwd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 bcf_sweep_t *sw = bcf_sweep_init(argv[1]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 bcf_hdr_t *hdr = bcf_sweep_hdr(sw);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 int chksum = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 // First we must sweep forward and read the whole file to build an index.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 // If this is undesirable, we can require the presence of a .gzi index
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 // which can be created with `bgzip -r` from the samtools/htslib package
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 bcf1_t *rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 while ( (rec = bcf_sweep_fwd(sw)) ) chksum += rec->pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 printf("fwd position chksum: %d\n", chksum);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 // Now sweep backward.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 chksum = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 while ( (rec = bcf_sweep_bwd(sw)) ) chksum += rec->pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 printf("bwd position chksum: %d\n", chksum);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 // And forward and backward again, this time summing the PL vectors
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 int i,j, mPLs = 0, nPLs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 int32_t *PLs = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 chksum = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 while ( (rec = bcf_sweep_fwd(sw)) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 // get copy of the PL vectors
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 if ( !nPLs ) continue; // PL not present
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 // how many values are there per sample
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 int nvals = nPLs / bcf_hdr_nsamples(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 int32_t *ptr = PLs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 for (i=0; i<bcf_hdr_nsamples(hdr); i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 for (j=0; j<nvals; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 // check for shorter vectors (haploid genotypes amongst diploids)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 if ( ptr[j]==bcf_int32_vector_end ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 // skip missing values
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 if ( ptr[j]==bcf_int32_missing ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 chksum += ptr[j];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 ptr += nvals;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 printf("fwd PL chksum: %d\n", chksum);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 // And the same backwards..
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 chksum = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 while ( (rec = bcf_sweep_bwd(sw)) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 if ( !nPLs ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 int nvals = nPLs / bcf_hdr_nsamples(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 int32_t *ptr = PLs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 for (i=0; i<bcf_hdr_nsamples(hdr); i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 for (j=0; j<nvals; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 if ( ptr[j]==bcf_int32_vector_end ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 if ( ptr[j]==bcf_int32_missing ) continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 chksum += ptr[j];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 ptr += nvals;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 printf("bwd PL chksum: %d\n", chksum);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 // Clean up
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 bcf_sweep_destroy(sw);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112