0
|
1 /* test/test-vcf-sweep.c -- VCF test harness.
|
|
2
|
|
3 Copyright (C) 2013 Genome Research Ltd.
|
|
4
|
|
5 Author: Petr Danecek <pd3@sanger.ac.uk>
|
|
6
|
|
7 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8 of this software and associated documentation files (the "Software"), to deal
|
|
9 in the Software without restriction, including without limitation the rights
|
|
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11 copies of the Software, and to permit persons to whom the Software is
|
|
12 furnished to do so, subject to the following conditions:
|
|
13
|
|
14 The above copyright notice and this permission notice shall be included in
|
|
15 all copies or substantial portions of the Software.
|
|
16
|
|
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
23 DEALINGS IN THE SOFTWARE. */
|
|
24
|
|
25 #include <stdio.h>
|
|
26 #include <htslib/vcf_sweep.h>
|
|
27
|
|
28 int main(int argc, char **argv)
|
|
29 {
|
|
30 if ( argc!=2 )
|
|
31 {
|
|
32 fprintf(stderr,"Usage: test-vcf-sweep <file.bcf|file.vcf>\n");
|
|
33 return 1;
|
|
34 }
|
|
35
|
|
36 // Init variables. The checksum is just for this test program to output
|
|
37 // something and verify that all sites are read in both passes - fwd and
|
|
38 // bwd.
|
|
39 bcf_sweep_t *sw = bcf_sweep_init(argv[1]);
|
|
40 bcf_hdr_t *hdr = bcf_sweep_hdr(sw);
|
|
41 int chksum = 0;
|
|
42
|
|
43 // First we must sweep forward and read the whole file to build an index.
|
|
44 // If this is undesirable, we can require the presence of a .gzi index
|
|
45 // which can be created with `bgzip -r` from the samtools/htslib package
|
|
46 bcf1_t *rec;
|
|
47 while ( (rec = bcf_sweep_fwd(sw)) ) chksum += rec->pos+1;
|
|
48 printf("fwd position chksum: %d\n", chksum);
|
|
49
|
|
50 // Now sweep backward.
|
|
51 chksum = 0;
|
|
52 while ( (rec = bcf_sweep_bwd(sw)) ) chksum += rec->pos+1;
|
|
53 printf("bwd position chksum: %d\n", chksum);
|
|
54
|
|
55 // And forward and backward again, this time summing the PL vectors
|
|
56 int i,j, mPLs = 0, nPLs;
|
|
57 int32_t *PLs = NULL;
|
|
58 chksum = 0;
|
|
59 while ( (rec = bcf_sweep_fwd(sw)) )
|
|
60 {
|
|
61 // get copy of the PL vectors
|
|
62 nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs);
|
|
63 if ( !nPLs ) continue; // PL not present
|
|
64
|
|
65 // how many values are there per sample
|
|
66 int nvals = nPLs / bcf_hdr_nsamples(hdr);
|
|
67
|
|
68 int32_t *ptr = PLs;
|
|
69 for (i=0; i<bcf_hdr_nsamples(hdr); i++)
|
|
70 {
|
|
71 for (j=0; j<nvals; j++)
|
|
72 {
|
|
73 // check for shorter vectors (haploid genotypes amongst diploids)
|
|
74 if ( ptr[j]==bcf_int32_vector_end ) break;
|
|
75
|
|
76 // skip missing values
|
|
77 if ( ptr[j]==bcf_int32_missing ) continue;
|
|
78
|
|
79 chksum += ptr[j];
|
|
80 }
|
|
81 ptr += nvals;
|
|
82 }
|
|
83 }
|
|
84 printf("fwd PL chksum: %d\n", chksum);
|
|
85
|
|
86 // And the same backwards..
|
|
87 chksum = 0;
|
|
88 while ( (rec = bcf_sweep_bwd(sw)) )
|
|
89 {
|
|
90 nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs);
|
|
91 if ( !nPLs ) continue;
|
|
92 int nvals = nPLs / bcf_hdr_nsamples(hdr);
|
|
93 int32_t *ptr = PLs;
|
|
94 for (i=0; i<bcf_hdr_nsamples(hdr); i++)
|
|
95 {
|
|
96 for (j=0; j<nvals; j++)
|
|
97 {
|
|
98 if ( ptr[j]==bcf_int32_vector_end ) break;
|
|
99 if ( ptr[j]==bcf_int32_missing ) continue;
|
|
100 chksum += ptr[j];
|
|
101 }
|
|
102 ptr += nvals;
|
|
103 }
|
|
104 }
|
|
105 printf("bwd PL chksum: %d\n", chksum);
|
|
106
|
|
107 // Clean up
|
|
108 bcf_sweep_destroy(sw);
|
|
109 return 0;
|
|
110 }
|
|
111
|
|
112
|