Mercurial > repos > youngkim > ezbamqc
diff ezBAMQC/src/htslib/test/test-vcf-sweep.c @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test-vcf-sweep.c Thu Mar 24 17:12:52 2016 -0400 @@ -0,0 +1,112 @@ +/* test/test-vcf-sweep.c -- VCF test harness. + + Copyright (C) 2013 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include <stdio.h> +#include <htslib/vcf_sweep.h> + +int main(int argc, char **argv) +{ + if ( argc!=2 ) + { + fprintf(stderr,"Usage: test-vcf-sweep <file.bcf|file.vcf>\n"); + return 1; + } + + // Init variables. The checksum is just for this test program to output + // something and verify that all sites are read in both passes - fwd and + // bwd. + bcf_sweep_t *sw = bcf_sweep_init(argv[1]); + bcf_hdr_t *hdr = bcf_sweep_hdr(sw); + int chksum = 0; + + // First we must sweep forward and read the whole file to build an index. + // If this is undesirable, we can require the presence of a .gzi index + // which can be created with `bgzip -r` from the samtools/htslib package + bcf1_t *rec; + while ( (rec = bcf_sweep_fwd(sw)) ) chksum += rec->pos+1; + printf("fwd position chksum: %d\n", chksum); + + // Now sweep backward. + chksum = 0; + while ( (rec = bcf_sweep_bwd(sw)) ) chksum += rec->pos+1; + printf("bwd position chksum: %d\n", chksum); + + // And forward and backward again, this time summing the PL vectors + int i,j, mPLs = 0, nPLs; + int32_t *PLs = NULL; + chksum = 0; + while ( (rec = bcf_sweep_fwd(sw)) ) + { + // get copy of the PL vectors + nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs); + if ( !nPLs ) continue; // PL not present + + // how many values are there per sample + int nvals = nPLs / bcf_hdr_nsamples(hdr); + + int32_t *ptr = PLs; + for (i=0; i<bcf_hdr_nsamples(hdr); i++) + { + for (j=0; j<nvals; j++) + { + // check for shorter vectors (haploid genotypes amongst diploids) + if ( ptr[j]==bcf_int32_vector_end ) break; + + // skip missing values + if ( ptr[j]==bcf_int32_missing ) continue; + + chksum += ptr[j]; + } + ptr += nvals; + } + } + printf("fwd PL chksum: %d\n", chksum); + + // And the same backwards.. + chksum = 0; + while ( (rec = bcf_sweep_bwd(sw)) ) + { + nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs); + if ( !nPLs ) continue; + int nvals = nPLs / bcf_hdr_nsamples(hdr); + int32_t *ptr = PLs; + for (i=0; i<bcf_hdr_nsamples(hdr); i++) + { + for (j=0; j<nvals; j++) + { + if ( ptr[j]==bcf_int32_vector_end ) break; + if ( ptr[j]==bcf_int32_missing ) continue; + chksum += ptr[j]; + } + ptr += nvals; + } + } + printf("bwd PL chksum: %d\n", chksum); + + // Clean up + bcf_sweep_destroy(sw); + return 0; +} + +