annotate tests/artificial.vcf.in @ 6:df3b28364cd2

allele-counts.{py,xml}: Add strand bias, documentation updates.
author nicksto <nmapsy@gmail.com>
date Wed, 09 Dec 2015 11:20:51 -0500
parents 31361191d2d2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
1 ##fileformat=VCFv4.1
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
2 ##comment="ARGS=-r 1 -f 10 -c 10"
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
3 ##comment="This is a test set of made-up sites, each created in order to test certain functionality. It's meant to be run with -f 10 -c 10"
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
4 ##fileDate=19700101
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
5 ##source=Dan
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
6 ##reference=file:///scratch/dan/galaxy/galaxy-central/database/files/002/dataset_0000.dat
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
7 ##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
8 ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
9 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
10 ##FORMAT=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
11 ##FORMAT=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
12 ##FORMAT=<ID=NC,Number=.,Type=String,Description="Nucleotide and indel counts">
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
13 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT THYROID
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
14 # General note: the only data made consistent is the CHROM, POS, REF, ALT, and the variant data (after the ':'). The other stuff isn't supposed to be consistent.
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
15 # Simplest case, but POS 0 and no minor allele
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
16 chr1 0 . A . . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,-A=15,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
17 # Simple, normal cases of A/G variants: above/below threshold x strand bias/no strand bias (2 x 2 = 4 cases)
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
18 chr1 10 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+G=1,-A=15,-G=1,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
19 chr1 20 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+G=1,-A=16,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
20 chr1 30 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=10,+G=2,-A=11,-G=2,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
21 chr1 40 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=11,+G=3,-A=11,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
22 # Same 4 cases, but with minor allele = N
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
23 chr1 50 . A N . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+N=1,-A=15,-N=1,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
24 chr1 60 . A N . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+N=1,-A=16,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
25 chr1 70 . A N . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=10,+N=2,-A=11,-N=2,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
26 chr1 80 . A N . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=11,+N=3,-A=11,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
27 # Same 4 cases, but with an additional noncanonical minor allele d1
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
28 chr1 82 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+G=1,+d1=1,-A=15,-G=1,-d1=1,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
29 chr1 84 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+G=1,+d1=1,-A=16,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
30 chr1 86 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=10,+G=2,+d1=1,-A=11,-G=2,-d1=1,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
31 chr1 88 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=11,+G=3,+d1=1,-A=11,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
32 # Same 4 cases, but with minor allele = d1 (non-canonical)
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
33 chr1 90 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+d1=1,-A=15,-d1=1,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
34 chr1 100 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+d1=1,-A=16,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
35 chr1 110 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=10,+d1=2,-A=11,-d1=2,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
36 chr1 120 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=11,+d1=3,-A=11,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
37 # Same 4 cases, but with MAJOR allele = d1
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
38 chr1 130 . d1 G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+d1=15,+G=1,-d1=15,-G=1,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
39 chr1 140 . d1 G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+d1=15,+G=1,-d1=16,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
40 chr1 150 . d1 G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+d1=10,+G=2,-d1=11,-G=2,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
41 chr1 160 . d1 G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+d1=11,+G=3,-d1=11,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
42 # Test edge cases where freq == freq_thres and/or covg == covg_thres
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
43 chr1 200 . A . . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=9,-A=9,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
44 chr1 210 . A . . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=10,-A=10,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
45 chr1 220 . A . . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=11,-A=11,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
46 chr1 230 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=91,+G=9,-A=91,-G=9,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
47 chr1 240 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=90,+G=10,-A=90,-G=10,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
48 chr1 250 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=89,+G=11,-A=89,-G=11,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
49 # Test case where minor allele is above threshold on only one strand because of different coverage. Also, a long decimal minor allele frequency.
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
50 chr1 260 . A G . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=13,+G=7,-A=93,-G=7,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
51 # Test case where minor alleles have equal frequency: Above/below threshold, +/- strand bias
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
52 chr1 300 . T G,A . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=1,+G=1,+T=38,-A=1,-G=1,-T=38,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
53 chr1 310 . T G,A . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=6,+G=6,+T=38,-A=6,-G=6,-T=38,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
54 chr1 320 . T G,A . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=6,+G=6,+T=38,-A=6,-G=6,-T=18,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
55 chr1 330 . T G,A . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=6,+G=6,+T=38,-A=1,-G=1,-T=28,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
56 chr1 340 . T G,A . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=1,+T=80,-G=1,-T=18,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
57 chr1 350 . T G,A . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=11,+T=60,-G=11,-T=18,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
58 # Case where + and - variants are interleaved with each other. Also, a long decimal result for the minor allele frequency.
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
59 chr1 400 . A N . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=16,-A=16,+G=4,-G=4,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
60 # Test complex data in the ALT, INFO, and sample (before the ':') columns
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
61 chr1 410 . T G,A . . AC=1,1;AF=0.0111111111111,0.0111111111111 GT:AC:AF:NC 0/0:1,1:0.0111111111111,0.0111111111111:+A=1,+T=81,-T=16,-G=2,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
62 chr1 420 . A . . . AC=;AF= GT:AC:AF:NC 0/0:::+A=82,-A=22,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
63 # Test some other types of noncanonical variants (tie for 2nd and not)
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
64 chr1 430 . A N,GAA,d2 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+N=1,+d2=1,-A=15,-GAA=2
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
65 chr1 440 . A N,GAA,d2 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+N=1,+d2=1,-A=15,-GAA=1
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
66 # No canonical variants present
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
67 chr1 450 . A d1 . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+d1=15,-d1=20,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
68 chr1 460 . A d1,N . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+d1=15,+N=2,-d1=20,-N=2,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
69 # Catch some divide by zero errors
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
70 chr1 470 . A . . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=0,
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
71 # Test an unusual CHROM value and a long POS value
31361191d2d2 Uploaded tarball.
nick
parents:
diff changeset
72 27 1234567890 . A N . . AC=;AF= GT:AC:AF:NC 0/0:1:1:+A=15,+N=1,-A=14,