# HG changeset patch
# User iuc
# Date 1492119671 14400
# Node ID 12f7c5315f7f21d43f6ee3d2cbd6244291244fae
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 9d03fe38504a35d11660dadb44cb1beee32fcf4e
diff -r 000000000000 -r 12f7c5315f7f README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,68 @@
+# bcftools (v1.3)
+
+Copied from branch bcftools1.2:
+
+This aims to be a "faithful" rendering of the bcftools suite. I.e. options are
+presented essentially as closely to the command line version as is useful.
+
+This may not appeal to all, if you'd like to see smaller and more dedicated
+tools (e.g. "intersect", "union" and "complement" being separate tools instead
+of all of them included in the "isec" tool), please feel free to file an issue.
+
+Updated for bcftools v1.3
+
+This was extended from the bcftools1.2 branch then greatly hand edited to
+group params and manage param interactions.
+
+In the macros.xml there are macros and tokens to handle file input and output.
+These use the datatypes currently available in galaxy: Vcf and Bcf
+The macros take care of bgzip and indexing of inputs.
+
+The convert command was split into 2 tools, "convert to vcf" and "convert from vcf"
+
+## TODO:
+
+- stats needs a matplotlib tool dependency and pdflatex for generating a pdf of plots
+- cnv needs a matplotlib tool dependency for generating images, then a means to consolidate those.
+- cnv needs an input.vcf for testing, runs with bcftools cnv -s "HG00101" -o 'HG00101/' -p 5 mpileup.vcf
+- roh needs a more useful input.vcf for testing
+- plugin color chrs
+- plugin frameshifts
+
+## Status
+
+The wrappers were automatically generated in bulk. That doesn't get them 100%
+of the way there (e.g. meaningful test cases), so the rest of the process is a
+bit slower.
+
+- [x] annotate
+- [x] call
+- [ ] cnv (needs real test data, needs plotting)
+- [x] concat
+- [x] consensus
+- [x] convert from vcf
+- [x] convert to vcf
+- [x] filter
+- [x] gtcheck
+- [x] isec
+- [x] merge
+- [x] norm
+- [x] query
+- [x] query list samples
+- [x] reheader
+- [x] roh
+- [x] stats (needs plotting)
+- [x] view
+- [ ] +color chrs
+- [x] +counts
+- [x] +dosage
+- [x] +fill an ac
+- [x] +fill tags
+- [x] +fixploidy
+- [ ] +frameshifts
+- [x] +impute info
+- [x] +mendelian
+- [x] +missing2ref
+- [x] +setgt
+- [x] +tag2tag
+- [x] +vcf2sex
diff -r 000000000000 -r 12f7c5315f7f bcftools_mpileup.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_mpileup.xml Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,450 @@
+
+
+ Generate VCF or BCF containing genotype likelihoods for one or multiple alignment (BAM or CRAM) files
+
+ mpileup
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '$output_file'
+#if str( $sec_filtering.read_groups.read_groups_selector ) == "paste":
+&& echo 'read-groups:'
+&& cat ${read_groups_file}
+#end if
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+--no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
+--redo-BAQ; ignore existing BQ tags
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+output gVCF blocks of homozygous REF calls, with depth (DP) ranges specified by the list of integers. For example, passing 5,15 will group sites into two types of gVCF blocks, the first with minimum per-sample DP from the interval [5,15) and the latter with minimum depth 15 or more. In this example, sites with minimum per-sample depth less than 5 will be printed as separate records, outside of gVCF blocks.
+
+ ^(\d+(,\d+)*)?$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 12f7c5315f7f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,680 @@
+
+ 1.4.0
+
+
+
+
+
+
+
+
+
+
+ bcftools
+ htslib
+
+
+
+
+ samtools
+
+
+ bcftools 2>&1 | grep 'Version:'
+
+
+
+
+ 10.1093/bioinformatics/btp352
+
+
+
+ https://github.com/samtools/bcftools/wiki
+ http://samtools.github.io/bcftools/bcftools.html
+
+ --threads \${GALAXY_SLOTS:-4}
+
+
+
+
+
+
+
+
+ $input_vcf &&
+ bcftools index $input_vcf &&
+#elif $input_file.is_of_type('vcf_bgzip')
+ ln -s '$input_file' $input_vcf
+#elif $input_file.is_of_type('bcf')
+ #set $input_vcf = 'input.bcf'
+ ln -s '$input_file' $input_vcf &&
+ #if $input_file.metadata.bcf_index:
+ ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
+ #else
+ bcftools index $input_vcf &&
+ #end if
+#elif $input_file.is_of_type('bcf_bgzip')
+ ln -s '$input_file' $input_vcf
+#end if
+]]>
+
+
+$input_vcf
+
+
+
+
+
+
+ $input_vcf &&
+ bcftools index $input_vcf &&
+ #elif $input_file.is_of_type('vcf_bgz')
+ ln -s '$input_file' $input_vcf
+ #elif $input_file.is_of_type('bcf')
+ #set $input_vcf = 'input' + str($i) + '.bcf.gz'
+ ln -s '$input_file' $input_vcf &&
+ #if $input_file.metadata.bcf_index:
+ ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
+ #else
+ bcftools index $input_vcf &&
+ #end if
+ #elif $input_file.is_of_type('bcfvcf_bgz')
+ ln -s '$input_file' $input_vcf &&
+ #end if
+ echo '$input_vcf' >> $vcfs_list_file &&
+ $input_vcfs.append($input_vcf)
+#end for
+]]>
+
+
+#echo ' '.join($input_vcfs)#
+
+
+$vcfs_list_file
+
+
+
+
+
+
+
+
+
+#if $input_fa_ref is not None:
+ --fasta-ref $input_fa_ref
+#elif 'fasta_ref' in $section and $section.fasta_ref:
+ --fasta-ref '${section.fasta_ref}'
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#if 'AF_file' in $section and $section.AF_file:
+ --AF-file '${section.AF_file}'
+#end if
+
+
+
+
+
+
+#if 'estimate_AF' in $section and $section.estimate_AF:
+ --estimate-AF "${section.estimate_AF}"
+#end if
+
+
+
+
+
+
+ $exons_path &&
+ tabix -s 1 -b 2 -e 3 $exons_path &&
+#end if
+]]>
+
+
+#if 'exons_file' in $section and $section.exons_file:
+ --exons $exons_path
+#end if
+
+
+
+
+
+
+#if 'ploidy_file' in $section and $section.ploidy_file:
+ --ploidy "${section.ploidy_file}"
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#if $section.collapse:
+ --collapse ${section.collapse}
+#end if
+
+
+
+
+ ^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$
+
+
+
+#if $section.apply_filters:
+ --apply-filters '${section.apply_filters}'
+#end if
+
+
+
+
+
+
+
+
+
+
+#if str($output_type) != "__none__":
+ --output-type '${output_type}'
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$
+
+
+
+
+
+
+
+
+
+
+
+#if $section.regions.regions_src == 'regions' and $section.regions.regions != '':
+ --regions '$section.regions.regions'
+#elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
+ #if $regions_path is not None:
+ --regions-file '$regions_path'
+ #else:
+ --regions-file '$section.regions.regions_file'
+ #end if
+#end if
+
+
+
+
+
+
+
+
+ $targets_path &&
+ tabix -s 1 -b 2 -e 2 $targets_path &&
+ #end if
+#elif $tgts_sec.targets_file:
+ #set $targets_path = 'targets_file.tab.gz'
+ bgzip -c "$section.targets_file" > $targets_path &&
+ tabix -s 1 -b 2 -e 2 $targets_path &&
+#end if
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^(\w+(,\w+)*)?$
+
+
+
+
+
+
+#set $samples_defined = False
+#if str($section.samples) != '':
+ #set $samples_defined = True
+ --samples '${section.invert_samples}${section.samples}'
+#end if
+#if $section.samples_file:
+ #set $samples_defined = True
+ --samples-file "${section.invert_samples_file}${section.samples_file}"
+#end if
+
+
+
+
+
+
+#if $section.sample:
+ --sample '${section.sample}'
+#end if
+
+
+
+
+
+ ^[^']*$
+
+
+
+
+#if $section.include:
+ --include '${section.include}'
+#end if
+
+
+
+
+ ^[^']*$
+
+
+
+
+#if $section.exclude:
+ --exclude '${section.exclude}'
+#end if
+
+
+
+
+ ^([^,]+(,[^,]+)*)?$
+
+
+
+#if $section.columns != '':
+ --columns '${section.columns}'
+#end if
+
+
+
+
+
+
+
+
+
+
+${section.vcf_ids}
+
+
+
+ BCF conversion.
+
+This Galaxy tool recommends using the compressed BCF format
+as piping is not implemented, and uncompressed data would
+use unnecessary amounts of space.
+ ]]>
+
+
+ als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz
+ ]]>
+
+
+
+
+
+Collapse
+--------
+
+Controls how to treat records with duplicate positions and defines compatible
+records across multiple input files. Here by "compatible" we mean records which
+should be considered as identical by the tools. For example, when performing
+line intersections, the desire may be to consider as identical all sites with
+matching positions (bcftools isec -c all), or only sites with matching variant
+type (bcftools isec -c snps -c indels), or only sites with all alleles
+identical (bcftools isec -c none).
+
+
++------------+----------------------------------------------------------------+
+| Flag value | Result |
++============+================================================================+
+| none | only records with identical REF and ALT alleles are compatible |
++------------+----------------------------------------------------------------+
+| some | only records where some subset of ALT alleles match are |
+| | compatible |
++------------+----------------------------------------------------------------+
+| all | all records are compatible, regardless of whether the ALT |
+| | alleles match or not. In the case of records with the same |
+| | position, only the first will be considered and appear on |
+| | output. |
++------------+----------------------------------------------------------------+
+| snps | any SNP records are compatible, regardless of whether the ALT |
+| | alleles match or not. For duplicate positions, only the first |
+| | SNP record will be considered and appear on output. |
++------------+----------------------------------------------------------------+
+| indels | all indel records are compatible, regardless of whether the |
+| | REF and ALT alleles match or not. For duplicate positions, |
+| | only the first indel record will be considered and appear on |
+| | output. |
++------------+----------------------------------------------------------------+
+| both | abbreviation of "-c indels -c snps" |
++------------+----------------------------------------------------------------+
+| id | only records with identical ID column are compatible. |
+| | Supported by bcftools merge only. |
++------------+----------------------------------------------------------------+
+
+
+
+ , >=, <=, <, !=
+
+- regex operators "~" and its negation "!~"
+
+ ::
+
+ INFO/HAYSTACK ~ "needle"
+
+- parentheses
+
+ ::
+
+ (, )
+
+- logical operators
+
+ ::
+
+ && (same as &), ||, |
+
+- INFO tags, FORMAT tags, column names
+
+ ::
+
+ INFO/DP or DP
+ FORMAT/DV, FMT/DV, or DV
+ FILTER, QUAL, ID, REF, ALT[0]
+
+- 1 (or 0) to test the presence (or absence) of a flag
+
+ ::
+
+ FlagA=1 && FlagB=0
+
+- "." to test missing values
+
+ ::
+
+ DP=".", DP!=".", ALT="."
+
+- missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression
+
+ ::
+
+ GT="."
+
+- TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)
+
+ ::
+
+ TYPE="indel" | TYPE="snp"
+
+- array subscripts, "*" for any field
+
+ ::
+
+ (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3
+ DP4[*] == 0
+ CSQ[*] ~ "missense_variant.*deleterious"
+
+- function on FORMAT tags (over samples) and INFO tags (over vector fields)
+
+ ::
+
+ MAX, MIN, AVG, SUM, STRLEN, ABS
+
+- variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes
+
+ ::
+
+ N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN
+
+**Notes:**
+
+- String comparisons and regular expressions are case-insensitive
+- If the subscript "*" is used in regular expression search, the whole field
+ is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be
+ true for the string vector INFO/STR=AB,CD.
+- Variables and function names are case-insensitive, but not tag names. For
+ example, "qual" can be used instead of "QUAL", "strlen()" instead of
+ "STRLEN()", but not "dp" instead of "DP".
+
+**Examples:**
+
+ ::
+
+ MIN(DV)>5
+ MIN(DV/DP)>0.3
+ MIN(DP)>10 & MIN(DV)>3
+ FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample
+ FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples
+ QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples
+ QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites
+ TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)
+ MIN(DP)>35 && AVG(GQ)>50
+ ID=@file .. selects lines with ID present in the file
+ ID!=@~/file .. skip lines with ID present in the ~/file
+ MAF[0]<0.05 .. select rare variants at 5% cutoff
+ ]]>
+
diff -r 000000000000 -r 12f7c5315f7f test-data/23andme.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/23andme.fa Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,14 @@
+>1
+CACGTNACGGCTGAAGTCCAAGGTAC
+CGTATCGAGTTCACAGTCGATAGCTC
+GATCGATAGCATCGCTAGCNNNACTA
+CGATCGATCGCTCTCCGTAACACTCA
+AAAACGATCGATCGACTGCTCTTTAG
+CGATGACTTTAGGGGAAAAA
+>2
+CGCTCAGCCGTACAGCCGAGCAGGAC
+ACGCTATTTTAGATCGACTGGCTNNG
+CGCTAGCTACGCTTTAGCACGAGAA
+>Y
+NNNGCATACGTGTCCATCACGATGAT
+AGCGATGATCGATC
diff -r 000000000000 -r 12f7c5315f7f test-data/annotate.hdr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.hdr Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,4 @@
+##INFO=
+##INFO=
+##INFO=
+##INFO=
diff -r 000000000000 -r 12f7c5315f7f test-data/annotate.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.tab Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,18 @@
+3 3212016 CTT C,CT indel_3212016 . . . 1
+4 3258448 TACACACAC T indel_3258448 . . . 1
+4 4000000 T C id1 . . . 1
+4 4000001 T C,A id2 . . . 1
+2 3199812 G GTT,GT indel_3199812 . . . 1
+1 3000150 C CT indel_3000150 . . . 1
+1 3000150 C T snp_3000150 999 1,2 1e-10,2e-10 .
+1 3000151 C T snp_3000151 999 1 2e-10 .
+1 3062915 G T,C snp_3062915 999 1 2e-10 .
+1 3062915 GTTT G indel_3062915 . . . 1
+1 3106154 A C snp_3106154 999 1 2e-10 .
+1 3106154 C CT indel_3106154 . . . 1
+1 3106154 CAAA C indel_3106154 . . . 1
+1 3157410 GA G indel_3157410 . . . 1
+1 3162006 GAA G indel_3162006 . . . 1
+1 3177144 G . ref_3177144 999 1 2e-10 .
+1 3177144 G T snp_3177144 999 1 2e-10 0
+1 3184885 TAAAA TA,T indel_3184885 . . . 1
diff -r 000000000000 -r 12f7c5315f7f test-data/annotate.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.1
+##INFO=
+##FORMAT=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##contig=
+##contig=
+##contig=
+##contig=
+##test=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000000 . T A,C 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000001 . T A 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
diff -r 000000000000 -r 12f7c5315f7f test-data/annotate2.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.tab Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,2 @@
+2 3000000 3199812 region_3000000_3199812
+1 3000150 3106154 region_3000150_3106154
diff -r 000000000000 -r 12f7c5315f7f test-data/annotate2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 3000001 xx C T 11 PASS FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:xxx 0/0:11:1.1:x 0/0:11:1.1:x
+1 3000002 . C T . . . GT . . .
+1 3000003 xx C T 11 q11 FLAG;IINT=.;IFLT=.;ISTR=. GT:FINT:FFLT:FSTR 0/0:.:.:. 0/0:.:.:. 0/0:.:.:.
+1 3000004 xx C T 11 q11 FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:x 0/0:11:1.1:xxx 0/0:11:1.1:xxx
diff -r 000000000000 -r 12f7c5315f7f test-data/annotate3.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate3.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,21 @@
+##fileformat=VCFv4.1
+##FILTER=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 id C . 20 . AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000001 id C . 20 PASS AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000002 id C . 20 fltY;fltA;fltB;fltX BB=2;X=3;Y=4;AA=1 GT:Y:X:PL:AA 0/1:3:1:2:1 0/1:3:1:2:1
diff -r 000000000000 -r 12f7c5315f7f test-data/annotate4.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate4.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,13 @@
+##fileformat=VCFv4.2
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1 . C T . . .
+1 2 . C T,G . . FA=.,9.9;FR=.,9.9,.;IA=.,99;IR=.,99,.;SA=.,99;SR=.,99,.
+1 3 . C A,T . . .
diff -r 000000000000 -r 12f7c5315f7f test-data/annots.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,37 @@
+##fileformat=VCFv4.1
+##INFO=
+##FORMAT=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##contig=
+##contig=
+##contig=
+##contig=
+##test=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 id1 C T 99 PASS STR=id1;AN=4;AC=0 GT:GQ 0|0:999 0|0:999
+1 3000151 id2 C T 99 PASS STR=id2;AN=4;AC=0 GT:DP:GQ 0|0:99:999 0|0:99:999
+1 3062915 idIndel GTTT G 99 PASS DP4=1,2,3,4;AN=4;AC=0;INDEL;STR=testIndel GT:GQ:DP:GL 0|0:999:99:-99,-9,-99 0|0:999:99:-99,-9,-99
+1 3062915 idSNP G T,C 99 PASS STR=testSNP;TEST=5;DP4=1,2,3,4;AN=3;AC=0,0 GT:TT:GQ:DP:GL 0|0:9,9:999:99:-99,-9,-99,-99,-9,-99 0:9,9:999:99:-99,-9,-99
+1 3106154 id4 CAAA C 99 PASS STR=id4;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3106154 id5 C CT 99 PASS STR=id5;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3157410 id6 GA GC,G 99 PASS STR=id6;AN=4;AC=0 GT:GQ:DP 0|0:99:99 0|0:99:99
+1 3162006 id7 GAA GG 99 PASS STR=id7;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id8 G T 99 PASS STR=id8;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id9 G . 99 PASS STR=id9;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3184885 id10 TAAAA TA,T 99 PASS STR=id10;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+2 3199812 id11 G GTT,GT 99 PASS STR=id11;AN=4;AC=0,0 GT:GQ:DP 0|0:999:99 0|0:999:99
+3 3212016 id12 CTT C,CT 99 PASS STR=id12;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+4 3258448 id13 TACACACAC T 99 PASS STR=id13;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
diff -r 000000000000 -r 12f7c5315f7f test-data/annots2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots2.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##FILTER=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B A
+1 3000001 . C T . . . GT . .
+1 3000002 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000003 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000004 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
diff -r 000000000000 -r 12f7c5315f7f test-data/annots4.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots4.tab Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,3 @@
+1 1 C A,T,G 0,1.1,0 1.1,0,2.2,0 0,1,0 1,0,2,0 X,11,XXX 1,XX,222,XXX
+1 2 C T,G 1.1,2.2 1.1,2.2,3.3 1,2 1,2,3 11,2 111,22,3
+1 3 C T 1.1 1.1,2.2 1 1,2 11 11,2
diff -r 000000000000 -r 12f7c5315f7f test-data/annots4.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots4.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,13 @@
+##fileformat=VCFv4.2
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1 . C A,T,G . . FA=0,1.1,0;FR=1.1,0,2.2,0;IA=0,1,0;IR=1,0,2,0;SA=X,11,XXX;SR=1,XX,222,XXX
+1 2 . C T,G . . FA=1.1,2.2;FR=1.1,2.2,3.3;IA=1,2;IR=1,2,3;SA=11,2;SR=111,22,3
+1 3 . C T . . FA=1.1;FR=1.1,2.2;IA=1;IR=1,2;SA=11;SR=11,2
diff -r 000000000000 -r 12f7c5315f7f test-data/check.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/check.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,38 @@
+##fileformat=VCFv4.1
+##INFO=
+##FORMAT=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##contig=
+##contig=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:409:35:-20,-5,-20,-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . G A 59.2 PASS AN=4;AC=1 GT:GQ:DP 0/1:245:32 0/0:245:32
+1 3157410 . G A 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . G A 60.2 PASS AN=4;AC=3 GT:GQ:DP 1/1:212:22 0/1:212:22
+1 3177144 . GT G 45 PASS AN=4;AC=2 GT:GQ:DP 0/1:150:30 0/1:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258449 . GCAAA GA,G 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258450 . AAAAGAAAAAG A,AAAAAAG 59.9 PASS DP=60;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258451 . AAA AGT 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258452 . AAA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . AACA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . ACA AAGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258454 . AACA AACA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
diff -r 000000000000 -r 12f7c5315f7f test-data/concat.1.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.a.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.0
+##FILTER=
+##FILTER=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##contig=
+##contig=
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 XX=11;DP=35 GT:GQ:DP 0/1:409:35
+1 110 . C T,G 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 110 . CAAA C 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
diff -r 000000000000 -r 12f7c5315f7f test-data/concat.1.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.b.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,19 @@
+##fileformat=VCFv4.0
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+3 142 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+3 152 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+3 162 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+3 172 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+3 182 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+3 192 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
diff -r 000000000000 -r 12f7c5315f7f test-data/concat.2.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.a.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.0
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+2 140 . A G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
+1 110 . C T,G 1792 Fail XX=11;DP=32 GT:GQ:DP 0/1:245:32
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
diff -r 000000000000 -r 12f7c5315f7f test-data/concat.2.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.b.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.0
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+1 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
diff -r 000000000000 -r 12f7c5315f7f test-data/concat.3.0.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.0.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.0
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
diff -r 000000000000 -r 12f7c5315f7f test-data/concat.3.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.a.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.0
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+9 202 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0|1:409:35 0|1
+9 212 . C T,G 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+9 212 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+9 222 . GA G 628 q10 DP=21 GT:GQ:DP 0|1:21:21 0|1
+9 232 . G T 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+9 232 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+9 242 . GT G 727 PASS DP=30 GT:GQ:DP 0|1:150:30 0|1
+9 252 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+9 262 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+1 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0|1:409:35 0|1
+1 110 . C T,G 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+1 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 0|1:21:21 0|1
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0|1:150:30 0|1
+1 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+1 160 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
diff -r 000000000000 -r 12f7c5315f7f test-data/concat.3.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.b.vcf Thu Apr 13 17:41:11 2017 -0400
@@ -0,0 +1,223 @@
+##fileformat=VCFv4.0
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=