comparison variant_combine.xml @ 0:1a6e16391727 draft default tip

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:50:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1a6e16391727
<tool id="gatk_variant_combine" name="Combine Variants" version="0.0.4">
    <!--
        Galaxy tool wrapper for the GATK CombineVariants walker.
        The command template builds a gatk_wrapper.py call from the parameters
        declared in the inputs section. The macros expanded below and the
        standard_gatk_options include are presumably defined in the imported
        gatk_macros.xml (not visible from this file; confirm there).
    -->
    <description></description>
    <requirements>
        <requirement type="package" version="1.4">gatk</requirement>
    </requirements>
    <macros>
        <import>gatk_macros.xml</import>
    </macros>
    <command interpreter="python">gatk_wrapper.py
        --max_jvm_heap_fraction "1"
        --stdout "${output_log}"

        ## One -d argument group is emitted per "Variants to Merge" entry. The entry names are
        ## collected in form order so that the rod priority list below follows that same order.
        ## NOTE(review): the file_type placeholder is presumably filled in by gatk_wrapper.py; confirm there.
        #set $priority_order = []
        #for $input_variant in $reference_source.input_variants:
            -d "--variant:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}"
            #set $input_variant_name = str( $input_variant.input_variant_name )
            #assert $input_variant_name not in $priority_order, "Variant Names must be unique" ##this should be handled by a validator
            #silent $priority_order.append( $input_variant_name )
        #end for
        -p 'java
        -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
        -T "CombineVariants"
        --out "${output_variants}"
        ##--num_threads 4 ##hard coded, for now
        -et "NO_ET" ##ET no phone home
        ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
        ## Only a built-in (non-history) reference is passed here, by its data table path.
        ## NOTE(review): nothing in this block passes a history reference; presumably the
        ## standard_gatk_options include handles it. Confirm in gatk_macros.xml.
        #if $reference_source.reference_source_selector != "history":
            -R "${reference_source.ref_file.fields.path}"
        #end if
        --genotypemergeoption "${genotype_merge_option}"
        --rod_priority_list "${ ','.join( $priority_order ) }"
        '

        #include source=$standard_gatk_options#


        ##start analysis specific options
        #if $analysis_param_type.analysis_param_type_selector == "advanced":
            -p '
            --filteredrecordsmergetype "${analysis_param_type.filtered_records_merge_type}"
            ${analysis_param_type.print_complex_merges}
            ${analysis_param_type.filtered_are_uncalled}
            ${analysis_param_type.minimal_vcf}
            ${analysis_param_type.assume_identical_samples}

            ## The set key option is emitted only when the user entered a non-empty value.
            #if str( $analysis_param_type.set_key ):
                --setKey "${analysis_param_type.set_key}"
            #end if

            --minimumN "${analysis_param_type.minimum_n}"
            '
        #end if
    </command>
    <inputs>

        <!--
            Both branches declare the same "input_variants" repeat; they differ only
            in how ref_file is chosen (data table entry versus history dataset).
            Variant names are interpolated into the variant binding argument and
            comma-joined into the rod priority list by the command template, so the
            regex validator restricts them to characters that cannot split either.
        -->
        <conditional name="reference_source">
            <expand macro="reference_source_selector_param" />
            <when value="cached">
                <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
                    <param name="input_variant" type="data" format="vcf" label="Input variant file" />
                    <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique">
                        <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
                        <validator type="regex" message="Variant names may contain only letters, digits, underscores, dots and hyphens">^[A-Za-z0-9_.-]+$</validator>
                    </param>
                </repeat>
                <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
                    <options from_data_table="gatk_picard_indexes">
                        <!-- <filter type="data_meta" key="dbkey" ref="input_variants.input_variant" column="dbkey"/> -->
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history"> <!-- FIX ME!!!! -->
                <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
                    <param name="input_variant" type="data" format="vcf" label="Input variant file" />
                    <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique">
                        <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
                        <validator type="regex" message="Variant names may contain only letters, digits, underscores, dots and hyphens">^[A-Za-z0-9_.-]+$</validator>
                    </param>
                </repeat>
                <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
            </when>
        </conditional>

        <param name="genotype_merge_option" type="select" label="How should we merge genotype records across records for samples shared across the ROD files" help="-genotypeMergeOptions,--genotypemergeoption &amp;lt;genotypemergeoption&amp;gt;" >
            <option value="UNIQUIFY" />
            <option value="PRIORITIZE" selected="true"/>
            <option value="UNSORTED" />
            <option value="REQUIRE_UNIQUE" />
        </param>

        <expand macro="gatk_param_type_conditional" />


        <!-- The boolean parameters below carry the literal command line flag as their truevalue, so the command template can emit them verbatim. -->
        <expand macro="analysis_type_conditional">
            <param name="filtered_records_merge_type" type="select" label="How should we deal with records seen at the same site in the VCF, but with different FILTER fields?" help="-filteredRecordsMergeType,--filteredrecordsmergetype &amp;lt;filteredrecordsmergetype&amp;gt;" >
                <option value="KEEP_IF_ANY_UNFILTERED" selected="true"/>
                <option value="KEEP_IF_ALL_UNFILTERED" />
            </param>

            <param name="print_complex_merges" checked="false" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,--printComplexMerges" />
            <param name="filtered_are_uncalled" checked="false" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF" help="-filteredAreUncalled,--filteredAreUncalled" />
            <param name="minimal_vcf" checked="false" type="boolean" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype INFO field" help="-minimalVCF,--minimalVCF" />

            <param name="set_key" type="text" value="" label="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from." help="-setKey,--setKey &amp;lt;setKey&amp;gt;"/>
            <param name="assume_identical_samples" checked="false" type="boolean" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime." help="-assumeIdenticalSamples,--assumeIdenticalSamples" />
            <param name="minimum_n" type="integer" value="1" label="Combine variants and output site only if variant is present in at least N input files." help="-minN,--minimumN &amp;lt;minimumN&amp;gt;"/>

        </expand>


    </inputs>
    <outputs>
        <!-- output_variants receives the combined VCF; output_log receives the wrapper's captured stdout. -->
        <data format="vcf" name="output_variants" label="${tool.name} on ${on_string} (variants)" />
        <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
    </outputs>
    <tests>
        <!-- Single-input run against a history reference with basic settings for both parameter groups. -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="phiX.fasta" ftype="fasta" />
            <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
            <param name="input_variant_name" value="from_variant_annotator" />
            <param name="genotype_merge_option" value="PRIORITIZE" />
            <param name="gatk_param_type_selector" value="basic" />
            <param name="analysis_param_type_selector" value="basic" />
            <output name="output_variants" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf" lines_diff="4" />
            <output name="output_log" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.log.contains" compare="contains" />
        </test>
    </tests>
    <help>
**What it does**

Combines VCF records from different sources; supports both full merges and set unions. Merge: combines multiple records into a single one; if sample names overlap then they are uniquified. Union: assumes each rod represents the same set of samples (although this is not enforced); using the priority list (if provided), emits a single record instance at every position represented in the rods.

For more information on using the CombineVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/CombineVariants&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.

------

**Inputs**

GenomeAnalysisTK: CombineVariants accepts variant files as input.

------

**Outputs**

The output is a combined vcf file.


Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.

-------

**Settings**::

 out                       File to which variants should be written
 genotypemergeoption       How should we merge genotype records for samples shared across the ROD files? (UNIQUIFY|PRIORITIZE|UNSORTED|REQUIRE_UNIQUE)
 filteredrecordsmergetype  How should we deal with records seen at the same site in the VCF, but with different FILTER fields? KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered (KEEP_IF_ANY_UNFILTERED|KEEP_IF_ALL_UNFILTERED)
 rod_priority_list         When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided
 printComplexMerges        Print out interesting sites requiring complex compatibility merging
 filteredAreUncalled       If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF
 minimalVCF                If true, then the output VCF will contain no INFO or genotype INFO field
 setKey                    Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from. Set to null if you don't want the set field emitted.
 assumeIdenticalSamples    If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.
 minimumN                  Combine variants and output site only if variant is present in at least N input files.

@CITATION_SECTION@
    </help>
</tool>