comparison variant_combine.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
comparison
equal deleted inserted replaced
14:68426930d59c 15:01ff8dd37d4d
1 <tool id="gatk2_variant_combine" name="Combine Variants" version="@VERSION@.0">
2 <description></description>
3 <macros>
4 <import>gatk2_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="python"> <!-- Cheetah template that assembles the gatk2_wrapper.py command line for the GATK CombineVariants walker; the wrapper's stdout is directed to output_log. -->
9 gatk2_wrapper.py
10 --stdout "${output_log}"
11 <!-- For every entry of the input_variants repeat: emit one -d argument group (a variant binding template carrying the user-supplied name, the dataset, its extension, and a label derived from the name) and append the name to priority_order. The #assert stops templating when a name repeats. -->
12 #set $priority_order = []
13 #for $input_variant in $reference_source.input_variants:
14 -d "--variant:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}"
15 #set $input_variant_name = str( $input_variant.input_variant_name )
16 #assert $input_variant_name not in $priority_order, "Variant Names must be unique" ##this should be handled by a validator
17 #silent $priority_order.append( $input_variant_name )
18 #end for
19 -p '
20 @JAR_PATH@
21 -T "CombineVariants"
22 --out "${output_variants}"
23 \$GATK2_SITE_OPTIONS
24
25 @THREADS@
26
27 #if $reference_source.reference_source_selector != "history":
28 -R "${reference_source.ref_file.fields.path}"
29 #end if
30 --genotypemergeoption "${genotype_merge_option}"
31 --rod_priority_list "${ ','.join( $priority_order ) }"
32 '
33 <!-- In the block above, -R is passed only when the reference source is not "history", and the priority list is the variant names joined with commas in the order they were listed. NOTE(review): how a history reference file reaches GATK is not visible in this file; presumably the included standard_gatk_options template handles it. Confirm. -->
34 #include source=$standard_gatk_options#
35 <!-- standard_gatk_options (included above) is not defined in this file. The block below is emitted only when the analysis type selector is "advanced"; the setKey argument is added only when set_key is non-empty. -->
36 ##start analysis specific options
37 #if $analysis_param_type.analysis_param_type_selector == "advanced":
38 -p '
39 --filteredrecordsmergetype "${analysis_param_type.filtered_records_merge_type}"
40 ${analysis_param_type.print_complex_merges}
41 ${analysis_param_type.filtered_are_uncalled}
42 ${analysis_param_type.minimal_vcf}
43 ${analysis_param_type.assume_identical_samples}
44
45 #if str( $analysis_param_type.set_key ):
46 --setKey "${analysis_param_type.set_key}"
47 #end if
48
49 --minimumN "${analysis_param_type.minimum_n}"
50 '
51 #end if
52 </command>
53 <inputs>
54
55 <conditional name="reference_source"> <!-- Chooses a built-in (cached) or history reference. Both branches carry an identical input_variants repeat because the command template reads it as reference_source.input_variants. -->
56 <expand macro="reference_source_selector_param" />
57 <when value="cached">
58 <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
59 <param name="input_variant" type="data" format="vcf" label="Input variant file" />
60 <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique"> <!-- The name is embedded in the variant binding, the comma-joined priority list and a wrapper label, so it is limited to characters that are safe in all three; the + in the pattern also rejects an empty name. Uniqueness is still checked by the #assert in the command template. -->
61 <validator type="regex" message="You must provide a unique name for this set of variants, using only letters, digits, underscores, dots and hyphens">^[A-Za-z0-9_.-]+$</validator>
62 </param>
63 </repeat>
64 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
65 <options from_data_table="gatk2_picard_indexes">
66 <!-- <filter type="data_meta" key="dbkey" ref="input_variants.input_variant" column="dbkey"/> -->
67 </options>
68 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
69 </param>
70 </when>
71 <when value="history"> <!-- FIX ME!!!! --> <!-- NOTE(review): the FIX ME gives no detail. The command template in this file passes -R only for the cached branch, so confirm how this branch's ref_file reaches GATK. -->
72 <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &amp;lt;variant&amp;gt;)">
73 <param name="input_variant" type="data" format="vcf" label="Input variant file" />
74 <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique"> <!-- Same restriction as in the cached branch: only characters that are safe in the variant binding, the priority list and the wrapper label. -->
75 <validator type="regex" message="You must provide a unique name for this set of variants, using only letters, digits, underscores, dots and hyphens">^[A-Za-z0-9_.-]+$</validator>
76 </param>
77 </repeat>
78 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
79 </when>
80 </conditional>
81
82 <param name="genotype_merge_option" type="select" label="How should we merge genotype records across records for samples shared across the ROD files" help="-genotypeMergeOptions,--genotypemergeoption &amp;lt;genotypemergeoption&amp;gt;"> <!-- The selected value is passed to the genotypemergeoption argument by the command template; PRIORITIZE is preselected. -->
83 <option value="UNIQUIFY"/>
84 <option selected="true" value="PRIORITIZE"/>
85 <option value="UNSORTED"/>
86 <option value="REQUIRE_UNIQUE"/>
87 </param>
88
89 <expand macro="gatk_param_type_conditional"/> <!-- Macro expansion; the macro itself is not defined in this file. -->
90
91 <expand macro="analysis_type_conditional"> <!-- Parameters nested here are read by the command template as analysis_param_type.* and only when the "advanced" analysis type is selected. -->
92 <param name="filtered_records_merge_type" type="select" label="How should we deal with records seen at the same site in the VCF, but with different FILTER fields?" help="-filteredRecordsMergeType,--filteredrecordsmergetype &amp;lt;filteredrecordsmergetype&amp;gt;">
93 <option selected="true" value="KEEP_IF_ANY_UNFILTERED"/>
94 <option value="KEEP_IF_ALL_UNFILTERED"/>
95 </param>
96 <!-- The boolean parameters below carry the GATK flag in truevalue and an empty string in falsevalue; the command template inserts the parameter value verbatim. -->
97 <param name="print_complex_merges" type="boolean" checked="false" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,--printComplexMerges"/>
98 <param name="filtered_are_uncalled" type="boolean" checked="false" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF" help="-filteredAreUncalled,--filteredAreUncalled"/>
99 <param name="minimal_vcf" type="boolean" checked="false" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype INFO field" help="-minimalVCF,--minimalVCF"/>
100 <!-- set_key is passed to GATK only when it is non-empty (see the command template). -->
101 <param name="set_key" type="text" value="" label="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from." help="-setKey,--setKey &amp;lt;setKey&amp;gt;"/>
102 <param name="assume_identical_samples" type="boolean" checked="false" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime." help="-assumeIdenticalSamples,--assumeIdenticalSamples"/>
103 <param name="minimum_n" type="integer" value="1" label="Combine variants and output site only if variant is present in at least N input files." help="-minN,--minimumN &amp;lt;minimumN&amp;gt;"/>
104 </expand>
105
106 </inputs>
107 <outputs> <!-- Two datasets: the combined VCF, which the command template passes as the out argument, and a text log, which receives the wrapper's stdout. -->
108 <data name="output_variants" format="vcf" label="${tool.name} on ${on_string} (variants)"/>
109 <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)"/>
110 </outputs>
111 <tests> <!-- One functional test: history reference phiX.fasta, a single input VCF named from_variant_annotator, basic GATK and analysis parameter sets. -->
112 <test>
113 <param name="reference_source_selector" value="history"/>
114 <param name="ref_file" value="phiX.fasta" ftype="fasta"/>
115 <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf"/>
116 <param name="input_variant_name" value="from_variant_annotator"/>
117 <param name="genotype_merge_option" value="PRIORITIZE"/>
118 <param name="gatk_param_type_selector" value="basic"/>
119 <param name="analysis_param_type_selector" value="basic"/>
120 <output name="output_variants" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf" lines_diff="4"/> <!-- The VCF comparison tolerates up to 4 differing lines. -->
121 <output name="output_log" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.log.contains" compare="contains"/> <!-- The log is checked with the "contains" comparison rather than a full diff. -->
122 </test>
123 </tests>
124 <help>
125 **What it does**
126
127 Combines VCF records from different sources; supports both full merges and set unions. Merge: combines multiple records into a single one; if sample names overlap then they are uniquified. Union: assumes each rod represents the same set of samples (although this is not enforced); using the priority list (if provided), emits a single record instance at every position represented in the rods.
128
129 For more information on using the CombineVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_CombineVariants.html&gt;`_.
130
131 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
132
133 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
134
135 ------
136
137 **Inputs**
138
139 GenomeAnalysisTK: CombineVariants accepts variant files as input.
140
141 ------
142
143 **Outputs**
144
145 The output is a combined VCF file.
146
147
148 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
149
150 -------
151
152 **Settings**::
153
154 out File to which variants should be written
155 genotypemergeoption How should we merge genotype records for samples shared across the ROD files? (UNIQUIFY|PRIORITIZE|UNSORTED|REQUIRE_UNIQUE)
156 filteredrecordsmergetype How should we deal with records seen at the same site in the VCF, but with different FILTER fields? KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered (KEEP_IF_ANY_UNFILTERED|KEEP_IF_ALL_UNFILTERED)
157 rod_priority_list When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided
158 printComplexMerges Print out interesting sites requiring complex compatibility merging
159 filteredAreUncalled If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF
160 minimalVCF If true, then the output VCF will contain no INFO or genotype INFO field
161 setKey Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from. Set to null if you don't want the set field emitted.
162 assumeIdenticalSamples If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.
163 minimumN Combine variants and output site only if variant is present in at least N input files.
164
165 @CITATION_SECTION@
166 </help>
167 <expand macro="citations" />
168 </tool>