Mercurial > repos > devteam > variant_combine
view variant_combine.xml @ 0:1a6e16391727 draft default tip
Imported from capsule None
author | devteam |
---|---|
date | Tue, 01 Apr 2014 10:50:25 -0400 |
parents | |
children |
line wrap: on
line source
<tool id="gatk_variant_combine" name="Combine Variants" version="0.0.4">
  <!--
    Galaxy wrapper for the GATK CombineVariants walker.
    The command template hands every input VCF to gatk_wrapper.py as a named
    rod binding, builds the rod priority list from the order in which the
    inputs are listed, and appends the shared GATK options from gatk_macros.xml.
    NOTE: the command block is a Cheetah template; its directives and '##'
    comments are line-oriented, so the line breaks inside it are significant.
  -->
  <description></description>
  <requirements>
    <requirement type="package" version="1.4">gatk</requirement>
  </requirements>
  <macros>
    <import>gatk_macros.xml</import>
  </macros>
  <command interpreter="python">gatk_wrapper.py
    --max_jvm_heap_fraction "1"
    --stdout "${output_log}"

    ##register each input VCF under its user-supplied name and remember the names in listing order
    #set $priority_order = []
    #for $input_variant in $reference_source.input_variants:
        -d "--variant:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}"
        #set $input_variant_name = str( $input_variant.input_variant_name )
        #assert $input_variant_name not in $priority_order, "Variant Names must be unique" ##this should be handled by a validator
        #silent $priority_order.append( $input_variant_name )
    #end for
    -p 'java
    -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
    -T "CombineVariants"
    --out "${output_variants}"
    ##--num_threads 4 ##hard coded, for now
    -et "NO_ET" ##ET no phone home
    ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
    #if $reference_source.reference_source_selector != "history":
        -R "${reference_source.ref_file.fields.path}"
    #end if
    --genotypemergeoption "${genotype_merge_option}"
    --rod_priority_list "${ ','.join( $priority_order ) }"
    '

    #include source=$standard_gatk_options#

    ##start analysis specific options
    #if $analysis_param_type.analysis_param_type_selector == "advanced":
        -p '
        --filteredrecordsmergetype "${analysis_param_type.filtered_records_merge_type}"
        ${analysis_param_type.print_complex_merges}
        ${analysis_param_type.filtered_are_uncalled}
        ${analysis_param_type.minimal_vcf}
        ${analysis_param_type.assume_identical_samples}

        #if str( $analysis_param_type.set_key ):
            --setKey "${analysis_param_type.set_key}"
        #end if

        --minimumN "${analysis_param_type.minimum_n}"
        '
    #end if
  </command>
  <inputs>
    <conditional name="reference_source">
      <expand macro="reference_source_selector_param" />
      <when value="cached">
        <!--
          The variant name is interpolated into the rod binding tag, into the
          "input_variant_NAME" prefix and into the comma-joined priority list,
          so it is restricted to characters that cannot break any of those.
        -->
        <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &lt;variant&gt;)">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
          <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique and may contain only letters, digits, '_', '.' and '-'">
            <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
            <validator type="regex" message="Variant names may contain only letters, digits, '_', '.' and '-'">^[A-Za-z0-9_.-]+$</validator>
          </param>
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &lt;reference_sequence&gt;">
          <options from_data_table="gatk_picard_indexes">
            <!-- <filter type="data_meta" key="dbkey" ref="input_variants.input_variant" column="dbkey"/> -->
          </options>
          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <when value="history">
        <!-- FIX ME!!!! -->
        <!--
          NOTE(review): the original marker above does not say what needs fixing.
          This repeat duplicates the one in the "cached" branch; keep the two in sync.
        -->
        <repeat min="1" name="input_variants" title="Variants to Merge" help="Records will be prioritized in the order that you list them here (-V,--variant &lt;variant&gt;)">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
          <param name="input_variant_name" type="text" value="" label="Variant name" help="Names must be unique and may contain only letters, digits, '_', '.' and '-'">
            <validator type="length" min="1" message="You must provide a unique name for this set of variants" />
            <validator type="regex" message="Variant names may contain only letters, digits, '_', '.' and '-'">^[A-Za-z0-9_.-]+$</validator>
          </param>
        </repeat>
        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" />
      </when>
    </conditional>

    <param name="genotype_merge_option" type="select" label="How should we merge genotype records across records for samples shared across the ROD files" help="-genotypeMergeOptions,--genotypemergeoption &lt;genotypemergeoption&gt;" >
      <option value="UNIQUIFY" />
      <option value="PRIORITIZE" selected="true"/>
      <option value="UNSORTED" />
      <option value="REQUIRE_UNIQUE" />
    </param>

    <!-- Shared GATK option groups; both macros are defined in gatk_macros.xml. -->
    <expand macro="gatk_param_type_conditional" />

    <expand macro="analysis_type_conditional">
      <param name="filtered_records_merge_type" type="select" label="How should we deal with records seen at the same site in the VCF, but with different FILTER fields?" help="-filteredRecordsMergeType,--filteredrecordsmergetype &lt;filteredrecordsmergetype&gt;" >
        <option value="KEEP_IF_ANY_UNFILTERED" selected="true"/>
        <option value="KEEP_IF_ALL_UNFILTERED" />
      </param>
      <param name="print_complex_merges" checked="false" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,--printComplexMerges" />
      <param name="filtered_are_uncalled" checked="false" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF" help="-filteredAreUncalled,--filteredAreUncalled" />
      <param name="minimal_vcf" checked="false" type="boolean" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype INFO field" help="-minimalVCF,--minimalVCF" />
      <param name="set_key" type="text" value="" label="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from." help="-setKey,--setKey &lt;setKey&gt;"/>
      <param name="assume_identical_samples" checked="false" type="boolean" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime." help="-assumeIdenticalSamples,--assumeIdenticalSamples" />
      <param name="minimum_n" type="integer" value="1" label="Combine variants and output site only if variant is present in at least N input files." help="-minN,--minimumN &lt;minimumN&gt;"/>
    </expand>
  </inputs>
  <outputs>
    <!-- output_variants receives the combined VCF; output_log receives the wrapper's captured stdout. -->
    <data format="vcf" name="output_variants" label="${tool.name} on ${on_string} (variants)" />
    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
  </outputs>
  <tests>
    <!-- Single-input run against a history reference with basic GATK and analysis options. -->
    <test>
      <param name="reference_source_selector" value="history" />
      <param name="ref_file" value="phiX.fasta" ftype="fasta" />
      <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
      <param name="input_variant_name" value="from_variant_annotator" />
      <param name="genotype_merge_option" value="PRIORITIZE" />
      <param name="gatk_param_type_selector" value="basic" />
      <param name="analysis_param_type_selector" value="basic" />
      <output name="output_variants" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.vcf" lines_diff="4" />
      <output name="output_log" file="gatk/gatk_variant_combine/gatk_variant_combine_out_1.log.contains" compare="contains" />
    </test>
  </tests>
  <help>
**What it does**

Combines VCF records from different sources; supports both full merges and set unions. Merge: combines multiple records into a single one; if sample names overlap then they are uniquified. Union: assumes each rod represents the same set of samples (although this is not enforced); using the priority list (if provided), emits a single record instance at every position represented in the rods.

For more information on using the CombineVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/CombineVariants&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.

------

**Inputs**

GenomeAnalysisTK: CombineVariants accepts variant files as input.


------

**Outputs**

The output is a combined vcf file.


Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.

-------

**Settings**::

 out                         File to which variants should be written
 genotypemergeoption         How should we merge genotype records for samples shared across the ROD files? (UNIQUIFY|PRIORITIZE|UNSORTED|REQUIRE_UNIQUE)
 filteredrecordsmergetype    How should we deal with records seen at the same site in the VCF, but with different FILTER fields? KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered (KEEP_IF_ANY_UNFILTERED|KEEP_IF_ALL_UNFILTERED)
 rod_priority_list           When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided
 printComplexMerges          Print out interesting sites requiring complex compatibility merging
 filteredAreUncalled         If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF
 minimalVCF                  If true, then the output VCF will contain no INFO or genotype INFO field
 setKey                      Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from.  Set to null if you don't want the set field emitted.
 assumeIdenticalSamples      If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.
 minimumN                    Combine variants and output site only if variant is present in at least N input files.

@CITATION_SECTION@
  </help>
</tool>