Mercurial > repos > iuc > gatk2
view variant_eval.xml @ 3:2553f84b8174 draft
Uploaded
author | iuc |
---|---|
date | Wed, 19 Feb 2014 04:39:38 -0500 |
parents | 8bcc13094767 |
children | f244b8209eb8 |
line wrap: on
line source
<tool id="gatk2_variant_eval" name="Eval Variants" version="0.0.7">
  <!--
      Galaxy tool definition that runs the GATK walker selected with -T "VariantEval".
      The command template builds a gatk2_wrapper.py invocation. The "requirements",
      "reference_source_selector_param", "gatk_param_type_conditional" and
      "analysis_type_conditional" macros, as well as the JAR_PATH, THREADS and
      CITATION_SECTION tokens, are imported from gatk2_macros.xml (not shown here).
  -->
  <description></description>
  <expand macro="requirements" />
  <macros>
    <import>gatk2_macros.xml</import>
  </macros>
  <command interpreter="python">
    #from binascii import hexlify

    gatk2_wrapper.py
    --stdout "${output_log}"
    ## One eval binding per entry of the "variants" repeat, named input_0, input_1, ...
    #for $var_count, $variant in enumerate( $reference_source.variants ):
        -d "--eval:input_${var_count},%(file_type)s" "${variant.input_variant}" "${variant.input_variant.ext}" "input_variants_${var_count}"
    #end for
    -p '
    @JAR_PATH@
    -T "VariantEval"
    --out "${output_report}"
    \$GATK2_SITE_OPTIONS

    @THREADS@

    #if $reference_source.reference_source_selector != "history":
        -R "${reference_source.ref_file.fields.path}"
    #end if
    '

    #for $rod_binding in $comp_rod_bind:
        -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
        ## Test the boolean itself, as the dbSNP branch below does. The string form
        ## of a boolean parameter is never empty, so a str() check is always true.
        #if $rod_binding.comp_known_names:
            -p '--known_names "${rod_binding.comp_rod_name}"'
        #end if
    #end for

    #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp':
        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"

        #if $dbsnp_rod_bind_type.dbsnp_known_names:
            -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
        #end if
    #end if

    #include source=$standard_gatk_options#

    ##start analysis specific options
    #if $analysis_param_type.analysis_param_type_selector == "advanced":
        ## Each select expression/name pair is hex-encoded and passed with -o;
        ## presumably gatk2_wrapper.py decodes it (the wrapper is not visible here).
        #for $stratification in $analysis_param_type.stratifications:
            #set $select_string = "--select_exps '%s' --select_names '%s'" % ( str( $stratification.select_exps ), str( $stratification.select_name ) )
            -o '${ hexlify( $select_string ) }'
        #end for
        -p '

        #for $sample in $analysis_param_type.samples:
            --sample "${sample.sample}"
        #end for

        ## An unset multi-select renders as the string "None".
        #if str( $analysis_param_type.stratification_modules ) != "None":
            #for $stratification_module in str( $analysis_param_type.stratification_modules).split( ',' ):
                --stratificationModule "${stratification_module}"
            #end for
        #end if

        ${analysis_param_type.do_not_use_all_standard_stratifications}

        #for $variant_type in $analysis_param_type.only_variants_of_type:
            --onlyVariantsOfType "${variant_type.variant_type}"
        #end for

        #if str( $analysis_param_type.eval_modules ) != "None":
            #for $eval_module in str( $analysis_param_type.eval_modules).split( ',' ):
                --evalModule "${eval_module}"
            #end for
        #end if

        ${analysis_param_type.do_not_use_all_standard_modules}

        ## 0 is the default of num_samples and means the option is not passed.
        #if str( $analysis_param_type.num_samples ) != "0":
            --numSamples "${analysis_param_type.num_samples}"
        #end if

        --minPhaseQuality "${analysis_param_type.min_phase_quality}"

        #if str( $analysis_param_type.family ):
            --family_structure "${analysis_param_type.family}"
        #end if

        --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}"

        #if str( $analysis_param_type.ancestral_alignments ) != "None":
            --ancestralAlignments "${analysis_param_type.ancestral_alignments}"
        #end if
        '

        #if str( $analysis_param_type.known_cnvs ) != "None":
            -d "--knownCNVs" "${analysis_param_type.known_cnvs}" "${analysis_param_type.known_cnvs.ext}" "input_known_cnvs"
        #end if

        #if str( $analysis_param_type.strat_intervals ) != "None":
            -d "--stratIntervals" "${analysis_param_type.strat_intervals}" "${analysis_param_type.strat_intervals.ext}" "input_strat_intervals"
        #end if
    #end if
  </command>
  <inputs>
    <!-- Reference genome source; both branches declare the same "variants" repeat and a "ref_file" parameter. -->
    <conditional name="reference_source">
      <expand macro="reference_source_selector_param" />
      <when value="cached">
        <repeat name="variants" title="Variant" min="1" help="-eval,--eval &lt;eval&gt;">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &lt;reference_sequence&gt;">
          <options from_data_table="gatk2_picard_indexes">
            <!-- <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> -->
          </options>
          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <when value="history"> <!-- FIX ME!!!! -->
        <repeat name="variants" title="Variant" min="1" help="-eval,--eval &lt;eval&gt;">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
        </repeat>
        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" />
      </when>
    </conditional>

    <!-- Optional comparison tracks; the ROD name is restricted to word characters because it is embedded in the binding argument. -->
    <repeat name="comp_rod_bind" title="Comparison Reference-Ordered Data (ROD) file" help="-comp,--comp &lt;comp&gt;">
      <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" />
      <param name="comp_rod_name" type="text" value="" label="Comparison ROD name">
        <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
      </param>
      <param name="comp_known_names" type="boolean" label="Use comparison ROD file as known_names" help="-knownName,--known_names &lt;known_names&gt;"/>
    </repeat>

    <conditional name="dbsnp_rod_bind_type">
      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &lt;dbsnp&gt;">
        <option value="set_dbsnp" selected="True">Set dbSNP</option>
        <option value="exclude_dbsnp">Don't set dbSNP</option>
      </param>
      <when value="exclude_dbsnp" />
      <when value="set_dbsnp">
        <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
        <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbsnp ROD name">
          <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
        </param>
        <param name="dbsnp_known_names" type="boolean" label="Use dbSNP ROD file as known_names" help="-knownName,--known_names &lt;known_names&gt;" />
      </when>
    </conditional>

    <expand macro="gatk_param_type_conditional" />

    <!-- Parameters below are read by the command template only when analysis_param_type_selector is "advanced". -->
    <expand macro="analysis_type_conditional">
      <repeat name="stratifications" title="Stratification">
        <param name="select_exps" value="" type="text" label="Stratification Expression" help="-select,--select_exps &lt;select_exps&gt;">
          <!-- Single quotes are removed because the command template wraps the expression in single quotes. -->
          <sanitizer>
            <valid initial="string.printable">
              <remove value="'"/>
            </valid>
            <mapping initial="none"/>
          </sanitizer>
        </param>
        <param name="select_name" value="" type="text" label="Name" help="-selectName,--select_names &lt;select_names&gt;"/>
      </repeat>

      <repeat name="samples" title="Sample" help="-sn,--sample &lt;sample&gt;">
        <param name="sample" value="" type="text" label="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context"/>
      </repeat>

      <param name="stratification_modules" type="select" multiple="True" display="checkboxes" label="Stratification modules to apply to the eval track(s)" help="-ST,--stratificationModule &lt;stratificationModule&gt;" >
        <!-- do these need individual options also? gatk wiki has little info -->
        <option value="AlleleFrequency" />
        <option value="AlleleCount" />
        <option value="CompRod" />
        <option value="Contig" />
        <option value="CpG" />
        <option value="Degeneracy" />
        <option value="EvalRod" />
        <option value="Filter" />
        <option value="FunctionalClass" />
        <option value="JexlExpression" />
        <option value="Sample" />
        <option value="IntervalStratification" />
      </param>
      <param name="do_not_use_all_standard_stratifications" checked="false" type="boolean" truevalue="--doNotUseAllStandardStratifications" falsevalue="" label="Do not use the standard stratification modules by default" help="-noST,--doNotUseAllStandardStratifications" />

      <repeat name="only_variants_of_type" title="only Variants Of Type" help="--onlyVariantsOfType">
        <param name="variant_type" type="text" value="" label="only variants of these types will be considered during the evaluation"/>
      </repeat>

      <param name="eval_modules" type="select" multiple="True" display="checkboxes" label="Eval modules to apply to the eval track(s)" help="-EV,--evalModule &lt;evalModule&gt;" >
        <!-- do these need individual options also? gatk wiki has little info -->
        <option value="ACTransitionTable" />
        <option value="AlleleFrequencyComparison" />
        <option value="AminoAcidTransition" />
        <option value="CompOverlap" />
        <option value="CountVariants" />
        <option value="GenotypeConcordance" />
        <option value="GenotypePhasingEvaluator" />
        <option value="IndelMetricsByAC" />
        <option value="IndelStatistics" />
        <option value="MendelianViolationEvaluator" />
        <option value="PrintMissingComp" />
        <option value="PrivatePermutations" />
        <option value="SimpleMetricsByAC" />
        <option value="ThetaVariantEvaluator" />
        <option value="TiTvVariantEvaluator" />
        <option value="VariantQualityScore" />
      </param>
      <param name="do_not_use_all_standard_modules" checked="false" type="boolean" truevalue="--doNotUseAllStandardModules" falsevalue="" label="Do not use the standard eval modules by default" help="-noEV,--doNotUseAllStandardModules" />

      <param name="num_samples" type="integer" label="Number of samples (used if no samples are available in the VCF file)" value="0" help="-ns,--numSamples &lt;numSamples&gt;"/>
      <param name="min_phase_quality" type="float" label="Minimum phasing quality" value="10.0" help="-mpq,--minPhaseQuality &lt;minPhaseQuality&gt;"/>
      <param name="family" type="text" value="" label="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined" help="--family_structure"/>
      <param name="mendelian_violation_qual_threshold" type="integer" label="Minimum genotype QUAL score for each trio member required to accept a site as a violation" value="50" help="-mvq,--mendelianViolationQualThreshold &lt;mendelianViolationQualThreshold&gt;"/>
      <param name="ancestral_alignments" type="data" format="fasta" optional="True" label="Fasta file with ancestral alleles" help="-aa,--ancestralAlignments &lt;ancestralAlignments&gt;" />
      <param name="known_cnvs" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features describing a known list of copy number variants" help="-knownCNVs,--knownCNVs &lt;knownCNVs&gt;" />
      <param name="strat_intervals" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features for the IntervalStratification" help="-stratIntervals,--stratIntervals &lt;stratIntervals&gt;" />
    </expand>
  </inputs>
  <outputs>
    <data format="gatk_report" name="output_report" label="${tool.name} on ${on_string} (report)" />
    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
  </outputs>
  <tests>
    <!-- Basic run: reference from history, one eval VCF, dbSNP set and used as known names, no comparison RODs. -->
    <test>
      <param name="reference_source_selector" value="history" />
      <param name="ref_file" value="phiX.fasta" ftype="fasta" />
      <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
      <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
      <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
      <param name="dbsnp_rod_name" value="dbsnp" />
      <param name="dbsnp_known_names" value="True"/>
      <param name="comp_rod_bind" value="0" />
      <param name="gatk_param_type_selector" value="basic" />
      <param name="analysis_param_type_selector" value="basic" />
      <output name="output_report" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.gatk_report" />
      <output name="output_log" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.log.contains" compare="contains" />
    </test>
  </tests>
  <help>
**What it does**

General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)

For more information on using the VariantEval module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_varianteval_VariantEval.html&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.

------

**Inputs**

GenomeAnalysisTK: VariantEval accepts variant files as input.


**Outputs**

The output is a table of variant evaluation.


Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.

-------

**Settings**::

 out                                   An output file presented to the walker. Will overwrite contents if file exists.
 list                                  List the available eval modules and exit
 select_exps                           One or more stratifications to use when evaluating the data
 select_names                          Names to use for the list of stratifications (must be a 1-to-1 mapping)
 sample                                Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context
 known_names                           Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets
 stratificationModule                  One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noST is specified)
 doNotUseAllStandardStratifications    Do not use the standard stratification modules by default (instead, only those that are specified with the -ST option)
 onlyVariantsOfType                    If provided, only variants of these types will be considered during the evaluation
 evalModule                            One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified)
 doNotUseAllStandardModules            Do not use the standard modules by default (instead, only those that are specified with the -EV option)
 numSamples                            Number of samples (used if no samples are available in the VCF file)
 minPhaseQuality                       Minimum phasing quality
 family_structure                      If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined
 mendelianViolationQualThreshold       Minimum genotype QUAL score for each trio member required to accept a site as a violation
 ancestralAlignments                   Fasta file with ancestral alleles

@CITATION_SECTION@
  </help>
</tool>