view variant_eval.xml @ 3:2553f84b8174 draft

Uploaded
author iuc
date Wed, 19 Feb 2014 04:39:38 -0500
parents 8bcc13094767
children f244b8209eb8
line wrap: on
line source

<tool id="gatk2_variant_eval" name="Eval Variants" version="0.0.7">
  <description></description>
  <expand macro="requirements" />
  <macros>
    <import>gatk2_macros.xml</import>
  </macros>
  <command interpreter="python">
    ## Assembles the gatk2_wrapper.py invocation that runs the GATK VariantEval walker.
    ## As used below, -d hands a dataset to the wrapper (GATK option, path, extension, link name),
    ## -p carries literal GATK options and -o carries a hex-encoded option string.
    ## NOTE(review): the exact semantics of -d / -p / -o live in gatk2_wrapper.py; confirm there.
    #from binascii import hexlify

    gatk2_wrapper.py
    --stdout "${output_log}"
    ## One eval binding per entry of the variants repeat, named input_0, input_1, and so on.
    #for $var_count, $variant in enumerate( $reference_source.variants ):
        -d "--eval:input_${var_count},%(file_type)s" "${variant.input_variant}" "${variant.input_variant.ext}" "input_variants_${var_count}"
    #end for
    -p '
    @JAR_PATH@
    -T "VariantEval"
    --out "${output_report}"
    \$GATK2_SITE_OPTIONS

    @THREADS@

    #if $reference_source.reference_source_selector != "history":
        -R "${reference_source.ref_file.fields.path}"
    #end if
    '
    ## A reference taken from the history is not passed here; presumably the included
    ## standard_gatk_options template supplies it. TODO confirm against gatk2_macros.xml.

    ## Comparison ROD bindings. The known_names flag must follow the checkbox state:
    ## the string form of a boolean param is never empty, so testing str(...) would
    ## always be true. Use the param truthiness instead, as the dbSNP branch does.
    #for $rod_binding in $comp_rod_bind:
        -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
        #if $rod_binding.comp_known_names:
            -p '--known_names "${rod_binding.comp_rod_name}"'
        #end if
    #end for

    ## Optional dbSNP binding, optionally also registered as a known-sites track.
    #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
        #if $dbsnp_rod_bind_type.dbsnp_known_names
            -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
        #end if
    #end if

    #include source=$standard_gatk_options#


    ##start analysis specific options
    #if $analysis_param_type.analysis_param_type_selector == "advanced":
        ## Stratification expressions are hex-encoded before being passed with -o.
        #for $stratification in $analysis_param_type.stratifications:
            #set $select_string = "--select_exps '%s' --select_names '%s'" % ( str( $stratification.select_exps ), str( $stratification.select_name )  )
            -o '${ hexlify( $select_string ) }'
        #end for
        -p '

        #for $sample in $analysis_param_type.samples:
            --sample "${sample.sample}"
        #end for

        #if str( $analysis_param_type.stratification_modules ) != "None":
            #for $stratification_module in str( $analysis_param_type.stratification_modules).split( ',' ):
                --stratificationModule "${stratification_module}"
            #end for
        #end if

        ${analysis_param_type.do_not_use_all_standard_stratifications}

        #for $variant_type in $analysis_param_type.only_variants_of_type:
            --onlyVariantsOfType "${variant_type.variant_type}"
        #end for

        #if str( $analysis_param_type.eval_modules ) != "None":
            #for $eval_module in str( $analysis_param_type.eval_modules).split( ',' ):
                --evalModule "${eval_module}"
            #end for
        #end if

        ${analysis_param_type.do_not_use_all_standard_modules}

        #if str( $analysis_param_type.num_samples ) != "0":
            --numSamples "${analysis_param_type.num_samples}"
        #end if

        --minPhaseQuality "${analysis_param_type.min_phase_quality}"

        #if str( $analysis_param_type.family ):
            --family_structure "${analysis_param_type.family}"
        #end if

        --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}"

        #if str( $analysis_param_type.ancestral_alignments ) != "None":
            --ancestralAlignments "${analysis_param_type.ancestral_alignments}"
        #end if
        '
        ## Optional interval-style datasets are passed as datasets with -d rather than inside the literal option string.
        #if str( $analysis_param_type.known_cnvs ) != "None":
            -d "--knownCNVs" "${analysis_param_type.known_cnvs}" "${analysis_param_type.known_cnvs.ext}" "input_known_cnvs"
        #end if

        #if str( $analysis_param_type.strat_intervals ) != "None":
            -d "--stratIntervals" "${analysis_param_type.strat_intervals}" "${analysis_param_type.strat_intervals.ext}" "input_strat_intervals"
        #end if
    #end if
  </command>
  <inputs>
    
    <!-- Reference genome selection. Both branches declare the same "variants" repeat of VCF inputs
         (at least one required) and differ only in ref_file: a select populated from the
         gatk2_picard_indexes data table ("cached") or a FASTA dataset ("history").
         The selector param itself comes from the reference_source_selector_param macro, which is
         defined outside this file. -->
    <conditional name="reference_source">
      <expand macro="reference_source_selector_param" />
      <when value="cached">
        <repeat name="variants" title="Variant" min="1" help="-eval,--eval &amp;lt;eval&amp;gt;">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
          <options from_data_table="gatk2_picard_indexes">
            <!-- Disabled dbkey filter: with it commented out, the list is not restricted by the input's build. -->
            <!-- <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> -->
          </options>
          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <when value="history"> <!-- FIX ME!!!! -->
        <!-- NOTE(review): the FIX ME marker above is from the original author; what needs fixing is not recorded here. -->
        <repeat name="variants" title="Variant" min="1" help="-eval,--eval &amp;lt;eval&amp;gt;">
          <param name="input_variant" type="data" format="vcf" label="Input variant file" />
        </repeat>
        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
      </when>
    </conditional>
    
    <!-- Zero or more comparison ROD tracks. Each entry supplies a VCF, a binding name restricted to
         word characters by the regex validator, and a checkbox asking for the track to be
         registered under known_names in the command template. -->
    <repeat name="comp_rod_bind" title="Comparison Reference-Ordered Data (ROD) file" help="-comp,--comp &amp;lt;comp&amp;gt;">
      <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" />
      <param name="comp_rod_name" type="text" value="" label="Comparison ROD name">
          <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
      </param>
      <param name="comp_known_names" type="boolean" label="Use comparison ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;"/>
    </repeat>
    <!-- Optional dbSNP track. "set_dbsnp" is preselected and asks for a VCF, a binding name
         (default "dbsnp", word characters only) and whether to register it under known_names;
         "exclude_dbsnp" adds no inputs. -->
    <conditional name="dbsnp_rod_bind_type">
      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
        <option value="set_dbsnp" selected="True">Set dbSNP</option>
        <option value="exclude_dbsnp">Don't set dbSNP</option>
      </param>
      <when value="exclude_dbsnp" />
      <when value="set_dbsnp">
        <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
        <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbsnp ROD name">
          <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
        </param>
        <param name="dbsnp_known_names" type="boolean" label="Use dbSNP ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;" />
      </when>
    </conditional>
    
    <!-- Inputs pulled in from the gatk_param_type_conditional macro, which is defined outside this
         file (presumably in the imported gatk2_macros.xml). -->
    <expand macro="gatk_param_type_conditional" />
    
    
    <!-- VariantEval-specific inputs, handed to the analysis_type_conditional macro (defined outside
         this file). The command template reads them as analysis_param_type.* and only when the
         analysis selector is "advanced". -->
    <expand macro="analysis_type_conditional">
        <repeat name="stratifications" title="Stratification">
          <param name="select_exps" value="" type="text" label="Stratification Expression" help="-select,--select_exps &amp;lt;select_exps&amp;gt;">
            <sanitizer>
              <valid initial="string.printable">
               <remove value="&apos;"/>
             </valid>
              <mapping initial="none"/>
            </sanitizer>
          </param>
          <param name="select_name" value="" type="text" label="Name" help="-selectName,--select_names &amp;lt;select_names&amp;gt;"/>
        </repeat>

        <repeat name="samples" title="Sample" help="-sn,--sample &amp;lt;sample&amp;gt;">
          <param name="sample" value="" type="text" label="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context"/>
        </repeat>

        <param name="stratification_modules" type="select" multiple="True" display="checkboxes" label="Stratification modules to apply to the eval track(s)" help="-ST,--stratificationModule &amp;lt;stratificationModule&amp;gt;" >
          <!-- do these need individual options also? gatk wiki has little info -->
          <option value="AlleleFrequency" />
          <option value="AlleleCount" />
          <option value="CompRod" />
          <option value="Contig" />
          <option value="CpG" />
          <option value="Degeneracy" />
          <option value="EvalRod" />
          <option value="Filter" />
          <option value="FunctionalClass" />
          <option value="JexlExpression" />
          <option value="Sample" />
          <option value="IntervalStratification" />
        </param>
        <param name="do_not_use_all_standard_stratifications" checked="false" type="boolean" truevalue="--doNotUseAllStandardStratifications" falsevalue="" label="Do not use the standard stratification modules by default" help="-noST,--doNotUseAllStandardStratifications" />

        <repeat name="only_variants_of_type" title="only Variants Of Type" help="--onlyVariantsOfType">
          <param name="variant_type" type="text" value="" label="only variants of these types will be considered during the evaluation"/>
        </repeat>

        <param name="eval_modules" type="select" multiple="True" display="checkboxes" label="Eval modules to apply to the eval track(s)" help="-EV,--evalModule &amp;lt;evalModule&amp;gt;" >
          <!-- do these need individual options also? gatk wiki has little info -->
          <option value="ACTransitionTable" />
          <option value="AlleleFrequencyComparison" />
          <option value="AminoAcidTransition" />
          <option value="CompOverlap" />
          <option value="CountVariants" />
          <option value="GenotypeConcordance" />
          <option value="GenotypePhasingEvaluator" />
          <option value="IndelMetricsByAC" />
          <option value="IndelStatistics" />
          <option value="MendelianViolationEvaluator" />
          <option value="PrintMissingComp" />
          <option value="PrivatePermutations" />
          <option value="SimpleMetricsByAC" />
          <option value="ThetaVariantEvaluator" />
          <option value="TiTvVariantEvaluator" />
          <option value="VariantQualityScore" />
        </param>
        <param name="do_not_use_all_standard_modules" checked="false" type="boolean" truevalue="--doNotUseAllStandardModules" falsevalue="" label="Do not use the standard eval modules by default" help="-noEV,--doNotUseAllStandardModules" />

        <!-- The command template omits numSamples when this is left at 0. -->
        <param name="num_samples" type="integer" label="Number of samples (used if no samples are available in the VCF file)" value="0" help="-ns,--numSamples &amp;lt;numSamples&amp;gt;"/>
        <param name="min_phase_quality" type="float" label="Minimum phasing quality" value="10.0" help="-mpq,--minPhaseQuality &amp;lt;minPhaseQuality&amp;gt;"/>
        <param name="family" type="text" value="" label="If provided, genotypes will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined" help="--family_structure"/>
        <param name="mendelian_violation_qual_threshold" type="integer" label="Minimum genotype QUAL score for each trio member required to accept a site as a violation" value="50" help="-mvq,--mendelianViolationQualThreshold &amp;lt;mendelianViolationQualThreshold&amp;gt;"/>
        <param name="ancestral_alignments" type="data" format="fasta" optional="True" label="Fasta file with ancestral alleles" help="-aa,--ancestralAlignments &amp;lt;ancestralAlignments&amp;gt;" />
        <param name="known_cnvs" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features describing a known list of copy number variants" help="-knownCNVs,--knownCNVs &amp;lt;knownCNVs&amp;gt;" />
        <param name="strat_intervals" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features for the IntervalStratification" help="-stratIntervals,--stratIntervals &amp;lt;stratIntervals&amp;gt;" />

    </expand>
    
    
  </inputs>
  <!-- Two datasets are produced: output_report, which the command template passes to GATK as its
       out file, and output_log, which the command template passes to the wrapper as its stdout target. -->
  <outputs>
    <data format="gatk_report" name="output_report" label="${tool.name} on ${on_string} (report)" />
    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
  </outputs>
  <!-- One functional test: reference FASTA taken from the history, a single eval VCF, a dbSNP
       binding flagged as known, no comparison RODs, and the basic option sets. The report is
       compared against a stored file; the log is checked with compare="contains". -->
  <tests>
      <test>
          <param name="reference_source_selector" value="history" />
          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
          <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
          <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
          <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
          <param name="dbsnp_rod_name" value="dbsnp" />
          <param name="dbsnp_known_names" value="True"/>
          <param name="comp_rod_bind" value="0" />
          <param name="gatk_param_type_selector" value="basic" />
          <param name="analysis_param_type_selector" value="basic" />
          <output name="output_report" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.gatk_report" />
          <output name="output_log" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.log.contains" compare="contains" />
      </test>
  </tests>
  <help>
**What it does**

General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)

For more information on using the VariantEval module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_varianteval_VariantEval.html&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.

------

**Inputs**

GenomeAnalysisTK: VariantEval accepts variant files as input.


**Outputs**

The output is a table of variant evaluation.


Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.

-------

**Settings**::

 out                                   An output file presented to the walker. Will overwrite contents if file exists.
 list                                  List the available eval modules and exit
 select_exps                           One or more stratifications to use when evaluating the data
 select_names                          Names to use for the list of stratifications (must be a 1-to-1 mapping)
 sample                                Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context
 known_names                           Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets
 stratificationModule                  One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noST is specified)
 doNotUseAllStandardStratifications    Do not use the standard stratification modules by default (instead, only those that are specified with the -ST option)
 onlyVariantsOfType                    If provided, only variants of these types will be considered during the evaluation
 evalModule                            One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified)
 doNotUseAllStandardModules            Do not use the standard modules by default (instead, only those that are specified with the -EV option)
 numSamples                            Number of samples (used if no samples are available in the VCF file)
 minPhaseQuality                       Minimum phasing quality
 family_structure                      If provided, genotypes will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined
 mendelianViolationQualThreshold       Minimum genotype QUAL score for each trio member required to accept a site as a violation
 ancestralAlignments                   Fasta file with ancestral alleles

@CITATION_SECTION@
  </help>
</tool>