Mercurial > repos > iuc > gatk2
changeset 2:8bcc13094767 draft
Uploaded
author | iuc |
---|---|
date | Sat, 18 Jan 2014 07:21:33 -0500 |
parents | f760c0de8e3a |
children | 2553f84b8174 |
files | base_recalibrator.xml gatk2_macros.xml gatk2_picard_index.loc.sample haplotype_caller.xml indel_realigner.xml print_reads.xml readme.rst reduce_reads.xml unified_genotyper.xml variant_annotator.xml variant_eval.xml variant_filtration.xml variant_validate.xml |
diffstat | 13 files changed, 119 insertions(+), 213 deletions(-) [+] |
line wrap: on
line diff
--- a/base_recalibrator.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/base_recalibrator.xml Sat Jan 18 07:21:33 2014 -0500 @@ -17,9 +17,8 @@ \$GATK2_SITE_OPTIONS ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975 - --num_cpu_threads_per_data_thread 8 + --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-8} - @THREADS@ ## we set non standards at every run and the user can choose which ones are preferred ## in our select box both standard options (ContextCovariate, CycleCovariate) are selected by default --no_standard_covs @@ -111,12 +110,7 @@ </param> </when> </conditional> - <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &lt;recal_file&gt;" > - <help>The input covariates table file which enables on-the-fly base quality score recalibration. - Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. - Please be aware that one should only run recalibration with the covariates file created on the same input bam(s). - </help> - </param> + <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &lt;recal_file&gt;)" /> <param name="covariates" type="select" multiple="True" display="checkboxes" label="Covariates to be used in the recalibration" help="-cov,--covariate &lt;covariate&gt;" > <!-- might we want to load the available covariates from an external configuration file, since additional ones can be added to local installs? -->
--- a/gatk2_macros.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/gatk2_macros.xml Sat Jan 18 07:21:33 2014 -0500 @@ -1,17 +1,23 @@ <macros> - <xml name="requirements"> - <requirements> - <requirement type="package" version="0.1.19">samtools</requirement> - <requirement type="set_environment">GATK2_PATH</requirement> - <requirement type="set_environment">GATK2_SITE_OPTIONS</requirement> - </requirements> - </xml> - <token name="@THREADS@"> - --num_threads \${GALAXY_SLOTS:-4} - </token> - <token name="@JAR_PATH@"> - java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" - </token> + <xml name="requirements"> + <requirements> + <requirement type="package">gatk2</requirement> + <requirement type="package" version="0.1.19">samtools</requirement> + <requirement type="set_environment">GATK2_PATH</requirement> + <requirement type="set_environment">GATK2_SITE_OPTIONS</requirement> + </requirements> + </xml> + <token name="@THREADS@"> + --num_threads \${GALAXY_SLOTS:-4} + </token> + <token name="@JAR_PATH@"> + java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" + </token> + <token name="@DBSNP_OPTIONS@"> + #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp' + -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" + #end if + </token> <template name="standard_gatk_options"> ##start standard gatk options #if $gatk_param_type.gatk_param_type_selector == "advanced": @@ -311,6 +317,21 @@ <option value="history">History</option> </param> </xml> + <xml name="dbsnp_param"> + <conditional name="dbsnp_rod_bind_type"> + <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &lt;dbsnp&gt;"> + <option value="set_dbsnp" selected="True">Set dbSNP</option> + <option value="exclude_dbsnp">Don't set dbSNP</option> + </param> + <when value="exclude_dbsnp" /> + <when value="set_dbsnp"> + <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" /> + <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbsnp ROD name"> + <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator> + </param> + </when> + </conditional> + </xml> <token name="@CITATION_SECTION@">------ **Citation**
--- a/gatk2_picard_index.loc.sample Mon Dec 02 10:36:02 2013 -0500 +++ b/gatk2_picard_index.loc.sample Sat Jan 18 07:21:33 2014 -0500 @@ -24,7 +24,3 @@ #the dict file does not have the .fa extension although the #path list in the loc file does include it. # -hg18 hg18 hg18 /data/galaxy/ext-tool-data/picard/hg18.fa -hg19 hg19 hg19 /data/galaxy/ext-tool-data/picard/hg19.fa -mm8 mm8 mm8 /data/galaxy/ext-tool-data/picard/mm8.fa -mm9 mm9 mm9 /data/galaxy/ext-tool-data/picard/mm9.fa
--- a/haplotype_caller.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/haplotype_caller.xml Sat Jan 18 07:21:33 2014 -0500 @@ -7,10 +7,12 @@ <command interpreter="python"> gatk2_wrapper.py --stdout "${output_log}" - -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" - #if str( $reference_source.input_bam.metadata.bam_index ) != "None": - -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index - #end if + #for $i, $input_bam in enumerate( $reference_source.input_bams ): + -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" + #if str( $input_bam.input_bam.metadata.bam_index ) != "None": + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #end if + #end for -p ' @JAR_PATH@ -T "HaplotypeCaller" @@ -18,7 +20,7 @@ \$GATK2_SITE_OPTIONS - @THREADS@ + --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-4} #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" @@ -27,14 +29,12 @@ --BQSR "${input_recal}" #end if ' + @DBSNP_OPTIONS@ #include source=$standard_gatk_options# ##start analysis specific options #if $analysis_param_type.analysis_param_type_selector == "advanced": -p ' - #if $analysis_param_type.p_nonref_model.__str__ != "None" and len($analysis_param_type.p_nonref_model.__str__) > 0: - --p_nonref_model $analysis_param_type.p_nonref_model - #end if #if $analysis_param_type.heterozygosity.__str__.strip() != '': --heterozygosity $analysis_param_type.heterozygosity #end if @@ -42,8 +42,8 @@ #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES': --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}" #end if - #if $analysis_param_type.output_mode.__str__ != "None" and len($analysis_param_type.output_mode.__str__) > 0: - --output_mode $analysis_param_type.output_mode + #if not $analysis_param_type.emitRefConfidence is None: + --emitRefConfidence $analysis_param_type.emitRefConfidence #end if ## files @@ -53,9 +53,6 @@ #if str($analysis_param_type.comp) != 'None': --comp "$analysis_param_type.comp" #end if - #if str($analysis_param_type.dbsnp) != 'None': - --dbsnp "$analysis_param_type.dbsnp" - #end if ## #if str( $analysis_param_type.annotation ) != "None": #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','): @@ -80,9 +77,6 @@ #if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != '': --contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter #end if - #if $analysis_param_type.downsampleRegion.__str__.strip() != '': - --downsampleRegion $analysis_param_type.downsampleRegion - #end if #if $analysis_param_type.minPruning.__str__.strip() != '': --minPruning $analysis_param_type.minPruning #end if @@ -99,9 +93,7 @@ --max_alternate_alleles $analysis_param_type.max_alternate_alleles #end if ## mode selections - #if $analysis_param_type.genotyping_mode.__str__ != "None" and len($analysis_param_type.genotyping_mode.__str__) > 0: - --genotyping_mode $analysis_param_type.genotyping_mode - #end if + #if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0: --pair_hmm_implementation $analysis_param_type.pair_hmm_implementation #end if @@ -121,35 +113,31 @@ #end if </command> <inputs> - <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &lt;recal_file&gt;" > - <help>The input covariates table file which enables on-the-fly base quality score recalibration. - Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. - Please be aware that one should only run recalibration with the covariates file created on the same input bam(s). - </help> - </param> + <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &lt;recal_file&gt;)" /> <conditional name="reference_source"> <expand macro="reference_source_selector_param" /> <when value="cached"> - <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &lt;input_file&gt;"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> - </param> + <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &lt;input_file&gt;"> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> + </param> + </repeat> <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &lt;reference_sequence&gt;" > <options from_data_table="gatk2_picard_indexes"> - <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> + <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> </options> <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> </param> </when> <when value="history"> - <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &lt;input_file&gt;" /> - <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;"> - <options> - <filter type="data_meta" key="dbkey" ref="input_bam" /> - </options> - </param> + <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &lt;input_file&gt;"> + <param name="input_bam" type="data" format="bam" label="BAM file" /> + </repeat> + <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" /> </when> </conditional> + <expand macro="dbsnp_param" /> <expand macro="gatk_param_type_conditional" /> @@ -211,9 +199,7 @@ <param name="contamination_fraction_to_filter" type="float" value="0.05" optional="true" label="contamination_fraction_to_filter" help="--contamination_fraction_to_filter / -contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove"> <validator type="in_range" message="value between 0.00 and 1.00" min="0" max="1"/> </param> - <param name="dbsnp" type="data" format="vcf" optional="true" label="dbsnp" help="--dbsnp / -D dbSNP file"/> <param name="debug" type="boolean" checked="False" truevalue="-debug" falsevalue="" label="debug" help="--debug / -debug If specified, print out very verbose debug information about each triggering active region"/> - <param name="downsampleRegion" type="integer" value="1000" optional="true" label="downsampleRegion" help="--downsampleRegion / -dr coverage, per-sample, to downsample each active region to"/> <conditional name="genotyping_mode_type"> <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping" help="-gt_mode,--genotyping_mode &lt;genotyping_mode&gt;"> @@ -234,10 +220,11 @@ <param name="minPruning" type="integer" value="1" optional="true" label="minPruning" help="--minPruning / -minPruning The minimum allowed pruning factor in assembly graph. Paths with >= X supporting kmers are pruned from the graph"> <validator type="in_range" message="value between 0 and 127" min="0" max="127"/> </param> - <param name="output_mode" type="select" optional="true" label="output_mode" help="--output_mode / -out_mode Specifies which type of calls we should output"> - <option value="EMIT_VARIANTS_ONLY" selected="True">EMIT_VARIANTS_ONLY</option> - <option value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option> - <option value="EMIT_ALL_SITES">EMIT_ALL_SITES</option> + <!-- http://www.broadinstitute.org/gatk/guide/article?id=2940 --> + <param name="emitRefConfidence" type="select" optional="true" label="Output confidence estimates" help="Emitting a per-bp or summarized confidence estimate for a site being strictly homozygous-reference (--emitRefConfidence)"> + <option value="NONE" selected="True">don't emit anything</option> + <option value="BP_RESOLUTION">BP_RESOLUTION (emit detailed information for each BP)</option> + <option value="GVCF">GVCF (emit a block summarized version of the BP_RESOLUTION data)</option> </param> <param name="pair_hmm_implementation" type="select" optional="true" label="pair_hmm_implementation" help="--pair_hmm_implementation / -pairHMM The PairHMM implementation to use for genotype likelihood calculations"> <option value="EXACT">EXACT</option> @@ -252,13 +239,6 @@ <param name="gcpHMM" type="integer" value="10" optional="true" label="gcpHMM" help="--gcpHMM / -gcpHMM Flat gap continuation penalty for use in the Pair HMM"/> <param name="genotypeFullActiveRegion" type="boolean" checked="False" truevalue="-genotypeFullActiveRegion" falsevalue="" label="genotypeFullActiveRegion" help="--genotypeFullActiveRegion / -genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping"/> <param name="max_alternate_alleles" type="integer" value="6" optional="true" label="max_alternate_alleles" help="--max_alternate_alleles / -maxAltAlleles Maximum number of alternate alleles to genotype"/> - <param name="p_nonref_model" type="select" optional="true" label="p_nonref_model" help="--p_nonref_model / -pnrm Non-reference probability calculation model to employ"> - <option value="EXACT_INDEPENDENT" selected="True">EXACT_INDEPENDENT experimental implementation - for testing only</option> - <option value="EXACT_REFERENCE">EXACT_REFERENCE reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles</option> - <option value="EXACT_ORIGINAL">EXACT_ORIGINAL original biallelic exact model, for testing only</option> - <option value="EXACT_GENERAL_PLOIDY">implementation that supports any sample ploidy</option> - </param> - </when> </conditional> </inputs> @@ -323,14 +303,12 @@ contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove dbsnp dbSNP file debug If specified, print out very verbose debug information about each triggering active region - downsampleRegion coverage, per-sample, to downsample each active region to excludeAnnotation One or more specific annotations to exclude genotyping_mode Specifies how to determine the alternate alleles to use for genotyping graphOutput File to which debug assembly graph information should be written group One or more classes/groups of annotations to apply to variant calls heterozygosity Heterozygosity value used to compute prior likelihoods for any locus minPruning The minimum allowed pruning factor in assembly graph. Paths with less than or equal supporting kmers are pruned from the graph - output_mode Specifies which type of calls we should output pair_hmm_implementation The PairHMM implementation to use for genotype likelihood calculations stand_call_conf The minimum phred-scaled confidence threshold at which variants should be called stand_emit_conf The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold) @@ -339,9 +317,6 @@ gcpHMM Flat gap continuation penalty for use in the Pair HMM genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping max_alternate_alleles Maximum number of alternate alleles to genotype - p_nonref_model Non-reference probability calculation model to employ - ------- @CITATION_SECTION@ </help>
--- a/indel_realigner.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/indel_realigner.xml Sat Jan 18 07:21:33 2014 -0500 @@ -21,8 +21,6 @@ ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975 --num_cpu_threads_per_data_thread 1 - @THREADS@ - #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" #end if
--- a/print_reads.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/print_reads.xml Sat Jan 18 07:21:33 2014 -0500 @@ -18,9 +18,8 @@ \$GATK2_SITE_OPTIONS ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975 - --num_cpu_threads_per_data_thread 8 + --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-6} - @THREADS@ #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}"
--- a/readme.rst Mon Dec 02 10:36:02 2013 -0500 +++ b/readme.rst Sat Jan 18 07:21:33 2014 -0500 @@ -13,7 +13,6 @@ http://www.broadinstitute.org/gatk http://www.broadinstitute.org/gatk/about/citing-gatk - GATK is Free for academics, and fee for commercial use. Please study the GATK licensing website: http://www.broadinstitute.org/gatk/about/#licensing @@ -23,35 +22,42 @@ The recommended installation is by means of the toolshed_. -.. _toolshed: http://toolshed.g2.bx.psu.edu/view/bjoern-gruening/augustus +.. _toolshed: http://toolshed.g2.bx.psu.edu/view/iuc/gatk2 -Galaxy should be able to automatically install samtools dependencies automatically +Galaxy should be able to install samtools dependencies automatically for you. GATK2, and its new licence model, does not allow us to distribute the GATK binaries. -As a consequence you need to install GATK2 by your own, please see the GATK website for more informations: +As a consequence you need to install GATK2 by your own, please see the GATK website for more information: http://www.broadinstitute.org/gatk/download -Once you have installed GATK2 you need to edit the env.sh file that is installed with these wrappers. -You will find this env.sh file under: +Once you have installed GATK2, you need to edit the env.sh files that are installed together with the wrappers. +You must edit the GATK2_PATH environment variable in the file: -<tool_dependency_dir>/gatk2/<version>/iuc/<hash_string>/env.sh +<tool_dependency_dir>/environment_settings/GATK2_PATH/iuc/gatk2/<hash_string>/env.sh + +to point to the folder where you have installed GATK2. + +Optionally, you may also want to edit the GATK2_SITE_OPTIONS environment variable in the file: -You should edit the GATK2_PATH environment variable to point to the folder you have installed GATK2 -and if you want to deactivate the 'call home feature' from GATK you can set +<tool_dependency_dir>/environment_settings/GATK2_SITE_OPTIONS/iuc/gatk2/<hash_string>/env.sh -GATK2_SITE_OPTIONS='-et "NO_ET" -K "/data/gatk2_key_file"' +to deactivate the 'call home feature' of GATK with something like: -GATK2_SITE_OPTIONS can be used to insert specific options into every GATK2 wrapper -during runtime, without changing the actuall wrapper. +GATK2_SITE_OPTIONS='-et NO_ET -K /data/gatk2_key_file' -Read more about the "Phone Home" problem under: +GATK2_SITE_OPTIONS can be also used to insert other specific options into every GATK2 wrapper +at runtime, without changing the actual wrapper. + +Read more about the "Phone Home" problem at: http://www.broadinstitute.org/gatk/guide/article?id=1250 +Optionally, you may also want to add some commands to be executed before GATK (e.g. to load modules) to the file: + +<tool_dependency_dir>/gatk2/default/env.sh Finally, you should fill in additional information about your genomes and annotations in the gatk2_picard_index.loc and gatk2_annotations.txt. -You can find them under ./tool-data/. - +You can find them in the tool-data/ Galaxy directory. History @@ -80,5 +86,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -
--- a/reduce_reads.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/reduce_reads.xml Sat Jan 18 07:21:33 2014 -0500 @@ -21,8 +21,6 @@ ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975 --num_cpu_threads_per_data_thread 1 - @THREADS@ - #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" #end if @@ -68,12 +66,7 @@ #end if </command> <inputs> - <param name="input_recal" type="data" format="csv" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &lt;recal_file&gt;" > - <help>The input covariates table file which enables on-the-fly base quality score recalibration. - Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. - Please be aware that one should only run recalibration with the covariates file created on the same input bam(s). - </help> - </param> + <param name="input_recal" type="data" format="csv" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &lt;recal_file&gt;)" /> <conditional name="reference_source"> <expand macro="reference_source_selector_param" /> <when value="cached"> @@ -228,8 +221,6 @@ -noclip_ad / --dont_hardclip_adaptor_sequences ( boolean with default value false ) Do not hard clip adaptor sequences. Note: You don't have to turn this on for reads that are not mate paired. The program will behave correctly in those cases. ------- - @CITATION_SECTION@ </help> </tool>
--- a/unified_genotyper.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/unified_genotyper.xml Sat Jan 18 07:21:33 2014 -0500 @@ -31,22 +31,12 @@ --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}" --standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}" ' - #set $rod_binding_names = dict() - #for $rod_binding in $rod_bind: - #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': - #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name - #else - #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector - #end if - #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" - #end for - + @DBSNP_OPTIONS@ + #include source=$standard_gatk_options# ##start analysis specific options #if $analysis_param_type.analysis_param_type_selector == "advanced": -p ' - --p_nonref_model "${analysis_param_type.p_nonref_model}" --heterozygosity "${analysis_param_type.heterozygosity}" --pcr_error_rate "${analysis_param_type.pcr_error_rate}" --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}" @@ -120,30 +110,7 @@ <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" /> </when> </conditional> - - <repeat name="rod_bind" title="Binding for reference-ordered data" help="-D,--dbsnp &lt;dbsnp&gt;"> - <conditional name="rod_bind_type"> - <param name="rod_bind_type_selector" type="select" label="Binding Type"> - <option value="dbsnp" selected="True">dbSNP</option> - <option value="snps">SNPs</option> - <option value="indels">INDELs</option> - <option value="custom">Custom</option> - </param> - <when value="dbsnp"> - <param name="input_rod" type="data" format="vcf" label="ROD file" /> - </when> - <when value="snps"> - <param name="input_rod" type="data" format="vcf" label="ROD file" /> - </when> - <when value="indels"> - <param name="input_rod" type="data" format="vcf" label="ROD file" /> - </when> - <when value="custom"> - <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> - <param name="input_rod" type="data" format="vcf" label="ROD file" /> - </when> - </conditional> - </repeat> + <expand macro="dbsnp_param" /> <param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ" help="-glm,--genotype_likelihoods_model &lt;genotype_likelihoods_model&gt;"> <option value="BOTH" selected="True">BOTH</option> @@ -158,10 +125,6 @@ <expand macro="gatk_param_type_conditional" /> <expand macro="analysis_type_conditional"> - <param name="p_nonref_model" type="select" label="Non-reference probability calculation model to employ" help="-pnrm,--p_nonref_model &lt;p_nonref_model&gt;"> - <option value="EXACT_GENERAL_PLOIDY" selected="True">EXACT_GENERAL_PLOIDY (supports any sample ploidy)</option> - <option value="EXACT_REFERENCE">EXACT_REFERENCE (multi-allelic EXACT model. Extremely slow for many alternate alleles)</option> - </param> <param name="heterozygosity" type="float" value="1e-3" label="Heterozygosity value used to compute prior likelihoods for any locus" help="-hets,--heterozygosity &lt;heterozygosity&gt;" /> <param name="pcr_error_rate" type="float" value="1e-4" label="The PCR error rate to be used for computing fragment-based likelihoods" help="-pcr_error,--pcr_error_rate &lt;pcr_error_rate&gt;" /> <conditional name="genotyping_mode_type"> @@ -187,13 +150,11 @@ <param name="max_alternate_alleles" type="integer" value="6" label="Maximum number of alternate alleles to genotype" help="-maxAlleles,--max_alternate_alleles &lt;max_alternate_alleles&gt;" /> <param name="min_indel_count_for_genotyping" type="integer" value="5" label="Minimum number of consensus indels required to trigger genotyping run" help="-minIndelCnt,--min_indel_count_for_genotyping &lt;min_indel_count_for_genotyping&gt;" /> <param name="indel_heterozygosity" type="float" value="0.000125" label="Heterozygosity for indel calling" help="1.0/8000==0.000125 (-indelHeterozygosity,--indel_heterozygosity &lt;indel_heterozygosity&gt;)"/> - <param name="indelGapContinuationPenalty" type="integer" value="10" label="Indel gap continuation penalty" help="--indelGapContinuationPenalty"> - <help>Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.</help> - <validator type="in_range" message="value between 0 and 255" min="0" max="255"/> + <param name="indelGapContinuationPenalty" type="integer" value="10" label="Indel gap continuation penalty" help="As Phred-scaled probability, i.e. 30 => 10^-30/10 (--indelGapContinuationPenalty)"> + <validator type="in_range" message="value between 0 and 255" min="0" max="255" /> </param> - <param name="indelGapOpenPenalty" type="integer" value="45" label="Indel gap open penalty" help="--indelGapOpenPenalty" > - <help>Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.</help> - <validator type="in_range" message="value between 0 and 255" min="0" max="255"/> + <param name="indelGapOpenPenalty" type="integer" value="45" label="Indel gap open penalty" help="As Phred-scaled probability, i.e. 30 => 10^-30/10 (--indelGapOpenPenalty)"> + <validator type="in_range" message="value between 0 and 255" min="0" max="255" /> </param> <!-- indelHaplotypeSize - Gone in GATK 2.4? --> <param name="indelHaplotypeSize" type="integer" value="80" label="Indel haplotype size" help="--indelHaplotypeSize" /> @@ -252,14 +213,14 @@ <param name="reference_source_selector" value="history" /> <param name="ref_file" value="phiX.fasta" ftype="fasta" /> <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> - <param name="rod_bind_type_selector" value="dbsnp" /> - <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> + <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="dbsnp_rod_name" value="dbsnp" /> <param name="standard_min_confidence_threshold_for_calling" value="0" /> <param name="standard_min_confidence_threshold_for_emitting" value="4" /> <param name="gatk_param_type_selector" value="basic" /> <param name="analysis_param_type_selector" value="advanced" /> <param name="genotype_likelihoods_model" value="BOTH" /> - <param name="p_nonref_model" value="EXACT" /> <param name="heterozygosity" value="0.001" /> <param name="pcr_error_rate" value="0.0001" /> <param name="genotyping_mode" value="DISCOVERY" /> @@ -310,7 +271,6 @@ **Settings**:: genotype_likelihoods_model Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH) - p_nonref_model Non-reference probability calculation model to employ -- EXACT_GENERAL_PLOIDY is the default option, while EXACT_REFERENCE is also available. (EXACT_INDEPENDENT,EXACT_REFERENCE,EXACT_ORIGINAL,EXACT_GENERAL_PLOIDY) heterozygosity Heterozygosity value used to compute prior likelihoods for any locus pcr_error_rate The PCR error rate to be used for computing fragment-based likelihoods genotyping_mode Should we output confident genotypes (i.e. including ref calls) or just the variants? (DISCOVERY|GENOTYPE_GIVEN_ALLELES)
--- a/variant_annotator.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/variant_annotator.xml Sat Jan 18 07:21:33 2014 -0500 @@ -51,10 +51,7 @@ -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}" #end for - #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': - -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" - #end if - + @DBSNP_OPTIONS@ #for $rod_binding in $resource_rod_bind: -d "--resource:${rod_binding.resource_rod_name},%(file_type)s" "${rod_binding.resource_input_rod}" "${rod_binding.resource_input_rod.ext}" "input_resource_${rod_binding.resource_rod_name}" @@ -136,20 +133,7 @@ <param name="comp_input_rod" type="data" format="vcf" label="ROD file" /> <param name="comp_rod_name" type="text" value="Unnamed" label="ROD Name"/> </repeat> - - <conditional name="dbsnp_rod_bind_type"> - <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &lt;dbsnp&gt;"> - <option value="set_dbsnp" selected="True">Set dbSNP</option> - <option value="exclude_dbsnp">Don't set dbSNP</option> - </param> - <when value="exclude_dbsnp"> - <!-- Do nothing here --> - </when> - <when value="set_dbsnp"> - <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" /> - <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/> - </when> - </conditional> + <expand macro="dbsnp_param" /> <repeat name="resource_rod_bind" title="Binding for reference-ordered resource data" help="-resource,--resource &lt;resource&gt;"> <param name="resource_input_rod" type="data" format="vcf" label="ROD file" /> @@ -210,6 +194,7 @@ <param name="additional_annotations" value="0" /> <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="dbsnp_rod_name" value="dbsnp" /> <param name="snpEff_rod_bind_type_selector" value="exclude_snpEff" /> <param name="gatk_param_type_selector" value="basic" /> <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" />
--- a/variant_eval.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/variant_eval.xml Sat Jan 18 07:21:33 2014 -0500 @@ -32,9 +32,9 @@ #end if #end for - #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': + #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp' -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" - #if str( $dbsnp_rod_bind_type.dbsnp_known_names ): + #if $dbsnp_rod_bind_type.dbsnp_known_names -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"' #end if #end if @@ -122,24 +122,25 @@ </when> </conditional> - <repeat name="comp_rod_bind" title="Binding for reference-ordered comparison data" help="-comp,--comp &lt;comp&gt;"> + <repeat name="comp_rod_bind" title="Comparison Reference-Ordered Data (ROD) file" help="-comp,--comp &lt;comp&gt;"> <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" /> - <param name="comp_rod_name" type="text" value="Unnamed" label="Comparison ROD Name"/> - <param name="comp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use Comparison ROD as known_names" help="-knownName,--known_names &lt;known_names&gt;"/> + <param name="comp_rod_name" type="text" value="" label="Comparison ROD name"> + <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator> + </param> + <param name="comp_known_names" type="boolean" label="Use comparison ROD file as known_names" help="-knownName,--known_names &lt;known_names&gt;"/> </repeat> - <conditional name="dbsnp_rod_bind_type"> - <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &lt;dbsnp&gt;"> + <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &lt;dbsnp&gt;"> <option value="set_dbsnp" selected="True">Set dbSNP</option> <option value="exclude_dbsnp">Don't set dbSNP</option> </param> - <when value="exclude_dbsnp"> - <!-- Do nothing here --> - </when> + <when value="exclude_dbsnp" /> <when value="set_dbsnp"> <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" /> - <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="dbSNP ROD Name"/> - <param name="dbsnp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use dbSNP ROD as known_names" help="-knownName,--known_names &lt;known_names&gt;" /> + <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbsnp ROD name"> + <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator> + </param> + <param name="dbsnp_known_names" type="boolean" label="Use dbSNP ROD file as known_names" help="-knownName,--known_names &lt;known_names&gt;" /> </when> </conditional> @@ -228,6 +229,7 @@ <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" /> <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="dbsnp_rod_name" value="dbsnp" /> <param name="dbsnp_known_names" value="True"/> <param name="comp_rod_bind" value="0" /> <param name="gatk_param_type_selector" value="basic" />
--- a/variant_filtration.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/variant_filtration.xml Sat Jan 18 07:21:33 2014 -0500 @@ -15,8 +15,6 @@ -T "VariantFiltration" \$GATK2_SITE_OPTIONS - @THREADS@ - -o "${output_vcf}" #if $reference_source.reference_source_selector != "history": @@ -83,7 +81,7 @@ <conditional name="mask_rod_bind_type"> <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file"> - <option value="set_mask" selected="True">Set maskP</option> + <option value="set_mask" selected="True">Set mask</option> <option value="exclude_mask">Don't set mask</option> </param> <when value="exclude_mask">
--- a/variant_validate.xml Mon Dec 02 10:36:02 2013 -0500 +++ b/variant_validate.xml Sat Jan 18 07:21:33 2014 -0500 @@ -14,18 +14,13 @@ \$GATK2_SITE_OPTIONS - @THREADS@ - #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" #end if ${warn_on_errors} ${do_not_validate_filtered_records} ' - - #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': - -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" - #end if + @DBSNP_OPTIONS@ #include source=$standard_gatk_options# </command> @@ -47,20 +42,7 @@ <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" /> </when> </conditional> - - <conditional name="dbsnp_rod_bind_type"> - <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &lt;dbsnp&gt;"> - <option value="set_dbsnp" selected="True">Set dbSNP</option> - <option value="exclude_dbsnp">Don't set dbSNP</option> - </param> - <when value="exclude_dbsnp"> - <!-- Do nothing here --> - </when> - <when value="set_dbsnp"> - <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" /> - <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/> - </when> - </conditional> + <expand macro="dbsnp_param" /> <param name="warn_on_errors" type="boolean" checked="False" truevalue="-warnOnErrors" falsevalue="" label="instead of terminating the run at the first error, print warning messages for each error seen." help="-warnOnErrors,--warnOnErrors"/> <param name="do_not_validate_filtered_records" type="boolean" checked="False" truevalue="-doNotValidateFilteredRecords" falsevalue="" label="do not try to validate records that are FILTERed." help="-doNotValidateFilteredRecords,--doNotValidateFilteredRecords"/> @@ -78,6 +60,7 @@ <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" /> <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="dbsnp_rod_name" value="dbsnp" /> <param name="warn_on_errors" value="True"/> <param name="do_not_validate_filtered_records" /> <param name="gatk_param_type_selector" value="basic" />