# HG changeset patch
# User iuc
# Date 1390047693 18000
# Node ID 8bcc130947679fe64b1d1b37032f8ed5de5c494b
# Parent f760c0de8e3a5b04e30b010f1b0a01a21cb26b4d
Uploaded
diff -r f760c0de8e3a -r 8bcc13094767 base_recalibrator.xml
--- a/base_recalibrator.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/base_recalibrator.xml Sat Jan 18 07:21:33 2014 -0500
@@ -17,9 +17,8 @@
\$GATK2_SITE_OPTIONS
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
- --num_cpu_threads_per_data_thread 8
+ --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-8}
- @THREADS@
## we set non standards at every run and the user can choose which ones are preferred
## in our select box both standard options (ContextCovariate, CycleCovariate) are selected by default
--no_standard_covs
@@ -111,12 +110,7 @@
-
- The input covariates table file which enables on-the-fly base quality score recalibration.
- Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
- Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-
-
+
diff -r f760c0de8e3a -r 8bcc13094767 gatk2_macros.xml
--- a/gatk2_macros.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/gatk2_macros.xml Sat Jan 18 07:21:33 2014 -0500
@@ -1,17 +1,23 @@
-
-
- samtools
- GATK2_PATH
- GATK2_SITE_OPTIONS
-
-
-
- --num_threads \${GALAXY_SLOTS:-4}
-
-
- java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
-
+
+
+ gatk2
+ samtools
+ GATK2_PATH
+ GATK2_SITE_OPTIONS
+
+
+
+ --num_threads \${GALAXY_SLOTS:-4}
+
+
+ java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
+
+
+ #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
+ -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
+ #end if
+
##start standard gatk options
#if $gatk_param_type.gatk_param_type_selector == "advanced":
@@ -311,6 +317,21 @@
+
+
+
+
+
+
+
+
+
+
+ ^\w+$
+
+
+
+ ------
**Citation**
diff -r f760c0de8e3a -r 8bcc13094767 gatk2_picard_index.loc.sample
--- a/gatk2_picard_index.loc.sample Mon Dec 02 10:36:02 2013 -0500
+++ b/gatk2_picard_index.loc.sample Sat Jan 18 07:21:33 2014 -0500
@@ -24,7 +24,3 @@
#the dict file does not have the .fa extension although the
#path list in the loc file does include it.
#
-hg18 hg18 hg18 /data/galaxy/ext-tool-data/picard/hg18.fa
-hg19 hg19 hg19 /data/galaxy/ext-tool-data/picard/hg19.fa
-mm8 mm8 mm8 /data/galaxy/ext-tool-data/picard/mm8.fa
-mm9 mm9 mm9 /data/galaxy/ext-tool-data/picard/mm9.fa
diff -r f760c0de8e3a -r 8bcc13094767 haplotype_caller.xml
--- a/haplotype_caller.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/haplotype_caller.xml Sat Jan 18 07:21:33 2014 -0500
@@ -7,10 +7,12 @@
gatk2_wrapper.py
--stdout "${output_log}"
- -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
- #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
- -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
- #end if
+ #for $i, $input_bam in enumerate( $reference_source.input_bams ):
+ -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
+ #if str( $input_bam.input_bam.metadata.bam_index ) != "None":
+ -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
+ #end if
+ #end for
-p '
@JAR_PATH@
-T "HaplotypeCaller"
@@ -18,7 +20,7 @@
\$GATK2_SITE_OPTIONS
- @THREADS@
+ --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-4}
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
@@ -27,14 +29,12 @@
--BQSR "${input_recal}"
#end if
'
+ @DBSNP_OPTIONS@
#include source=$standard_gatk_options#
##start analysis specific options
#if $analysis_param_type.analysis_param_type_selector == "advanced":
-p '
- #if $analysis_param_type.p_nonref_model.__str__ != "None" and len($analysis_param_type.p_nonref_model.__str__) > 0:
- --p_nonref_model $analysis_param_type.p_nonref_model
- #end if
#if $analysis_param_type.heterozygosity.__str__.strip() != '':
--heterozygosity $analysis_param_type.heterozygosity
#end if
@@ -42,8 +42,8 @@
#if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES':
--alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}"
#end if
- #if $analysis_param_type.output_mode.__str__ != "None" and len($analysis_param_type.output_mode.__str__) > 0:
- --output_mode $analysis_param_type.output_mode
+ #if not $analysis_param_type.emitRefConfidence is None:
+ --emitRefConfidence $analysis_param_type.emitRefConfidence
#end if
## files
@@ -53,9 +53,6 @@
#if str($analysis_param_type.comp) != 'None':
--comp "$analysis_param_type.comp"
#end if
- #if str($analysis_param_type.dbsnp) != 'None':
- --dbsnp "$analysis_param_type.dbsnp"
- #end if
##
#if str( $analysis_param_type.annotation ) != "None":
#for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','):
@@ -80,9 +77,6 @@
#if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != '':
--contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter
#end if
- #if $analysis_param_type.downsampleRegion.__str__.strip() != '':
- --downsampleRegion $analysis_param_type.downsampleRegion
- #end if
#if $analysis_param_type.minPruning.__str__.strip() != '':
--minPruning $analysis_param_type.minPruning
#end if
@@ -99,9 +93,7 @@
--max_alternate_alleles $analysis_param_type.max_alternate_alleles
#end if
## mode selections
- #if $analysis_param_type.genotyping_mode.__str__ != "None" and len($analysis_param_type.genotyping_mode.__str__) > 0:
- --genotyping_mode $analysis_param_type.genotyping_mode
- #end if
+
#if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0:
--pair_hmm_implementation $analysis_param_type.pair_hmm_implementation
#end if
@@ -121,35 +113,31 @@
#end if
-
- The input covariates table file which enables on-the-fly base quality score recalibration.
- Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
- Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-
-
+
-
-
-
-
+
+
+
+
+
+
-
+
-
-
-
-
-
-
+
+
+
+
+
@@ -211,9 +199,7 @@
-
-
@@ -234,10 +220,11 @@
-
-
-
-
+
+
+
+
+
@@ -252,13 +239,6 @@
-
-
-
-
-
-
-
@@ -323,14 +303,12 @@
contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove
dbsnp dbSNP file
debug If specified, print out very verbose debug information about each triggering active region
- downsampleRegion coverage, per-sample, to downsample each active region to
excludeAnnotation One or more specific annotations to exclude
genotyping_mode Specifies how to determine the alternate alleles to use for genotyping
graphOutput File to which debug assembly graph information should be written
group One or more classes/groups of annotations to apply to variant calls
heterozygosity Heterozygosity value used to compute prior likelihoods for any locus
minPruning The minimum allowed pruning factor in assembly graph. Paths with less than or equal supporting kmers are pruned from the graph
- output_mode Specifies which type of calls we should output
pair_hmm_implementation The PairHMM implementation to use for genotype likelihood calculations
stand_call_conf The minimum phred-scaled confidence threshold at which variants should be called
stand_emit_conf The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)
@@ -339,9 +317,6 @@
gcpHMM Flat gap continuation penalty for use in the Pair HMM
genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping
max_alternate_alleles Maximum number of alternate alleles to genotype
- p_nonref_model Non-reference probability calculation model to employ
-
-------
@CITATION_SECTION@
diff -r f760c0de8e3a -r 8bcc13094767 indel_realigner.xml
--- a/indel_realigner.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/indel_realigner.xml Sat Jan 18 07:21:33 2014 -0500
@@ -21,8 +21,6 @@
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
--num_cpu_threads_per_data_thread 1
- @THREADS@
-
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
#end if
diff -r f760c0de8e3a -r 8bcc13094767 print_reads.xml
--- a/print_reads.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/print_reads.xml Sat Jan 18 07:21:33 2014 -0500
@@ -18,9 +18,8 @@
\$GATK2_SITE_OPTIONS
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
- --num_cpu_threads_per_data_thread 8
+ --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-6}
- @THREADS@
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
diff -r f760c0de8e3a -r 8bcc13094767 readme.rst
--- a/readme.rst Mon Dec 02 10:36:02 2013 -0500
+++ b/readme.rst Sat Jan 18 07:21:33 2014 -0500
@@ -13,7 +13,6 @@
http://www.broadinstitute.org/gatk
http://www.broadinstitute.org/gatk/about/citing-gatk
-
GATK is Free for academics, and fee for commercial use. Please study the GATK licensing website:
http://www.broadinstitute.org/gatk/about/#licensing
@@ -23,35 +22,42 @@
The recommended installation is by means of the toolshed_.
-.. _toolshed: http://toolshed.g2.bx.psu.edu/view/bjoern-gruening/augustus
+.. _toolshed: http://toolshed.g2.bx.psu.edu/view/iuc/gatk2
-Galaxy should be able to automatically install samtools dependencies automatically
+Galaxy should be able to install samtools dependencies automatically
for you. GATK2, and its new licence model, does not allow us to distribute the GATK binaries.
-As a consequence you need to install GATK2 by your own, please see the GATK website for more informations:
+As a consequence you need to install GATK2 on your own; please see the GATK website for more information:
http://www.broadinstitute.org/gatk/download
-Once you have installed GATK2 you need to edit the env.sh file that is installed with these wrappers.
-You will find this env.sh file under:
+Once you have installed GATK2, you need to edit the env.sh files that are installed together with the wrappers.
+You must edit the GATK2_PATH environment variable in the file:
-/gatk2//iuc//env.sh
+<tool_dependency_dir>/environment_settings/GATK2_PATH/iuc/gatk2/<hash_string>/env.sh
+
+to point to the folder where you have installed GATK2.
+
+Optionally, you may also want to edit the GATK2_SITE_OPTIONS environment variable in the file:
-You should edit the GATK2_PATH environment variable to point to the folder you have installed GATK2
-and if you want to deactivate the 'call home feature' from GATK you can set
+<tool_dependency_dir>/environment_settings/GATK2_SITE_OPTIONS/iuc/gatk2/<hash_string>/env.sh
-GATK2_SITE_OPTIONS='-et "NO_ET" -K "/data/gatk2_key_file"'
+to deactivate the 'call home feature' of GATK with something like:
-GATK2_SITE_OPTIONS can be used to insert specific options into every GATK2 wrapper
-during runtime, without changing the actuall wrapper.
+GATK2_SITE_OPTIONS='-et NO_ET -K /data/gatk2_key_file'
-Read more about the "Phone Home" problem under:
+GATK2_SITE_OPTIONS can also be used to insert other specific options into every GATK2 wrapper
+at runtime, without changing the actual wrapper.
+
+Read more about the "Phone Home" problem at:
http://www.broadinstitute.org/gatk/guide/article?id=1250
+Optionally, you may also want to add some commands to be executed before GATK (e.g. to load modules) to the file:
+
+<tool_dependency_dir>/gatk2/default/env.sh
Finally, you should fill in additional information about your genomes and
annotations in the gatk2_picard_index.loc and gatk2_annotations.txt.
-You can find them under ./tool-data/.
-
+You can find them in the tool-data/ Galaxy directory.
History
@@ -80,5 +86,3 @@
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-
-
diff -r f760c0de8e3a -r 8bcc13094767 reduce_reads.xml
--- a/reduce_reads.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/reduce_reads.xml Sat Jan 18 07:21:33 2014 -0500
@@ -21,8 +21,6 @@
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
--num_cpu_threads_per_data_thread 1
- @THREADS@
-
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
#end if
@@ -68,12 +66,7 @@
#end if
-
- The input covariates table file which enables on-the-fly base quality score recalibration.
- Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
- Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-
-
+
@@ -228,8 +221,6 @@
-noclip_ad / --dont_hardclip_adaptor_sequences ( boolean with default value false )
Do not hard clip adaptor sequences. Note: You don't have to turn this on for reads that are not mate paired. The program will behave correctly in those cases.
-------
-
@CITATION_SECTION@
diff -r f760c0de8e3a -r 8bcc13094767 unified_genotyper.xml
--- a/unified_genotyper.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/unified_genotyper.xml Sat Jan 18 07:21:33 2014 -0500
@@ -31,22 +31,12 @@
--standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"
--standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"
'
- #set $rod_binding_names = dict()
- #for $rod_binding in $rod_bind:
- #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
- #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
- #else
- #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
- #end if
- #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
- -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
- #end for
-
+ @DBSNP_OPTIONS@
+
#include source=$standard_gatk_options#
##start analysis specific options
#if $analysis_param_type.analysis_param_type_selector == "advanced":
-p '
- --p_nonref_model "${analysis_param_type.p_nonref_model}"
--heterozygosity "${analysis_param_type.heterozygosity}"
--pcr_error_rate "${analysis_param_type.pcr_error_rate}"
--genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"
@@ -120,30 +110,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -158,10 +125,6 @@
-
-
-
-
@@ -187,13 +150,11 @@
-
- Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.
-
+
+
-
- Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.
-
+
+
@@ -252,14 +213,14 @@
-
-
+
+
+
-
@@ -310,7 +271,6 @@
**Settings**::
genotype_likelihoods_model Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH)
- p_nonref_model Non-reference probability calculation model to employ -- EXACT_GENERAL_PLOIDY is the default option, while EXACT_REFERENCE is also available. (EXACT_INDEPENDENT,EXACT_REFERENCE,EXACT_ORIGINAL,EXACT_GENERAL_PLOIDY)
heterozygosity Heterozygosity value used to compute prior likelihoods for any locus
pcr_error_rate The PCR error rate to be used for computing fragment-based likelihoods
genotyping_mode Should we output confident genotypes (i.e. including ref calls) or just the variants? (DISCOVERY|GENOTYPE_GIVEN_ALLELES)
diff -r f760c0de8e3a -r 8bcc13094767 variant_annotator.xml
--- a/variant_annotator.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_annotator.xml Sat Jan 18 07:21:33 2014 -0500
@@ -51,10 +51,7 @@
-d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
#end for
- #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
- -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
- #end if
-
+ @DBSNP_OPTIONS@
#for $rod_binding in $resource_rod_bind:
-d "--resource:${rod_binding.resource_rod_name},%(file_type)s" "${rod_binding.resource_input_rod}" "${rod_binding.resource_input_rod.ext}" "input_resource_${rod_binding.resource_rod_name}"
@@ -136,20 +133,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -210,6 +194,7 @@
+
diff -r f760c0de8e3a -r 8bcc13094767 variant_eval.xml
--- a/variant_eval.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_eval.xml Sat Jan 18 07:21:33 2014 -0500
@@ -32,9 +32,9 @@
#end if
#end for
- #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
+ #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
-d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
- #if str( $dbsnp_rod_bind_type.dbsnp_known_names ):
+ #if $dbsnp_rod_bind_type.dbsnp_known_names
-p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
#end if
#end if
@@ -122,24 +122,25 @@
-
+
-
-
+
+ ^\w+$
+
+
-
-
+
-
-
-
+
-
-
+
+ ^\w+$
+
+
@@ -228,6 +229,7 @@
+
diff -r f760c0de8e3a -r 8bcc13094767 variant_filtration.xml
--- a/variant_filtration.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_filtration.xml Sat Jan 18 07:21:33 2014 -0500
@@ -15,8 +15,6 @@
-T "VariantFiltration"
\$GATK2_SITE_OPTIONS
- @THREADS@
-
-o "${output_vcf}"
#if $reference_source.reference_source_selector != "history":
@@ -83,7 +81,7 @@
-
+
diff -r f760c0de8e3a -r 8bcc13094767 variant_validate.xml
--- a/variant_validate.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_validate.xml Sat Jan 18 07:21:33 2014 -0500
@@ -14,18 +14,13 @@
\$GATK2_SITE_OPTIONS
- @THREADS@
-
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
#end if
${warn_on_errors}
${do_not_validate_filtered_records}
'
-
- #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
- -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
- #end if
+ @DBSNP_OPTIONS@
#include source=$standard_gatk_options#
@@ -47,20 +42,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -78,6 +60,7 @@
+