# HG changeset patch # User devteam # Date 1397479705 14400 # Node ID 30e1dd77e99c53e031d4b4e0e8cb02565d80b126 # Parent 53dd1bfced54ccfa1e317abe19980f8517699a87 Uploaded correct tarball. diff -r 53dd1bfced54 -r 30e1dd77e99c table_recalibration.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/table_recalibration.xml Mon Apr 14 08:48:25 2014 -0400 @@ -0,0 +1,232 @@ + + on BAM files + + gatk + samtools + + + gatk_macros.xml + + gatk_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if + -p 'java + -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar" + -T "TableRecalibration" + -o "${output_bam}" + -et "NO_ET" ##ET no phone home + ##--num_threads 4 ##hard coded, for now + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --recal_file "${input_recal}" + --disable_bam_indexing + ' + #include source=$standard_gatk_options# + + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set": + --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}" + #end if + #if str( $analysis_param_type.default_platform ) != "default": + --default_platform "${analysis_param_type.default_platform}" + #end if + #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set": + --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}" + #end if + #if str( $analysis_param_type.force_platform ) != 
"default": + --force_platform "${analysis_param_type.force_platform}" + #end if + ${analysis_param_type.exception_if_no_tile} + #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set": + #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default": + --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}" + #end if + #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default": + --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}" + #end if + #end if + ${analysis_param_type.simplify_bam} + --preserve_qscores_less_than "${analysis_param_type.preserve_qscores_less_than}" + --smoothing "${analysis_param_type.smoothing}" + --max_quality_score "${analysis_param_type.max_quality_score}" + --window_size_nqs "${analysis_param_type.window_size_nqs}" + --homopolymer_nback "${analysis_param_type.homopolymer_nback}" + ${analysis_param_type.do_not_write_original_quals} + ' + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. For each base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, cycle, and dinuc) Using these values as a key in a large hashmap the walker calculates an empirical base quality score and overwrites the quality score currently in the read. This walker then outputs a new bam file with these updated (recalibrated) reads. Note: This walker expects as input the recalibration table file generated previously by CovariateCounterWalker. Note: This walker is designed to be used in conjunction with CovariateCounterWalker. 
+ +For more information on base quality score recalibration using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: TableRecalibration accepts an aligned BAM file and a recalibration CSV file as input. + + +**Outputs** + +The output is in BAM format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + default_read_group If a read has no read group then default to the provided String. + default_platform If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid. + force_read_group If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group. + force_platform If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid. + window_size_nqs The window size used by MinimumNQSCovariate for its calculation + homopolymer_nback The number of previous bases to look at in HomopolymerCovariate + exception_if_no_tile If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1 + solid_recal_mode How should we recalibrate solid bases in which the reference was inserted? 
Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS (DO_NOTHING|SET_Q_ZERO|SET_Q_ZERO_BASE_N|REMOVE_REF_BIAS) + solid_nocall_strategy Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ (THROW_EXCEPTION|LEAVE_READ_UNRECALIBRATED|PURGE_READ) + recal_file Filename for the input covariates table recalibration .csv file + out The output BAM file + bam_compression Compression level to use for writing BAM files + disable_bam_indexing Turn off on-the-fly creation of indices for output BAM files. + simplifyBAM If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well as stripping all extended tags from the kept reads except the read group identifier + preserve_qscores_less_than Bases with quality scores less than this threshold won't be recalibrated, default=5. 
In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases + smoothing Number of imaginary counts to add to each bin bin order to smooth out bins with few data points, default=1 + max_quality_score The integer value at which to cap the quality scores, default=50 + doNotWriteOriginalQuals If true, we will not write the original quality (OQ) tag for each read + +@CITATION_SECTION@ + + diff -r 53dd1bfced54 -r 30e1dd77e99c test-data/1.bam Binary file test-data/1.bam has changed diff -r 53dd1bfced54 -r 30e1dd77e99c test-data/gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv Mon Apr 14 08:48:25 2014 -0400 @@ -0,0 +1,246 @@ +# Counted Sites 41 +# Counted Bases 340 +# Skipped Sites 2 +# Fraction Skipped 1 / 21 bp +ReadGroup,QualityScore,Cycle,Dinuc,Homopolymer,MinimumNQS,Position,nObservations,nMismatches,Qempirical +A Fake phiX Sample,26,1,NN,0,26,0,9,0,40 +A Fake phiX Sample,26,1,NN,1,26,0,1,0,40 +A Fake phiX Sample,26,2,AG,0,26,1,1,0,40 +A Fake phiX Sample,26,2,CC,0,26,1,1,0,40 +A Fake phiX Sample,26,2,CG,0,26,1,3,0,40 +A Fake phiX Sample,26,2,GC,0,26,1,2,0,40 +A Fake phiX Sample,26,2,GC,1,26,1,1,0,40 +A Fake phiX Sample,26,2,GT,0,26,1,1,0,40 +A Fake phiX Sample,26,2,TG,1,26,1,1,0,40 +A Fake phiX Sample,26,3,CC,0,26,2,1,0,40 +A Fake phiX Sample,26,3,CG,0,26,2,3,0,40 +A Fake phiX Sample,26,3,GC,0,26,2,1,0,40 +A Fake phiX Sample,26,3,GC,1,26,2,2,0,40 +A Fake phiX Sample,26,3,GG,0,26,2,1,0,40 +A Fake phiX Sample,26,3,GT,0,26,2,1,0,40 +A Fake phiX Sample,26,3,TG,1,26,2,1,0,40 +A Fake phiX Sample,26,4,CC,0,26,3,2,0,40 +A Fake phiX Sample,26,4,CG,0,26,3,2,0,40 +A Fake phiX Sample,26,4,GA,0,26,3,1,0,40 +A Fake phiX Sample,26,4,GC,1,26,3,2,0,40 +A Fake phiX Sample,26,4,GG,0,26,3,1,0,40 +A Fake phiX Sample,26,4,GT,0,26,3,1,0,40 +A Fake phiX Sample,26,4,TG,1,26,3,1,0,40 +A Fake phiX 
Sample,26,5,AT,0,26,4,1,0,40 +A Fake phiX Sample,26,5,CC,0,26,4,2,0,40 +A Fake phiX Sample,26,5,CG,0,26,4,2,0,40 +A Fake phiX Sample,26,5,GA,0,26,4,1,0,40 +A Fake phiX Sample,26,5,GC,1,26,4,1,0,40 +A Fake phiX Sample,26,5,GG,0,26,4,1,0,40 +A Fake phiX Sample,26,5,GT,0,26,4,1,0,40 +A Fake phiX Sample,26,5,TG,1,26,4,1,0,40 +A Fake phiX Sample,26,6,AT,0,26,5,1,0,40 +A Fake phiX Sample,26,6,CC,0,26,5,1,0,40 +A Fake phiX Sample,26,6,CG,0,26,5,2,0,40 +A Fake phiX Sample,26,6,GA,0,26,5,1,0,40 +A Fake phiX Sample,26,6,GG,0,26,5,1,0,40 +A Fake phiX Sample,26,6,GT,0,26,5,2,0,40 +A Fake phiX Sample,26,6,TG,0,26,5,1,0,40 +A Fake phiX Sample,26,6,TG,1,26,5,1,0,40 +A Fake phiX Sample,26,7,AT,0,26,6,1,0,40 +A Fake phiX Sample,26,7,CG,0,26,6,1,0,40 +A Fake phiX Sample,26,7,GA,0,26,6,2,1,3 +A Fake phiX Sample,26,7,GG,0,26,6,1,0,40 +A Fake phiX Sample,26,7,GT,0,26,6,2,0,40 +A Fake phiX Sample,26,7,TG,0,26,6,1,0,40 +A Fake phiX Sample,26,7,TG,1,26,6,2,0,40 +A Fake phiX Sample,26,8,AC,0,26,7,1,0,40 +A Fake phiX Sample,26,8,AT,0,26,7,1,0,40 +A Fake phiX Sample,26,8,GA,0,26,7,2,1,3 +A Fake phiX Sample,26,8,GG,0,26,7,2,0,40 +A Fake phiX Sample,26,8,GT,0,26,7,1,0,40 +A Fake phiX Sample,26,8,TG,0,26,7,1,0,40 +A Fake phiX Sample,26,8,TG,1,26,7,2,0,40 +A Fake phiX Sample,26,9,AC,0,26,8,1,0,40 +A Fake phiX Sample,26,9,AT,0,26,8,1,0,40 +A Fake phiX Sample,26,9,CT,0,26,8,1,0,40 +A Fake phiX Sample,26,9,GA,0,26,8,3,1,5 +A Fake phiX Sample,26,9,GG,0,26,8,2,0,40 +A Fake phiX Sample,26,9,TG,0,26,8,1,0,40 +A Fake phiX Sample,26,9,TG,1,26,8,1,0,40 +A Fake phiX Sample,26,10,AC,0,26,9,1,0,40 +A Fake phiX Sample,26,10,AT,0,26,9,2,0,40 +A Fake phiX Sample,26,10,CT,0,26,9,1,0,40 +A Fake phiX Sample,26,10,GA,0,26,9,3,1,5 +A Fake phiX Sample,26,10,GG,0,26,9,1,0,40 +A Fake phiX Sample,26,10,TG,0,26,9,2,0,40 +A Fake phiX Sample,26,11,AC,0,26,10,1,0,40 +A Fake phiX Sample,26,11,AT,0,26,10,2,0,40 +A Fake phiX Sample,26,11,CT,0,26,10,1,0,40 +A Fake phiX Sample,26,11,GA,0,26,10,3,1,5 +A Fake phiX 
Sample,26,11,TG,0,26,10,3,0,40 +A Fake phiX Sample,26,12,AC,0,26,11,1,0,40 +A Fake phiX Sample,26,12,AC,1,26,11,1,0,40 +A Fake phiX Sample,26,12,AT,0,26,11,1,0,40 +A Fake phiX Sample,26,12,CT,0,26,11,1,0,40 +A Fake phiX Sample,26,12,GA,0,26,11,2,1,3 +A Fake phiX Sample,26,12,GC,1,26,11,1,0,40 +A Fake phiX Sample,26,12,TG,0,26,11,3,0,40 +A Fake phiX Sample,26,13,AC,0,26,12,1,0,40 +A Fake phiX Sample,26,13,AC,1,26,12,1,0,40 +A Fake phiX Sample,26,13,CC,0,26,12,2,0,40 +A Fake phiX Sample,26,13,CT,0,26,12,1,0,40 +A Fake phiX Sample,26,13,GA,0,26,12,2,1,3 +A Fake phiX Sample,26,13,GC,1,26,12,1,0,40 +A Fake phiX Sample,26,13,TG,0,26,12,2,0,40 +A Fake phiX Sample,26,14,AC,0,26,13,1,0,40 +A Fake phiX Sample,26,14,AC,1,26,13,1,0,40 +A Fake phiX Sample,26,14,CC,0,26,13,2,0,40 +A Fake phiX Sample,26,14,CG,0,26,13,1,0,40 +A Fake phiX Sample,26,14,CT,0,26,13,2,0,40 +A Fake phiX Sample,26,14,GA,0,26,13,1,0,40 +A Fake phiX Sample,26,14,GC,1,26,13,1,0,40 +A Fake phiX Sample,26,14,TG,0,26,13,1,0,40 +A Fake phiX Sample,26,15,AC,1,26,14,1,0,40 +A Fake phiX Sample,26,15,CC,0,26,14,2,0,40 +A Fake phiX Sample,26,15,CG,0,26,14,1,0,40 +A Fake phiX Sample,26,15,CT,0,26,14,2,0,40 +A Fake phiX Sample,26,15,GA,0,26,14,1,0,40 +A Fake phiX Sample,26,15,GT,0,26,14,1,0,40 +A Fake phiX Sample,26,15,TG,0,26,14,2,0,40 +A Fake phiX Sample,26,16,AC,1,26,15,1,0,40 +A Fake phiX Sample,26,16,CC,0,26,15,1,0,40 +A Fake phiX Sample,26,16,CG,0,26,15,1,0,40 +A Fake phiX Sample,26,16,CT,0,26,15,1,0,40 +A Fake phiX Sample,26,16,GA,0,26,15,2,0,40 +A Fake phiX Sample,26,16,GT,0,26,15,1,0,40 +A Fake phiX Sample,26,16,TA,0,26,15,1,0,40 +A Fake phiX Sample,26,16,TG,0,26,15,2,0,40 +A Fake phiX Sample,26,17,AC,1,26,16,3,0,40 +A Fake phiX Sample,26,17,CC,0,26,16,1,0,40 +A Fake phiX Sample,26,17,CG,0,26,16,1,0,40 +A Fake phiX Sample,26,17,GA,0,26,16,2,0,40 +A Fake phiX Sample,26,17,GT,0,26,16,1,0,40 +A Fake phiX Sample,26,17,TA,0,26,16,1,0,40 +A Fake phiX Sample,26,17,TG,0,26,16,1,0,40 +A Fake phiX 
Sample,26,18,AC,1,26,17,3,0,40 +A Fake phiX Sample,26,18,CC,0,26,17,3,0,40 +A Fake phiX Sample,26,18,CG,0,26,17,1,0,40 +A Fake phiX Sample,26,18,GA,0,26,17,1,0,40 +A Fake phiX Sample,26,18,GT,0,26,17,1,0,40 +A Fake phiX Sample,26,18,TA,0,26,17,1,0,40 +A Fake phiX Sample,26,19,AC,1,26,18,2,0,40 +A Fake phiX Sample,26,19,CC,0,26,18,3,0,40 +A Fake phiX Sample,26,19,CG,0,26,18,3,0,40 +A Fake phiX Sample,26,19,GT,0,26,18,1,0,40 +A Fake phiX Sample,26,19,TA,0,26,18,1,0,40 +A Fake phiX Sample,26,20,AC,1,26,19,1,0,40 +A Fake phiX Sample,26,20,CC,0,26,19,2,0,40 +A Fake phiX Sample,26,20,CG,0,26,19,3,0,40 +A Fake phiX Sample,26,20,GA,0,26,19,1,0,40 +A Fake phiX Sample,26,20,GT,0,26,19,2,0,40 +A Fake phiX Sample,26,20,TA,0,26,19,1,0,40 +A Fake phiX Sample,26,21,AC,1,26,20,1,0,40 +A Fake phiX Sample,26,21,AG,1,26,20,1,0,40 +A Fake phiX Sample,26,21,CC,0,26,20,1,0,40 +A Fake phiX Sample,26,21,CG,0,26,20,2,0,40 +A Fake phiX Sample,26,21,GA,0,26,20,1,0,40 +A Fake phiX Sample,26,21,GT,0,26,20,2,0,40 +A Fake phiX Sample,26,21,TA,0,26,20,2,0,40 +A Fake phiX Sample,26,22,AC,1,26,21,2,0,40 +A Fake phiX Sample,26,22,AG,1,26,21,1,0,40 +A Fake phiX Sample,26,22,CC,0,26,21,1,0,40 +A Fake phiX Sample,26,22,CG,0,26,21,1,0,40 +A Fake phiX Sample,26,22,GA,0,26,21,1,0,40 +A Fake phiX Sample,26,22,GG,0,26,21,1,0,40 +A Fake phiX Sample,26,22,GT,0,26,21,1,0,40 +A Fake phiX Sample,26,22,TA,0,26,21,2,0,40 +A Fake phiX Sample,26,23,AC,1,26,22,2,0,40 +A Fake phiX Sample,26,23,AG,1,26,22,1,0,40 +A Fake phiX Sample,26,23,CC,0,26,22,2,0,40 +A Fake phiX Sample,26,23,CG,0,26,22,1,0,40 +A Fake phiX Sample,26,23,GA,0,26,22,1,0,40 +A Fake phiX Sample,26,23,GC,0,26,22,1,0,40 +A Fake phiX Sample,26,23,GG,0,26,22,1,0,40 +A Fake phiX Sample,26,23,TA,0,26,22,1,0,40 +A Fake phiX Sample,26,24,AC,1,26,23,1,0,40 +A Fake phiX Sample,26,24,AG,1,26,23,1,0,40 +A Fake phiX Sample,26,24,CC,0,26,23,2,0,40 +A Fake phiX Sample,26,24,CG,0,26,23,2,0,40 +A Fake phiX Sample,26,24,CT,0,26,23,1,0,40 +A Fake phiX 
Sample,26,24,GA,0,26,23,1,0,40 +A Fake phiX Sample,26,24,GC,0,26,23,1,0,40 +A Fake phiX Sample,26,24,GG,0,26,23,1,0,40 +A Fake phiX Sample,26,25,AG,1,26,24,1,0,40 +A Fake phiX Sample,26,25,CC,0,26,24,1,0,40 +A Fake phiX Sample,26,25,CG,0,26,24,2,0,40 +A Fake phiX Sample,26,25,CT,0,26,24,1,0,40 +A Fake phiX Sample,26,25,GA,0,26,24,2,0,40 +A Fake phiX Sample,26,25,GC,0,26,24,1,0,40 +A Fake phiX Sample,26,25,GG,0,26,24,1,0,40 +A Fake phiX Sample,26,25,TA,1,26,24,1,0,40 +A Fake phiX Sample,26,26,AG,1,26,25,2,0,40 +A Fake phiX Sample,26,26,CG,0,26,25,1,0,40 +A Fake phiX Sample,26,26,CT,0,26,25,1,0,40 +A Fake phiX Sample,26,26,GA,0,26,25,2,0,40 +A Fake phiX Sample,26,26,GC,0,26,25,1,0,40 +A Fake phiX Sample,26,26,GG,0,26,25,1,0,40 +A Fake phiX Sample,26,26,TA,1,26,25,1,0,40 +A Fake phiX Sample,26,27,AC,2,26,26,1,0,40 +A Fake phiX Sample,26,27,AG,1,26,26,2,0,40 +A Fake phiX Sample,26,27,CT,0,26,26,1,0,40 +A Fake phiX Sample,26,27,GA,0,26,26,1,0,40 +A Fake phiX Sample,26,27,GC,0,26,26,1,0,40 +A Fake phiX Sample,26,27,GG,0,26,26,2,0,40 +A Fake phiX Sample,26,27,TA,1,26,26,1,0,40 +A Fake phiX Sample,26,28,AC,2,26,27,1,0,40 +A Fake phiX Sample,26,28,AG,1,26,27,1,0,40 +A Fake phiX Sample,26,28,CC,1,26,27,1,0,40 +A Fake phiX Sample,26,28,CT,0,26,27,1,0,40 +A Fake phiX Sample,26,28,GC,0,26,27,2,0,40 +A Fake phiX Sample,26,28,GG,0,26,27,2,0,40 +A Fake phiX Sample,26,28,TA,1,26,27,1,0,40 +A Fake phiX Sample,26,29,AC,2,26,28,1,0,40 +A Fake phiX Sample,26,29,CC,1,26,28,1,0,40 +A Fake phiX Sample,26,29,CT,0,26,28,2,0,40 +A Fake phiX Sample,26,29,GC,0,26,28,2,0,40 +A Fake phiX Sample,26,29,GG,0,26,28,1,0,40 +A Fake phiX Sample,26,29,TA,1,26,28,1,0,40 +A Fake phiX Sample,26,30,AC,2,26,29,1,0,40 +A Fake phiX Sample,26,30,CC,1,26,29,1,0,40 +A Fake phiX Sample,26,30,CT,0,26,29,3,0,40 +A Fake phiX Sample,26,30,GC,0,26,29,1,0,40 +A Fake phiX Sample,26,30,TA,1,26,29,2,0,40 +A Fake phiX Sample,26,31,AC,2,26,30,1,0,40 +A Fake phiX Sample,26,31,CC,1,26,30,1,0,40 +A Fake phiX 
Sample,26,31,CT,0,26,30,2,0,40 +A Fake phiX Sample,26,31,TA,1,26,30,3,0,40 +A Fake phiX Sample,26,32,AA,0,26,31,1,0,40 +A Fake phiX Sample,26,32,AC,1,26,31,1,0,40 +A Fake phiX Sample,26,32,AC,2,26,31,1,0,40 +A Fake phiX Sample,26,32,CC,1,26,31,1,0,40 +A Fake phiX Sample,26,32,CT,0,26,31,1,0,40 +A Fake phiX Sample,26,32,TA,1,26,31,2,0,40 +A Fake phiX Sample,26,33,AA,0,26,32,1,0,40 +A Fake phiX Sample,26,33,AC,1,26,32,1,0,40 +A Fake phiX Sample,26,33,AC,2,26,32,1,0,40 +A Fake phiX Sample,26,33,AT,0,26,32,1,0,40 +A Fake phiX Sample,26,33,CC,1,26,32,1,0,40 +A Fake phiX Sample,26,33,CT,0,26,32,1,0,40 +A Fake phiX Sample,26,33,TA,1,26,32,1,0,40 +A Fake phiX Sample,26,34,AA,0,26,33,1,0,40 +A Fake phiX Sample,26,34,AC,1,26,33,1,0,40 +A Fake phiX Sample,26,34,AT,0,26,33,1,0,40 +A Fake phiX Sample,26,34,CC,1,26,33,1,0,40 +A Fake phiX Sample,26,34,CT,0,26,33,2,0,40 +A Fake phiX Sample,26,34,TA,1,26,33,1,0,40 +A Fake phiX Sample,26,34,TG,0,26,33,1,0,40 +A Fake phiX Sample,26,35,AA,0,26,34,1,0,40 +A Fake phiX Sample,26,35,AT,0,26,34,1,0,40 +A Fake phiX Sample,26,35,CT,0,26,34,2,0,40 +A Fake phiX Sample,26,35,GA,0,26,34,1,0,40 +A Fake phiX Sample,26,35,TA,1,26,34,2,0,40 +A Fake phiX Sample,26,35,TG,0,26,34,1,0,40 +A Fake phiX Sample,26,36,AA,0,26,35,2,0,40 +A Fake phiX Sample,26,36,AG,0,26,35,1,0,40 +A Fake phiX Sample,26,36,AT,0,26,35,1,0,40 +A Fake phiX Sample,26,36,CT,0,26,35,2,0,40 +A Fake phiX Sample,26,36,GA,0,26,35,1,0,40 +A Fake phiX Sample,26,36,TA,0,26,35,2,0,40 +A Fake phiX Sample,26,36,TG,0,26,35,1,0,40 +EOF diff -r 53dd1bfced54 -r 30e1dd77e99c test-data/gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.bam Binary file test-data/gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.bam has changed diff -r 53dd1bfced54 -r 30e1dd77e99c test-data/gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam Binary file test-data/gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam has changed diff -r 53dd1bfced54 -r 30e1dd77e99c 
test-data/gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.log.contains --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.log.contains Mon Apr 14 08:48:25 2014 -0400 @@ -0,0 +1,16 @@ +GenomeAnalysisEngine - Strictness is SILENT +TableRecalibrationWalker - Reading in the data from input csv file... +TableRecalibrationWalker - ...done! +TableRecalibrationWalker - The covariates being used here: +TableRecalibrationWalker - ReadGroupCovariate +TableRecalibrationWalker - QualityScoreCovariate +TableRecalibrationWalker - CycleCovariate +TableRecalibrationWalker - DinucCovariate +TableRecalibrationWalker - HomopolymerCovariate +TableRecalibrationWalker - MinimumNQSCovariate +TableRecalibrationWalker - PositionCovariate +TableRecalibrationWalker - Generating tables of empirical qualities for use in sequential calculation... +TableRecalibrationWalker - ...done! +TraversalEngine - [INITIALIZATION COMPLETE; TRAVERSAL STARTING] +TraversalEngine - Total runtime +TraversalEngine - 0 reads were filtered out during traversal out of 10 total (0.00%) \ No newline at end of file diff -r 53dd1bfced54 -r 30e1dd77e99c test-data/phiX.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX.fasta Mon Apr 14 08:48:25 2014 -0400 @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG 
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA 
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC 
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA + diff -r 53dd1bfced54 -r 30e1dd77e99c tool-data/gatk_annotations.txt.sample --- a/tool-data/gatk_annotations.txt.sample Tue Apr 01 10:49:41 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -#unique_id name gatk_value tools_valid_for -AlleleBalance AlleleBalance AlleleBalance UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -AlleleBalanceBySample AlleleBalanceBySample AlleleBalanceBySample UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -BaseCounts BaseCounts BaseCounts UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -BaseQualityRankSumTest BaseQualityRankSumTest BaseQualityRankSumTest UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -ChromosomeCounts ChromosomeCounts ChromosomeCounts UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -DepthOfCoverage DepthOfCoverage DepthOfCoverage UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -DepthPerAlleleBySample DepthPerAlleleBySample DepthPerAlleleBySample 
UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -FisherStrand FisherStrand FisherStrand UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -GCContent GCContent GCContent UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -HaplotypeScore HaplotypeScore HaplotypeScore UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -HardyWeinberg HardyWeinberg HardyWeinberg UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -HomopolymerRun HomopolymerRun HomopolymerRun UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -InbreedingCoeff InbreedingCoeff InbreedingCoeff UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -IndelType IndelType IndelType UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -LowMQ LowMQ LowMQ UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -MVLikelihoodRatio MVLikelihoodRatio MVLikelihoodRatio UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -MappingQualityRankSumTest MappingQualityRankSumTest MappingQualityRankSumTest UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -MappingQualityZero MappingQualityZero MappingQualityZero UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -MappingQualityZeroBySample MappingQualityZeroBySample MappingQualityZeroBySample UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -MappingQualityZeroFraction MappingQualityZeroFraction MappingQualityZeroFraction UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -NBaseCount NBaseCount NBaseCount UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -QualByDepth QualByDepth QualByDepth UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -RMSMappingQuality RMSMappingQuality RMSMappingQuality UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -ReadDepthAndAllelicFractionBySample ReadDepthAndAllelicFractionBySample ReadDepthAndAllelicFractionBySample UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -ReadPosRankSumTest ReadPosRankSumTest ReadPosRankSumTest UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -SampleList 
SampleList SampleList UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -SnpEff SnpEff SnpEff VariantAnnotator,VariantRecalibrator -SpanningDeletions SpanningDeletions SpanningDeletions UnifiedGenotyper,VariantAnnotator,VariantRecalibrator -TechnologyComposition TechnologyComposition TechnologyComposition UnifiedGenotyper,VariantAnnotator,VariantRecalibrator diff -r 53dd1bfced54 -r 30e1dd77e99c tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Tue Apr 01 10:49:41 2014 -0400 +++ b/tool_data_table_conf.xml.sample Mon Apr 14 08:48:25 2014 -0400 @@ -5,9 +5,4 @@ value, dbkey, name, path - - - value, name, gatk_value, tools_valid_for - -
diff -r 53dd1bfced54 -r 30e1dd77e99c tool_dependencies.xml --- a/tool_dependencies.xml Tue Apr 01 10:49:41 2014 -0400 +++ b/tool_dependencies.xml Mon Apr 14 08:48:25 2014 -0400 @@ -1,6 +1,9 @@ - - + + + + + diff -r 53dd1bfced54 -r 30e1dd77e99c variant_recalibrator.xml --- a/variant_recalibrator.xml Tue Apr 01 10:49:41 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,431 +0,0 @@ - - - - gatk - - - gatk_macros.xml - - gatk_wrapper.py - --max_jvm_heap_fraction "1" - --stdout "${output_log}" - #for $var_count, $variant in enumerate( $reference_source.variants ): - -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}" - #end for - -p 'java - -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar" - -T "VariantRecalibrator" - --num_threads \${GALAXY_SLOTS:-4} - -et "NO_ET" ##ET no phone home - ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout - #if $reference_source.reference_source_selector != "history": - -R "${reference_source.ref_file.fields.path}" - #end if - --recal_file "${output_recal}" - --tranches_file "${output_tranches}" - --rscript_file "${output_rscript}" - ' - - #set $rod_binding_names = dict() - #for $rod_binding in $rod_bind: - #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': - #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name - #elif str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'comp': - #set $rod_bind_name = "comp" + $rod_binding.rod_bind_type.custom_rod_name - #else - #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector - #end if - #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 - #if $rod_binding.rod_bind_type.rod_training_type.rod_training_type_selector == "not_training_truth_known": - -d "--resource:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" 
"input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" - #else: - -d "--resource:${rod_bind_name},%(file_type)s,known=${rod_binding.rod_bind_type.rod_training_type.known},training=${rod_binding.rod_bind_type.rod_training_type.training},truth=${rod_binding.rod_bind_type.rod_training_type.truth},bad=${rod_binding.rod_bind_type.rod_training_type.bad},prior=${rod_binding.rod_bind_type.rod_training_type.prior}" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" - #end if - #end for - - #include source=$standard_gatk_options# - - ##start analysis specific options - -p ' - #if str( $annotations ) != "None": - #for $annotation in str( $annotations.fields.gatk_value ).split( ',' ): - --use_annotation "${annotation}" - #end for - #end if - #for $additional_annotation in $additional_annotations: - --use_annotation "${additional_annotation.additional_annotation_name}" - #end for - --mode "${mode}" - ' - - #if $analysis_param_type.analysis_param_type_selector == "advanced": - -p ' - --maxGaussians "${analysis_param_type.max_gaussians}" - --maxIterations "${analysis_param_type.max_iterations}" - --numKMeans "${analysis_param_type.num_k_means}" - --stdThreshold "${analysis_param_type.std_threshold}" - --qualThreshold "${analysis_param_type.qual_threshold}" - --shrinkage "${analysis_param_type.shrinkage}" - --dirichlet "${analysis_param_type.dirichlet}" - --priorCounts "${analysis_param_type.prior_counts}" - #if str( $analysis_param_type.bad_variant_selector.bad_variant_selector_type ) == 'percent': - --percentBadVariants "${analysis_param_type.bad_variant_selector.percent_bad_variants}" - #else: - --minNumBadVariants "${analysis_param_type.bad_variant_selector.min_num_bad_variants}" - #end if - --target_titv "${analysis_param_type.target_titv}" - #for $tranche in [ $tranche.strip() for $tranche in str( $analysis_param_type.ts_tranche ).split( ',' ) if $tranche.strip() ] - 
--TStranche "${tranche}" - #end for - #for $ignore_filter in $analysis_param_type.ignore_filters: - #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector ) - #if $ignore_filter_name == "custom": - #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name ) - #end if - --ignore_filter "${ignore_filter_name}" - #end for - --ts_filter_level "${analysis_param_type.ts_filter_level}" - ' - #end if - - - && - mv "${output_rscript}.pdf" "${output_tranches_pdf}" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Takes variant calls as .vcf files, learns a Gaussian mixture model over the variant annotations and evaluates the variant -- assigning an informative lod score - -For more information on using the VariantRecalibrator module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration>`_. - -To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. - -If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. - ------- - -**Inputs** - -GenomeAnalysisTK: VariantRecalibrator accepts a variant input file. - - -**Outputs** - -The output is in VCF format. 
- - -Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. - -------- - -**Settings**:: - - - tranches_file The output tranches file used by ApplyRecalibration - use_annotation The names of the annotations which should be used for calculations - mode Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously. (SNP|INDEL|BOTH) - maxGaussians The maximum number of Gaussians to try during variational Bayes algorithm - maxIterations The maximum number of VBEM iterations to be performed in variational Bayes algorithm. Procedure will normally end when convergence is detected. - numKMeans The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model. - stdThreshold If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model. - qualThreshold If a known variant has raw QUAL value less than -qual then don't use it for building the Gaussian mixture model. - shrinkage The shrinkage parameter in variational Bayes algorithm. - dirichlet The dirichlet parameter in variational Bayes algorithm. - priorCounts The number of prior counts to use in variational Bayes algorithm. - percentBadVariants What percentage of the worst scoring variants to use when building the Gaussian mixture model of bad variants. 0.07 means bottom 7 percent. - minNumBadVariants The minimum amount of worst scoring variants to use when building the Gaussian mixture model of bad variants. Will override -percentBad argument if necessary. - recal_file The output recal file used by ApplyRecalibration - target_titv The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on optimization curve output figures. 
(approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES! - TStranche The levels of novel false discovery rate (FDR, implied by ti/tv) at which to slice the data. (in percent, that is 1.0 for 1 percent) - ignore_filter If specified the optimizer will use variants even if the specified filter name is marked in the input VCF file - path_to_Rscript The path to your implementation of Rscript. For Broad users this may be /broad/tools/apps/R-2.6.0/bin/Rscript - rscript_file The output rscript file generated by the VQSR to aid in visualization of the input data and learned model - path_to_resources Path to resources folder holding the Sting R scripts. - ts_filter_level The truth sensitivity level at which to start filtering, used here to indicate filtered variants in plots - -@CITATION_SECTION@ - -