annotate base_recalibrator.xml @ 8:3a8ef2ad0414 draft

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:08:27 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
1 <tool id="gatk2_base_recalibrator" name="Base Recalibrator" version="@VERSION@.0">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
2 <description>calculates covariates used to recalibrate base quality scores of reads</description>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
3 <macros>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
4 <import>gatk2_macros.xml</import>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
5 </macros>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
6 <expand macro="requirements" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
7 <expand macro="version_command" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
8 <command interpreter="python">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
9 gatk2_wrapper.py
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
10 --stdout "${output_log}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
11 -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
12 #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
13 -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
14 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
15 -p '
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
16 @JAR_PATH@
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
17 -T "BaseRecalibrator"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
18 \$GATK2_SITE_OPTIONS
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
19
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
20 ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
21 --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-8}
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
22
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
23 ## we set non standards at every run and the user can choose which ones are preferred
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
24 ## in our select box both standard options (ContextCovariate, CycleCovariate) are selected by default
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
25 --no_standard_covs
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
26
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
27 #if str( $reference_source.reference_source_selector ) != "history":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
28 -R "${reference_source.ref_file.fields.path}" ## non-history (cached) reference: path column of the selected data table entry
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
29 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
30 #if str($input_recal) != 'None':
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
31 --BQSR "${input_recal}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
32 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
33 --out "${output_recal}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
34 #if str( $covariates ) != "None":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
35 #for $cov in str( $covariates ).split( ',' ):
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
36 -cov "${cov}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
37 #end for
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
38 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
39 '
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
40
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
41 #set $snp_dataset_provided = False
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
42 #set $rod_binding_names = dict()
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
43 #for $rod_binding in $rod_bind:
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
44 #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
45 #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
46 #else
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
47 #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
48 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
49 #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'dbsnp':
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
50 #set $snp_dataset_provided = True
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
51 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
52 #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
53 -d "--knownSites:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
54 #end for
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
55
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
56 #include source=$standard_gatk_options#
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
57
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
58 ## start analysis specific options (only emitted when the "advanced" analysis option set is selected)
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
59 #if str( $analysis_param_type.analysis_param_type_selector ) == "advanced":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
60 -p '
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
61 #if str( $analysis_param_type.default_read_group_type.default_read_group_type_selector ) == "set":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
62 --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
63 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
64 #if str( $analysis_param_type.default_platform ) != "default":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
65 --default_platform "${analysis_param_type.default_platform}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
66 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
67 #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
68 --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
69 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
70 #if str( $analysis_param_type.force_platform ) != "default":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
71 --force_platform "${analysis_param_type.force_platform}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
72 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
73 ${analysis_param_type.exception_if_no_tile}
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
74 #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
75 #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
76 --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
77 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
78 #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default":
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
79 --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
80 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
81 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
82 --window_size_nqs "${analysis_param_type.window_size_nqs}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
83 --homopolymer_nback "${analysis_param_type.homopolymer_nback}"
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
84 '
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
85 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
86 #if not $snp_dataset_provided:
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
87 -p '--run_without_dbsnp_potentially_ruining_quality'
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
88 #end if
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
89 </command>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
90 <inputs>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
91 <conditional name="reference_source">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
92 <expand macro="reference_source_selector_param" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
93 <when value="cached">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
94 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
95 <validator type="unspecified_build" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
96 <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
97 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
98 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" >
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
99 <options from_data_table="gatk2_picard_indexes">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
100 <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
101 </options>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
102 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
103 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
104 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
105 <when value="history">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
106 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
107 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
108 <options>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
109 <filter type="data_meta" key="dbkey" ref="input_bam" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
110 </options>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
111 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
112 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
113 </conditional>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
114 <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
115
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
116 <param name="covariates" type="select" multiple="True" display="checkboxes" label="Covariates to be used in the recalibration" help="-cov,--covariate &amp;lt;covariate&amp;gt;" >
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
117 <!-- might we want to load the available covariates from an external configuration file, since additional ones can be added to local installs? -->
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
118 <option value="ContextCovariate" selected="true"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
119 <option value="CycleCovariate" selected="true"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
120 <option value="RepeatLengthCovariate" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
121 <option value="RepeatUnitCovariate" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
122 <option value="RepeatUnitAndLengthCovariate" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
123 <!--
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
124 Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
125 be added for the user regardless of whether or not they were specified.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
126 <option value="QualityScoreCovariate" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
127 <option value="ReadGroupCovariate" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
128 -->
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
129 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
130
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
131 <repeat name="rod_bind" title="Known Variants" help="Using data sets of known variants (-knownSites,--knownSites &amp;lt;knownSites&amp;gt;)">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
132 <conditional name="rod_bind_type">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
133 <param name="rod_bind_type_selector" type="select" label="Variant Type">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
134 <option value="dbsnp" selected="True">dbSNP</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
135 <option value="snps">SNPs</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
136 <option value="indels">INDELs</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
137 <option value="mask">Mask</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
138 <option value="custom">Custom</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
139 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
140 <when value="dbsnp">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
141 <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
142 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
143 <when value="snps">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
144 <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
145 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
146 <when value="indels">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
147 <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
148 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
149 <when value="mask">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
150 <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
151 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
152 <when value="custom">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
153 <param name="custom_rod_name" type="text" value="Unknown" label="Custom ROD name"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
154 <param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="Variant file" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
155 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
156 </conditional>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
157 </repeat>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
158
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
159 <expand macro="gatk_param_type_conditional" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
160
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
161 <conditional name="analysis_param_type">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
162 <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
163 <option value="basic" selected="True">Basic</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
164 <option value="advanced">Advanced</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
165 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
166 <when value="basic">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
167 <!-- Do nothing here -->
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
168 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
169 <when value="advanced">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
170 <conditional name="default_read_group_type">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
171 <param name="default_read_group_type_selector" type="select" label="Set default Read Group" help="--default_read_group">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
172 <option value="default" selected="True">Don't Set</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
173 <option value="set">Set</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
174 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
175 <when value="default">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
176 <!-- do nothing here -->
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
177 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
178 <when value="set">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
179 <param name="default_read_group" type="text" value="Unknown" label="If a read has no read group then default to the provided String"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
180 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
181 </conditional>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
182 <param name="default_platform" type="select" label="Set default Platform" help="--default_platform">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
183 <option value="default" selected="True">Don't Set</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
184 <option value="illumina">illumina</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
185 <option value="454">454</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
186 <option value="solid">solid</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
187 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
188 <conditional name="force_read_group_type">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
189 <param name="force_read_group_type_selector" type="select" label="Force Read Group" help="--force_read_group">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
190 <option value="default" selected="True">Don't Force</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
191 <option value="set">Force</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
192 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
193 <when value="default">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
194 <!-- do nothing here -->
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
195 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
196 <when value="set">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
197 <param name="force_read_group" type="text" value="Unknown" label="If provided, the read group ID of EVERY read will be forced to be the provided String."/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
198 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
199 </conditional>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
200 <param name="force_platform" type="select" label="Force Platform" help="--force_platform">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
201 <option value="default" selected="True">Don't Force</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
202 <option value="illumina">illumina</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
203 <option value="454">454</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
204 <option value="solid">solid</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
205 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
206 <param name="exception_if_no_tile" type="boolean" checked="False" truevalue="--exception_if_no_tile" falsevalue="" label="Throw an exception when no tile can be found" help="--exception_if_no_tile"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
207 <conditional name="solid_options_type">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
208 <param name="solid_options_type_selector" type="select" label="Set SOLiD specific options">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
209 <option value="default" selected="True">Don't Set</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
210 <option value="set">Set</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
211 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
212 <when value="default">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
213 <!-- do nothing here -->
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
214 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
215 <when value="set">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
216 <param name="solid_recal_mode" type="select" label="How should we recalibrate solid bases in which the reference was inserted" help="-sMode,--solid_recal_mode &amp;lt;solid_recal_mode&amp;gt;">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
217 <option value="default" selected="True">Don't set</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
218 <option value="DO_NOTHING">DO_NOTHING</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
219 <option value="SET_Q_ZERO">SET_Q_ZERO</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
220 <option value="SET_Q_ZERO_BASE_N">SET_Q_ZERO_BASE_N</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
221 <option value="REMOVE_REF_BIAS">REMOVE_REF_BIAS</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
222 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
223 <param name="solid_nocall_strategy" type="select" label="Behavior of the recalibrator when it encounters no calls" help="-solid_nocall_strategy,--solid_nocall_strategy &amp;lt;solid_nocall_strategy&amp;gt;">
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
224 <option value="default" selected="True">Don't set</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
225 <option value="THROW_EXCEPTION">THROW_EXCEPTION</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
226 <option value="LEAVE_READ_UNRECALIBRATED">LEAVE_READ_UNRECALIBRATED</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
227 <option value="PURGE_READ">PURGE_READ</option>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
228 </param>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
229 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
230 </conditional>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
231 <param name="window_size_nqs" type="integer" value="5" label="Window size used by MinimumNQSCovariate" help="--window_size_nqs"/>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
232 <param name="homopolymer_nback" type="integer" value="7" label="number of previous bases to look at in HomopolymerCovariate" help="-nback,--homopolymer_nback &amp;lt;homopolymer_nback&amp;gt;" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
233 </when>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
234 </conditional>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
235 </inputs>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
236 <outputs>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
237 <data format="gatk_report" name="output_recal" label="${tool.name} on ${on_string} (Covariate File)" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
238 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
239 </outputs>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
240 <tests>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
241 <test>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
242 <param name="reference_source_selector" value="history" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
243 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
244 <param name="input_bam" value="gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.bam" ftype="bam" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
245 <param name="rod_bind_type_selector" value="dbsnp" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
246 <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
247 <param name="standard_covs" value="True" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
248 <param name="covariates" value="ReadGroupCovariate,HomopolymerCovariate,MinimumNQSCovariate,PositionCovariate" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
249 <param name="gatk_param_type_selector" value="basic" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
250 <param name="analysis_param_type_selector" value="basic" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
251 <output name="output_recal" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
252 <output name="output_log" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.log.contains" compare="contains" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
253 </test>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
254 </tests>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
255 <help>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
256 .. class:: warningmark
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
257
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
258 "This calculation is critically dependent on being able to skip over known variant sites. Please provide a dbSNP ROD or a VCF file containing known sites of genetic variation."
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
259 However, if you do not provide this file, the '--run_without_dbsnp_potentially_ruining_quality' flag will be automatically used, and the command will be allowed to run.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
260
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
261 **What it does**
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
262
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
263 This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinucleotide). Since there is a large amount of data, one can then calculate an empirical probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. The output file is a CSV list of (the several covariate values, num observations, num mismatches, empirical quality score). The first non-comment line of the output file gives the name of the covariates that were used for this calculation. Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified. Note: This walker is designed to be used in conjunction with TableRecalibrationWalker.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
264
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
265 For more information on base quality score recalibration using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_bqsr_BaseRecalibrator.html&gt;`_.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
266
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
267 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
268
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
269 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
270
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
271 ------
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
272
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
273 **Inputs**
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
274
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
275 GenomeAnalysisTK: BaseRecalibrator accepts an aligned BAM input file.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
276
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
277
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
278 **Outputs**
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
279
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
280 The output is in CSV format.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
281
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
282
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
283 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
284
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
285 -------
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
286
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
287 **Settings**::
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
288
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
289
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
290 default_read_group If a read has no read group then default to the provided String.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
291 default_platform If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
292 force_read_group If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
293 force_platform If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
294 window_size_nqs The window size used by MinimumNQSCovariate for its calculation
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
295 homopolymer_nback The number of previous bases to look at in HomopolymerCovariate
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
296 exception_if_no_tile If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
297 solid_recal_mode How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS (DO_NOTHING|SET_Q_ZERO|SET_Q_ZERO_BASE_N|REMOVE_REF_BIAS)
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
298 solid_nocall_strategy Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ (THROW_EXCEPTION|LEAVE_READ_UNRECALIBRATED|PURGE_READ)
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
299 recal_file Filename for the input covariates table recalibration .csv file
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
300 out The output CSV file
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
301 standard_covs Use the standard set of covariates in addition to the ones listed using the -cov argument
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
302 run_without_dbsnp_potentially_ruining_quality If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
303
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
304 @CITATION_SECTION@
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
305 </help>
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
306 <expand macro="citations" />
3a8ef2ad0414 Uploaded
lz_hust
parents:
diff changeset
307 </tool>