comparison realigner_target_creator.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
comparison
equal deleted inserted replaced
14:68426930d59c 15:01ff8dd37d4d
1 <tool id="gatk2_realigner_target_creator" name="Realigner Target Creator" version="@VERSION@.1">
2 <description>for use in local realignment</description>
3 <macros>
4 <import>gatk2_macros.xml</import>
5 </macros> <!-- the expand elements and @TOKEN@ placeholders used in this file are presumably defined in gatk2_macros.xml, which is not shown here -->
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="python">
9 gatk2_wrapper.py
10 --stdout "${output_log}"
11 -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
12 #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
13 -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
14 #end if
15 -p '
16 @JAR_PATH@
17 -T "RealignerTargetCreator"
18 -o "${output_interval}"
19
20 \$GATK2_SITE_OPTIONS
21
22 ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
23 --num_cpu_threads_per_data_thread 1
24
25 @THREADS@
26
27 #if $reference_source.reference_source_selector != "history":
28 -R "${reference_source.ref_file.fields.path}"
29 #end if
30 '
31 #set $rod_binding_names = dict()
32 #for $rod_binding in $rod_bind:
33 #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
34 #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
35 #else
36 #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
37 #end if
38 #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
39 -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
40 #end for
41
42 $allow_n_cigar_reads
43 #include source=$standard_gatk_options#
44 ##start analysis specific options
45 #if $analysis_param_type.analysis_param_type_selector == "advanced":
46 -p '
47 --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}"
48 --windowSize "${analysis_param_type.windowSize}"
49 --mismatchFraction "${analysis_param_type.mismatchFraction}"
50 --maxIntervalSize "${analysis_param_type.maxIntervalSize}"
51 '
52 #end if
53 </command> <!-- Cheetah template for the gatk2_wrapper.py call. The -d arguments pass the BAM, its index when the metadata has one, and one entry per Known Variants dataset (a per-name counter keeps the trailing input_NAME_N labels distinct). The -p arguments pass RealignerTargetCreator option text: -R is added here only for a non-history reference, and the four tuning options only when the analysis selector is "advanced". How the wrapper interprets -d and -p is not visible in this file. -->
54 <inputs>
55 <conditional name="reference_source"> <!-- pairs the BAM input with its reference: "cached" picks a row of the gatk2_picard_indexes data table, "history" takes a FASTA dataset -->
56 <expand macro="reference_source_selector_param" />
57 <when value="cached">
58 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;">
59 <validator type="unspecified_build" />
60 <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
61 </param>
62 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" >
63 <options from_data_table="gatk2_picard_indexes">
64 <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
65 </options>
66 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
67 </param>
68 </when>
69 <when value="history">
70 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;" />
71 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
72 <options>
73 <filter type="data_meta" key="dbkey" ref="input_bam" />
74 </options>
75 </param>
76 </when>
77 </conditional>
78
79 <repeat name="rod_bind" title="Known Variants" help="Using data sets of known variants (-known,--known &amp;lt;known&amp;gt;)"> <!-- each entry is turned into one -known binding by the command template; the selector value, or the custom name, becomes the binding name -->
80 <conditional name="rod_bind_type">
81 <param name="rod_bind_type_selector" type="select" label="Variant Type">
82 <option value="dbsnp" selected="True">dbSNP</option>
83 <option value="snps">SNPs</option>
84 <option value="indels">INDELs</option>
85 <option value="custom">Custom</option>
86 </param>
87 <when value="dbsnp">
88 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
89 </when>
90 <when value="snps">
91 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
92 </when>
93 <when value="indels">
94 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
95 </when>
96 <when value="custom">
97 <param name="custom_rod_name" type="text" value="Unknown" label="Custom ROD name" help="Name under which the variant file below is bound on the command line (-known:NAME,type). This is a name, not a file." /> <!-- label fixed: this text value is the binding name; the file itself is input_rod -->
98 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
99 </when>
100 </conditional>
101 </repeat>
102
103 <expand macro="allow_n_cigar_reads" />
104 <expand macro="gatk_param_type_conditional" />
105
106 <expand macro="analysis_type_conditional"> <!-- the four params below are read by the command template as analysis_param_type.NAME when the selector is "advanced" -->
107 <param name="windowSize" type="integer" value="10" label="Window size for calculating entropy or SNP clusters (windowSize)"
108 help="-window,--windowSize &amp;lt;windowSize&amp;gt;" />
109 <param name="mismatchFraction" type="float" value="0.15" label="Fraction of base qualities needing to mismatch for a position to have high entropy (mismatchFraction)"
110 help="to disable set to &lt;= 0 or &gt; 1 (-mismatch,--mismatchFraction &amp;lt;mismatchFraction&amp;gt;)"/>
111 <param name="minReadsAtLocus" type="integer" value="4" label="Minimum reads at a locus to enable using the entropy calculation (minReadsAtLocus)"
112 help="-minReads,--minReadsAtLocus &amp;lt;minReadsAtLocus&amp;gt;" />
113 <param name="maxIntervalSize" type="integer" value="500" label="Maximum interval size" help="-maxInterval,--maxIntervalSize &amp;lt;maxIntervalSize&amp;gt;" />
114 </expand>
115 </inputs>
116 <outputs>
117 <data name="output_interval" format="gatk_interval" label="${tool.name} on ${on_string} (GATK intervals)"/> <!-- target of the -o option in the command template -->
118 <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)"/> <!-- file handed to the wrapper through its stdout option in the command template -->
119 </outputs>
120 <tests>
121 <test> <!-- single case: history reference (phiX FASTA), one dbsnp known-variants VCF, advanced analysis options set explicitly -->
122 <param name="reference_source_selector" value="history"/>
123 <param name="ref_file" ftype="fasta" value="phiX.fasta"/>
124 <param name="input_bam" ftype="bam" value="gatk/fake_phiX_reads_1.bam"/>
125 <param name="rod_bind_type_selector" value="dbsnp"/>
126 <param name="input_rod" ftype="vcf" value="gatk/fake_phiX_variant_locations.vcf"/>
127 <param name="gatk_param_type_selector" value="basic"/>
128 <param name="analysis_param_type_selector" value="advanced"/>
129 <param name="windowSize" value="10"/>
130 <param name="mismatchFraction" value="0.15"/>
131 <param name="minReadsAtLocus" value="4"/>
132 <param name="maxIntervalSize" value="500"/>
133 <output name="output_interval" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval"/>
134 <output name="output_log" compare="contains" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.log.contains"/> <!-- the log is checked with compare="contains" rather than for an exact match -->
135 </test>
136 </tests>
137 <help>
138 **What it does**
139
140 Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
141
142 For more information on local realignment around indels using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_indels_RealignerTargetCreator.html&gt;`_.
143
144 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
145
146 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
147
148 ------
149
150 **Inputs**
151
152 GenomeAnalysisTK: RealignerTargetCreator accepts an aligned BAM input file and, optionally, one or more VCF files of known variants.
153
154
155 **Outputs**
156
157 The output is in GATK Interval format.
158
159
160 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
161
162 -------
163
164 **Settings**::
165
166 windowSize window size for calculating entropy or SNP clusters
167 mismatchFraction fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to &lt;= 0 or &gt; 1
168 minReadsAtLocus minimum reads at a locus to enable using the entropy calculation
169 maxIntervalSize maximum interval size
170
171 @CITATION_SECTION@
172 </help>
173 <expand macro="citations" />
174 </tool>