comparison naive_variant_caller.xml @ 14:5c852eca82e0 draft

planemo upload for repository https://github.com/blankenberg/tools-blankenberg/tree/master/tools/naive_variant_caller commit a1f39a3e28911591f6a1ed58a43e95e0baf5e750
author blankenberg
date Wed, 28 Feb 2018 15:54:57 -0500
parents
children aff38ea879f1
comparison
equal deleted inserted replaced
13:cfc86c3fc5c8 14:5c852eca82e0
1 <tool id="naive_variant_caller" name="Naive Variant Caller" version="0.0.3">
2 <description> - tabulate variable sites from BAM datasets</description>
3 <requirements> <!-- Runtime dependency: the nvc package, requested at the same version number as the tool itself -->
4 <requirement type="package" version="0.0.3">nvc</requirement>
5 </requirements>
6 <stdio> <!-- Exit-code rules: the two ranges below together match every non-zero exit status -->
7 <exit_code range="1:" />
8 <exit_code range=":-1" />
9 </stdio>
10 <version_command>naive_variant_caller.py --version</version_command>
11 <command>naive_variant_caller.py
12 -o '${output_vcf}'
13 ## One -b/-i pair (BAM dataset plus its bam_index metadata file) is emitted per entry of the input_bams repeat.
14 #for $input_bam in $reference_source.input_bams:
15 -b '${input_bam.input_bam}'
16 -i '${input_bam.input_bam.metadata.bam_index}'
17 #end for
18 ## Reference: the path field of the selected data table entry when cached, otherwise the optional history dataset.
19 #if str( $reference_source.reference_source_selector ) != "history":
20 -r '${reference_source.ref_file.fields.path}'
21 #elif $reference_source.ref_file:
22 -r '${reference_source.ref_file}'
23 #end if
24 ## Region restrictions: inline chromosome:start-end entries first, then interval datasets.
25 #for $region in $regions:
26 --region '${region.chromosome}:${region.start}-${region.end}'
27 #end for
28 ## The column numbers taken from the interval metadata are decremented by one before being passed on.
29 #for $region_file in $region_files:
30 --regions_filename '${region_file.input_region}'
31 --regions_file_columns '${int($region_file.input_region.metadata.chromCol)-1},${int($region_file.input_region.metadata.startCol)-1},${int($region_file.input_region.metadata.endCol)-1}'
32 #end for
33
34 ${variants_only}
35
36 ${use_strand}
37
38 --ploidy '${ploidy}'
39
40 --min_support_depth '${min_support_depth}'
41 ## The quality filters are optional: each flag is only passed when a value was entered.
42 #if str($min_base_quality):
43 --min_base_quality '${min_base_quality}'
44 #end if
45
46 #if str($min_mapping_quality):
47 --min_mapping_quality '${min_mapping_quality}'
48 #end if
49
50 --allow_out_of_bounds_positions
51 ## Advanced flags are only emitted when the advanced section is selected; "guess" passes no explicit dtype.
52 #if str( $advanced_options.advanced_options_selector ) == "advanced":
53 #if str( $advanced_options.coverage_dtype ) != "guess":
54 --coverage_dtype '${advanced_options.coverage_dtype}'
55 #end if
56 ${advanced_options.safe}
57 #end if
58 </command>
59 <inputs>
60 <conditional name="reference_source"> <!-- Chooses between a cached reference (data table) and a FASTA dataset from the history; both branches define input_bams and ref_file, which the command template reads via reference_source -->
61 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
62 <option value="cached">Locally cached</option>
63 <option value="history">History</option>
64 </param>
65 <when value="cached"> <!-- Cached branch: each BAM must have a build set, and that build must be listed in the sam_fa_indexes data table -->
66 <repeat name="input_bams" title="BAM file" min="1" >
67 <param name="input_bam" type="data" format="bam" label="BAM file">
68 <validator type="unspecified_build" />
69 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
70 </param>
71 </repeat>
72 <param name="ref_file" type="select" label="Using reference genome" >
73 <options from_data_table="sam_fa_indexes">
74 <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
75 </options>
76 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
77 </param>
78 </when>
79 <when value="history"> <!-- FIX ME!!!! -->
80 <repeat name="input_bams" title="BAM file" min="1" >
81 <param name="input_bam" type="data" format="bam" label="BAM file" >
82 </param>
83 </repeat>
84 <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" /> <!-- Optional here; the command template omits -r when no dataset is chosen -->
85 </when>
86 </conditional>
87
88 <repeat name="regions" title="Restrict to regions" min="0" > <!-- Each entry is passed to the command as chromosome:start-end -->
89 <param name="chromosome" type="text" value="" optional="False" label="Chromosome" />
90 <param name="start" type="integer" value="" optional="True" label="Start" help="0-based, closed. (BED style)" />
91 <param name="end" type="integer" value="" optional="True" label="End" help="0-based, open. (BED style)" />
92 </repeat>
93
94 <repeat name="region_files" title="Restrict to regions by file" min="0" > <!-- Interval datasets; their chromCol/startCol/endCol metadata is forwarded to the command -->
95 <param name="input_region" type="data" format="interval" label="Genomic Regions" />
96 </repeat>
97
98 <!-- TODO: enhance filtering -->
99 <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" />
100 <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" /> <!-- Empty by default; the command template then leaves the flag out -->
101 <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" /> <!-- Empty by default; the command template then leaves the flag out -->
102
103 <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" />
104 <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with possible alternate alleles"/>
105
106 <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/>
107
108 <conditional name="advanced_options"> <!-- The coverage dtype and safe-mode options only exist in the "advanced" branch -->
109 <param name="advanced_options_selector" type="select" label="Show Advanced Options">
110 <option value="basic" selected="True">Hide Advanced Options</option>
111 <option value="advanced">Show Advanced Options</option>
112 </param>
113 <when value="basic">
114 <!-- Do nothing here -->
115 </when>
116 <when value="advanced">
117 <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM">
118 <option value="guess" selected="True">Guess</option>
119 <option value="uint8">uint8</option>
120 <option value="uint16">uint16</option>
121 <option value="uint32">uint32</option>
122 <option value="uint64">uint64</option>
123 </param>
124 <param name="safe" type="boolean" truevalue="--safe" falsevalue="" checked="False" label="Be extra safe"/>
125 </when>
126 </conditional>
127
128 </inputs>
129 <outputs>
130 <data format="vcf" name="output_vcf" /> <!-- Single VCF dataset; the command template hands its path to the script via -o -->
131 </outputs>
132 <tests>
133 <test> <!-- History-reference run on one BAM with coverage_dtype set to uint8; the output is checked with compare="contains" -->
134 <param name="reference_source_selector" value="history" />
135 <param name="input_bam" value="fake_phiX174_reads_1.bam" ftype="bam" />
136 <param name="ref_file" value="phiX174.fasta" ftype="fasta" />
137 <param name="regions" value="0" /> <!-- NOTE(review): "regions" is declared as a repeat, not a param; presumably this is meant to leave it empty, confirm -->
138 <param name="min_support_depth" value="0" />
139 <param name="min_base_quality" value="" />
140 <param name="min_mapping_quality" value="" />
141 <param name="ploidy" value="2" />
142 <param name="variants_only" value="False" />
143 <param name="use_strand" value="False" />
144 <param name="advanced_options_selector" value="advanced" />
145 <param name="coverage_dtype" value="uint8" />
146 <output name="output_vcf" file="fake_phiX174_reads_1_test_out_1.vcf" compare="contains" />
147 </test>
148 </tests>
149 <help>
150 **What it does**
151
152 This tool is a naive variant caller that processes aligned sequencing reads from the BAM format and produces a VCF file containing per position variant calls. This tool allows multiple BAM files to be provided as input and utilizes read group information to make calls for individual samples.
153
154 User-configurable options allow filtering reads that do not pass mapping or base quality thresholds and minimum per base read depth; users can also specify the ploidy and whether to consider each strand separately.
155
156 In addition to calling alternate alleles based upon simple ratios of nucleotides at a position, per base nucleotide counts are also provided. A custom tag, NC, is used within the Genotype fields. The NC field is a comma-separated listing of nucleotide counts in the form of &lt;nucleotide&gt;=&lt;count&gt;, where a plus or minus character is prepended to indicate strand, if the strandedness option was specified.
157
158
159 ------
160
161 **Inputs**
162
163 Accepts one or more BAM input files and a reference genome from the built-in list or from a FASTA file in your history.
164
165
166 **Outputs**
167
168 The output is in VCF format.
169
170 Example VCF output line, without reporting by strand:
171 ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:A=9,C=5,T=9629,G=15,``
172
173 Example VCF output line, when reporting by strand:
174 ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:+T=3972,-A=9,-C=5,-T=5657,-G=15,``
175
176 **Options**
177
178 Reference Genome:
179
180 Ensure that you have selected the correct reference genome, either from the list of built-in genomes or by selecting the corresponding FASTA file from your history.
181
182 Restrict to regions:
183
184 You can specify any number of regions on which you would like to receive results. You can specify just a chromosome name, or a chromosome name and start position, or a chromosome name and start and end position for the set of desired regions.
185
186 Minimum number of reads needed to consider a REF/ALT:
187
188 This value declares the minimum number of reads containing a particular base at each position in order to list and use said allele in genotyping calls. Default is 0.
189
190 Minimum base quality:
191
192 The minimum base quality score needed for the position in a read to be used for nucleotide counts and genotyping. Default is no filter.
193
194 Minimum mapping quality:
195
196 The minimum mapping quality score needed to consider a read for nucleotide counts and genotyping. Default is no filter.
197
198 Ploidy:
199
200 The number of genotype calls to make at each reported position.
201
202 Only write out positions with possible alternate alleles:
203
204 When set, only positions which have at least one non-reference nucleotide which passes the declared filters will be present in the output.
205
206 Report counts by strand:
207
208 When set, nucleotide counts (NC) will be reported in reference to the aligned read's source strand. Reported as: &lt;strand&gt;&lt;BASE&gt;=&lt;COUNT&gt;.
209
210 Choose the dtype to use for storing coverage information:
211
212 This controls the maximum depth value for each nucleotide/position/strand (when specified). Smaller dtypes require less memory, but have lower maximum limits.
213
214 +--------+----------------------------+
215 | name | maximum coverage value |
216 +========+============================+
217 | uint8 | 255 |
218 +--------+----------------------------+
219 | uint16 | 65,535 |
220 +--------+----------------------------+
221 | uint32 | 4,294,967,295 |
222 +--------+----------------------------+
223 | uint64 | 18,446,744,073,709,551,615 |
224 +--------+----------------------------+
225
226
227 </help>
228 <citations>
229 <citation type="doi">10.1186/gb4161</citation> <!-- NOTE(review): presumably the publication to cite for this tool; confirm the DOI resolves -->
230 </citations>
231
232 </tool>