Mercurial > repos > blankenberg > naive_variant_caller
comparison tools/naive_variant_caller.xml @ 1:ae6edc0012ba
Populate naive_variant_caller repository.
author | Daniel Blankenberg <dan@bx.psu.edu> |
---|---|
date | Thu, 29 Aug 2013 10:54:14 -0400 |
parents | |
children | 8398666758e3 |
comparison
equal
deleted
inserted
replaced
0:0fa83c466e9d | 1:ae6edc0012ba |
---|---|
1 <tool id="naive_variant_caller" name="Naive Variant Caller" version="0.0.1"> | |
2 <description> - tabulate variable sites from BAM datasets</description> | |
3 <requirements> <!-- Package dependencies of the tool, each pinned to an exact version. --> | |
4 <requirement type="package" version="1.7.1">numpy</requirement> | |
5 <requirement type="package" version="0.0.1">pyBamParser</requirement> | |
6 <requirement type="package" version="0.0.1">pyBamTools</requirement> | |
7 </requirements> | |
8 <stdio> <!-- Any non-zero exit code, positive or negative, is reported as a fatal error. --> | |
9 <exit_code range="1:" err_level="fatal" /> | |
10 <exit_code range=":-1" err_level="fatal" /> | |
11 </stdio> | |
12 <command interpreter="python">naive_variant_caller.py | |
13 -o "${output_vcf}" | |
14 ## Emit one -b/-i pair per entry of the input_bams repeat; -i is read from the bam_index metadata of the same BAM dataset. | |
15 #for $input_bam in $reference_source.input_bams: | |
16 -b "${input_bam.input_bam}" | |
17 -i "${input_bam.input_bam.metadata.bam_index}" | |
18 #end for | |
19 ## A non-history reference resolves to the path field of the selected data table entry; a history reference is passed only when a dataset was chosen. | |
20 #if $reference_source.reference_source_selector != "history": | |
21 -r "${reference_source.ref_file.fields.path}" | |
22 #elif $reference_source.ref_file: | |
23 -r "${reference_source.ref_file}" | |
24 #end if | |
25 | |
26 #for $region in $regions: | |
27 --region "${region.chromosome}:${region.start}-${region.end}" | |
28 #end for | |
29 | |
30 ${variants_only} | |
31 | |
32 ${use_strand} | |
33 | |
34 --ploidy "${ploidy}" | |
35 | |
36 --min_support_depth "${min_support_depth}" | |
37 ## The two quality thresholds are optional and are only passed when a value was entered. | |
38 #if str($min_base_quality): | |
39 --min_base_quality "${min_base_quality}" | |
40 #end if | |
41 | |
42 #if str($min_mapping_quality): | |
43 --min_mapping_quality "${min_mapping_quality}" | |
44 #end if | |
45 | |
46 --coverage_dtype "${coverage_dtype}" | |
47 | |
48 --allow_out_of_bounds_positions | |
49 | |
50 </command> | |
51 <inputs> | |
52 <conditional name="reference_source"> <!-- Both branches declare input_bams and ref_file, the names the command template reads through reference_source. --> | |
53 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | |
54 <option value="cached">Locally cached</option> | |
55 <option value="history">History</option> | |
56 </param> | |
57 <when value="cached"> | |
58 <repeat name="input_bams" title="BAM file" min="1" > | |
59 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
60 <validator type="unspecified_build" /> | |
61 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> | |
62 </param> | |
63 </repeat> | |
64 <param name="ref_file" type="select" label="Using reference genome" > | |
65 <options from_data_table="sam_fa_indexes"> | |
66 <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...--> | |
67 </options> | |
68 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
69 </param> | |
70 </when> | |
71 <when value="history"> <!-- FIX ME!!!! --> | |
72 <repeat name="input_bams" title="BAM file" min="1" > | |
73 <param name="input_bam" type="data" format="bam" label="BAM file" > | |
74 </param> | |
75 </repeat> | |
76 <param name="ref_file" type="data" format="fasta" label="Using reference file" optional="True" /> | |
77 </when> | |
78 </conditional> | |
79 | |
80 <repeat name="regions" title="Restrict to regions" min="0" > <!-- Each entry is rendered by the command template as one region option of the form chromosome:start-end. --> | |
81 <param name="chromosome" type="text" value="" optional="False" label="Chromosome" /> | |
82 <param name="start" type="integer" value="" optional="True" label="Start" /> | |
83 <param name="end" type="integer" value="" optional="True" label="End" /> | |
84 </repeat> | |
85 | |
86 <!-- TODO: enhance filtering --> | |
87 <param name="min_support_depth" type="integer" value="0" min="0" label="Minimum number of reads needed to consider a REF/ALT" /> <!-- Always passed to the script. --> | |
88 <param name="min_base_quality" type="integer" value="" label="Minimum base quality" optional="True" /> <!-- Optional: when left empty the command template omits this option. --> | |
89 <param name="min_mapping_quality" type="integer" value="" label="Minimum mapping quality" optional="True" /> <!-- Optional: when left empty the command template omits this option. --> | |
90 | |
91 <param name="ploidy" type="integer" value="2" min="1" label="Ploidy" /> | |
92 <param name="variants_only" type="boolean" truevalue="--variants_only" falsevalue="" checked="False" label="Only write out positions with possible alternate alleles"/> <!-- The selected true/false value is substituted verbatim into the command line. --> | |
93 | |
94 <param name="use_strand" type="boolean" truevalue="--use_strand" falsevalue="" checked="False" label="Report counts by strand"/> <!-- The selected true/false value is substituted verbatim into the command line. --> | |
95 | |
96 <param name="coverage_dtype" type="select" label="Choose the dtype to use for storing coverage information" help="This affects the maximum recorded value for a position, e.g. uint8 would be 255 coverage, but will require the least amount of RAM"> <!-- The chosen value is passed unchanged as the coverage_dtype option. --> | |
97 <option value="uint8">uint8</option> | |
98 <option value="uint16" selected="True">uint16</option> <!-- preselected choice --> | |
99 <option value="uint32">uint32</option> | |
100 <option value="uint64">uint64</option> | |
101 </param> | |
102 | |
103 </inputs> | |
104 <outputs> <!-- Single VCF dataset; the command template hands its path to the script through the -o option. --> | |
105 <data format="vcf" name="output_vcf" /> | |
106 </outputs> | |
107 <help> | |
108 **What it does** | |
109 | |
110 This tool is a naive variant caller that processes aligned sequencing reads from the BAM format and produces a VCF file containing per position variant calls. This tool allows multiple BAM files to be provided as input and utilizes read group information to make calls for individual samples. | |
111 | |
112 User configurable options allow filtering reads that do not pass mapping or base quality thresholds and minimum per base read depth; users can also specify the ploidy and whether to consider each strand separately. | |
113 | |
114 In addition to calling alternate alleles based upon simple ratios of nucleotides at a position, per base nucleotide counts are also provided. A custom tag, NC, is used within the Genotype fields. The NC field is a comma-separated listing of nucleotide counts in the form of &lt;nucleotide&gt;=&lt;count&gt;, where a plus or minus character is prepended to indicate strand, if the strandedness option was specified. | |
115 | |
116 | |
117 ------ | |
118 | |
119 **Inputs** | |
120 | |
121 Accepts one or more BAM input files and a reference genome from the built-in list or from a FASTA file in your history. | |
122 | |
123 | |
124 **Outputs** | |
125 | |
126 The output is in VCF format. | |
127 | |
128 Example VCF output line, without reporting by strand: | |
129 ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:A=9,C=5,T=9629,G=15,`` | |
130 | |
131 Example VCF output line, when reporting by strand: | |
132 ``chrM 16029 . T G,A,C . . AC=15,9,5;AF=0.00155311658729,0.000931869952371,0.000517705529095 GT:AC:AF:NC 0/0:15,9,5:0.00155311658729,0.000931869952371,0.000517705529095:+T=3972,-A=9,-C=5,-T=5657,-G=15,`` | |
133 | |
134 **Options** | |
135 | |
136 Reference Genome: | |
137 | |
138 Ensure that you have selected the correct reference genome, either from the list of built-in genomes or by selecting the corresponding FASTA file from your history. | |
139 | |
140 Restrict to regions: | |
141 | |
142 You can specify any number of regions on which you would like to receive results. You can specify just a chromosome name, or a chromosome name and start position, or a chromosome name and start and end position for the set of desired regions. | |
143 | |
144 Minimum number of reads needed to consider a REF/ALT: | |
145 | |
146 This value declares the minimum number of reads containing a particular base at each position in order to list and use said allele in genotyping calls. Default is 0. | |
147 | |
148 Minimum base quality: | |
149 | |
150 The minimum base quality score needed for the position in a read to be used for nucleotide counts and genotyping. Default is no filter. | |
151 | |
152 Minimum mapping quality: | |
153 | |
154 The minimum mapping quality score needed to consider a read for nucleotide counts and genotyping. Default is no filter. | |
155 | |
156 Ploidy: | |
157 | |
158 The number of genotype calls to make at each reported position. | |
159 | |
160 Only write out positions with possible alternate alleles: | |
161 | |
162 When set, only positions which have at least one non-reference nucleotide that passes the declared filters will be present in the output. | |
163 | |
164 Report counts by strand: | |
165 | |
166 When set, nucleotide counts (NC) will be reported in reference to the aligned read's source strand. Reported as: &lt;strand&gt;&lt;BASE&gt;=&lt;COUNT&gt;. | |
167 | |
168 Choose the dtype to use for storing coverage information: | |
169 | |
170 This controls the maximum depth value for each nucleotide/position/strand (when specified). Smaller dtypes require less memory, but have lower maximum values. | |
171 | |
172 +--------+----------------------------+ | |
173 | name | maximum coverage value | | |
174 +========+============================+ | |
175 | uint8 | 255 | | |
176 +--------+----------------------------+ | |
177 | uint16 | 65,535 | | |
178 +--------+----------------------------+ | |
179 | uint32 | 4,294,967,295 | | |
180 +--------+----------------------------+ | |
181 | uint64 | 18,446,744,073,709,551,615 | | |
182 +--------+----------------------------+ | |
183 | |
184 ------ | |
185 | |
186 **Citation** | |
187 | |
188 If you use this tool, please cite Blankenberg D, et al. *In preparation.* | |
189 | |
190 </help> | |
191 <tests> | |
192 <test> <!-- History-reference case: one BAM and the phiX174 FASTA, regions set to 0 (presumably zero repeat entries; confirm), empty quality thresholds, ploidy 2, uint8 coverage; the output check uses compare="contains". --> | |
193 <param name="reference_source_selector" value="history" /> | |
194 <param name="input_bam" value="fake_phiX174_reads_1.bam" ftype="bam" /> | |
195 <param name="ref_file" value="phiX174.fasta" ftype="fasta" /> | |
196 <param name="regions" value="0" /> | |
197 <param name="min_support_depth" value="0" /> | |
198 <param name="min_base_quality" value="" /> | |
199 <param name="min_mapping_quality" value="" /> | |
200 <param name="ploidy" value="2" /> | |
201 <param name="variants_only" value="False" /> | |
202 <param name="use_strand" value="False" /> | |
203 <param name="coverage_dtype" value="uint8" /> | |
204 <output name="output_vcf" file="fake_phiX174_reads_1_test_out_1.vcf" compare="contains" /> | |
205 </test> | |
206 </tests> | |
207 | |
208 </tool> |