comparison lumpy.xml @ 0:796552c157de draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit d06124e8a097f3f665b4955281f40fe811eaee64
author artbio
date Mon, 24 Jul 2017 08:03:17 -0400
parents
children 1ed8619a5611
comparison
equal deleted inserted replaced
-1:000000000000 0:796552c157de
1 <tool id="lumpy" name="lumpy-sv" version="1.0.0">
2 <description>find structural variants</description>
3 <requirements>
4 <requirement type="package" version="0.2.13">lumpy-sv</requirement>
5 <requirement type="package" version="1.3.1">samtools</requirement>
6 <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
7 </requirements>
8 <stdio>
9 <exit_code range="1:" level="fatal" description="Tool exception" />
10 </stdio>
11 <command detect_errors="exit_code"><![CDATA[
12 #import re
13 #set one_sample_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_file.element_identifier))
14 #if $analysis_type.analysis_type_list == "one_sample":
15 ln -f -s '$analysis_type.input_file' '$one_sample_bam' &&
16 #else:
17 #set sample_a_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_file.element_identifier))
18 #set sample_b_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_fileB.element_identifier))
19 #if $sample_a_bam == $sample_b_bam:
20 #set sample_a_bam = "%s_a" % str($sample_a_bam)
21 #set sample_b_bam = "%s_b" % str($sample_b_bam)
22 #end if
23 ln -f -s '$analysis_type.input_file' '$sample_a_bam' &&
24 ln -f -s '$analysis_type.input_fileB' '$sample_b_bam' &&
25 #end if
26
27 #if $analysis_type.analysis_type_list == "one_sample":
28
29 #if $seq_method.seq_method_list == "paired-end":
30 samtools view -u -F 1294 '$one_sample_bam' | samtools sort -O bam -o input.discordants.bam &&
31 samtools view -h '$one_sample_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
32 samtools view '$one_sample_bam'
33 |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandev.txt &&
34 mean=\$(cat meandev.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
35 stdev=\$(cat meandev.txt | sed -r s/mean:.+stdev://) &&
36 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
37 #if $output_format == "BEDPE":
38 -b
39 #end if
40 -pe id:'$one_sample_bam',bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$mean",stdev:"\$stdev",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
41 -sr id:'$one_sample_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
42 #elif $seq_method.seq_method_list == "single-read":
43 samtools view -h '$one_sample_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
44 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
45 #if $output_format == "BEDPE":
46 -b
47 #end if
48 -sr id:'$one_sample_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
49 #end if
50 #else:
51 #if $seq_method.seq_method_list == "paired-end":
52 samtools view -u -F 1294 '$sample_a_bam' | samtools sort -O bam -o input.discordants.bam &&
53 samtools view -u -F 1294 '$sample_b_bam' | samtools sort -O bam -o input.B.discordants.bam &&
54 samtools view -h '$sample_a_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
55 samtools view -h '$sample_b_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.B.splitters.bam &&
56 samtools view '$sample_a_bam'
57 |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandevA.txt &&
58 samtools view '$sample_b_bam'
59 |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLengthB -X 4 -N $seq_method.additional_params.samplingValue -o input.B.lib.histo > meandevB.txt &&
60 meanA=\$(cat meandevA.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
61 meanB=\$(cat meandevB.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
62 stdevA=\$(cat meandevA.txt | sed -r s/mean:.+stdev://) &&
63 stdevB=\$(cat meandevB.txt | sed -r s/mean:.+stdev://) &&
64 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
65 #if $output_format == "BEDPE":
66 -b
67 #end if
68 -pe id:inputA.bam,bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$meanA",stdev:"\$stdevA",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
69 -pe id:inputB.bam,bam_file:input.B.discordants.bam,histo_file:input.B.lib.histo,mean:"\$meanB",stdev:"\$stdevB",read_length:$analysis_type.readLengthB,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
70 -sr id:inputA.bam,bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
71 -sr id:inputB.bam,bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
72 #elif $seq_method.seq_method_list == "single-read":
73 samtools view -h '$sample_a_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
74 samtools view -h '$sample_b_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.B.splitters.bam &&
75 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
76 #if $output_format == "BEDPE":
77 -b
78 #end if
79 -sr id:'$sample_a_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
80 -sr id:'$sample_b_bam',bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
81 #end if
82 #end if
83
84 ]]></command>
85 <!-- basic error handling -->
86 <inputs>
87 <conditional name="analysis_type">
88 <param help="Single or paired conditions (eg tumor vs normal)" label="Input(s)" name="analysis_type_list" type="select">
89 <option selected="True" value="one_sample">One Sample</option>
90 <option value="two_sample">Two samples</option>
91 </param>
92 <when value="one_sample">
93 <param format="bam" name="input_file" type="data" label="One BAM alignment file produced by BWA-mem"/>
94 <param name="readLength" value="151" type="integer" label="read length" help="e.g. 151 nt" />
95 </when>
96 <when value="two_sample">
97 <param format="bam" name="input_file" type="data" label="One BAM alignment file produced by BWA-mem"/>
98 <param name="readLength" value="151" type="integer" label="read length" help="e.g. 151 nt" />
99 <param format="bam" name="input_fileB" type="data" label="One BAM alignment file produced by BWA-mem"/>
100 <param name="readLengthB" value="151" type="integer" label="read length" help="e.g. 151 nt" />
101 </when>
102 </conditional>
103 <conditional name="seq_method">
104 <param help="Paired-end or single-read sequencing" label="Sequencing method" name="seq_method_list" type="select">
105 <option selected="True" value="paired-end">Paired-end sequencing</option>
106 <option value="single-read">Single-read sequencing</option>
107 </param>
108 <when value="paired-end">
109 <section name="additional_params" title="Additional Options" expanded="False">
110 <param name="samplingValue" value="100000" type="integer" label="number of reads sampled to compute mean and stdev of the insert size" help="e.g. 100000" />
111 <param name="mw" value="4" type="integer" label="-mw" help="minimum weight across all samples for a call (default: 4)" />
112 <param name="tt" value="0" type="integer" label="-tt" help="trim threshold (default: 0)" />
113 <param name="min_non_overlap" value="101" type="integer" label="min_non_overlap" help="e.g. 101" />
114 <param name="discordant_z" value="5" type="integer" label="discordant_z" help="e.g. 5" />
115 <param name="back_distance" value="10" type="integer" label="back_distance" help="e.g. 10" />
116 <param name="weight" value="1" type="integer" label="weight" help="e.g. 1" />
117 <param name="min_mapping_threshold" value="20" type="integer" label="min_mapping_threshold" help="e.g. 20" />
118 <param name="probability_curve" argument="-P" type="boolean" truevalue="-P" falsevalue="" checked="true" label="output probability curve for each variant"/>
119 <param name="evidence" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="true" label="show evidence for each call"/>
120 </section>
121 </when>
122 <when value="single-read">
123 <section name="additional_params" title="Additional Options" expanded="False">
124 <param name="mw" value="4" type="integer" label="-mw" help="minimum weight across all samples for a call (default: 4)" />
125 <param name="tt" value="0" type="integer" label="-tt" help="trim threshold (default: 0)" />
126 <param name="back_distance" value="10" type="integer" label="back_distance" help="e.g. 10" />
127 <param name="weight" value="1" type="integer" label="weight" help="e.g. 1" />
128 <param name="min_mapping_threshold" value="20" type="integer" label="min_mapping_threshold" help="e.g. 20" />
129 <param name="probability_curve" argument="-P" type="boolean" truevalue="-P" falsevalue="" checked="false" label="output probability curve for each variant"/>
130 <param name="evidence" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="false" label="show evidence for each call"/>
131 </section>
132 </when>
133
134 </conditional>
135 <param help="get variant calling in vcf or BEDPE format" label="variant calling format" name="output_format" type="select">
136 <option selected="True" value="vcf">vcf</option>
137 <option value="BEDPE">BEDPE</option>
138 </param>
139 </inputs>
140
141 <outputs>
142 <data format="tabular" name="histogram" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.lib.histo">
143 <filter>seq_method['seq_method_list'] == "paired-end"</filter>
144 </data>
145 <data format="tabular" name="histogramB" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.B.lib.histo">
146 <filter>seq_method['seq_method_list'] == "paired-end"</filter>
147 <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
148 </data>
149 <data format="bam" name="splits" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.splitters.bam"/>
150 <data format="bam" name="splitsB" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.B.splitters.bam">
151 <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
152 </data>
153 <data format="bam" name="discordants" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.discordants.bam">
154 <filter>seq_method['seq_method_list'] == "paired-end"</filter>
155 </data>
156 <data format="bam" name="discordantsB" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.B.discordants.bam">
157 <filter>seq_method['seq_method_list'] == "paired-end"</filter>
158 <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
159 </data>
160 <data format="vcf" name="vcf_call" label="Lumpy Variant Calling">
161 <change_format>
162 <when format="tabular" input="output_format" value="BEDPE" />
163 </change_format>
164 </data>
165 </outputs>
166
167 <tests>
168 <test>
169 <param name="analysis_type_list" value="one_sample" />
170 <param name="input_file" value="sr.input.bam" ftype="bam"/>
171 <param name="seq_method_list" value="single-read" />
172 <param name="mw" value="4"/>
173 <param name="tt" value="0"/>
174 <param name="back_distance" value="10"/>
175 <param name="weight" value="1" />
176 <param name="min_mapping_threshold" value="20" />
177 <output name="vcf_call" file="output.vcf" ftype="vcf"/>
178 </test>
179 <test>
180 <param name="analysis_type_list" value="one_sample" />
181 <param name="input_file" value="sr.input.bam" ftype="bam"/>
182 <param name="seq_method_list" value="single-read" />
183 <param name="mw" value="4"/>
184 <param name="tt" value="0"/>
185 <param name="back_distance" value="10"/>
186 <param name="weight" value="1" />
187 <param name="min_mapping_threshold" value="20" />
188 <param name="evidence" value="true" />
189 <param name="probability_curve" value="true" />
190 <output name="vcf_call" file="output_extended.vcf" ftype="vcf" compare="sim_size"/>
191 </test>
192 <test>
193 <param name="analysis_type_list" value="two_sample" />
194 <param name="input_file" value="sr.input.bam" ftype="bam"/>
195 <param name="input_fileB" value="sr.input.bam" ftype="bam"/>
196 <param name="seq_method_list" value="single-read" />
197 <param name="mw" value="4"/>
198 <param name="tt" value="0"/>
199 <param name="back_distance" value="10"/>
200 <param name="weight" value="1" />
201 <param name="min_mapping_threshold" value="20" />
202 <output name="vcf_call" file="output_two.vcf" ftype="vcf"/>
203 </test>
204 </tests>
205
206 <help>
207
208 **Input(s)**
209
210 *One sample* : lumpy searches for structural variations inside a single sequencing dataset
211
212 *Two samples*: lumpy searches for structural variations inside and across two sequencing datasets from two samples
213
214 Analysis of sample replicates is not implemented yet in this wrapper
215
216 *BAM files*: Only BAM alignments produced by BWA-mem have been tested with this tool
217
218 **Sequencing method**
219
220 *Paired-end sequencing*: Both ends of library fragments have been sequenced, resulting in two paired sequencing datasets
221
222 *Single-read sequencing*: Only one end of each library fragment has been sequenced, resulting in a single sequencing dataset. Under these conditions, evidence of structural variation is obtained only from split-read alignments
223
224 *Read length*: The length of the sequencing reads in the library. This information is required only for paired-end sequencing data
225
226 *Additional options*: refer to lumpy-sv_ documentation and the publication (doi 10.1186/gb-2014-15-6-r84)
227
228 **lumpy-sv manual**
229
230 Read the lumpy-sv_ documentation for details on using lumpy.
231
232 .. _lumpy-sv: https://github.com/arq5x/lumpy-sv
233
234 **lumpy options**
235
236 v 0.2.13
237 Author: Ryan Layer (rl6sf@virginia.edu)
238
239 Summary: Find structural variations in various signals.
240
241 Options::
242 <![CDATA[
243
244 -g Genome file (defines chromosome order)
245 -e Show evidence for each call
246 -w File read windows size (default 1000000)
247 -mw minimum weight for a call
248 -msw minimum per-sample weight for a call
249 -tt trim threshold
250 -x exclude file bed file
251 -t temp file prefix, must be to a writeable directory
252 -P output probability curve for each variant
253 -b output BEDPE instead of VCF
254 -sr bam_file:<file name>,
255 id:<sample name>,
256 back_distance:<distance>,
257 min_mapping_threshold:<mapping quality>,
258 weight:<sample weight>,
259 min_clip:<minimum clip length>,
260 read_group:<string>
261
262 -pe bam_file:<file name>,
263 id:<sample name>,
264 histo_file:<file name>,
265 mean:<value>,
266 stdev:<value>,
267 read_length:<length>,
268 min_non_overlap:<length>,
269 discordant_z:<z value>,
270 back_distance:<distance>,
271 min_mapping_threshold:<mapping quality>,
272 weight:<sample weight>,
273 read_group:<string>
274
275 -bedpe bedpe_file:<bedpe file>,
276 id:<sample name>,
277 weight:<sample weight>
278 ]]>
279 </help>
280
281 <citations>
282 <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
283 </citations>
284 </tool>