Mercurial > repos > artbio > lumpy_sv
comparison lumpy.xml @ 0:796552c157de draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit d06124e8a097f3f665b4955281f40fe811eaee64
author | artbio |
---|---|
date | Mon, 24 Jul 2017 08:03:17 -0400 |
parents | |
children | 1ed8619a5611 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:796552c157de |
---|---|
1 <tool id="lumpy" name="lumpy-sv" version="1.0.0"> | |
2 <description>find structural variants</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.2.13">lumpy-sv</requirement> | |
5 <requirement type="package" version="1.3.1">samtools</requirement> | |
6 <requirement type="package" version="1.11.2=py27_0">numpy</requirement> | |
7 </requirements> | |
8 <stdio> | |
9 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
10 </stdio> | |
11 <command detect_errors="exit_code"><![CDATA[ | |
12 #import re | |
13 #set one_sample_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_file.element_identifier)) | |
14 #if $analysis_type.analysis_type_list == "one_sample": | |
15 ln -f -s '$analysis_type.input_file' '$one_sample_bam' && | |
16 #else: | |
17 #set sample_a_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_file.element_identifier)) | |
18 #set sample_b_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_fileB.element_identifier)) | |
19 #if $sample_a_bam == $sample_b_bam: | |
20 #set sample_a_bam = "%s_a" % str($sample_a_bam) | |
21 #set sample_b_bam = "%s_b" % str($sample_b_bam) | |
22 #end if | |
23 ln -f -s '$analysis_type.input_file' '$sample_a_bam' && | |
24 ln -f -s '$analysis_type.input_fileB' '$sample_b_bam' && | |
25 #end if | |
26 | |
27 #if $analysis_type.analysis_type_list == "one_sample": | |
28 | |
29 #if $seq_method.seq_method_list == "paired-end": | |
30 samtools view -u -F 1294 '$one_sample_bam' | samtools sort -O bam -o input.discordants.bam && | |
31 samtools view -h '$one_sample_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam && | |
32 samtools view '$one_sample_bam' | |
33 |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandev.txt && | |
34 mean=\$(cat meandev.txt | sed s/mean:// | sed -r s/stdev:.+//) && | |
35 stdev=\$(cat meandev.txt | sed -r s/mean:.+stdev://) && | |
36 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt | |
37 #if $output_format == "BEDPE": | |
38 -b | |
39 #end if | |
40 -pe id:'$one_sample_bam',bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$mean",stdev:"\$stdev",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold | |
41 -sr id:'$one_sample_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call' | |
42 #elif $seq_method.seq_method_list == "single-read": | |
43 samtools view -h '$one_sample_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam && | |
44 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt | |
45 #if $output_format == "BEDPE": | |
46 -b | |
47 #end if | |
48 -sr id:'$one_sample_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call' | |
49 #end if | |
50 #else: | |
51 #if $seq_method.seq_method_list == "paired-end": | |
52 samtools view -u -F 1294 '$sample_a_bam' | samtools sort -O bam -o input.discordants.bam && | |
53 samtools view -u -F 1294 '$sample_b_bam' | samtools sort -O bam -o input.B.discordants.bam && | |
54 samtools view -h '$sample_a_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam && | |
55 samtools view -h '$sample_b_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.B.splitters.bam && | |
56 samtools view '$sample_a_bam' | |
57 |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandevA.txt && | |
58 samtools view '$sample_b_bam' | |
59 |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLengthB -X 4 -N $seq_method.additional_params.samplingValue -o input.B.lib.histo > meandevB.txt && | |
60 meanA=\$(cat meandevA.txt | sed s/mean:// | sed -r s/stdev:.+//) && | |
61 meanB=\$(cat meandevB.txt | sed s/mean:// | sed -r s/stdev:.+//) && | |
62 stdevA=\$(cat meandevA.txt | sed -r s/mean:.+stdev://) && | |
63 stdevB=\$(cat meandevB.txt | sed -r s/mean:.+stdev://) && | |
64 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt | |
65 #if $output_format == "BEDPE": | |
66 -b | |
67 #end if | |
68 -pe id:inputA.bam,bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$meanA",stdev:"\$stdevA",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold | |
69 -pe id:inputB.bam,bam_file:input.B.discordants.bam,histo_file:input.B.lib.histo,mean:"\$meanB",stdev:"\$stdevB",read_length:$analysis_type.readLengthB,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold | |
70 -sr id:inputA.bam,bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold | |
71 -sr id:inputB.bam,bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call' | |
72 #elif $seq_method.seq_method_list == "single-read": | |
73 samtools view -h '$sample_a_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam && | |
74 samtools view -h '$sample_b_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.B.splitters.bam && | |
75 lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt | |
76 #if $output_format == "BEDPE": | |
77 -b | |
78 #end if | |
79 -sr id:'$sample_a_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold | |
80 -sr id:'$sample_b_bam',bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call' | |
81 #end if | |
82 #end if | |
83 | |
84 ]]></command> | |
85 <!-- basic error handling --> | |
86 <inputs> | |
87 <conditional name="analysis_type"> | |
88 <param help="Single or paired conditions (eg tumor vs normal)" label="Input(s)" name="analysis_type_list" type="select"> | |
89 <option selected="True" value="one_sample">One Sample</option> | |
90 <option value="two_sample">Two samples</option> | |
91 </param> | |
92 <when value="one_sample"> | |
93 <param format="bam" name="input_file" type="data" label="One BAM alignment file produced by BWA-mem"/> | |
94 <param name="readLength" value="151" type="integer" label="read length" help="e.g. 151 nt" /> | |
95 </when> | |
96 <when value="two_sample"> | |
97 <param format="bam" name="input_file" type="data" label="One BAM alignment file produced by BWA-mem"/> | |
98 <param name="readLength" value="151" type="integer" label="read length" help="e.g. 151 nt" /> | |
99 <param format="bam" name="input_fileB" type="data" label="One BAM alignment file produced by BWA-mem"/> | |
100 <param name="readLengthB" value="151" type="integer" label="read length" help="e.g. 151 nt" /> | |
101 </when> | |
102 </conditional> | |
103 <conditional name="seq_method"> | |
104 <param help="Paired-end or single-read sequencing" label="Sequencing method" name="seq_method_list" type="select"> | |
105 <option selected="True" value="paired-end">Paired-end sequencing</option> | |
106 <option value="single-read">Single-read sequencing</option> | |
107 </param> | |
108 <when value="paired-end"> | |
109 <section name="additional_params" title="Additional Options" expanded="False"> | |
110 <param name="samplingValue" value="100000" type="integer" label="number of reads to compute mean and stdev of read length" help="e.g. 10000" /> | |
111 <param name="mw" value="4" type="integer" label="-mw" help="minimum weight across all samples for a call (default: 4)" /> | |
112 <param name="tt" value="0" type="integer" label="-tt" help="trim threshold (default: 0)" /> | |
113 <param name="min_non_overlap" value="101" type="integer" label="min_non_overlap" help="e.g. 101" /> | |
114 <param name="discordant_z" value="5" type="integer" label="discordant_z" help="e.g. 5" /> | |
115 <param name="back_distance" value="10" type="integer" label="back_distance" help="e.g. 10" /> | |
116 <param name="weight" value="1" type="integer" label="weight" help="e.g. 1" /> | |
117 <param name="min_mapping_threshold" value="20" type="integer" label="min_mapping_threshold" help="e.g. 20" /> | |
118 <param name="probability_curve" argument="-P" type="boolean" truevalue="-P" falsevalue="" checked="true" label="output probability curve for each variant"/> | |
119 <param name="evidence" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="true" label="show evidence for each call"/> | |
120 </section> | |
121 </when> | |
122 <when value="single-read"> | |
123 <section name="additional_params" title="Additional Options" expanded="False"> | |
124 <param name="mw" value="4" type="integer" label="-mw" help="minimum weight across all samples for a call (default: 4)" /> | |
125 <param name="tt" value="0" type="integer" label="-tt" help="trim threshold (default: 0)" /> | |
126 <param name="back_distance" value="10" type="integer" label="back_distance" help="e.g. 10" /> | |
127 <param name="weight" value="1" type="integer" label="weight" help="e.g. 1" /> | |
128 <param name="min_mapping_threshold" value="20" type="integer" label="min_mapping_threshold" help="e.g. 20" /> | |
129 <param name="probability_curve" argument="-P" type="boolean" truevalue="-P" falsevalue="" checked="false" label="output probability curve for each variant"/> | |
130 <param name="evidence" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="false" label="show evidence for each call"/> | |
131 </section> | |
132 </when> | |
133 | |
134 </conditional> | |
135 <param help="get variant calling in vcf or BEDPE format" label="variant calling format" name="output_format" type="select"> | |
136 <option selected="True" value="vcf">vcf</option> | |
137 <option value="BEDPE">BEDPE</option> | |
138 </param> | |
139 </inputs> | |
140 | |
141 <outputs> | |
142 <data format="tabular" name="histogram" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.lib.histo"> | |
143 <filter>seq_method['seq_method_list'] == "paired-end"</filter> | |
144 </data> | |
145 <data format="tabular" name="histogramB" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.B.lib.histo"> | |
146 <filter>seq_method['seq_method_list'] == "paired-end"</filter> | |
147 <filter>analysis_type['analysis_type_list'] == "two_sample"</filter> | |
148 </data> | |
149 <data format="bam" name="splits" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.splitters.bam"/> | |
150 <data format="bam" name="splitsB" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.B.splitters.bam"> | |
151 <filter>analysis_type['analysis_type_list'] == "two_sample"</filter> | |
152 </data> | |
153 <data format="bam" name="discordants" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.discordants.bam"> | |
154 <filter>seq_method['seq_method_list'] == "paired-end"</filter> | |
155 </data> | |
156 <data format="bam" name="discordantsB" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.B.discordants.bam"> | |
157 <filter>seq_method['seq_method_list'] == "paired-end"</filter> | |
158 <filter>analysis_type['analysis_type_list'] == "two_sample"</filter> | |
159 </data> | |
160 <data format="vcf" name="vcf_call" label="Lumpy Variant Calling"> | |
161 <change_format> | |
162 <when format="tabular" input="output_format" value="BEDPE" /> | |
163 </change_format> | |
164 </data> | |
165 </outputs> | |
166 | |
167 <tests> | |
168 <test> | |
169 <param name="analysis_type_list" value="one_sample" /> | |
170 <param name="input_file" value="sr.input.bam" ftype="bam"/> | |
171 <param name="seq_method_list" value="single-read" /> | |
172 <param name="mw" value="4"/> | |
173 <param name="tt" value="0"/> | |
174 <param name="back_distance" value="10"/> | |
175 <param name="weight" value="1" /> | |
176 <param name="min_mapping_threshold" value="20" /> | |
177 <output name="vcf_call" file="output.vcf" ftype="vcf"/> | |
178 </test> | |
179 <test> | |
180 <param name="analysis_type_list" value="one_sample" /> | |
181 <param name="input_file" value="sr.input.bam" ftype="bam"/> | |
182 <param name="seq_method_list" value="single-read" /> | |
183 <param name="mw" value="4"/> | |
184 <param name="tt" value="0"/> | |
185 <param name="back_distance" value="10"/> | |
186 <param name="weight" value="1" /> | |
187 <param name="min_mapping_threshold" value="20" /> | |
188 <param name="evidence" value="true" /> | |
189 <param name="probability_curve" value="true" /> | |
190 <output name="vcf_call" file="output_extended.vcf" ftype="vcf" compare="sim_size"/> | |
191 </test> | |
192 <test> | |
193 <param name="analysis_type_list" value="two_sample" /> | |
194 <param name="input_file" value="sr.input.bam" ftype="bam"/> | |
195 <param name="input_fileB" value="sr.input.bam" ftype="bam"/> | |
196 <param name="seq_method_list" value="single-read" /> | |
197 <param name="mw" value="4"/> | |
198 <param name="tt" value="0"/> | |
199 <param name="back_distance" value="10"/> | |
200 <param name="weight" value="1" /> | |
201 <param name="min_mapping_threshold" value="20" /> | |
202 <output name="vcf_call" file="output_two.vcf" ftype="vcf"/> | |
203 </test> | |
204 </tests> | |
205 | |
206 <help> | |
207 | |
208 **Input(s)** | |
209 | |
210 *One sample* : lumpy searches for structural variations within a single sequencing dataset | |
211 | |
212 *Two samples*: lumpy searches for structural variations within and across two sequencing datasets from two samples | |
213 | |
214 Analysis of sample replicates is not implemented yet in this wrapper | |
215 | |
216 *BAM files*: Only BAM alignments produced by BWA-mem have been tested with this tool | |
217 | |
218 **Sequencing method** | |
219 | |
220 *Paired-end sequencing*: Both ends of library fragments have been sequenced, resulting in two paired sequencing datasets | |
221 | |
222 *Single-read sequencing*: Only one end of each library fragment has been sequenced, resulting in a single sequencing dataset. Under these conditions, evidence of structural variation is obtained only from split-read alignments | |
223 | |
224 *Read length*: The length of the sequencing reads in the library. This information is required only for paired-end sequencing data | |
225 | |
226 *Additional options*: refer to lumpy-sv_ documentation and the publication (doi 10.1186/gb-2014-15-6-r84) | |
227 | |
228 **lumpy-sv manual** | |
229 | |
230 Read the lumpy-sv_ documentation for details on using lumpy. | |
231 | |
232 .. _lumpy-sv: https://github.com/arq5x/lumpy-sv | |
233 | |
234 **lumpy options** | |
235 | |
236 v 0.2.13 | |
237 Author: Ryan Layer (rl6sf@virginia.edu) | |
238 | |
239 Summary: Find structural variations in various signals. | |
240 | |
241 Options:: | |
242 <![CDATA[ | |
243 | |
244 -g Genome file (defines chromosome order) | |
245 -e Show evidence for each call | |
246 -w File read windows size (default 1000000) | |
247 -mw minimum weight for a call | |
248 -msw minimum per-sample weight for a call | |
249 -tt trim threshold | |
250 -x exclude file bed file | |
251 -t temp file prefix, must be to a writeable directory | |
252 -P output probability curve for each variant | |
253 -b output BEDPE instead of VCF | |
254 -sr bam_file:<file name>, | |
255 id:<sample name>, | |
256 back_distance:<distance>, | |
257 min_mapping_threshold:<mapping quality>, | |
258 weight:<sample weight>, | |
259 min_clip:<minimum clip length>, | |
260 read_group:<string> | |
261 | |
262 -pe bam_file:<file name>, | |
263 id:<sample name>, | |
264 histo_file:<file name>, | |
265 mean:<value>, | |
266 stdev:<value>, | |
267 read_length:<length>, | |
268 min_non_overlap:<length>, | |
269 discordant_z:<z value>, | |
270 back_distance:<distance>, | |
271 min_mapping_threshold:<mapping quality>, | |
272 weight:<sample weight>, | |
273 read_group:<string> | |
274 | |
275 -bedpe bedpe_file:<bedpe file>, | |
276 id:<sample name>, | |
277 weight:<sample weight> | |
278 ]]> | |
279 </help> | |
280 | |
281 <citations> | |
282 <citation type="doi">10.1186/gb-2014-15-6-r84</citation> | |
283 </citations> | |
284 </tool> |