comparison lumpy_smoove.xml @ 0:ee8fc44b1655 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 515176ccca845de0b1a0c08417238bfa9ea45360"
author artbio
date Tue, 25 Aug 2020 11:35:02 -0400
parents
children 49a8a327cc72
comparison
equal deleted inserted replaced
-1:000000000000 0:ee8fc44b1655
1 <tool id="lumpy_smoove" name="lumpy_smoove" version="0.2.0">
2 <description>find structural variants using the smoove workflow</description>
3 <macros>
4 <import>macro_lumpy_smoove.xml</import>
5 </macros>
6 <requirements>
7 <requirement type="package" version="0.2.5">smoove</requirement>
8 </requirements>
9 <stdio> <!-- Declares exit codes of 1 and above as fatal. NOTE(review): the command tag below also sets detect_errors="exit_code"; confirm whether both are needed -->
10 <exit_code range="1:" level="fatal" description="Tool exception" />
11 </stdio>
12 <command detect_errors="exit_code"><![CDATA[
13 @pipefail@
14 @set_fasta_index@
15 ln -s '$normal_bam' normal.bam &&
16 ln -s '$tumor_bam' tumor.bam &&
17 ## dataset paths are quoted and smoove runs on the cores allocated by Galaxy (GALAXY_SLOTS) instead of a fixed count
18 smoove call -x --name output
19 #if $set_exclusion.choices=="yes":
20 --exclude '$set_exclusion.bedmask'
21 #end if
22 --fasta reference.fa -p \${GALAXY_SLOTS:-4} normal.bam tumor.bam &&
23 gunzip output-smoove.vcf.gz
24 #if $prpos=="no":
25 && sed -i -E 's/;PRPOS=.+\tGT/\tGT/g' output-smoove.vcf
26 #end if
27 ## the sed step above strips the PRPOS entries from output-smoove.vcf when prpos is "no"
28
29 ]]></command>
30 <inputs> <!-- reference genome selector, the two BAM inputs, optional exclusion BED, PRPOS switch -->
31 <expand macro="reference_source_conditional"/>
32 <param name="normal_bam" type="data" format="bam" label="BAM alignments from the normal sample"/>
33 <param name="tumor_bam" type="data" format="bam" label="BAM alignments from the tumor sample"/>
34 <conditional name="set_exclusion"> <!-- the bedmask dataset is only requested when choices is "yes" -->
35 <param name="choices" type="select" display="radio" label="exclude regions with a bed file">
36 <option selected="true" value="no">No</option>
37 <option value="yes">Yes</option>
38 </param>
39 <when value="no">
40 </when>
41 <when value="yes">
42 <param name="bedmask" type="data" format="bed" label="BED regions to be excluded for the analysis"/>
43 </when>
44 </conditional>
45 <param name="prpos" type="select" display="radio" label="include the PRPOS probabilities in INFO tags">
46 <option selected="true" value="no">No</option>
47 <option value="yes">Yes</option>
48 </param>
49 </inputs>
50
51 <outputs> <!-- single VCF collected from the working directory after the command has run -->
52 <data name="vcf_call" format="vcf" from_work_dir="./output-smoove.vcf" label="lumpy-smoove Variant Calling"/>
53 </outputs>
54
55 <tests> <!-- four runs on the same reference and BAM pair, varying the exclusion BED, the sample order and the PRPOS switch -->
56 <test> <!-- exclusion BED supplied, prpos set to "no" -->
57 <param name="reference_source_selector" value="history" />
58 <param name="ref_file" value="chrI-ce11.fa"/>
59 <param name="normal_bam" value="celegans_1.bam"/>
60 <param name="tumor_bam" value="celegans_2.bam"/>
61 <param name="choices" value="yes"/>
62 <param name="bedmask" value="exclude.bed"/>
63 <param name="prpos" value="no"/>
64 <output name="vcf_call" ftype="vcf" file="result-1.vcf" lines_diff="4"/>
65 </test>
66 <test> <!-- no exclusion BED, prpos set to "no" -->
67 <param name="reference_source_selector" value="history" />
68 <param name="ref_file" value="chrI-ce11.fa"/>
69 <param name="normal_bam" value="celegans_1.bam"/>
70 <param name="tumor_bam" value="celegans_2.bam"/>
71 <param name="choices" value="no"/>
72 <param name="prpos" value="no"/>
73 <output name="vcf_call" ftype="vcf" file="result-2.vcf" lines_diff="4"/>
74 </test>
75 <test> <!-- same options as the previous test, with the normal and tumor BAM files swapped -->
76 <param name="reference_source_selector" value="history" />
77 <param name="ref_file" value="chrI-ce11.fa"/>
78 <param name="normal_bam" value="celegans_2.bam"/>
79 <param name="tumor_bam" value="celegans_1.bam"/>
80 <param name="choices" value="no"/>
81 <param name="prpos" value="no"/>
82 <output name="vcf_call" ftype="vcf" file="result-3.vcf" lines_diff="4"/>
83 </test>
84 <test> <!-- no exclusion BED, prpos set to "yes" -->
85 <param name="reference_source_selector" value="history" />
86 <param name="ref_file" value="chrI-ce11.fa"/>
87 <param name="normal_bam" value="celegans_1.bam"/>
88 <param name="tumor_bam" value="celegans_2.bam"/>
89 <param name="choices" value="no"/>
90 <param name="prpos" value="yes"/>
91 <output name="vcf_call" ftype="vcf" file="result-4.vcf" lines_diff="4"/>
92 </test>
93
94 </tests>
95
96 <help>
97
98 **smoove** simplifies and speeds calling and genotyping SVs for short reads. It also improves
99 specificity by removing many spurious alignment signals that are indicative of low-level
100 noise and often contribute to spurious calls.
101
102 There is a blog-post describing smoove in more detail
103 here: https://brentp.github.io/post/smoove/
104
105 Currently, this Galaxy tool only wraps smoove for 2 samples (bam normal and tumor inputs),
106 which translates to the following command line::
107
108 <![CDATA[smoove call -x --name my-cohort --exclude $bed --fasta $fasta -p $threads /path/to/*.bam]]>
109
110 Note that the --genotype option, which streams the smoove output to svtyper, is not implemented
111 due to an error returned by svtyper in the smoove conda environment.
112
113 The --exclude $bed option is highly recommended as it can be used to ignore reads that overlap
114 problematic regions.
115
116 A good set of regions for GRCh37 is https://github.com/hall-lab/speedseq/blob/master/annotations/ceph18.b37.lumpy.exclude.2014-01-15.bed
117
118 And for hg38 https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed
119
120 smoove will::
121
122 1. parallelize calls to lumpy_filter to extract split and discordant reads required by lumpy
123
124 2. further filter lumpy_filter calls to remove high-coverage, spurious regions and user-specified chroms like 'hs37d5';
125 it will also remove reads that we've found are likely spurious signals. after this, it will
126 remove singleton reads (where the mate was removed by one of the previous filters)
127 from the discordant bams. This makes lumpy much faster and less memory-hungry.
128
129 3. calculate per-sample metrics for mean, standard deviation, and distribution of insert
130 size as required by lumpy.
131
132 4. stream output of lumpy directly into multiple svtyper processes for parallel-by-region
133 genotyping while lumpy is still running. This option is not currently implemented in Galaxy.
134
135 5. sort, compress, and index final VCF.
136
137 **Input(s)**
138
139
140 *BAM files*: One BAM for the normal sample and one BAM for the tumor sample.
141 Only BAM alignments produced by BWA-mem have been tested with this tool.
142
143 *A bed file* describing the regions to exclude from the analysis
144
145
146 *Additional options*: refer to smoove GitHub repository_ and the lumpy publication (doi 10.1186/gb-2014-15-6-r84)
147
148 .. _repository: https://github.com/brentp/smoove
149
150
151 Options::
152
153 <![CDATA[
154
155 smoove calls several programs. Those with 'Y' are found on your $PATH. Only those with '*' are required.
156
157 [Y] bgzip [ sort -> (compress) -> index ]
158 [Y] gsort [(sort) -> compress -> index ]
159 [Y] tabix [ sort -> compress -> (index)]
160 [Y] lumpy
161 [Y] lumpy_filter
162 [Y] samtools
163 [Y] svtyper
164 [Y] mosdepth [extra filtering of split and discordant files for better scaling]
165
166 [Y] duphold [(optional) annotate calls with depth changes]
167 [Y] svtools [only needed for large cohorts].
168
169 Available sub-commands are below. Each can be run with -h for additional help.
170
171 call : call lumpy (and optionally svtyper)
172 merge : merge and sort (using svtools) calls from multiple samples
173 genotype : parallelize svtyper on an input VCF
174 paste : square final calls from multiple samples (each with same number of variants)
175 plot-counts : plot counts of split, discordant reads before, after smoove filtering
176 annotate : annotate a VCF with gene and quality of SV call
177 hipstr : run hipSTR in parallel
178 cnvnator : run cnvnator in parallel
179 duphold : run duphold in parallel (this can be done by adding a flag to call or genotype)
180 ]]>
181 </help>
182
183 <citations>
184 <citation type="doi">10.1186/gb-2014-15-6-r84</citation> <!-- the lumpy publication referenced in the help text -->
185 </citations>
186 </tool>