Mercurial > repos > artbio > lumpy_smoove
comparison lumpy_smoove.xml @ 3:65b400409455 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit c52939d44f8e8287ad4068949daadf616879f008"
author | artbio |
---|---|
date | Wed, 26 Aug 2020 12:24:07 -0400 |
parents | 49a8a327cc72 |
children | 49da975ba395 |
comparison
equal
deleted
inserted
replaced
2:49a8a327cc72 | 3:65b400409455 |
---|---|
1 <tool id="lumpy_smoove" name="lumpy_smoove" version="0.4.0"> | 1 <tool id="lumpy_smoove" name="lumpy_smoove" version="0.5.0"> |
2 <description>find structural variants using the smoove workflow</description> | 2 <description>find structural variants using the smoove workflow</description> |
3 <macros> | 3 <macros> |
4 <import>macro_lumpy_smoove.xml</import> | 4 <import>macro_lumpy_smoove.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
12 <command detect_errors="exit_code"><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
13 @pipefail@ | 13 @pipefail@ |
14 @set_fasta_index@ | 14 @set_fasta_index@ |
15 ln -s $normal_bam normal.bam && | 15 ln -s $normal_bam normal.bam && |
16 ln -s $tumor_bam tumor.bam && | 16 ln -s $tumor_bam tumor.bam && |
17 | 17 samtools index -@ \${GALAXY_SLOTS:-4} normal.bam && |
18 smoove call -x --name output | 18 samtools index -@ \${GALAXY_SLOTS:-4} tumor.bam && |
19 | |
20 smoove call --name output | |
19 #if $set_exclusion.choices=="yes": | 21 #if $set_exclusion.choices=="yes": |
20 --exclude $bedmask | 22 --exclude $bedmask |
21 #end if | 23 #end if |
22 --fasta reference.fa -p \${GALAXY_SLOTS:-4} normal.bam tumor.bam && | 24 --fasta reference.fa |
23 gunzip output-smoove.vcf.gz | 25 --processes \${GALAXY_SLOTS:-4} |
24 #if $prpos=="no": | 26 --genotype |
25 && sed -i -E 's/;PRPOS=.+\tGT/\tGT/g' output-smoove.vcf | 27 #if $prpos=="no": |
26 #end if | 28 --removepr |
27 | 29 #end if |
30 normal.bam tumor.bam && | |
31 ls -latr && | |
32 gunzip output-smoove.genotyped.vcf.gz | |
28 | 33 |
29 ]]></command> | 34 ]]></command> |
30 <inputs> | 35 <inputs> |
31 <expand macro="reference_source_conditional" /> | 36 <expand macro="reference_source_conditional" /> |
32 <param format="bam" name="normal_bam" type="data" label="BAM alignments from the normal sample"/> | 37 <param format="bam" name="normal_bam" type="data" label="BAM alignments from the normal sample"/> |
47 <option value="yes">Yes</option> | 52 <option value="yes">Yes</option> |
48 </param> | 53 </param> |
49 </inputs> | 54 </inputs> |
50 | 55 |
51 <outputs> | 56 <outputs> |
52 <data format="vcf" name="vcf_call" label="lumpy-smoove Variant Calling" from_work_dir="./output-smoove.vcf" /> | 57 <data format="vcf" name="vcf_call" label="lumpy-smoove Variant Calling" from_work_dir="./output-smoove.genotyped.vcf" /> |
53 </outputs> | 58 </outputs> |
54 | 59 |
55 <tests> | 60 <tests> |
56 <test> | 61 <test> |
57 <param name="reference_source_selector" value="history" /> | 62 <param name="reference_source_selector" value="history" /> |
58 <param name="ref_file" value="chrI-ce11.fa"/> | 63 <param name="ref_file" value="chrI-ce11.fa"/> |
59 <param name="normal_bam" value="celegans_1.bam"/> | 64 <param name="normal_bam" value="celegans_RG_1.bam"/> |
60 <param name="tumor_bam" value="celegans_2.bam"/> | 65 <param name="tumor_bam" value="celegans_RG_2.bam"/> |
61 <param name="choices" value="yes"/> | 66 <param name="choices" value="yes"/> |
62 <param name="bedmask" value="exclude.bed"/> | 67 <param name="bedmask" value="exclude.bed"/> |
63 <param name="prpos" value="no"/> | 68 <param name="prpos" value="no"/> |
64 <output name="vcf_call" ftype="vcf" file="result-1.vcf" lines_diff="4"/> | 69 <output name="vcf_call" ftype="vcf" file="result-1.vcf" lines_diff="6"/> |
65 </test> | 70 </test> |
66 <test> | 71 <test> |
67 <param name="reference_source_selector" value="history" /> | 72 <param name="reference_source_selector" value="history" /> |
68 <param name="ref_file" value="chrI-ce11.fa"/> | 73 <param name="ref_file" value="chrI-ce11.fa"/> |
69 <param name="normal_bam" value="celegans_1.bam"/> | 74 <param name="normal_bam" value="celegans_RG_1.bam"/> |
70 <param name="tumor_bam" value="celegans_2.bam"/> | 75 <param name="tumor_bam" value="celegans_RG_2.bam"/> |
71 <param name="choices" value="no"/> | 76 <param name="choices" value="no"/> |
72 <param name="prpos" value="no"/> | 77 <param name="prpos" value="no"/> |
73 <output name="vcf_call" ftype="vcf" file="result-2.vcf" lines_diff="4"/> | 78 <output name="vcf_call" ftype="vcf" file="result-2.vcf" lines_diff="6"/> |
74 </test> | 79 </test> |
75 <test> | 80 <test> |
76 <param name="reference_source_selector" value="history" /> | 81 <param name="reference_source_selector" value="history" /> |
77 <param name="ref_file" value="chrI-ce11.fa"/> | 82 <param name="ref_file" value="chrI-ce11.fa"/> |
78 <param name="normal_bam" value="celegans_2.bam"/> | 83 <param name="normal_bam" value="celegans_RG_2.bam"/> |
79 <param name="tumor_bam" value="celegans_1.bam"/> | 84 <param name="tumor_bam" value="celegans_RG_1.bam"/> |
80 <param name="choices" value="no"/> | 85 <param name="choices" value="no"/> |
81 <param name="prpos" value="no"/> | 86 <param name="prpos" value="no"/> |
82 <output name="vcf_call" ftype="vcf" file="result-3.vcf" lines_diff="4"/> | 87 <output name="vcf_call" ftype="vcf" file="result-3.vcf" lines_diff="6"/> |
83 </test> | 88 </test> |
84 <test> | 89 <test> |
85 <param name="reference_source_selector" value="history" /> | 90 <param name="reference_source_selector" value="history" /> |
86 <param name="ref_file" value="chrI-ce11.fa"/> | 91 <param name="ref_file" value="chrI-ce11.fa"/> |
87 <param name="normal_bam" value="celegans_1.bam"/> | 92 <param name="normal_bam" value="celegans_RG_1.bam"/> |
88 <param name="tumor_bam" value="celegans_2.bam"/> | 93 <param name="tumor_bam" value="celegans_RG_2.bam"/> |
89 <param name="choices" value="no"/> | 94 <param name="choices" value="no"/> |
90 <param name="prpos" value="yes"/> | 95 <param name="prpos" value="yes"/> |
91 <output name="vcf_call" ftype="vcf" file="result-4.vcf" lines_diff="4"/> | 96 <output name="vcf_call" ftype="vcf" file="result-4.vcf" lines_diff="6"/> |
92 </test> | 97 </test> |
93 | 98 |
94 </tests> | 99 </tests> |
95 | 100 |
96 <help> | 101 <help> |
97 | |
98 **smoove** simplifies and speeds calling and genotyping SVs for short reads. It also improves | 102 **smoove** simplifies and speeds calling and genotyping SVs for short reads. It also improves |
99 specificity by removing many spurious alignment signals that are indicative of low-level | 103 specificity by removing many spurious alignment signals that are indicative of low-level |
100 noise and often contribute to spurious calls. | 104 noise and often contribute to spurious calls. |
101 | 105 |
102 There is a blog-post describing smoove in more detail | 106 There is a blog-post describing smoove in more detail |
103 here: https://brentp.github.io/post/smoove/ | 107 here: https://brentp.github.io/post/smoove/ |
104 | 108 |
105 Currently, this Galaxy tool only wraps smoove for 2 samples (bam normal and tumor inputs), | 109 Currently, this Galaxy tool only wraps smoove for 2 samples (bam normal and tumor inputs), |
106 which translates to the command line:: | 110 which translates to the command line:: |
107 | 111 |
108 <![CDATA[smoove call -x --name my-cohort --exclude $bed --fasta $fasta -p $threads /path/to/*.bam]]> | 112 <![CDATA[smoove call --name my-cohort --exclude $bed --fasta $fasta -p $threads --genotype [--removepr] /path/to/*.bam]]> |
109 | 113 |
110 Note that the --genotype option which allows to stream smoove to svtyper is not implemented | |
111 due to an error returned by svtyper in the smoove conda environment | |
112 | 114 |
113 the --exclude $bed is highly recommended as it can be used to ignore reads that overlap | 115 the --exclude $bed is highly recommended as it can be used to ignore reads that overlap |
114 problematic regions. | 116 problematic regions. |
115 | 117 |
116 A good set of regions for GRCh37 is https://github.com/hall-lab/speedseq/blob/master/annotations/ceph18.b37.lumpy.exclude.2014-01-15.bed | 118 A good set of regions for GRCh37 can be found here_ |
117 | 119 |
118 And for hg38 https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed | 120 .. _here: https://github.com/hall-lab/speedseq/blob/master/annotations/ceph18.b37.lumpy.exclude.2014-01-15.bed |
121 | |
122 | |
123 And a good set for GRCh38 can be found there_ | |
124 | |
125 .. _there: https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed | |
126 | |
119 | 127 |
120 smoove will:: | 128 smoove will:: |
121 | 129 |
122 1. parallelize calls to lumpy_filter to extract split and discordant reads required by lumpy | 130 1. parallelize calls to lumpy_filter to extract split and discordant reads required by lumpy |
123 | 131 |
128 | 136 |
129 3. calculate per-sample metrics for mean, standard deviation, and distribution of insert | 137 3. calculate per-sample metrics for mean, standard deviation, and distribution of insert |
130 size as required by lumpy. | 138 size as required by lumpy. |
131 | 139 |
132 4. stream output of lumpy directly into multiple svtyper processes for parallel-by-region | 140 4. stream output of lumpy directly into multiple svtyper processes for parallel-by-region |
133 genotyping while lumpy is still running. This option is not currently implemented in Galaxy | 141 genotyping while lumpy is still running. |
134 | 142 |
135 5. sort, compress, and index final VCF. | 143 5. sort, compress, and index final VCF (but this Galaxy wrapper uncompresses the gzipped VCF output) |
136 | 144 |
137 **Input(s)** | 145 **Input(s)** |
138 | 146 |
139 | 147 * BAM files: One Bam for normal sample and one Bam for tumor sample. Only BAM alignments produced by BWA-mem have been tested with this tool |
140 *BAM files*: One Bam for normal sample and one Bam for tumor sample. | 148 |
141 Only BAM alignments produced by BWA-mem have been tested with this tool | 149 .. class:: warningmark |
142 | 150 |
143 *A bed file* describing the regions to exclude from the analysis | 151 For svtyper to run properly, it is mandatory that **BAM files contain read group information**, |
152 i.e. the @RG tag is present and populated in each BAM | |
144 | 153 |
145 | 154 |
146 *Additional options*: refer to smoove GitHub repository_ and the lumpy publication (doi 10.1186/gb-2014-15-6-r84) | 155 * A bed file describing the regions to exclude from the analysis |
156 * Additional options: refer to smoove GitHub repository_ and the lumpy publication (doi 10.1186/gb-2014-15-6-r84) | |
147 | 157 |
148 .. _repository: https://github.com/brentp/smoove | 158 .. _repository: https://github.com/brentp/smoove |
149 | 159 |
150 | 160 |
151 Options:: | 161 Options:: |