diff lumpy_smoove.xml @ 0:ee8fc44b1655 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 515176ccca845de0b1a0c08417238bfa9ea45360"
author artbio
date Tue, 25 Aug 2020 11:35:02 -0400
parents
children 49a8a327cc72
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lumpy_smoove.xml	Tue Aug 25 11:35:02 2020 -0400
@@ -0,0 +1,186 @@
+<tool id="lumpy_smoove" name="lumpy_smoove" version="0.2.0">
+    <description>find structural variants using the smoove workflow</description>
+    <macros>
+        <import>macro_lumpy_smoove.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.2.5">smoove</requirement>
+    </requirements>
+    <stdio> <!-- treat any exit code of 1 or above as a fatal "Tool exception" -->
+        <exit_code range="1:" level="fatal" description="Tool exception" />
+    </stdio>
+    <command detect_errors="exit_code"><![CDATA[
+    @pipefail@
+    @set_fasta_index@
+    ## Quote the dataset paths so unusual characters cannot split the shell command.
+    ln -s '$normal_bam' normal.bam &&
+    ln -s '$tumor_bam' tumor.bam &&
+    ## Thread count follows the slots granted by Galaxy (4 when unset) instead of a fixed 24.
+    smoove call -x --name output
+        #if $set_exclusion.choices == "yes":
+            --exclude '$set_exclusion.bedmask'
+        #end if
+        --fasta reference.fa -p \${GALAXY_SLOTS:-4} normal.bam tumor.bam &&
+    gunzip output-smoove.vcf.gz
+    ## Unless PRPOS was requested, strip the text from ";PRPOS=" up to the tab preceding GT.
+    #if $prpos == "no":
+    && sed -i -E 's/;PRPOS=.+\tGT/\tGT/g' output-smoove.vcf
+    #end if
+    ]]></command>
+    <inputs>
+        <expand macro="reference_source_conditional"/>
+        <param name="normal_bam" type="data" format="bam" label="BAM alignments from the normal sample"/>
+        <param name="tumor_bam" type="data" format="bam" label="BAM alignments from the tumor sample"/>
+        <conditional name="set_exclusion">
+            <param name="choices" type="select" display="radio" label="exclude regions with a bed file">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="yes">
+                <param name="bedmask" type="data" format="bed" label="BED regions to be excluded for the analysis"/>
+            </when>
+            <!-- nothing further to ask for when no region is excluded -->
+            <when value="no"/>
+        </conditional>
+        <param name="prpos" type="select" display="radio" label="include the PRPOS probabilities in INFO tags">
+            <option value="no" selected="true">No</option>
+            <option value="yes">Yes</option>
+        </param>
+    </inputs>
+
+    <outputs> <!-- the decompressed VCF is collected from the job working directory -->
+        <data name="vcf_call" format="vcf" from_work_dir="./output-smoove.vcf" label="lumpy-smoove Variant Calling"/>
+    </outputs>
+
+    <tests>
+        <!-- every test uses a history reference (chrI-ce11.fa) and compares the VCF with lines_diff="4" -->
+        <test> <!-- exclusion BED supplied, PRPOS stripped -->
+            <param name="reference_source_selector" value="history"/>
+            <param name="ref_file" value="chrI-ce11.fa"/>
+            <param name="normal_bam" value="celegans_1.bam"/>
+            <param name="tumor_bam" value="celegans_2.bam"/>
+            <param name="choices" value="yes"/>
+            <param name="bedmask" value="exclude.bed"/>
+            <param name="prpos" value="no"/>
+            <output name="vcf_call" file="result-1.vcf" ftype="vcf" lines_diff="4"/>
+        </test>
+        <test> <!-- no exclusion, PRPOS stripped -->
+            <param name="reference_source_selector" value="history"/>
+            <param name="ref_file" value="chrI-ce11.fa"/>
+            <param name="normal_bam" value="celegans_1.bam"/>
+            <param name="tumor_bam" value="celegans_2.bam"/>
+            <param name="choices" value="no"/>
+            <param name="prpos" value="no"/>
+            <output name="vcf_call" file="result-2.vcf" ftype="vcf" lines_diff="4"/>
+        </test>
+        <test> <!-- normal and tumor BAMs swapped, no exclusion, PRPOS stripped -->
+            <param name="reference_source_selector" value="history"/>
+            <param name="ref_file" value="chrI-ce11.fa"/>
+            <param name="normal_bam" value="celegans_2.bam"/>
+            <param name="tumor_bam" value="celegans_1.bam"/>
+            <param name="choices" value="no"/>
+            <param name="prpos" value="no"/>
+            <output name="vcf_call" file="result-3.vcf" ftype="vcf" lines_diff="4"/>
+        </test>
+        <test> <!-- no exclusion, PRPOS kept in the INFO field -->
+            <param name="reference_source_selector" value="history"/>
+            <param name="ref_file" value="chrI-ce11.fa"/>
+            <param name="normal_bam" value="celegans_1.bam"/>
+            <param name="tumor_bam" value="celegans_2.bam"/>
+            <param name="choices" value="no"/>
+            <param name="prpos" value="yes"/>
+            <output name="vcf_call" file="result-4.vcf" ftype="vcf" lines_diff="4"/>
+        </test>
+    </tests>
+
+    <help>
+
+**smoove** simplifies and speeds calling and genotyping SVs for short reads. It also improves
+specificity by removing many spurious alignment signals that are indicative of low-level
+noise and often contribute to spurious calls.
+
+There is a blog-post describing smoove in more detail
+here: https://brentp.github.io/post/smoove/
+
+Currently, this Galaxy tool only wraps smoove for 2 samples (normal and tumor BAM inputs),
+which translates to the command line::
+
+    <![CDATA[smoove call -x --name my-cohort --exclude $bed --fasta $fasta -p $threads /path/to/*.bam]]>
+
+Note that the --genotype option, which streams smoove output to svtyper, is not implemented
+because svtyper returns an error in the smoove conda environment.
+
+The --exclude $bed option is highly recommended, as it can be used to ignore reads that overlap
+problematic regions.
+
+A good set of regions for GRCh37 is https://github.com/hall-lab/speedseq/blob/master/annotations/ceph18.b37.lumpy.exclude.2014-01-15.bed
+
+And for hg38 https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed
+
+smoove will::
+
+    1. parallelize calls to lumpy_filter to extract split and discordant reads required by lumpy
+    
+    2. further filter lumpy_filter calls to remove high-coverage, spurious regions and user-specified chroms like 'hs37d5';
+    it will also remove reads that we've found are likely spurious signals. after this, it will
+    remove singleton reads (where the mate was removed by one of the previous filters)
+    from the discordant bams. This makes lumpy much faster and less memory-hungry.
+    
+    3. calculate per-sample metrics for mean, standard deviation, and distribution of insert
+    size as required by lumpy.
+    
+    4. stream output of lumpy directly into multiple svtyper processes for parallel-by-region
+    genotyping while lumpy is still running. This option is not currently implemented in Galaxy.
+    
+    5. sort, compress, and index final VCF.
+
+**Input(s)**
+
+
+*BAM files*: One BAM for the normal sample and one BAM for the tumor sample.
+Only BAM alignments produced by BWA-mem have been tested with this tool.
+
+*A bed file* describing the regions to exclude from the analysis
+
+
+*Additional options*: refer to smoove GitHub repository_ and the lumpy publication (doi 10.1186/gb-2014-15-6-r84)
+
+.. _repository: https://github.com/brentp/smoove
+
+
+Options::
+
+    <![CDATA[
+
+    smoove calls several programs. Those with 'Y' are found on your $PATH. Only those with '*' are required.
+
+  [Y] bgzip [ sort   -> (compress) ->   index ]
+  [Y] gsort [(sort)  ->  compress   ->  index ]
+  [Y] tabix [ sort   ->  compress   -> (index)]
+  [Y] lumpy
+  [Y] lumpy_filter
+  [Y] samtools
+  [Y] svtyper
+  [Y] mosdepth [extra filtering of split and discordant files for better scaling]
+
+  [Y] duphold [(optional) annotate calls with depth changes]
+  [Y] svtools [only needed for large cohorts].
+
+    Available sub-commands are below. Each can be run with -h for additional help.
+
+ call        : call lumpy (and optionally svtyper)
+ merge       : merge and sort (using svtools) calls from multiple samples
+ genotype    : parallelize svtyper on an input VCF
+ paste       : square final calls from multiple samples (each with same number of variants)
+ plot-counts : plot counts of split, discordant reads before, after smoove filtering
+ annotate    : annotate a VCF with gene and quality of SV call
+ hipstr      : run hipSTR in parallel
+ cnvnator    : run cnvnator in parallel
+ duphold     : run duphold in parallel (this can be done by adding a flag to call or genotype)
+    ]]>
+    </help>
+
+    <citations> <!-- the lumpy publication referred to in the help text -->
+        <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
+    </citations>
+</tool>