changeset 4:49da975ba395 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 83a18424ccf42793d2c7f5385b3f9ca211e49790"
author artbio
date Thu, 27 Aug 2020 17:14:25 -0400
parents 65b400409455
children bd4135caa3fa
files lumpy_smoove.xml test-data/result-5.vcf
diffstat 2 files changed, 100 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/lumpy_smoove.xml	Wed Aug 26 12:24:07 2020 -0400
+++ b/lumpy_smoove.xml	Thu Aug 27 17:14:25 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="lumpy_smoove" name="lumpy_smoove" version="0.5.0">
+<tool id="lumpy_smoove" name="lumpy_smoove" version="0.6.0">
     <description>find structural variants using the smoove workflow</description>
     <macros>
         <import>macro_lumpy_smoove.xml</import>
@@ -12,10 +12,15 @@
     <command detect_errors="exit_code"><![CDATA[
     @pipefail@
     @set_fasta_index@
-    ln -s $normal_bam normal.bam &&
-    ln -s $tumor_bam tumor.bam &&
-    samtools index -@ \${GALAXY_SLOTS:-4} normal.bam &&
-    samtools index -@ \${GALAXY_SLOTS:-4} tumor.bam &&
+    #if $set_plan.plan_choice=='pair':
+        ln -s $set_plan.normal_bam normal.bam &&
+        ln -f -s $set_plan.normal_bam.metadata.bam_index normal.bam.bai &&
+        ln -s $set_plan.tumor_bam tumor.bam &&
+        ln -f -s $set_plan.tumor_bam.metadata.bam_index tumor.bam.bai &&
+    #else
+        ln -s $set_plan.single_bam single.bam &&
+        ln -f -s $set_plan.single_bam.metadata.bam_index single.bam.bai &&
+    #end if
    
     smoove call --name output
         #if $set_exclusion.choices=="yes":
@@ -27,15 +32,28 @@
        #if $prpos=="no":
             --removepr
        #end if
-           normal.bam tumor.bam &&
+           *.bam &&
     ls -latr &&
     gunzip output-smoove.genotyped.vcf.gz
 
     ]]></command>
     <inputs>
         <expand macro="reference_source_conditional" />
-        <param format="bam" name="normal_bam" type="data" label="BAM alignments from the normal sample"/>
-        <param format="bam" name="tumor_bam" type="data" label="BAM alignments from the tumor sample"/>
+        <conditional name="set_plan">
+            <param name="plan_choice" type="select" label="Analyse a single Bam or a pair of Bam (eg normal/tumor)" display="radio">
+                <option value="pair" selected="true">A pair of Bam files</option>
+                <option value="single">A single Bam</option>
+            </param>
+            <when value="pair">
+                <param format="bam" name="normal_bam" type="data" label="BAM alignment from the normal sample"/>
+                <param format="bam" name="tumor_bam" type="data" label="BAM alignment from the tumor sample"/>
+            </when>
+            <when value="single">
+                <param format="bam" name="single_bam" type="data" label="BAM alignment from a single sample"/>
+            </when>
+        </conditional>      
+
+ 
         <conditional name="set_exclusion">
             <param name="choices" type="select" label="exclude regions with a bed file" display="radio">
                 <option value="no" selected="true">No</option>
@@ -66,7 +84,7 @@
             <param name="choices" value="yes"/>
             <param name="bedmask" value="exclude.bed"/>
             <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-1.vcf" lines_diff="6"/>
+            <output name="vcf_call" ftype="vcf" file="result-1.vcf" lines_diff="8"/>
         </test>
         <test>
             <param name="reference_source_selector" value="history" />
@@ -75,7 +93,7 @@
             <param name="tumor_bam" value="celegans_RG_2.bam"/>
             <param name="choices" value="no"/>
             <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-2.vcf" lines_diff="6"/>
+            <output name="vcf_call" ftype="vcf" file="result-2.vcf" lines_diff="8"/>
         </test>
         <test>
             <param name="reference_source_selector" value="history" />
@@ -84,7 +102,7 @@
             <param name="tumor_bam" value="celegans_RG_1.bam"/>
             <param name="choices" value="no"/>
             <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-3.vcf" lines_diff="6"/>
+            <output name="vcf_call" ftype="vcf" file="result-3.vcf" lines_diff="8"/>
         </test>
         <test>
             <param name="reference_source_selector" value="history" />
@@ -93,9 +111,17 @@
             <param name="tumor_bam" value="celegans_RG_2.bam"/>
             <param name="choices" value="no"/>
             <param name="prpos" value="yes"/>
-            <output name="vcf_call" ftype="vcf" file="result-4.vcf" lines_diff="6"/>
+            <output name="vcf_call" ftype="vcf" file="result-4.vcf" lines_diff="8"/>
         </test>
-
+        <test>
+            <param name="reference_source_selector" value="history" />
+            <param name="plan_choice" value="single" />
+            <param name="ref_file" value="chrI-ce11.fa"/>
+            <param name="single_bam" value="celegans_RG_1.bam"/>
+            <param name="choices" value="no"/>
+            <param name="prpos" value="no"/>
+            <output name="vcf_call" ftype="vcf" file="result-5.vcf" lines_diff="8"/>
+        </test>
     </tests>
 
     <help>
@@ -144,12 +170,13 @@
 
 **Input(s)**
 
-* BAM files: One Bam for normal sample and one Bam for tumor sample. Only BAM alignments produced by BWA-mem have been tested with this tool
+* BAM files: Either a pair of BAM files (e.g. normal vs tumor sample) or a single BAM file.
+  Only BAM alignments produced by BWA-mem have been tested with this tool.
     
     .. class:: warningmark
     
     It is mandatory for proper run of svtyper that **BAM files contain read group information**,
-    ie the @RG tag is present and filled in each BAM
+    i.e. the @RG tag is present and filled in for each BAM file
 
 
 * A bed file describing the regions to exclude from the analysis
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result-5.vcf	Thu Aug 27 17:14:25 2020 -0400
@@ -0,0 +1,58 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20200827
+##reference=reference.fa
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
+##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
+##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
+##contig=<ID=chrI,length=15072434>
+##smoove_version=0.2.5
+##smoove_count_stats=celegans-1:2869,2691,202,330
+##source=LUMPY
+##bcftools_annotateVersion=1.10.2+htslib-1.10.2
+##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Thu Aug 27 18:59:21 2020
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##bcftools_viewVersion=1.10.2+htslib-1.10.2
+##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Thu Aug 27 18:59:21 2020
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	celegans-1
+chrI	10416569	1	N	<DUP>	130.28	.	SVTYPE=DUP;SVLEN=981;END=10417550;STRANDS=-+:4;IMPRECISE;CIPOS=-769,29;CIEND=-30,636;CIPOS95=-165,8;CIEND95=-9,128;SU=4;PE=4;SR=0;AC=2;AN=2	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:16:130.28:-15,-4,-2:14:4:9:4:9:0:0:0:4:9:0.69