changeset 2:b1e152172de9 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit 5f3fb85a6eb667b04c4de65eb722261f315c8241
author drosofff
date Tue, 06 Dec 2016 17:29:34 -0500
parents a006d42dd759
children 0bd777f0d6d8
files lumpy.xml
diffstat 1 files changed, 118 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/lumpy.xml	Tue Dec 06 09:29:31 2016 -0500
+++ b/lumpy.xml	Tue Dec 06 17:29:34 2016 -0500
@@ -1,4 +1,4 @@
-<tool id="lumpy" name="lumpy-sv" version="0.1">
+<tool id="lumpy" name="lumpy-sv" version="0.2">
     <description>find structural variants</description>
     <requirements>
         <requirement type="package" version="0.2.12">lumpy-sv</requirement>
@@ -10,38 +10,107 @@
     </stdio>
     <version_command>lumpy --version</version_command>
     <command><![CDATA[
-        ln -f -s "$input_file" input.bam &&
-        #if $seq_method.seq_method_list == "paired-end":
-            samtools view -b -F 1294 input.bam > "input.discordants.unsorted.bam" &&
-            samtools view -h input.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.splitters.unsorted.bam" &&
-            samtools sort input.discordants.unsorted.bam > input.discordants.bam &&
-            samtools sort input.splitters.unsorted.bam > input.splitters.bam &&
-            samtools view -r readgroup input.bam
-                |tail -n +$seq_method.additional_params.samplingValue
-                |python $__tool_directory__/pairend_distro.py -r 101 -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandev.txt &&
-            mean=\$(cat meandev.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
-            stdev=\$(cat meandev.txt | sed -r s/mean:.+stdev://) &&
-            lumpy -mw 4 -tt 0 
-                -pe id:input.bam,bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$mean",stdev:"\$stdev",read_length:$seq_method.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold 
-                -sr id:input.bam,bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
-            mv input.discordants.bam $discordants &&
-            mv input.splitters.bam $splits &&
-            mv input.lib.histo $histogram &&
-            mv output.vcf $vcf_call &&
-            rm input.discordants.unsorted.bam input.splitters.unsorted.bam meandev.txt
+        #if $analysis_type.analysis_type_list == "one_sample":
+            ln -f -s '$analysis_type.input_file' input.bam &&
+        #else:
+            ln -f -s '$analysis_type.input_file' input.A.bam &&
+            ln -f -s '$analysis_type.input_fileB' input.B.bam &&
         #end if
-        #if $seq_method.seq_method_list == "single-read":
-            samtools view -h input.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.splitters.unsorted.bam" &&
-            lumpy -mw 4 -tt 0
-                -sr id:input.bam,bam_file:input.splitters.unsorted.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
-            mv input.splitters.unsorted.bam $splits &&
-            mv output.vcf $vcf_call
+
+        #if $analysis_type.analysis_type_list == "one_sample":
+
+            #if $seq_method.seq_method_list == "paired-end":
+                samtools view -b -F 1294 input.bam > "input.discordants.unsorted.bam" &&
+                samtools view -h input.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.splitters.unsorted.bam" &&
+                samtools sort input.discordants.unsorted.bam > input.discordants.bam &&
+                samtools sort input.splitters.unsorted.bam > input.splitters.bam &&
+                samtools view -r readgroup input.bam
+                    |tail -n +$seq_method.additional_params.samplingValue
+                    |python $__tool_directory__/pairend_distro.py -r 101 -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandev.txt &&
+                mean=\$(cat meandev.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
+                stdev=\$(cat meandev.txt | sed -r s/mean:.+stdev://) &&
+                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt 
+                    -pe id:input.bam,bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$mean",stdev:"\$stdev",read_length:$seq_method.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold 
+                    -sr id:input.bam,bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
+                mv input.discordants.bam $discordants &&
+                mv input.splitters.bam $splits &&
+                mv input.lib.histo $histogram &&
+                mv output.vcf $vcf_call &&
+                rm input.discordants.unsorted.bam input.splitters.unsorted.bam meandev.txt
+            #end if
+            #if $seq_method.seq_method_list == "single-read":
+                samtools view -h input.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.splitters.unsorted.bam" &&
+                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
+                    -sr id:input.bam,bam_file:input.splitters.unsorted.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
+                mv input.splitters.unsorted.bam $splits &&
+                mv output.vcf $vcf_call
+            #end if
+        
+        #else:
+
+            #if $seq_method.seq_method_list == "paired-end":
+                samtools view -b -F 1294 input.A.bam > "input.A.discordants.unsorted.bam" &&
+                samtools view -b -F 1294 input.B.bam > "input.B.discordants.unsorted.bam" &&
+                samtools view -h input.A.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.A.splitters.unsorted.bam" &&
+                samtools view -h input.B.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.B.splitters.unsorted.bam" &&
+                samtools sort input.A.discordants.unsorted.bam > input.A.discordants.bam &&
+                samtools sort input.B.discordants.unsorted.bam > input.B.discordants.bam &&
+                samtools sort input.A.splitters.unsorted.bam > input.A.splitters.bam &&
+                samtools sort input.B.splitters.unsorted.bam > input.B.splitters.bam &&
+                samtools view -r readgroup input.A.bam
+                    |tail -n +$seq_method.additional_params.samplingValue
+                    |python $__tool_directory__/pairend_distro.py -r 101 -X 4 -N $seq_method.additional_params.samplingValue -o input.A.lib.histo > meandevA.txt &&
+                samtools view -r readgroup input.B.bam
+                    |tail -n +$seq_method.additional_params.samplingValue
+                    |python $__tool_directory__/pairend_distro.py -r 101 -X 4 -N $seq_method.additional_params.samplingValue -o input.B.lib.histo > meandevB.txt &&
+                meanA=\$(cat meandevA.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
+                meanB=\$(cat meandevB.txt | sed s/mean:// | sed -r s/stdev:.+//) &&               
+                stdevA=\$(cat meandevA.txt | sed -r s/mean:.+stdev://) &&
+                stdevB=\$(cat meandevB.txt | sed -r s/mean:.+stdev://) &&
+                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt 
+                    -pe id:inputA.bam,bam_file:input.A.discordants.bam,histo_file:input.A.lib.histo,mean:"\$meanA",stdev:"\$stdevA",read_length:$seq_method.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold 
+                    -pe id:inputB.bam,bam_file:input.B.discordants.bam,histo_file:input.B.lib.histo,mean:"\$meanB",stdev:"\$stdevB",read_length:$seq_method.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold 
+                    -sr id:inputA.bam,bam_file:input.A.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
+                    -sr id:inputB.bam,bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
+                mv input.A.discordants.bam $discordants &&
+                mv input.B.discordants.bam $discordantsB &&
+                mv input.A.splitters.bam $splits &&
+                mv input.B.splitters.bam $splitsB &&
+                mv input.A.lib.histo $histogram &&
+                mv input.B.lib.histo $histogramB &&
+                mv output.vcf $vcf_call &&
+                rm input.A.discordants.unsorted.bam input.B.discordants.unsorted.bam input.A.splitters.unsorted.bam input.B.splitters.unsorted.bam meandevA.txt meandevB.txt
+            #end if
+            #if $seq_method.seq_method_list == "single-read":
+                samtools view -h input.A.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.A.splitters.unsorted.bam" &&
+                samtools view -h input.B.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.B.splitters.unsorted.bam" &&
+                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
+                    -sr id:input.A.bam,bam_file:input.A.splitters.unsorted.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
+                    -sr id:input.B.bam,bam_file:input.B.splitters.unsorted.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
+                mv input.A.splitters.unsorted.bam $splits &&
+                mv input.B.splitters.unsorted.bam $splitsB &&
+                mv output.vcf $vcf_call
+            #end if
+
+        
         #end if
 
     ]]></command>
     <!-- basic error handling -->
     <inputs>
-        <param format="bam" name="input_file" type="data" label="One BAM alignment file produced by BWA-mem"/>
+        <conditional name="analysis_type">
+            <param help="Single or paired conditions (eg tumor vs normal)" label="Analysis type" name="analysis_type_list" type="select">
+                <option selected="True" value="one_sample">One Sample</option>
+                <option value="two_sample">Two samples</option>
+            </param>
+            <when value="one_sample">
+                <param format="bam" name="input_file" type="data" label="One BAM alignment file produced by BWA-mem"/>
+            </when>
+            <when value="two_sample">
+                <param format="bam" name="input_file" type="data" label="First BAM alignment file produced by BWA-mem (sample A)"/>
+                <param format="bam" name="input_fileB" type="data" label="Second BAM alignment file produced by BWA-mem (sample B)"/>
+            </when>
+        </conditional>
         <conditional name="seq_method">
             <param help="Paired-end or single-read sequencing" label="Sequencing method" name="seq_method_list" type="select">
                 <option selected="True" value="paired-end">Paired-end sequencing</option>
@@ -51,6 +120,8 @@
                 <param name="readLength" value="151"  type="integer" label="read length" help="e.g. 151 nt" />
                 <section name="additional_params" title="Additional Options" expanded="False">
                     <param name="samplingValue" value="100000"  type="integer" label="number of reads to compute mean and stdev of read length" help="e.g. 10000" />
+                    <param name="mw" value="4"  type="integer" label="-mw" help="minimum weight across all samples for a call (default: 4)" />
+                    <param name="tt" value="0"  type="integer" label="-tt" help="trim threshold (default: 0)" />
                     <param name="min_non_overlap" value="101"  type="integer" label="min_non_overlap" help="e.g. 101" />
                     <param name="discordant_z" value="5"  type="integer" label="discordant_z" help="e.g. 5" />
                     <param name="back_distance" value="10"  type="integer" label="back_distance" help="e.g. 10" />
@@ -60,6 +131,8 @@
             </when>
             <when value="single-read">
                 <section name="additional_params" title="Additional Options" expanded="False">
+                    <param name="mw" value="4"  type="integer" label="-mw" help="minimum weight across all samples for a call (default: 4)" />
+                    <param name="tt" value="0"  type="integer" label="-tt" help="trim threshold (default: 0)" />
                     <param name="back_distance" value="10"  type="integer" label="back_distance" help="e.g. 10" />
                     <param name="weight" value="1"  type="integer" label="weight" help="e.g. 1" />
                     <param name="min_mapping_threshold" value="20"  type="integer" label="min_mapping_threshold" help="e.g. 20" />
@@ -71,20 +144,34 @@
     </inputs>
 
     <outputs>
-        <data format="tabular" name="histogram" type="data" label="Lumpy on ${input_file.element_identifier}: Fragment size distribution">
+        <data format="tabular" name="histogram" type="data" label="Lumpy on ${analysis_type.input_file.element_identifier}: Fragment size distribution">
             <filter>seq_method['seq_method_list'] == "paired-end"</filter>
         </data>
-        <data format="bam" name="splits" type="data" label="Lumpy on ${input_file.element_identifier}: Split Reads (Bam format)"/>
-        <data format="bam" name="discordants" type="data" label="Lumpy on ${input_file.element_identifier}: Discordant Pairs (Bam format)">
+        <data format="tabular" name="histogramB" type="data" label="Lumpy on ${analysis_type.input_fileB.element_identifier}: Fragment size distribution">
+            <filter>seq_method['seq_method_list'] == "paired-end"</filter>
+            <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
+        </data>
+        <data format="bam" name="splits" type="data" label="Lumpy on ${analysis_type.input_file.element_identifier}: Split Reads (Bam format)"/>
+        <data format="bam" name="splitsB" type="data" label="Lumpy on ${analysis_type.input_fileB.element_identifier}: Split Reads (Bam format)">
+            <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
+        </data>
+        <data format="bam" name="discordants" type="data" label="Lumpy on ${analysis_type.input_file.element_identifier}: Discordant Pairs (Bam format)">
             <filter>seq_method['seq_method_list'] == "paired-end"</filter>
         </data>
-        <data format="vcf" name="vcf_call" type="data" label="Lumpy on ${input_file.element_identifier}: Variant Calling (vcf format)"/>
+        <data format="bam" name="discordantsB" type="data" label="Lumpy on ${analysis_type.input_fileB.element_identifier}: Discordant Pairs (Bam format)">
+            <filter>seq_method['seq_method_list'] == "paired-end"</filter>
+            <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
+        </data>
+        <data format="vcf" name="vcf_call" type="data" label="Lumpy Variant Calling (vcf format)"/>
     </outputs>
 
     <tests>
         <test>
+            <param name="analysis_type_list" value="one_sample" />
             <param name="input_file" value="sr.input.bam" ftype="bam"/>
             <param name="seq_method_list" value="single-read" />
+            <param name="mw" value="4"/>
+            <param name="tt" value="0"/>
             <param name="back_distance" value="10"/>
             <param name="weight" value="1" />
             <param name="min_mapping_threshold" value="20" />