diff lumpy.xml @ 12:b3fb23bbca8e draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit 03ac2f3182f9e72db31297fa9e2fd5f0802343ea
author drosofff
date Sun, 18 Dec 2016 10:35:43 -0500
parents ecbc563571ea
children 02adb61c0246
line wrap: on
line diff
--- a/lumpy.xml	Mon Dec 12 13:45:08 2016 -0500
+++ b/lumpy.xml	Sun Dec 18 10:35:43 2016 -0500
@@ -1,4 +1,4 @@
-<tool id="lumpy" name="lumpy-sv" version="0.2.1">
+<tool id="lumpy" name="lumpy-sv" version="0.3.0">
     <description>find structural variants</description>
     <requirements>
         <requirement type="package" version="0.2.12">lumpy-sv</requirement>
@@ -9,101 +9,76 @@
         <exit_code range="1:" level="fatal" description="Tool exception" />
     </stdio>
     <command detect_errors="exit_code"><![CDATA[
+        #import re
+        #set one_sample_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_file.element_identifier))
         #if $analysis_type.analysis_type_list == "one_sample":
-            ln -f -s '$analysis_type.input_file' input.bam &&
+            ln -f -s '$analysis_type.input_file' '$one_sample_bam' &&
         #else:
-            ln -f -s '$analysis_type.input_file' input.A.bam &&
-            ln -f -s '$analysis_type.input_fileB' input.B.bam &&
+            #set sample_a_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_file.element_identifier))
+            #set sample_b_bam = re.sub('[^\w\-]', '_', str($analysis_type.input_fileB.element_identifier))
+            #if $sample_a_bam == $sample_b_bam:
+                #set sample_a_bam = "%s_a" % str($sample_a_bam)
+                #set sample_b_bam = "%s_b" % str($sample_b_bam)
+            #end if
+            ln -f -s '$analysis_type.input_file' '$sample_a_bam' &&
+            ln -f -s '$analysis_type.input_fileB' '$sample_b_bam' &&
         #end if
 
         #if $analysis_type.analysis_type_list == "one_sample":
 
             #if $seq_method.seq_method_list == "paired-end":
-                samtools view -b -F 1294 input.bam > "input.discordants.unsorted.bam" &&
-                samtools view -h input.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.splitters.unsorted.bam" &&
-                samtools sort input.discordants.unsorted.bam > input.discordants.bam &&
-                samtools sort input.splitters.unsorted.bam > input.splitters.bam &&
-                samtools view input.bam
-                    |tail -n +1
+                samtools view -u -F 1294 '$one_sample_bam' | samtools sort -O bam -o input.discordants.bam &&
+                samtools view -h '$one_sample_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
+                samtools view '$one_sample_bam'
                     |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandev.txt &&
                 mean=\$(cat meandev.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
                 stdev=\$(cat meandev.txt | sed -r s/mean:.+stdev://) &&
-                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
+                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
                     #if $output_format == "BEDPE":
                         -b
                     #end if
-                    -pe id:input.bam,bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$mean",stdev:"\$stdev",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
-                    -sr id:input.bam,bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
-                mv input.discordants.bam $discordants &&
-                mv input.splitters.bam $splits &&
-                mv input.lib.histo $histogram &&
-                mv output.vcf $vcf_call &&
-                rm input.discordants.unsorted.bam input.splitters.unsorted.bam meandev.txt
-            #end if
-            #if $seq_method.seq_method_list == "single-read":
-                samtools view -h input.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.splitters.unsorted.bam" &&
-                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
+                    -pe id:'$one_sample_bam',bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$mean",stdev:"\$stdev",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
+                    -sr id:'$one_sample_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
+            #elif $seq_method.seq_method_list == "single-read":
+                samtools view -h '$one_sample_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
+                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
                     #if $output_format == "BEDPE":
                         -b
                     #end if
-                    -sr id:input.bam,bam_file:input.splitters.unsorted.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
-                mv input.splitters.unsorted.bam $splits &&
-                mv output.vcf $vcf_call
+                    -sr id:'$one_sample_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
             #end if
-
         #else:
-
             #if $seq_method.seq_method_list == "paired-end":
-                samtools view -b -F 1294 input.A.bam > "input.A.discordants.unsorted.bam" &&
-                samtools view -b -F 1294 input.B.bam > "input.B.discordants.unsorted.bam" &&
-                samtools view -h input.A.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.A.splitters.unsorted.bam" &&
-                samtools view -h input.B.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.B.splitters.unsorted.bam" &&
-                samtools sort input.A.discordants.unsorted.bam > input.A.discordants.bam &&
-                samtools sort input.B.discordants.unsorted.bam > input.B.discordants.bam &&
-                samtools sort input.A.splitters.unsorted.bam > input.A.splitters.bam &&
-                samtools sort input.B.splitters.unsorted.bam > input.B.splitters.bam &&
-                samtools view  input.A.bam
-                    |tail -n +1
-                    |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.A.lib.histo > meandevA.txt &&
-                samtools view  input.B.bam
-                    |tail -n +1
+                samtools view -u -F 1294 '$sample_a_bam' | samtools sort -O bam -o input.discordants.bam &&
+                samtools view -u -F 1294 '$sample_b_bam' | samtools sort -O bam -o input.B.discordants.bam &&
+                samtools view -h '$sample_a_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
+                samtools view -h '$sample_b_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.B.splitters.bam &&
+                samtools view  '$sample_a_bam'
+                    |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandevA.txt &&
+                samtools view  '$sample_b_bam'
                     |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLengthB -X 4 -N $seq_method.additional_params.samplingValue -o input.B.lib.histo > meandevB.txt &&
                 meanA=\$(cat meandevA.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
                 meanB=\$(cat meandevB.txt | sed s/mean:// | sed -r s/stdev:.+//) &&
                 stdevA=\$(cat meandevA.txt | sed -r s/mean:.+stdev://) &&
                 stdevB=\$(cat meandevB.txt | sed -r s/mean:.+stdev://) &&
-                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
+                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
                     #if $output_format == "BEDPE":
                         -b
                     #end if
-                    -pe id:inputA.bam,bam_file:input.A.discordants.bam,histo_file:input.A.lib.histo,mean:"\$meanA",stdev:"\$stdevA",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
+                    -pe id:inputA.bam,bam_file:input.discordants.bam,histo_file:input.lib.histo,mean:"\$meanA",stdev:"\$stdevA",read_length:$analysis_type.readLength,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
                     -pe id:inputB.bam,bam_file:input.B.discordants.bam,histo_file:input.B.lib.histo,mean:"\$meanB",stdev:"\$stdevA",read_length:$analysis_type.readLengthB,min_non_overlap:$seq_method.additional_params.min_non_overlap,discordant_z:$seq_method.additional_params.discordant_z,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
-                    -sr id:inputA.bam,bam_file:input.A.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
-                    -sr id:inputB.bam,bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
-                mv input.A.discordants.bam $discordants &&
-                mv input.B.discordants.bam $discordantsB &&
-                mv input.A.splitters.bam $splits &&
-                mv input.B.splitters.bam $splitsB &&
-                mv input.A.lib.histo $histogram &&
-                mv input.B.lib.histo $histogramB &&
-                mv output.vcf $vcf_call &&
-                rm input.A.discordants.unsorted.bam input.B.discordants.unsorted.bam input.A.splitters.unsorted.bam input.B.splitters.unsorted.bam meandevA.txt meandevB.txt
-            #end if
-            #if $seq_method.seq_method_list == "single-read":
-                samtools view -h input.A.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.A.splitters.unsorted.bam" &&
-                samtools view -h input.B.bam | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools view -Sb - > "input.B.splitters.unsorted.bam" &&
-                lumpy -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
+                    -sr id:inputA.bam,bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
+                    -sr id:inputB.bam,bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
+            #elif $seq_method.seq_method_list == "single-read":
+                samtools view -h '$sample_a_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.splitters.bam &&
+                samtools view -h '$sample_b_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -O bam -o input.B.splitters.bam &&
+                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt
                     #if $output_format == "BEDPE":
                         -b
                     #end if
-                    -sr id:input.A.bam,bam_file:input.A.splitters.unsorted.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
-                    -sr id:input.B.bam,bam_file:input.B.splitters.unsorted.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > output.vcf &&
-                mv input.A.splitters.unsorted.bam $splits &&
-                mv input.B.splitters.unsorted.bam $splitsB &&
-                mv output.vcf $vcf_call
+                    -sr id:'$sample_a_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold
+                    -sr id:'$sample_b_bam',bam_file:input.B.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call'
             #end if
-
-
         #end if
 
     ]]></command>
@@ -140,6 +115,8 @@
                     <param name="back_distance" value="10"  type="integer" label="back_distance" help="e.g. 10" />
                     <param name="weight" value="1"  type="integer" label="weight" help="e.g. 1" />
                     <param name="min_mapping_threshold" value="20"  type="integer" label="min_mapping_threshold" help="e.g. 20" />
+                    <param name="probability_curve" argument="-P" type="boolean" truevalue="-P" falsevalue="" checked="true" label="output probability curve for each variant"/>
+                    <param name="evidence" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="true" label="show evidence for each call"/>
                 </section>
             </when>
             <when value="single-read">
@@ -149,6 +126,8 @@
                     <param name="back_distance" value="10"  type="integer" label="back_distance" help="e.g. 10" />
                     <param name="weight" value="1"  type="integer" label="weight" help="e.g. 1" />
                     <param name="min_mapping_threshold" value="20"  type="integer" label="min_mapping_threshold" help="e.g. 20" />
+                    <param name="probability_curve" argument="-P" type="boolean" truevalue="-P" falsevalue="" checked="false" label="output probability curve for each variant"/>
+                    <param name="evidence" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="false" label="show evidence for each call"/>
                 </section>
             </when>
 
@@ -160,21 +139,21 @@
     </inputs>
 
     <outputs>
-        <data format="tabular" name="histogram" label="Lumpy on ${on_string}: Fragment size distribution">
+        <data format="tabular" name="histogram" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.lib.histo">
             <filter>seq_method['seq_method_list'] == "paired-end"</filter>
         </data>
-        <data format="tabular" name="histogramB" label="Lumpy on ${on_string}: Fragment size distribution">
+        <data format="tabular" name="histogramB" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.B.lib.histo">
             <filter>seq_method['seq_method_list'] == "paired-end"</filter>
             <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
         </data>
-        <data format="bam" name="splits" label="Lumpy on ${on_string}: Split Reads (Bam format)"/>
-        <data format="bam" name="splitsB" label="Lumpy on ${on_string}: Split Reads (Bam format)">
+        <data format="bam" name="splits" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.splitters.bam"/>
+        <data format="bam" name="splitsB" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.B.splitters.bam">
             <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
         </data>
-        <data format="bam" name="discordants" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)">
+        <data format="bam" name="discordants" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.discordants.bam">
             <filter>seq_method['seq_method_list'] == "paired-end"</filter>
         </data>
-        <data format="bam" name="discordantsB" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)">
+        <data format="bam" name="discordantsB" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.B.discordants.bam">
             <filter>seq_method['seq_method_list'] == "paired-end"</filter>
             <filter>analysis_type['analysis_type_list'] == "two_sample"</filter>
         </data>
@@ -187,7 +166,7 @@
 
     <tests>
         <test>
-            <param name="analysis_type" value="one_sample" />
+            <param name="analysis_type_list" value="one_sample" />
             <param name="input_file" value="sr.input.bam" ftype="bam"/>
             <param name="seq_method_list" value="single-read" />
             <param name="mw" value="4"/>
@@ -197,6 +176,31 @@
             <param name="min_mapping_threshold" value="20" />
             <output name="vcf_call" file="output.vcf" ftype="vcf"/>
         </test>
+        <test>
+            <param name="analysis_type_list" value="one_sample" />
+            <param name="input_file" value="sr.input.bam" ftype="bam"/>
+            <param name="seq_method_list" value="single-read" />
+            <param name="mw" value="4"/>
+            <param name="tt" value="0"/>
+            <param name="back_distance" value="10"/>
+            <param name="weight" value="1" />
+            <param name="min_mapping_threshold" value="20" />
+            <param name="evidence" value="true" />
+            <param name="probability_curve" value="true" />
+            <output name="vcf_call" file="output_extended.vcf" ftype="vcf" compare="sim_size"/>
+        </test>
+        <test>
+            <param name="analysis_type_list" value="two_sample" />
+            <param name="input_file" value="sr.input.bam" ftype="bam"/>
+            <param name="input_fileB" value="sr.input.bam" ftype="bam"/>
+            <param name="seq_method_list" value="single-read" />
+            <param name="mw" value="4"/>
+            <param name="tt" value="0"/>
+            <param name="back_distance" value="10"/>
+            <param name="weight" value="1" />
+            <param name="min_mapping_threshold" value="20" />
+            <output name="vcf_call" file="output_two.vcf" ftype="vcf"/>
+        </test>
    </tests>
 
     <help>