Mercurial > repos > iuc > bamtools_split_ref

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools_split.xml	Wed Jan 11 12:03:53 2023 +0000
@@ -0,0 +1,119 @@
+<tool id="bamtools_split" name="Split" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>BAM datasets on a variety of attributes</description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command>
+        <![CDATA[
+            echo "BAM" > '$report' &&
+            #for $bam_count, $input_bam in enumerate( $input_bams ):
+                ln -s '${input_bam}' 'localbam_${bam_count}.bam' &&
+                ln -s '${input_bam.metadata.bam_index}' 'localbam_${bam_count}.bam.bai' &&
+            #end for
+            bamtools
+            split
+            #if str ( $analysis_type.analysis_type_selector ) == "-tag" :
+                ${analysis_type.analysis_type_selector} '${analysis_type.tag_name}'
+            #else
+                ${analysis_type.analysis_type_selector}
+            #end if
+            -stub split_bam
+            #for $bam_count, $input_bam in enumerate( $input_bams ):
+                -in 'localbam_${bam_count}.bam'
+            #end for
+        ]]>
+    </command>
+    <inputs>
+        <param name="input_bams" type="data" format="bam" label="BAM dataset(s) to split" min="1" multiple="True"/>
+        <conditional name="analysis_type">
+            <param name="analysis_type_selector" type="select" label="Split BAM dataset(s) by" help="See help below for explanation of each option">
+                <option value="-mapped">Mapping status (-mapped)</option>
+                <option value="-paired">Pairing status (-paired)</option>
+                <option value="-reference">Reference name (-reference)</option>
+                <option value="-tag">Specific tag (-tag)</option>
+            </param>
+            <when value="-mapped" />
+            <when value="-paired" />
+            <when value="-reference" />
+            <when value="-tag">
+                <param name="tag_name" type="text" value="NM" label="Enter tag name here" help="For example, to split on NM tag enter &quot;NM&quot;"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="txt" name="report" label="BAMSplitter Run" hidden="true">
+            <discover_datasets pattern="split_bam\.(?P&lt;designation&gt;.+)\.bam" ext="bam" visible="true"/>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_bams" ftype="bam" value="bamtools-input1.bam"/>
+            <param name="analysis_type_selector" value="-mapped"/>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="BAM" />
+                </assert_contents>
+                <discovered_dataset designation="MAPPED" file="bamtools-split-test1.bam" ftype="bam"/>
+            </output>
+        </test>
+        <test>
+            <param name="input_bams" ftype="bam" value="bamtools-input2.bam"/>
+            <param name="analysis_type_selector" value="-reference"/>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="BAM" />
+                </assert_contents>
+                <discovered_dataset designation="REF_chr1" file="bamtools_input2.chr1" ftype="bam"/>
+            </output>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+As described below, splitting BAM dataset(s) on reference name or a tag value can produce a very large number of outputs. Read below and know what you are doing.
+
+-----
+
+**How it works**
+
+The following options can be specified via "**Split BAM dataset(s) by**" dropdown::
+
+  Mapping status (-mapped)          split mapped/unmapped and generate two output files
+                                    named (MAPPED) and (UNMAPPED) containing mapped and unmapped
+                                    reads, respectively.
+
+  Pairing status (-paired)          split single-end/paired-end alignments and generate two output files
+                                    named (SINGLE_END) and (PAIRED_END) containing single-end and paired-end
+                                    reads, respectively.
+
+  Reference name (-reference)       split alignments by reference name. In cases of unfinished genomes with
+                                    very large number of reference sequences (scaffolds) it can generate
+                                    thousands (if not millions) of output datasets.
+
+  Specific tag (-tag)               split alignments based on all values of TAG encountered. Choosing this
+                                    option from the menu will allow you to enter the tag name. As was the
+                                    case with the reference splitting above, this option can produce very
+                                    large number of outputs if a tag has a large number of unique values.
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btr174</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools_split_mapped.xml	Wed Jan 11 12:03:53 2023 +0000
@@ -0,0 +1,56 @@
+<tool id="bamtools_split_mapped" name="Split BAM by Mapped" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description></description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+        <![CDATA[
+            ln -s '${input_bam}' 'localbam.bam' &&
+            ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' &&
+            bamtools split -mapped
+            -in localbam.bam
+            -stub split_bam
+        ]]>
+    </command>
+    <inputs>
+        <param name="input_bam" type="data" format="bam" label="BAM dataset to split by mapped/unmapped"/>
+    </inputs>
+    <outputs>
+        <data format="bam" name="mapped" label="${input_bam.name} mapped" from_work_dir="split_bam.MAPPED.bam" />
+        <data format="bam" name="unmapped" label="${input_bam.name} unmapped" from_work_dir="split_bam.UNMAPPED.bam" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_bam" ftype="bam" value="bamtools-input1.bam"/>
+            <output name="mapped" file="split_bam.MAPPED.bam"  compare="sim_size" delta="200" />
+            <output name="unmapped" file="split_bam.UNMAPPED.bam"  compare="sim_size" delta="200" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+
+
+
+**How it works**
+
+Splits the input BAM file into 2 output files named (MAPPED) and (UNMAPPED) containing mapped and unmapped reads, respectively.
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btr174</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools_split_paired.xml	Wed Jan 11 12:03:53 2023 +0000
@@ -0,0 +1,57 @@
+<tool id="bamtools_split_paired" name="Split BAM by Paired/Single End" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description></description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+        <![CDATA[
+            ln -s '${input_bam}' 'localbam.bam' &&
+            ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' &&
+            bamtools split -paired
+            -in localbam.bam
+            -stub split_bam
+        ]]>
+    </command>
+    <inputs>
+        <param name="input_bam" type="data" format="bam" label="BAM dataset to split by single_end/paired_end"/>
+    </inputs>
+    <outputs>
+        <data format="bam" name="single" label="${input_bam.name} single-end" from_work_dir="split_bam.SINGLE_END.bam" />
+        <data format="bam" name="paired" label="${input_bam.name} paired-end" from_work_dir="split_bam.PAIRED_END.bam" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_bam" ftype="bam" value="bamtools-input-paired.bam"/>
+            <output name="single" file="split_bam.SINGLE_END.bam"  compare="sim_size" delta="200" />
+            <output name="paired" file="split_bam.PAIRED_END.bam"  compare="sim_size" delta="200" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+
+
+
+**How it works**
+
+
+Splits the input BAM file into 2 output files named (SINGLE_END) and (PAIRED_END) containing single_end and paired_end reads, respectively.
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btr174</citation>
+    </citations>
+</tool>
--- a/bamtools_split_ref.xml	Sat Nov 27 10:03:33 2021 +0000
+++ b/bamtools_split_ref.xml	Wed Jan 11 12:03:53 2023 +0000
@@ -1,32 +1,50 @@
-<tool id="bamtools_split_ref" name="Split BAM by Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="bamtools_split_ref" name="Split BAM by Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
     <description>into dataset list collection</description>
     <macros>
           <import>macros.xml</import>
     </macros>
-    <expand macro="requirements" />
-    <command>
-        <![CDATA[
-            ln -s '${input_bam}' 'localbam.bam' &&
-            ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' &&
-            bamtools split -reference
-            -in localbam.bam
-            -stub split_bam
-            ## Preserve order from metadata in the output collection
-            #import re
-            #set $name = $re.sub('\W','_',$re.sub('\.bam$','',$input_bam.name))
-            #if str($refs) != 'None':
-                #set $ref_list = ' '.join(str($refs).split(","))
-            #else
-                #set $ref_list = ' '.join([$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')])
-            #end if
-            && mkdir -p outputs
-            && (export I=0;
-              for i in $ref_list;
-                do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`;
-                  if [ -e \$SN ]; then FN=`printf "outputs/split_bam%05d%s.%s.bam" \$((I)) '$name' "\$i"`; mv \$SN \$FN; fi;
-                done)
-        ]]>
-    </command>
+    <expand macro="requirements">
+        <requirement type="package" version="1.16.1">samtools</requirement>
+    </expand>
+    <command><![CDATA[
+        ln -s '${input_bam}' localbam.bam &&
+        ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' &&
+        samtools view -bH localbam.bam --no-PG -o header.bam &&
+        bamtools split -reference
+        -in localbam.bam
+        -stub split_bam
+        && (IFS=',';
+          for i in \$REFS_FROM_BAM_METADATA;
+          do FN=`printf "split_bam.REF_%s.bam" "\$i"`;
+            if [ ! -f "\$FN" ]; then cp header.bam "\$FN"; fi;
+          done)
+        && cp '$c1' galaxy.json
+    ]]></command>
+    <environment_variables>
+        <environment_variable name="REFS_FROM_BAM_METADATA">#import re
+## need to extract ref names from Galaxy's safe string representation
+#set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
+#if str($refs) != 'None'
+#set $refs_selected = set(str($refs).split(","))
+## sort the selected refs by their order in the bam metadata
+#echo ','.join([r for r in $ref_list if r in refs_selected])
+#else
+#echo ','.join($ref_list)
+#end if
+</environment_variable>
+    </environment_variables>
+    <configfiles>
+        <configfile name="c1">#import re
+## need to extract ref names from Galaxy's safe string representation
+#set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
+#if str($refs) != 'None'
+#set $refs_selected = set(str($refs).split(","))
+#set $ref_list = [r for r in $ref_list if r in refs_selected]
+#end if
+#set $elems = [{'name': '%s: %s' % ($input_bam.name, r), 'filename': 'split_bam.REF_%s.bam' % r, 'dbkey': str($input_bam.dbkey)} for r in $ref_list]
+#import json
+#echo json.dumps({'output_bams': {'elements': $elems}})</configfile>
+    </configfiles>
     <inputs>
         <param name="input_bam" type="data" format="bam" label="BAM dataset to split by reference"/>
         <param name="refs" type="select" optional="True" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" >
@@ -39,40 +57,44 @@
         </param>
     </inputs>
     <outputs>
-        <collection name="output_bams" type="list" label="${input_bam.name} Split List">
-            <discover_datasets pattern="split_bam\d*(?P&lt;designation&gt;.+)\.bam" ext="bam" directory="outputs" visible="false"/>
+        <collection name="output_bams" type="list">
+            <discover_datasets from_provided_metadata="true" ext="bam" visible="false" />
         </collection>
     </outputs>
     <tests>
         <test>
-            <param name="input_bam" ftype="bam" value="bamtools-input2.bam"/>
-            <output_collection name="output_bams"  type="list">
-                <element name="bamtools_input2.chr1"  file="bamtools_input2.chr1" compare="sim_size" delta="500" />
+            <param name="input_bam" ftype="bam" value="bamtools-input2.bam" />
+            <output_collection name="output_bams" type="list" count="25">
+                <element name="bamtools-input2.bam: chrM"  file="bamtools_input2.header.bam" ftype="bam" />
+                <element name="bamtools-input2.bam: chr1"  file="bamtools_input2.chr1" ftype="bam" />
+                <element name="bamtools-input2.bam: chr21"  file="bamtools_input2.chr21.bam" ftype="bam" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="input_bam" ftype="bam" value="bamtools-input2.bam" />
+            <param name="refs" value="chrM,chr1,chr21" />
+            <output_collection name="output_bams" type="list" count="3">
+                <element name="bamtools-input2.bam: chrM"  file="bamtools_input2.header.bam" ftype="bam" />
+                <element name="bamtools-input2.bam: chr1"  file="bamtools_input2.chr1" ftype="bam" />
+                <element name="bamtools-input2.bam: chr21"  file="bamtools_input2.chr21.bam" ftype="bam" />
             </output_collection>
         </test>
     </tests>
     <help>
 **What is does**

-BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
-
------
-
-.. class:: warningmark
-
-**DANGER: Multiple Outputs**
-
-As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing.
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).

 -----

 **How it works**

-Split alignments by reference name into a dataset list collection.  The collection will be in the same order as the input BAM references.
+Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references and will consist of as many elements as there are references selected or listed in the input BAM header.

-In cases of unfinished genomes with very large number of reference sequences (scaffolds)
-it can generate thousands (if not millions) of output datasets.
+.. class:: warningmark

+   In cases of unfinished genomes with very large number of reference sequences (scaffolds)
+   this could generate thousands (if not millions) of output datasets.

 -----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools_split_tag.xml	Wed Jan 11 12:03:53 2023 +0000
@@ -0,0 +1,71 @@
+<tool id="bamtools_split_tag" name="Split BAM by Tag" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>into dataset list collection</description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+        <![CDATA[
+            #import re
+            #set $name = 'split_bam' + $re.sub('\W','_',$re.sub('\.bam$','',$input_bam.name))
+            ln -s '${input_bam}' 'localbam.bam' &&
+            ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' &&
+            bamtools split -tag '$tag_name'
+            -in 'localbam.bam'
+            -stub '$name'
+        ]]>
+    </command>
+    <inputs>
+        <param name="input_bam" type="data" format="bam" label="BAM dataset to split by tag value"/>
+        <param name="tag_name" type="text" value="NM" label="Enter tag name here" help="For example, to split on NM tag enter &quot;NM&quot;"/>
+    </inputs>
+    <outputs>
+        <collection name="output_bams" type="list" label="${input_bam.name} Split List">
+            <discover_datasets pattern="split_bam(?P&lt;designation&gt;.+)\.bam" ext="bam" visible="false"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_bam" ftype="bam" value="bamtools-input-tags.bam"/>
+            <param name="tag_name" value="XG"/>
+            <output_collection name="output_bams"  type="list">
+                <element name="bamtools_input_tags.TAG_XG_V"  file="bamtools_input_tags.TAG_XG_V.bam" compare="sim_size" delta="500" />
+            </output_collection>
+        </test>
+
+    </tests>
+    <help>
+**What it does**
+
+BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+As described below, splitting BAM dataset(s) on a tag value can produce a very large number of outputs. Read below and know what you are doing.
+
+-----
+
+**How it works**
+
+Split alignments by tag name into a dataset list collection.
+
+This can generate a huge number of output datasets depending on the number of distinct values of the TAG.
+
+
+-----
+
+.. class:: infomark
+
+**More information**
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btr174</citation>
+    </citations>
+</tool>
--- a/macros.xml	Sat Nov 27 10:03:33 2021 +0000
+++ b/macros.xml	Wed Jan 11 12:03:53 2023 +0000
@@ -1,11 +1,11 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">2.5.1</token>
+    <token name="@TOOL_VERSION@">2.5.2</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">bamtools</requirement>
-            <requirement type="package" version="1.4.1">samtools</requirement>
+            <yield />
         </requirements>
     </xml>
     <xml name="citations">
@@ -13,4 +13,4 @@
             <citation type="doi">10.1093/bioinformatics/btr174</citation>
         </citations>
     </xml>
-</macros>
\ No newline at end of file
+</macros>
Binary file test-data/bamtools_input2.chr21.bam has changed
Binary file test-data/bamtools_input2.header.bam has changed