view bamtools_split_ref.xml @ 3:9b520009db81 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/bamtools/bamtools_split commit 862f05b5053f55def8007194a702019418697cb5
author iuc
date Fri, 13 Jan 2023 12:17:49 +0000
parents 9dbf707bebb0
children 9b8feb118f9e
line wrap: on
line source

<tool id="bamtools_split_ref" name="Split BAM by reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <description>into a dataset list collection</description>
    <!-- Pull in the shared macro file; it is expected to define the "requirements"
         macro expanded below (presumably also the version tokens used on the tool
         element; confirm in macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The samtools package is passed as content of the shared "requirements" macro;
         the command section uses samtools to write header.bam. -->
    <expand macro="requirements">
        <requirement version="1.16.1" type="package">samtools</requirement>
    </expand>
    <command><![CDATA[
        ## Steps, in order:
        ## 1. Link the input BAM and its index under fixed local names.
        ## 2. Write a header-only BAM (header.bam) with samtools.
        ## 3. Split localbam.bam by reference with bamtools (stub: split_bam).
        ## 4. For every name in REFS_FROM_BAM_METADATA (comma-separated), copy
        ##    header.bam to split_bam.REF_NAME.bam when that file does not exist.
        ## 5. Copy the rendered c1 configfile to galaxy.json.
        ## The per-reference file name is always double-quoted: the loop runs with
        ## IFS set to a comma, and reference names may contain glob characters such
        ## as an asterisk, so an unquoted expansion could be split or
        ## pattern-expanded and the existence test would check the wrong path.
        ln -s '${input_bam}' localbam.bam &&
        ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' &&
        samtools view -bH localbam.bam --no-PG -o header.bam &&
        bamtools split -reference
        -in localbam.bam
        -stub split_bam
        && (IFS=',';
          for i in \$REFS_FROM_BAM_METADATA;
          do FN="split_bam.REF_\$i.bam";
            if [ ! -f "\$FN" ]; then cp header.bam "\$FN"; fi;
          done)
        && cp '$c1' galaxy.json
    ]]></command>
    <environment_variables>
        <!-- REFS_FROM_BAM_METADATA: comma-joined reference names derived from the
             reference_names metadata of input_bam. Entries wrapped in __sq__ markers
             are unwrapped with a regular expression. When the optional "refs"
             parameter has a selection, only the selected names are emitted, kept in
             metadata order; otherwise every name is emitted. The command section
             iterates over this value.
             NOTE(review): the template starts directly after the opening tag,
             presumably so the rendered value carries no leading newline; keep it
             that way unless verified otherwise. -->
        <environment_variable name="REFS_FROM_BAM_METADATA">#import re
## need to extract ref names from Galaxy's safe string representation
#set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
#if str($refs) != 'None'
#set $refs_selected = set(str($refs).split(","))
## sort the selected refs by their order in the bam metadata
#echo ','.join([r for r in $ref_list if r in refs_selected])
#else
#echo ','.join($ref_list)
#end if
</environment_variable>
    </environment_variables>
    <configfiles>
        <!-- c1 renders a JSON document keyed by the output_bams collection. It holds
             one element per reference name (all names from the reference_names
             metadata, or only those selected in "refs"), each with:
               name     = "INPUT_NAME: REF"
               filename = split_bam.REF_REF.bam (REF replaced by the reference name)
               dbkey    = dbkey of input_bam
             The command section copies the rendered file to galaxy.json.
             The reference-name extraction repeats the logic used for the
             REFS_FROM_BAM_METADATA environment variable and must stay in step
             with it. -->
        <configfile name="c1">#import re
## need to extract ref names from Galaxy's safe string representation
#set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
#if str($refs) != 'None'
#set $refs_selected = set(str($refs).split(","))
#set $ref_list = [r for r in $ref_list if r in refs_selected]
#end if
#set $elems = [{'name': '%s: %s' % ($input_bam.name, r), 'filename': 'split_bam.REF_%s.bam' % r, 'dbkey': str($input_bam.dbkey)} for r in $ref_list]
#import json
#echo json.dumps({'output_bams': {'elements': $elems}})</configfile>
    </configfiles>
    <inputs>
        <!-- The BAM dataset whose alignments are split per reference. -->
        <param name="input_bam"
               type="data"
               format="bam"
               label="BAM dataset to split by reference"/>
        <!-- Optional multi-select; its choices come from the reference_names
             metadata of input_bam. -->
        <param name="refs"
               type="select"
               multiple="True"
               optional="True"
               label="Select references (chromosomes and contigs) you would like to restrict bam to">
            <help><![CDATA[Click and type in the box above to see options. You can select multiple entries.
                  If "No options available" is displayed, you need to re-detect metadata on the input dataset.
            ]]></help>
            <options>
                <filter type="data_meta" ref="input_bam" key="reference_names"/>
            </options>
        </param>
    </inputs>
    <outputs>
        <!-- List collection of BAM datasets; its elements are discovered from
             tool-provided metadata (the command section writes galaxy.json). -->
        <collection type="list" name="output_bams">
            <discover_datasets ext="bam" visible="false" from_provided_metadata="true"/>
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: no "refs" selection. The collection is expected to contain 25
             elements; three of them are compared against fixtures. chrM is compared
             against the header-only fixture (presumably because the input has no
             chrM alignments; confirm against the test data).
             NOTE(review): the chr1 fixture name has no .bam suffix, unlike the two
             sibling fixtures; confirm the file name in test-data. -->
        <test>
            <param name="input_bam" ftype="bam" value="bamtools-input2.bam" />
            <output_collection name="output_bams" type="list" count="25">
                <element name="bamtools-input2.bam: chrM"  file="bamtools_input2.header.bam" ftype="bam" />
                <element name="bamtools-input2.bam: chr1"  file="bamtools_input2.chr1" ftype="bam" />
                <element name="bamtools-input2.bam: chr21"  file="bamtools_input2.chr21.bam" ftype="bam" />
            </output_collection>
        </test>
        <!-- Test 2: "refs" restricted to chrM, chr1 and chr21. The collection is
             expected to contain exactly those 3 elements.
             NOTE(review): same chr1 fixture name without .bam suffix as in the
             first test; confirm the file name in test-data. -->
        <test>
            <param name="input_bam" ftype="bam" value="bamtools-input2.bam" />
            <param name="refs" value="chrM,chr1,chr21" />
            <output_collection name="output_bams" type="list" count="3">
                <element name="bamtools-input2.bam: chrM"  file="bamtools_input2.header.bam" ftype="bam" />
                <element name="bamtools-input2.bam: chr1"  file="bamtools_input2.chr1" ftype="bam" />
                <element name="bamtools-input2.bam: chr21"  file="bamtools_input2.chr21.bam" ftype="bam" />
            </output_collection>
        </test>
    </tests>
    <help>
**What it does**

BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).

-----

**How it works**

Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references and will consist of as many elements as there are references selected or listed in the input BAM header.

.. class:: warningmark

In cases of unfinished genomes with a very large number of reference sequences (scaffolds) this could generate a collection with thousands (if not millions) of elements.

-----

.. class:: infomark

**More information**

Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki

    </help>
    <!-- Publication cited for this tool (presumably the BAMtools paper referenced
         in the help text; verify the DOI target). -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btr174</citation>
    </citations>
</tool>