view tools/samtools_bam2fq/samtools_bam2fq.xml @ 0:c961d16801e4 draft default tip

Uploaded v0.0.2
author peterjc
date Tue, 04 Nov 2014 07:15:50 -0500
parents
children
line wrap: on
line source

<tool id="samtools_bam2fq" name="Convert BAM to FASTQ" version="0.0.2">
    <description>samtools bam2fq</description>
    <!-- samtools is declared twice: once as a plain binary and once as the
         version 1.1 package. -->
    <requirements>
        <requirement type="binary">samtools</requirement>
        <requirement version="1.1" type="package">samtools</requirement>
    </requirements>
    <!-- Runs samtools with no arguments, merges stderr into stdout, and keeps
         only the lines matching "Version:" (case-insensitive). -->
    <version_command>samtools 2&gt;&amp;1 | grep -i "Version:"</version_command>
    <command>
        ## Cheetah template producing the samtools command line.
        ## The suffices value expands to an empty string or -n, and the orig_qual
        ## value expands to -O or an empty string (see the boolean inputs); both
        ## are passed to bam2fq in either mode so the user's choices are honoured.
        #if $action_mode.mode == "pairs":
            ## Sort by name for pair-aware output (should give nice interlaced FASTQ)
            ## Galaxy has a tendency to automatically apply coordinate sorting,
            ## so just do this every time. If it was name sorted, pay an IO overhead.
            ## Note requiring -T is samtools issue 295
            ## NOTE(review): a shell pipeline reports the exit status of its last
            ## command, so a failure in the sort step may only surface if bam2fq
            ## also fails on the truncated stream.
            samtools sort -n -O bam -T TEMP_SORT "$input_bam" | samtools bam2fq $suffices $orig_qual -s "$singletons_fastq" - &gt; "$pairs_fastq"
        #else
            ## Naive conversion using order in the input file
            samtools bam2fq $suffices $orig_qual "$input_bam" &gt; "$out_fastq"
        #end if
    </command>
    <inputs>
        <!-- Unlike samtools 0.1.x, samtools 1.1 will autodetect SAM vs BAM -->
        <param name="input_bam" type="data" format="bam,sam" label="Input SAM/BAM file" />
        <!-- The parameter keeps its historical name "suffices" so existing
             references to it (such as the tests in this file) still resolve;
             only the user-facing label spelling is corrected.
             Checked adds nothing to the command line; unchecked adds -n. -->
        <param name="suffices" type="boolean" label="Add /1 and /2 suffixes to paired reads?"
               truevalue="" falsevalue="-n" checked="true" />
        <!-- Checked adds -O to the command line; unchecked adds nothing. -->
        <param name="orig_qual" type="boolean" label="Use original qualities (OQ tags) if present?"
               truevalue="-O" falsevalue="" checked="false" />
        <!-- Using a condition here to allow different output files; default to paired mode -->
        <conditional name="action_mode">
            <param name="mode" type="select" label="Mode of action">
                <option value="pairs" selected="true">Sort by name, then divide into paired and singletons (two FASTQ files)</option>
                <option value="naive">No pre-sorting, all reads in a single FASTQ file</option>
            </param>
        </conditional>
    </inputs>
    <outputs>
        <!-- Which datasets are created depends on the selected mode: "pairs"
             yields the paired and singleton files, "naive" yields one file. -->
        <data format="fastqsanger" name="pairs_fastq" label="$input_bam.name (bam2fq pairs)">
            <filter>(action_mode['mode'] == 'pairs')</filter>
        </data>
        <data format="fastqsanger" name="singletons_fastq" label="$input_bam.name (bam2fq singletons)">
            <filter>(action_mode['mode'] == 'pairs')</filter>
        </data>
        <data format="fastqsanger" name="out_fastq" label="$input_bam.name (bam2fq)">
            <filter>(action_mode['mode'] == 'naive')</filter>
        </data>
    </outputs>
    <stdio>
        <!-- Every non-zero exit status, negative or positive, counts as an error -->
        <exit_code range=":-1" />
        <exit_code range="1:" />
    </stdio>
    <tests>
        <!-- Naive mode, BAM input, both options given explicitly at their defaults -->
        <test>
            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
            <param name="mode" value="naive" />
            <param name="suffices" value="true" />
            <param name="orig_qual" value="false" />
            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
        </test>
        <!-- Naive mode with original qualities requested; the expected file is
             the same one used without that option -->
        <test>
            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
            <param name="mode" value="naive" />
            <param name="suffices" value="true" />
            <param name="orig_qual" value="true" />
            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
        </test>
        <!-- Naive mode, SAM input, options left at their defaults -->
        <test>
            <param name="input_bam" value="sam_spec_padded.sam" ftype="sam" />
            <param name="mode" value="naive" />
            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
        </test>
        <!-- Naive mode, depadded BAM input, options left at their defaults -->
        <test>
            <param name="input_bam" value="sam_spec_padded.depad.bam" ftype="bam" />
            <param name="mode" value="naive" />
            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
        </test>
        <!-- Naive mode with the /1 and /2 read name suffixes switched off -->
        <test>
            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
            <param name="mode" value="naive" />
            <param name="suffices" value="false" />
            <output name="out_fastq" file="sam_spec_padded.bam2fq_no_suf.fastq" ftype="fastqsanger" />
        </test>
        <!-- Pairs mode, BAM input: checks both the pairs and the singletons file -->
        <test>
            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
            <param name="mode" value="pairs" />
            <param name="suffices" value="true" />
            <param name="orig_qual" value="false" />
            <output name="pairs_fastq" file="sam_spec_padded.bam2fq_pairs.fastq" ftype="fastqsanger" />
            <output name="singletons_fastq" file="sam_spec_padded.bam2fq_singles.fastq" ftype="fastqsanger" />
        </test>
        <!-- Pairs mode, SAM input: same expected files as the BAM case -->
        <test>
            <param name="input_bam" value="sam_spec_padded.sam" ftype="sam" />
            <param name="mode" value="pairs" />
            <param name="suffices" value="true" />
            <param name="orig_qual" value="false" />
            <output name="pairs_fastq" file="sam_spec_padded.bam2fq_pairs.fastq" ftype="fastqsanger" />
            <output name="singletons_fastq" file="sam_spec_padded.bam2fq_singles.fastq" ftype="fastqsanger" />
        </test>
    </tests>
    <help>
**What it does**

This tool runs the ``samtools bam2fq`` command in the SAMtools toolkit.

By default this will pre-sort your SAM/BAM file by read name and split your
reads into an interlaced FASTQ file for paired reads, and a separate FASTQ
file for singleton reads. A naive conversion is also offered which gives a
single FASTQ file with the reads ordered as in the input SAM/BAM file.

It is quite common to wish to remap high-throughput sequencing data. If you
only have the mapped reads in SAM/BAM format, this tool can "unmap" them to
recover FASTQ format reads to input into an alternative mapping tool.

BAM files can hold both aligned reads and unaligned reads, so another example
usage would be to filter your BAM file to get only the unaligned reads, and
turn those back into FASTQ using this tool ready for *de novo* assembly, or to
try mapping against another reference sequence.


**Citation**

If you use this Galaxy tool in work leading to a scientific publication please
cite:

Heng Li et al (2009). The Sequence Alignment/Map format and SAMtools.
Bioinformatics 25(16), 2078-9.
http://dx.doi.org/10.1093/bioinformatics/btp352

Peter J.A. Cock (2014), Galaxy wrapper for the samtools bam2fq command
http://toolshed.g2.bx.psu.edu/view/peterjc/samtools_bam2fq

This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/samtools_bam2fq
    </help>
    <!-- Machine-readable citation; the DOI is the same one given for the
         SAMtools paper in the help text. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp352</citation>
    </citations>
</tool>