view abyss-pe.xml @ 1:0a5c7992b1ac draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/abyss commit 9d484642914b581ce35f254466b849d3c4c2c06c-dirty
author iuc
date Wed, 01 Mar 2017 15:17:12 -0500
parents f048033da666
children 2b89471cca20
line wrap: on
line source

<tool id="abyss-pe" name="ABySS" version="2.0.1.0">
    <description>de novo sequence assembler</description>
    <macros>
        <!--
            reads_conditional: selector describing how the reads of ONE library
            are supplied. It is pulled in through <expand macro="reads_conditional" />
            by the paired-end and mate-pair repeats, so every library entry makes
            its own choice. The command template branches on reads.reads_selector.
        -->
        <xml name="reads_conditional">
            <conditional name="reads">
                <param name="reads_selector"
                       type="select"
                       label="Type of paired-end datasets">
                    <option value="paired">2 separate datasets</option>
                    <option value="paired_collection">1 paired dataset collection</option>
                    <option value="paired_il">1 dataset of interleaved reads</option>
                </param>

                <!-- Reads 1 and reads 2 given as two independent datasets -->
                <when value="paired">
                    <param name="reads1"
                           type="data"
                           format="fasta,fastq,fastqsanger,fastqillumina"
                           label="Paired-end reads 1"/>
                    <param name="reads2"
                           type="data"
                           format="fasta,fastq,fastqsanger,fastqillumina"
                           label="Paired-end reads 2"/>
                </when>

                <!-- Both mates given together as one collection of type "paired" -->
                <when value="paired_collection">
                    <param name="reads_coll"
                           type="data_collection"
                           collection_type="paired"
                           format="fasta,fastq,fastqsanger,fastqillumina"
                           label="Paired-end reads collection"/>
                </when>

                <!-- Both mates interleaved inside a single dataset -->
                <when value="paired_il">
                    <param name="reads_il"
                           type="data"
                           format="fasta,fastq,fastqsanger,fastqillumina"
                           label="Interleaved paired-end reads"/>
                </when>
            </conditional>
        </xml>
    </macros>
    <!-- Software dependency of this wrapper: the "abyss" package, pinned to version 2.0.1. -->
    <requirements>
        <requirement type="package" version="2.0.1">abyss</requirement>
    </requirements>
    <!-- Error detection rules: one exit code range plus two text patterns ("Error:" and "Exception:"). -->
    <stdio>
        <!-- Anything other than zero is an error -->
        <exit_code range="1:" />
        <!-- In case the return code has not been set properly, check stderr too -->
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>
    <!-- Version reporting: keeps only the first line printed by the ABYSS executable's version flag. -->
    <version_command>ABYSS --version | head -n 1</version_command>
    <!--
        Cheetah template that assembles the abyss-pe command line.

        * name=abyss is always passed; the outputs of this tool are collected from
          files prefixed "abyss-", presumably derived from this name (confirm against
          the abyss-pe documentation).
        * j is taken from the GALAXY_SLOTS environment variable, falling back to 1.
        * k is always passed. Every other scalar parameter is emitted only when the
          string form of its value is non-empty; SS is emitted only when the boolean
          is true.
        * Paired-end libraries are named lib0, lib1, ... and mate-pair libraries
          mp0, mp1, ... following their position in the corresponding repeat. The
          list of names is passed first (lib='...' / mp='...'), then one assignment
          per library holding its dataset path(s) in single quotes. Which paths are
          used depends on reads.reads_selector: reads1 and reads2, the forward and
          reverse elements of the collection, or the single interleaved dataset.
        * Single-end reads are passed as one quoted list in se='...'.
        * Long sequences are named long0, long1, ... in the same name-list plus
          per-name assignment style.
        * The se and long sections are skipped when str() of the parameter is 'None'.
    -->
    <command>
        abyss-pe
            name=abyss
            j=\${GALAXY_SLOTS:-1}
            k=$k

            #if str($K)
              K=$K
            #end if
            #if str($q)
              q=$q
            #end if
            #if str($Q)
              Q=$Q
            #end if
            #if str($e)
              e=$e
            #end if
            #if str($E)
              E=$E
            #end if
            #if str($t)
              t=$t
            #end if
            #if str($c)
              c=$c
            #end if
            #if str($b)
              b=$b
            #end if
            #if $SS
              SS=--SS
            #end if
            #if str($m)
              m=$m
            #end if
            #if str($p)
              p=$p
            #end if
            #if str($a)
              a=$a
            #end if
            #if str($l)
              l=$l
            #end if
            #if str($s)
              s=$s
            #end if
            #if str($n)
              n=$n
            #end if
            #if str($d)
              d=$d
            #end if
            #if str($S)
              S=$S
            #end if
            #if str($N)
              N=$N
            #end if
            #if $lib_repeat
              lib='
              #for $i in range(len($lib_repeat))
                lib$i
              #end for
              '
              #for $i, $v in enumerate($lib_repeat)
                #if $v.reads.reads_selector == 'paired'
                  lib${i}='${v.reads.reads1} ${v.reads.reads2}'
                #elif $v.reads.reads_selector == 'paired_collection'
                  lib${i}='${v.reads.reads_coll.forward} ${v.reads.reads_coll.reverse}'
                #else
                  lib${i}='${v.reads.reads_il}'
                #end if
              #end for
            #end if
            #if str($se_reads) != 'None'
              se='
              #for $v in $se_reads
                $v
              #end for
              '
            #end if
            #if $mp_repeat
              mp='
              #for $i in range(len($mp_repeat))
                mp$i
              #end for
              '
              #for $i, $v in enumerate($mp_repeat)
                #if $v.reads.reads_selector == 'paired'
                  mp${i}='${v.reads.reads1} ${v.reads.reads2}'
                #elif $v.reads.reads_selector == 'paired_collection'
                  mp${i}='${v.reads.reads_coll.forward} ${v.reads.reads_coll.reverse}'
                #else
                  mp${i}='${v.reads.reads_il}'
                #end if
              #end for
            #end if
            #if str($long_seqs) != 'None'
              long='
              #for $i in range(len($long_seqs))
                long$i
              #end for
              '
              #for $i, $v in enumerate($long_seqs)
                long${i}='$v'
              #end for
            #end if
    </command>

    <!--
        User-facing parameters. The letter(s) in parentheses at the end of each help
        text name the abyss-pe variable that the command template sets from the
        parameter. Apart from k, the numeric parameters are optional and are left
        off the command line when empty.
    -->
    <inputs>
        <!-- Read libraries -->
        <repeat name="lib_repeat" title="Paired-end library" help="(lib)">
            <expand macro="reads_conditional" />
        </repeat>
        <param name="se_reads" type="data" multiple="true" optional="true" format="fasta,fastq,fastqsanger,fastqillumina" label="Single-end reads" help="(se)" />
        <repeat name="mp_repeat" title="Mate-pair library" help="The mate-pair libraries are used only for scaffolding and do not contribute towards the consensus sequence (mp)">
            <expand macro="reads_conditional" />
        </repeat>
        <param name="long_seqs" type="data" multiple="true" optional="true" format="fasta,fastq,fastqsanger,fastqillumina" label="Long sequences" help="Long sequence libraries are used only for rescaffolding and do not contribute towards the consensus sequence (long)" />

        <!-- Assembly parameters -->
        <param name="k" type="integer" value="41" label="K-mer length (in bp)" help="The length of a k-mer (when K is not set) or the span of a k-mer pair (when K is set) (k)" />
        <param name="K" type="integer" value="" optional="true" label="The length (in bp) of a single k-mer in a k-mer pair" help="Optional (K)" />
        <param name="q" type="integer" value="3" max="40" optional="true" label="Minimum base quality when trimming" help="Trim bases from the ends of reads whose quality is less than this value (q)" />
        <param name="Q" type="integer" value="0" max="40" optional="true" label="Minimum base quality" help="Mask all bases of reads whose quality is less than this value as 'N' (Q)" />
        <param name="e" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage" help="Erode bases at the ends of blunt contigs with coverage less than this value. Defaults to round(sqrt(median)) (e)" />
        <param name="E" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage per strand" help="Erode bases at the ends of blunt contigs with coverage less than this value on either strand. Defaults to 1 if sqrt(median) &gt; 2 else 0 (E)" />
        <param name="t" type="integer" value="" optional="true" label="Maximum length of blunt contigs to trim" help="Defaults to the value of k (t)" />
        <param name="c" type="float" value="" optional="true" label="Minimum mean k-mer coverage of a unitig" help="Defaults to sqrt(median) (c)" />
        <param name="b" type="integer" value="" optional="true" label="Maximum length of a bubble (in bp)" help="abyss-pe has two bubble popping stages. The default limits are 3*k bp for ABYSS and 10000 bp for PopBubbles (b)" />
        <param name="SS" type="boolean" label="Assemble in strand-specific mode" help="Requires that all libraries are strand-specific RNA-Seq libraries. Assumes that the first read in a read pair is reversed with respect to the transcripts sequenced (SS)" />
        <param name="m" type="integer" value="50" min="0" optional="true" label="Minimum overlap of two unitigs (in bp)" help="(m)" />
        <param name="p" type="float" value="0.9" optional="true" label="Minimum sequence identity of a bubble" help="(p)" />
        <param name="a" type="integer" value="2" min="0" optional="true" label="Maximum number of branches of a bubble" help="(a)" />
        <param name="l" type="integer" value="" min="1" optional="true" label="Minimum alignment length of a read (in bp)" help="Defaults to the value of k (l)" />
        <param name="s" type="integer" value="200" min="0" optional="true" label="Minimum unitig length required for building contigs (in bp)" help="The seed length should be at least twice the value of k. If more sequence is assembled than the expected genome size, try increasing this value (s)" />
        <param name="n" type="integer" value="10" min="0" optional="true" label="Minimum number of pairs required for building contigs" help="(n)" />
        <param name="d" type="integer" value="6" min="0" optional="true" label="Allowable error of a distance estimate (in bp)" help="(d)" />
        <param name="S" type="integer" value="" min="0" optional="true" label="Minimum contig length required for building scaffolds (in bp)" help="Defaults to the value of s (S)" />
        <param name="N" type="integer" value="" min="0" optional="true" label="Minimum number of pairs required for building scaffolds" help="Defaults to the value of n (N)" />
    </inputs>

    <!--
        Datasets collected from the job working directory (from_work_dir).
        An output that carries a <filter> is only created when the inputs named
        in the filter expression were supplied.
    -->
    <outputs>
        <data name="unitigs" format="fasta" label="${tool.name} on ${on_string}: unitigs" from_work_dir="abyss-unitigs.fa" />
        <!-- Only when at least one paired-end library is given -->
        <data name="contigs_outfile" format="fasta" label="${tool.name} on ${on_string}: contigs" from_work_dir="abyss-contigs.fa">
            <filter>lib_repeat</filter>
        </data>
        <!-- Only when at least one paired-end or mate-pair library is given -->
        <data name="scaffolds" format="fasta" label="${tool.name} on ${on_string}: scaffolds" from_work_dir="abyss-scaffolds.fa">
            <filter>lib_repeat or mp_repeat</filter>
        </data>
        <!-- Only when long sequences are given; labelled "long scaffolds" so it can be told apart from the "scaffolds" output in the history -->
        <data name="long_scaffolds" format="fasta" label="${tool.name} on ${on_string}: long scaffolds" from_work_dir="abyss-long-scaffs.fa">
            <filter>long_seqs</filter>
        </data>
        <data name="bubbles" format="fasta" label="${tool.name} on ${on_string}: bubbles" from_work_dir="abyss-bubbles.fa" />
        <data name="indels" format="fasta" label="${tool.name} on ${on_string}: indels" from_work_dir="abyss-indel.fa" />
        <data name="coverage_histogram_outfile" format="tabular" label="${tool.name} on ${on_string}: coverage histogram" from_work_dir="coverage.hist" />
        <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats" from_work_dir="abyss-stats.tab" />
    </outputs>

    <tests>
        <!-- Test 1: one paired-end library given as two separate datasets, k=50 -->
        <test>
            <param name="k" value="50"/>
            <repeat name="lib_repeat">
                <conditional name="reads">
                    <param name="reads_selector" value="paired"/>
                    <param name="reads1" value="assembly_sample-R1.fastq" ftype="fastqsanger"/>
                    <param name="reads2" value="assembly_sample-R2.fastq" ftype="fastqsanger"/>
                </conditional>
            </repeat>
            <output name="unitigs" file="abyss-unitigs1.fa"/>
            <output name="contigs_outfile" file="abyss-contigs1.fa"/>
            <output name="scaffolds" file="abyss-scaffolds1.fa"/>
            <output name="bubbles" file="empty_file.fasta"/>
            <output name="indels" file="empty_file.fasta"/>
            <output name="coverage_histogram_outfile" file="coverage1.hist"/>
            <output name="stats" file="abyss-stats1.tab"/>
        </test>

        <!-- Test 2: single-end reads only (two datasets), k=50 -->
        <test>
            <param name="k" value="50"/>
            <param name="se_reads" value="assembly_sample-R1.fastq,assembly_sample-R2.fastq" ftype="fastqsanger"/>
            <output name="unitigs" file="abyss-unitigs2.fa"/>
            <output name="bubbles" file="empty_file.fasta"/>
            <output name="indels" file="empty_file.fasta"/>
            <output name="coverage_histogram_outfile" file="coverage2.hist"/>
            <output name="stats" file="abyss-stats2.tab"/>
        </test>

        <!-- Test 3: single-end reads plus one long-sequence dataset, k=50; also checks long_scaffolds -->
        <test>
            <param name="k" value="50"/>
            <param name="se_reads" value="assembly_sample-R1.fastq,assembly_sample-R2.fastq" ftype="fastqsanger"/>
            <param name="long_seqs" value="assembly_sample-R2.fastq" ftype="fastqsanger"/>
            <output name="unitigs" file="abyss-unitigs3.fa"/>
            <output name="long_scaffolds" file="abyss-long-scaffolds3.fa"/>
            <output name="bubbles" file="empty_file.fasta"/>
            <output name="indels" file="empty_file.fasta"/>
            <output name="coverage_histogram_outfile" file="coverage3.hist"/>
            <output name="stats" file="abyss-stats3.tab"/>
        </test>
    </tests>
    <help>
**What it does**

`ABySS`_ is a de novo sequence assembler intended for short paired-end reads and large genomes.

**Input**

Input files should be FASTA or FASTQ files.

For paired reads, a pair of reads must be named with the suffixes /1 and /2 to identify the first and second read, or the reads may be named identically. The paired reads may be in separate files or interleaved in a single file.

At least one paired-end library or one single-end library must be provided.

Contigs are generated only if at least one paired-end library is provided. Scaffolds are generated only if at least one paired-end library or one mate-pair library is provided.

**Description**

This tool performs the complete ABySS pipeline using abyss-pe, described in https://github.com/bcgsc/abyss/blob/master/doc/flowchart.pdf .

**License and citation**

This Galaxy tool is Copyright © 2015-2016 `Earlham Institute`_ and is released under the `MIT license`_.

.. _Earlham Institute: http://www.earlham.ac.uk/
.. _MIT license: https://opensource.org/licenses/MIT

You can use this tool only if you agree to the license terms of: `ABySS`_.

.. _ABySS: http://www.bcgsc.ca/platform/bioinfo/software/abyss
    </help>
    <!-- Publication to cite for this tool, referenced by its DOI. -->
    <citations>
        <citation type="doi">10.1101/gr.089532.108</citation>
    </citations>
</tool>