Mercurial > repos > iuc > abyss
view abyss-pe.xml @ 11:7547fcab5d19 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/abyss commit dff6775ae41cbd3da949bbc57c4f79a11cd56ea6
author | iuc |
---|---|
date | Tue, 09 May 2023 10:13:25 +0000 |
parents | 8f1b150a2487 |
children | a929ce596a34 |
line wrap: on
line source
<!--
    Galaxy wrapper for the ABySS de novo assembler (abyss-pe pipeline driver).

    Inputs : any number of paired-end libraries (lib), single-end reads (se),
             mate-pair libraries (mp) and long sequences (long), plus the
             abyss-pe tuning parameters exposed one-to-one below.
    Outputs: unitigs, indels and stats are always collected; contigs, scaffolds
             and long scaffolds are only collected when the matching kind of
             input was supplied (see the <filter> elements).
-->
<tool id="abyss-pe" name="ABySS" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>de novo sequence assembler</description>
    <macros>
        <token name="@TOOL_VERSION@">2.3.6</token>
        <!-- Bumped because the generated command line changed (paired collection fix) -->
        <token name="@VERSION_SUFFIX@">1</token>
        <!-- Shared by the paired-end and mate-pair repeats: three ways of supplying a read pair -->
        <xml name="reads_conditional">
            <conditional name="reads">
                <param name="reads_selector" type="select" label="Type of paired-end datasets">
                    <option value="paired">2 separate datasets</option>
                    <option value="paired_collection">1 paired dataset collection</option>
                    <option value="paired_il">1 dataset of interleaved reads</option>
                </param>
                <when value="paired">
                    <param name="reads1" type="data" format="fasta,fastq,fastq.gz" label="Paired-end reads 1" />
                    <param name="reads2" type="data" format="fasta,fastq,fastq.gz" label="Paired-end reads 2" />
                </when>
                <when value="paired_collection">
                    <!-- Same formats as the other read inputs; "fastq" already matches its subtypes -->
                    <param name="reads_coll" type="data_collection" collection_type="paired" format="fasta,fastq,fastq.gz" label="Paired-end reads collection" />
                </when>
                <when value="paired_il">
                    <param name="reads_il" type="data" format="fasta,fastq,fastq.gz" label="Interleaved paired-end reads" />
                </when>
            </conditional>
        </xml>
    </macros>
    <xrefs>
        <xref type="bio.tools">abyss</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">abyss</requirement>
        <requirement type="package" version="0.7.17">bwa</requirement>
    </requirements>
    <stdio>
        <!-- Anything other than zero is an error -->
        <exit_code range="1:" />
        <!-- In case the return code has not been set properly check stderr too -->
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>
    <version_command>ABYSS --version | head -n 1</version_command>
    <command><![CDATA[
#def get_link_fname($data, $prefix, $index)
    ## Make a name for symlinked files including extension: <prefix>_<index>.<ext>
    #return ''.join([str($prefix), '_', str($index), '.', str($data.ext)])
#end def

## Go through all inputs and create symbolic links.
## The link names built here must match the ones passed to abyss-pe below.
#for $i, $v in enumerate($lib_repeat)
    #if $v.reads.reads_selector == 'paired'
        ln -s '$v.reads.reads1' $get_link_fname($v.reads.reads1, 'lib_reads1', $i) &&
        ln -s '$v.reads.reads2' $get_link_fname($v.reads.reads2, 'lib_reads2', $i) &&
    #elif $v.reads.reads_selector == 'paired_collection'
        ln -s '$v.reads.reads_coll.forward' $get_link_fname($v.reads.reads_coll.forward, 'lib_reads_coll_forward', $i) &&
        ln -s '$v.reads.reads_coll.reverse' $get_link_fname($v.reads.reads_coll.reverse, 'lib_reads_coll_reverse', $i) &&
    #else
        ln -s '$v.reads.reads_il' $get_link_fname($v.reads.reads_il, 'lib_reads_il', $i) &&
    #end if
#end for
#for $i, $v in enumerate($mp_repeat)
    #if $v.reads.reads_selector == 'paired'
        ln -s '$v.reads.reads1' $get_link_fname($v.reads.reads1, 'mp_reads1', $i) &&
        ln -s '$v.reads.reads2' $get_link_fname($v.reads.reads2, 'mp_reads2', $i) &&
    #elif $v.reads.reads_selector == 'paired_collection'
        ln -s '$v.reads.reads_coll.forward' $get_link_fname($v.reads.reads_coll.forward, 'mp_reads_coll_forward', $i) &&
        ln -s '$v.reads.reads_coll.reverse' $get_link_fname($v.reads.reads_coll.reverse, 'mp_reads_coll_reverse', $i) &&
    #else
        ln -s '$v.reads.reads_il' $get_link_fname($v.reads.reads_il, 'mp_reads_il', $i) &&
    #end if
#end for
#if str($se_reads) != 'None'
    #for $i, $v in enumerate($se_reads)
        ln -s '$v' $get_link_fname($v, 'se_reads', $i) &&
    #end for
#end if
#if str($long_seqs) != 'None'
    #for $i, $v in enumerate($long_seqs)
        ln -s '$v' $get_link_fname($v, 'long_seqs', $i) &&
    #end for
#end if

abyss-pe
name=abyss
j=\${GALAXY_SLOTS:-1}
B=\${GALAXY_MEMORY_MB:-2048}M
k=$k
## Optional numeric parameters render as an empty string when unset and are then omitted
#if str($K)
    K=$K
#end if
#if str($q)
    q=$q
#end if
#if str($Q)
    Q=$Q
#end if
#if str($e)
    e=$e
#end if
#if str($E)
    E=$E
#end if
#if str($t)
    t=$t
#end if
#if str($c)
    c=$c
#end if
#if str($b)
    b=$b
#end if
#if $SS
    SS=--SS
#end if
#if str($m)
    m=$m
#end if
#if str($p)
    p=$p
#end if
#if str($a)
    a=$a
#end if
#if str($l)
    l=$l
#end if
#if str($s)
    s=$s
#end if
#if str($n)
    n=$n
#end if
#if str($d)
    d=$d
#end if
#if $S
    S='$S'
#end if
#if $N
    N='$N'
#end if
## lib='lib0 lib1 ...' names the libraries; each libN='file1 file2' lists its files
#if $lib_repeat
    lib='${' '.join(['lib'+str($i) for $i in range(len($lib_repeat))])}'
    #for $i, $v in enumerate($lib_repeat)
        #if $v.reads.reads_selector == 'paired'
            lib${i}='${get_link_fname($v.reads.reads1, "lib_reads1", $i)} ${get_link_fname($v.reads.reads2, "lib_reads2", $i)}'
        #elif $v.reads.reads_selector == 'paired_collection'
            lib${i}='${get_link_fname($v.reads.reads_coll.forward, "lib_reads_coll_forward", $i)} ${get_link_fname($v.reads.reads_coll.reverse, "lib_reads_coll_reverse", $i)}'
        #else
            lib${i}='${get_link_fname($v.reads.reads_il, "lib_reads_il", $i)}'
        #end if
    #end for
#end if
#if str($se_reads) != 'None'
    se='${' '.join([$get_link_fname($v, "se_reads", $i) for $i, $v in enumerate($se_reads)])}'
#end if
#if $mp_repeat
    mp='${' '.join(['mp'+str($i) for $i in range(len($mp_repeat))])}'
    #for $i, $v in enumerate($mp_repeat)
        #if $v.reads.reads_selector == 'paired'
            mp${i}='${get_link_fname($v.reads.reads1, "mp_reads1", $i)} ${get_link_fname($v.reads.reads2, "mp_reads2", $i)}'
        #elif $v.reads.reads_selector == 'paired_collection'
            mp${i}='${get_link_fname($v.reads.reads_coll.forward, "mp_reads_coll_forward", $i)} ${get_link_fname($v.reads.reads_coll.reverse, "mp_reads_coll_reverse", $i)}'
        #else
            mp${i}='${get_link_fname($v.reads.reads_il, "mp_reads_il", $i)}'
        #end if
    #end for
#end if
#if str($long_seqs) != 'None'
    long='${' '.join(['long'+str($i) for $i in range(len($long_seqs))])}'
    #for $i, $v in enumerate($long_seqs)
        long${i}='${get_link_fname($v, "long_seqs", $i)}'
    #end for
#end if
    ]]></command>
    <inputs>
        <repeat name="lib_repeat" title="Paired-end library" help="(lib)">
            <expand macro="reads_conditional" />
        </repeat>
        <param name="se_reads" argument="se" type="data" multiple="true" optional="true" format="fasta,fastq,fastq.gz" label="Single-end reads" />
        <repeat name="mp_repeat" title="Mate-pair library" help="The mate-pair libraries are used only for scaffolding and do not contribute towards the consensus sequence (mp)">
            <expand macro="reads_conditional" />
        </repeat>
        <param name="long_seqs" argument="long" type="data" multiple="true" optional="true" format="fasta,fastq,fastq.gz" label="Long sequences" help="Long sequence libraries are used only for rescaffolding and do not contribute towards the consensus sequence" />
        <param argument="k" type="integer" value="41" label="K-mer length (in bp)" help="The length of a k-mer (when -K is not set) or the span of a k-mer pair (when K is set)" />
        <param argument="K" type="integer" value="" optional="true" label="The length (in bp) of a single k-mer in a k-mer pair" help="Optional" />
        <param argument="q" type="integer" value="3" max="40" optional="true" label="Minimum base quality when trimming" help="Trim bases from the ends of reads whose quality is less than this value" />
        <param argument="Q" type="integer" value="0" max="40" optional="true" label="Minimum base quality" help="Mask all bases of reads whose quality is less than this value as 'N'" />
        <param argument="e" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage" help="Erode bases at the ends of blunt contigs with coverage less than this value. Defaults to round(sqrt(median))" />
        <param argument="E" type="integer" value="" min="0" optional="true" label="Minimum erosion k-mer coverage per strand" help="Erode bases at the ends of blunt contigs with coverage less than this value on either strand. Defaults to 1 if sqrt(median) &gt; 2 else 0" />
        <param argument="t" type="integer" value="" optional="true" label="Maximum length of blunt contigs to trim" help="Defaults to the value of k" />
        <param argument="c" type="float" value="" optional="true" label="Minimum mean k-mer coverage of a unitig" help="Defaults to sqrt(median)" />
        <param argument="b" type="integer" value="" optional="true" label="Maximum length of a bubble (in bp)" help="abyss-pe has two bubble popping stages. The default limits are 3*k bp for ABYSS and 10000 bp for PopBubbles" />
        <param argument="SS" type="boolean" label="Assemble in strand-specific mode" help="Requires that all libraries are strand-specific RNA-Seq libraries. Assumes that the first read in a read pair is reversed with respect to the transcripts sequenced" />
        <param argument="m" type="integer" value="" min="0" optional="true" label="Minimum overlap of two unitigs (in bp)" help="Defaults to 0 (interpreted as k - 1) if mate-pair libraries are provided or if k&lt;=50, otherwise 50" />
        <param argument="p" type="float" value="0.9" optional="true" label="Minimum sequence identity of a bubble" />
        <param argument="a" type="integer" value="2" min="0" optional="true" label="Maximum number of branches of a bubble" />
        <param argument="l" type="integer" value="" min="1" optional="true" label="Minimum alignment length of a read (in bp)" help="Defaults to the value of k" />
        <param argument="s" type="integer" value="200" min="0" optional="true" label="Minimum unitig length required for building contigs (in bp)" help="The seed length should be at least twice the value of k. If more sequence is assembled than the expected genome size, try increasing this value" />
        <param argument="n" type="integer" value="10" min="0" optional="true" label="Minimum number of pairs required for building contigs" />
        <param argument="d" type="integer" value="6" min="0" optional="true" label="Allowable error of a distance estimate (in bp)" />
        <!-- S and N are text because abyss-pe accepts either a single integer or a "min-max" interval -->
        <param argument="S" type="text" value="" label="Minimum contig size required for building scaffolds (in bp)" help="It can be an interval. Defaults to 100-5000">
            <validator type="regex" message="Must be an integer or an interval">^([0-9]+(-[0-9]+)?)?$</validator>
        </param>
        <param argument="N" type="text" value="" label="Minimum number of pairs required for building scaffolds" help="It can be an interval. Defaults to 15-20">
            <validator type="regex" message="Must be an integer or an interval">^([0-9]+(-[0-9]+)?)?$</validator>
        </param>
    </inputs>
    <outputs>
        <data name="unitigs" format="fasta" label="${tool.name} on ${on_string}: unitigs" from_work_dir="abyss-unitigs.fa" />
        <data name="contigs_outfile" format="fasta" label="${tool.name} on ${on_string}: contigs" from_work_dir="abyss-contigs.fa">
            <filter>lib_repeat</filter>
        </data>
        <data name="scaffolds" format="fasta" label="${tool.name} on ${on_string}: scaffolds" from_work_dir="abyss-scaffolds.fa">
            <filter>lib_repeat or mp_repeat</filter>
        </data>
        <data name="long_scaffolds" format="fasta" label="${tool.name} on ${on_string}: long scaffolds" from_work_dir="abyss-long-scaffs.fa">
            <filter>long_seqs</filter>
        </data>
        <data name="indels" format="fasta" label="${tool.name} on ${on_string}: indels" from_work_dir="abyss-indel.fa" />
        <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats" from_work_dir="abyss-stats.tab" />
    </outputs>
    <tests>
        <!-- Paired-end library given as two separate gzipped FASTQ datasets -->
        <test>
            <repeat name="lib_repeat">
                <conditional name="reads">
                    <param name="reads_selector" value="paired" />
                    <param name="reads1" ftype="fastqsanger.gz" value="assembly_sample-R1.fastq.gz" />
                    <param name="reads2" ftype="fastqsanger.gz" value="assembly_sample-R2.fastq.gz" />
                </conditional>
            </repeat>
            <param name="k" value="50" />
            <output name="unitigs" file="abyss-unitigs1.fa" />
            <output name="contigs_outfile" file="abyss-contigs1.fa" />
            <output name="scaffolds" file="abyss-scaffolds1.fa" />
            <output name="indels" file="empty_file.fasta" />
            <output name="stats" file="abyss-stats1.tab" />
        </test>
        <!-- Single-end reads only -->
        <test>
            <param name="se_reads" ftype="fastq.gz" value="assembly_sample-R1.fastq.gz,assembly_sample-R2.fastq.gz" />
            <param name="k" value="50" />
            <output name="unitigs" file="abyss-unitigs2.fa" />
            <output name="indels" file="empty_file.fasta" />
            <output name="stats" file="abyss-stats2.tab" />
        </test>
        <!-- Single-end reads plus long sequences, FASTA input -->
        <test>
            <param name="se_reads" ftype="fasta" value="assembly_sample-R1.fasta,assembly_sample-R2.fasta" />
            <param name="long_seqs" ftype="fasta" value="assembly_sample-R2.fasta" />
            <param name="k" value="50" />
            <output name="unitigs" file="abyss-unitigs3.fa" />
            <output name="long_scaffolds" file="abyss-long-scaffolds3.fa" />
            <output name="indels" file="empty_file.fasta" />
            <output name="stats" file="abyss-stats3.tab" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

`ABySS`_ is a de novo sequence assembler intended for short paired-end reads and large genomes.

**Input**

Input files should be FASTA or FASTQ files. For paired reads, a pair of reads must be named with the suffixes /1 and /2 to identify the first and second read, or the reads may be named identically. The paired reads may be in separate files or interleaved in a single file.

At least a paired-end library or a single-end library must be provided. Contigs are generated only if at least a paired-end library is provided. Scaffolds are generated only if at least a paired-end library or a mate-pair library is provided.

**Description**

This tool performs the complete ABySS pipeline using abyss-pe, described in https://github.com/bcgsc/abyss/blob/master/doc/flowchart.pdf .

**License and citation**

This Galaxy tool is Copyright © 2015-2021 `Earlham Institute`_ and is released under the `MIT license`_.

.. _Earlham Institute: http://www.earlham.ac.uk/
.. _MIT license: https://opensource.org/licenses/MIT

You can use this tool only if you agree to the license terms of: `ABySS`_.

.. _ABySS: https://github.com/bcgsc/abyss
    ]]></help>
    <citations>
        <citation type="doi">10.1101/gr.214346.116</citation>
        <citation type="doi">10.1101/gr.089532.108</citation>
    </citations>
</tool>