Mercurial > repos > iuc > links
view links.xml @ 0:7acd9ade2dd1 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/links commit f15e5bb67b7a212ecd8c98e816c80e22d3b7a0cb"
author | iuc |
---|---|
date | Wed, 23 Feb 2022 08:25:55 +0000 |
parents | |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for LINKS: scaffolds a set of contigs using long reads
    (or another assembly) supplied as one or more read datasets.
    Outputs: scaffolds (FASTA), contig/scaffold correspondence (TSV) and
    the scaffold graph (Graphviz dot).
-->
<tool id="links" name="LINKS" version="@TOOL_VERSION@+galaxy+@VERSION_SUFFIX@" python_template_version="3.5" profile="20.09">
    <description> - scaffold genome assemblies with long reads</description>
    <xrefs>
        <xref type="bio.tools">links</xref>
    </xrefs>
    <macros>
        <!-- Upstream LINKS release wrapped by this tool; also pins the package requirement. -->
        <token name="@TOOL_VERSION@">2.0.1</token>
        <!-- Wrapper revision for the given upstream release. -->
        <token name="@VERSION_SUFFIX@">1</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">links</requirement>
    </requirements>
    <version_command><![CDATA[
        LINKS | grep "LINKS v" | cut -d' ' -f4
    ]]></version_command>
    <command detect_errors="exit_code"><![CDATA[

        ## set up file paths
        ## The symlinks carry the datatype extension of each input dataset.
        #set sequences_fn = 'sequences.' + $f.ext
        ln -s '$f' '$sequences_fn' &&

        ## One symlink per read dataset; reads.fof lists them (one per line) for -s.
        #for $n, $read in enumerate($reads):
            #set read_fn = 'reads' + str($n) + '.' + $read.ext
            ln -s '$read' '$read_fn' &&
            printf '%s\n' '$read_fn' >> reads.fof &&
        #end for

        ## RUN LINKS
        ## Optional parameters are only passed when the user left a value in the form.
        LINKS
            -f '$sequences_fn'
            -s reads.fof
            -b links_output
            #if str($scaffolding.k):
                -k '$scaffolding.k'
            #end if
            #if str($scaffolding.d):
                -d '$scaffolding.d'
            #end if
            #if str($scaffolding.t):
                -t '$scaffolding.t'
            #end if
            #if str($advanced.a):
                -a '$advanced.a'
            #end if
            #if str($advanced.e):
                -e '$advanced.e'
            #end if
            #if str($advanced.o):
                -o '$advanced.o'
            #end if
            #if str($advanced.l):
                -l '$advanced.l'
            #end if
            #if str($advanced.z):
                -z '$advanced.z'
            #end if
            #if str($advanced.p):
                -p '$advanced.p'
            #end if
    ]]></command>
    <inputs>
        <param argument="-f" type="data" format="fasta,fasta.gz" label="Contigs" help="Sequences to scaffold" />
        <param name="reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Reads" help="Long sequence reads for scaffolding" multiple="true" />
        <section name="scaffolding" title="Scaffolding options" expanded="False">
            <param argument="-k" type="integer" value="15" optional="true" label="k-mer length" />
            <!-- -d and -t are free text because they accept comma-separated lists;
                 the validators restrict them to integers separated by commas
                 (an empty value is allowed since both parameters are optional). -->
            <param argument="-d" type="text" value="4000" optional="true" label="Distance" help="Distance between k-mer pairs. Multiple distances are separated by comma, e.g. 500,1000,2000,3000">
                <validator type="regex" message="Enter one or more integers separated by commas, e.g. 500,1000,2000,3000">^(\d+(,\d+)*)?$</validator>
            </param>
            <param argument="-t" type="text" value="2" optional="true" label="Step" help="Step of sliding window when extracting k-mer pairs from long reads. Multiple steps are separated by comma, e.g. 10,5">
                <validator type="regex" message="Enter one or more integers separated by commas, e.g. 10,5">^(\d+(,\d+)*)?$</validator>
            </param>
        </section>
        <section name="advanced" title="Advanced options" expanded="False">
            <param argument="-a" type="float" value="0.3" optional="true" label="Maximum link ratio" help="Maximum link ratio between two best contig pairs. Higher values lead to less accurate scaffolding" />
            <param argument="-e" type="float" value="0.1" optional="true" label="Error" help="Error (%) allowed on -d distance"/>
            <param argument="-l" type="integer" value="5" optional="true" label="Number of links" help="Minimum number of links (k-mer pairs) to compute scaffold"/>
            <param argument="-o" type="integer" value="0" optional="true" label="Offset" help="Offset position for extracting k-mer pairs"/>
            <param argument="-p" type="float" value="0.001" optional="true" label="Bloom filter false positive rate" help="Increase to prevent memory allocation errors"/>
            <param argument="-z" type="integer" value="500" optional="true" label="Minimum contig length" help="Minimum contig length to consider for scaffolding"/>
        </section>
    </inputs>
    <outputs>
        <!-- File names follow from the "-b links_output" base name used in the command. -->
        <data name="scaffolds" format="fasta" from_work_dir="links_output.scaffolds.fa" label="${tool.name} on ${on_string} (scaffolds)" />
        <data name="correspondence" format="tabular" from_work_dir="links_output.assembly_correspondence.tsv" label="${tool.name} on ${on_string} (Correspondence file)" />
        <data name="gv" format="graph_dot" from_work_dir="links_output.gv" label="${tool.name} on ${on_string} (Graph)" />
    </outputs>
    <tests>
        <!-- Basic test -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads.fasta.gz"/>
            <output name="scaffolds" ftype="fasta" file="single_readfile.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="single_readfile.assembly_correspondence.tsv"/>
            <output name="gv">
                <assert_contents>
                    <has_text text="node [shape = circle]" />
                </assert_contents>
            </output>
        </test>
        <!-- Test multiple readfile input -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads1.fasta.gz,reads2.fasta.gz"/>
            <output name="scaffolds" ftype="fasta" file="multi_readfile.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="multi_readfile.assembly_correspondence.tsv"/>
        </test>
        <!-- Test scaffolding parameters -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads.fasta.gz"/>
            <param name="k" value="19" />
            <param name="d" value="500,1000,2000,3000" />
            <param name="t" value="10,5" />
            <output name="scaffolds" ftype="fasta" file="scaffolding.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="scaffolding.assembly_correspondence.tsv"/>
        </test>
        <!-- Test advanced parameters -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads.fasta.gz"/>
            <param name="a" value="0.2" />
            <param name="e" value="0.05" />
            <param name="l" value="10" />
            <param name="o" value="1" />
            <param name="z" value="600" />
            <output name="scaffolds" ftype="fasta" file="advanced.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="advanced.assembly_correspondence.tsv"/>
        </test>
    </tests>
    <help><![CDATA[
.. figure:: https://github.com/warrenlr/links/raw/master/links-logo.png
   :alt: links-logo
   :width: 200px

`LINKS <https://github.com/bcgsc/LINKS>`__ is a genomics application for scaffolding genome assemblies with long reads, such as those produced by Oxford Nanopore Technologies Ltd. It can be used to scaffold high-quality draft genome assemblies with any long sequences (eg. ONT reads, PacBio reads, other draft genomes, etc).

**Input**:

**LINKS does not use quality information in the Reads, so it is recommended to filter the long reads first**. Provide a ``fasta`` or ``fasta.gz`` dataset containing the Contigs, and a ``fastq``, ``fastq.gz``, ``fasta`` or ``fasta.gz`` dataset of Reads to be used for scaffolding.

**Use cases**:

- *Use long reads to scaffold a draft assembly* by providing the draft assembly to the Contigs parameter and the long reads to the Reads parameter.
- *Use a reference assembly to scaffold a draft assembly* by providing the reference assembly to the Reads parameter. For example, you could use a species’s reference genome to scaffold a genome that was assembled for another individual.

**How it works**:

LINKS uses *k*-mer pairs from the Reads to identify candidate pairs of Contigs, then uses the number of spanning *k*-mer pairs and the mean distance between them to build scaffolds. You can control the distance between *k*-mer pairs, the length of the *k*-mers and the minimum number of *k*-mer pairs between contigs.

See the `LINKS readme on GitHub <https://github.com/bcgsc/LINKS#how-it-works>`__ for information on setting advanced parameters.
    ]]></help>
    <citations>
        <citation type="doi">10.1186/s13742-015-0076-3</citation>
    </citations>
</tool>