view links.xml @ 0:7acd9ade2dd1 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/links commit f15e5bb67b7a212ecd8c98e816c80e22d3b7a0cb"
author iuc
date Wed, 23 Feb 2022 08:25:55 +0000
parents
children
line wrap: on
line source

<tool id="links" name="LINKS" version="@TOOL_VERSION@+galaxy+@VERSION_SUFFIX@" python_template_version="3.5" profile="20.09">
    <description> - scaffold genome assemblies with long reads</description>
    <!-- Version tokens. Both are combined into the tool's version attribute;
         @TOOL_VERSION@ also pins the package requirement below. -->
    <macros>
        <token name="@TOOL_VERSION@">2.0.1</token>
        <token name="@VERSION_SUFFIX@">1</token>
    </macros>
    <!-- Cross-reference to the tool's bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">links</xref>
    </xrefs>
    <!-- Software dependency providing the LINKS executable. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">links</requirement>
    </requirements>
    <!-- Reports the LINKS version: runs LINKS without arguments, keeps the line
         containing "LINKS v" and prints its fourth space-separated field. -->
    <version_command><![CDATA[LINKS | grep "LINKS v" | cut -d' ' -f4]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Symlink the contigs into the working directory under a name ending in the dataset extension
        #set contigs_link = 'sequences.' + $f.ext
        ln -s '$f' '$contigs_link' &&

        ## Symlink every read dataset the same way and append its link name to reads.fof
        #for $idx, $read_file in enumerate($reads):
            #set read_link = 'reads' + str($idx) + '.' + $read_file.ext
            ln -s '$read_file' '$read_link' &&
            printf '%s\n' '$read_link' >> reads.fof &&
        #end for

        ## Run LINKS on the contigs (-f) with the file of read filenames (-s), using links_output as output prefix (-b)
        LINKS -f '$contigs_link' -s reads.fof -b links_output

        ## Scaffolding options: a flag is only emitted when its parameter is not empty
        #if str($scaffolding.k) != '':
            -k '$scaffolding.k'
        #end if
        #if str($scaffolding.d) != '':
            -d '$scaffolding.d'
        #end if
        #if str($scaffolding.t) != '':
            -t '$scaffolding.t'
        #end if

        ## Advanced options, handled the same way
        #if str($advanced.a) != '':
            -a '$advanced.a'
        #end if
        #if str($advanced.e) != '':
            -e '$advanced.e'
        #end if
        #if str($advanced.o) != '':
            -o '$advanced.o'
        #end if
        #if str($advanced.l) != '':
            -l '$advanced.l'
        #end if
        #if str($advanced.z) != '':
            -z '$advanced.z'
        #end if
        #if str($advanced.p) != '':
            -p '$advanced.p'
        #end if
    ]]></command>
    <inputs>
        <!-- Input datasets: the contigs to scaffold and one or more read files. -->
        <param argument="-f" type="data" format="fasta,fasta.gz" label="Contigs" help="Sequences to scaffold" />
        <param name="reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Reads" help="Long sequence reads for scaffolding" multiple="true" />
        <!-- k-mer pair extraction settings. Distance and step are text fields because
             they accept comma-separated lists. All are optional: the command template
             leaves out the flag of any field that is empty. -->
        <section name="scaffolding" title="Scaffolding options" expanded="False">
            <param argument="-k" type="integer" value="15" optional="true" label="k-mer length" />
            <param argument="-d" type="text" value="4000" optional="true" label="Distance" help="Distance between k-mer pairs. Multiple distances are separated by comma, e.g. 500,1000,2000,3000"/>
            <param argument="-t" type="text" value="2" optional="true" label="Step" help="Step of sliding window when extracting k-mer pairs from long reads. Multiple steps are separated by comma, e.g. 10,5" />
        </section>
        <!-- Remaining tuning options, optional in the same way as the scaffolding ones. -->
        <section name="advanced" title="Advanced options" expanded="False">
            <param argument="-a" type="float" value="0.3" optional="true" label="Maximum link ratio" help="Maximum link ratio between two best contig pairs. Higher values lead to least accurate scaffolding" />
            <param argument="-e" type="float" value="0.1" optional="true" label="Error" help="Error (%) allowed on -d distance"/>
            <param argument="-l" type="integer" value="5" optional="true" label="Number of links" help="Minimum number of links (k-mer pairs) to compute scaffold"/>
            <param argument="-o" type="integer" value="0" optional="true" label="Offset" help="Offset position for extracting k-mer pairs"/>
            <param argument="-p" type="float" value="0.001" optional="true" label="Bloom filter false positive rate" help="Increase to prevent memory allocation errors"/>
            <param argument="-z" type="integer" value="500" optional="true" label="Minimum contig length" help="Minimum contig length to consider for scaffolding"/>
        </section>
    </inputs>
    <outputs>
        <!-- Each dataset is collected from a file in the job working directory whose
             name starts with the links_output prefix passed to LINKS via -b. -->
        <data name="scaffolds"
              format="fasta"
              from_work_dir="links_output.scaffolds.fa"
              label="${tool.name} on ${on_string} (scaffolds)"/>
        <data name="correspondence"
              format="tabular"
              from_work_dir="links_output.assembly_correspondence.tsv"
              label="${tool.name} on ${on_string} (Correspondence file)"/>
        <data name="gv"
              format="graph_dot"
              from_work_dir="links_output.gv"
              label="${tool.name} on ${on_string} (Graph)"/>
    </outputs>
    <tests>
        <!-- Basic test -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads.fasta.gz"/>
            <output name="scaffolds" ftype="fasta" file="single_readfile.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="single_readfile.assembly_correspondence.tsv"/>
            <!-- The graph is checked for a marker string instead of being compared to a reference file. -->
            <output name="gv">
                <assert_contents>
                    <has_text text="node [shape = circle]" />
                </assert_contents>
            </output>
        </test>
        <!-- Test multiple readfile input -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads1.fasta.gz,reads2.fasta.gz"/>
            <output name="scaffolds" ftype="fasta" file="multi_readfile.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="multi_readfile.assembly_correspondence.tsv"/>
        </test>
        <!-- Test scaffolding parameters -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads.fasta.gz"/>
            <param name="k" value="19" />
            <param name="d" value="500,1000,2000,3000" />
            <param name="t" value="10,5" />
            <output name="scaffolds" ftype="fasta" file="scaffolding.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="scaffolding.assembly_correspondence.tsv"/>
        </test>
        <!-- Test advanced parameters (-p is not set here) -->
        <test>
            <param name="f" value="used_contigs.fasta.gz"/>
            <param name="reads" value="reads.fasta.gz"/>
            <param name="a" value="0.2" />
            <param name="e" value="0.05" />
            <param name="l" value="10" />
            <param name="o" value="1" />
            <param name="z" value="600" />
            <output name="scaffolds" ftype="fasta" file="advanced.scaffolds.fa"/>
            <output name="correspondence" ftype="tabular" file="advanced.assembly_correspondence.tsv"/>
        </test>
    </tests>
    <help><![CDATA[

.. figure:: https://github.com/warrenlr/links/raw/master/links-logo.png
   :alt: links-logo
   :width: 200px

`LINKS <https://github.com/bcgsc/LINKS>`__ is a genomics application for
scaffolding genome assemblies with long reads, such as those produced by
Oxford Nanopore Technologies Ltd. It can be used to scaffold
high-quality draft genome assemblies with any long sequences (e.g. ONT
reads, PacBio reads, other draft genomes, etc.).

**Input**:

**LINKS does not use quality information in the Reads, so it is
recommended to filter the long reads first**.

Provide a ``fasta`` or ``fasta.gz`` dataset containing the Contigs, and
one or more ``fastq``, ``fastq.gz``, ``fasta`` or ``fasta.gz`` datasets
of Reads to be used for scaffolding.

**Use cases**:

-  *Use long reads to scaffold a draft assembly* by providing the draft
   assembly to the Contigs parameter and the long reads to the Reads
   parameter.
-  *Use a reference assembly to scaffold a draft assembly* by providing
   the reference assembly to the Reads parameter. For example, you could
   use a species’s reference genome to scaffold a genome that was
   assembled for another individual.

**How it works**:

LINKS uses *k*-mer pairs from the Reads to identify candidate pairs of
Contigs, then uses the number of spanning *k*-mer pairs and the mean
distance between them to build scaffolds.

You can control the distance between *k*-mer pairs, the length of the
*k*-mers and the minimum number of *k*-mer pairs between contigs. See
the `LINKS readme on
GitHub <https://github.com/bcgsc/LINKS#how-it-works>`__ for information
on setting advanced parameters.



    ]]></help>
    <!-- Publication to cite for this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1186/s13742-015-0076-3</citation>
    </citations>
</tool>