view ete_lineage_generator.xml @ 13:ed74587a13c8 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 23b3c7c09a7d391b576e3b19f8b34dd63d636bdc
author earlhaminst
date Thu, 01 Sep 2022 16:11:32 +0000
parents 2db72467da51
children d40b9a7debe5
line wrap: on
line source

<tool id="ete_lineage_generator" name="ETE lineage generator" version="@VERSION@+galaxy1">
    <description>from a list of species/taxids using the ETE Toolkit</description>
    <macros>
        <import>ete_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Cheetah template that assembles the command line for
        ete_lineage_generator.py from the tool parameters.
        A non-zero exit code marks the job as failed (detect_errors="exit_code").
    -->
    <command detect_errors="exit_code"><![CDATA[
## Input species list, taxonomy database and output table.
python '$__tool_directory__/ete_lineage_generator.py'
-s '$speciesFile'
-d '$database'
-o '$outputFile'
## Rank selection: "full" passes -f, "manual" passes one -r per chosen rank,
## and "primary" adds no flag at all.
#if $ranks.levels == "full"
    -f
#elif $ranks.levels == "manual"
    ## Each rank is quoted because some option values ("species group",
    ## "species subgroup") contain a space and would otherwise be split
    ## into two separate shell words.
    #for $rank in str($ranks.manualranks).split(','):
        -r '$rank'
    #end for
#end if
## The "lower" flag only exists inside the "-c" branch of the conditional,
## so it is emitted together with the compress flag.
#if $compresscond.compress == "-c"
    $compresscond.compress
    $compresscond.lower
#end if
## Expands to the boolean's truevalue or to an empty string.
$includeid
    ]]></command>
    <inputs>
        <!-- Datasets handed to the script via -s and -d. -->
        <param name="speciesFile" type="data" format="txt" label="Species file" help="List with one species/taxid per line" />
        <param name="database" type="data" format="sqlite" label="(ETE3) Taxonomy Database" help="The sqlite formatted Taxonomy used by ETE3 (which is derived from NCBI taxonomy)" />
        <!-- Rank selection: "primary" and "full" need no further input, "manual" exposes a multi-select. -->
        <conditional name="ranks">
            <param name="levels" type="select" label="Taxonomic levels" help="Taxonomic levels to include in the output">
                <option value="primary" selected="true">Primary</option>
                <option value="full">Full</option>
                <option value="manual">Manual</option>
            </param>
            <when value="manual">
                <!-- Each selected value is passed to the script as its own -r argument. -->
                <param name="manualranks" type="select" multiple="true" optional="false" label="Manual selection of ranks">
                    <option value="superkingdom">superkingdom</option>
                    <option value="kingdom" selected="true">kingdom</option>
                    <option value="subkingdom">subkingdom</option>
                    <option value="superphylum">superphylum</option>
                    <option value="phylum" selected="true">phylum</option>
                    <option value="subphylum">subphylum</option>
                    <option value="superclass">superclass</option>
                    <option value="class" selected="true">class</option>
                    <option value="subclass">subclass</option>
                    <option value="infraclass">infraclass</option>
                    <option value="cohort">cohort</option>
                    <option value="superorder">superorder</option>
                    <option value="order" selected="true">order</option>
                    <option value="suborder">suborder</option>
                    <option value="infraorder">infraorder</option>
                    <option value="parvorder">parvorder</option>
                    <option value="superfamily">superfamily</option>
                    <option value="family" selected="true">family</option>
                    <option value="subfamily">subfamily</option>
                    <option value="tribe">tribe</option>
                    <option value="subtribe">subtribe</option>
                    <option value="genus" selected="true">genus</option>
                    <option value="subgenus">subgenus</option>
                    <option value="species group">species group</option>
                    <option value="species subgroup">species subgroup</option>
                    <option value="species" selected="true">species</option>
                    <option value="subspecies">subspecies</option>
                    <option value="varietas">varietas</option>
                    <option value="forma">forma</option>
                </param>
            </when>
            <when value="primary" />
            <when value="full" />
        </conditional>
        <!-- The option values are the literal command line flags: "-c" enables filling, empty disables it. -->
        <conditional name="compresscond">
            <param name="compress" type="select" label="Fill unnamed ranks" help="Fill unnamed ranks with super/sub ranks (see 'Prefer lower ranks')">
                <option value="-c" selected="true">Yes</option>
                <option value="">No</option>
            </param>
            <when value="-c">
                <param name="lower" type="boolean" truevalue="-l" falsevalue="" checked="false" label="Prefer lower ranks" help="Take the next available lower rank (default: higher)"/>
            </when>
            <when value="" />
        </conditional>
        <!-- Off by default; when enabled the truevalue flag is appended to the command line. -->
        <param name="includeid" type="boolean" truevalue="--includeid" falsevalue="" checked="false" label="Include taxid in the table"
            help="For the case when the input consists of taxon names" />
    </inputs>
    <outputs>
        <!-- Single result dataset; its path is passed to the script through the -o option. -->
        <data
            name="outputFile"
            format="tsv"
            label="${tool.name} on ${on_string}" />
    </outputs>
    <tests>
        <!-- Full rank set, no filling of unnamed ranks. -->
        <test>
            <param name="speciesFile" ftype="txt" value="species.txt" />
            <param name="database" ftype="sqlite" value="taxdump.sqlite" />
            <param name="compress" value="" />
            <param name="levels" value="full" />
            <output name="outputFile" file="lineage.txt" />
        </test>
        <!-- Primary ranks with filling of unnamed ranks. -->
        <test>
            <param name="speciesFile" ftype="txt" value="species.txt" />
            <param name="database" ftype="sqlite" value="taxdump.sqlite" />
            <param name="compress" value="-c" />
            <param name="levels" value="primary" />
            <output name="outputFile" file="lineage-compress.txt" />
        </test>
        <!-- Primary ranks with filling, preferring lower ranks. Boolean test values are given as true/false, not as the flag string. -->
        <test>
            <param name="speciesFile" ftype="txt" value="species.txt" />
            <param name="database" ftype="sqlite" value="taxdump.sqlite" />
            <param name="compress" value="-c" />
            <param name="levels" value="primary" />
            <param name="lower" value="true" />
            <output name="outputFile" file="lineage-compress-lower.txt" />
        </test>
        <!-- Manual rank selection without filling. -->
        <!-- NOTE(review): the expected file is named lineage-full.txt although only kingdom and family are selected; confirm the fixture matches this test. -->
        <test>
            <param name="speciesFile" ftype="txt" value="species.txt" />
            <param name="database" ftype="sqlite" value="taxdump.sqlite" />
            <param name="compress" value="" />
            <param name="levels" value="manual" />
            <param name="manualranks" value="kingdom,family" />
            <output name="outputFile" file="lineage-full.txt" />
        </test>
        <!-- Defaults plus the taxid column. -->
        <test>
            <param name="speciesFile" ftype="txt" value="species.txt" />
            <param name="database" ftype="sqlite" value="taxdump.sqlite" />
            <param name="includeid" value="true" />
            <output name="outputFile" file="lineage-wid.txt" />
        </test>
        <!-- Taxid input: checks one resolved row, one all-NA row and the matching stderr message. -->
        <test>
            <param name="speciesFile" ftype="txt" value="species_ids.txt" />
            <param name="database" ftype="sqlite" value="taxdump.sqlite" />
            <output name="outputFile">
                <assert_contents>
                    <has_line line="9606&#009;Eukaryota&#009;Chordata&#009;Mammalia&#009;Euarchontoglires&#009;Hominoidea&#009;Homo&#009;Homo sapiens"/>
                    <has_line line="0&#009;NA&#009;NA&#009;NA&#009;NA&#009;NA&#009;NA&#009;NA"/>
                </assert_contents>
            </output>
            <assert_stderr>
                <has_line line="[0] could not determine lineage!"/>
            </assert_stderr>
        </test>
    </tests>
    <help><![CDATA[
Generates a table with lineage information for a list of species (also taxids and arbitrary taxons are accepted) using the `ETE Toolkit`_.

.. _ETE Toolkit: http://etetoolkit.org/

**Input**

- *Species file* a single column tabular file
- *(ETE3) Taxonomy Database* a sqlite database that has been created by ETE from the NCBI taxonomy dump

**Options**

- *Taxonomic levels* the columns to be included in the output table. There are two presets (full and primary) and a manual selection

    - *Full* contains all 29 ranks included in the NCBI taxonomy
    - *Primary* contains the primary ranks (kingdom, phylum, class, order, family, genus, species)
    - *Manual* the ranks of interest can be chosen by the user. The primary levels are chosen by default.

- *Fill unnamed ranks* Get missing data from "nearby" levels:

    - Some nodes in the NCBI taxonomy tree have no name (no rank); these are shown by default as "NA" in the output. If *Fill unnamed ranks* is enabled, a neighbouring super/sub rank is accepted in its place (e.g. superorder is accepted as order if the order is unnamed but the name of the superorder is given)

- *Prefer lower ranks* when filling unnamed ranks, lower levels are preferred over higher ones

**Output**

Table (tab separated). The first column contains the species names. The following columns contain the
rank names of the levels of interest.
    ]]></help>
    <expand macro="citations" />
</tool>