view chimera.slayer.xml @ 3:a8263249eacb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit 283d5933cd945b7815b3da2ce45baa60d73e9746
author iuc
date Fri, 06 Apr 2018 03:57:19 -0400
parents 6f7f890e6a54
children a899cc1416bc
line wrap: on
line source

<tool profile="16.07" id="mothur_chimera_slayer" name="Chimera.slayer" version="@WRAPPER_VERSION@.0">
    <description>Find putative chimeras using slayer</description>
    <!-- Shared definitions are imported from macros.xml (not visible in this file).
         The expand elements below, and the @...@ tokens used elsewhere in this
         wrapper, are presumably defined there; confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
@SHELL_OPTIONS@

## enable mothur to find blast: link everything that sits next to the mothur
## executable into the working directory (this also provides the ./mothur
## that is invoked at the end of the pipeline)
ln -s `dirname \$(which mothur)`/* . &&

## create symlinks to input datasets; the mothur call below refers only to these
## fixed names, so real dataset paths never pass through the sed that strips blanks
ln -s '$fasta' fasta.dat &&
#if $alignment.source == "self":
    ## name, group and count are optional: link only the ones that were supplied
    #if $alignment.name:
        ln -s '$alignment.name' alignment.name.dat &&
    #end if
    #if $alignment.group:
        ln -s '$alignment.group' alignment.group.dat &&
    #end if
    #if $alignment.count:
        ln -s '$alignment.count' alignment.count.dat &&
    #end if
#else:
    ln -s '$alignment.reference' alignment.reference.dat &&
#end if

echo 'chimera.slayer(
    fasta=fasta.dat,
    #if $alignment.source == "self":
        reference=self,
        #if $alignment.name:
            name=alignment.name.dat,
        #end if
        #if $alignment.group:
            group=alignment.group.dat,
        #end if
        #if $alignment.count:
            count=alignment.count.dat,
        #end if
    #else:
        reference=alignment.reference.dat,
    #end if
    #if $options.setby == "user":
        search=$options.search,
        window=$options.window,
        increment=$options.increment,
        match=$options.match,
        mismatch=$options.mismatch,
        numwanted=$options.numwanted,
        parents=$options.parents,
        minsim=$options.minsim,
        mincov=$options.mincov,
        iters=$options.iters,
        minbs=$options.minbs,
        minsnp=$options.minsnp,
        divergence=$options.divergence,
        trim=$options.trim,
        split=$options.split,
    #end if
    dereplicate=$dereplicate,
    processors='\${GALAXY_SLOTS:-8}'
)'
| sed 's/ //g'  ## mothur trips over whitespace
| ./mothur
| tee mothur.out.log
    ]]></command>
    <inputs>
        <!-- sequences to be screened for chimeras -->
        <param argument="fasta" type="data" format="fasta" label="fasta - Candidate Sequences"/>
        <!-- where the reference comes from: a history dataset, a cached alignment
             database, or the candidate sequences themselves ("self") -->
        <conditional name="alignment">
            <param name="source" type="select" label="Select Reference Template from">
                <option value="hist">History</option>
                <option value="ref">Cached Reference</option>
                <option value="self">Self - Use abundant sequences from the input Candidate Sequences fasta</option>
            </param>
            <when value="ref">
                <param argument="reference" type="select" label="reference - Select an alignment database">
                    <options from_data_table="mothur_aligndb">
                    </options>
                </param>
            </when>
            <when value="hist">
                <param argument="reference" type="data" format="fasta" label="reference - Reference to align with"/>
            </when>
            <when value="self">
                <!-- all three are optional; the command only passes the ones that are set -->
                <param argument="count" type="data" format="mothur.count_table" optional="true" label="count - Count file"/>
                <param argument="name" type="data" format="mothur.names" optional="true" label="name - Sequences Names"/>
                <param argument="group" type="data" format="mothur.groups" optional="true" label="group - Sequences Name reference"
                    help="use the more abundant sequences from the same sample to check the query sequence"/>
            </when>
        </conditional>
        <!-- slayer tuning: with "default" none of these options are passed to mothur -->
        <conditional name="options">
            <param name="setby" type="select" label="Slayer Options">
                <option value="default">Use default settings</option>
                <option value="user">Manually set options</option>
            </param>
            <when value="default"/>
            <when value="user">
                <param argument="search" type="select" label="search - Search method for finding the closest parent">
                    <option value="blast" selected="true">blast</option>
                    <option value="kmer">kmer</option>
                </param>
                <param argument="window" type="integer" value="50" label="window - Window size for searching for chimeras (default 50)"/>
                <param argument="increment" type="integer" value="5" label="increment - Increment for window slide on each iteration (default 5)"/>
                <param argument="match" type="integer" value="5" label="match - Reward matched bases (default 5)"/>
                <param argument="mismatch" type="integer" value="-4" label="mismatch - Penalty for mismatched bases (default -4)"/>
                <param argument="numwanted" type="integer" value="15" label="numwanted - Number of potential parents to compare with query sequence (default 15)"/>
                <param argument="parents" type="integer" value="3" label="parents - Number of potential parents to investigate from the numwanted best matches (default 3)"/>
                <param argument="minsim" type="integer" value="90" label="minsim - Minimum similarity % between the query and parent (default 90)"/>
                <param argument="mincov" type="integer" value="70" label="mincov - Minimum coverage % of closest matches in reference and the query (default 70)"/>
                <param argument="iters" type="integer" value="1000" label="iters - Number of bootstrap iterations to try (default 1000)"/>
                <param argument="minbs" type="integer" value="90" label="minbs - Minimum bootstrap support % for calling a sequence chimeric (default 90)"/>
                <param argument="minsnp" type="integer" value="10" label="minsnp - Percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default 10)"/>
                <param argument="divergence" type="float" value="1.007" label="divergence - Divergence cutoff for chimera determination (default 1.007)"/>
                <param argument="trim" type="boolean" truevalue="true" falsevalue="false" checked="false" label="trim - include chimeric sequences trimmed to their longest piece"/>
                <param argument="split" type="boolean" truevalue="true" falsevalue="false" checked="false" label="split - detect tri- and quadmeras"
                    help="if a sequence comes back as non-chimeric, mothur will test the two sides to see if they are chimeric."/>
            </when>
        </conditional>
        <!-- always passed to mothur, independent of the options conditional above -->
        <param name="dereplicate" type="boolean" falsevalue="false" truevalue="true" checked="false" label="dereplicate - remove chimeric sequences from all groups"
            help="If parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric"/>
        <expand macro="param-savelog"/>
    </inputs>
    <outputs>
        <!-- log dataset declared by the shared macro (definition not visible in this file) -->
        <expand macro="logfile-output"/>
        <!-- result files are collected from the job working directory by file name -->
        <data name="slayer.chimeras"
              format="txt"
              from_work_dir="fasta.slayer.chimeras"
              label="${tool.name} on ${on_string}: slayer.chimeras"/>
        <data name="out_accnos"
              format="mothur.accnos"
              from_work_dir="fasta.slayer.accnos"
              label="${tool.name} on ${on_string}: slayer.accnos"/>
    </outputs>
    <tests>
        <!-- case 1: reference supplied as a history dataset, default slayer options -->
        <test>
            <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head"/>
            <param name="source" value="hist"/>
            <param name="reference" value="HMP_MOCK.v35.align"/>
            <param name="savelog" value="true"/>
            <output name="slayer.chimeras" ftype="txt" file="Mock_S280_L001_R1_001_small.trim.contigs.good.slayer.chimeras"/>
            <expand macro="logfile-test"/>
        </test>
        <!-- case 2: candidate sequences used as their own reference, user-set options with kmer search -->
        <test>
            <param name="fasta" value="HMP_MOCK.v35.align"/>
            <param name="source" value="self"/>
            <param name="name" value="HMP_MOCK.v35.align.names"/>
            <param name="setby" value="user"/>
            <param name="search" value="kmer"/>
            <param name="savelog" value="true"/>
            <output name="slayer.chimeras" ftype="txt" file="HMP_MOCK.v35.slayer.chimeras"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>
    <help><![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The chimera.slayer_ command identifies putative chimeras using the slayer approach.

ChimeraSlayer_ is a chimeric sequence detection utility, compatible with near-full length Sanger sequences and shorter 454-FLX sequences (~500 bp).

Chimera Slayer involves the following series of steps that operate to flag chimeric 16S rRNA sequences:

    (A) the ends of a query sequence are searched against an included database of reference chimera-free 16S sequences to identify potential parents of a chimera;
    (B) candidate parents of a chimera are selected as those that form a branched best scoring alignment to the NAST-formatted query sequence;
    (C) the NAST alignment of the query sequence is improved in a 'chimera-aware' profile-based NAST realignment to the selected reference parent sequences; and
    (D) an evolutionary framework is used to flag query sequences found to exhibit greater sequence homology to an in silico chimera formed between any two of the selected reference parent sequences.

Note:
It is not recommended to blindly discard all sequences flagged as chimeras. Some may represent naturally formed chimeras that do not represent PCR artifacts. Sequences flagged may warrant further investigation.


.. _ChimeraSlayer: http://microbiomeutil.sourceforge.net/
.. _chimera.slayer: https://www.mothur.org/wiki/Chimera.slayer

    ]]></help>
    <expand macro="citations"/>
</tool>