view chimera.slayer.xml @ 1:1cce71e7e7ef draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit 721531d2e9fd1e208a3fba8cfbe5dcd572599ca2
author iuc
date Tue, 05 Sep 2017 17:07:29 -0400
parents 72b33fe8c05a
children 6f7f890e6a54
line wrap: on
line source

<tool profile="16.07" id="mothur_chimera_slayer" name="Chimera.slayer" version="@WRAPPER_VERSION@.0">
    <!-- One-line summary of the tool. -->
    <description>Find putative chimeras using slayer</description>
    <!-- Pull in shared definitions; the macros expanded below and the @TOKENS@ used in
         this file are not defined here (presumably they live in macros.xml; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Shared macro expansions; their content is defined outside this file. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
        @SHELL_OPTIONS@

        ## enable mothur to find blast
        loc=`which mothur` && ln -s "\${loc%/mothur}"/* . &&

        ## Create symlinks to the input datasets. The mothur call below refers only
        ## to these local, blank-free names: the sed filter further down removes every
        ## space from the command string, which would corrupt a raw dataset path
        ## that contains one.
        ln -s '$fasta' fasta.dat &&
        #if $alignment.source == "self":
            ## name, group and count are optional: link only the ones that were supplied
            #if $alignment.name:
                ln -s '$alignment.name' alignment.name.dat &&
            #end if
            #if $alignment.group:
                ln -s '$alignment.group' alignment.group.dat &&
            #end if
            #if $alignment.count:
                ln -s '$alignment.count' alignment.count.dat &&
            #end if
        #else:
            ## history dataset or cached reference: both arrive as alignment.template
            ln -s '$alignment.template' alignment.template.dat &&
        #end if

        echo 'chimera.slayer(
            fasta=fasta.dat,
            #if $alignment.source == "self":
                reference=self,
                #if $alignment.name:
                    name=alignment.name.dat,
                #end if
                #if $alignment.group:
                    group=alignment.group.dat,
                #end if
                #if $alignment.count:
                    count=alignment.count.dat,
                #end if
            #else:
                reference=alignment.template.dat,
            #end if
            #if $options.setby == "user":
                search=$options.search,
                window=$options.window,
                increment=$options.increment,
                match=$options.match,
                mismatch=$options.mismatch,
                numwanted=$options.numwanted,
                parents=$options.parents,
                minsim=$options.minsim,
                mincov=$options.mincov,
                iters=$options.iters,
                minbs=$options.minbs,
                minsnp=$options.minsnp,
                divergence=$options.divergence,
                trim=$options.trim,
                split=$options.split,
            #end if
            dereplicate=$dereplicate,
            processors='\${GALAXY_SLOTS:-8}'
        )'
        | sed 's/ //g'  ## mothur trips over whitespace
        | ./mothur
        | tee mothur.out.log
    ]]></command>
    <inputs>
        <!-- Sequences to be checked for chimeras. -->
        <param name="fasta" type="data" format="fasta" label="fasta - Candidate Sequences"/>
        <!-- Where the reference comes from: a history dataset, a cached alignment
             database, or the candidate sequences themselves ("self"). -->
        <conditional name="alignment">
            <param name="source" type="select" label="Select Reference Template from" help="">
                <option value="hist">History</option>
                <option value="ref">Cached Reference</option>
                <option value="self">Self - Use abundant sequences from the input Candidate Sequences fasta </option>
            </param>
            <when value="ref">
                <param name="template" type="select" label="reference - Select an alignment database " help="">
                    <options from_data_table="mothur_aligndb">
                    </options>
                </param>
            </when>
            <when value="hist">
                <param name="template" type="data" format="fasta" label="reference - Reference to align with" help=""/>
            </when>
            <when value="self">
                <!-- All three companion files are optional. -->
                <param name="count" type="data" format="mothur.count_table" optional="true" label="count - Count file"/>
                <param name="name" type="data" format="mothur.names" optional="true" label="names - Sequences Names"/>
                <param name="group" type="data" format="mothur.groups" optional="true" label="group - Sequences Name reference" help="use the more abundant sequences from the same sample to check the query sequence"/>
            </when>
        </conditional>
        <!-- Slayer tuning options; they are only passed to mothur when "user" is selected.
             The "(default N)" text in each label matches that parameter's value attribute. -->
        <conditional name="options">
            <param name="setby" type="select" label="Slayer Options" help="">
                <option value="default">Use default settings</option>
                <option value="user">Manually set options</option>
            </param>
            <when value="default"/>
            <when value="user">
                <param name="search" type="select" label="search - Search method for finding the closest parent" help="">
                    <option value="blast" selected="true">blast</option>
                    <option value="kmer">kmer</option>
                </param>
                <param name="window" type="integer" value="50" label="window - Window size for searching for chimeras (default 50)"/>
                <param name="increment" type="integer" value="5" label="increment - Increment for window slide on each iteration (default 5)"/>
                <param name="match" type="integer" value="5" label="match - Reward matched bases (default 5)"/>
                <param name="mismatch" type="integer" value="-4" label="mismatch - Penalty for mismatched bases (default -4)"/>
                <param name="numwanted" type="integer" value="15" label="numwanted - Number of potential parents to compare with query sequence (default 15)"/>
                <param name="parents" type="integer" value="3" label="parents - Number of potential parents to investigate from the numwanted best matches (default 3)"/>
                <param name="minsim" type="integer" value="90" label="minsim - Minimum similarity % between the query and parent (default 90)"/>
                <param name="mincov" type="integer" value="70" label="mincov - Minimum coverage % of closest matches in reference and the query (default 70)"/>
                <param name="iters" type="integer" value="1000" label="iters - Number of bootstrap iterations to try (default 1000)"/>
                <param name="minbs" type="integer" value="90" label="minbs - Minimum bootstrap support % for calling a sequence chimeric (default 90)"/>
                <param name="minsnp" type="integer" value="10" label="minsnp - Percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default 10)"/>
                <param name="divergence" type="float" value="1.007" label="divergence - Divergence cutoff for chimera determination (default 1.007)"/>
                <param name="trim" type="boolean" truevalue="true" falsevalue="false" checked="false" label="trim - include chimeric sequences trimmed to their longest piece"/>
                <param name="split" type="boolean" truevalue="true" falsevalue="false" checked="false" label="split - detect tri- and quadmeras" help="if a sequence comes back as non-chimeric, mothur will test the two sides to see if they are chimeric."/>
            </when>
        </conditional>
        <param name="dereplicate" type="boolean" falsevalue="false" truevalue="true" checked="false" label="dereplicate - remove chimeric sequences from all groups, default=f" help="If parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric"/>
    </inputs>
    <outputs>
        <!-- Log output is declared by a shared macro defined outside this file. -->
        <expand macro="logfile-output"/>
        <!-- Chimera report, collected from the job working directory. -->
        <data
            name="out_file"
            format="txt"
            from_work_dir="fasta.slayer.chimeras"
            label="${tool.name} on ${on_string}: slayer.chimeras"/>
        <!-- Accnos file, collected from the job working directory. -->
        <data
            name="out_accnos"
            format="mothur.accnos"
            from_work_dir="fasta.slayer.accnos"
            label="${tool.name} on ${on_string}: slayer.accnos"/>
    </outputs>
    <tests>
        <!-- test with external reference -->
        <test>
            <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head"/>
            <param name="source" value="hist"/>
            <param name="template" value="HMP_MOCK.v35.align"/>
            <output
                name="out_file"
                ftype="txt"
                file="Mock_S280_L001_R1_001_small.trim.contigs.good.slayer.chimeras"/>
            <expand macro="logfile-test"/>
        </test>
        <!-- test with self as reference -->
        <test>
            <param name="fasta" value="HMP_MOCK.v35.align"/>
            <param name="source" value="self"/>
            <param name="setby" value="user"/>
            <param name="search" value="kmer"/>
            <param name="name" value="HMP_MOCK.v35.align.names"/>
            <output
                name="out_file"
                ftype="txt"
                file="HMP_MOCK.v35.slayer.chimeras"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>

    <help>
<![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The chimera.slayer_ command identifies putative chimeras using the slayer approach.

ChimeraSlayer_ is a chimeric sequence detection utility, compatible with near-full length Sanger sequences and shorter 454-FLX sequences (~500 bp).

Chimera Slayer involves the following series of steps that operate to flag chimeric 16S rRNA sequences:

    (A) the ends of a query sequence are searched against an included database of reference chimera-free 16S sequences to identify potential parents of a chimera;
    (B) candidate parents of a chimera are selected as those that form a branched best scoring alignment to the NAST-formatted query sequence;
    (C) the NAST alignment of the query sequence is improved in a 'chimera-aware' profile-based NAST realignment to the selected reference parent sequences; and
    (D) an evolutionary framework is used to flag query sequences found to exhibit greater sequence homology to an in silico chimera formed between any two of the selected reference parent sequences.

Note:
It is not recommended to blindly discard all sequences flagged as chimeras. Some may represent naturally formed chimeras that do not represent PCR artifacts. Sequences flagged may warrant further investigation.


.. _ChimeraSlayer: http://microbiomeutil.sourceforge.net/
.. _chimera.slayer: https://www.mothur.org/wiki/Chimera.slayer

]]>
    </help>
    <expand macro="citations"/>
</tool>