view chimera.pintail.xml @ 6:93b61db8bdb6 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit debdcdbb89f9bf3e2c3eed05cb078eddb27d4684"
author iuc
date Mon, 02 Dec 2019 06:18:11 -0500
parents cc3eef74fe31
children
line wrap: on
line source

<tool profile="16.07" id="mothur_chimera_pintail" name="Chimera.pintail" version="@WRAPPER_VERSION@.0">
    <!-- Galaxy wrapper around the mothur chimera.pintail command. -->
    <description>Find putative chimeras using pintail</description>
    <!-- Shared definitions: the macros expanded in this file and the @TOKEN@ placeholders
         are presumably provided by macros.xml, which is not visible here. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
@SHELL_OPTIONS@

## Create symlinks so mothur sees predictable file names in the working directory.
ln -s '$fasta' fasta.dat &&
ln -s '$alignment.reference' alignment.reference.dat &&
## Optional inputs are linked only when supplied, mirroring the guards used in the
## mothur call below; an unconditional link would leave a dangling symlink to "None".
#if $conservation:
    ln -s '$conservation' conservation.dat &&
#end if
#if $quantile:
    ln -s '$quantile' quantile.dat &&
#end if
#if $mask_cond.source2 == "history":
    ln -s '$mask_cond.mask' mask.dat &&
#end if

## Build the mothur command text, strip spaces from it, and pipe it to mothur on stdin.
## window and increment are passed only when positive; processors falls back to 8
## when GALAXY_SLOTS is not set.
echo 'chimera.pintail(
    fasta=fasta.dat,
    reference=alignment.reference.dat,
    filter=$filter,
    #if $mask_cond.source2 == "default":
        mask=default,
    #elif $mask_cond.source2 == "history":
        mask=mask.dat,
    #end if
    #if $conservation:
        conservation=conservation.dat,
    #end if
    #if $quantile:
        quantile=quantile.dat,
    #end if
    #if int($window) > 0:
        window=$window,
    #end if
    #if int($increment) > 0:
        increment=$increment,
    #end if
    processors='\${GALAXY_SLOTS:-8}'
)'
| sed 's/ //g'  ## mothur trips over whitespace
| mothur
| tee mothur.out.log
    ]]></command>
    <inputs>
        <!-- Query sequences to be screened. -->
        <param argument="fasta" type="data" format="fasta" label="fasta - Candidate Sequences"/>
        <!-- Reference template: either a dataset from the history or an entry of the
             mothur_aligndb data table. Both branches expose the value as "reference". -->
        <conditional name="alignment">
            <param name="source" type="select" label="Select Reference Template from">
                <option value="hist">History</option>
                <option value="ref">Cached Reference</option>
            </param>
            <when value="ref">
                <param argument="reference" type="select" label="reference - Select an alignment database">
                    <options from_data_table="mothur_aligndb"/>
                </param>
            </when>
            <when value="hist">
                <param argument="reference" type="data" format="fasta" label="reference - Reference to align with"/>
            </when>
        </conditional>
        <param argument="filter" type="boolean" falsevalue="false" truevalue="true" checked="false" label="filter - Apply a 50% soft vertical filter"/>
        <!-- Mask selection: none (empty value), the built-in default mask, or a history dataset. -->
        <conditional name="mask_cond">
            <param name="source2" type="select" label="Mask option">
                <option value="">None</option>
                <option value="default">default ecoli mask</option>
                <option value="history">From your history</option>
            </param>
            <when value="history">
                <param argument="mask" type="data" format="mothur.filter" label="Mask for the template file"/>
            </when>
            <when value=""/>
            <when value="default"/>
        </conditional>
        <!-- Optional precomputed template files; the matching output is produced only when one is left unset. -->
        <param argument="conservation" type="data" format="mothur.freq" optional="true" label="conservation - Template Conserved frequencies"
            help="A file containing the frequency information for your template file to increase speed. Mothur will generate this for you but it takes a long time."/>
        <param argument="quantile" type="data" format="mothur.quan" optional="true" label="quantile - Template quantile information"
            help="A file containing the quantiles information for your template file to increase speed. Mothur can generate this for you but it takes a VERY long time.
            Note that when you use the filter, mask or mask and filter you need to select the appropriate quantile file. The filter parameter makes the quantile file
            generated specific to the query set you are analyzing."/>
        <!-- Values below 1 are not passed to mothur, so mothur falls back to its own defaults. -->
        <param argument="window" type="integer" value="300" label="window - Length of sequence you want in each window analyzed (uses default if &lt; 1)"
            help="Default is set to 300. Note, changing the window size will require new quantile files to be made."/>
        <param argument="increment" type="integer" value="25" label="increment - Increment for window slide on each iteration (uses default if &lt; 1)"
            help="Default is 25. Note, changing the increment will require new quantile files to be made."/>
        <expand macro="param-savelog"/>
    </inputs>
    <outputs>
        <!-- Log output comes from a shared macro (definition not visible in this file). -->
        <expand macro="logfile-output"/>
        <!-- Chimera report and accnos list, collected from the working directory. -->
        <data name="pintail.chimeras"
              format="txt"
              from_work_dir="fasta.*pintail.chimeras"
              label="${tool.name} on ${on_string}: pintail.chimeras"/>
        <data name="out_accnos"
              format="mothur.accnos"
              from_work_dir="fasta.*pintail.accnos"
              label="${tool.name} on ${on_string}: pintail.accnos"/>
        <!-- Frequency file: kept only when no conservation input was supplied. -->
        <data name="out_freq"
              format="mothur.freq"
              from_work_dir="alignment.reference.freq"
              label="${tool.name} on ${on_string}: pintail.freq">
            <filter>conservation == None</filter>
        </data>
        <!-- Quantile file: kept only when no quantile input was supplied. -->
        <data name="out_quantile"
              format="mothur.quan"
              from_work_dir="*.quan"
              label="${tool.name} on ${on_string}: pintail.quan">
            <filter>quantile == None</filter>
        </data>
    </outputs>
    <tests>
        <!-- History reference with a supplied quantile file and a 200-base window. -->
        <test>
            <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head"/>
            <param name="source" value="hist"/>
            <param name="reference" value="HMP_MOCK.v35.align"/>
            <param name="quantile" value="HMP_MOCK.v35.pintail.quan"/>
            <param name="window" value="200"/>
            <param name="savelog" value="true"/>
            <output name="pintail.chimeras" ftype="txt" md5="644fe23ee459a76de7b225e530361865"/>
            <output name="out_accnos" ftype="mothur.accnos" file="Mock_S280_L001_R1_001_small.trim.contigs.good.pintail.accnos"/>
            <expand macro="logfile-test"/>
        </test>
        <!-- Default mask with no quantile or conservation input, so the quan and freq outputs are checked too. -->
        <test>
            <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head"/>
            <param name="source" value="hist"/>
            <param name="reference" value="HMP_MOCK.v35.align"/>
            <param name="source2" value="default"/>
            <param name="window" value="200"/>
            <param name="savelog" value="true"/>
            <output name="pintail.chimeras" ftype="txt">
                <assert_contents>
                    <has_text text="Observed"/>
                    <has_text text="Expected"/>
                </assert_contents>
            </output>
            <output name="out_quantile" ftype="mothur.quan" md5="74372024214704010aed4f9c8258f77b"/>
            <output name="out_freq" ftype="mothur.freq" md5="2d2c4066c5de06c8d1d78ee3784daa3d"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>
    <help><![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The chimera.pintail_ command identifies putative chimeras using the pintail approach.  It looks at the variation between the expected differences and the observed differences in the query sequence over several windows.

This method was written using the algorithms described in the paper_ "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" by Kevin E. Ashelford, Nadia A. Chuzhanova, John C. Fry, Antonia J. Jones, and Andrew J. Weightman.

The Pintail algorithm is a technique for determining whether a 16S rDNA sequence is anomalous.  It is based on the idea that the extent of local base differences between two aligned 16S rDNA sequences should be roughly the same along the length of the alignment (having allowed for the underlying pattern of hypervariable and conserved regions known to exist within the 16S rRNA gene).  In other words, evolutionary distance between two reliable sequences should be constant along the length of the gene.

In contrast, if an error-free sequence is compared with an anomalous sequence, evolutionary distance along the alignment is unlikely to be constant, especially if the anomaly in question is a chimera and formed from phylogenetically different parental sequences.

The Pintail algorithm is designed to detect and quantify such local variations and in doing so generates the Deviation from Expectation (DE) statistic.  The higher the DE value, the greater the likelihood that the query is anomalous.

The algorithm works as follows:

The sequence to be checked (the query) is first globally aligned with a phylogenetically similar sequence known to be error-free (the subject).  At regular intervals along the resulting alignment, the local evolutionary distance between query and subject is estimated by recording percentage base mismatches within a sampling window of fixed length.  The resulting array of percentages (observed percentage differences) reflects variations in evolutionary distance between the query and subject along the length of the 16S rRNA gene.  Subtracting observed percentage differences from an equivalent array of expected percentage differences (predicted values for error-free sequences), we obtain a set of deviations, the standard deviation of which (Deviation from Expectation, DE) summarises the variation between observed and expected datasets.  The greater the DE value, the greater the disparity there is between observed and expected percentage differences, and the more likely it is that the query sequence is anomalous.


.. _paper: https://www.ncbi.nlm.nih.gov/pubmed/16332745
.. _chimera.pintail: https://www.mothur.org/wiki/Chimera.pintail

    ]]></help>
    <!-- Tool-specific DOI citations passed into the citations macro, which is defined outside this file. -->
    <expand macro="citations">
        <citation type="doi">10.1128/AEM.71.12.7724-7736.2005</citation>
        <citation type="doi">10.1128/AEM.00556-06</citation>
    </expand>
</tool>