Mercurial > repos > iuc > mothur_filter_seqs

<tool profile="16.07" id="mothur_filter_seqs" name="Filter.seqs" version="@WRAPPER_VERSION@.0">
    <description>removes columns from alignments</description>
    <!-- Shared definitions are imported from macros.xml. The expand elements below
         (requirements, stdio, version_command) and the @...@ tokens used in this file
         are presumably defined there; confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
@SHELL_OPTIONS@
#import re

## Stage every input under a short, predictable name (fasta.dat, fasta0.dat, ...)
## so the mothur command below never contains arbitrary dataset paths.
ln -s '$fasta' fasta.dat &&
#for $idx, $i in enumerate($inputs):
    ln -s '$i.fasta' fasta${idx}.dat &&
#end for
## 'hard' is optional: only link it when a dataset was actually supplied,
## otherwise a dangling symlink to the literal string None would be created.
#if $hard:
    ln -s '$hard' hard.dat &&
#end if

## Build the filter.seqs() call and feed it to mothur on stdin.
## Additional alignments are appended to fasta= separated by '-'.
echo 'filter.seqs(
    fasta=fasta.dat#for $idx, $i in enumerate($inputs)#-fasta${idx}.dat#end for#,
    vertical=$vertical,
    #if $trump:
        trump=$trump,
    #end if
    soft=$soft,
    #if $hard:
        hard=hard.dat,
    #end if
    processors='\${GALAXY_SLOTS:-8}'
)'
| sed 's/ //g'  ## mothur trips over whitespace
| mothur
| tee mothur.out.log &&

## rename collection files for more transparent element naming.
## The same sanitising pattern (word characters, '-', whitespace and '.' are kept,
## everything else becomes '_') is applied to every input so that all element
## names are derived consistently.
#set $identifier=re.sub('[^\w\-\s\.]', '_', str($fasta.element_identifier))
mv fasta.filter.fasta '${identifier}.filter.fasta'
#for $idx, $i in enumerate($inputs):
    #set $identifier=re.sub('[^\w\-\s\.]', '_', str($i.fasta.element_identifier))
    && mv fasta${idx}.filter.fasta '${identifier}.filter.fasta'
#end for
    ]]></command>
    <inputs>
        <!-- Primary alignment; staged as fasta.dat by the command template -->
        <param name="fasta" type="data" format="mothur.align" label="fasta - Alignment Fasta"/>
        <!-- Zero or more extra alignments, passed to the same filter.seqs call as the primary one -->
        <repeat name="inputs" title="Additional Alignment File">
            <param name="fasta" type="data" format="mothur.align" label="fasta - Alignment Fasta"/>
        </repeat>
        <param name="vertical" type="boolean" checked="true" truevalue="true" falsevalue="false" label="vertical - Vertical column"
            help="Ignore any column that only contains gap characters (i.e. '-' or '.')"/>
        <!-- The empty "Off" value makes the command template omit the trump= argument -->
        <param name="trump" type="select" label="trump - Trump character"
            help="Remove a column if the trump character is found at that position in any sequence of the alignment">
            <option value="">Off</option>
            <option value=".">.</option>
            <option value="-">-</option>
            <option value="N">N</option>
        </param>
        <param name="soft" type="integer" value="0" min="0" max="100" label="soft - percentage required to retain column. (0-100)"
            help="Removes any column where the dominant base (i.e. A, T, G, C, or U) does not occur in at least a designated percentage of sequences"/>
        <!-- Optional mask file; the hard= argument is only emitted when a dataset is selected -->
        <param name="hard" type="data" format="mothur.filter" optional="true" label="hard - Hard Column Filter"
            help="A file should only contain one line consisting of 0's and 1's"/>
        <expand macro="param-savelog"/>
    </inputs>
    <outputs>
        <expand macro="logfile-output"/>
        <!-- Column filter written by mothur alongside the filtered alignments -->
        <data name="out_filter"
              format="mothur.filter"
              from_work_dir="fasta*.filter"
              label="${tool.name} on ${on_string}: filter"/>
        <!-- Several alignments were given: collect every renamed *.filter.fasta into a list -->
        <collection name="filteredfastas"
                    type="list"
                    label="${tool.name} on ${on_string}: filtered fastas">
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.filter\.fasta" format="fasta"/>
            <!-- only output collection if multiple outputs -->
            <filter>inputs</filter>
        </collection>
        <!-- Only the primary alignment was given: expose the single filtered fasta directly -->
        <data name="filteredfasta"
              format="fasta"
              from_work_dir="*.filter.fasta"
              label="${tool.name} on ${on_string}: filtered fasta">
            <filter>not inputs</filter>
        </data>
    </outputs>
    <tests>
        <!-- test with multiple inputs and collection output -->
        <test>
            <!-- inputs -->
            <param name="fasta" value="HMP_MOCK.v35.align" ftype="mothur.align"/>
            <repeat name="inputs">
                <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head" ftype="mothur.align"/>
            </repeat>
            <param name="savelog" value="true"/>
            <!-- expected outputs -->
            <output name="out_filter" md5="3e6c2cfef46baf35d2a8b5cafe53e3a4"/>
            <output_collection name="filteredfastas" count="2">
                <element name="HMP_MOCK.v35.align" md5="ef4c6c2d9a882f7a22e5fa3c814af7cc" ftype="fasta"/>
            </output_collection>
            <expand macro="logfile-test"/>
        </test>
        <!-- test with single input and non-collection output -->
        <test>
            <!-- inputs -->
            <param name="fasta" value="HMP_MOCK.v35.align" ftype="mothur.align"/>
            <param name="savelog" value="true"/>
            <!-- expected outputs -->
            <output name="out_filter" md5="3e6c2cfef46baf35d2a8b5cafe53e3a4"/>
            <output name="filteredfasta" md5="ef4c6c2d9a882f7a22e5fa3c814af7cc"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>
    <help><![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The filter.seqs_ command removes columns from alignments based on criteria defined by the user. For example, alignments
generated against reference alignments (e.g. from RDP, SILVA, or greengenes) often have columns where every character is
either a '.' or a '-'. These columns are not included in calculating distances because they have no information in them.
By removing these columns, the calculation of a large number of distances is accelerated. In addition, people often like to mask
their sequences to remove variable regions using a soft or hard mask (e.g. Lane's mask). This type of masking is only
encouraged for deep-level phylogenetic analysis, not fine-level analysis such as that needed with calculating OTUs.

.. _filter.seqs: https://www.mothur.org/wiki/Filter.seqs

v.1.20.0: Updated to Mothur 1.33

    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Fri, 06 Nov 2020 00:02:41 +0000
parents	e71c69acde3f
children