Mercurial > repos > iuc > qiime_filter_alignment

<tool id="qiime_filter_alignment" name="Filter sequence alignment" version="@WRAPPER_VERSION@.0">
    <description> by removing highly variable regions (filter_alignment)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="aggressive"><![CDATA[
## Run QIIME's alignment filter; results land in the 'output' directory,
## which is where the tool's output dataset is collected from.
filter_alignment.py
    --input_fasta_file '$input_fasta_file'
    --output_dir output
    ## Boolean parameters expand to their flag when checked, or to nothing.
    $suppress_lane_mask_filter
    $remove_outliers
    --allowed_gap_frac '$allowed_gap_frac'
    --threshold '$threshold'
    ## Optional parameter: only forwarded when a value was entered.
    #if str($entropy_threshold)
        --entropy_threshold '$entropy_threshold'
    #end if
    ]]></command>
    <inputs>
        <!-- Alignment to be filtered. -->
        <param argument="--input_fasta_file" type="data" format="fasta" label="Input fasta file containing the alignment" />
        <!-- Boolean flags: the truevalue/falsevalue string is what gets substituted into the command line. -->
        <param argument="--suppress_lane_mask_filter" type="boolean" truevalue="--suppress_lane_mask_filter" falsevalue="" checked="false" label="Suppress lane mask filtering?"/>
        <!-- A fraction of the sequences, so it is bounded to [0, 1] in the same way as entropy_threshold below. -->
        <param argument="--allowed_gap_frac" type="float" value="0.999999" min="0" max="1" label="Gap filter threshold" help="Filters positions which are gaps in &gt; allowed_gap_frac of the sequences"/>
        <param argument="--remove_outliers" type="boolean" truevalue="--remove_outliers" falsevalue="" checked="false" label="Remove seqs very dissimilar to the alignment consensus?"/>
        <!-- Expressed in standard deviations above the mean (see help text); always passed to the command. -->
        <param argument="--threshold" type="float" value="3.0" label="Threshold to remove sequences based on dissimilarity" help="remove seqs whose dissimilarity to the consensus sequence is approximately &gt; x standard deviations above the mean of the sequences"/>
        <!-- Optional: left empty, the option is omitted from the command line entirely. -->
        <param argument="--entropy_threshold" type="float" min="0" max="1" label="Percent threshold for removing base positions with the highest entropy" help="For example, if 0.10 were specified, the top 10% most entropic base positions would be filtered. If this value is used, any lane mask supplied will be ignored. Entropy filtering occurs after gap filtering" optional="true" />
    </inputs>
    <outputs>
        <!-- Picked up from the directory the command passes as its output directory;
             the '*' stands in for whatever precedes 'pfiltered.fasta' in the produced file name. -->
        <data name="filtered_sequences"
              format="fasta"
              label="${tool.name} on ${on_string}: Filtered sequences"
              from_work_dir="output/*pfiltered.fasta"/>
    </outputs>
    <tests>
        <!-- Default settings: both boolean flags off, no entropy threshold. -->
        <test>
            <param name="input_fasta_file" value="filter_alignment/alignment.fasta"/>
            <param name="allowed_gap_frac" value="0.999999"/>
            <param name="threshold" value="3.0"/>
            <param name="suppress_lane_mask_filter" value=""/>
            <param name="remove_outliers" value=""/>
            <output name="filtered_sequences" md5="1a5977fcac7cb85b23f3464a19763489"/>
        </test>
        <!-- Both boolean flags switched on: lane mask filtering suppressed and outlier removal enabled. -->
        <test>
            <param name="input_fasta_file" value="filter_alignment/alignment.fasta"/>
            <param name="allowed_gap_frac" value="0.999999"/>
            <param name="threshold" value="3.0"/>
            <param name="suppress_lane_mask_filter" value="--suppress_lane_mask_filter"/>
            <param name="remove_outliers" value="--remove_outliers"/>
            <output name="filtered_sequences" md5="e9e63deb4cc99c0ec0651b20528f4149"/>
        </test>
        <!-- Flags off, with the optional entropy threshold set to 0.1. -->
        <test>
            <param name="input_fasta_file" value="filter_alignment/alignment.fasta"/>
            <param name="allowed_gap_frac" value="0.999999"/>
            <param name="threshold" value="3.0"/>
            <param name="entropy_threshold" value="0.1"/>
            <param name="suppress_lane_mask_filter" value=""/>
            <param name="remove_outliers" value=""/>
            <output name="filtered_sequences" md5="a297535302ef8fa7de4d831187b16736"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This script should be applied to generate a useful tree when aligning against a
template alignment (e.g., with PyNAST). This script will remove positions which
are gaps in every sequence (common for PyNAST, as typical sequences cover only
200-400 bases, and they are being aligned against the full 16S gene). Additionally,
the user can supply a lanemask file, that defines which positions should be included
when building the tree, and which should be ignored. Typically, this will differentiate
between non-conserved positions, which are uninformative for tree building, and
conserved positions which are informative for tree building. FILTERING ALIGNMENTS
WHICH WERE BUILT WITH PYNAST AGAINST THE GREENGENES CORE SET ALIGNMENT SHOULD BE
CONSIDERED AN ESSENTIAL STEP.
    ]]></help>
    <!-- Citation entries are not listed inline; they are expanded from the "citations" macro
         (presumably defined in the imported macros.xml; confirm there). -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>
author	iuc
date	Sat, 05 Aug 2017 07:21:55 -0400
parents	8435f8fa0837
children	ca44cfeacbe9