view filter_alignment.xml @ 0:8435f8fa0837 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit c9bf747b23b4a9d6adc20c7740b9247c22654862
author iuc
date Thu, 18 May 2017 09:40:03 -0400
parents
children ffcb719ff53c
line wrap: on
line source

<tool id="qiime_filter_alignment" name="Filter sequence alignment" version="@WRAPPER_VERSION@.0">
    <description>by removing highly variable regions</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- Record the version reported by the wrapped script alongside each job. -->
    <version_command>filter_alignment.py --version</version_command>
    <!-- Results are written to the "output" directory inside the job working directory.
         The two boolean parameters expand either to their flag or to an empty string,
         and the entropy threshold option is emitted only when a value was supplied. -->
    <command detect_errors="aggressive"><![CDATA[
        filter_alignment.py
            --input_fasta_file '$input_fasta_file'
            -o output
            $suppress_lane_mask_filter
            --allowed_gap_frac '$allowed_gap_frac'
            $remove_outliers
            --threshold '$threshold'
            #if str($entropy_threshold) != ''
                --entropy_threshold '$entropy_threshold'
            #end if
    ]]></command>
    <inputs>
        <param argument="--input_fasta_file" type="data" format="fasta" label="Input fasta file containing the alignment" />
        <param argument="--suppress_lane_mask_filter" type="boolean" truevalue="--suppress_lane_mask_filter" falsevalue="" checked="False" label="Suppress lane mask filtering?"/>
        <param argument="--allowed_gap_frac" type="float" value="0.999999" label="Gap filter threshold" help="Filters positions which are gaps in &gt; allowed_gap_frac of the sequences"/>
        <param argument="--remove_outliers" type="boolean" truevalue="--remove_outliers" falsevalue="" checked="False" label="Remove seqs very dissimilar to the alignment consensus?"/>
        <param argument="--threshold" type="float" value="3.0" label="Threshold to remove sequences based on dissimilarity" help="remove seqs whose dissimilarity to the consensus sequence is approximately &gt; x standard deviations above the mean of the sequences"/>
        <!-- Optional: when left empty the option is omitted from the command line. -->
        <param argument="--entropy_threshold" type="float" min="0" max="1" label="Percent threshold for removing base positions with the highest entropy" help="For example, if 0.10 were specified, the top 10% most entropic base positions would be filtered.  If this value is used, any lane mask supplied will be ignored. Entropy filtering occurs after gap filtering" optional="True" />
    </inputs>
    <outputs>
        <!-- The filtered alignment is picked up from the output directory by file name pattern. -->
        <data name="filtered_sequences" format="fasta" from_work_dir="output/*pfiltered.fasta" label="${tool.name} on ${on_string}: Filtered sequences"/>
    </outputs>
    <tests>
        <!-- Default settings: both boolean flags off, no entropy threshold. -->
        <test>
            <param name="input_fasta_file" value="filter_alignment/alignment.fasta"/>
            <param name="suppress_lane_mask_filter" value="false"/>
            <param name="allowed_gap_frac" value="0.999999"/>
            <param name="remove_outliers" value="false"/>
            <param name="threshold" value="3.0"/>
            <output name="filtered_sequences" md5="1a5977fcac7cb85b23f3464a19763489"/>
        </test>
        <!-- Both boolean flags switched on. -->
        <test>
            <param name="input_fasta_file" value="filter_alignment/alignment.fasta"/>
            <param name="suppress_lane_mask_filter" value="true"/>
            <param name="allowed_gap_frac" value="0.999999"/>
            <param name="remove_outliers" value="true"/>
            <param name="threshold" value="3.0"/>
            <output name="filtered_sequences" md5="e9e63deb4cc99c0ec0651b20528f4149"/>
        </test>
        <!-- Boolean flags off, entropy threshold set to 0.1. -->
        <test>
            <param name="input_fasta_file" value="filter_alignment/alignment.fasta"/>
            <param name="suppress_lane_mask_filter" value="false"/>
            <param name="allowed_gap_frac" value="0.999999"/>
            <param name="remove_outliers" value="false"/>
            <param name="threshold" value="3.0"/>
            <param name="entropy_threshold" value="0.1"/>
            <output name="filtered_sequences" md5="a297535302ef8fa7de4d831187b16736"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This script should be applied to generate a useful tree when aligning against a
template alignment (e.g., with PyNAST). This script will remove positions which
are gaps in every sequence (common for PyNAST, as typical sequences cover only
200-400 bases, and they are being aligned against the full 16S gene). Additionally,
the user can supply a lanemask file, that defines which positions should be included
when building the tree, and which should be ignored. Typically, this will differentiate
between non-conserved positions, which are uninformative for tree building, and
conserved positions which are informative for tree building. FILTERING ALIGNMENTS
WHICH WERE BUILT WITH PYNAST AGAINST THE GREENGENES CORE SET ALIGNMENT SHOULD BE
CONSIDERED AN ESSENTIAL STEP.
    ]]></help>
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>