Mercurial > repos > iuc > qiime_assign_taxonomy

<tool id="qiime_assign_taxonomy" name="Assign taxonomy" version="@WRAPPER_VERSION@.0">
    <description>to each sequence</description>
    <!-- Pulls in shared definitions from macros.xml; the "requirements" and
         "citations" macros expanded in this file are presumably defined there
         (TODO confirm against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Packages listed here are passed into the "requirements" macro
         expansion, i.e. they are declared in addition to whatever that macro
         itself provides. -->
    <expand macro="requirements">
        <requirement type="package" version="2.0.2">rdptools</requirement>
        <requirement type="package" version="2.2.22">blast-legacy</requirement>
        <requirement type="package" version="2.3.4">vsearch</requirement>
        <requirement type="package" version="1.36.1">mothur</requirement>
    </expand>
    <!-- Command whose output is recorded as the tool version. -->
    <version_command>assign_taxonomy.py --version</version_command>
    <command detect_errors="aggressive"><![CDATA[
        ## Builds the assign_taxonomy.py call. Options common to all methods come
        ## first; after that only the options belonging to the selected
        ## assignment method are emitted. Results are written to the
        ## "assign_taxonomy" directory inside the job working directory.
        assign_taxonomy.py
            --input_fasta_fp '$input_fasta_fp'
            #if $id_to_taxonomy_fp
                --id_to_taxonomy_fp '$id_to_taxonomy_fp'
            #end if
            --assignment_method '$methodcond.assignment_method'
            #if $methodcond.assignment_method == "uclust"
                --min_consensus_fraction '$methodcond.min_consensus_fraction'
                --similarity '$methodcond.similarity'
                --uclust_max_accepts '$methodcond.uclust_max_accepts'
            #else if $methodcond.assignment_method == "rdp"
                #if $methodcond.reference_seqs_fp
                    --reference_seqs_fp '$methodcond.reference_seqs_fp'
                #end if
                --confidence '$methodcond.confidence'
            #else if $methodcond.assignment_method == "blast"
                ## The reference dataset is declared optional in the inputs, so
                ## only pass the option when a dataset was actually selected.
                #if $methodcond.reference_seqs_fp
                    --reference_seqs_fp '$methodcond.reference_seqs_fp'
                #end if
                --blast_e_value '$methodcond.blast_e_value'
            #else if $methodcond.assignment_method == "rtax"
                --read_1_seqs_fp '$methodcond.read_1_seqs_fp'
                --read_2_seqs_fp '$methodcond.read_2_seqs_fp'
                $methodcond.single_ok
                $methodcond.no_single_ok_generic
                --read_id_regex '$methodcond.read_id_regex'
                --amplicon_id_regex '$methodcond.amplicon_id_regex'
                --header_id_regex '$methodcond.header_id_regex'
            #else if $methodcond.assignment_method == "mothur"
                --confidence '$methodcond.confidence'
            #else if $methodcond.assignment_method == "sortmerna"
                ## Use the number of cores Galaxy allotted to the job (1 if unset).
                --sortmerna_threads \${GALAXY_SLOTS:-1}
                #if $methodcond.sortmerna_db
                    --sortmerna_db '$methodcond.sortmerna_db'
                #end if
                --min_consensus_fraction '$methodcond.min_consensus_fraction'
                --similarity '$methodcond.similarity'
                --sortmerna_e_value '$methodcond.sortmerna_e_value'
                --sortmerna_coverage '$methodcond.sortmerna_coverage'
                --sortmerna_best_N_alignments '$methodcond.sortmerna_best_N_alignments'
            #end if
            -o assign_taxonomy
    ]]></command>
    <inputs>
        <!-- Parameters shared by every assignment method. -->
        <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" />
        <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy"  type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep" optional="True"/>
        <!-- Method selector: each "when" branch below declares the parameters
             that the command template reads for that method. Only uclust is
             currently offered; the remaining options are commented out. -->
        <conditional name="methodcond">
            <param argument="--assignment_method" label="Taxon assignment method" type="select">
                <option selected="True" value="uclust">uclust</option>
                <!--<option value="rdp">rdp</option>
                <option value="blast">blast</option>
                <option value="rtax">rtax</option>
                <option value="mothur">mothur</option>
                <option value="sortmerna">sortmerna</option>-->
            </param>
            <when value="uclust">
                <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
                <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
                <param argument="--uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/>
            </when>
            <when value="rdp">
                <param argument="--reference_seqs_fp" label="Reference sequences used as training sequences for the classifier" type="data" format="fasta" optional="True"/>
                <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
            </when>
            <when value="blast">
                <param argument="--reference_seqs_fp" label="Reference sequences used to generate a blast database" type="data" format="fasta" optional="True"/>
                <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/>
            </when>
            <when value="rtax">
                <param argument="--read_1_seqs_fp" type="data" format="fasta" label="First reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
                <param argument="--read_2_seqs_fp" type="data" format="fasta" label="Second reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
                <param argument="--single_ok" type="boolean" truevalue="--single_ok" falsevalue="" checked="false" label="Allow fallback to single-ended classification when the mate pair is lacking?"/>
                <!-- Checking this box emits the "no_single_ok_generic" flag,
                     so the label is phrased as disallowing the fallback. -->
                <param argument="--no_single_ok_generic" type="boolean" truevalue="--no_single_ok_generic" falsevalue="" checked="false" label="Disallow fallback to single-ended classification when the mate pair is overly generic?"/>
                <!-- NOTE(review): Galaxy's default sanitizer for text
                     parameters may alter the backslashes in these regex
                     values; confirm (and add a sanitizer if needed) before
                     re-enabling the rtax option. -->
                <param argument="--read_id_regex" type="text" value="\S+\s+(\S+)" label="Regex used to parse the result of OTU clustering, to get the read_1_id for each clusterID" help="The clusterID itself is assumed to be the first field, and is not captured by the regex"/>
                <param argument="--amplicon_id_regex" type="text" value="(\S+)\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the ampliconID for each read_1_id" help="Two groups capture read_1_id and ampliconID,  respectively."/>
                <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" />
            </when>
            <when value="mothur">
                <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
            </when>
            <when value="sortmerna">
                <param argument="--sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>
                <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
                <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
                <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/>
                <param argument="--sortmerna_coverage" type="float" value="0.9" label="Minimum percent query coverage (of an alignment) to consider a hit, expressed as a fraction between 0 and 1"/>
                <param argument="--sortmerna_best_N_alignments" type="integer" value="5" label="Number best alignments per read to be written"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- All outputs are collected from the "assign_taxonomy" directory
             that the command writes to via its -o option. -->
        <data name="log"
              format="txt"
              from_work_dir="assign_taxonomy/*_assignments.log"
              label="${tool.name} on ${on_string}: Log"/>
        <data name="tax_assignments"
              format="txt"
              from_work_dir="assign_taxonomy/*.txt"
              label="${tool.name} on ${on_string}: Taxonomic assignment"/>
        <!-- Only created when the sortmerna method is selected. -->
        <data name="sortmerna_map"
              format="tabular"
              from_work_dir="assign_taxonomy/sortmerna_map.blast"
              label="${tool.name} on ${on_string}: SortMeRNA Blast">
            <filter>methodcond['assignment_method']=="sortmerna"</filter>
        </data>
    </outputs>
    <tests>
        <!-- uclust run with the default parameter values: the assignment table
             is compared against a reference file and the log is checked for
             expected text. -->
        <test>
            <param name="input_fasta_fp" value="assign_taxonomy/uclust_input_seqs.fasta"/>
            <param name="assignment_method" value="uclust"/>
            <param name="min_consensus_fraction" value="0.51"/>
            <param name="similarity" value="0.9"/>
            <param name="uclust_max_accepts" value="3" />
            <output name="tax_assignments" value="assign_taxonomy/uclust_taxonomic_assignation.txt"/>
            <output name="log">
                <assert_contents>
                    <has_text text="UclustConsensusTaxonAssigner" />
                    <has_text text="2751331" />
                </assert_contents>
            </output>
        </test>
        <!-- The sortmerna test below is disabled; the sortmerna choice is
             likewise commented out in the assignment_method select. -->
        <!--<test>
            <param name="input_fasta_fp" value="assign_taxonomy/mothur_ref_seq_set.fna"/>
            <param name="assignment_method" value="sortmerna"/>
            <param name="min_consensus_fraction" value="0.51" />
            <param name="similarity" value="0.9" />
            <param name="sortmerna_e_value" value="1.0" />
            <param name="sortmerna_coverage" value="0.9" />
            <param name="sortmerna_best_N_alignments" value="5" />
            <output name="log">
                <assert_contents>
                    <has_text text="Application:SortMeRNA" />
                    <has_text text="min_consensus_fraction" />
                </assert_contents>
            </output>
            <output name="tax_assignments" value="assign_taxonomy/sortmerna_taxonomic_assignation.txt"/>
            <output name="sortmerna_map" value="assign_taxonomy/sortmerna_map.blast"/>
        </test>-->
    </tests>
    <help><![CDATA[
**What it does**

This tool runs QIIME's ``assign_taxonomy.py`` script, which offers several techniques for assigning taxonomy.

Given a set of sequences, the program attempts to assign the taxonomy of each sequence.

Currently the methods implemented are assignment with BLAST, the RDP classifier, RTAX, mothur, and uclust. The output of this step is an observation metadata mapping file of input sequence identifiers (1st column of output file) to taxonomy (2nd column) and quality score (3rd column). There may be method-specific information in subsequent columns.

Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in the Greengenes reference OTU builds.

The consensus taxonomy assignment implemented here is the most detailed lineage description shared by 90% or more of the sequences within the OTU (this level of agreement can be adjusted by the user). The full lineage information for each sequence is one of the output files of the analysis. In addition, a conflict file
records cases in which a phylum-level taxonomy assignment disagreement exists within an OTU (such instances are rare and can reflect sequence misclassification within the greengenes database).
    ]]></help>
    <!-- Citation entries come from the "citations" macro, presumably defined
         in the imported macros.xml (TODO confirm). -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>
author	iuc
date	Thu, 18 May 2017 09:30:34 -0400
parents
children	ec3c4654eacc