Mercurial > repos > iuc > qiime_assign_taxonomy

<tool id="qiime_assign_taxonomy" name="Assign taxonomy" version="@WRAPPER_VERSION@.0" profile="@PROFILE@">
    <!-- Short text shown next to the tool name in the tool panel. -->
    <description> to each sequence (assign_taxonomy)</description>
    <!-- Shared tokens and macros (e.g. "requirements", "citations", "assign_taxonomy_reference_source") are expected to come from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Extra packages for the assignment back ends, passed as children of the shared "requirements" macro (presumably injected through a yield in macros.xml; confirm there). -->
    <expand macro="requirements">
        <!--<requirement type="package" version="2.0.2">rdptools</requirement>-->
        <requirement type="package" version="2.2.22">blast-legacy</requirement>
        <requirement type="package" version="2.3.4">vsearch</requirement>
        <requirement type="package" version="1.36.1">mothur</requirement>
    </expand>
    <!-- Command used to report the version of the wrapped script. -->
    <version_command>assign_taxonomy.py --version</version_command>
    <command detect_errors="aggressive"><![CDATA[
## Build the assign_taxonomy.py call from the selected inputs.
## Results are written to the assign_taxonomy directory, which the outputs section reads from.
assign_taxonomy.py
    --input_fasta_fp '$input_fasta_fp'
    ## Optional id-to-taxonomy map; nothing is emitted when source_selector is "void".
    #if $id_to_taxonomy_condition.source_selector == 'history'
        --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp'
    #else if $id_to_taxonomy_condition.source_selector == 'cached'
        --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp.fields.path'
    #end if
    --assignment_method '$methodcond.assignment_method'
    ## Method-specific options; each branch mirrors one "when" of the methodcond conditional.
    #if $methodcond.assignment_method == "uclust"
        --min_consensus_fraction '$methodcond.min_consensus_fraction'
        --similarity '$methodcond.similarity'
        --uclust_max_accepts '$methodcond.uclust_max_accepts'
    #else if $methodcond.assignment_method == "rdp"
        #if $methodcond.references.source_selector == 'history'
            --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
        #else if $methodcond.references.source_selector == 'cached'
            --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
        #end if
        --confidence '$methodcond.confidence'
    #else if $methodcond.assignment_method == "blast"
        #if $methodcond.references.source_selector == 'history'
            --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
        #else if $methodcond.references.source_selector == 'cached'
            --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
        #end if
        --blast_e_value '$methodcond.blast_e_value'
    #else if $methodcond.assignment_method == "rtax"
        --read_1_seqs_fp '$methodcond.read_1_seqs_fp'
        --read_2_seqs_fp '$methodcond.read_2_seqs_fp'
        ## Boolean inputs expand to their flag or to an empty string.
        $methodcond.single_ok
        $methodcond.no_single_ok_generic
        --read_id_regex '$methodcond.read_id_regex'
        --amplicon_id_regex '$methodcond.amplicon_id_regex'
        --header_id_regex '$methodcond.header_id_regex'
    #else if $methodcond.assignment_method == "mothur"
        #if $methodcond.references.source_selector == 'history'
            --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
        #else if $methodcond.references.source_selector == 'cached'
            --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
        #end if
        --confidence '$methodcond.confidence'
    #else if $methodcond.assignment_method == "sortmerna"
        --sortmerna_threads \${GALAXY_SLOTS:-1}
        ## No sortmerna_db option is emitted: the matching input is commented out in the
        ## inputs section, so that placeholder is undefined in this branch. Restore the
        ## option here only together with the input parameter.
        --min_consensus_fraction '$methodcond.min_consensus_fraction'
        --similarity '$methodcond.similarity'
        --sortmerna_e_value '$methodcond.sortmerna_e_value'
        --sortmerna_coverage '$methodcond.sortmerna_coverage'
        --sortmerna_best_N_alignments '$methodcond.sortmerna_best_N_alignments'
    #end if
    -o assign_taxonomy
    ]]></command>
    <!-- User-facing parameters. Parameter names are derived from each "argument" attribute, and the command template reads them under those names. -->
    <inputs>
        <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" />
        <!-- Optional id-to-taxonomy mapping: from a data table, from the history, or none at all. -->
        <conditional name="id_to_taxonomy_condition">
            <param name="source_selector" type="select" label="Do you want to use a taxonomy reference ?">
                <option value="cached">Yes (from the local cache)</option>
                <option value="history">Yes (from the active history)</option>
                <option value="void" selected="true">No</option>
            </param>
            <when value="cached">
                <param argument="--id_to_taxonomy_fp" type="select" label="Tab-delimited file mapping sequences to assigned taxonomy" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep">
                    <options from_data_table="qiime_taxonomy"/>
                </param>
            </when>
            <when value="history">
                <param argument="--id_to_taxonomy_fp" type="data" format="tabular" label="Tab-delimited file mapping sequences to assigned taxonomy" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep"/>
            </when>
            <when value="void"/>
        </conditional>
        <!-- Assignment method and its method-specific options. The rdp and rtax options are disabled in the select below, but their "when" sections are kept. -->
        <conditional name="methodcond">
            <param argument="--assignment_method" label="Taxon assignment method" type="select">
                <option selected="True" value="uclust">uclust</option>
                <!--<option value="rdp">rdp</option>-->
                <option value="blast">blast</option>
                <!--<option value="rtax">rtax</option>-->
                <option value="mothur">mothur</option>
                <option value="sortmerna">sortmerna</option>
            </param>
            <when value="uclust">
                <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
                <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
                <!-- The argument is written with its leading dashes like every sibling parameter; the derived name stays uclust_max_accepts. -->
                <param argument="--uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/>
            </when>
            <when value="rdp">
                <expand macro="assign_taxonomy_reference_source"/>
                <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
            </when>
            <when value="blast">
                <expand macro="assign_taxonomy_reference_source"/>
                <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/>
            </when>
            <when value="rtax">
                <param argument="--read_1_seqs_fp" type="data" format="fasta" label="First reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
                <param argument="--read_2_seqs_fp" type="data" format="fasta" label="Second reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
                <param argument="--single_ok" type="boolean" truevalue="--single_ok" falsevalue="" checked="false" label="Allow fallback to single-ended classification when the mate pair is lacking?"/>
                <param argument="--no_single_ok_generic" type="boolean" truevalue="--no_single_ok_generic" falsevalue="" checked="false" label="Allow fallback to single-ended classification when the mate pair is overly generic?"/>
                <param argument="--read_id_regex" type="text" value="\S+\s+(\S+)" label="Regex used to parse the result of OTU clustering, to get the read_1_id for each clusterID" help="The clusterID itself is assumed to be the first field, and is not captured by the regex"/>
                <param argument="--amplicon_id_regex" type="text" value="(\S+)\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the ampliconID for each read_1_id" help="Two groups capture read_1_id and ampliconID,  respectively."/>
                <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" />
            </when>
            <when value="mothur">
                <expand macro="assign_taxonomy_reference_source"/>
                <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
            </when>
            <when value="sortmerna">
                <!-- The pre-existing database input below is disabled, so the command template must not reference it. -->
                <!--<param argument="- -sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>-->
                <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
                <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
                <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/>
                <param argument="--sortmerna_coverage" type="float" value="0.9" label="Minimum percent query coverage (of an alignment) to consider a hit, expressed as a fraction between 0 and 1"/>
                <param argument="--sortmerna_best_N_alignments" type="integer" value="5" label="Number best alignments per read to be written"/>
            </when>
        </conditional>
    </inputs>
    <!-- Datasets collected from the assign_taxonomy working directory, which the command passes to the script as its output directory. -->
    <outputs>
        <!-- Assignment log; declared for every method except mothur. -->
        <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log">
            <filter>methodcond['assignment_method']!="mothur"</filter>
        </data>
        <!-- Taxonomy assignment table; always declared. -->
        <data name="tax_assignments" format="txt" from_work_dir="assign_taxonomy/*.txt" label="${tool.name} on ${on_string}: Taxonomic assignment"/>
        <!-- Blast-format map; declared only when the sortmerna method is selected. -->
        <data name="sortmerna_map" format="tabular" from_work_dir="assign_taxonomy/sortmerna_map.blast" label="${tool.name} on ${on_string}: SortMeRNA Blast">
            <filter>methodcond['assignment_method']=="sortmerna"</filter>
        </data>
    </outputs>
    <!-- Functional tests: one active test each for uclust, mothur and blast; the sortmerna test is kept but disabled. -->
    <tests>
        <!-- Uclust assignment method -->
        <!-- Runs without an id-to-taxonomy map; checks the assignment table by checksum and the log by content. -->
        <test>
            <param name="input_fasta_fp" value="assign_taxonomy/uclust_input_seqs.fasta"/>
            <conditional name="id_to_taxonomy_condition">
                <param name="source_selector" value="void" />
            </conditional>
            <conditional name="methodcond">
                <param name="assignment_method" value="uclust"/>
                <param name="min_consensus_fraction" value="0.51"/>
                <param name="similarity" value="0.9"/>
                <param name="uclust_max_accepts" value="3" />
            </conditional>
            <output name="tax_assignments" md5="57b0cf51fc0142f369134ea923d78d99"/>
            <output name="log">
                <assert_contents>
                    <has_text text="UclustConsensusTaxonAssigner" />
                    <has_text text="2751331" />
                </assert_contents>
            </output>
        </test>
        <!-- Mothur assignment method -->
        <!-- Note: there is variability in the assignment results with this method so the md5 checksum comparison is not possible -->
        <!-- No log output is checked here because the log dataset is filtered out for mothur. -->
        <test>
            <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
            <conditional name="id_to_taxonomy_condition">
                <param name="source_selector" value="history" />
                <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
            </conditional>
            <conditional name="methodcond">
                <param name="assignment_method" value="mothur"/>
                <conditional name="references">
                    <param name="source_selector" value="history" />
                    <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
                </conditional>
                <param name="confidence" value="0.5"/>
            </conditional>
            <output name="tax_assignments">
                <assert_contents>
                    <has_text text="X67228" />
                    <has_text text="Rhizobium" />
                    <has_text text="EF503697" />
                </assert_contents>
            </output>
        </test>
        <!-- Blast assignment method -->
        <!-- Reuses the mothur fixtures as query, taxonomy map and reference set. -->
        <test>
            <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
            <conditional name="id_to_taxonomy_condition">
                <param name="source_selector" value="history" />
                <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
            </conditional>
            <conditional name="methodcond">
                <param name="assignment_method" value="blast"/>
                <conditional name="references">
                    <param name="source_selector" value="history" />
                    <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
                </conditional>
                <param name="blast_e_value" value="0.001"/>
            </conditional>
            <output name="tax_assignments" md5="5ab8d28f67bcbf828937d222b2ab9c6e"/>
            <output name="log">
                <assert_contents>
                    <has_text text="BlastTaxonAssigner" />
                    <has_text text="inspected: 2" />
                </assert_contents>
            </output>
        </test>
        <!-- SortMeRNA assignment method -->
        <!-- Note: The input file has been reduced to only 1 sequence but this test is still quite long to execute (more than 10min) -->
        <!--<test>
            <param name="input_fasta_fp" value="assign_taxonomy/sortmerna_input_seqs.fasta"/>
            <conditional name="methodcond">
                <param name="assignment_method" value="sortmerna"/>
                <param name="min_consensus_fraction" value="0.51" />
                <param name="similarity" value="0.9" />
                <param name="sortmerna_e_value" value="1.0" />
                <param name="sortmerna_coverage" value="0.9" />
                <param name="sortmerna_best_N_alignments" value="5" />
            </conditional>
            <output name="log">
                <assert_contents>
                    <has_text text="Application:SortMeRNA" />
                    <has_text text="min_consensus_fraction" />
                </assert_contents>
            </output>
            <output name="tax_assignments" md5="0da68ab9762b677a00f34051eadad68c"/>
            <output name="sortmerna_map" md5="16e349be29f121fca741d6294f79ce7c"/>
        </test>-->
    </tests>
    <help><![CDATA[
**What it does**

Contains code for assigning taxonomy, using several techniques.

Given a set of sequences, the program attempts to assign the taxonomy of each sequence.

The methods implemented by assign_taxonomy.py are assignment with BLAST, the RDP classifier, RTAX, mothur, uclust, and SortMeRNA; this wrapper currently offers uclust, BLAST, mothur, and SortMeRNA. The output of this step is an observation metadata mapping file of input sequence identifiers (1st column of output file) to taxonomy (2nd column) and quality score (3rd column). There may be method-specific information in subsequent columns.

Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in the Greengenes reference OTU builds.

The consensus taxonomy assignment implemented here is the most detailed lineage description shared by 90% or more of the sequences within the OTU (this level of agreement can be adjusted by the user). The full lineage information for each sequence is one of the output files of the analysis. In addition, a conflict file records cases in which a phylum-level taxonomy assignment disagreement exists within an OTU (such instances are rare and can reflect sequence misclassification within the Greengenes database).
    ]]></help>
    <!-- Citation entries are supplied by the "citations" macro rather than listed here. -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>
author	iuc
date	Thu, 05 Dec 2019 07:44:57 -0500
parents	b4170e1a3b85
children