comparison assign_taxonomy.xml @ 0:fa330c61c0a5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit c9bf747b23b4a9d6adc20c7740b9247c22654862
author iuc
date Thu, 18 May 2017 09:30:34 -0400
parents
children ec3c4654eacc
comparison
equal deleted inserted replaced
-1:000000000000 0:fa330c61c0a5
1 <tool id="qiime_assign_taxonomy" name="Assign taxonomy" version="@WRAPPER_VERSION@.0">
2 <description>to each sequence</description> <!-- completes the tool name "Assign taxonomy" -->
3 <macros> <!-- the "requirements" and "citations" macros expanded in this file are presumably defined in macros.xml; confirm there -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"> <!-- the packages listed here are handed to the shared "requirements" macro as extra content -->
7 <requirement version="2.0.2" type="package">rdptools</requirement>
8 <requirement version="2.2.22" type="package">blast-legacy</requirement>
9 <requirement version="2.3.4" type="package">vsearch</requirement>
10 <requirement version="1.36.1" type="package">mothur</requirement>
11 </expand>
12 <version_command>assign_taxonomy.py --version</version_command> <!-- asks the wrapped script itself for its version -->
13 <command detect_errors="aggressive"><![CDATA[
14 assign_taxonomy.py
15 --input_fasta_fp '$input_fasta_fp'
16 #if $id_to_taxonomy_fp
17 --id_to_taxonomy_fp '$id_to_taxonomy_fp'
18 #end if
19 --assignment_method '$methodcond.assignment_method'
20 #if $methodcond.assignment_method == "uclust"
21 --min_consensus_fraction '$methodcond.min_consensus_fraction'
22 --similarity '$methodcond.similarity'
23 --uclust_max_accepts '$methodcond.uclust_max_accepts'
24 #else if $methodcond.assignment_method == "rdp"
25 #if $methodcond.reference_seqs_fp
26 --reference_seqs_fp '$methodcond.reference_seqs_fp'
27 #end if
28 --confidence '$methodcond.confidence'
29 #else if $methodcond.assignment_method == "blast"
30 --reference_seqs_fp '$methodcond.reference_seqs_fp'
31 --blast_e_value '$methodcond.blast_e_value'
32 #else if $methodcond.assignment_method == "rtax"
33 --read_1_seqs_fp '$methodcond.read_1_seqs_fp'
34 --read_2_seqs_fp '$methodcond.read_2_seqs_fp'
35 $methodcond.single_ok
36 $methodcond.no_single_ok_generic
37 --read_id_regex '$methodcond.read_id_regex'
38 --amplicon_id_regex '$methodcond.amplicon_id_regex'
39 --header_id_regex '$methodcond.header_id_regex'
40 #else if $methodcond.assignment_method == "mothur"
41 --confidence '$methodcond.confidence'
42 #else if $methodcond.assignment_method == "sortmerna"
43 --sortmerna_threads \${GALAXY_SLOTS:-1}
44 #if $methodcond.sortmerna_db
45 --sortmerna_db '$methodcond.sortmerna_db'
46 #end if
47 --min_consensus_fraction '$methodcond.min_consensus_fraction'
48 --similarity '$methodcond.similarity'
49 --sortmerna_e_value '$methodcond.sortmerna_e_value'
50 --sortmerna_coverage '$methodcond.sortmerna_coverage'
51 --sortmerna_best_N_alignments '$methodcond.sortmerna_best_N_alignments'
52 #end if
53 -o assign_taxonomy
54 ]]></command> <!-- Template for a single assign_taxonomy.py call: the common options come first, then only the options of the branch matching methodcond.assignment_method, then the output directory (-o assign_taxonomy) that the outputs section reads from. Every option is written with its full name, matching the argument of the corresponding input. NOTE(review): the blast branch passes reference_seqs_fp unconditionally although that input is declared optional, unlike the guarded rdp branch; confirm whether it needs the same #if guard. -->
55 <inputs> <!-- every param is named after the assign_taxonomy.py option given in its argument attribute; the command template reads them under the same names -->
56 <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" />
57 <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep" optional="True"/>
58 <conditional name="methodcond"> <!-- one when-section per assignment method; the command template reads these values as $methodcond.* -->
59 <param argument="--assignment_method" label="Taxon assignment method" type="select">
60 <option selected="True" value="uclust">uclust</option> <!-- the only method currently selectable; the remaining options are commented out -->
61 <!--<option value="rdp">rdp</option>
62 <option value="blast">blast</option>
63 <option value="rtax">rtax</option>
64 <option value="mothur">mothur</option>
65 <option value="sortmerna">sortmerna</option>-->
66 </param>
67 <when value="uclust">
68 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
69 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
70 <param argument="--uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/> <!-- argument carries the leading dashes like every other param -->
71 </when>
72 <when value="rdp">
73 <param argument="--reference_seqs_fp" label="Reference sequences used as training sequences for the classifier" type="data" format="fasta" optional="True"/>
74 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
75 </when>
76 <when value="blast">
77 <param argument="--reference_seqs_fp" label="Reference sequences used to generate a blast database" type="data" format="fasta" optional="True"/>
78 <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/>
79 </when>
80 <when value="rtax">
81 <param argument="--read_1_seqs_fp" type="data" format="fasta" label="First reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
82 <param argument="--read_2_seqs_fp" type="data" format="fasta" label="Second reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
83 <param argument="--single_ok" type="boolean" truevalue="--single_ok" falsevalue="" checked="false" label="Allow fallback to single-ended classification when the mate pair is lacking?"/>
84 <param argument="--no_single_ok_generic" type="boolean" truevalue="--no_single_ok_generic" falsevalue="" checked="false" label="Allow fallback to single-ended classification when the mate pair is overly generic?"/>
85 <param argument="--read_id_regex" type="text" value="\S+\s+(\S+)" label="Regex used to parse the result of OTU clustering, to get the read_1_id for each clusterID" help="The clusterID itself is assumed to be the first field, and is not captured by the regex"/>
86 <param argument="--amplicon_id_regex" type="text" value="(\S+)\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the ampliconID for each read_1_id" help="Two groups capture read_1_id and ampliconID, respectively."/>
87 <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" />
88 </when>
89 <when value="mothur">
90 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
91 </when>
92 <when value="sortmerna">
93 <param argument="--sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>
94 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
95 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
96 <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/>
97 <param argument="--sortmerna_coverage" type="float" value="0.9" label="Minimum percent query coverage (of an alignment) to consider a hit, expressed as a fraction between 0 and 1"/>
98 <param argument="--sortmerna_best_N_alignments" type="integer" value="5" label="Number of best alignments per read to be written"/>
99 </when>
100 </conditional>
101 </inputs>
102 <outputs> <!-- all files are picked up from the assign_taxonomy directory that the command passes to -o. NOTE(review): two from_work_dir values contain * wildcards; confirm the targeted Galaxy version resolves them -->
103 <data name="log" label="${tool.name} on ${on_string}: Log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log"/>
104 <data name="tax_assignments" label="${tool.name} on ${on_string}: Taxonomic assignment" format="txt" from_work_dir="assign_taxonomy/*.txt"/>
105 <data name="sortmerna_map" label="${tool.name} on ${on_string}: SortMeRNA Blast" format="tabular" from_work_dir="assign_taxonomy/sortmerna_map.blast"> <!-- only produced when the sortmerna method is selected, per the filter -->
106 <filter>methodcond['assignment_method']=="sortmerna"</filter>
107 </data>
108 </outputs>
109 <tests>
110 <test> <!-- uclust run using the same values as the input defaults; compares the assignment table and looks for two strings in the log -->
111 <param value="assign_taxonomy/uclust_input_seqs.fasta" name="input_fasta_fp"/>
112 <param value="uclust" name="assignment_method"/>
113 <param value="0.51" name="min_consensus_fraction"/>
114 <param value="0.9" name="similarity"/>
115 <param value="3" name="uclust_max_accepts"/>
116 <output value="assign_taxonomy/uclust_taxonomic_assignation.txt" name="tax_assignments"/>
117 <output name="log">
118 <assert_contents>
119 <has_text text="UclustConsensusTaxonAssigner"/>
120 <has_text text="2751331"/>
121 </assert_contents>
122 </output>
123 </test> <!-- the sortmerna test that follows is commented out, like the sortmerna option of the method select -->
124 <!--<test>
125 <param name="input_fasta_fp" value="assign_taxonomy/mothur_ref_seq_set.fna"/>
126 <param name="assignment_method" value="sortmerna"/>
127 <param name="min_consensus_fraction" value="0.51" />
128 <param name="similarity" value="0.9" />
129 <param name="sortmerna_e_value" value="1.0" />
130 <param name="sortmerna_coverage" value="0.9" />
131 <param name="sortmerna_best_N_alignments" value="5" />
132 <output name="log">
133 <assert_contents>
134 <has_text text="Application:SortMeRNA" />
135 <has_text text="min_consensus_fraction" />
136 </assert_contents>
137 </output>
138 <output name="tax_assignments" value="assign_taxonomy/sortmerna_taxonomic_assignation.txt"/>
139 <output name="sortmerna_map" value="assign_taxonomy/sortmerna_map.blast"/>
140 </test>-->
141 </tests>
142 <help><![CDATA[
143 **What it does**
144
145 Assigns taxonomy to sequences using one of several techniques.
146
147 Given a set of sequences, the program attempts to assign the taxonomy of each sequence.
148
149 Currently the methods implemented are assignment with BLAST, the RDP classifier, RTAX, mothur, uclust, and SortMeRNA. The output of this step is an observation metadata mapping file of input sequence identifiers (1st column of output file) to taxonomy (2nd column) and quality score (3rd column). There may be method-specific information in subsequent columns.
150
151 Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in the Greengenes reference OTU builds.
152
153 The consensus taxonomy assignment implemented here is the most detailed lineage description shared by 90% or more of the sequences within the OTU (this level of agreement can be adjusted by the user). The full lineage information for each sequence is one of the output files of the analysis. In addition, a conflict file
154 records cases in which a phylum-level taxonomy assignment disagreement exists within an OTU (such instances are rare and can reflect sequence misclassification within the Greengenes database).
155 ]]></help>
156 <citations> <!-- the entries come from the "citations" macro, presumably defined in the imported macros.xml -->
157 <expand macro="citations"></expand>
158 </citations>
159 </tool>