annotate uchime/uchime.xml @ 0:fd0ab76b83f1 draft default tip

Uploaded
author qfab
date Wed, 28 May 2014 22:14:14 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
1 <tool id="uchime" name="Uchime" version="1.0.0">
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
2 <description>Detecting chimeric sequences with two or more segments.</description>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
3 <!-- Cheetah command template: runs usearch UCHIME in de novo or reference mode depending on runmode.mode. The inputs declared inside the runmode conditional are referenced fully qualified (runmode.input, runmode.db) and every path is quoted for the shell. --><command>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
4 #if str( $runmode.mode ) == "denovo"
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
5 usearch -uchime_denovo '$runmode.input' -chimeras '$output' -nonchimeras '$outputnon' -uchimeout '$outputtab' -uchimealns '$outputread' -quiet
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
6 #else
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
7 usearch -uchime_ref '$runmode.input' -db '$runmode.db' -chimeras '$output' -nonchimeras '$outputnon' -uchimeout '$outputtab' -uchimealns '$outputread' -strand plus -quiet
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
8 #end if
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
9 </command>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
10 <!-- Tool inputs: a runmode conditional whose mode select (ref, the default, or denovo) decides which dataset parameters are shown. Both branches declare an input dataset; the ref branch additionally declares the db reference database. NOTE(review): the ref-branch label "Input reference file" is attached to the query input passed to -uchime_ref, not to the database; confirm the intended wording. --><inputs>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
11 <conditional name="runmode">
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
12 <param name="mode" type="select" label="Mode to detect chimeras" help="Which mode? See help below">
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
13 <option selected="true" value="ref">ref</option>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
14 <option value="denovo">de novo</option>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
15 </param>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
16 <when value="denovo">
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
17 <param name="input" type="data" format="fasta,tabular" label="Input file" help="" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
18 </when>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
19 <when value="ref">
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
20 <param name="input" type="data" format="fasta,tabular" label="Input reference file" help="" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
21 <param name="db" type="data" format="fasta" label="Reference Database" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
22 </when>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
23 </conditional>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
24 </inputs>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
25 <!-- Tool outputs: chimeric and non-chimeric sequences as FASTA, plus two log datasets (alignment report and tabbed per-sequence report) that are hidden from the history by default. The hidden flag uses the canonical lowercase boolean spelling. --><outputs>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
26 <data name='output' format='fasta' label="${tool.name} on ${on_string}:chimeras" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
27 <data name='outputnon' format='fasta' label="${tool.name} on ${on_string}:non_chimeras" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
28 <data name='outputread' format='tabular' hidden="true" label="${tool.name} on ${on_string}:Human-readable output" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
29 <data name='outputtab' format='tabular' hidden="true" label="${tool.name} on ${on_string}:Tabbed output" help='Output in tabbed format with one record per sequence. First field is score (h), second field is query label.' />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
30 </outputs>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
31 <help>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
32 ===========
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
33 Description
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
34 ===========
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
35
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
36 .. class:: infomark
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
37
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
38 Two additional files are generated by this tool: the log files in tabbed and human-readable format, which are hidden from the history list. You can view these outputs by clicking on the cogwheel next to the History panel and selecting "Include Hidden Datasets".
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
39
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
40 UCHIME is an algorithm for detecting chimeric sequences. It is implemented in the USEARCH-Tool-Suite_.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
41
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
42 The fundamental step in UCHIME is a search for a 3-way alignment of a query sequence with two parent sequences (A and B) such that one parent is more similar to one segment of the query (Q) and the other parent is similar over another segment.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
43
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
44 A score is calculated from the alignment. Higher scores indicate a stronger chimeric signal. A score cutoff set by the -minh option (0.28 by default) determines whether the query is classified as a chimera.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
45
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
46 This search can be performed with a reference database of parent sequences believed to be chimera-free provided by the user, or the database can be constructed de novo from the query sequences. In de novo mode, the sequences are assumed to be derived from one PCR run. In this case, parent sequences should be more abundant than their chimeras because the parent amplicons will have undergone more rounds of amplification.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
47
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
48 .. _USEARCH-Tool-Suite: http://www.drive5.com/usearch/
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
49
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
50 .. class:: warningmark
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
51
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
52 Please note: The free 32-bit version of USEARCH is limited to using 4GB or less RAM (Linux, OSX).
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
53 If you are using the free 32-bit version of USEARCH, we recommend using reference datasets up to 800MB in size to avoid running into the "out of memory" error.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
54 Please see the USEARCH_ site for more info on the memory requirements.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
55
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
56 .. _USEARCH: http://drive5.com/usearch/manual/bitness.html
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
57
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
58 -----
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
59
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
60 ----------
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
61 Parameters
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
62 ----------
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
63
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
64 **Reference database (ref) mode**
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
65 A database file of nucleotide sequences must be specified using the Reference Database (ref) option. The database may be in FASTA format. The reference database should include sequences that might appear as parents in the query set. These should be high-quality sequences that are believed to be free of chimeras. Errors in reference sequences will degrade detection accuracy and increase the number of false positives. Chimeras will not be detected if their parents (or sufficiently close relatives) are not present in the database.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
66
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
67 .. class:: warningmark
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
68
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
69 The reference database should contain high-quality sequences that are believed to be chimera-free.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
70
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
71 **De novo mode**
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
72 De novo chimera detection using the UCHIME algorithm. The input file must contain estimated amplicons with abundances specified by size annotations. In de novo mode, abundance skew is used to distinguish chimeras from parents. Input should be estimated amplicon sequences with integer abundances specified using size annotations, e.g.:
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
73
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
74 >FQ23BBGZ5;size=23;
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
75
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
76 The minimum abundance skew is specified by the -abskew parameter, which defaults to 2.0 (because one round of PCR doubles the abundance). Abundance is a measure of how many amplicons with a given unique sequence were present in the sample after amplification by PCR. One way to estimate this is to sum the total number of reads in the cluster used to estimate the given amplicon sequence. UCHIME uses only ratios of abundances, so the absolute value does not matter. However, using the number of reads is a useful indicator; for example, a cluster containing one read is likely to be spurious. Amplicon sequences and abundances can be estimated using USEARCH, or by using another algorithm such as Chris Quince's PyroNoise or AmpliconNoise. When using de novo mode, sequences should be estimated amplicons from one sequencing run (strictly, one PCR amplification stage), otherwise abundances may not be directly comparable.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
77
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
78 ------
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
79 Inputs
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
80 ------
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
81 **Reference database mode**
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
82
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
83 (A) An input file containing the sequences in FASTA format.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
84 (B) A reference database file in FASTA format containing nucleotide sequences believed to be free of chimeras.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
85
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
86 **De novo mode**
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
87
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
88 (A) A FASTA file containing for each sequence estimated amplicons with abundances specified by size annotations, e.g. >FQ23BBGZ5;size=23; .
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
89
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
90 ------
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
91 Output
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
92 ------
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
93
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
94 This tool produces four output files, two of which are hidden by default.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
95
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
96 .. class:: infomark
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
97
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
98 To view the hidden files: click on the cogwheel icon in the history panel and select 'Include Hidden Datasets'.
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
99
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
100 (A) A FASTA file of predicted chimeras
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
101 (B) A FASTA file of non-chimeras
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
102 (C) *(hidden) A human readable file of chimeric alignments*
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
103 (D) *(hidden) A tab-separated file with the following 18 columns:*
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
104
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
105 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
106 |1 |Score |Value >= 0.0, high score means more likely to be a chimera |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
107 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
108 |2 |Q |Query label |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
109 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
110 |3 |A |Parent A label |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
111 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
112 |4 |B |Parent B label |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
113 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
114 |5 |T |Top parent (T) label. This is the closest reference sequence; usually either A or B |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
115 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
116 |6 |IdQM |Percent identity of query and the model (M) constructed as a segment of A and a segment of B|
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
117 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
118 |7 |IdQA |Percent identity of Q and A |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
119 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
120 |8 |IdQB |Percent identity of Q and B |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
121 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
122 |9 |IdAB |Percent identity of A and B |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
123 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
124 |10 |IdQT |Percent identity of Q and T |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
125 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
126 |11 |LY |Yes votes in left segment |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
127 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
128 |12 |LN |No votes in left segment |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
129 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
130 |13 |LA |Abstain votes in left segment |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
131 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
132 |14 |RY |Yes votes in right segment |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
133 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
134 |15 |RN |No votes in right segment |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
135 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
136 |16 |RA |Abstain votes in right segment |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
137 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
138 |17 |Div |Divergence, defined as (IdQM -IdQT) |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
139 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
140 |18 |YN |Y(yes) or N(no) classification as a chimera |
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
141 +-------+---------------+--------------------------------------------------------------------------------------------+
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
142
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
143 -----
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
144
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
145 =========
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
146 Resources
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
147 =========
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
148
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
149 UCHIME_
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
150
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
151 .. _UCHIME: http://drive5.com/usearch/manual/uchime_algo.html
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
152
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
153 **Author**
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
154
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
155 Robert C. Edgar (bob@drive5.com)
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
156
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
157 **Wrapper Author**
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
158
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
159 QFAB Bioinformatics (support@qfab.org)
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
160 </help>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
161 <!-- Functional test for reference mode: runs seqs.fasta against gold.fasta and compares all four outputs, tolerating up to 10 differing lines each. The param names match the declared inputs (input, mode, db). --><tests>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
162 <test>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
163 <param name="input" value="seqs.fasta" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
164 <param name="mode" value="ref" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
165 <param name="db" value="gold.fasta" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
166 <output name="output" file="chimeras.fasta" ftype="fasta" lines_diff="10" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
167 <output name="outputnon" file="non_chimeras.fasta" ftype="fasta" lines_diff="10" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
168 <output name="outputtab" file="output.tabular" ftype="tabular" lines_diff="10" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
169 <output name="outputread" file="outputread.tabular" ftype="tabular" lines_diff="10" />
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
170 </test>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
171 </tests>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
172 </tool>
fd0ab76b83f1 Uploaded
qfab
parents:
diff changeset
173