0
|
1 <?xml version="1.0" ?>
|
9
|
2 <tool id="qiime_dada2_denoise-paired" name="qiime dada2 denoise-paired" version="2019.7">
|
0
|
3 <description> - Denoise and dereplicate paired-end sequences</description>
|
|
4 <requirements>
|
9
|
5 <requirement type="package" version="2019.7">qiime2</requirement>
|
0
|
6 </requirements>
|
|
7 <command><![CDATA[
|
6
|
8
|
|
#def parse_file(file):
## Scan a seven-number-summary CSV and return a truncation length.
##
## For every row that contains the cell "50%", the value cells (everything
## after the first column) are walked left to right; the scan of that row
## stops at the first value that is <= 25.0 and the result becomes that
## cell's index within the row minus one.
## Returns 0 when no matching row or value is found.
#import csv
#set $qc = 0
#set $handle = open($file, "r")
#try
    #for $row in csv.reader($handle):
        #if "50%" in $row:
            ## Start counting at 1 so $pos is the cell's index in the full row.
            #for $pos, $cell in enumerate($row[1:], 1):
                #if float($cell) <= 25.0:
                    #set $qc = $pos - 1
                    #break
                #end if
            #end for
        #end if
    #end for
#finally
    ## Release the file handle even when a cell cannot be parsed as a float.
    #silent $handle.close()
#end try
#return $qc
#end def
|
|
27
|
|
#def find_QC(file):
## Derive forward and reverse truncation lengths for a summary dataset.
##
## Resolves the directory holding the dataset's extra files, then runs
## parse_file on the forward and reverse seven-number-summary CSVs found there.
## Returns a (forward, reverse) tuple of the two parse_file results.
##
## NOTE(review): assumes the object passed in is a Galaxy dataset wrapper that
## exposes "extra_files_path"; anything without that attribute falls back to
## the previous path derivation below.
#set $extra_dir = getattr($file, 'extra_files_path', None)
#if not $extra_dir:
    ## Fallback: take the path up to the first ".dat" and append "_files".
    #set $extra_dir = str($file).split(".dat")[0] + '_files'
#end if
#set $qc_f = $parse_file(str($extra_dir) + '/forward-seven-number-summaries.csv')
#set $qc_r = $parse_file(str($extra_dir) + '/reverse-seven-number-summaries.csv')
#return $qc_f, $qc_r
#end def
|
|
35
|
|
#def find_adapters(mapping_fp):
## Return the lengths of the forward and reverse primers in a mapping file.
##
## The file is read as tab-separated text. The first row whose first cell
## does not contain "#" supplies the primers: the third column is measured
## for the forward length and the fourth column for the reverse length.
## Returns (0, 0) when no such row exists.
#import csv
#set $forward = 0
#set $reverse = 0
#set $handle = open(str($mapping_fp))
#try
    #for $row in csv.reader($handle, delimiter='\t'):
        ## csv yields an empty list for blank lines; skip those rather than
        ## failing on the first-cell lookup.
        #if $row and "#" not in str($row[0]):
            #set $forward = len($row[2])
            #set $reverse = len($row[3])
            #break
        #end if
    #end for
#finally
    ## Release the file handle whether or not a primer row was found.
    #silent $handle.close()
#end try
#return int($forward), int($reverse)
#end def
|
|
50
|
|
## Trim-left auto-detection: when a mapping file is supplied and either
## trim-left value is -1, both values are replaced by the primer lengths
## returned by find_adapters.
## The values are compared as text so that a blank optional field does not
## abort template rendering with a failed integer conversion.
#if str($mapping_fp) != 'None' and (str($ptrimleftf).strip() == '-1' or str($ptrimleftr).strip() == '-1'):
    #set $primer_lengths = $find_adapters($mapping_fp)
    #set $ptrimleftf = $primer_lengths[0]
    #set $ptrimleftr = $primer_lengths[1]
#end if

## Truncation auto-detection: when a summary dataset is supplied and either
## truncation length is -1, both values are replaced by the results of find_QC.
#if str($sum_fp) != 'None' and (str($ptrunclenf).strip() == '-1' or str($ptrunclenr).strip() == '-1'):
    #set $trunc_lengths = $find_QC($sum_fp)
    #set $ptrunclenf = $trunc_lengths[0]
    #set $ptrunclenr = $trunc_lengths[1]
#end if
|
|
62
|
|
63
|
0
|
qiime dada2 denoise-paired

--i-demultiplexed-seqs=$idemultiplexedseqs

## Forward truncation length is passed only when a value is present.
#if str($ptrunclenf):
  --p-trunc-len-f="$ptrunclenf"
#end if

## The reverse option is guarded by its own value, not by the forward one.
#if str($ptrunclenr):
  --p-trunc-len-r="$ptrunclenr"
#end if
|
|
76
|
|
## Each optional flag below is emitted only when its parameter renders to a
## non-empty string.

## 5' trimming of forward and reverse reads
#if str($ptrimleftf) != '':
  --p-trim-left-f=$ptrimleftf
#end if

#if str($ptrimleftr) != '':
  --p-trim-left-r=$ptrimleftr
#end if

## Expected-error limits for forward and reverse reads
#if str($pmaxeef) != '':
  --p-max-ee-f=$pmaxeef
#end if

#if str($pmaxeer) != '':
  --p-max-ee-r=$pmaxeer
#end if

## Quality-score truncation threshold
#if str($ptruncq) != '':
  --p-trunc-q=$ptruncq
#end if

## The select's placeholder option has the value 'None'; skip the flag then.
#if str($pchimeramethod) != 'None':
  --p-chimera-method=$pchimeramethod
#end if

#if str($pminfoldparentoverabundance) != '':
  --p-min-fold-parent-over-abundance=$pminfoldparentoverabundance
#end if

## Thread count is left as a shell expansion that defaults to 4 when
## GALAXY_SLOTS is unset.
#set $pnthreads = '${GALAXY_SLOTS:-4}'

#if str($pnthreads) != '':
  --p-n-threads="$pnthreads"
#end if

#if str($pnreadslearn) != '':
  --p-n-reads-learn=$pnreadslearn
#end if

## Bare switch: present only when the boolean parameter is true.
#if $pnohashedfeatureids:
  --p-no-hashed-feature-ids
#end if
|
|
125
|
|
--o-table=otable
--o-representative-sequences=orepresentativesequences
--o-denoising-stats=odenoisingstats
## Chain with "&&" so the copies run only after qiime succeeds and the first
## failing step determines the job's exit status. Destination paths are quoted.
&& cp otable.qza '$otable'
&& cp orepresentativesequences.qza '$orepresentativesequences'
&& cp odenoisingstats.qza '$odenoisingstats'
|
|
133 ]]></command>
|
|
134 <inputs>
|
6
|
135 <param format="tabular" label="Mapping file where 3rd and 4th columns must be forward and reverse primers respectively" name="mapping_fp" optional="True" type="data"/>
|
|
136 <param format="html" label="Summary file" name="sum_fp" optional="True" type="data"/>
|
|
137
|
0
|
138 <param format="qza,no_unzip.zip" label="--i-demultiplexed-seqs: ARTIFACT SampleData[PairedEndSequencesWithQuality] The paired-end demultiplexed sequences to be denoised. [required]" name="idemultiplexedseqs" optional="False" type="data"/>
|
|
139 <param label="--p-trunc-len-f: INTEGER Position at which forward read sequences should be truncated due to decrease in quality. This truncates the 3' end of the of the input sequences, which will be the bases that were sequenced in the last cycles. Reads that are shorter than this value will be discarded. After this parameter is applied there must still be at least a 20 nucleotide overlap between the forward and reverse reads. If 0 is provided, no truncation or length filtering will be performed [required]" name="ptrunclenf" optional="False" value="" type="integer"/>
|
|
140 <param label="--p-trunc-len-r: INTEGER Position at which reverse read sequences should be truncated due to decrease in quality. This truncates the 3' end of the of the input sequences, which will be the bases that were sequenced in the last cycles. Reads that are shorter than this value will be discarded. After this parameter is applied there must still be at least a 20 nucleotide overlap between the forward and reverse reads. If 0 is provided, no truncation or length filtering will be performed [required]" name="ptrunclenr" optional="False" value="" type="integer"/>
|
|
141 <param label="--p-trim-left-f: INTEGER Position at which forward read sequences should be trimmed due to low quality. This trims the 5' end of the input sequences, which will be the bases that were sequenced in the first cycles. [default: 0]" name="ptrimleftf" optional="True" type="integer" value="0"/>
|
|
142 <param label="--p-trim-left-r: INTEGER Position at which reverse read sequences should be trimmed due to low quality. This trims the 5' end of the input sequences, which will be the bases that were sequenced in the first cycles. [default: 0]" name="ptrimleftr" optional="True" type="integer" value="0"/>
|
9
|
143
|
|
144
|
|
145
|
|
146 <param label="--p-max-ee-f: NUMBER Forward reads with number of expected errors higher than this value will be discarded. [default: 2.0]" name="pmaxeef" optional="True" type="float" value="2.0"/>
|
|
147 <param label="--p-max-ee-r: NUMBER Reverse reads with number of expected errors higher than this value will be discarded. [default: 2.0]" name="pmaxeer" optional="True" type="float" value="2.0"/>
|
|
148
|
|
149
|
|
150
|
0
|
151 <param label="--p-trunc-q: INTEGER Reads are truncated at the first instance of a quality score less than or equal to this value. If the resulting read is then shorter than `trunc-len-f` or `trunc-len-r` (depending on the direction of the read) it is discarded. [default: 2]" name="ptruncq" optional="True" type="integer" value="2"/>
|
|
152 <param label="--p-chimera-method: " name="pchimeramethod" optional="True" type="select">
|
|
153 <option selected="True" value="None">Selection is Optional</option>
|
|
154 <option value="consensus">consensus</option>
|
|
155 <option value="pooled">pooled</option>
|
|
156 <option value="none">none</option>
|
|
157 </param>
|
|
158 <param label="--p-min-fold-parent-over-abundance: NUMBER The minimum abundance of potential parents of a sequence being tested as chimeric, expressed as a fold-change versus the abundance of the sequence being tested. Values should be greater than or equal to 1 (i.e. parents should be more abundant than the sequence being tested). This parameter has no effect if chimera-method is 'none'. [default: 1.0]" name="pminfoldparentoverabundance" optional="True" type="float" value="1.0"/>
|
|
159 <param label="--p-n-reads-learn: INTEGER The number of reads to use when training the error model. Smaller numbers will result in a shorter run time but a less reliable error model. [default: 1000000]" name="pnreadslearn" optional="True" type="integer" value="1000000"/>
|
|
<!-- Boolean parameter: the default state is declared with "checked". -->
<param label="--p-no-hashed-feature-ids: If false, the feature ids in the resulting table will be presented as hashes of the sequences defining each feature. The hash will always be the same for the same sequence so this allows feature tables to be merged across runs of this method. You should only merge tables if the exact same parameters are used for each run. [default: False]" name="pnohashedfeatureids" checked="false" type="boolean"/>
|
|
161 </inputs>
|
|
<outputs>
    <!-- One dataset per .qza file that the command copies back after the run. -->
    <data name="otable" format="qza" label="${tool.name} on ${on_string}: table.qza"/>
    <data name="orepresentativesequences" format="qza" label="${tool.name} on ${on_string}: representativesequences.qza"/>
    <data name="odenoisingstats" format="qza" label="${tool.name} on ${on_string}: denoisingstats.qza"/>
</outputs>
|
|
167 <help><![CDATA[
|
|
168 Denoise and dereplicate paired-end sequences
|
9
|
169 #############################################
|
0
|
170
|
|
171 This method denoises paired-end sequences, dereplicates them, and filters
|
|
172 chimeras.
|
|
173
|
|
174 Parameters
|
|
175 ----------
|
|
176 demultiplexed_seqs : SampleData[PairedEndSequencesWithQuality]
|
|
177 The paired-end demultiplexed sequences to be denoised.
|
|
178 trunc_len_f : Int
|
|
179 Position at which forward read sequences should be truncated due to
|
|
180 decrease in quality. This truncates the 3' end of the input
|
|
181 sequences, which will be the bases that were sequenced in the last
|
|
182 cycles. Reads that are shorter than this value will be discarded. After
|
|
183 this parameter is applied there must still be at least a 20 nucleotide
|
|
184 overlap between the forward and reverse reads. If 0 is provided, no
|
|
185 truncation or length filtering will be performed
|
|
186 trunc_len_r : Int
|
|
187 Position at which reverse read sequences should be truncated due to
|
|
188 decrease in quality. This truncates the 3' end of the input
|
|
189 sequences, which will be the bases that were sequenced in the last
|
|
190 cycles. Reads that are shorter than this value will be discarded. After
|
|
191 this parameter is applied there must still be at least a 20 nucleotide
|
|
192 overlap between the forward and reverse reads. If 0 is provided, no
|
|
193 truncation or length filtering will be performed
|
|
194 trim_left_f : Int, optional
|
|
195 Position at which forward read sequences should be trimmed due to low
|
|
196 quality. This trims the 5' end of the input sequences, which will be
|
|
197 the bases that were sequenced in the first cycles.
|
|
198 trim_left_r : Int, optional
|
|
199 Position at which reverse read sequences should be trimmed due to low
|
|
200 quality. This trims the 5' end of the input sequences, which will be
|
|
201 the bases that were sequenced in the first cycles.
|
9
|
202 max_ee_f : Float, optional
|
|
203 Forward reads with number of expected errors higher than this value
|
|
204 will be discarded.
|
|
205 max_ee_r : Float, optional
|
|
206 Reverse reads with number of expected errors higher than this value
|
|
207 will be discarded.
|
0
|
208 trunc_q : Int, optional
|
|
209 Reads are truncated at the first instance of a quality score less than
|
|
210 or equal to this value. If the resulting read is then shorter than
|
|
211 `trunc_len_f` or `trunc_len_r` (depending on the direction of the read)
|
|
212 it is discarded.
|
9
|
213 chimera_method : Str % Choices('consensus', 'none', 'pooled'), optional
|
0
|
214 The method used to remove chimeras. "none": No chimera removal is
|
|
215 performed. "pooled": All reads are pooled prior to chimera detection.
|
|
216 "consensus": Chimeras are detected in samples individually, and
|
|
217 sequences found chimeric in a sufficient fraction of samples are
|
|
218 removed.
|
|
219 min_fold_parent_over_abundance : Float, optional
|
|
220 The minimum abundance of potential parents of a sequence being tested
|
|
221 as chimeric, expressed as a fold-change versus the abundance of the
|
|
222 sequence being tested. Values should be greater than or equal to 1
|
|
223 (i.e. parents should be more abundant than the sequence being tested).
|
|
224 This parameter has no effect if chimera_method is "none".
|
|
225 n_reads_learn : Int, optional
|
|
226 The number of reads to use when training the error model. Smaller
|
|
227 numbers will result in a shorter run time but a less reliable error
|
|
228 model.
|
|
229 hashed_feature_ids : Bool, optional
|
|
230 If true, the feature ids in the resulting table will be presented as
|
|
231 hashes of the sequences defining each feature. The hash will always be
|
|
232 the same for the same sequence so this allows feature tables to be
|
|
233 merged across runs of this method. You should only merge tables if the
|
|
234 exact same parameters are used for each run.
|
|
235
|
|
236 Returns
|
|
237 -------
|
|
238 table : FeatureTable[Frequency]
|
|
239 The resulting feature table.
|
|
240 representative_sequences : FeatureData[Sequence]
|
|
241 The resulting feature sequences. Each feature in the feature table will
|
|
242 be represented by exactly one sequence, and these sequences will be the
|
|
243 joined paired-end sequences.
|
|
244 denoising_stats : SampleData[DADA2Stats]
|
|
245 ]]></help>
|
|
246 <macros>
|
|
247 <import>qiime_citation.xml</import>
|
|
248 </macros>
|
|
249 <expand macro="qiime_citation"/>
|
|
250 </tool>
|