1 <?xml version="1.0" ?>
2 <tool id="qiime_dada2_denoise-paired" name="qiime dada2 denoise-paired" version="2019.4">
3 <description> - Denoise and dereplicate paired-end sequences</description>
4 <requirements>
5 <requirement type="package" version="2019.4">qiime2</requirement>
6 </requirements>
7 <command><![CDATA[
#def parse_file(file):
    ## Scan a seven-number-summary CSV and return the truncation position.
    ##
    ## file: path of the CSV file to read.
    ##
    ## For every row that contains a "50%" cell, the cells after the first
    ## column are scanned left to right; the zero-based offset of the first
    ## cell whose value is <= 25.0 becomes the result.
    ## Returns 0 when there is no "50%" row or no cell is <= 25.0.
    #import csv
    #set $qc = 0
    #set $handle = open($file, "r")
    #try
        #for $row in csv.reader($handle)
            #if "50%" in $row
                ## Offset within row[1:] equals row.index(value) - 1 for the
                ## first matching cell, so enumerate gives the same answer.
                #for $pos, $value in enumerate($row[1:])
                    #if float($value) <= 25.0
                        #set $qc = $pos
                        #break
                    #end if
                #end for
            #end if
        #end for
    #finally
        ## Always release the file handle, even if a cell fails to parse.
        #silent $handle.close()
    #end try
    #return $qc
#end def
#def find_QC(file):
    ## Derive forward and reverse truncation positions for a summary dataset.
    ##
    ## file: the summary dataset; its string form is used as a path.
    ##
    ## The two CSV files are looked up in the sibling "<path minus .dat>_files"
    ## directory. Only the last ".dat" in the path is removed, so a directory
    ## name that happens to contain ".dat" no longer truncates the path.
    ## Returns a (forward, reverse) tuple of values produced by parse_file.
    #set $base_path = str($file).rsplit(".dat", 1)[0]
    #set $qc_f = $parse_file($base_path + '_files/forward-seven-number-summaries.csv')
    #set $qc_r = $parse_file($base_path + '_files/reverse-seven-number-summaries.csv')
    #return $qc_f, $qc_r
#end def
#def find_adapters(mapping_fp):
    ## Return the lengths of the 3rd and 4th columns of the first data row.
    ##
    ## mapping_fp: tab-separated mapping file; its string form is the path.
    ##
    ## Rows whose first cell contains "#" are skipped, as are blank lines
    ## (which csv yields as empty rows and which would otherwise raise on
    ## row[0]). Only the first remaining row is inspected.
    ## Returns (forward_length, reverse_length); both are 0 when no data row
    ## is found. A data row with fewer than four columns still raises.
    #import csv
    #set $forward_len = 0
    #set $reverse_len = 0
    #set $handle = open(str($mapping_fp))
    #try
        #for $row in csv.reader($handle, delimiter='\t')
            #if $row and "#" not in str($row[0])
                #set $forward_len = len($row[2])
                #set $reverse_len = len($row[3])
                #break
            #end if
        #end for
    #finally
        ## Always release the file handle.
        #silent $handle.close()
    #end try
    #return int($forward_len), int($reverse_len)
#end def
## Auto-detect trim-left values: when a mapping file is supplied and either
## trim-left input is the -1 sentinel, both are replaced by the primer lengths
## read from the mapping file. The sentinel is compared as a string so that an
## empty optional value does not raise during int() conversion.
#if str($mapping_fp) != 'None' and (str($ptrimleftf) == '-1' or str($ptrimleftr) == '-1'):
  #set $both_adapters = $find_adapters($mapping_fp)
  #set $ptrimleftf = $both_adapters[0]
  #set $ptrimleftr = $both_adapters[1]
#end if

## Auto-detect truncation lengths the same way from the summary dataset.
#if str($sum_fp) != 'None' and (str($ptrunclenf) == '-1' or str($ptrunclenr) == '-1'):
  #set $both_qc = $find_QC($sum_fp)
  #set $ptrunclenf = $both_qc[0]
  #set $ptrunclenr = $both_qc[1]
#end if

qiime dada2 denoise-paired

  --i-demultiplexed-seqs='$idemultiplexedseqs'

#if str($ptrunclenf):
  --p-trunc-len-f="$ptrunclenf"
#end if

## Guard on the reverse value (the forward value was tested here before).
#if str($ptrunclenr):
  --p-trunc-len-r="$ptrunclenr"
#end if

#if str($ptrimleftf):
  --p-trim-left-f=$ptrimleftf
#end if

#if str($ptrimleftr):
  --p-trim-left-r=$ptrimleftr
#end if

#if str($pmaxee):
  --p-max-ee=$pmaxee
#end if

#if str($ptruncq):
  --p-trunc-q=$ptruncq
#end if

#if str($pchimeramethod) != 'None':
  --p-chimera-method=$pchimeramethod
#end if

#if str($pminfoldparentoverabundance):
  --p-min-fold-parent-over-abundance=$pminfoldparentoverabundance
#end if

## Thread count is not a user input; it is taken from the job's slot count.
#set $pnthreads = '${GALAXY_SLOTS:-4}'

#if str($pnthreads):
  --p-n-threads="$pnthreads"
#end if

#if str($pnreadslearn):
  --p-n-reads-learn=$pnreadslearn
#end if

#if $pnohashedfeatureids:
  --p-no-hashed-feature-ids
#end if

  --o-table=otable
  --o-representative-sequences=orepresentativesequences
  --o-denoising-stats=odenoisingstats

## Chain with && so the copies only run, and the job only succeeds, when
## qiime itself succeeded. Dataset paths are quoted for the shell.
&&
cp otable.qza '$otable' &&
cp orepresentativesequences.qza '$orepresentativesequences' &&
cp odenoisingstats.qza '$odenoisingstats'
123 ]]></command>
<inputs>
  <!-- Optional helper datasets. The command template consults them only when
       the matching integer inputs below are set to the sentinel value -1. -->
  <param format="tabular" label="Mapping file where 3rd and 4th columns must be forward and reverse primers respectively" name="mapping_fp" optional="True" type="data" help="Used only when --p-trim-left-f or --p-trim-left-r is set to -1: both trim-left values are then replaced by the lengths of the 3rd and 4th columns of the first row whose first column does not contain '#'."/>
  <param format="html" label="Summary file" name="sum_fp" optional="True" type="data" help="Used only when --p-trunc-len-f or --p-trunc-len-r is set to -1: both truncation lengths are then taken from the forward and reverse seven-number-summaries.csv files of this dataset, using the first position whose 50% value is 25 or lower (0 if there is none)."/>

  <!-- Required sequence artifact and truncation lengths. -->
  <param format="qza,no_unzip.zip" label="--i-demultiplexed-seqs: ARTIFACT SampleData[PairedEndSequencesWithQuality] The paired-end demultiplexed sequences to be denoised. [required]" name="idemultiplexedseqs" optional="False" type="data"/>
  <param label="--p-trunc-len-f: INTEGER Position at which forward read sequences should be truncated due to decrease in quality. This truncates the 3' end of the of the input sequences, which will be the bases that were sequenced in the last cycles. Reads that are shorter than this value will be discarded. After this parameter is applied there must still be at least a 20 nucleotide overlap between the forward and reverse reads. If 0 is provided, no truncation or length filtering will be performed [required]" name="ptrunclenf" optional="False" value="" type="integer"/>
  <param label="--p-trunc-len-r: INTEGER Position at which reverse read sequences should be truncated due to decrease in quality. This truncates the 3' end of the of the input sequences, which will be the bases that were sequenced in the last cycles. Reads that are shorter than this value will be discarded. After this parameter is applied there must still be at least a 20 nucleotide overlap between the forward and reverse reads. If 0 is provided, no truncation or length filtering will be performed [required]" name="ptrunclenr" optional="False" value="" type="integer"/>

  <!-- Optional tuning parameters; each is passed through only when non-empty. -->
  <param label="--p-trim-left-f: INTEGER Position at which forward read sequences should be trimmed due to low quality. This trims the 5' end of the input sequences, which will be the bases that were sequenced in the first cycles. [default: 0]" name="ptrimleftf" optional="True" type="integer" value="0"/>
  <param label="--p-trim-left-r: INTEGER Position at which reverse read sequences should be trimmed due to low quality. This trims the 5' end of the input sequences, which will be the bases that were sequenced in the first cycles. [default: 0]" name="ptrimleftr" optional="True" type="integer" value="0"/>
  <param label="--p-max-ee: NUMBER Reads with number of expected errors higher than this value will be discarded. [default: 2.0]" name="pmaxee" optional="True" type="float" value="2.0"/>
  <param label="--p-trunc-q: INTEGER Reads are truncated at the first instance of a quality score less than or equal to this value. If the resulting read is then shorter than `trunc-len-f` or `trunc-len-r` (depending on the direction of the read) it is discarded. [default: 2]" name="ptruncq" optional="True" type="integer" value="2"/>
  <!-- The value "None" (capital N) means the option is omitted from the
       command line; "none" (lower case) is passed through as a method name. -->
  <param label="--p-chimera-method: " name="pchimeramethod" optional="True" type="select">
    <option selected="True" value="None">Selection is Optional</option>
    <option value="consensus">consensus</option>
    <option value="pooled">pooled</option>
    <option value="none">none</option>
  </param>
  <param label="--p-min-fold-parent-over-abundance: NUMBER The minimum abundance of potential parents of a sequence being tested as chimeric, expressed as a fold-change versus the abundance of the sequence being tested. Values should be greater than or equal to 1 (i.e. parents should be more abundant than the sequence being tested). This parameter has no effect if chimera-method is 'none'. [default: 1.0]" name="pminfoldparentoverabundance" optional="True" type="float" value="1.0"/>
  <param label="--p-n-reads-learn: INTEGER The number of reads to use when training the error model. Smaller numbers will result in a shorter run time but a less reliable error model. [default: 1000000]" name="pnreadslearn" optional="True" type="integer" value="1000000"/>
  <!-- Boolean inputs take "checked" for their default state, not "selected". -->
  <param label="--p-no-hashed-feature-ids: If false, the feature ids in the resulting table will be presented as hashes of the sequences defining each feature. The hash will always be the same for the same sequence so this allows feature tables to be merged across runs of this method. You should only merge tables if the exact same parameters are used for each run. [default: False]" name="pnohashedfeatureids" checked="false" type="boolean"/>
</inputs>
<outputs>
  <!-- One dataset per artifact written by the command; the command copies
       each generated .qza file onto the dataset of the same name. -->
  <data name="otable"
        format="qza"
        label="${tool.name} on ${on_string}: table.qza"/>
  <data name="orepresentativesequences"
        format="qza"
        label="${tool.name} on ${on_string}: representativesequences.qza"/>
  <data name="odenoisingstats"
        format="qza"
        label="${tool.name} on ${on_string}: denoisingstats.qza"/>
</outputs>
150 <help><![CDATA[
151 Denoise and dereplicate paired-end sequences
152 ############################################
154 This method denoises paired-end sequences, dereplicates them, and filters
155 chimeras.
157 Parameters
158 ----------
159 demultiplexed_seqs : SampleData[PairedEndSequencesWithQuality]
160 The paired-end demultiplexed sequences to be denoised.
161 trunc_len_f : Int
162 Position at which forward read sequences should be truncated due to
decrease in quality. This truncates the 3' end of the input
164 sequences, which will be the bases that were sequenced in the last
165 cycles. Reads that are shorter than this value will be discarded. After
166 this parameter is applied there must still be at least a 20 nucleotide
167 overlap between the forward and reverse reads. If 0 is provided, no
168 truncation or length filtering will be performed
169 trunc_len_r : Int
170 Position at which reverse read sequences should be truncated due to
decrease in quality. This truncates the 3' end of the input
172 sequences, which will be the bases that were sequenced in the last
173 cycles. Reads that are shorter than this value will be discarded. After
174 this parameter is applied there must still be at least a 20 nucleotide
175 overlap between the forward and reverse reads. If 0 is provided, no
176 truncation or length filtering will be performed
177 trim_left_f : Int, optional
178 Position at which forward read sequences should be trimmed due to low
179 quality. This trims the 5' end of the input sequences, which will be
180 the bases that were sequenced in the first cycles.
181 trim_left_r : Int, optional
182 Position at which reverse read sequences should be trimmed due to low
183 quality. This trims the 5' end of the input sequences, which will be
184 the bases that were sequenced in the first cycles.
185 max_ee : Float, optional
186 Reads with number of expected errors higher than this value will be
187 discarded.
188 trunc_q : Int, optional
189 Reads are truncated at the first instance of a quality score less than
190 or equal to this value. If the resulting read is then shorter than
191 `trunc_len_f` or `trunc_len_r` (depending on the direction of the read)
192 it is discarded.
193 chimera_method : Str % Choices('consensus', 'pooled', 'none'), optional
194 The method used to remove chimeras. "none": No chimera removal is
195 performed. "pooled": All reads are pooled prior to chimera detection.
196 "consensus": Chimeras are detected in samples individually, and
197 sequences found chimeric in a sufficient fraction of samples are
198 removed.
199 min_fold_parent_over_abundance : Float, optional
200 The minimum abundance of potential parents of a sequence being tested
201 as chimeric, expressed as a fold-change versus the abundance of the
202 sequence being tested. Values should be greater than or equal to 1
203 (i.e. parents should be more abundant than the sequence being tested).
204 This parameter has no effect if chimera_method is "none".
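n_threads : Int, optional
The number of threads to use for multithreaded processing. In QIIME 2,
if 0 is provided, all available cores will be used. This Galaxy wrapper
does not expose the option: it always passes the value of GALAXY_SLOTS
(falling back to 4).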
206 n_reads_learn : Int, optional
207 The number of reads to use when training the error model. Smaller
208 numbers will result in a shorter run time but a less reliable error
209 model.
210 hashed_feature_ids : Bool, optional
211 If true, the feature ids in the resulting table will be presented as
212 hashes of the sequences defining each feature. The hash will always be
213 the same for the same sequence so this allows feature tables to be
214 merged across runs of this method. You should only merge tables if the
215 exact same parameters are used for each run.
217 Returns
218 -------
219 table : FeatureTable[Frequency]
220 The resulting feature table.
221 representative_sequences : FeatureData[Sequence]
222 The resulting feature sequences. Each feature in the feature table will
223 be represented by exactly one sequence, and these sequences will be the
224 joined paired-end sequences.
225 denoising_stats : SampleData[DADA2Stats]
226 ]]></help>
227 <macros>
228 <import>qiime_citation.xml</import>
229 </macros>
230 <expand macro="qiime_citation"/>
231 </tool>