Mercurial > repos > iuc > dada2_filterandtrim
comparison dada2_filterAndTrim.xml @ 0:cc41546adf56 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author | iuc |
---|---|
date | Fri, 08 Nov 2019 18:48:06 -0500 |
parents | |
children | 23fc35093b11 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cc41546adf56 |
---|---|
1 <tool id="dada2_filterAndTrim" name="dada2: filterAndTrim" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09"> | |
2 <description>Filter and trim short read data</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <expand macro="stdio"/> | |
8 <expand macro="version_command"/> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 Rscript '$dada2_script' | |
11 ]]></command> <!-- runs the R script generated from the dada2_script configfile; errors are detected via the exit code --> | |
12 <configfiles> <!-- dada2_script: Cheetah template rendered to the R script that calls dada2::filterAndTrim. Each trimming/filtering vector holds the forward value first; the reverse value is appended when separate reverse settings are selected for non single end data. Empty maxLen / maxEE fields become Inf. --> | |
13 <configfile name="dada2_script"><![CDATA[ | |
14 truncQ <- c($trim.truncQ) | |
15 truncLen <- c($trim.truncLen) | |
16 trimLeft <- c($trim.trimLeft) | |
17 trimRight <- c($trim.trimRight) | |
18 #if str($filter.maxLen) == "" | |
19 maxLen <- c(Inf) | |
20 #else | |
21 maxLen <- c($filter.maxLen) | |
22 #end if | |
23 minLen <- c($filter.minLen) | |
24 maxN <- c($filter.maxN) | |
25 minQ <- c($filter.minQ) | |
26 #if str($filter.maxEE) == "" | |
27 maxEE <- c(Inf) | |
28 #else | |
29 maxEE <- c($filter.maxEE) | |
30 #end if | |
31 #if $paired_cond.paired_select != "single" and $seprev_cond.seprev_select == "yes" | |
32 truncQ <- c(truncQ,$seprev_cond.trim.truncQ) | |
33 truncLen <- c(truncLen,$seprev_cond.trim.truncLen) | |
34 trimLeft <- c(trimLeft,$seprev_cond.trim.trimLeft) | |
35 trimRight <- c(trimRight,$seprev_cond.trim.trimRight) | |
36 #if str($seprev_cond.filter.maxLen) == "" | |
37 maxLen <- c(maxLen,Inf) | |
38 #else | |
39 maxLen <- c(maxLen,$seprev_cond.filter.maxLen) | |
40 #end if | |
41 minLen <- c(minLen,$seprev_cond.filter.minLen) | |
42 maxN <- c(maxN,$seprev_cond.filter.maxN) | |
43 minQ <- c(minQ,$seprev_cond.filter.minQ) | |
44 #if str($seprev_cond.filter.maxEE) == "" | |
45 maxEE <- c(maxEE,Inf) | |
46 #else | |
47 maxEE <- c(maxEE,$seprev_cond.filter.maxEE) | |
48 #end if | |
49 #end if | |
50 ## Input reads and destination paths for the three input modes: paired collection, two separate datasets, single end (rev and filt.rev stay NULL) | |
51 fwd <- NULL | |
52 rev <- NULL | |
53 filt.fwd <- NULL | |
54 filt.rev <- NULL | |
55 #if $paired_cond.paired_select == "paired" | |
56 fwd <- c(fwd, '$paired_cond.reads.forward') | |
57 rev <- c(rev, '$paired_cond.reads.reverse') | |
58 filt.fwd <- c(filt.fwd, '$paired_output.forward') | |
59 filt.rev <- c(filt.rev, '$paired_output.reverse') | |
60 #else if $paired_cond.paired_select == "separate" | |
61 fwd <- c(fwd, '$paired_cond.reads') | |
62 rev <- c(rev, '$paired_cond.sdaer') | |
63 filt.fwd <- c(filt.fwd, '$output_fwd') | |
64 filt.rev <- c(filt.rev, '$output_rev') | |
65 #else | |
66 fwd <- c(fwd, '$paired_cond.reads') | |
67 filt.fwd <- c(filt.fwd, '$output_single') | |
68 #end if | |
69 ## An empty orientFwd text field is passed to filterAndTrim as NULL | |
70 #if str($orientFwd) == "" | |
71 orientFwd <- NULL | |
72 #else | |
73 orientFwd <- "$orientFwd" | |
74 #end if | |
75 | |
76 library(dada2, quietly=TRUE) | |
77 | |
78 ftout <- filterAndTrim(fwd, filt.fwd, rev, filt.rev, compress = TRUE, | |
79 truncQ = truncQ, truncLen = truncLen, trimLeft = trimLeft, trimRight = trimRight, maxLen = maxLen, | |
80 minLen = minLen, maxN = maxN, minQ = minQ, maxEE = maxEE, rm.lowcomplex = $rmlowcomplex, | |
81 rm.phix = $rmPhiX, orient.fwd = orientFwd) | |
82 ## Label the result row with the element identifier of the input reads and write it as a tab separated table | |
83 rownames(ftout) <- c( '$paired_cond.reads.element_identifier' ) | |
84 write.table(ftout, "$outtab", quote=FALSE, sep="\t", col.names=NA) | |
85 ]]></configfile> | |
86 </configfiles> | |
87 <inputs> | |
88 <expand macro="fastq_input" multiple="False" collection_type="paired" argument_fwd="fwd" argument_rev="rev"/> <!-- read input macro; presumably defines the paired_cond conditional referenced by the script and the output filters (confirm in macros.xml) --> | |
89 <expand macro="trimmers"/> | |
90 <expand macro="filters"/> | |
91 <conditional name="seprev_cond"> <!-- optional second set of trimmers and filters whose values are appended for the reverse reads --> | |
92 <param name="seprev_select" type="select" label="Separate filters and trimmers for reverse reads" help="only applies to paired end data"> | |
93 <option value="no">no</option> | |
94 <option value="yes">yes</option> | |
95 </param> | |
96 <when value="no"/> | |
97 <when value="yes"> | |
98 <expand macro="trimmers"/> | |
99 <expand macro="filters"/> | |
100 </when> | |
101 </conditional> | |
102 <param argument="rmPhiX" truevalue="TRUE" falsevalue="FALSE" type="boolean" checked="true" label="Discard reads matching the PhiX genome" /> <!-- truevalue and falsevalue are inserted verbatim as the R value of rm.phix --> | |
103 <param name="rmlowcomplex" argument="rm.lowcomplex" type="integer" value="0" min="0" label="Low complexity filter kmer threshold" help="see below"/> | |
104 <param name="orientFwd" argument="orient.fwd" type="text" value="" optional="true" label="String present at the start of valid reads" help="see below"/> <!-- an empty value is turned into NULL by the script --> | |
105 <param name="output_statistics" truevalue="TRUE" falsevalue="FALSE" type="boolean" checked="true" label="Output statistics" help="Create extra table with the number of reads pre and post filtering" /> | |
106 </inputs> | |
107 <outputs> <!-- each reads output is kept only for the matching paired_select mode; the statistics table only when output_statistics is checked --> | |
108 <collection name="paired_output" type="paired" format_source="reads['forward']"> | |
109 <filter>paired_cond['paired_select'] == "paired"</filter> | |
110 </collection> | |
111 <data name="output_single" format_source="reads" > | |
112 <filter>paired_cond['paired_select'] == "single"</filter> | |
113 </data> | |
114 <data name="output_fwd" format_source="reads" label="${tool.name} on ${on_string}: Forward reads" > | |
115 <filter>paired_cond['paired_select'] == "separate"</filter> | |
116 </data> | |
117 <data name="output_rev" format_source="sdaer" label="${tool.name} on ${on_string}: Reverse reads" > | |
118 <filter>paired_cond['paired_select'] == "separate"</filter> | |
119 </data> | |
120 <data name="outtab" format="tabular" label="${tool.name} on ${on_string}: Statistics"> | |
121 <filter>output_statistics</filter> | |
122 </data> | |
123 </outputs> | |
124 <tests> | |
125 <!-- paired data in a paired collection with default settings; checks both filtered read files and the statistics table --> | |
126 <test expect_num_outputs="4"> | |
127 <conditional name="paired_cond"> | |
128 <param name="paired_select" value="paired"/> | |
129 <param name="reads"> | |
130 <collection type="paired"> | |
131 <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | |
132 <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> | |
133 </collection> | |
134 </param> | |
135 </conditional> | |
136 <output_collection name="paired_output" type="paired" count="2"> | |
137 <element name="forward" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastqsanger.gz" /> | |
138 <element name="reverse" value="filterAndTrim_F3D0_R2.fq.gz" ftype="fastqsanger.gz" /> | |
139 </output_collection> | |
140 <output name="outtab" value="filterAndTrim_F3D0.tab" ftype="tabular"/> | |
141 </test> | |
142 <!-- paired data given as two separate datasets; the statistics table is compared by size only (sim_size) --> | |
143 <test expect_num_outputs="3"> | |
144 <conditional name="paired_cond"> | |
145 <param name="paired_select" value="separate"/> | |
146 <param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | |
147 <param name="sdaer" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> | |
148 </conditional> | |
149 <output name="output_fwd" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastqsanger.gz" /> | |
150 <output name="output_rev" value="filterAndTrim_F3D0_R2.fq.gz" ftype="fastqsanger.gz" /> | |
151 <output name="outtab" value="filterAndTrim_F3D0.tab" ftype="tabular" compare="sim_size"/> | |
152 </test> | |
153 <!-- single end data with PhiX removal, low complexity filter and read orientation; statistics disabled, the filtered reads are the output_single dataset --> | |
154 <test expect_num_outputs="1"> | |
155 <conditional name="paired_cond"> | |
156 <param name="paired_select" value="single"/> | |
157 <param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | |
158 </conditional> | |
159 <param name="rmPhiX" value="TRUE" /> | |
160 <param name="rmlowcomplex" value="2" /> | |
161 <param name="orientFwd" value="TACGG" /> | |
162 <param name="output_statistics" value="FALSE" /> | |
163 <output name="output_single" value="filterAndTrim_single_F3D0_R1.fq.gz" ftype="fastqsanger.gz" /> | |
164 </test> | |
165 <!-- single end data trimming: non default trimmer values; statistics disabled, the trimmed reads are the output_single dataset --> | |
166 <test expect_num_outputs="1"> | |
167 <conditional name="paired_cond"> | |
168 <param name="paired_select" value="single"/> | |
169 <param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | |
170 </conditional> | |
171 <param name="trim|truncQ" value="30" /> | |
172 <param name="trim|trimLeft" value="150" /> | |
173 <param name="trim|trimRight" value="2" /> | |
174 <param name="trim|truncLen" value="2" /> | |
175 <param name="output_statistics" value="FALSE" /> | |
176 <output name="output_single" value="filterAndTrim_single_trimmers_F3D0_R1.fq.gz" ftype="fastqsanger.gz" /> | |
177 </test> | |
178 <!-- single end data filtering: non default filter values; statistics disabled, the filtered reads are the output_single dataset --> | |
179 <test expect_num_outputs="1"> | |
180 <conditional name="paired_cond"> | |
181 <param name="paired_select" value="single"/> | |
182 <param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | |
183 </conditional> | |
184 <param name="filter|maxLen" value="255" /> | |
185 <param name="filter|minLen" value="60" /> | |
186 <param name="filter|maxN" value="100" /> | |
187 <param name="filter|minQ" value="13" /> | |
188 <param name="filter|maxEE" value="1" /> | |
189 <param name="output_statistics" value="FALSE" /> | |
190 <output name="output_single" value="filterAndTrim_single_filters_F3D0_R1.fq.gz" ftype="fastqsanger.gz" /> | |
191 </test> | |
192 <!-- paired data with separate filters and trimmers for the reverse reads; statistics disabled, only the size of the paired output collection is checked --> | |
193 <test expect_num_outputs="3"> | |
194 <conditional name="paired_cond" > | |
195 <param name="paired_select" value="paired"/> | |
196 <param name="reads"> | |
197 <collection type="paired"> | |
198 <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | |
199 <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> | |
200 </collection> | |
201 </param> | |
202 </conditional> | |
203 <param name="seprev_cond|seprev_select" value="yes"/> | |
204 <param name="seprev_cond|trim|truncQ" value="30" /> | |
205 <param name="seprev_cond|trim|trimLeft" value="150" /> | |
206 <param name="seprev_cond|trim|trimRight" value="2" /> | |
207 <param name="seprev_cond|trim|truncLen" value="2" /> | |
208 <param name="seprev_cond|filter|maxLen" value="255" /> | |
209 <param name="seprev_cond|filter|minLen" value="60" /> | |
210 <param name="seprev_cond|filter|maxN" value="100" /> | |
211 <param name="seprev_cond|filter|minQ" value="13" /> | |
212 <param name="seprev_cond|filter|maxEE" value="1" /> | |
213 <param name="output_statistics" value="FALSE" /> | |
214 <output_collection name="paired_output" type="paired" count="2"/> | |
215 </test> | |
216 </tests> | |
217 | |
218 <help><![CDATA[ | |
219 Description | |
220 ........... | |
221 | |
222 Filters and trims a FASTQ dataset (can be compressed) based on several user-definable criteria, and outputs a compressed FASTQ data set containing those trimmed reads which passed the filters. For paired end data, forward and reverse FASTQ datasets can be provided as a pair of FASTQ datasets (or two separate data sets), in which case filtering is performed on the forward and reverse reads independently, and both reads must pass for the read pair to be in the output. | |
223 | |
224 Usage | |
225 ..... | |
226 | |
227 **Input** is a FASTQ dataset (or a pair in case of paired end data) containing all reads of a sample. It is suggested to organize them in a (paired) collection (in particular if you have multiple samples). | |
228 | |
229 **Output** is a (paired) collection of filtered and trimmed paired FASTQ datasets (again one data set or pair per sample). | |
230 | |
231 Downstream dada2 tools are *dada2: learnErrorRates* and *dada2: dada*. Note that these tools do not work on paired end data. So, if you have paired end data you need to split the generated paired collection into one containing the forward reads and one containing the reverse reads. This can be done by the *unzip collection* tool. | |
232 | |
233 An additional tabular output gives the number of reads before and after trimming. This data set can be used as input for *dada2: sequence counts* to track the sequence counts for each sample through all dada2 pipeline steps. | |
234 | |
235 Details | |
236 ....... | |
237 | |
238 *Trimming and filtering*: | |
239 | |
240 - Truncation of the read length is enforced after trimming of the right end. | |
241 - The long read filter is applied before trimming and the short read filter after trimming. | |
242 - For details on the calculation of the number of expected errors see also https://doi.org/10.1093/bioinformatics/btv401 | |
243 | |
244 | |
245 *String present at the start of valid reads* (orient.fwd): | |
246 | |
247 This string is compared to the start of each read, and the reverse complement of each read. If it exactly matches the start of the read, the read is kept. If it exactly matches the start of the reverse complement read, the read is reverse-complemented and kept. Otherwise the read is filtered out. For paired reads, the string is compared to the start of the forward and reverse reads, and if it matches the start of the reverse read the reads are swapped and kept. The primary use of this parameter is to unify the orientation of amplicon sequencing libraries that are a mixture of forward and reverse orientations, and that include the forward primer on the reads. | |
248 | |
249 *Low complexity filter kmer threshold* (rm.lowcomplex): | |
250 | |
251 If greater than 0, reads with an effective number of kmers less than this value will be removed. The effective number of kmers is determined as a Shannon information approximation. The default kmer-size is 2, and therefore perfectly random sequences will approach an effective kmer number of 16 = 4 (nucleotides) ^ 2 (kmer size). | |
252 | |
253 Notes | |
254 ..... | |
255 | |
256 This step may be replaced by alternative tools to filter and trim short read data if the following is ensured: | |
257 | |
258 - For paired end data unpaired reads must be removed. | |
259 - There must not be a read containing a non-canonical nucleotide (N). | |
260 | |
261 @HELP_OVERVIEW@ | |
262 ]]></help> | |
263 <expand macro="citations"/> | |
264 </tool> |