<tool id="trimmomatic" name="Trimmomatic" version="0.32.1">
<description>flexible read trimming tool for Illumina NGS data</description>
<command interpreter="bash">trimmomatic.sh
  ## Cheetah template that assembles the Trimmomatic command line.
  ## Backslash-escaped dollar signs are left for the shell to expand at job
  ## run time; unescaped ones are substituted by Galaxy from the tool form.
  ##
  ## Java heap size and the Trimmomatic jar to run
  -mx8G
  -jar \$TRIMMOMATIC_DIR/trimmomatic-0.32.jar
  ## Mode (PE/SE), thread count, quality encoding, then input and output files.
  ## The thread count follows the number of slots Galaxy allocated to the job
  ## and falls back to the previous fixed value of 6 when none is set.
  ## All input datasets are addressed through their enclosing conditional.
  #if $paired_end.is_paired_end
  PE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_r1_in $paired_end.fastq_r2_in $fastq_out_r1_paired $fastq_out_r1_unpaired $fastq_out_r2_paired $fastq_out_r2_unpaired
  #else
  SE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_in $fastq_out
  #end if
  ## ILLUMINACLIP option (emitted before any other trimming step)
  #if $illuminaclip.do_illuminaclip
  ILLUMINACLIP:\$TRIMMOMATIC_ADAPTERS_DIR/$illuminaclip.adapter_fasta:$illuminaclip.seed_mismatches:$illuminaclip.palindrome_clip_threshold:$illuminaclip.simple_clip_threshold
  #end if
  ## Other operations, emitted in the order the user listed them.
  ## Each repeat entry selects exactly one operation, so a single
  ## if/elif chain covers all cases.
  #for $op in $operations
    #if str( $op.operation.name ) == "SLIDINGWINDOW"
  SLIDINGWINDOW:$op.operation.window_size:$op.operation.required_quality
    #elif str( $op.operation.name ) == "MINLEN"
  MINLEN:$op.operation.minlen
    #elif str( $op.operation.name ) == "LEADING"
  LEADING:$op.operation.leading
    #elif str( $op.operation.name ) == "TRAILING"
  TRAILING:$op.operation.trailing
    #elif str( $op.operation.name ) == "CROP"
  CROP:$op.operation.crop
    #elif str( $op.operation.name ) == "HEADCROP"
  HEADCROP:$op.operation.headcrop
    #end if
  #end for
</command>
<requirements>
  <!-- External package this wrapper depends on: Trimmomatic, version 0.32 -->
  <requirement version="0.32" type="package">trimmomatic</requirement>
</requirements>
<inputs>
  <!-- Single-end versus paired-end mode; each branch exposes its own FASTQ input(s) -->
  <conditional name="paired_end">
    <param name="is_paired_end"
           type="boolean"
           label="Paired end data?"
           truevalue="yes"
           falsevalue="no"
           checked="on" />
    <when value="no">
      <param name="fastq_in"
             type="data"
             format="fastqsanger"
             label="Input FASTQ file" />
    </when>
    <when value="yes">
      <param name="fastq_r1_in"
             type="data"
             format="fastqsanger"
             label="Input FASTQ file (R1/first of pair)" />
      <param name="fastq_r2_in"
             type="data"
             format="fastqsanger"
             label="Input FASTQ file (R2/second of pair)" />
    </when>
  </conditional>

  <!-- Optional ILLUMINACLIP step; its settings are only shown when enabled -->
  <conditional name="illuminaclip">
    <param name="do_illuminaclip"
           type="boolean"
           label="Perform initial ILLUMINACLIP step?"
           help="Cut adapter and other illumina-specific sequences from the read"
           truevalue="yes"
           falsevalue="no"
           checked="off" />
    <when value="yes">
      <!-- Option values are adapter FASTA file names appended to the adapters directory by the command -->
      <param name="adapter_fasta" type="select" label="Adapter sequences to use">
        <option value="TruSeq2-SE.fa">TruSeq2 (single-ended, for Illumina GAII)</option>
        <option value="TruSeq3-SE.fa">TruSeq3 (single-ended, for MiSeq and HiSeq)</option>
        <option value="TruSeq2-PE.fa">TruSeq2 (paired-ended, for Illumina GAII)</option>
        <option value="TruSeq3-PE.fa">TruSeq3 (paired-ended, for MiSeq and HiSeq)</option>
        <option value="TruSeq3-PE-2.fa">TruSeq3 (additional seqs) (paired-ended, for MiSeq and HiSeq)</option>
        <option value="NexteraPE-PE.fa">Nextera (paired-ended)</option>
      </param>
      <param name="seed_mismatches"
             type="integer"
             value="2"
             label="Maximum mismatch count which will still allow a full match to be performed" />
      <param name="palindrome_clip_threshold"
             type="integer"
             value="30"
             label="How accurate the match between the two 'adapter ligated' reads must be for PE palindrome read alignment" />
      <param name="simple_clip_threshold"
             type="integer"
             value="10"
             label="How accurate the match between any adapter etc. sequence must be against a read" />
    </when>
  </conditional>

  <!-- One or more trimming operations; the command emits them in the order listed here -->
  <repeat name="operations" title="Trimmomatic Operation" min="1">
    <conditional name="operation">
      <param name="name" type="select" label="Select Trimmomatic operation to perform">
        <option value="SLIDINGWINDOW" selected="true">Sliding window trimming (SLIDINGWINDOW)</option>
        <option value="MINLEN">Drop reads below a specified length (MINLEN)</option>
        <option value="LEADING">Cut bases off the start of a read, if below a threshold quality (LEADING)</option>
        <option value="TRAILING">Cut bases off the end of a read, if below a threshold quality (TRAILING)</option>
        <option value="CROP">Cut the read to a specified length (CROP)</option>
        <option value="HEADCROP">Cut the specified number of bases from the start of the read (HEADCROP)</option>
      </param>
      <when value="SLIDINGWINDOW">
        <param name="window_size"
               type="integer"
               value="4"
               label="Number of bases to average across" />
        <param name="required_quality"
               type="integer"
               value="20"
               label="Average quality required" />
      </when>
      <when value="MINLEN">
        <param name="minlen"
               type="integer"
               value="20"
               label="Minimum length of reads to be kept" />
      </when>
      <when value="LEADING">
        <param name="leading"
               type="integer"
               value="3"
               label="Minimum quality required to keep a base"
               help="Bases at the start of the read with quality below the threshold will be removed" />
      </when>
      <when value="TRAILING">
        <param name="trailing"
               type="integer"
               value="3"
               label="Minimum quality required to keep a base"
               help="Bases at the end of the read with quality below the threshold will be removed" />
      </when>
      <!-- CROP and HEADCROP declare an empty default; presumably the user is expected to enter a value (to be confirmed) -->
      <when value="CROP">
        <param name="crop"
               type="integer"
               value=""
               label="Number of bases to keep from the start of the read" />
      </when>
      <when value="HEADCROP">
        <param name="headcrop"
               type="integer"
               value=""
               label="Number of bases to remove from the start of the read" />
      </when>
    </conditional>
  </repeat>
</inputs>
<outputs>
  <!-- Paired-end mode: paired and unpaired FASTQ outputs for each of R1 and R2 -->
  <data name="fastq_out_r1_paired"
        format="fastqsanger"
        label="${tool.name} on ${on_string} (R1 paired)">
    <filter>paired_end['is_paired_end']</filter>
  </data>
  <data name="fastq_out_r1_unpaired"
        format="fastqsanger"
        label="${tool.name} on ${on_string} (R1 unpaired)">
    <filter>paired_end['is_paired_end']</filter>
  </data>
  <data name="fastq_out_r2_paired"
        format="fastqsanger"
        label="${tool.name} on ${on_string} (R2 paired)">
    <filter>paired_end['is_paired_end']</filter>
  </data>
  <data name="fastq_out_r2_unpaired"
        format="fastqsanger"
        label="${tool.name} on ${on_string} (R2 unpaired)">
    <filter>paired_end['is_paired_end']</filter>
  </data>
  <!-- Single-end mode: one FASTQ output, filtered on the negated paired-end flag -->
  <data name="fastq_out"
        format="fastqsanger"
        label="${tool.name} on ${on_string}">
    <filter>not paired_end['is_paired_end']</filter>
  </data>
</outputs>
<tests>
  <!--
    **NB** for every test below, outputs have to be specified in the order
    that they appear in the tool (which is the order they will be written
    to the history) - the test framework seems to use the order and ignores
    the "name" attribute
  -->
  <test>
    <!-- Single-end example: sliding window trimming with the form defaults -->
    <param name="is_paired_end" value="no" />
    <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
    <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
    <output name="fastq_out" file="trimmomatic_se_out1.fastq" />
  </test>
  <test>
    <!-- Paired-end example: sliding window trimming with the form defaults -->
    <param name="is_paired_end" value="yes" />
    <param name="fastq_r1_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
    <param name="fastq_r2_in" value="Illumina_SG_R2.fastq" ftype="fastqsanger" />
    <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
    <output name="fastq_out_r1_paired" file="trimmomatic_pe_r1_paired_out1.fastq" />
    <output name="fastq_out_r1_unpaired" file="trimmomatic_pe_r1_unpaired_out1.fastq" />
    <output name="fastq_out_r2_paired" file="trimmomatic_pe_r2_paired_out1.fastq" />
    <output name="fastq_out_r2_unpaired" file="trimmomatic_pe_r2_unpaired_out1.fastq" />
  </test>
  <test>
    <!-- Single-end example (cropping): CROP with a length of 10 -->
    <param name="is_paired_end" value="no" />
    <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
    <param name="operations_0|operation|name" value="CROP" />
    <param name="operations_0|operation|crop" value="10" />
    <output name="fastq_out" file="trimmomatic_se_out2.fastq" />
  </test>
</tests>
<help>
.. class:: infomark

**What it does**

Trimmomatic performs a variety of useful trimming tasks for Illumina paired-end and
single-ended data.

This tool allows the following trimming steps to be performed:

* **ILLUMINACLIP:** Cut adapter and other Illumina-specific sequences from the read
* **SLIDINGWINDOW:** Perform a sliding window trimming, cutting once the average
  quality within the window falls below a threshold
* **MINLEN:** Drop the read if it is below a specified length
* **LEADING:** Cut bases off the start of a read, if below a threshold quality
* **TRAILING:** Cut bases off the end of a read, if below a threshold quality
* **CROP:** Cut the read to a specified length
* **HEADCROP:** Cut the specified number of bases from the start of the read

If ILLUMINACLIP is requested then it is always performed first; subsequent options
can be mixed and matched and will be performed in the order that they have been
specified.

.. class:: warningmark

Note that trimming operation order is important.

-------------

.. class:: infomark

**Outputs**

For paired-end data a particular strength of Trimmomatic is that it retains the
pairing of reads (from R1 and R2) in the filtered output files:

* Two FASTQ files (R1-paired and R2-paired) contain one read from each pair where
  both have survived filtering.
* Additionally two FASTQ files (R1-unpaired and R2-unpaired) contain reads where
  one of the pair failed the filtering steps.

Retaining the same order and number of reads in the filtered output FASTQ files is
essential for many downstream analysis tools.

For single-end data the output is a single FASTQ file containing just the filtered
reads.

-------------

.. class:: infomark

**Credits**

This Galaxy tool has been developed within the Bioinformatics Core Facility at the
University of Manchester. It runs the Trimmomatic program which has been developed
within Bjorn Usadel's group at RWTH Aachen University.

Trimmomatic website (including documentation):

* http://www.usadellab.org/cms/index.php?page=trimmomatic

The reference for Trimmomatic is:

* Lohse M, Bolger AM, Nagel A, Fernie AR, Lunn JE, Stitt M, Usadel B. RobiNA: a
  user-friendly, integrated software solution for RNA-Seq-based transcriptomics.
  Nucleic Acids Res. 2012 Jul;40(Web Server issue):W622-7.

Please kindly acknowledge both this Galaxy tool and the Trimmomatic program if you
use it.
</help>
</tool>