Mercurial > repos > gbcs-embl-heidelberg > je_demultiplex_illu
annotate je-demultiplex-illu.xml @ 4:7895e352c09b draft
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 1c7680ac36888721d2c0b72f4e6d27415fe07b43
author | gbcs-embl-heidelberg |
---|---|
date | Tue, 01 Aug 2017 03:42:23 -0400 |
parents | 01fdc6d10660 |
children | 69c77f9fc064 |
rev | line source |
---|---|
3
01fdc6d10660
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit dd9e62bdb01d1252a90ce778103ce9b6b4a8cd52-dirty
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
1 <tool id="je_demultiplex_illu" name="Je-Demultiplex-Illu" version="@VERSION_STRING@"> |
0 | 2 <description>demultiplexes fastq files using Illumina Index file</description> |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <stdio> | |
7 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
8 </stdio> <!-- exit codes in the range "1:" are reported as a fatal "Tool exception" --> | |
9 <version_command>echo '1.0'</version_command> <!-- NOTE(review): the reported version is hard-coded to 1.0 while the tool version attribute comes from the VERSION_STRING macro token; confirm they agree --> | |
10 <command interpreter="bash"> | |
11 <![CDATA[ | |
12 je demultiplex-illu | |
13 | |
14 ## Fastq inputs: the arguments come from the macro token on the next line (defined outside this file, presumably in the imported macros.xml) | |
15 @single_or_paired_illu_cmd@ | |
16 | |
17 @barcode_option_cmd@ | |
18 ## UMI handling: when INTERNAL_BARCODES is 'true' the selected BPOS and CLIP_BARCODE values are passed through and BCLEN is added only if LEN is non-empty; otherwise BPOS=NONE and C=false are forced | |
19 #if str($INTERNAL_BARCODES_CON.INTERNAL_BARCODES) == 'true': | |
20 BPOS=${INTERNAL_BARCODES_CON.BPOS} | |
21 C=${INTERNAL_BARCODES_CON.CLIP_BARCODE} | |
22 #if str( $INTERNAL_BARCODES_CON.LEN ) != "": | |
23 BCLEN=$INTERNAL_BARCODES_CON.LEN | |
24 #end if | |
25 #else: | |
26 BPOS=NONE | |
27 C=false | |
28 #end if | |
29 | |
30 @common_options_cmd@ | |
31 | |
32 @demultiplexer_common_output_options_cmd@ | |
33 @demultiplexer_common_outputs_cmd@ | |
34 ]]> | |
35 </command> | |
36 <configfiles> | |
37 <expand macro="barcode_config_file"/> | |
38 </configfiles> | |
39 <inputs> | |
40 <!-- Single/paired input section (similar to macro 'single_or_paired_general'); the demultiplex_illu_paired_end_options macro is nested inside it --> | |
41 <expand macro="single_or_paired_illu"> | |
42 <expand macro="demultiplex_illu_paired_end_options"/> | |
43 </expand> | |
44 <expand macro="barcode_option"/> <!-- The conditional below holds the UMI settings: BPOS plus the barcode_len_option and clip_barcode macros are only offered when INTERNAL_BARCODES is "true" --> | |
45 <conditional name="INTERNAL_BARCODES_CON"> | |
46 <param name="INTERNAL_BARCODES" type="select" | |
47 label="Do your reads contain Unique Molecular Identifiers(UMIs)"> | |
48 <option value="true">Yes</option> | |
49 <option value="false" selected="true">No</option> | |
50 </param> | |
51 <when value="true"> | |
52 <param name="BPOS" type="select" label="Barcode read position (BPOS)" help="where are the barcodes. | |
53 If not using paired-end it does not matter what you specify here."> | |
54 <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option> | |
55 <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option> | |
56 <option value="BOTH">BOTH (beginning of both reads)</option> | |
57 </param> | |
58 <expand macro="barcode_len_option"/> | |
59 <expand macro="clip_barcode"/> | |
60 </when> | |
61 <when value="false"/> | |
62 </conditional> | |
63 <!-- The remaining option groups are expanded from macros defined outside this file (presumably the imported macros.xml) --> | |
64 <expand macro="demultiplexer_common_options"/> | |
65 | |
66 <expand macro="common_options"/> | |
67 | |
68 <expand macro="demultiplexer_common_output_options"/> | |
69 | |
70 </inputs> | |
71 <outputs> | |
72 <expand macro="demultiplexer_common_outputs"/> | |
73 </outputs> | |
74 | |
75 <tests> | |
76 <test> | |
77 <!-- Paired-end run with two index files (I1 and I2): barcode at both ends (BPOS=BOTH), non-redundant (BRED=false). Expects the metrics summary plus unassigned and per-sample discovered datasets for both reads --> | |
78 <param name="type" value="paired"/> | |
79 <param name="input_1" value="illu_file_1_sequence.txt" ftype="fastqsanger"/> | |
80 <param name="input_2" value="illu_file_2_sequence.txt" ftype="fastqsanger"/> | |
81 <param name="I1" value="illu_file_1_index.txt" ftype="fastqsanger"/> | |
82 <param name="I2_AVAILABLE" value="true"/> | |
83 <param name="I2" value="illu_file_2_index.txt" ftype="fastqsanger"/> | |
84 <!-- Demultiplexing parameters; names such as BARCODE_FILE, LEN, ZT, BM, BRED, MM, MMD, Q and DIAG are not declared in this file and presumably come from the expanded macros --> | |
85 <param name="INTERNAL_BARCODES" value="true"/> | |
86 <param name="barcode_list_type_con" value="file"/> | |
87 <param name="BARCODE_FILE" value="illu_dualindexing.txt" ftype="tabular"/> | |
88 <param name="LEN" value="8:8"/> | |
89 <param name="ZT" value="5:6"/> | |
90 <param name="BPOS" value="BOTH"/> | |
91 <param name="BM" value="BOTH"/> | |
92 <param name="BRED" value="false"/> | |
93 <param name="MM" value="3"/> | |
94 <param name="MMD" value="2"/> | |
95 <param name="Q" value="20"/> | |
96 <param name="DIAG" value="false"/> | |
97 <output name="METRICS_FILE_NAME" file="illu_summary_PE.txt" ftype="tabular" lines_diff="4"> | |
98 <discovered_dataset designation="unassigned_1" file="illu_unassigned_1_PE.txt" /> | |
99 <discovered_dataset designation="unassigned_2" file="illu_unassigned_2_PE.txt" /> | |
100 <discovered_dataset designation="emb681m5_GGACTCCTCTCTCTAT_2" file="emb681m5_GGACTCCTCTCTCTAT_2.txt"/> | |
101 <discovered_dataset designation="emb681m5_GGACTCCTCTCTCTAT_1" file="emb681m5_GGACTCCTCTCTCTAT_1.txt"/> | |
102 <discovered_dataset designation="emb681m4_TCCTGAGCCTCTCTAT_2" file="emb681m4_TCCTGAGCCTCTCTAT_2.txt"/> | |
103 <discovered_dataset designation="emb681m4_TCCTGAGCCTCTCTAT_1" file="emb681m4_TCCTGAGCCTCTCTAT_1.txt"/> | |
104 <discovered_dataset designation="emb681m1_TAAGGCGACTCTCTAT_2" file="emb681m1_TAAGGCGACTCTCTAT_2.txt"/> | |
105 <discovered_dataset designation="emb681m1_TAAGGCGACTCTCTAT_1" file="emb681m1_TAAGGCGACTCTCTAT_1.txt"/> | |
106 <discovered_dataset designation="emb6801m2_AGGCAGAATAGATCGC_2" file="emb6801m2_AGGCAGAATAGATCGC_2.txt"/> | |
107 <discovered_dataset designation="emb6801m2_AGGCAGAATAGATCGC_1" file="emb6801m2_AGGCAGAATAGATCGC_1.txt"/> | |
108 <discovered_dataset designation="emb6801m1_CGTACTAGTAGATCGC_2" file="emb6801m1_CGTACTAGTAGATCGC_2.txt"/> | |
109 <discovered_dataset designation="emb6801m1_CGTACTAGTAGATCGC_1" file="emb6801m1_CGTACTAGTAGATCGC_1.txt"/> | |
110 </output> | |
111 </test> | |
112 </tests> | |
113 | |
114 | |
115 <help> | |
116 <![CDATA[ | |
117 **What it does** | |
118 | |
119 Je demultiplex-illu: demultiplex fastq files using Illumina Index files, | |
120 with optional handling of Unique Molecular Identifiers for further use in the 'markdupes' module. | |
121 Input files are fastq files, and can be in gzip compressed format. | |
122 | |
123 Author: Charles Girardot (charles.girardot@embl.de). | |
124 | |
125 Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de). | |
126 | |
127 ------ | |
128 | |
129 **Know what you are doing** | |
130 | |
131 .. class:: warningmark | |
132 | |
133 You will want to read the `documentation`__. | |
134 | |
135 .. __: http://gbcs.embl.de/portal/Je | |
136 | |
137 ------ | |
138 | |
139 **Parameter list** | |
140 | |
141 This is an exhaustive list of options:: | |
142 | |
143 FASTQ_FILE1=File | |
144 F1=File | |
145 | |
146 Input fastq file (optionally gzipped) for single end data, or first read in paired end | |
147 data. | |
148 | |
149 Required. | |
150 | |
151 FASTQ_FILE2=File | |
152 F2=File | |
153 | |
154 Input fastq file (optionally gzipped) for the second read of paired end data. | |
155 | |
156 Default value: null. | |
157 | |
158 INDEX_FILE1=File | |
159 I1=File | |
160 | |
161 Fastq file for index 1 (barcode) reads, optionally gzipped | |
162 | |
163 Required. | |
164 | |
165 INDEX_FILE2=File | |
166 I2=File | |
167 | |
168 Fastq file for index 2 (barcode) reads, optionally gzipped. | |
169 An INDEX_FILE1 MUST be provided when INDEX_FILE2 is given. This situation corresponds to | |
170 Illumina dual indexing. | |
171 | |
172 Default value: null. | |
173 | |
174 BARCODE_FILE=File | |
175 BF=File | |
176 | |
177 Barcode file describing sequence list and sample names. Tab-delimited file with 2 | |
178 columns, with the sample in col1 and the corresponding barcode in col2. | |
179 Simple barcode file format : 2 tab-delimited columns | |
180 If multiple barcodes map to the same sample, either line can be duplicated e.g. | |
181 sample1 ATAT | |
182 sample1 GAGG | |
183 sample2 CCAA | |
184 sample2 TGTG | |
185 Or barcodes can be combined using the OR operator '|' i.e. the file above can be | |
186 re-written like | |
187 sample1 ATAT|GAGG | |
188 sample2 CCAA|TGTG | |
189 Finally, for the special situation of paired-end data in which barcodes differ at both | |
190 ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1 | |
191 and read_2 can be distinguished using a ':' separator i.e. | |
192 sample1 ATAT:GAGG | |
193 sample2 CCAA:TGTG | |
194 This above syntax means that sample 1 is encoded with ATAT barcode at read_1 AND GAGG | |
195 barcode at read_2. Note that you can still combine barcodes using | e.g. | |
196 sample1 ATAT|GAGG:CCAA|TGTG | |
197 would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1 | |
198 AND CCAA OR TGTG at read_2. | |
199 Extended barcode file format : 3 (single-end) or 4 (paired-end) tab-delimited columns | |
200 same as the simple barcode file format but the extra columns contain the file name(s) | |
201 to use to name output files. A unique extra column is expected for single-end while 2 | |
202 extra columns are expected for paired-end. In case lines are duplicated (multiple | |
203 barcodes mapping the same sample), the same file name should be indicated in the third | |
204 (and fourth) column(s). | |
205 sample1 ATAT spl1_1.txt.gz spl1_2.txt.gz | |
206 sample1 GAGG spl1_1.txt.gz spl1_2.txt.gz | |
207 sample2 CCAA spl2_1.txt.gz spl2_2.txt.gz | |
208 Or | |
209 sample1 ATAT|GAGG:CCAA|TGTG spl1_1.txt.gz spl1_2.txt.gz | |
210 Ns in barcode sequence are allowed and are used to flag positions that should be ignored | |
211 in sample matching | |
212 i.e. they will be clipped off the read sequence (like in iCLIP protocol). | |
213 | |
214 Required. | |
215 | |
216 BARCODE_READ_POS=BarcodePosition | |
217 BPOS=BarcodePosition | |
218 | |
219 Indicates the location of additional barcodes present in the read(s). Setting this option | |
220 implies setting the LEN option. | |
221 Importantly, these additional barcodes must not encode sample identity information but | |
222 be used for e.g. molecular barcoding (UMIs) or for any purpose other than sample identity encoding. | |
223 | |
224 Default value: BOTH. This option can be set to 'null' to clear the default value. | |
225 Possible values: {READ_1, READ_2, BOTH, NONE} | |
226 | |
227 BCLEN=String | |
228 LEN=String | |
229 | |
230 Length of the barcode sequences, optional. Taken from barcode file when not given. | |
231 In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct | |
232 length can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing | |
233 the barcode length for read_1 and read_2 respectively. | |
234 | |
235 Default value: null | |
236 | |
237 REDUNDANT_BARCODES=Boolean | |
238 BRED=Boolean | |
239 | |
240 This option only applies for paired-end data with *both* INDEX_FILE1 and INDEX_FILE2 | |
241 provided. | |
242 Indicates if both index barcodes encode redundant information i.e. if both barcodes are | |
243 supposed to be identical (or resolve to the same sample when a pool of barcodes is used | |
244 per sample). | |
245 When BRED=true, the STRICT option guides the sample lookup behavior. When BRED=false, | |
246 barcodes are combined prior to sample lookup. | |
247 | |
248 Default value: true. This option can be set to 'null' to clear the default value. | |
249 Possible values: {true, false} | |
250 | |
251 STRICT=Boolean | |
252 S=Boolean | |
253 | |
254 For paired-end data and when two distinct barcodes/indices are used to encode samples, | |
255 this option tells if both barcodes should resolve to the same sample. | |
256 When true and if only one of the two reads has a barcode match, the read pair is | |
257 'unassigned'. | |
258 When false and if only one of the two reads has a barcode match, the read pair is | |
259 assigned to the | |
260 corresponding sample | |
261 When reads resolve to different samples, the read pair is always 'unassigned'. | |
262 | |
263 Default value: false. This option can be set to 'null' to clear the default value. | |
264 Possible values: {true, false} | |
265 | |
266 MAX_MISMATCHES=String | |
267 MM=String | |
268 | |
269 Maximum mismatches for a barcode to be considered a match. In situations where both | |
270 barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH (or 2 INDEX_FILE given), two | |
271 distinct | |
272 values can be given here using the syntax MM=X:Z where X and Z are 2 integers to use for | |
273 read_1 and read_2 respectively. | |
274 MM=null is like MM=0 | |
275 | |
276 Default value: 1. This option can be set to 'null' to clear the default value. | |
277 | |
278 MIN_MISMATCH_DELTA=String | |
279 MMD=String | |
280 | |
281 Minimum difference between the number of mismatches against the best and the second best | |
282 barcode. When MMD is not respected, the read remains unassigned. | |
283 When two distinct barcodes are used for sample matching (dual encoding), two distinct | |
284 values can be given using the syntax MMD=X:Z where X and Z are 2 integers to use for | |
285 first (e.g. from read_1 or index_1) and second barcode (e.g. from read_2 or index_2) respectively. | |
286 MMD=null is like MMD=0 | |
287 | |
288 Default value: 1. This option can be set to 'null' to clear the default value. | |
289 | |
290 MIN_BASE_QUALITY=String | |
291 Q=String | |
292 | |
293 Minimum base quality during barcode matching: bases whose quality is less than this | |
294 cutoff are always considered as a mismatch. When two distinct barcodes are used for sample | |
295 matching (dual encoding), two distinct values can be given using the syntax Q=X:Z where X | |
296 and Z are 2 integers to use for first (e.g. from read_1 or index_1) and second barcode | |
297 (e.g. from read_2 or index_2) respectively. | |
298 Q=null is like Q=0. | |
299 | |
300 Default value: 10. This option can be set to 'null' to clear the default value. | |
301 | |
302 XTRIMLEN=String | |
303 XT=String | |
304 | |
305 Optional extra number of base to be trimmed right after the barcode (only used if | |
306 CLIP_BARCODE=true). | |
307 When running paired-end, two distinct values can be given using the syntax XT=X:Z where X | |
308 and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when | |
309 BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode as to | |
310 end up with reads of the same length (note that this can also be operated using ZT). If a | |
311 unique value is given, e.g. XT=1, while running paired-end the following rule applies: | |
312 (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode | |
313 (2) BPOS=BOTH, the value is used for both reads. | |
314 | |
315 Note that XT=null is like XT=0. | |
316 Default value: 0. This option can be set to 'null' to clear the default value. | |
317 | |
318 ZTRIMLEN=String | |
319 ZT=String | |
320 | |
321 Optional extra number of bases to be trimmed from the read end i.e. 3' end. | |
322 When running paired-end, two distinct values can be given here using the syntax ZT=X:Z | |
323 where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even | |
324 when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode | |
325 as to end up with reads of the same length (note that this can also be operated using | |
326 XT). Note that if a single value is passed, the value always applies to both reads in | |
327 paired-end mode without further consideration. | |
328 ZT=null is like ZT=0. | |
329 | |
330 Default value: 0. This option can be set to 'null' to clear the default value. | |
331 | |
332 CLIP_BARCODE=Boolean | |
333 C=Boolean | |
334 | |
335 Clip barcode sequence from read sequence, as well as XTRIMLEN (and ZTRIMLEN) bases if | |
336 applicable, before writing to output file. | |
337 If false, reads are written without modification to output file. | |
338 Apply to both barcodes when BPOS=BOTH. | |
339 | |
340 Default value: true. This option can be set to 'null' to clear the default value. | |
341 Possible values: {true, false} | |
342 | |
343 ADD_BARCODE_TO_HEADER=Boolean | |
344 ADD=Boolean | |
345 | |
346 Add matched barcode at the end of the read header. Applies to both index when INDEX_FILE2 | |
347 is also provided. | |
348 First the sample encoding barcodes from I1 (and I2 when relevant) are added to the read | |
349 headers like | |
350 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:I1_BARCODE:I2_BARCODE | |
351 Then, if BPOS!=NONE, the additional barcodes (UMIs) clipped from the read(s) are added | |
352 to their own header, like | |
353 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:I1_BARCODE:I2_BARCODE:CLIPPED_SEQ_FROMREAD | |
354 | |
355 Default value: true. This option can be set to 'null' to clear the default value. | |
356 Possible values: {true, false} | |
357 | |
358 ENSURE_IDENTICAL_HEADER_NAMES=Boolean | |
359 SAME_HEADERS=Boolean | |
360 | |
361 Makes sure that headers of both reads of a pair are identical, using the following read | |
362 header pattern (for both reads of a pair): | |
363 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 I1_BARCODE:I2_BARCODE(:CLIPPED_SEQ_FROMREAD1:CLIPPED_SEQ_FROMREAD2) | |
364 This option only makes sense in paired end mode and ADD=true. Some (if not all) mappers | |
365 will indeed complain when the read headers are not identical. When molecular barcodes are | |
366 present in reads and the RCHAR is used, you will end up with (problematic) read headers like | |
367 this: | |
368 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:1:N:0:TAGAACAC:TGGAGTAG | |
369 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:3:N:0:TAGAACAC:CGTTGTAT | |
370 SAME_HEADERS=true will instead generate the following identical header for both reads: | |
371 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:TAGAACAC:TGGAGTAG:CGTTGTAT | |
372 Note that we also clipped the useless '1:N:0' and '3:N:0' as they would also result in | |
373 generating different headers. | |
374 Important: this option will force RCHAR=: UNLESS you specify RCHAR=null ; in which | |
375 case a space will be preserved ie: | |
376 HISEQ:44:C6KC0ANXX:5:1101:1491:1994 TAGAACAC:TGGAGTAG:CGTTGTAT | |
377 | |
378 Default value: true. This option can be set to 'null' to clear the default value. | |
379 Possible values: {true, false} | |
380 | |
381 READ_NAME_REPLACE_CHAR=String | |
382 RCHAR=String | |
383 | |
384 Replace spaces in read name/header using provided character. This is particularly handy | |
385 when you need to retain ADDed barcode in read name/header during mapping (everything | |
386 after space in read name is usually clipped in BAM files). For example, with RCHAR=':': | |
387 '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:' | |
388 becomes | |
389 '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE' | |
390 Default value: null. | |
391 | |
392 QUALITY_FORMAT=FastqQualityFormat | |
393 V=FastqQualityFormat | |
394 | |
395 A value describing how the quality values are encoded in the fastq. Either 'Solexa' for | |
396 pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and | |
397 above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift | |
398 of 33. If this value is not specified (or 'null' is given), the quality format will be | |
399 detected. | |
400 | |
401 Default value: Standard. This option can be set to 'null' to clear the default value. | |
402 Possible values: {Solexa, Illumina, Standard} | |
403 | |
404 KEEP_UNASSIGNED_READ=Boolean | |
405 UN=Boolean | |
406 | |
407 Should un-assigned reads be saved in files or simply ignored. File names are | |
408 automatically created or can be given using UF1 & UF2 options. | |
409 | |
410 Default value: true. This option can be set to 'null' to clear the default value. | |
411 Possible values: {true, false} | |
412 | |
413 BARCODE_DIAG_FILE=String | |
414 DIAG=String | |
415 | |
416 Name for a barcode match reporting file (not generated by default). Either a name (in | |
417 which case the file will be created in the output dir) or full path. This file will | |
418 contain a line per read pair with the barcode best matching the read subsequence or | |
419 'null' when no match is found according to matching parameters ; and the final selected | |
420 sample. This file is useful for debugging or further processing in case both ends are | |
421 barcoded. | |
422 N.B: this file will have a size of about one of the fastq input files. | |
423 | |
424 Default value: null. | |
425 ]]> | |
426 </help> | |
427 | |
428 </tool> |