Mercurial > repos > gbcs-embl-heidelberg > je_demultiplex
annotate je-demultiplex.xml @ 10:bd3cdf128bcb draft default tip
planemo upload for repository https://github.com/gbcs-embl/Je/tree/master/src/galaxy commit 5acb6bc253e38c5c61fc70c10443716d4109a711
author | gbcs-embl-heidelberg |
---|---|
date | Sat, 04 Aug 2018 09:02:27 -0400 |
parents | 8f16495dc5f2 |
children |
rev | line source |
---|---|
3
8930b411a9d7
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit dd9e62bdb01d1252a90ce778103ce9b6b4a8cd52-dirty
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
1 <tool id="je_demultiplex" name="Je-Demultiplex" version="@VERSION_STRING@"> |
0 | 2 <description>demultiplexes fastq files</description> |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
5
222819c87d90
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents:
3
diff
changeset
|
6 <expand macro="requirements" /> |
0 | 7 <stdio> |
8 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
9 </stdio> | |
5
222819c87d90
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents:
3
diff
changeset
|
10 <expand macro="version_command" /> |
222819c87d90
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents:
3
diff
changeset
|
11 <command> |
0 | 12 <![CDATA[ |
13 je demultiplex | |
14 | |
15 ## Fastq inputs | |
16 @single_or_paired_cmd@ | |
17 #if str( $library.type ) != "single": | |
18 @demultiplex_paired_end_cmd_options@ | |
19 #end if | |
20 | |
21 @barcode_option_cmd@ | |
22 @barcode_len_cmd@ | |
23 C=$CLIP_BARCODE | |
24 | |
25 @demultiplexer_common_options_cmd@ | |
26 @common_options_cmd@ | |
27 | |
28 @demultiplexer_common_output_options_cmd@ | |
29 @demultiplexer_common_outputs_cmd@ | |
30 | |
31 ]]> | |
32 </command> | |
33 <configfiles> | |
34 <expand macro="barcode_config_file"></expand> | |
35 </configfiles> | |
36 <inputs> | |
37 <!-- single/paired - similar to macro 'single_or_paired_general' --> | |
38 <expand macro="single_or_paired_general"> | |
39 <expand macro="demultiplex_paired_end_options"/> | |
40 </expand> | |
41 | |
42 <expand macro="barcode_option"/> | |
43 <expand macro="barcode_len_option"/> | |
44 <expand macro="clip_barcode"/> | |
45 | |
46 <expand macro="demultiplexer_common_options"/> | |
47 | |
48 <expand macro="common_options"/> | |
49 | |
50 <expand macro="demultiplexer_common_output_options"/> | |
51 | |
52 </inputs> | |
53 <outputs> | |
54 <expand macro="demultiplexer_common_outputs"/> | |
55 </outputs> | |
56 | |
57 <tests> | |
58 <test> | |
59 <!-- simple test on single end data --> | |
60 <param name="type" value="single"/> | |
61 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/> | |
62 <param name="BARCODE_FILE" value="barcodes_SE.txt" ftype="tabular"/> | |
7
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
63 <output name="METRICS_FILE_NAME" file="summary_SE.txt" ftype="tabular" lines_diff="4"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
64 <output name="DEMULTIPLEX_RESULTS" ftype="tabular"> |
0 | 65 <discovered_dataset designation="unassigned_1" file="unassigned_1_SE.txt" /> |
66 </output> | |
67 </test> | |
68 <test> | |
69 <!-- more complex test on paired end data with different barcode for fwd/rev --> | |
70 <param name="type" value="paired"/> | |
71 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/> | |
72 <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/> | |
73 | |
74 <param name="BPOS" value="BOTH"/> | |
75 <param name="BM" value="BOTH"/> | |
76 <param name="BRED" value="false"/> | |
77 | |
7
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
78 <param name="COLLECT_OUTPUTS" value="false" /> |
0 | 79 <param name="barcode_list_type_con" value="text"/> |
80 <param name="barcode_text" | |
81 value="sample1 CACTGT:GTATAG sample2 ATTCCG:TCCGTC sample3 GCTACC:TGGTCA sample4 CGAAAC:CACTGT"/> | |
7
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
82 <output name="METRICS_FILE_NAME" file="summary_PE.txt" ftype="tabular" lines_diff="4"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
83 <output name="DEMULTIPLEX_RESULTS" ftype="tabular"> |
0 | 84 <discovered_dataset designation="unassigned_1" file="unassigned_1_PE.txt" /> |
85 <discovered_dataset designation="unassigned_2" file="unassigned_2_PE.txt" /> | |
86 <discovered_dataset designation="sample4_CGAAACCACTGT_2" file="sample4_CGAAACCACTGT_2.txt"/> | |
87 <discovered_dataset designation="sample4_CGAAACCACTGT_1" file="sample4_CGAAACCACTGT_1.txt"/> | |
88 <discovered_dataset designation="sample3_GCTACCTGGTCA_2" file="sample3_GCTACCTGGTCA_2.txt"/> | |
89 <discovered_dataset designation="sample3_GCTACCTGGTCA_1" file="sample3_GCTACCTGGTCA_1.txt"/> | |
90 <discovered_dataset designation="sample2_ATTCCGTCCGTC_2" file="sample2_ATTCCGTCCGTC_2.txt"/> | |
91 <discovered_dataset designation="sample2_ATTCCGTCCGTC_1" file="sample2_ATTCCGTCCGTC_1.txt"/> | |
92 <discovered_dataset designation="sample1_CACTGTGTATAG_2" file="sample1_CACTGTGTATAG_2.txt"/> | |
93 <discovered_dataset designation="sample1_CACTGTGTATAG_1" file="sample1_CACTGTGTATAG_1.txt"/> | |
94 </output> | |
95 </test> | |
7
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
96 <test> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
97 <!-- Repeat of previous but with collection outputs --> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
98 <param name="type" value="paired"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
99 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
100 <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
101 |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
102 <param name="BPOS" value="BOTH"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
103 <param name="BM" value="BOTH"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
104 <param name="BRED" value="false"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
105 <param name="barcode_list_type_con" value="text"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
106 <param name="barcode_text" |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
107 value="sample1 CACTGT:GTATAG sample2 ATTCCG:TCCGTC sample3 GCTACC:TGGTCA sample4 CGAAAC:CACTGT"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
108 <param name="COLLECT_OUTPUTS" value="true" /> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
109 |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
110 <output_collection name="COLLECTION_1" type="list"> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
111 <!-- sample4 forward reads: element name must match the sample4 expected file (was a duplicate of the sample1 entry) --> <element name="sample4_CGAAACCACTGT_1.txt" value="sample4_CGAAACCACTGT_1.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
112 <element name="sample3_GCTACCTGGTCA_1.txt" value="sample3_GCTACCTGGTCA_1.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
113 <element name="sample2_ATTCCGTCCGTC_1.txt" value="sample2_ATTCCGTCCGTC_1.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
114 <element name="sample1_CACTGTGTATAG_1.txt" value="sample1_CACTGTGTATAG_1.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
115 </output_collection> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
116 <output_collection name="COLLECTION_2" type="list"> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
117 <element name="sample4_CGAAACCACTGT_2.txt" value="sample4_CGAAACCACTGT_2.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
118 <element name="sample3_GCTACCTGGTCA_2.txt" value="sample3_GCTACCTGGTCA_2.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
119 <element name="sample2_ATTCCGTCCGTC_2.txt" value="sample2_ATTCCGTCCGTC_2.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
120 <element name="sample1_CACTGTGTATAG_2.txt" value="sample1_CACTGTGTATAG_2.txt"/> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
121 </output_collection> |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
122 </test> |
0 | 123 </tests> |
124 | |
5
222819c87d90
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents:
3
diff
changeset
|
125 <help> |
0 | 126 <![CDATA[ |
127 **What it does** | |
128 | |
129 Je demultiplex: A fastq file demultiplexer with optional handling of Unique Molecular Identifiers for further use | |
130 in 'markdupes' module. | |
131 Input files are fastq files, and can be in gzip compressed format. | |
132 | |
133 Author: Charles Girardot (charles.girardot@embl.de). | |
134 | |
135 Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de). | |
136 | |
7
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
137 With contributions by: Mehmet Tekman (@mtekman) |
8f16495dc5f2
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents:
5
diff
changeset
|
138 |
0 | 139 ------ |
140 | |
141 **Know what you are doing** | |
142 | |
143 .. class:: warningmark | |
144 | |
145 You will want to read the `documentation`__. | |
146 | |
147 .. __: http://gbcs.embl.de/portal/Je | |
148 | |
149 ------ | |
150 | |
151 **Parameter list** | |
152 | |
153 This is an exhaustive list of options:: | |
154 | |
155 FASTQ_FILE1=File | |
156 F1=File | |
157 | |
158 Input fastq file (optionally gzipped) for single end data, or first read in paired end | |
159 data. | |
160 | |
161 Required. | |
162 | |
163 FASTQ_FILE2=File | |
164 F2=File | |
165 | |
166 Input fastq file (optionally gzipped) for the second read of paired end data. | |
167 | |
168 Default value: null. | |
169 | |
170 BARCODE_FILE=File | |
171 BF=File | |
172 | |
173 Barcode file describing sequence list and sample names. Tab-delimited file with 2 | |
174 columns, with the sample in col1 and the corresponding barcode in col2. | |
175 Simple barcode file format : 2 tab-delimited columns | |
176 If multiple barcode map to the same sample, either line can be duplicated e.g. | |
177 sample1 ATAT | |
178 sample1 GAGG | |
179 sample2 CCAA | |
180 sample2 TGTG | |
181 Or barcodes can be combined using the OR operator '|' i.e. the file above can be | |
182 re-written like | |
183 sample1 ATAT|GAGG | |
184 sample2 CCAA|TGTG | |
185 Finally, for the special situation of paired-end data in which barcodes differ at both | |
186 ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1 | |
187 and read_2 can be distinguished using a ':' separator i.e. | |
188 sample1 ATAT:GAGG | |
189 sample2 CCAA:TGTG | |
190 This above syntax means that sample 1 is encoded with ATAT barcode at read_1 AND GAGG | |
191 barcode at read_2. Note that you can still combine barcodes using | e.g. | |
192 sample1 ATAT|GAGG:CCAA|TGTG | |
193 would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1 | |
194 AND CCAA OR TGTG at read_2. | |
195 Extended barcode file format : 3 (single-end) or 4 (paired-end) tab-delimited columns | |
196 same as the simple barcode file format but the extra columns contains the file name(s) | |
197 to use to name output files. A unique extra column is expected for single-end while 2 | |
198 extra columns are expected for paired-end. In case lines are duplicated (multiple | |
199 barcodes mapping the same sample), the same file name should be indicated in the third | |
200 (and fourth) column(s). | |
201 sample1 ATAT spl1_1.txt.gz spl1_2.txt.gz | |
202 sample1 GAGG spl1_1.txt.gz spl1_2.txt.gz | |
203 sample2 CCAA spl2_1.txt.gz spl2_2.txt.gz | |
204 Or | |
205 sample1 ATAT|GAGG:CCAA|TGTG spl1_1.txt.gz spl1_2.txt.gz | |
206 Ns in barcode sequence are allowed and are used to flag positions that should be ignored | |
207 in sample matching | |
208 i.e. they will be clipped off the read sequence (like in iCLIP protocol). | |
209 | |
210 Required. | |
211 | |
212 BARCODE_READ_POS=BarcodePosition | |
213 BPOS=BarcodePosition | |
214 | |
215 For paired-end data, where to expect the barcode(s) : | |
216 READ_1 (beginning of read from FASTQ_FILE_1), | |
217 READ_2 (beginning of read from FASTQ_FILE_2), | |
218 BOTH (beginning of both reads). | |
219 Automatically set to READ_1 in single end mode. | |
220 | |
221 Default value: BOTH. This option can be set to 'null' to clear the default value. | |
222 Possible values: {READ_1, READ_2, BOTH, NONE} | |
223 | |
224 BCLEN=String | |
225 LEN=String | |
226 | |
227 Length of the barcode sequences, optional. Taken from barcode file when not given. | |
228 In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct | |
229 length can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing | |
230 the barcode length for read_1 and read_2 respectively. | |
231 | |
232 Default value: null. | |
233 | |
234 BARCODE_FOR_SAMPLE_MATCHING=BarcodePosition | |
235 BM=BarcodePosition | |
236 | |
237 Indicates which barcode(s) should be used for sample lookup | |
238 Automatically set to READ_1 in single end mode. | |
239 For paired-end data and when BARCODE_READ_POS == BOTH, which barcode should be used to | |
240 resolve sample: | |
241 use BM=READ_1 (beginning of read from FASTQ_FILE_1) if only this read should be used | |
242 for sample matching: | |
243 use BM=READ_2 (beginning of read from FASTQ_FILE_2) if only this read should be used | |
244 for sample matching: | |
245 use BM=BOTH (beginning of both reads) if both should be used. | |
246 | |
247 When BM=BOTH, the behaviour is different based on the value of REDUNDANT_BARCODES : | |
248 If REDUNDANT_BARCODES=true, the two barcodes are considered to map to the same sample | |
249 and 'Je demultiplex' uses the two barcodes according to the STRICT value. | |
250 If REDUNDANT_BARCODES=false, the barcode file should map a couple of barcode to each | |
251 sample (e.g. sample1 => AGAGTG:TTGATA) and 'Je demultiplex' needs both barcodes to find | |
252 the relevant sample. Note that this is the only situation in which all barcode matching | |
253 options (MM, MMD, Q) accept different values for both barcodes in the form X:Z where X | |
254 and Z are 2 integers. | |
255 | |
256 Default value: BOTH. This option can be set to 'null' to clear the default value. | |
257 Possible values: {READ_1, READ_2, BOTH, NONE} | |
258 | |
259 | |
260 REDUNDANT_BARCODES=Boolean | |
261 BRED=Boolean | |
262 | |
263 This option only applies for paired-end data with BARCODE_READ_POS set to 'BOTH' | |
264 Indicates if both read's barcodes encode redundant information or if barcodes are | |
265 supposed to be identical at both ends (or to resolve to the same sample when a pool of | |
266 barcodes is used per sample). | |
267 When REDUNDANT_BARCODES=false, the 2 barcodes potentially encode | |
268 different information. For example, only one of the barcodes encodes the sample identity | |
269 while | |
270 the second barcode might be a random barcode (UMI) to tell apart PCR artefacts from real | |
271 duplicates. | |
272 Another example is when both barcodes should be used in a combined fashion to resolve the | |
273 sample. | |
274 In the first example, you should use BPOS=BOTH BRED=false BM=READ_1. | |
275 In the second example, you should have BPOS=BOTH BRED=false BM=BOTH. | |
276 Note that with BPOS=BOTH BRED=true BM=BOTH, the behavior would be different as | |
277 'demultiplex' would then check the STRICT option to perform sample resolution. | |
278 Importantly, when BARCODE_READ_POS (BPOS) == BOTH AND REDUNDANT_BARCODES=false, BCLEN, | |
279 barcode matching options (MM, MMD, Q) and read trimming/clipping options (XT, ZT) accept | |
280 different values for both barcodes in the form X:Z where X and Z are 2 integers. | |
281 | |
282 Default value: true. This option can be set to 'null' to clear the default value. | |
283 Possible values: {true, false} | |
284 | |
285 STRICT=Boolean | |
286 S=Boolean | |
287 | |
288 For paired-end data and when two distinct barcodes/indices are used to encode samples, | |
289 this option tells if both barcodes should resolve to the same sample. | |
290 When true and if only one of the two reads has a barcode match, the read pair is | |
291 'unassigned'. | |
292 When false and if only one of the two reads has a barcode match, the read pair is | |
293 assigned to the | |
294 corresponding sample | |
295 When reads resolve to different samples, the read pair is always 'unassigned'. | |
296 | |
297 Default value: false. This option can be set to 'null' to clear the default value. | |
298 Possible values: {true, false} | |
299 | |
300 MAX_MISMATCHES=String | |
301 MM=String | |
302 | |
303 Maximum mismatches for a barcode to be considered a match. In situations where both | |
304 barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH (or 2 INDEX_FILE given), two | |
305 distinct | |
306 values can be given here using the syntax MM=X:Z where X and Z are 2 integers to use for | |
307 read_1 and read_2 respectively. | |
308 MM=null is like MM=0 | |
309 | |
310 Default value: 1. This option can be set to 'null' to clear the default value. | |
311 | |
312 MIN_MISMATCH_DELTA=String | |
313 MMD=String | |
314 | |
315 Minimum difference between the number of mismatches against the best and the second best | |
316 barcode. When MMD is not respected, the read remains unassigned. | |
317 When two distinct barcodes are used for sample matching (dual encoding), two distinct | |
318 values can be given using the syntax MMD=X:Z where X and Z are 2 integers to use for | |
319 first (e.g. from read_1 or index_1) and second barcode (e.g. from read_2 or index_2) respectively. | |
320 MMD=null is like MMD=0 | |
321 | |
322 Default value: 1. This option can be set to 'null' to clear the default value. | |
323 | |
324 MIN_BASE_QUALITY=String | |
325 Q=String | |
326 | |
327 Minimum base quality during barcode matching: bases whose quality is less than this | |
328 cutoff are always considered as a mismatch. When two distinct barcodes are used for sample | |
329 matching (dual encoding), two distinct values can be given using the syntax Q=X:Z where X | |
330 and Z are 2 integers to use for first (e.g. from read_1 or index_1) and second barcode | |
331 (e.g. from read_2 or index_2) respectively. | |
332 Q=null is like Q=0. | |
333 | |
334 Default value: 10. This option can be set to 'null' to clear the default value. | |
335 | |
336 XTRIMLEN=String | |
337 XT=String | |
338 | |
339 Optional extra number of base to be trimmed right after the barcode (only used if | |
340 CLIP_BARCODE=true). | |
341 When running paired-end, two distinct values can be given using the syntax XT=X:Z where X | |
342 and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when | |
343 BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode so as to | |
344 end up with reads of the same length (note that this can also be operated using ZT). If a | |
345 unique value is given, e.g. XT=1, while running paired-end the following rule applies: | |
346 (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode | |
347 (2) BPOS=BOTH, the value is used for both reads. | |
348 | |
349 Note that XT=null is like XT=0. | |
350 Default value: 0. This option can be set to 'null' to clear the default value. | |
351 | |
352 ZTRIMLEN=String | |
353 ZT=String | |
354 | |
355 Optional extra number of bases to be trimmed from the read end i.e. 3' end. | |
356 When running paired-end, two distinct values can be given here using the syntax ZT=X:Z | |
357 where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even | |
358 when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode | |
359 so as to end up with reads of the same length (note that this can also be operated using | |
360 XT). Note that if a single value is passed, the value always applies to both reads in | |
361 paired-end mode without further consideration. | |
362 ZT=null is like ZT=0. | |
363 | |
364 Default value: 0. This option can be set to 'null' to clear the default value. | |
365 | |
366 CLIP_BARCODE=Boolean | |
367 C=Boolean | |
368 | |
369 Clip barcode sequence from read sequence, as well as XTRIMLEN (and ZTRIMLEN) bases if | |
370 applicable, before writing to output file. | |
371 If false, reads are written without modification to output file. | |
372 Apply to both barcodes when BPOS=BOTH. | |
373 | |
374 Default value: true. This option can be set to 'null' to clear the default value. | |
375 Possible values: {true, false} | |
376 | |
377 ADD_BARCODE_TO_HEADER=Boolean | |
378 ADD=Boolean | |
379 | |
380 Add barcode at the end of the read header. Apply to both barcodes when BPOS=BOTH. | |
381 If true, the string ':barcode' is added at the end of the read header with a ':' added | |
382 only if current read header does not end with ':'. | |
383 If both reads of the pair have a barcode (i.e. BARCODE_READ_POS == BOTH), then the second | |
384 read also has its own matched barcode written. Else, the read without a barcode receives | |
385 the barcode from the barcoded read. | |
386 For example: | |
387 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0: | |
388 becomes: | |
389 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE | |
390 | |
391 When barcodes contain random positions, i.e. 'N', (for example like in the iCLIP | |
392 protocol) or are UMIs, the added sequence is the sequence clipped from the read and NOT | |
393 the matched barcode. | |
394 | |
395 Default value: true. This option can be set to 'null' to clear the default value. | |
396 Possible values: {true, false} | |
397 | |
398 | |
399 ENSURE_IDENTICAL_HEADER_NAMES=Boolean | |
400 SAME_HEADERS=Boolean | |
401 | |
402 Makes sure that headers of both reads of a pair are identical, using the following read | |
403 header pattern (for both reads of a pair): | |
404 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 SAMPLEBARCODE_READ1:SAMPLEBARCODE_READ2(:CLIPPED_SEQ_FROMREAD1:CLIPPED_SEQ_FROMREAD2) | |
405 This option only makes sense in | |
406 paired end mode and ADD=true. Some (if not all) mappers will indeed complain when the | |
407 read headers are not identical. When molecular barcodes are present in reads (either as | |
408 additional barcodes or as degenerate barcodes ie with 'N') and the RCHAR is used, you | |
409 will end with (problematic) read headers like this: | |
410 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:1:N:0:TAGAACAC:TGGAGTAG | |
411 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:3:N:0:TAGAACAC:CGTTGTAT | |
412 SAME_HEADERS=true will instead generate the following identical header for both reads: | |
413 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:TAGAACAC:TGGAGTAG:CGTTGTAT | |
414 Note that we also clipped the useless '1:N:0' and '3:N:0' as they would also result in | |
415 generating different headers. | |
416 Important: this option will force RCHAR=: UNLESS you specify RCHAR=null ; in which | |
417 case a space will be preserved ie: | |
418 HISEQ:44:C6KC0ANXX:5:1101:1491:1994 TAGAACAC:TGGAGTAG:CGTTGTAT | |
419 | |
420 Default value: true. This option can be set to 'null' to clear the default value. | |
421 Possible values: {true, false} | |
422 | |
423 | |
424 READ_NAME_REPLACE_CHAR=String | |
425 RCHAR=String | |
426 | |
427 Replace spaces in read name/header using provided character. This is particularly handy | |
428 when you need to retain ADDed barcode in read name/header during mapping (everything | |
429 after space in read name is usually clipped in BAM files). For example, with RCHAR=':': | |
430 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0: | |
431 becomes | |
432 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE | |
433 Default value: null. | |
434 | |
435 QUALITY_FORMAT=FastqQualityFormat | |
436 V=FastqQualityFormat | |
437 | |
438 A value describing how the quality values are encoded in the fastq. Either 'Solexa' for | |
439 pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and | |
440 above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift | |
441 of 33. If this value is not specified (or 'null' is given), the quality format will be | |
442 detected. | |
443 | |
444 Default value: Standard. This option can be set to 'null' to clear the default value. | |
445 Possible values: {Solexa, Illumina, Standard} | |
446 | |
447 KEEP_UNASSIGNED_READ=Boolean | |
448 UN=Boolean | |
449 | |
450 Should un-assigned reads be saved in files or simply ignored. File names are | |
451 automatically created or can be given using UF1 & UF2 options. | |
452 | |
453 Default value: true. This option can be set to 'null' to clear the default value. | |
454 Possible values: {true, false} | |
455 | |
456 BARCODE_DIAG_FILE=String | |
457 DIAG=String | |
458 | |
459 Name for a barcode match reporting file (not generated by default). Either a name (in | |
460 which case the file will be created in the output dir) or full path. This file will | |
461 contain a line per read pair with the barcode best matching the read subsequence or | |
462 'null' when no match is found according to matching parameters ; and the final selected | |
463 sample. This file is useful for debugging or further processing in case both ends are | |
464 barcoded. | |
465 N.B: this file will have a size of about one of the fastq input files. | |
466 | |
467 Default value: null. | |
468 ]]> | |
5
222819c87d90
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents:
3
diff
changeset
|
469 </help> |
222819c87d90
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents:
3
diff
changeset
|
470 <expand macro="citations"/> |
0 | 471 </tool> |