Mercurial > repos > gbcs-embl-heidelberg > je_clip
annotate je-clip.xml @ 2:b61628ae2371 draft
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
author | gbcs-embl-heidelberg |
---|---|
date | Wed, 07 Dec 2016 11:57:50 -0500 |
parents | 101525093ba1 |
children | 2cfed59e4d27 |
rev | line source |
---|---|
0 | 1 <tool id="je_clip" name="Je-Clip" version="1.0"> |
2 <description>clips Unique Molecular Identifiers (UMIs) from fastq files</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <stdio> | |
7 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
8 </stdio> | |
9 <version_command>echo '1.0'</version_command> | |
10 <command interpreter="bash"> | |
11 <![CDATA[ | |
12 je clip | |
13 | |
14 ## Fastq inputs | |
15 @single_or_paired_cmd@ | |
16 #if str( $library.type ) != "single": | |
17 BPOS=${library.BPOS} | |
18 #end if | |
19 | |
20 @common_options_cmd@ | |
21 @barcode_len_cmd@ | |
22 ADD=${ADD} | |
23 #if str($ADD) == "false": | |
2
b61628ae2371
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
24 BARCODE_RESULT_FILENAME=${BARCODE_RESULT_FILENAME} |
0 | 25 #end if |
26 | |
27 OF1=${OF1} | |
28 #if str( $library.type ) != "single": | |
29 OF2=${OF2} | |
30 #end if | |
31 | |
32 FORCE=true | |
33 ]]> | |
34 </command> | |
35 <inputs> | |
36 <!-- single/paired --> | |
37 <expand macro="single_or_paired_general"> | |
38 <param name="BPOS" type="select" label="Barcode read position (BPOS)" help="where are the barcodes."> | |
39 <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option> | |
40 <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option> | |
41 <option value="BOTH">BOTH (beginning of both reads)</option> | |
42 </param> | |
43 </expand> | |
44 <expand macro="barcode_len_option"/> | |
45 <param name="ADD" type="boolean" | |
46 label="Add matched barcode at the end of the read header (ADD)" | |
47 truevalue="true" | |
48 falsevalue="false" | |
49 checked="true" | |
50 /> | |
51 | |
52 <expand macro="common_options"/> | |
53 | |
54 | |
55 </inputs> | |
56 <outputs> | |
2
b61628ae2371
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
57 <data name="BARCODE_RESULT_FILENAME" format="tabular" label="Je-Clipped Barcodes"> |
b61628ae2371
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
58 <!-- Galaxy evaluates output filters as Python expressions: keep this dataset only when ADD is false --> <filter>not ADD</filter> |
b61628ae2371
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
59 </data> |
b61628ae2371
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
60 <data name="OF1" format_source="input_1" label="Je-Clip OF1: ${on_string}"/> |
b61628ae2371
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
61 <data name="OF2" format_source="input_2" label="Je-Clip OF2: ${on_string}"> |
b61628ae2371
planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
gbcs-embl-heidelberg
parents:
0
diff
changeset
|
62 <filter>library["type"] != "single"</filter> |
0 | 63 </data> |
64 </outputs> | |
65 | |
66 <tests> | |
67 <test> | |
68 <!-- simple test on single end data --> | |
69 <param name="type" value="single"/> | |
70 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/> | |
71 <param name="LEN" value="6"/> | |
72 <param name="ADD" value="false"/> | |
73 <output name="BARCODE_RESULT_FILENAME" file="clip_barcode_result_file.txt"/> | |
74 <output name="OF1" file="clip_dataset1_SE.fastq"/> | |
75 </test> | |
76 <test> | |
77 <!-- more complex test on paired end data with different barcode for fwd/rev --> | |
78 <param name="type" value="paired"/> | |
79 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/> | |
80 <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/> | |
81 <param name="LEN" value="6"/> | |
82 <param name="BPOS" value="BOTH"/> | |
83 <output name="OF1" file="clip_dataset1_PE.fastq"/> | |
84 <output name="OF2" file="clip_dataset2_PE.fastq"/> | |
85 </test> | |
86 </tests> | |
87 | |
88 | |
89 <help> | |
90 <![CDATA[ | |
91 **What it does** | |
92 | |
93 Je clip: Clips barcodes or Unique Molecular Identifiers (UMIs) from the input fastq files. | |
94 Input files are fastq files, and can be in gzip compressed format. | |
95 | |
96 Author: Charles Girardot (charles.girardot@embl.de). | |
97 | |
98 Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de). | |
99 | |
100 ------ | |
101 | |
102 **Know what you are doing** | |
103 | |
104 .. class:: warningmark | |
105 | |
106 You will want to read the `documentation`__. | |
107 | |
108 .. __: http://gbcs.embl.de/portal/Je | |
109 | |
110 ------ | |
111 | |
112 **Parameter list** | |
113 | |
114 This is an exhaustive list of options:: | |
115 | |
116 FASTQ_FILE1=File | |
117 F1=File | |
118 | |
119 Input fastq file (optionally gzipped) for single end data, or first read in paired end data. | |
120 Required. | |
121 | |
122 FASTQ_FILE2=File | |
123 F2=File | |
124 | |
125 Input fastq file (optionally gzipped) for the second read of paired end data. | |
126 Default value: null. | |
127 | |
128 BCLEN=String | |
129 LEN=String | |
130 | |
131 Length of the barcode sequences. When BARCODE_READ_POS == BOTH, two distinct lengths can | |
132 be provided using the syntax LEN=X:Z where X and Z are 2 integers representing the | |
133 barcode length for read_1 and read_2 respectively. | |
134 Required. | |
135 | |
136 BARCODE_READ_POS=BarcodePosition | |
137 BPOS=BarcodePosition | |
138 | |
139 Reads containing the sequence (i.e. UMIs) to clip: | |
140 READ_1 (beginning of read from FASTQ_FILE_1), | |
141 READ_2 (beginning of read from FASTQ_FILE_2), | |
142 BOTH (beginning of both reads). | |
143 | |
144 Automatically set to READ_1 in single end mode and BOTH in paired end mode. Actually not | |
145 relevant for single end data | |
146 Default value: BOTH. This option can be set to 'null' to clear the default value. | |
147 Possible values: {READ_1, READ_2, BOTH, NONE} | |
148 | |
149 ADD_BARCODE_TO_HEADER=Boolean | |
150 ADD=Boolean | |
151 | |
152 Should clipped UMIs be added to the read header (at the end); apply to both barcodes when | |
153 BPOS=BOTH. | |
154 If ADD=true, the string ':barcode' is added at the end of the read header with a ':' | |
155 added only if current read header does not end with ':'. | |
156 If both reads of the pair contain a UMI (i.e. BARCODE_READ_POS == BOTH), the UMI from | |
157 the second read is also added to the read header. | |
158 Else, the header of the read without UMI receives the UMI from the other read. | |
159 For example: | |
160 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0: | |
161 becomes | |
162 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE | |
163 Default value: true. This option can be set to 'null' to clear the default value. | |
164 Possible values: {true, false} | |
165 | |
166 ENSURE_IDENTICAL_HEADER_NAMES=Boolean | |
167 SAME_HEADERS=Boolean | |
168 | |
169 Makes sure headers of both reads of a pair are identical. | |
170 Read name (or headers) will follow the pattern (for both reads of a pair): | |
171 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 CLIPPED_SEQ_FROMREAD1:CLIPPED_SEQ_FROMREAD2 | |
172 This option only makes sense in paired end mode and ADD=true. Some (if not all) mappers | |
173 will indeed complain when read headers of a read pair are not identical. | |
174 When SAME_HEADERS=FALSE and the RCHAR is used, read headers look like this: | |
175 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:1:N:0:TGGAGTAG | |
176 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:3:N:0:CGTTGTAT | |
177 | |
178 SAME_HEADERS=true will instead generate the following identical header for both reads: | |
179 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:TGGAGTAG:CGTTGTAT | |
180 Note that we also clipped the useless '1:N:0' and '3:N:0' as they also result in | |
181 different headers | |
182 Important : this option will force RCHAR=: UNLESS you specify RCHAR=null ; in which case | |
183 a space will be preserved i.e.: | |
184 HISEQ:44:C6KC0ANXX:5:1101:1491:1994 TAGAACAC:TGGAGTAG:CGTTGTAT | |
185 | |
186 Default value: true. | |
187 This option can be set to 'null' to clear the default value. Possible values: {true, | |
188 false} | |
189 | |
190 READ_NAME_REPLACE_CHAR=String | |
191 RCHAR=String | |
192 | |
193 Replace spaces in read name/header using provided character. | |
194 This is needed when you need to retain ADDed barcode in read name/header during mapping | |
195 as everything after space in read name is usually clipped in BAM files. | |
196 For example, with RCHAR=':': | |
197 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 1:N:0: | |
198 becomes | |
199 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:1:N:0:BARCODE | |
200 | |
201 Default value: ':'. This option can be set to 'null' to clear the default value. | |
202 | |
203 XTRIMLEN=String | |
204 XT=String | |
205 | |
206 Optional extra number of base(s) to be trimmed right after the barcode. These extra bases | |
207 are not added to read headers. | |
208 When running paired-end, two distinct values can be given using the syntax XT=X:Z where X | |
209 and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when | |
210 BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode to | |
211 end up with reads of identical length (note that this can also be operated using ZT). If | |
212 a unique value is given, e.g. XT=1, while running paired-end the following rule applies : | |
213 (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode | |
214 (2) BPOS=BOTH, the value is used for both reads. | |
215 Note that XT=null is like XT=0. | |
216 Default value: 0. This option can be set to 'null' to clear the default value. | |
217 | |
218 ZTRIMLEN=String | |
219 ZT=String | |
220 | |
221 Optional extra number of bases to be trimmed from the read end i.e. 3' end. These extra | |
222 bases are not added to read headers. | |
223 When running paired-end, two distinct values can be given here using the syntax ZT=X:Z | |
224 where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even | |
225 when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode | |
226 so as to end up with reads of the same length (note that this can also be operated using | |
227 XT). Note that if a single value is passed, the value always applies to both reads in | |
228 paired-end mode without further consideration. | |
229 | |
230 Default value: 0. This option can be set to 'null' to clear the default value. | |
231 | |
232 BARCODE_RESULT_FILENAME=String | |
233 BF=String | |
234 | |
235 Optional file name where to write clipped barcodes, default name is clipped_barcodes.txt. | |
236 This file is automatically created if ADD=FALSE i.e. even if this option is not provided | |
237 by user (and always created if this option is given). | |
238 File format is tab delimited with: | |
239 ``read header (col 1) barcode from read_1 (col 2) barcode quality from read_1 (col 3)`` | |
240 + barcode + quality from read_2 (col 4 and 5 respectively) when relevant. | |
241 Can either be a name (in which case the file will be created in the output dir) or a full path. | |
242 Default value: null. | |
243 | |
244 ]]> | |
245 </help> | |
246 | |
247 </tool> |