0
|
1 <tool id="je_clip" name="Je-Clip" version="1.0">
|
|
2 <description>clips Unique Molecular Identifiers (UMIs) from fastq files</description>
|
|
3 <macros>
|
|
4 <import>macros.xml</import>
|
|
5 </macros>
|
|
6 <stdio> <!-- Exit-status handling for the job -->
|
|
7 <exit_code range="1:" level="fatal" description="Tool exception" /> <!-- any exit code of 1 or above is reported as a fatal "Tool exception" -->
|
|
8 </stdio>
|
|
9 <version_command>echo '1.0'</version_command> <!-- prints a hard-coded 1.0; the je executable itself is not queried for its version -->
|
|
10 <command interpreter="bash">
|
|
11 <![CDATA[
|
|
12 je clip
|
|
13 ## Every option is handed to je as a KEY=value pair.
|
|
14 ## Fastq inputs; BPOS is only passed when the library is not single end
|
|
15 @single_or_paired_cmd@
|
|
16 #if str( $library.type ) != "single":
|
|
17 BPOS=${library.BPOS}
|
|
18 #end if
|
|
19 ## Shared options and barcode length come from macro tokens; the barcode table path is only passed when ADD is false
|
|
20 @common_options_cmd@
|
|
21 @barcode_len_cmd@
|
|
22 ADD=${ADD}
|
|
23 #if str($ADD) == "false":
|
|
24 BARCODE_RESULT_FILENAME='$BARCODE_RESULT_FILENAME'
|
|
25 #end if
|
|
26 ## Clipped fastq outputs; dataset paths are single-quoted so the shell passes them through verbatim
|
|
27 OF1='${OF1}'
|
|
28 #if str( $library.type ) != "single":
|
|
29 OF2='${OF2}'
|
|
30 #end if
|
|
31 ## FORCE is always sent as true; presumably this lets je overwrite the output files Galaxy has already created (to be confirmed against the Je documentation)
|
|
32 FORCE=true
|
|
33 ]]>
|
|
34 </command>
|
|
35 <inputs>
|
|
36 <!-- Single/paired library selection, expanded from a macro. The BPOS select nested here is read by the command as library.BPOS and is only passed when the library type is not "single". -->
|
|
37 <expand macro="single_or_paired_general">
|
|
38 <param name="BPOS" type="select" label="Barcode read position (BPOS)" help="where are the barcodes.">
|
|
39 <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
|
|
40 <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
|
|
41 <option value="BOTH">BOTH (beginning of both reads)</option>
|
|
42 </param>
|
|
43 </expand>
|
|
44 <expand macro="barcode_len_option"/> <!-- barcode length input; presumably this macro provides the LEN parameter that the tests set (TODO confirm in the macro file) -->
|
|
45 <param name="ADD" type="boolean"
|
|
46 label="Add matched barcode at the end of the read header (ADD)"
|
|
47 truevalue="true"
|
|
48 falsevalue="false"
|
|
49 checked="true"
|
|
50 /> <!-- checked by default; the command emits ADD=true or ADD=false, and ADD=false additionally passes the barcode table path -->
|
|
51
|
|
52 <expand macro="common_options"/> <!-- further options shared through a macro; their definitions are not part of this file -->
|
|
53
|
|
54
|
|
55 </inputs>
|
|
56 <outputs> <!-- Each filter is a Python expression evaluated against the tool parameters; the dataset is only created when it is true. -->
|
|
57 <data name="BARCODE_RESULT_FILENAME" format="tabular" label="Je-Clipped Barcodes"><filter>ADD is False</filter></data> <!-- the command only passes this path to je when ADD is false, so the dataset is skipped otherwise -->
|
|
58 <data name="OF1" format_source="input_1" label="Je-Clipped ${on_string}"/> <!-- clipped reads of the first fastq; the dollar sign is required for on_string to be substituted in the label -->
|
|
59 <data name="OF2" format_source="input_1" label="Je-Clipped ${on_string}"> <!-- clipped reads of the second fastq, paired layouts only -->
|
|
60 <filter>library['type'] != "single"</filter> <!-- type is a member of the library conditional (the command reads it as library.type), so it is addressed through library here as well -->
|
|
61 </data>
|
|
62 </outputs>
|
|
63
|
|
64 <tests>
|
|
65 <test>
|
|
66 <!-- Simple single-end test: LEN is 6 and ADD is switched off; both the barcode table and the clipped fastq are compared with reference files. -->
|
|
67 <param name="type" value="single"/>
|
|
68 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
|
|
69 <param name="LEN" value="6"/>
|
|
70 <param name="ADD" value="false"/>
|
|
71 <output name="BARCODE_RESULT_FILENAME" file="clip_barcode_result_file.txt"/>
|
|
72 <output name="OF1" file="clip_dataset1_SE.fastq"/>
|
|
73 </test>
|
|
74 <test>
|
|
75 <!-- More complex paired-end test with a different barcode on the forward and reverse reads: BPOS is BOTH, ADD is left at its default, and only the two clipped fastq outputs are compared. -->
|
|
76 <param name="type" value="paired"/>
|
|
77 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
|
|
78 <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/>
|
|
79 <param name="LEN" value="6"/>
|
|
80 <param name="BPOS" value="BOTH"/>
|
|
81 <output name="OF1" file="clip_dataset1_PE.fastq"/>
|
|
82 <output name="OF2" file="clip_dataset2_PE.fastq"/>
|
|
83 </test>
|
|
84 </tests>
|
|
85
|
|
86
|
|
87 <help>
|
|
88 <![CDATA[
|
|
89 **What it does**
|
|
90
|
|
91 Je clip: Clips barcodes or Unique Molecular Identifiers (UMIs) from the input fastq files.
|
|
92 Input files are fastq files, and can be in gzip compressed format.
|
|
93
|
|
94 Author: Charles Girardot (charles.girardot@embl.de).
|
|
95
|
|
96 Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de).
|
|
97
|
|
98 ------
|
|
99
|
|
100 **Know what you are doing**
|
|
101
|
|
102 .. class:: warningmark
|
|
103
|
|
104 You will want to read the `documentation`__.
|
|
105
|
|
106 .. __: http://gbcs.embl.de/portal/Je
|
|
107
|
|
108 ------
|
|
109
|
|
110 **Parameter list**
|
|
111
|
|
112 This is an exhaustive list of options::
|
|
113
|
|
114 FASTQ_FILE1=File
|
|
115 F1=File
|
|
116
|
|
117 Input fastq file (optionally gzipped) for single end data, or first read in paired end data.
|
|
118 Required.
|
|
119
|
|
120 FASTQ_FILE2=File
|
|
121 F2=File
|
|
122
|
|
123 Input fastq file (optionally gzipped) for the second read of paired end data.
|
|
124 Default value: null.
|
|
125
|
|
126 BCLEN=String
|
|
127 LEN=String
|
|
128
|
|
129 Length of the barcode sequences. When BARCODE_READ_POS == BOTH, two distinct lengths can
|
|
130 be provided using the syntax LEN=X:Z where X and Z are 2 integers representing the
|
|
131 barcode length for read_1 and read_2 respectively.
|
|
132 Required.
|
|
133
|
|
134 BARCODE_READ_POS=BarcodePosition
|
|
135 BPOS=BarcodePosition
|
|
136
|
|
137 Reads containing the sequence (i.e. UMIs) to clip:
|
|
138 READ_1 (beginning of read from FASTQ_FILE_1),
|
|
139 READ_2 (beginning of read from FASTQ_FILE_2),
|
|
140 BOTH (beginning of both reads).
|
|
141
|
|
142 Automatically set to READ_1 in single end mode and BOTH in paired end mode. Actually not
|
|
143 relevant for single end data
|
|
144 Default value: BOTH. This option can be set to 'null' to clear the default value.
|
|
145 Possible values: {READ_1, READ_2, BOTH, NONE}
|
|
146
|
|
147 ADD_BARCODE_TO_HEADER=Boolean
|
|
148 ADD=Boolean
|
|
149
|
|
150 Should clipped UMIs be added to the read header (at the end); applies to both barcodes when
|
|
151 BPOS=BOTH.
|
|
152 If ADD=true, the string ':barcode' is added at the end of the read header with a ':'
|
|
153 added only if current read header does not end with ':'.
|
|
154 If both reads of the pair contain a UMI (i.e. BARCODE_READ_POS == BOTH), the UMI from
|
|
155 the second read is also added to the read header.
|
|
156 Else, the header of the read without UMI receives the UMI from the other read.
|
|
157 For example:
|
|
158 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:
|
|
159 becomes
|
|
160 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE
|
|
161 Default value: true. This option can be set to 'null' to clear the default value.
|
|
162 Possible values: {true, false}
|
|
163
|
|
164 ENSURE_IDENTICAL_HEADER_NAMES=Boolean
|
|
165 SAME_HEADERS=Boolean
|
|
166
|
|
167 Makes sure headers of both reads of a pair are identical.
|
|
168 Read name (or headers) will follow the pattern (for both reads of a pair):
|
|
169 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 CLIPPED_SEQ_FROMREAD1:CLIPPED_SEQ_FROMREAD2
|
|
170 This option only makes sense in paired end mode and ADD=true. Some (if not all) mappers
|
|
171 will indeed complain when read headers of a read pair are not identical.
|
|
172 When SAME_HEADERS=FALSE and the RCHAR is used, read headers look like this:
|
|
173 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:1:N:0:TGGAGTAG
|
|
174 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:3:N:0:CGTTGTAT
|
|
175
|
|
176 SAME_HEADERS=true will instead generate the following identical header for both reads:
|
|
177 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:TGGAGTAG:CGTTGTAT
|
|
178 Note that we also clipped the useless '1:N:0' and '3:N:0' as they also result in
|
|
179 different headers
|
|
180 Important : this option will force RCHAR=: UNLESS you specify RCHAR=null ; in which case
|
|
181 a space will be preserved i.e.:
|
|
182 HISEQ:44:C6KC0ANXX:5:1101:1491:1994 TAGAACAC:TGGAGTAG:CGTTGTAT
|
|
183
|
|
184 Default value: true.
|
|
185 This option can be set to 'null' to clear the default value. Possible values: {true,
|
|
186 false}
|
|
187
|
|
188 READ_NAME_REPLACE_CHAR=String
|
|
189 RCHAR=String
|
|
190
|
|
191 Replace spaces in read name/header using provided character.
|
|
192 This is needed when you need to retain ADDed barcode in read name/header during mapping
|
|
193 as everything after space in read name is usually clipped in BAM files.
|
|
194 For example, with RCHAR=':':
|
|
195 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 1:N:0:
|
|
196 becomes
|
|
197 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:1:N:0:BARCODE
|
|
198
|
|
199 Default value: ':'. This option can be set to 'null' to clear the default value.
|
|
200
|
|
201 XTRIMLEN=String
|
|
202 XT=String
|
|
203
|
|
204 Optional extra number of base(s) to be trimmed right after the barcode. These extra bases
|
|
205 are not added to read headers.
|
|
206 When running paired-end, two distinct values can be given using the syntax XT=X:Z where X
|
|
207 and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when
|
|
208 BPOS=READ_1 or BPOS=READ_2, a X:Y syntax can be given to trim the read w/o barcode to
|
|
209 end up with reads of identical length (note that this can also be operated using ZT). If
|
|
210 a unique value is given, e.g. XT=1, while running paired-end the following rule applies :
|
|
211 (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode
|
|
212 (2) BPOS=BOTH, the value is used for both reads.
|
|
213 Note that XT=null is like XT=0.
|
|
214 Default value: 0. This option can be set to 'null' to clear the default value.
|
|
215
|
|
216 ZTRIMLEN=String
|
|
217 ZT=String
|
|
218
|
|
219 Optional extra number of bases to be trimmed from the read end i.e. 3' end. These extra
|
|
220 bases are not added to read headers.
|
|
221 When running paired-end, two distinct values can be given here using the syntax ZT=X:Z
|
|
222 where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even
|
|
223 when BPOS=READ_1 or BPOS=READ_2, a X:Y syntax can be given to trim the read w/o barcode
|
|
224 so as to end up with reads of the same length (note that this can also be operated using
|
|
225 XT). Note that if a single value is passed, the value always applies to both reads in
|
|
226 paired-end mode without further consideration.
|
|
227
|
|
228 Default value: 0. This option can be set to 'null' to clear the default value.
|
|
229
|
|
230 BARCODE_RESULT_FILENAME=String
|
|
231 BF=String
|
|
232
|
|
233 Optional file name where to write clipped barcodes, default name is clipped_barcodes.txt.
|
|
234 This file is automatically created if ADD=FALSE i.e. even if this option is not provided
|
|
235 by user (and always created if this option is given).
|
|
236 File format is tab delimited with:
|
|
237 ``read header (col 1) barcode from read_1 (col 2) barcode quality from read_1 (col 3)``
|
|
238 + barcode + quality from read_2 (col 4 and 5 respectively) when relevant.
|
|
239 Can either be a name (in which case the file will be created in the output dir) or a full path.
|
|
240 Default value: null.
|
|
241
|
|
242 ]]>
|
|
243 </help>
|
|
244
|
|
245 </tool>
|