#!/usr/bin/perl -w

use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use File::Basename;

# Date: 14-05-2010
# This program takes a fastq file containing paired-end reads in interleaved
# format as input and returns two separate files containing read 1 and read 2
# in the correct order.
# Author: Ram Vinay Pandey
#
# Processing outline:
#   1. Parse --input / --output / --test / --help.
#   2. Derive the two output file names from --output by stripping everything
#      from the first dot of the base name and appending "-read1.fastq" /
#      "-read2.fastq". The files are created in the directory given in
#      --output (the current directory when none is given).
#   3. Scan the input; every header line ("@..." ending in 1 or 2) and the
#      three lines that follow it are copied to the matching output file.

# Define the variables
my $input   = "";
my $output  = "";
my $help    = 0;
my $test    = 0;
my $verbose = 1;    # NOTE(review): never read in the visible code - confirm before removing

my $usage="perl $0 --input interleaved_fastq_file.fastq --output output.fastq\n";

GetOptions(
    "input=s"  => \$input,
    "output=s" => \$output,
    "test"     => \$test,
    "help"     => \$help
) or pod2usage(-msg=>"Wrong options",-verbose=>1);

pod2usage(-verbose=>2) if $help;

# NOTE(review): no Test package is defined in the visible part of this file;
# unless it is provided elsewhere, --test dies with an undefined-subroutine error.
Test::runTests() if $test;

pod2usage(-msg=>"\n\tProvide an input file!!\n\n\t\t$usage\n\n",-verbose=>1) unless -e $input;
pod2usage(-msg=>"\n\tProvide an output file!!\n\n\t\t$usage\n\n",-verbose=>1) unless $output;

# Keep the directory part of --output so the result files are written where
# the user asked for them rather than always into the current directory.
# fileparse() returns "./" as the path when --output has no directory part.
my ( $name, $path ) = File::Basename::fileparse( $output, '\..*' );
my $output1 = $path . $name . "-read1.fastq";
my $output2 = $path . $name . "-read2.fastq";

# Three-argument open with lexical handles: the file name can never be
# misread as an open mode, and $! tells the user why an open failed.
open my $ofh1, '>', $output1 or die "Could not open output file $output1: $!\n";
open my $ofh2, '>', $output2 or die "Could not open output file $output2: $!\n";
open my $ifh,  '<', $input   or die "Could not open file $input for reading: $!\n";

while ( my $line = <$ifh> ) {

    # Remove the line ending, including the CR of Windows (CRLF) files, so
    # that every line is written back with exactly one "\n".
    $line =~ s/[\r\n]+\z//;

    # discard blank line
    next if $line =~ m/^\s*$/;

    if ( $line =~ m/^\s*\@.*1$/ ) {

        # Header of read 1
        _copy_record( $ifh, $ofh1, $line );
    }
    elsif ( $line =~ m/^\s*\@.*2$/ ) {

        # Header of read 2
        _copy_record( $ifh, $ofh2, $line );
    }

    # Any other line is ignored, exactly as before.
}

close $ifh;

# Buffered write errors (e.g. disk full) only surface at close time.
close $ofh1 or die "Could not close output file $output1: $!\n";
close $ofh2 or die "Could not close output file $output2: $!\n";

# Write one FASTQ record to $out_fh: the already-read header line followed by
# the next three lines taken from $in_fh. Blank lines are not skipped here;
# the three lines are copied as they come.
#
#   $in_fh   - input handle positioned just after the header line
#   $out_fh  - output handle of the read-1 or read-2 file
#   $header  - header line without its line ending
#
# Warns (and returns early) when the input ends before the record is complete.
sub _copy_record {
    my ( $in_fh, $out_fh, $header ) = @_;

    print {$out_fh} "$header\n";

    for my $expected ( 1 .. 3 ) {
        my $record_line = <$in_fh>;
        if ( !defined $record_line ) {
            warn "Truncated FASTQ record for '$header': input ended early\n";
            return;
        }
        $record_line =~ s/[\r\n]+\z//;
        print {$out_fh} "$record_line\n";
    }

    return;
}

# The documentation (POD) follows; nothing below this marker is compiled.
__END__
|
|
184
|
|
185
|
|
186
|
|
187
|
|
188
|
|
189
|
|
190
|
|
191 =head1 NAME
|
|
192
|
|
193
|
|
194
|
|
195 split-interleaved-fastq.pl - This program takes a fastq file containing paired-end reads in interleaved format as input and returns two separate files containing read 1 and read 2 in the correct order.
|
|
196
|
|
197
|
|
198
|
|
199 =head1 SYNOPSIS
|
|
200
|
|
201
|
|
202
|
|
203 perl split-interleaved-fastq.pl --input interleaved_fastq_file.fastq --output output.fastq
|
|
204
|
|
205
|
|
206
|
|
207 =head1 OPTIONS
|
|
208
|
|
209
|
|
210
|
|
211 =over 4
|
|
212
|
|
213
|
|
214
|
|
215 =item B<--input>
|
|
216
|
|
217
|
|
218
|
|
219 The input file which contains read1 and read2 in a single file in FASTQ format. Mandatory parameter
|
|
220
|
|
221
|
|
222
|
|
223 =item B<--output>
|
|
224
|
|
225
|
|
226
|
|
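227 The output file name. Its base name (everything before the first dot) is used to create two files, <name>-read1.fastq and <name>-read2.fastq. Mandatory parameter.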
|
|
228
|
|
229
|
|
230
|
|
231 =item B<--help>
|
|
232
|
|
233
|
|
234
|
|
235 Display help for this script
|
|
236
|
|
237
|
|
238
|
|
239 =back
|
|
240
|
|
241
|
|
242
|
|
243 =head1 Details
|
|
244
|
|
245
|
|
246
|
|
247 =head2 Input
|
|
248
|
|
249
|
|
250
|
|
251 The paired-end reads are expected in interleaved format; the input file looks like the following:
|
|
252
|
|
253
|
|
254
|
|
255 @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
|
|
256
|
|
257 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
|
|
258
|
|
259 +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
|
|
260
|
|
261 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV
|
|
262
|
|
263 @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
|
|
264
|
|
265 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
|
|
266
|
|
267 +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
|
|
268
|
|
269 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
270
|
|
271
|
|
272
|
|
273 =head2 Output
|
|
274
|
|
275
|
|
276
|
|
277 The output for read 1 looks like the following:
|
|
278
|
|
279
|
|
280
|
|
281 @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
|
|
282
|
|
283 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
|
|
284
|
|
285 +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
|
|
286
|
|
287 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV
|
|
288
|
|
289
|
|
290
|
|
291 The output for read 2 looks like the following:
|
|
292
|
|
293
|
|
294
|
|
295 @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
|
|
296
|
|
297 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
|
|
298
|
|
299 +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
|
|
300
|
|
301 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
302
|
|
303
|
|
304
|
|
305
|
|
306
|
|
307 =head1 AUTHORS
|
|
308
|
|
309
|
|
310
|
|
311 Ram Vinay Pandey
|
|
312
|
|
313
|
|
314
|
|
315 =cut
|
|
316
|
|
317 |