comparison SeqSero/libs/split_interleaved_fastq.pl @ 0:c577b57b7c74 draft

Uploaded
author estrain
date Wed, 06 Dec 2017 15:59:29 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c577b57b7c74
#!/usr/bin/perl -w

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use File::Basename;

# Date: 14-05-2010
# Author: Ram Vinay Pandey
#
# This program takes a FASTQ file containing paired-end reads in interleaved
# format as input and returns two separate files containing read 1 and read 2
# in the correct order.

# Command-line state; the values are filled in by GetOptions below.
my $input   = "";    # --input:  path of the interleaved FASTQ file
my $output  = "";    # --output: name the two output file names are built from
my $help    = 0;     # --help:   print the full POD documentation and exit
my $test    = 0;     # --test:   run self-tests, if a Test package provides them
my $verbose = 1;     # not referenced anywhere in the visible code

my $usage="perl $0 --input interleaved_fastq_file.fastq --output output.fastq\n";

GetOptions(
    "input=s"  => \$input,
    "output=s" => \$output,
    "test"     => \$test,
    "help"     => \$help,
) or pod2usage(-msg=>"Wrong options",-verbose=>1);

pod2usage(-verbose=>2) if $help;

# No Test package is defined in the visible part of this file, so calling
# Test::runTests() unconditionally would abort with an "Undefined subroutine"
# error. Keep the option, but only dispatch when the routine really exists.
if ($test) {
    die "--test was requested, but no self-tests (Test::runTests) are available in this script\n"
        unless defined &Test::runTests;
    Test::runTests();
}

# Both arguments are mandatory: the input must exist on disk and the output
# name must be non-empty.
pod2usage(-msg=>"\n\tProvide an input file!!\n\n\t\t$usage\n\n",-verbose=>1) unless -e $input;
pod2usage(-msg=>"\n\tProvide an output file!!\n\n\t\t$usage\n\n",-verbose=>1) unless $output;
62
63
64
65
66
# Build the two output file names from --output. fileparse() splits the value
# into base name, directory and the suffix matched by '\..*' (everything from
# the first dot of the file name onward). Only the base name is used below, so
# both files are created relative to the current working directory regardless
# of any directory given in --output.
my ( $name, $path, $extension ) = File::Basename::fileparse( $output, '\..*' );

my $output1 = $name . "-read1.fastq";
my $output2 = $name . "-read2.fastq";

# Three-argument open keeps characters in the file name from being read as an
# open mode or pipe; the name and $! make a failure diagnosable.
open my $ofh1, '>', $output1
    or die "Could not open output file $output1: $!\n";
open my $ofh2, '>', $output2
    or die "Could not open output file $output2: $!\n";
78
79
80
81
82
# Read the interleaved FASTQ file record by record and copy every record to
# the read-1 or the read-2 output file.
#
# A record is a header line followed by the next three lines (sequence,
# separator, quality). The header decides the destination:
#   - starts with '@' (after optional whitespace) and ends in '1' -> read 1
#   - starts with '@' (after optional whitespace) and ends in '2' -> read 2
# Blank lines between records and lines matching neither pattern are skipped.
open my $in_fh, '<', $input
    or die "Could not open file $input for reading: $!\n";

RECORD:
while ( my $header = <$in_fh> ) {

    # Remove the line terminator whether it is LF or CRLF, so Windows-style
    # input neither leaves stray carriage returns nor adds blank lines.
    $header =~ s/[\r\n]+\z//;

    # discard blank line
    next RECORD if $header =~ m/^\s*$/;

    my $out_fh = $header =~ m/^\s*\@.*1$/ ? $ofh1
               : $header =~ m/^\s*\@.*2$/ ? $ofh2
               :                            undef;
    next RECORD unless defined $out_fh;

    print {$out_fh} "$header\n";

    # Copy the three lines that complete this record. They are taken as-is
    # (apart from the line terminator), so a quality line that happens to
    # start with '@' is never mistaken for a header.
    my $copied = 0;
    while ( $copied < 3 ) {
        my $line = <$in_fh>;
        last if !defined $line;
        $line =~ s/[\r\n]+\z//;
        print {$out_fh} "$line\n";
        $copied++;
    }

    warn "Truncated FASTQ record at end of $input: $header\n" if $copied < 3;
}

close $in_fh;

# Buffered write errors (e.g. a full disk) only surface at close time.
close $ofh1 or die "Could not close output file $output1: $!\n";
close $ofh2 or die "Could not close output file $output2: $!\n";
184
185
186
187
188
189
190
191 =head1 NAME
192
193
194
195 split-interleaved-fastq.pl - This program takes a fastq file containing paired-end reads in interleaved format as input and returns two separate files containing read 1 and read 2 in the correct order.
196
197
198
199 =head1 SYNOPSIS
200
201
202
203 perl split-interleaved-fastq.pl --input interleaved_fastq_file.fastq --output output.fastq
204
205
206
207 =head1 OPTIONS
208
209
210
211 =over 4
212
213
214
215 =item B<--input>
216
217
218
219 The input file which contains read1 and read2 in a single file in FASTQ format. Mandatory parameter
220
221
222
223 =item B<--output>
224
225
226
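227 The output file name. It is used as a template: its directory and extension are removed, and the reads are written to <name>-read1.fastq and <name>-read2.fastq in the current working directory. Mandatory parameter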
228
229
230
231 =item B<--help>
232
233
234
235 Display help for this script
236
237
238
239 =back
240
241
242
243 =head1 Details
244
245
246
247 =head2 Input
248
249
250
251 The paired-end reads must be in interleaved format; the input file looks like the following:
252
253
254
255 @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
256
257 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
258
259 +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
260
261 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV
262
263 @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
264
265 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
266
267 +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
268
269 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
270
271
272
273 =head2 Output
274
275
276
277 The output for read 1 looks like the following:
278
279
280
281 @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
282
283 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
284
285 +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
286
287 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV
288
289
290
291 The output for read 2 looks like the following:
292
293
294
295 @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
296
297 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
298
299 +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
300
301 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
302
303
304
305
306
307 =head1 AUTHORS
308
309
310
311 Ram Vinay Pandey
312
313
314
315 =cut
316
317