Mercurial > repos > estrain > seqsero_v1
comparison SeqSero/libs/split_interleaved_fastq.pl @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c577b57b7c74 |
---|---|
#!/usr/bin/perl -w

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use File::Basename;

# Date: 14-05-2010
# Author: Ram Vinay Pandey
#
# This program takes a FASTQ file containing paired-end reads in interleaved
# format as input and writes two separate files containing read 1 and read 2,
# each in the order in which the records appear in the input.

# Command-line option values.
my $input  = "";
my $output = "";
my $help   = 0;
my $test   = 0;

my $usage = "perl $0 --input interleaved_fastq_file.fastq --output output.fastq\n";

GetOptions(
    "input=s"  => \$input,
    "output=s" => \$output,
    "test"     => \$test,
    "help"     => \$help,
) or pod2usage(-msg => "Wrong options", -verbose => 1);

pod2usage(-verbose => 2) if $help;

# --test is still accepted, but this script does not define Test::runTests().
# Fail with a clear message instead of "Undefined subroutine" when no such
# routine has been loaded.
if ($test) {
    die "--test was requested, but Test::runTests() is not available in this script\n"
        unless defined &Test::runTests;
    Test::runTests();
}

pod2usage(-msg => "\n\tProvide an input file!!\n\n\t\t$usage\n\n", -verbose => 1) unless -e $input;
pod2usage(-msg => "\n\tProvide an output file!!\n\n\t\t$usage\n\n", -verbose => 1) unless $output;

# The output names are built from the base name of --output with everything
# from the first dot onwards removed. Only the base name is used, so both
# files are created relative to the current working directory.
my ($name) = File::Basename::fileparse($output, '\..*');
my $output1 = $name . "-read1.fastq";
my $output2 = $name . "-read2.fastq";

# Open the input first so that a bad input path does not leave empty output
# files behind.
open my $in_fh, '<', $input
    or die "Could not open file $input for reading: $!\n";
open my $ofh1, '>', $output1
    or die "Could not open output file $output1: $!\n";
open my $ofh2, '>', $output2
    or die "Could not open output file $output2: $!\n";

split_interleaved_fastq($in_fh, $ofh1, $ofh2);

close $in_fh;

# Buffered write errors only surface at close time, so check them.
close $ofh1 or die "Could not close output file $output1: $!\n";
close $ofh2 or die "Could not close output file $output2: $!\n";

# Copy every FASTQ record from $in_fh to the read-1 or read-2 handle.
#
# Parameters:
#   $in_fh    - handle opened for reading the interleaved FASTQ data
#   $read1_fh - handle that receives records whose header ends in "1"
#   $read2_fh - handle that receives records whose header ends in "2"
#
# A record is a header line (optional leading whitespace, then "@", ending in
# "1" or "2") plus the three lines that follow it. Blank lines and any other
# line found where a header is expected are skipped. Dies if the input ends in
# the middle of a record or if a write fails.
sub split_interleaved_fastq {
    my ($in_fh, $read1_fh, $read2_fh) = @_;

    my %fh_for          = (1 => $read1_fh, 2 => $read2_fh);
    my $lines_per_record = 4;    # header, sequence, separator, quality

    LINE:
    while (my $header = <$in_fh>) {
        $header = _strip_eol($header);

        # Discard blank lines between records.
        next LINE if $header =~ m/\A\s*\z/;

        # The last character of the header selects the output file.
        next LINE unless $header =~ m/\A\s*\@.*([12])\z/;
        my $out_fh = $fh_for{$1};

        # Read the whole record before writing anything, so a record cut
        # short by end-of-file is never written out incomplete.
        my @record = ($header);
        for my $line_no (2 .. $lines_per_record) {
            my $line = <$in_fh>;
            die "Truncated FASTQ record '$header': input ended after line $.\n"
                unless defined $line;
            push @record, _strip_eol($line);
        }

        print {$out_fh} map { "$_\n" } @record
            or die "Could not write FASTQ record '$header': $!\n";
    }

    return;
}

# Return $line without its trailing line terminator. Both LF and CRLF endings
# are removed, so the output always uses plain LF. Carriage returns inside a
# line are left untouched.
sub _strip_eol {
    my ($line) = @_;
    $line =~ s/[\r\n]+\z//;
    return $line;
}

=head1 NAME

split-interleaved-fastq.pl - This program takes a fastq file containing paired-end reads in interleaved format as input and returns two separate files containing read 1 and read 2 in the correct order.

=head1 SYNOPSIS

perl split-interleaved-fastq.pl --input interleaved_fastq_file.fastq --output output.fastq

=head1 OPTIONS

=over 4

=item B<--input>

The input file which contains read1 and read2 in a single file in FASTQ format. Mandatory parameter

=item B<--output>

The output file. Mandatory parameter. Only its base name, up to the first dot, is used: the reads are written to F<NAME-read1.fastq> and F<NAME-read2.fastq> in the current working directory.

=item B<--test>

Run Test::runTests() if such a routine is available; otherwise the script stops with an error.

=item B<--help>

Display help for this script

=back

=head1 Details

=head2 Input

The paired-end reads in interleaved format. The header line of every record must end in 1 (read 1) or 2 (read 2). The input file looks like the following:

    @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
    TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
    +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
    baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV
    @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
    TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
    +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
    bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

=head2 Output

The output for read 1 looks like the following:

    @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
    TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
    +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
    baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV

The output for read 2 looks like the following:

    @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
    TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
    +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
    bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

=head1 AUTHORS

Ram Vinay Pandey

=cut