comparison rapsodyn/rapsosnp_stats4x.pl @ 2:761fecc07fa9 draft

Uploaded
author mcharles
date Thu, 11 Sep 2014 03:10:47 -0400
parents
children
comparison
equal deleted inserted replaced
1:7f36bd129321 2:761fecc07fa9
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 my $read1_row = $ARGV[0];
6 my $read2_row = $ARGV[1];
7
8 my $read1_trimmed_part1 = $ARGV[2];
9 my $read1_trimmed_part2 = $ARGV[3];
10 my $read1_trimmed_part3 = $ARGV[4];
11 my $read1_trimmed_part4 = $ARGV[5];
12 my $read2_trimmed_part1 = $ARGV[6];
13 my $read2_trimmed_part2 = $ARGV[7];
14 my $read2_trimmed_part3 = $ARGV[8];
15 my $read2_trimmed_part4 = $ARGV[9];
16
17 my $sam_row_part1 = $ARGV[10];
18 my $sam_row_part2 = $ARGV[11];
19 my $sam_row_part3 = $ARGV[12];
20 my $sam_row_part4 = $ARGV[13];
21 my $sam_filtered_part1 = $ARGV[14];
22 my $sam_filtered_part2 = $ARGV[15];
23 my $sam_filtered_part3 = $ARGV[16];
24 my $sam_filtered_part4 = $ARGV[17];
25
26 my $mpileup_variant = $ARGV[18];
27
28 my $list_filtered = $ARGV[19];
29
30 my $blast_filtered_part1 = $ARGV[20];
31 my $blast_filtered_part2 = $ARGV[21];
32 my $blast_filtered_part3 = $ARGV[22];
33 my $blast_filtered_part4 = $ARGV[23];
34
35 my $snp_selected = $ARGV[24];
36
37
38 open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
39 my $nbread=0;
40 my $nbbase =0;
41 while (my $line1=<INR1R>){
42 my $line2 = <INR1R>;
43 my $line3 = <INR1R>;
44 my $line4 = <INR1R>;
45 if ($line1 =~ /^@/){
46 $nbread++;
47 if ($line2=~/([ATGCNX]+)/i){
48 $nbbase += length($1);
49 }
50 }
51 }
52 print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
53 close (INR1R);
54
55
56
57
58 open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
59 $nbread=0;
60 $nbbase =0;
61 while (my $line1=<INR2R>){
62 my $line2 = <INR2R>;
63 my $line3 = <INR2R>;
64 my $line4 = <INR2R>;
65 if ($line1 =~ /^@/){
66 $nbread++;
67 if ($line2=~/([ATGCNX]+)/i){
68 $nbbase += length($1);
69 }
70 }
71 }
72 print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
73 close (INR2R);
74
75
76
77
78
79 open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n");
80 $nbread=0;
81 $nbbase =0;
82 while (my $line1=<INR1TP1>){
83 my $line2 = <INR1TP1>;
84 my $line3 = <INR1TP1>;
85 my $line4 = <INR1TP1>;
86 if ($line1 =~ /^@/){
87 $nbread++;
88 if ($line2=~/([ATGCNX]+)/i){
89 $nbbase += length($1);
90 }
91 else {
92 print STDERR "$line1\n$line2\n";
93 }
94 }
95 }
96 close (INR1TP1);
97 open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n");
98 while (my $line1=<INR1TP2>){
99 my $line2 = <INR1TP2>;
100 my $line3 = <INR1TP2>;
101 my $line4 = <INR1TP2>;
102 if ($line1 =~ /^@/){
103 $nbread++;
104 if ($line2=~/([ATGCNX]+)/i){
105 $nbbase += length($1);
106 }
107 else {
108 print STDERR "$line1\n$line2\n";
109 }
110 }
111 }
112 close (INR1TP2);
113 open(INR1TP3, $read1_trimmed_part3) or die ("Can't open $read1_trimmed_part3\n");
114 while (my $line1=<INR1TP3>){
115 my $line2 = <INR1TP3>;
116 my $line3 = <INR1TP3>;
117 my $line4 = <INR1TP3>;
118 if ($line1 =~ /^@/){
119 $nbread++;
120 if ($line2=~/([ATGCNX]+)/i){
121 $nbbase += length($1);
122 }
123 else {
124 print STDERR "$line1\n$line2\n";
125 }
126 }
127 }
128 close (INR1TP3);
129 open(INR1TP4, $read1_trimmed_part4) or die ("Can't open $read1_trimmed_part4\n");
130 while (my $line1=<INR1TP4>){
131 my $line2 = <INR1TP4>;
132 my $line3 = <INR1TP4>;
133 my $line4 = <INR1TP4>;
134 if ($line1 =~ /^@/){
135 $nbread++;
136 if ($line2=~/([ATGCNX]+)/i){
137 $nbbase += length($1);
138 }
139 else {
140 print STDERR "$line1\n$line2\n";
141 }
142 }
143 }
144 close (INR1TP4);
145 print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
146
147
148
149
150 open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n");
151 $nbread=0;
152 $nbbase =0;
153 while (my $line1=<INR2TP1>){
154 my $line2 = <INR2TP1>;
155 my $line3 = <INR2TP1>;
156 my $line4 = <INR2TP1>;
157 if ($line1 =~ /^@/){
158 $nbread++;
159 if ($line2=~/([ATGCNX]+)/i){
160 $nbbase += length($1);
161 }
162 else {
163 print STDERR "$line1\n$line2\n";
164 }
165 }
166 }
167 close (INR2TP2);
168 open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n");
169 while (my $line1=<INR2TP2>){
170 my $line2 = <INR2TP2>;
171 my $line3 = <INR2TP2>;
172 my $line4 = <INR2TP2>;
173 if ($line1 =~ /^@/){
174 $nbread++;
175 if ($line2=~/([ATGCNX]+)/i){
176 $nbbase += length($1);
177 }
178 else {
179 print STDERR "$line1\n$line2\n";
180 }
181 }
182 }
183 close (INR2TP2);
184 open(INR2TP3, $read2_trimmed_part3) or die ("Can't open $read2_trimmed_part3\n");
185 while (my $line1=<INR2TP3>){
186 my $line2 = <INR2TP3>;
187 my $line3 = <INR2TP3>;
188 my $line4 = <INR2TP3>;
189 if ($line1 =~ /^@/){
190 $nbread++;
191 if ($line2=~/([ATGCNX]+)/i){
192 $nbbase += length($1);
193 }
194 else {
195 print STDERR "$line1\n$line2\n";
196 }
197 }
198 }
199 close (INR2TP3);
200 open(INR2TP4, $read2_trimmed_part4) or die ("Can't open $read2_trimmed_part4\n");
201 while (my $line1=<INR2TP4>){
202 my $line2 = <INR2TP4>;
203 my $line3 = <INR2TP4>;
204 my $line4 = <INR2TP4>;
205 if ($line1 =~ /^@/){
206 $nbread++;
207 if ($line2=~/([ATGCNX]+)/i){
208 $nbbase += length($1);
209 }
210 else {
211 print STDERR "$line1\n$line2\n";
212 }
213 }
214 }
215 close (INR2TP4);
216 print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
217
218
219
220
221 print "\nSAM row\n";
222 open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n");
223 my %bitscore;
224 while (my $line=<SAMP1>){
225 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
226 my @fields = split(/\s+/,$line);
227 my $bit = $fields[1];
228 if ($bitscore{$bit}){
229 $bitscore{$bit}++;
230 }
231 else {
232 $bitscore{$bit}=1;
233 }
234 }
235 }
236 close (SAMP1);
237 open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n");
238 while (my $line=<SAMP2>){
239 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
240 my @fields = split(/\s+/,$line);
241 my $bit = $fields[1];
242 if ($bitscore{$bit}){
243 $bitscore{$bit}++;
244 }
245 else {
246 $bitscore{$bit}=1;
247 }
248 }
249 }
250 close (SAMP2);
251 open(SAMP3, $sam_row_part3) or die ("Can't open $sam_row_part3\n");
252 while (my $line=<SAMP3>){
253 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
254 my @fields = split(/\s+/,$line);
255 my $bit = $fields[1];
256 if ($bitscore{$bit}){
257 $bitscore{$bit}++;
258 }
259 else {
260 $bitscore{$bit}=1;
261 }
262 }
263 }
264 close (SAMP3);
265 open(SAMP4, $sam_row_part4) or die ("Can't open $sam_row_part4\n");
266 while (my $line=<SAMP4>){
267 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
268 my @fields = split(/\s+/,$line);
269 my $bit = $fields[1];
270 if ($bitscore{$bit}){
271 $bitscore{$bit}++;
272 }
273 else {
274 $bitscore{$bit}=1;
275 }
276 }
277 }
278 close (SAMP4);
279 print "bitscore\t";
280 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
281 print $key,"\t*\t";
282 }
283 print "\n";
284 print " number \t";
285 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
286 print $bitscore{$key},"\t*\t";
287 }
288 print "\n";
289
290
291
292
293 print "\nSAM filtered\n";
294 open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n");
295 undef %bitscore;
296 while (my $line=<SAMFP1>){
297 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
298 my @fields = split(/\s+/,$line);
299 my $bit = $fields[1];
300 if ($bitscore{$bit}){
301 $bitscore{$bit}++;
302 }
303 else {
304 $bitscore{$bit}=1;
305 }
306 }
307 }
308 close (SAMFP1);
309 open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n");
310 while (my $line=<SAMFP2>){
311 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
312 my @fields = split(/\s+/,$line);
313 my $bit = $fields[1];
314 if ($bitscore{$bit}){
315 $bitscore{$bit}++;
316 }
317 else {
318 $bitscore{$bit}=1;
319 }
320 }
321 }
322 close (SAMFP2);
323 open(SAMFP3, $sam_filtered_part3) or die ("Can't open $sam_filtered_part3\n");
324 while (my $line=<SAMFP3>){
325 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
326 my @fields = split(/\s+/,$line);
327 my $bit = $fields[1];
328 if ($bitscore{$bit}){
329 $bitscore{$bit}++;
330 }
331 else {
332 $bitscore{$bit}=1;
333 }
334 }
335 }
336 close (SAMFP3);
337 open(SAMFP4, $sam_filtered_part4) or die ("Can't open $sam_filtered_part4\n");
338 while (my $line=<SAMFP4>){
339 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
340 my @fields = split(/\s+/,$line);
341 my $bit = $fields[1];
342 if ($bitscore{$bit}){
343 $bitscore{$bit}++;
344 }
345 else {
346 $bitscore{$bit}=1;
347 }
348 }
349 }
350 close (SAMFP4);
351 print "bitscore\t";
352 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
353 print $key,"\t*\t";
354 }
355 print "\n";
356 print " number \t";
357 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
358 print $bitscore{$key},"\t*\t";
359 }
360 print "\n";
361
362
363
364
365 print "\nMPILEUP variant\n";
366 open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
367 my $nbvariant=0;
368 while (my $line=<MPV>){
369 my @fields = split(/\s+/,$line);
370 if ($#fields >= 4){
371 my $match = $fields[4];
372 $match =~ s/\$//g; #the read start at this position
373 $match =~ s/\^.//g; #the read end at this position followed by quality char
374 if ($match =~/[ACGTNacgtn]+/){
375 $nbvariant++;
376 }
377 }
378 else {
379 #print STDERR "Erreur : $line\n";
380 }
381 }
382 print "Variant detected :\t$nbvariant\n";
383 close (MPV);
384
385
386
387
388
389
390 print "\nMPILEUP filtered without dubious position\n";
391 open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
392 $nbvariant=0;
393 while (my $line=<LF>){
394 $nbvariant++;
395 }
396 print "Variant selected :\t$nbvariant\n";
397 close (LF);
398
399
400
401
402
403 print "\nMPILEUP filtered without dubious position and BLAST\n";
404 open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n");
405 $nbvariant=0;
406 while (my $line=<BFP1>){
407 $nbvariant++;
408 }
409 close (BFP1);
410 open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n");
411 while (my $line=<BFP2>){
412 $nbvariant++;
413 }
414 close (BFP2);
415 open(BFP3, $blast_filtered_part3) or die ("Can't open $blast_filtered_part3\n");
416 while (my $line=<BFP3>){
417 $nbvariant++;
418 }
419 close (BFP3);
420 open(BFP4, $blast_filtered_part4) or die ("Can't open $blast_filtered_part4\n");
421 while (my $line=<BFP4>){
422 $nbvariant++;
423 }
424 close (BFP4);
425 print "Variant selected :\t$nbvariant\n";
426
427
428
429
430
431 print "\nSNP selected after mpileup filtering : \t";
432 open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
433 $nbvariant=0;
434 while (my $line=<SNP>){
435 $nbvariant++;
436 }
437
438 print "$nbvariant\n";
439 close (SNP);
440
441
442
443
444
445
446
447