annotate rapsodyn/rapsosnp_stats.pl @ 4:9074a5104cdd draft

Uploaded
author mcharles
date Wed, 17 Sep 2014 04:20:08 -0400
parents 7f36bd129321
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
2 use strict;
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
3 use warnings;
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
4
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Positional command-line arguments, captured in the order they are given:
#   0, 1  read files reported below as "Row Reads 1" / "Row Reads 2"
#   2, 3  read files reported below as "Trimmed Reads 1" / "Trimmed Reads 2"
#   4, 5  SAM files reported as "SAM row" / "SAM filtered"
#   6     pileup file reported as "MPILEUP variant"
#   7     list reported as "MPILEUP filtered without dubious position"
#   8     list reported as "... without dubious position and BLAST"
#   9     list reported as "SNP selected after mpileup filtering"
# Missing arguments leave the corresponding variable undefined.
my (
    $read1_row,       $read2_row,
    $read1_trimmed,   $read2_trimmed,
    $sam_row,         $sam_filtered,
    $mpileup_variant,
    $list_filtered,
    $blast_filtered,
    $snp_selected,
) = @ARGV;
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
21
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
22
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Count reads and bases in the first raw read file.
#
# The file is consumed four lines at a time (header, sequence, separator,
# quality). A record is counted as a read when its first line starts with
# '@'; its base count is the length of the first contiguous run of
# A/T/G/C/N/X characters (case-insensitive) on the second line.
#
# The file is opened with three-argument open and a lexical handle so that
# mode characters ('<', '>', '|') or surrounding whitespace in the file name
# are never interpreted by open. Three-argument open treats an undefined
# path as a request for an anonymous temporary file, hence the explicit
# guard to keep failing when the argument is missing.
die ("Can't open raw read 1 file: no file name given\n") unless defined $read1_row;
open(my $inr1r, '<', $read1_row) or die ("Can't open $read1_row: $!\n");
my $nbread = 0;    # also reused by the read-file sections that follow
my $nbbase = 0;    # also reused by the read-file sections that follow
while (my $line1 = <$inr1r>) {
    my $line2 = <$inr1r>;
    my $line3 = <$inr1r>;    # separator line, read only to stay in step
    my $line4 = <$inr1r>;    # quality line, read only to stay in step

    # A truncated final record yields undef here; treat it as an empty
    # sequence instead of triggering an "uninitialized value" warning.
    $line2 = '' unless defined $line2;

    next unless $line1 =~ /^@/;
    $nbread++;
    if ($line2 =~ /([ATGCNX]+)/i) {
        $nbbase += length($1);
    }
}
print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close ($inr1r);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
39
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Count reads and bases in the second raw read file.
#
# Same four-lines-per-record scan as for the first raw read file: a record
# is counted when its first line starts with '@', and its base count is the
# length of the first contiguous run of A/T/G/C/N/X characters
# (case-insensitive) on its second line.
#
# Three-argument open with a lexical handle keeps mode characters or
# whitespace in the file name from being interpreted by open. An undefined
# path would silently open an anonymous temporary file, so it is rejected
# explicitly.
die ("Can't open raw read 2 file: no file name given\n") unless defined $read2_row;
open(my $inr2r, '<', $read2_row) or die ("Can't open $read2_row: $!\n");
$nbread = 0;
$nbbase = 0;
while (my $line1 = <$inr2r>) {
    my $line2 = <$inr2r>;
    my $line3 = <$inr2r>;    # separator line, read only to stay in step
    my $line4 = <$inr2r>;    # quality line, read only to stay in step

    # A truncated final record yields undef here; treat it as an empty
    # sequence instead of triggering an "uninitialized value" warning.
    $line2 = '' unless defined $line2;

    next unless $line1 =~ /^@/;
    $nbread++;
    if ($line2 =~ /([ATGCNX]+)/i) {
        $nbbase += length($1);
    }
}
print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close ($inr2r);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
56
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Count reads and bases in the first trimmed read file.
#
# Records are four lines long; a record is counted when its first line
# starts with '@'. Its base count is the length of the first contiguous run
# of A/T/G/C/N/X characters (case-insensitive) on the second line. Records
# whose second line contains no such character are still counted as reads
# and are echoed (header and sequence lines) to STDERR.
#
# Three-argument open with a lexical handle keeps mode characters or
# whitespace in the file name from being interpreted by open. An undefined
# path would silently open an anonymous temporary file, so it is rejected
# explicitly.
die ("Can't open trimmed read 1 file: no file name given\n") unless defined $read1_trimmed;
open(my $inr1t, '<', $read1_trimmed) or die ("Can't open $read1_trimmed: $!\n");
$nbread = 0;
$nbbase = 0;
while (my $line1 = <$inr1t>) {
    my $line2 = <$inr1t>;
    my $line3 = <$inr1t>;    # separator line, read only to stay in step
    my $line4 = <$inr1t>;    # quality line, read only to stay in step

    # A truncated final record yields undef here; treat it as an empty
    # sequence instead of triggering "uninitialized value" warnings.
    $line2 = '' unless defined $line2;

    next unless $line1 =~ /^@/;
    $nbread++;
    if ($line2 =~ /([ATGCNX]+)/i) {
        $nbbase += length($1);
    }
    else {
        print STDERR "$line1\n$line2\n";
    }
}
print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close ($inr1t);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
76
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Count reads and bases in the second trimmed read file.
#
# Records are four lines long; a record is counted when its first line
# starts with '@'. Its base count is the length of the first contiguous run
# of A/T/G/C/N/X characters (case-insensitive) on the second line. Records
# whose second line contains no such character are still counted as reads
# and are echoed (header and sequence lines) to STDERR.
#
# Three-argument open with a lexical handle keeps mode characters or
# whitespace in the file name from being interpreted by open. An undefined
# path would silently open an anonymous temporary file, so it is rejected
# explicitly.
die ("Can't open trimmed read 2 file: no file name given\n") unless defined $read2_trimmed;
open(my $inr2t, '<', $read2_trimmed) or die ("Can't open $read2_trimmed: $!\n");
$nbread = 0;
$nbbase = 0;
while (my $line1 = <$inr2t>) {
    my $line2 = <$inr2t>;
    my $line3 = <$inr2t>;    # separator line, read only to stay in step
    my $line4 = <$inr2t>;    # quality line, read only to stay in step

    # A truncated final record yields undef here; treat it as an empty
    # sequence instead of triggering "uninitialized value" warnings.
    $line2 = '' unless defined $line2;

    next unless $line1 =~ /^@/;
    $nbread++;
    if ($line2 =~ /([ATGCNX]+)/i) {
        $nbbase += length($1);
    }
    else {
        print STDERR "$line1\n$line2\n";
    }
}
print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close ($inr2t);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
96
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Histogram of the second column (the FLAG field of a SAM alignment record)
# over the unfiltered SAM file, printed as two tab-separated rows: the
# distinct values, then how many records carry each value, most frequent
# first.
print "\nSAM row\n";
# Three-argument open treats an undefined path as a request for an
# anonymous temporary file, so a missing argument is rejected explicitly.
die ("Can't open raw SAM file: no file name given\n") unless defined $sam_row;
open(my $sam_in, '<', $sam_row) or die ("Can't open $sam_row: $!\n");
my %bitscore;    # also reused by the filtered-SAM section that follows
while (my $line = <$sam_in>) {
    # Every SAM header line (@HD, @SQ, @RG, @PG, @CO) starts with '@', and
    # the SAM specification excludes '@' from read names, so a leading '@'
    # identifies a header unambiguously. Skipping only @SQ/@PG would tally
    # the remaining header types as if they were alignments.
    next if $line =~ /^\@/;

    my @fields = split(/\s+/,$line);
    next if @fields < 2;    # blank or malformed line: no second column
    $bitscore{$fields[1]}++;
}
close ($sam_in);

# Sort once so both rows are guaranteed to use the same column order; ties
# on the count are broken by key so the output is reproducible across runs.
my @sam_row_flags = sort { $bitscore{$b} <=> $bitscore{$a} or $a cmp $b } keys %bitscore;

print "bitscore\t";
foreach my $key (@sam_row_flags) {
    print $key,"\t*\t";
}
print "\n";

print " number \t";
foreach my $key (@sam_row_flags) {
    print $bitscore{$key},"\t*\t";
}
print "\n";
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
125
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Histogram of the second column (the FLAG field of a SAM alignment record)
# over the filtered SAM file, printed as two tab-separated rows: the
# distinct values, then how many records carry each value, most frequent
# first.
print "\nSAM filtered\n";
# Three-argument open treats an undefined path as a request for an
# anonymous temporary file, so a missing argument is rejected explicitly.
die ("Can't open filtered SAM file: no file name given\n") unless defined $sam_filtered;
open(my $samf_in, '<', $sam_filtered) or die ("Can't open $sam_filtered: $!\n");
undef %bitscore;    # discard the counts gathered from the unfiltered file
while (my $line = <$samf_in>) {
    # Every SAM header line (@HD, @SQ, @RG, @PG, @CO) starts with '@', and
    # the SAM specification excludes '@' from read names, so a leading '@'
    # identifies a header unambiguously. Skipping only @SQ/@PG would tally
    # the remaining header types as if they were alignments.
    next if $line =~ /^\@/;

    my @fields = split(/\s+/,$line);
    next if @fields < 2;    # blank or malformed line: no second column
    $bitscore{$fields[1]}++;
}
close ($samf_in);

# Sort once so both rows are guaranteed to use the same column order; ties
# on the count are broken by key so the output is reproducible across runs.
my @sam_filtered_flags = sort { $bitscore{$b} <=> $bitscore{$a} or $a cmp $b } keys %bitscore;

print "bitscore\t";
foreach my $key (@sam_filtered_flags) {
    print $key,"\t*\t";
}
print "\n";

print " number \t";
foreach my $key (@sam_filtered_flags) {
    print $bitscore{$key},"\t*\t";
}
print "\n";
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
154
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Count the pileup lines whose read-bases column (fifth whitespace-separated
# field) still contains a nucleotide letter (A/C/G/T/N, either case) after
# the read-boundary markers have been removed.
print "\nMPILEUP variant\n";
# Three-argument open treats an undefined path as a request for an
# anonymous temporary file, so a missing argument is rejected explicitly.
die ("Can't open mpileup file: no file name given\n") unless defined $mpileup_variant;
open(my $mpv_in, '<', $mpileup_variant) or die ("Can't open $mpileup_variant: $!\n");

my $nbvariant = 0;    # also reused by the variant-list sections that follow
while (my $line = <$mpv_in>) {
    my @fields = split(/\s+/,$line);
    next if $#fields < 4;    # fewer than five columns: no read-bases field

    my $match = $fields[4];
    # '^' marks the start of a read and is followed by exactly one
    # mapping-quality character. These pairs must be removed first: the
    # quality character can itself be '$' (or a letter), and stripping '$'
    # beforehand would make '^' swallow the real base that follows it.
    $match =~ s/\^.//g;
    # '$' marks the end of a read.
    $match =~ s/\$//g;

    if ($match =~ /[ACGTNacgtn]+/) {
        $nbvariant++;
    }
}

print "Variant detected :\t$nbvariant\n";
close ($mpv_in);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
176
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
177
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Report the number of lines in the list of variants kept after filtering.
print "\nMPILEUP filtered without dubious position\n";
# Three-argument open with a lexical handle keeps mode characters or
# whitespace in the file name from being interpreted by open. An undefined
# path would silently open an anonymous temporary file, so it is rejected
# explicitly.
die ("Can't open filtered variant list: no file name given\n") unless defined $list_filtered;
open(my $lf_in, '<', $list_filtered) or die ("Can't open $list_filtered: $!\n");
$nbvariant = 0;
while (my $line = <$lf_in>) {
    $nbvariant++;    # every line counts, whatever its content
}

print "Variant selected :\t$nbvariant\n";
close ($lf_in);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
187
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Report the number of lines in the list of variants kept after the BLAST
# filtering step.
print "\nMPILEUP filtered without dubious position and BLAST\n";
# Three-argument open with a lexical handle keeps mode characters or
# whitespace in the file name from being interpreted by open. An undefined
# path would silently open an anonymous temporary file, so it is rejected
# explicitly.
die ("Can't open BLAST-filtered variant list: no file name given\n") unless defined $blast_filtered;
open(my $bf_in, '<', $blast_filtered) or die ("Can't open $blast_filtered: $!\n");
$nbvariant = 0;
while (my $line = <$bf_in>) {
    $nbvariant++;    # every line counts, whatever its content
}

print "Variant selected :\t$nbvariant\n";
close ($bf_in);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
197
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
198
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
# Report the number of lines in the final SNP selection file. The label is
# printed first and the count completes the same output line.
print "\nSNP selected after mpileup filtering : \t";
# Three-argument open with a lexical handle keeps mode characters or
# whitespace in the file name from being interpreted by open. An undefined
# path would silently open an anonymous temporary file, so it is rejected
# explicitly.
die ("Can't open selected SNP list: no file name given\n") unless defined $snp_selected;
open(my $snp_in, '<', $snp_selected) or die ("Can't open $snp_selected: $!\n");
$nbvariant = 0;
while (my $line = <$snp_in>) {
    $nbvariant++;    # every line counts, whatever its content
}

print "$nbvariant\n";
close ($snp_in);
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
208
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
209
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
210
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
211
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
212
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
213
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
214
7f36bd129321 Uploaded
mcharles
parents:
diff changeset
215