Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/rapsosnp_stats2x.pl @ 2:761fecc07fa9 draft
Uploaded
author | mcharles |
---|---|
date | Thu, 11 Sep 2014 03:10:47 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:7f36bd129321 | 2:761fecc07fa9 |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use warnings; | |
4 | |
5 my $read1_row = $ARGV[0]; | |
6 my $read2_row = $ARGV[1]; | |
7 | |
8 my $read1_trimmed_part1 = $ARGV[2]; | |
9 my $read1_trimmed_part2 = $ARGV[3]; | |
10 my $read2_trimmed_part1 = $ARGV[4]; | |
11 my $read2_trimmed_part2 = $ARGV[5]; | |
12 | |
13 my $sam_row_part1 = $ARGV[6]; | |
14 my $sam_row_part2 = $ARGV[7]; | |
15 my $sam_filtered_part1 = $ARGV[8]; | |
16 my $sam_filtered_part2 = $ARGV[9]; | |
17 | |
18 my $mpileup_variant = $ARGV[10]; | |
19 | |
20 my $list_filtered = $ARGV[11]; | |
21 | |
22 my $blast_filtered_part1 = $ARGV[12]; | |
23 my $blast_filtered_part2 = $ARGV[13]; | |
24 | |
25 my $snp_selected = $ARGV[14]; | |
26 | |
27 | |
28 open(INR1R, $read1_row) or die ("Can't open $read1_row\n"); | |
29 my $nbread=0; | |
30 my $nbbase =0; | |
31 while (my $line1=<INR1R>){ | |
32 my $line2 = <INR1R>; | |
33 my $line3 = <INR1R>; | |
34 my $line4 = <INR1R>; | |
35 if ($line1 =~ /^@/){ | |
36 $nbread++; | |
37 if ($line2=~/([ATGCNX]+)/i){ | |
38 $nbbase += length($1); | |
39 } | |
40 } | |
41 } | |
42 print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
43 close (INR1R); | |
44 | |
45 | |
46 | |
47 | |
48 open(INR2R, $read2_row) or die ("Can't open $read2_row\n"); | |
49 $nbread=0; | |
50 $nbbase =0; | |
51 while (my $line1=<INR2R>){ | |
52 my $line2 = <INR2R>; | |
53 my $line3 = <INR2R>; | |
54 my $line4 = <INR2R>; | |
55 if ($line1 =~ /^@/){ | |
56 $nbread++; | |
57 if ($line2=~/([ATGCNX]+)/i){ | |
58 $nbbase += length($1); | |
59 } | |
60 } | |
61 } | |
62 print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
63 close (INR2R); | |
64 | |
65 | |
66 | |
67 | |
68 | |
69 open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n"); | |
70 $nbread=0; | |
71 $nbbase =0; | |
72 while (my $line1=<INR1TP1>){ | |
73 my $line2 = <INR1TP1>; | |
74 my $line3 = <INR1TP1>; | |
75 my $line4 = <INR1TP1>; | |
76 if ($line1 =~ /^@/){ | |
77 $nbread++; | |
78 if ($line2=~/([ATGCNX]+)/i){ | |
79 $nbbase += length($1); | |
80 } | |
81 else { | |
82 print STDERR "$line1\n$line2\n"; | |
83 } | |
84 } | |
85 } | |
86 close (INR1TP1); | |
87 open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n"); | |
88 while (my $line1=<INR1TP2>){ | |
89 my $line2 = <INR1TP2>; | |
90 my $line3 = <INR1TP2>; | |
91 my $line4 = <INR1TP2>; | |
92 if ($line1 =~ /^@/){ | |
93 $nbread++; | |
94 if ($line2=~/([ATGCNX]+)/i){ | |
95 $nbbase += length($1); | |
96 } | |
97 else { | |
98 print STDERR "$line1\n$line2\n"; | |
99 } | |
100 } | |
101 } | |
102 close (INR1TP2); | |
103 print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
104 | |
105 | |
106 | |
107 | |
108 open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n"); | |
109 $nbread=0; | |
110 $nbbase =0; | |
111 while (my $line1=<INR2TP1>){ | |
112 my $line2 = <INR2TP1>; | |
113 my $line3 = <INR2TP1>; | |
114 my $line4 = <INR2TP1>; | |
115 if ($line1 =~ /^@/){ | |
116 $nbread++; | |
117 if ($line2=~/([ATGCNX]+)/i){ | |
118 $nbbase += length($1); | |
119 } | |
120 else { | |
121 print STDERR "$line1\n$line2\n"; | |
122 } | |
123 } | |
124 } | |
125 close (INR2TP2); | |
126 open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n"); | |
127 while (my $line1=<INR2TP2>){ | |
128 my $line2 = <INR2TP2>; | |
129 my $line3 = <INR2TP2>; | |
130 my $line4 = <INR2TP2>; | |
131 if ($line1 =~ /^@/){ | |
132 $nbread++; | |
133 if ($line2=~/([ATGCNX]+)/i){ | |
134 $nbbase += length($1); | |
135 } | |
136 else { | |
137 print STDERR "$line1\n$line2\n"; | |
138 } | |
139 } | |
140 } | |
141 close (INR2TP2); | |
142 print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; | |
143 | |
144 | |
145 | |
146 | |
147 print "\nSAM row\n"; | |
148 open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n"); | |
149 my %bitscore; | |
150 while (my $line=<SAMP1>){ | |
151 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
152 my @fields = split(/\s+/,$line); | |
153 my $bit = $fields[1]; | |
154 if ($bitscore{$bit}){ | |
155 $bitscore{$bit}++; | |
156 } | |
157 else { | |
158 $bitscore{$bit}=1; | |
159 } | |
160 } | |
161 } | |
162 close (SAMP1); | |
163 open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n"); | |
164 while (my $line=<SAMP2>){ | |
165 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
166 my @fields = split(/\s+/,$line); | |
167 my $bit = $fields[1]; | |
168 if ($bitscore{$bit}){ | |
169 $bitscore{$bit}++; | |
170 } | |
171 else { | |
172 $bitscore{$bit}=1; | |
173 } | |
174 } | |
175 } | |
176 close (SAMP2); | |
177 print "bitscore\t"; | |
178 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
179 print $key,"\t*\t"; | |
180 } | |
181 print "\n"; | |
182 print " number \t"; | |
183 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
184 print $bitscore{$key},"\t*\t"; | |
185 } | |
186 print "\n"; | |
187 | |
188 | |
189 | |
190 | |
191 print "\nSAM filtered\n"; | |
192 open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n"); | |
193 undef %bitscore; | |
194 while (my $line=<SAMFP1>){ | |
195 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
196 my @fields = split(/\s+/,$line); | |
197 my $bit = $fields[1]; | |
198 if ($bitscore{$bit}){ | |
199 $bitscore{$bit}++; | |
200 } | |
201 else { | |
202 $bitscore{$bit}=1; | |
203 } | |
204 } | |
205 } | |
206 close (SAMFP1); | |
207 open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n"); | |
208 while (my $line=<SAMFP2>){ | |
209 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ | |
210 my @fields = split(/\s+/,$line); | |
211 my $bit = $fields[1]; | |
212 if ($bitscore{$bit}){ | |
213 $bitscore{$bit}++; | |
214 } | |
215 else { | |
216 $bitscore{$bit}=1; | |
217 } | |
218 } | |
219 } | |
220 close (SAMFP2); | |
221 print "bitscore\t"; | |
222 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
223 print $key,"\t*\t"; | |
224 } | |
225 print "\n"; | |
226 print " number \t"; | |
227 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { | |
228 print $bitscore{$key},"\t*\t"; | |
229 } | |
230 print "\n"; | |
231 | |
232 | |
233 | |
234 | |
235 print "\nMPILEUP variant\n"; | |
236 open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n"); | |
237 my $nbvariant=0; | |
238 while (my $line=<MPV>){ | |
239 my @fields = split(/\s+/,$line); | |
240 if ($#fields >= 4){ | |
241 my $match = $fields[4]; | |
242 $match =~ s/\$//g; #the read start at this position | |
243 $match =~ s/\^.//g; #the read end at this position followed by quality char | |
244 if ($match =~/[ACGTNacgtn]+/){ | |
245 $nbvariant++; | |
246 } | |
247 } | |
248 else { | |
249 #print STDERR "Erreur : $line\n"; | |
250 } | |
251 } | |
252 print "Variant detected :\t$nbvariant\n"; | |
253 close (MPV); | |
254 | |
255 | |
256 | |
257 | |
258 | |
259 | |
260 print "\nMPILEUP filtered without dubious position\n"; | |
261 open(LF, $list_filtered) or die ("Can't open $list_filtered\n"); | |
262 $nbvariant=0; | |
263 while (my $line=<LF>){ | |
264 $nbvariant++; | |
265 } | |
266 print "Variant selected :\t$nbvariant\n"; | |
267 close (LF); | |
268 | |
269 | |
270 | |
271 | |
272 | |
273 print "\nMPILEUP filtered without dubious position and BLAST\n"; | |
274 open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n"); | |
275 $nbvariant=0; | |
276 while (my $line=<BFP1>){ | |
277 $nbvariant++; | |
278 } | |
279 close (BFP1); | |
280 open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n"); | |
281 while (my $line=<BFP2>){ | |
282 $nbvariant++; | |
283 } | |
284 close (BFP2); | |
285 print "Variant selected :\t$nbvariant\n"; | |
286 | |
287 | |
288 | |
289 | |
290 | |
291 print "\nSNP selected after mpileup filtering : \t"; | |
292 open(SNP, $snp_selected) or die ("Can't open $snp_selected\n"); | |
293 $nbvariant=0; | |
294 while (my $line=<SNP>){ | |
295 $nbvariant++; | |
296 } | |
297 | |
298 print "$nbvariant\n"; | |
299 close (SNP); | |
300 | |
301 | |
302 | |
303 | |
304 | |
305 | |
306 | |
307 |