2
|
1 #!/usr/bin/perl
|
|
2 use strict;
|
|
3 use warnings;
|
|
4
|
|
5 my $read1_row = $ARGV[0];
|
|
6 my $read2_row = $ARGV[1];
|
|
7
|
|
8 my $read1_trimmed_part1 = $ARGV[2];
|
|
9 my $read1_trimmed_part2 = $ARGV[3];
|
|
10 my $read2_trimmed_part1 = $ARGV[4];
|
|
11 my $read2_trimmed_part2 = $ARGV[5];
|
|
12
|
|
13 my $sam_row_part1 = $ARGV[6];
|
|
14 my $sam_row_part2 = $ARGV[7];
|
|
15 my $sam_filtered_part1 = $ARGV[8];
|
|
16 my $sam_filtered_part2 = $ARGV[9];
|
|
17
|
|
18 my $mpileup_variant = $ARGV[10];
|
|
19
|
|
20 my $list_filtered = $ARGV[11];
|
|
21
|
|
22 my $blast_filtered_part1 = $ARGV[12];
|
|
23 my $blast_filtered_part2 = $ARGV[13];
|
|
24
|
|
25 my $snp_selected = $ARGV[14];
|
|
26
|
|
27
|
|
28 open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
|
|
29 my $nbread=0;
|
|
30 my $nbbase =0;
|
|
31 while (my $line1=<INR1R>){
|
|
32 my $line2 = <INR1R>;
|
|
33 my $line3 = <INR1R>;
|
|
34 my $line4 = <INR1R>;
|
|
35 if ($line1 =~ /^@/){
|
|
36 $nbread++;
|
|
37 if ($line2=~/([ATGCNX]+)/i){
|
|
38 $nbbase += length($1);
|
|
39 }
|
|
40 }
|
|
41 }
|
|
42 print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
43 close (INR1R);
|
|
44
|
|
45
|
|
46
|
|
47
|
|
48 open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
|
|
49 $nbread=0;
|
|
50 $nbbase =0;
|
|
51 while (my $line1=<INR2R>){
|
|
52 my $line2 = <INR2R>;
|
|
53 my $line3 = <INR2R>;
|
|
54 my $line4 = <INR2R>;
|
|
55 if ($line1 =~ /^@/){
|
|
56 $nbread++;
|
|
57 if ($line2=~/([ATGCNX]+)/i){
|
|
58 $nbbase += length($1);
|
|
59 }
|
|
60 }
|
|
61 }
|
|
62 print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
63 close (INR2R);
|
|
64
|
|
65
|
|
66
|
|
67
|
|
68
|
|
69 open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n");
|
|
70 $nbread=0;
|
|
71 $nbbase =0;
|
|
72 while (my $line1=<INR1TP1>){
|
|
73 my $line2 = <INR1TP1>;
|
|
74 my $line3 = <INR1TP1>;
|
|
75 my $line4 = <INR1TP1>;
|
|
76 if ($line1 =~ /^@/){
|
|
77 $nbread++;
|
|
78 if ($line2=~/([ATGCNX]+)/i){
|
|
79 $nbbase += length($1);
|
|
80 }
|
|
81 else {
|
|
82 print STDERR "$line1\n$line2\n";
|
|
83 }
|
|
84 }
|
|
85 }
|
|
86 close (INR1TP1);
|
|
87 open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n");
|
|
88 while (my $line1=<INR1TP2>){
|
|
89 my $line2 = <INR1TP2>;
|
|
90 my $line3 = <INR1TP2>;
|
|
91 my $line4 = <INR1TP2>;
|
|
92 if ($line1 =~ /^@/){
|
|
93 $nbread++;
|
|
94 if ($line2=~/([ATGCNX]+)/i){
|
|
95 $nbbase += length($1);
|
|
96 }
|
|
97 else {
|
|
98 print STDERR "$line1\n$line2\n";
|
|
99 }
|
|
100 }
|
|
101 }
|
|
102 close (INR1TP2);
|
|
103 print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
104
|
|
105
|
|
106
|
|
107
|
|
108 open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n");
|
|
109 $nbread=0;
|
|
110 $nbbase =0;
|
|
111 while (my $line1=<INR2TP1>){
|
|
112 my $line2 = <INR2TP1>;
|
|
113 my $line3 = <INR2TP1>;
|
|
114 my $line4 = <INR2TP1>;
|
|
115 if ($line1 =~ /^@/){
|
|
116 $nbread++;
|
|
117 if ($line2=~/([ATGCNX]+)/i){
|
|
118 $nbbase += length($1);
|
|
119 }
|
|
120 else {
|
|
121 print STDERR "$line1\n$line2\n";
|
|
122 }
|
|
123 }
|
|
124 }
|
|
125 close (INR2TP2);
|
|
126 open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n");
|
|
127 while (my $line1=<INR2TP2>){
|
|
128 my $line2 = <INR2TP2>;
|
|
129 my $line3 = <INR2TP2>;
|
|
130 my $line4 = <INR2TP2>;
|
|
131 if ($line1 =~ /^@/){
|
|
132 $nbread++;
|
|
133 if ($line2=~/([ATGCNX]+)/i){
|
|
134 $nbbase += length($1);
|
|
135 }
|
|
136 else {
|
|
137 print STDERR "$line1\n$line2\n";
|
|
138 }
|
|
139 }
|
|
140 }
|
|
141 close (INR2TP2);
|
|
142 print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
|
|
143
|
|
144
|
|
145
|
|
146
|
|
147 print "\nSAM row\n";
|
|
148 open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n");
|
|
149 my %bitscore;
|
|
150 while (my $line=<SAMP1>){
|
|
151 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
152 my @fields = split(/\s+/,$line);
|
|
153 my $bit = $fields[1];
|
|
154 if ($bitscore{$bit}){
|
|
155 $bitscore{$bit}++;
|
|
156 }
|
|
157 else {
|
|
158 $bitscore{$bit}=1;
|
|
159 }
|
|
160 }
|
|
161 }
|
|
162 close (SAMP1);
|
|
163 open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n");
|
|
164 while (my $line=<SAMP2>){
|
|
165 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
166 my @fields = split(/\s+/,$line);
|
|
167 my $bit = $fields[1];
|
|
168 if ($bitscore{$bit}){
|
|
169 $bitscore{$bit}++;
|
|
170 }
|
|
171 else {
|
|
172 $bitscore{$bit}=1;
|
|
173 }
|
|
174 }
|
|
175 }
|
|
176 close (SAMP2);
|
|
177 print "bitscore\t";
|
|
178 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
179 print $key,"\t*\t";
|
|
180 }
|
|
181 print "\n";
|
|
182 print " number \t";
|
|
183 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
184 print $bitscore{$key},"\t*\t";
|
|
185 }
|
|
186 print "\n";
|
|
187
|
|
188
|
|
189
|
|
190
|
|
191 print "\nSAM filtered\n";
|
|
192 open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n");
|
|
193 undef %bitscore;
|
|
194 while (my $line=<SAMFP1>){
|
|
195 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
196 my @fields = split(/\s+/,$line);
|
|
197 my $bit = $fields[1];
|
|
198 if ($bitscore{$bit}){
|
|
199 $bitscore{$bit}++;
|
|
200 }
|
|
201 else {
|
|
202 $bitscore{$bit}=1;
|
|
203 }
|
|
204 }
|
|
205 }
|
|
206 close (SAMFP1);
|
|
207 open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n");
|
|
208 while (my $line=<SAMFP2>){
|
|
209 if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
|
|
210 my @fields = split(/\s+/,$line);
|
|
211 my $bit = $fields[1];
|
|
212 if ($bitscore{$bit}){
|
|
213 $bitscore{$bit}++;
|
|
214 }
|
|
215 else {
|
|
216 $bitscore{$bit}=1;
|
|
217 }
|
|
218 }
|
|
219 }
|
|
220 close (SAMFP2);
|
|
221 print "bitscore\t";
|
|
222 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
223 print $key,"\t*\t";
|
|
224 }
|
|
225 print "\n";
|
|
226 print " number \t";
|
|
227 foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
|
|
228 print $bitscore{$key},"\t*\t";
|
|
229 }
|
|
230 print "\n";
|
|
231
|
|
232
|
|
233
|
|
234
|
|
235 print "\nMPILEUP variant\n";
|
|
236 open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
|
|
237 my $nbvariant=0;
|
|
238 while (my $line=<MPV>){
|
|
239 my @fields = split(/\s+/,$line);
|
|
240 if ($#fields >= 4){
|
|
241 my $match = $fields[4];
|
|
242 $match =~ s/\$//g; #the read start at this position
|
|
243 $match =~ s/\^.//g; #the read end at this position followed by quality char
|
|
244 if ($match =~/[ACGTNacgtn]+/){
|
|
245 $nbvariant++;
|
|
246 }
|
|
247 }
|
|
248 else {
|
|
249 #print STDERR "Erreur : $line\n";
|
|
250 }
|
|
251 }
|
|
252 print "Variant detected :\t$nbvariant\n";
|
|
253 close (MPV);
|
|
254
|
|
255
|
|
256
|
|
257
|
|
258
|
|
259
|
|
260 print "\nMPILEUP filtered without dubious position\n";
|
|
261 open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
|
|
262 $nbvariant=0;
|
|
263 while (my $line=<LF>){
|
|
264 $nbvariant++;
|
|
265 }
|
|
266 print "Variant selected :\t$nbvariant\n";
|
|
267 close (LF);
|
|
268
|
|
269
|
|
270
|
|
271
|
|
272
|
|
273 print "\nMPILEUP filtered without dubious position and BLAST\n";
|
|
274 open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n");
|
|
275 $nbvariant=0;
|
|
276 while (my $line=<BFP1>){
|
|
277 $nbvariant++;
|
|
278 }
|
|
279 close (BFP1);
|
|
280 open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n");
|
|
281 while (my $line=<BFP2>){
|
|
282 $nbvariant++;
|
|
283 }
|
|
284 close (BFP2);
|
|
285 print "Variant selected :\t$nbvariant\n";
|
|
286
|
|
287
|
|
288
|
|
289
|
|
290
|
|
291 print "\nSNP selected after mpileup filtering : \t";
|
|
292 open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
|
|
293 $nbvariant=0;
|
|
294 while (my $line=<SNP>){
|
|
295 $nbvariant++;
|
|
296 }
|
|
297
|
|
298 print "$nbvariant\n";
|
|
299 close (SNP);
|
|
300
|
|
301
|
|
302
|
|
303
|
|
304
|
|
305
|
|
306
|
|
307
|