Mercurial > repos > mcharles > rapsosnp
view rapsodyn/rapsosnp_stats4x.pl @ 3:9332b9da7491 draft
Uploaded
author | mcharles |
---|---|
date | Thu, 11 Sep 2014 07:31:20 -0400 |
parents | 761fecc07fa9 |
children |
line wrap: on
line source
#!/usr/bin/perl
#
# rapsosnp_stats4x.pl - print summary statistics for a 4-way split SNP
# calling run.
#
# Positional arguments (25 file paths, in this order):
#    0      read 1 FASTQ before trimming
#    1      read 2 FASTQ before trimming
#    2-5    read 1 FASTQ after trimming, parts 1-4
#    6-9    read 2 FASTQ after trimming, parts 1-4
#   10-13   SAM before filtering, parts 1-4
#   14-17   SAM after filtering, parts 1-4
#   18      mpileup variant file
#   19      filtered variant list
#   20-23   BLAST-filtered variant lists, parts 1-4
#   24      selected SNP list
#
# The report is written to STDOUT. FASTQ records of the trimmed files whose
# sequence line contains no base character are echoed to STDERR.

use strict;
use warnings;

my $EXPECTED_ARGS = 25;

# Three-argument open() treats an undefined path as a request for an
# anonymous temporary file, so a missing argument must be rejected here
# rather than silently producing zero counts.
@ARGV >= $EXPECTED_ARGS
    or die "Usage: $0 <$EXPECTED_ARGS input files; see the header comment for their order>\n";

my ($read1_row, $read2_row) = @ARGV[0, 1];
my @read1_trimmed   = @ARGV[2 .. 5];
my @read2_trimmed   = @ARGV[6 .. 9];
my @sam_row         = @ARGV[10 .. 13];
my @sam_filtered    = @ARGV[14 .. 17];
my $mpileup_variant = $ARGV[18];
my $list_filtered   = $ARGV[19];
my @blast_filtered  = @ARGV[20 .. 23];
my $snp_selected    = $ARGV[24];

# --- FASTQ statistics ------------------------------------------------------

print_read_stats("Row Reads 1",     count_fastq(0, $read1_row));
print_read_stats("Row Reads 2",     count_fastq(0, $read2_row));
print_read_stats("Trimmed Reads 1", count_fastq(1, @read1_trimmed));
print_read_stats("Trimmed Reads 2", count_fastq(1, @read2_trimmed));

# --- SAM flag distribution -------------------------------------------------

print "\nSAM row\n";
print_flag_table(count_sam_flags(@sam_row));

print "\nSAM filtered\n";
print_flag_table(count_sam_flags(@sam_filtered));

# --- Variant counts --------------------------------------------------------

print "\nMPILEUP variant\n";
print "Variant detected :\t", count_mpileup_variants($mpileup_variant), "\n";

print "\nMPILEUP filtered without dubious position\n";
print "Variant selected :\t", count_lines($list_filtered), "\n";

print "\nMPILEUP filtered without dubious position and BLAST\n";
print "Variant selected :\t", count_lines(@blast_filtered), "\n";

# The label is printed before the file is opened, as a missing file must
# still leave the label in the report.
print "\nSNP selected after mpileup filtering : \t";
print count_lines($snp_selected), "\n";

# --- Helpers ---------------------------------------------------------------

# Open $path read-only and return a lexical filehandle.
# Dies with "Can't open <path>" when the file cannot be opened.
sub open_for_reading {
    my ($path) = @_;

    open(my $fh, '<', $path) or die("Can't open $path\n");
    return $fh;
}

# Count reads and bases over one or more FASTQ files.
#
#   $report_unparsed  true to echo records without any base to STDERR
#   @paths            FASTQ files whose totals are summed
#
# The input is consumed four lines at a time. A group counts as a read when
# its first line starts with '@'; its base count is the length of the first
# run of A/T/G/C/N/X characters (case-insensitive) on the second line.
# Returns the list (reads, bases).
sub count_fastq {
    my ($report_unparsed, @paths) = @_;

    my $reads = 0;
    my $bases = 0;

    for my $path (@paths) {
        my $fh = open_for_reading($path);

        while (my $header = <$fh>) {
            my $sequence = <$fh>;
            scalar <$fh>;    # '+' separator line, not needed
            scalar <$fh>;    # quality line, not needed

            next if $header !~ /^@/;
            $reads++;

            # A truncated file leaves the sequence undefined; treat it as
            # an empty sequence instead of matching against undef.
            $sequence = '' if !defined $sequence;

            if ($sequence =~ /([ATGCNX]+)/i) {
                $bases += length($1);
            }
            elsif ($report_unparsed) {
                print STDERR "$header\n$sequence\n";
            }
        }

        close($fh);
    }

    return ($reads, $bases);
}

# Print one line of read statistics under the given label.
sub print_read_stats {
    my ($label, $reads, $bases) = @_;

    print $label, "\t\tNumber of reads : ", $reads,
        "\tnumber of bases : ", $bases, "\n";
    return;
}

# Tally the second whitespace-separated field (the FLAG column of a SAM
# alignment record) over one or more SAM files.
#
# Every header line (leading '@') is skipped, not only @SQ and @PG, so that
# @HD/@RG/@CO lines are not counted as alignments. Lines with fewer than two
# fields are ignored. Returns a reference to a hash mapping value => count.
sub count_sam_flags {
    my (@paths) = @_;

    my %count_of;

    for my $path (@paths) {
        my $fh = open_for_reading($path);

        while (my $line = <$fh>) {
            next if $line =~ /^\@/;

            my @fields = split(/\s+/, $line);
            next if @fields < 2;

            $count_of{ $fields[1] }++;
        }

        close($fh);
    }

    return \%count_of;
}

# Print the two-row flag table: the values on the first row and their counts
# on the second, most frequent first. Ties are ordered by value so the
# output does not depend on hash ordering.
sub print_flag_table {
    my ($count_of) = @_;

    my @flags = sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
        keys %{$count_of};

    print "bitscore\t";
    for my $flag (@flags) {
        print $flag, "\t*\t";
    }
    print "\n";

    print " number \t";
    for my $flag (@flags) {
        print $count_of->{$flag}, "\t*\t";
    }
    print "\n";
    return;
}

# Count the lines of an mpileup file whose fifth column (read bases) still
# contains a base letter once the read start/end markers are removed.
# Lines with fewer than five fields are ignored.
sub count_mpileup_variants {
    my ($path) = @_;

    my $fh       = open_for_reading($path);
    my $variants = 0;

    while (my $line = <$fh>) {
        my @fields = split(/\s+/, $line);
        next if $#fields < 4;

        my $bases = $fields[4];

        # '^' followed by a mapping-quality character marks the start of a
        # read. It must be stripped first: the quality character can itself
        # be '$', and removing '$' beforehand would make '^' swallow the
        # following base.
        $bases =~ s/\^.//g;

        # '$' marks the end of a read.
        $bases =~ s/\$//g;

        $variants++ if $bases =~ /[ACGTNacgtn]/;
    }

    close($fh);
    return $variants;
}

# Return the total number of lines over one or more files.
sub count_lines {
    my (@paths) = @_;

    my $lines = 0;

    for my $path (@paths) {
        my $fh = open_for_reading($path);

        while (my $line = <$fh>) {
            $lines++;
        }

        close($fh);
    }

    return $lines;
}