Mercurial > repos > mcharles > rapsosnp
diff rapsodyn/rapsosnp_stats.pl @ 1:7f36bd129321 draft
Uploaded
author | mcharles |
---|---|
date | Wed, 10 Sep 2014 10:24:01 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/rapsosnp_stats.pl Wed Sep 10 10:24:01 2014 -0400 @@ -0,0 +1,215 @@ +#!/usr/bin/perl +use strict; +use warnings; + +my $read1_row = $ARGV[0]; +my $read2_row = $ARGV[1]; + +my $read1_trimmed = $ARGV[2]; +my $read2_trimmed = $ARGV[3]; + +my $sam_row = $ARGV[4]; +my $sam_filtered = $ARGV[5]; + +my $mpileup_variant = $ARGV[6]; + +my $list_filtered = $ARGV[7]; + +my $blast_filtered = $ARGV[8]; + +my $snp_selected = $ARGV[9]; + + +open(INR1R, $read1_row) or die ("Can't open $read1_row\n"); +my $nbread=0; +my $nbbase =0; +while (my $line1=<INR1R>){ + my $line2 = <INR1R>; + my $line3 = <INR1R>; + my $line4 = <INR1R>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + } +} +print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; +close (INR1R); + +open(INR2R, $read2_row) or die ("Can't open $read2_row\n"); +$nbread=0; +$nbbase =0; +while (my $line1=<INR2R>){ + my $line2 = <INR2R>; + my $line3 = <INR2R>; + my $line4 = <INR2R>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + } +} +print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; +close (INR2R); + +open(INR1T, $read1_trimmed) or die ("Can't open $read1_trimmed\n"); +$nbread=0; +$nbbase =0; +while (my $line1=<INR1T>){ + my $line2 = <INR1T>; + my $line3 = <INR1T>; + my $line4 = <INR1T>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + else { + print STDERR "$line1\n$line2\n"; + } + } +} +print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; +close (INR1T); + +open(INR2T, $read2_trimmed) or die ("Can't open $read2_trimmed\n"); +$nbread=0; +$nbbase =0; +while (my $line1=<INR2T>){ + my $line2 = <INR2T>; + my $line3 = <INR2T>; + my $line4 = <INR2T>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + else { + print STDERR "$line1\n$line2\n"; + } + } +} +print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; +close (INR2T); + +print "\nSAM row\n"; +open(SAM, $sam_row) or die ("Can't open $sam_row\n"); +my %bitscore; +while (my $line=<SAM>){ + if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ + my @fields = split(/\s+/,$line); + my $bit = $fields[1]; + if ($bitscore{$bit}){ + $bitscore{$bit}++; + } + else { + $bitscore{$bit}=1; + } + } +} + +print "bitscore\t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $key,"\t*\t"; +} +print "\n"; + +print " number \t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $bitscore{$key},"\t*\t"; +} +print "\n"; +close (SAM); + +print "\nSAM filtered\n"; +open(SAMF, $sam_filtered) or die ("Can't open $sam_filtered\n"); +undef %bitscore; +while (my $line=<SAMF>){ + if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ + my @fields = split(/\s+/,$line); + my $bit = $fields[1]; + if ($bitscore{$bit}){ + $bitscore{$bit}++; + } + else { + $bitscore{$bit}=1; + } + } +} + +print "bitscore\t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $key,"\t*\t"; +} +print "\n"; + +print " number \t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $bitscore{$key},"\t*\t"; +} +print "\n"; +close (SAMF); + +print "\nMPILEUP variant\n"; +open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n"); + +my $nbvariant=0; +while (my $line=<MPV>){ + my @fields = split(/\s+/,$line); + if ($#fields >= 4){ + my $match = $fields[4]; + $match =~ s/\$//g; #the read start at this position + $match =~ s/\^.//g; #the read end at this position followed by quality char + if ($match =~/[ACGTNacgtn]+/){ + $nbvariant++; + } + } + else { + #print STDERR "Erreur : $line\n"; + } +} + +print "Variant detected :\t$nbvariant\n"; +close (MPV); + + +print "\nMPILEUP filtered without dubious position\n"; +open(LF, $list_filtered) or die ("Can't open $list_filtered\n"); +$nbvariant=0; +while (my $line=<LF>){ + $nbvariant++; +} + +print "Variant selected :\t$nbvariant\n"; +close (LF); + +print "\nMPILEUP filtered without dubious position and BLAST\n"; +open(BF, $blast_filtered) or die ("Can't open $blast_filtered\n"); +$nbvariant=0; +while (my $line=<BF>){ + $nbvariant++; +} + +print "Variant selected :\t$nbvariant\n"; +close (BF); + + +print "\nSNP selected after mpileup filtering : \t"; +open(SNP, $snp_selected) or die ("Can't open $snp_selected\n"); +$nbvariant=0; +while (my $line=<SNP>){ + $nbvariant++; +} + +print "$nbvariant\n"; +close (SNP); + + + + + + + +