view rapsodyn/rapsosnp_stats.pl @ 4:9074a5104cdd draft

Uploaded
author mcharles
date Wed, 17 Sep 2014 04:20:08 -0400
parents 7f36bd129321
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;

my $read1_row = $ARGV[0];
my $read2_row = $ARGV[1];

my $read1_trimmed = $ARGV[2];
my $read2_trimmed = $ARGV[3];

my $sam_row = $ARGV[4];
my $sam_filtered = $ARGV[5];

my $mpileup_variant = $ARGV[6];

my $list_filtered = $ARGV[7];

my $blast_filtered = $ARGV[8];

my $snp_selected = $ARGV[9];


open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
my $nbread=0;
my $nbbase =0;
while (my $line1=<INR1R>){
	my $line2 = <INR1R>;
	my $line3 = <INR1R>;
	my $line4 = <INR1R>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
	}
}
print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close (INR1R);

open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
$nbread=0;
$nbbase =0;
while (my $line1=<INR2R>){
	my $line2 = <INR2R>;
	my $line3 = <INR2R>;
	my $line4 = <INR2R>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
	}
}
print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close (INR2R);

open(INR1T, $read1_trimmed) or die ("Can't open $read1_trimmed\n");
$nbread=0;
$nbbase =0;
while (my $line1=<INR1T>){
	my $line2 = <INR1T>;
	my $line3 = <INR1T>;
	my $line4 = <INR1T>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close (INR1T);

open(INR2T, $read2_trimmed) or die ("Can't open $read2_trimmed\n");
$nbread=0;
$nbbase =0;
while (my $line1=<INR2T>){
	my $line2 = <INR2T>;
	my $line3 = <INR2T>;
	my $line4 = <INR2T>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close (INR2T);

print "\nSAM row\n";
open(SAM, $sam_row) or die ("Can't open $sam_row\n");
my %bitscore;
while (my $line=<SAM>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}

print "bitscore\t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $key,"\t*\t";
}
print "\n";

print " number \t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $bitscore{$key},"\t*\t";
}
print "\n";
close (SAM);

print "\nSAM filtered\n";
open(SAMF, $sam_filtered) or die ("Can't open $sam_filtered\n");
undef %bitscore;
while (my $line=<SAMF>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}

print "bitscore\t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $key,"\t*\t";
}
print "\n";

print " number \t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $bitscore{$key},"\t*\t";
}
print "\n";
close (SAMF);

print "\nMPILEUP variant\n";
open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");

my $nbvariant=0;
while (my $line=<MPV>){
	my @fields = split(/\s+/,$line);
	if ($#fields >= 4){
		my $match = $fields[4];
		$match =~ s/\$//g; #the read start at this position
		$match =~ s/\^.//g; #the read end at this position followed by quality char
		if ($match =~/[ACGTNacgtn]+/){
			$nbvariant++;
		}
	}
	else {
		#print STDERR "Erreur : $line\n";
	}
}

print "Variant detected :\t$nbvariant\n";
close (MPV);


print "\nMPILEUP filtered without dubious position\n";
open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
$nbvariant=0;
while (my $line=<LF>){
	$nbvariant++;
}

print "Variant selected :\t$nbvariant\n";
close (LF);

print "\nMPILEUP filtered without dubious position and BLAST\n";
open(BF, $blast_filtered) or die ("Can't open $blast_filtered\n");
$nbvariant=0;
while (my $line=<BF>){
	$nbvariant++;
}

print "Variant selected :\t$nbvariant\n";
close (BF);


print "\nSNP selected after mpileup filtering : \t";
open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
$nbvariant=0;
while (my $line=<SNP>){
	$nbvariant++;
}

print "$nbvariant\n";
close (SNP);