view rapsodyn/rapsosnp_stats4x.pl @ 2:761fecc07fa9 draft

Uploaded
author mcharles
date Thu, 11 Sep 2014 03:10:47 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;

my $read1_row = $ARGV[0];
my $read2_row = $ARGV[1];

my $read1_trimmed_part1 = $ARGV[2];
my $read1_trimmed_part2 = $ARGV[3];
my $read1_trimmed_part3 = $ARGV[4];
my $read1_trimmed_part4 = $ARGV[5];
my $read2_trimmed_part1 = $ARGV[6];
my $read2_trimmed_part2 = $ARGV[7];
my $read2_trimmed_part3 = $ARGV[8];
my $read2_trimmed_part4 = $ARGV[9];

my $sam_row_part1 = $ARGV[10];
my $sam_row_part2 = $ARGV[11];
my $sam_row_part3 = $ARGV[12];
my $sam_row_part4 = $ARGV[13];
my $sam_filtered_part1 = $ARGV[14];
my $sam_filtered_part2 = $ARGV[15];
my $sam_filtered_part3 = $ARGV[16];
my $sam_filtered_part4 = $ARGV[17];

my $mpileup_variant = $ARGV[18];

my $list_filtered = $ARGV[19];

my $blast_filtered_part1 = $ARGV[20];
my $blast_filtered_part2 = $ARGV[21];
my $blast_filtered_part3 = $ARGV[22];
my $blast_filtered_part4 = $ARGV[23];

my $snp_selected = $ARGV[24];


open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
my $nbread=0;
my $nbbase =0;
while (my $line1=<INR1R>){
	my $line2 = <INR1R>;
	my $line3 = <INR1R>;
	my $line4 = <INR1R>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
	}
}
print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close (INR1R);




open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
$nbread=0;
$nbbase =0;
while (my $line1=<INR2R>){
	my $line2 = <INR2R>;
	my $line3 = <INR2R>;
	my $line4 = <INR2R>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
	}
}
print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
close (INR2R);





open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n");
$nbread=0;
$nbbase =0;
while (my $line1=<INR1TP1>){
	my $line2 = <INR1TP1>;
	my $line3 = <INR1TP1>;
	my $line4 = <INR1TP1>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR1TP1);
open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n");
while (my $line1=<INR1TP2>){
	my $line2 = <INR1TP2>;
	my $line3 = <INR1TP2>;
	my $line4 = <INR1TP2>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR1TP2);
open(INR1TP3, $read1_trimmed_part3) or die ("Can't open $read1_trimmed_part3\n");
while (my $line1=<INR1TP3>){
	my $line2 = <INR1TP3>;
	my $line3 = <INR1TP3>;
	my $line4 = <INR1TP3>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR1TP3);
open(INR1TP4, $read1_trimmed_part4) or die ("Can't open $read1_trimmed_part4\n");
while (my $line1=<INR1TP4>){
	my $line2 = <INR1TP4>;
	my $line3 = <INR1TP4>;
	my $line4 = <INR1TP4>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR1TP4);
print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";




open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n");
$nbread=0;
$nbbase =0;
while (my $line1=<INR2TP1>){
	my $line2 = <INR2TP1>;
	my $line3 = <INR2TP1>;
	my $line4 = <INR2TP1>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR2TP2);
open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n");
while (my $line1=<INR2TP2>){
	my $line2 = <INR2TP2>;
	my $line3 = <INR2TP2>;
	my $line4 = <INR2TP2>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR2TP2);
open(INR2TP3, $read2_trimmed_part3) or die ("Can't open $read2_trimmed_part3\n");
while (my $line1=<INR2TP3>){
	my $line2 = <INR2TP3>;
	my $line3 = <INR2TP3>;
	my $line4 = <INR2TP3>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR2TP3);
open(INR2TP4, $read2_trimmed_part4) or die ("Can't open $read2_trimmed_part4\n");
while (my $line1=<INR2TP4>){
	my $line2 = <INR2TP4>;
	my $line3 = <INR2TP4>;
	my $line4 = <INR2TP4>;
	if ($line1 =~ /^@/){
		$nbread++;
		if ($line2=~/([ATGCNX]+)/i){
			$nbbase += length($1);
		}
		else {
			print STDERR "$line1\n$line2\n";
		}
	}
}
close (INR2TP4);
print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";




print "\nSAM row\n";
open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n");
my %bitscore;
while (my $line=<SAMP1>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMP1);
open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n");
while (my $line=<SAMP2>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMP2);
open(SAMP3, $sam_row_part3) or die ("Can't open $sam_row_part3\n");
while (my $line=<SAMP3>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMP3);
open(SAMP4, $sam_row_part4) or die ("Can't open $sam_row_part4\n");
while (my $line=<SAMP4>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMP4);
print "bitscore\t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $key,"\t*\t";
}
print "\n";
print " number \t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $bitscore{$key},"\t*\t";
}
print "\n";




print "\nSAM filtered\n";
open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n");
undef %bitscore;
while (my $line=<SAMFP1>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMFP1);
open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n");
while (my $line=<SAMFP2>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMFP2);
open(SAMFP3, $sam_filtered_part3) or die ("Can't open $sam_filtered_part3\n");
while (my $line=<SAMFP3>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMFP3);
open(SAMFP4, $sam_filtered_part4) or die ("Can't open $sam_filtered_part4\n");
while (my $line=<SAMFP4>){
	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
		my @fields = split(/\s+/,$line);
		my $bit = $fields[1];
		if ($bitscore{$bit}){
			$bitscore{$bit}++;
		}
		else {
			$bitscore{$bit}=1;
		}
	}
}
close (SAMFP4);
print "bitscore\t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $key,"\t*\t";
}
print "\n";
print " number \t";
foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
	print $bitscore{$key},"\t*\t";
}
print "\n";




print "\nMPILEUP variant\n";
open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
my $nbvariant=0;
while (my $line=<MPV>){
	my @fields = split(/\s+/,$line);
	if ($#fields >= 4){
		my $match = $fields[4];
		$match =~ s/\$//g; #the read start at this position
		$match =~ s/\^.//g; #the read end at this position followed by quality char
		if ($match =~/[ACGTNacgtn]+/){
			$nbvariant++;
		}
	}
	else {
		#print STDERR "Erreur : $line\n";
	}
}
print "Variant detected :\t$nbvariant\n";
close (MPV);






print "\nMPILEUP filtered without dubious position\n";
open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
$nbvariant=0;
while (my $line=<LF>){
	$nbvariant++;
}
print "Variant selected :\t$nbvariant\n";
close (LF);





print "\nMPILEUP filtered without dubious position and BLAST\n";
open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n");
$nbvariant=0;
while (my $line=<BFP1>){
	$nbvariant++;
}
close (BFP1);
open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n");
while (my $line=<BFP2>){
	$nbvariant++;
}
close (BFP2);
open(BFP3, $blast_filtered_part3) or die ("Can't open $blast_filtered_part3\n");
while (my $line=<BFP3>){
	$nbvariant++;
}
close (BFP3);
open(BFP4, $blast_filtered_part4) or die ("Can't open $blast_filtered_part4\n");
while (my $line=<BFP4>){
	$nbvariant++;
}
close (BFP4);
print "Variant selected :\t$nbvariant\n";





print "\nSNP selected after mpileup filtering : \t";
open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
$nbvariant=0;
while (my $line=<SNP>){
	$nbvariant++;
}

print "$nbvariant\n";
close (SNP);