diff rapsodyn/rapsosnp_stats4x.pl @ 2:761fecc07fa9 draft

Uploaded
author mcharles
date Thu, 11 Sep 2014 03:10:47 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rapsodyn/rapsosnp_stats4x.pl	Thu Sep 11 03:10:47 2014 -0400
@@ -0,0 +1,447 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+my $read1_row = $ARGV[0];
+my $read2_row = $ARGV[1];
+
+my $read1_trimmed_part1 = $ARGV[2];
+my $read1_trimmed_part2 = $ARGV[3];
+my $read1_trimmed_part3 = $ARGV[4];
+my $read1_trimmed_part4 = $ARGV[5];
+my $read2_trimmed_part1 = $ARGV[6];
+my $read2_trimmed_part2 = $ARGV[7];
+my $read2_trimmed_part3 = $ARGV[8];
+my $read2_trimmed_part4 = $ARGV[9];
+
+my $sam_row_part1 = $ARGV[10];
+my $sam_row_part2 = $ARGV[11];
+my $sam_row_part3 = $ARGV[12];
+my $sam_row_part4 = $ARGV[13];
+my $sam_filtered_part1 = $ARGV[14];
+my $sam_filtered_part2 = $ARGV[15];
+my $sam_filtered_part3 = $ARGV[16];
+my $sam_filtered_part4 = $ARGV[17];
+
+my $mpileup_variant = $ARGV[18];
+
+my $list_filtered = $ARGV[19];
+
+my $blast_filtered_part1 = $ARGV[20];
+my $blast_filtered_part2 = $ARGV[21];
+my $blast_filtered_part3 = $ARGV[22];
+my $blast_filtered_part4 = $ARGV[23];
+
+my $snp_selected = $ARGV[24];
+
+
+open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
+my $nbread=0;
+my $nbbase =0;
+while (my $line1=<INR1R>){
+	my $line2 = <INR1R>;
+	my $line3 = <INR1R>;
+	my $line4 = <INR1R>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+	}
+}
+print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+close (INR1R);
+
+
+
+
+open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
+$nbread=0;
+$nbbase =0;
+while (my $line1=<INR2R>){
+	my $line2 = <INR2R>;
+	my $line3 = <INR2R>;
+	my $line4 = <INR2R>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+	}
+}
+print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+close (INR2R);
+
+
+
+
+
+open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n");
+$nbread=0;
+$nbbase =0;
+while (my $line1=<INR1TP1>){
+	my $line2 = <INR1TP1>;
+	my $line3 = <INR1TP1>;
+	my $line4 = <INR1TP1>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR1TP1);
+open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n");
+while (my $line1=<INR1TP2>){
+	my $line2 = <INR1TP2>;
+	my $line3 = <INR1TP2>;
+	my $line4 = <INR1TP2>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR1TP2);
+open(INR1TP3, $read1_trimmed_part3) or die ("Can't open $read1_trimmed_part3\n");
+while (my $line1=<INR1TP3>){
+	my $line2 = <INR1TP3>;
+	my $line3 = <INR1TP3>;
+	my $line4 = <INR1TP3>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR1TP3);
+open(INR1TP4, $read1_trimmed_part4) or die ("Can't open $read1_trimmed_part4\n");
+while (my $line1=<INR1TP4>){
+	my $line2 = <INR1TP4>;
+	my $line3 = <INR1TP4>;
+	my $line4 = <INR1TP4>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR1TP4);
+print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+
+
+
+
+open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n");
+$nbread=0;
+$nbbase =0;
+while (my $line1=<INR2TP1>){
+	my $line2 = <INR2TP1>;
+	my $line3 = <INR2TP1>;
+	my $line4 = <INR2TP1>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR2TP2);
+open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n");
+while (my $line1=<INR2TP2>){
+	my $line2 = <INR2TP2>;
+	my $line3 = <INR2TP2>;
+	my $line4 = <INR2TP2>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR2TP2);
+open(INR2TP3, $read2_trimmed_part3) or die ("Can't open $read2_trimmed_part3\n");
+while (my $line1=<INR2TP3>){
+	my $line2 = <INR2TP3>;
+	my $line3 = <INR2TP3>;
+	my $line4 = <INR2TP3>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR2TP3);
+open(INR2TP4, $read2_trimmed_part4) or die ("Can't open $read2_trimmed_part4\n");
+while (my $line1=<INR2TP4>){
+	my $line2 = <INR2TP4>;
+	my $line3 = <INR2TP4>;
+	my $line4 = <INR2TP4>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+close (INR2TP4);
+print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+
+
+
+
+print "\nSAM row\n";
+open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n");
+my %bitscore;
+while (my $line=<SAMP1>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMP1);
+open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n");
+while (my $line=<SAMP2>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMP2);
+open(SAMP3, $sam_row_part3) or die ("Can't open $sam_row_part3\n");
+while (my $line=<SAMP3>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMP3);
+open(SAMP4, $sam_row_part4) or die ("Can't open $sam_row_part4\n");
+while (my $line=<SAMP4>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMP4);
+print "bitscore\t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $key,"\t*\t";
+}
+print "\n";
+print " number \t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $bitscore{$key},"\t*\t";
+}
+print "\n";
+
+
+
+
+print "\nSAM filtered\n";
+open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n");
+undef %bitscore;
+while (my $line=<SAMFP1>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMFP1);
+open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n");
+while (my $line=<SAMFP2>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMFP2);
+open(SAMFP3, $sam_filtered_part3) or die ("Can't open $sam_filtered_part3\n");
+while (my $line=<SAMFP3>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMFP3);
+open(SAMFP4, $sam_filtered_part4) or die ("Can't open $sam_filtered_part4\n");
+while (my $line=<SAMFP4>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+close (SAMFP4);
+print "bitscore\t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $key,"\t*\t";
+}
+print "\n";
+print " number \t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $bitscore{$key},"\t*\t";
+}
+print "\n";
+
+
+
+
+print "\nMPILEUP variant\n";
+open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
+my $nbvariant=0;
+while (my $line=<MPV>){
+	my @fields = split(/\s+/,$line);
+	if ($#fields >= 4){
+		my $match = $fields[4];
+		$match =~ s/\$//g; #the read start at this position
+		$match =~ s/\^.//g; #the read end at this position followed by quality char
+		if ($match =~/[ACGTNacgtn]+/){
+			$nbvariant++;
+		}
+	}
+	else {
+		#print STDERR "Erreur : $line\n";
+	}
+}
+print "Variant detected :\t$nbvariant\n";
+close (MPV);
+
+
+
+
+
+
+print "\nMPILEUP filtered without dubious position\n";
+open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
+$nbvariant=0;
+while (my $line=<LF>){
+	$nbvariant++;
+}
+print "Variant selected :\t$nbvariant\n";
+close (LF);
+
+
+
+
+
+print "\nMPILEUP filtered without dubious position and BLAST\n";
+open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n");
+$nbvariant=0;
+while (my $line=<BFP1>){
+	$nbvariant++;
+}
+close (BFP1);
+open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n");
+while (my $line=<BFP2>){
+	$nbvariant++;
+}
+close (BFP2);
+open(BFP3, $blast_filtered_part3) or die ("Can't open $blast_filtered_part3\n");
+while (my $line=<BFP3>){
+	$nbvariant++;
+}
+close (BFP3);
+open(BFP4, $blast_filtered_part4) or die ("Can't open $blast_filtered_part4\n");
+while (my $line=<BFP4>){
+	$nbvariant++;
+}
+close (BFP4);
+print "Variant selected :\t$nbvariant\n";
+
+
+
+
+
+print "\nSNP selected after mpileup filtering : \t";
+open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
+$nbvariant=0;
+while (my $line=<SNP>){
+	$nbvariant++;
+}
+
+print "$nbvariant\n";
+close (SNP);
+
+
+
+
+
+
+
+