Mercurial > repos > mcharles > rapsosnp
diff rapsodyn/rapsosnp_stats2x.pl @ 2:761fecc07fa9 draft
Uploaded
author | mcharles |
---|---|
date | Thu, 11 Sep 2014 03:10:47 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/rapsosnp_stats2x.pl Thu Sep 11 03:10:47 2014 -0400 @@ -0,0 +1,307 @@ +#!/usr/bin/perl +use strict; +use warnings; + +my $read1_row = $ARGV[0]; +my $read2_row = $ARGV[1]; + +my $read1_trimmed_part1 = $ARGV[2]; +my $read1_trimmed_part2 = $ARGV[3]; +my $read2_trimmed_part1 = $ARGV[4]; +my $read2_trimmed_part2 = $ARGV[5]; + +my $sam_row_part1 = $ARGV[6]; +my $sam_row_part2 = $ARGV[7]; +my $sam_filtered_part1 = $ARGV[8]; +my $sam_filtered_part2 = $ARGV[9]; + +my $mpileup_variant = $ARGV[10]; + +my $list_filtered = $ARGV[11]; + +my $blast_filtered_part1 = $ARGV[12]; +my $blast_filtered_part2 = $ARGV[13]; + +my $snp_selected = $ARGV[14]; + + +open(INR1R, $read1_row) or die ("Can't open $read1_row\n"); +my $nbread=0; +my $nbbase =0; +while (my $line1=<INR1R>){ + my $line2 = <INR1R>; + my $line3 = <INR1R>; + my $line4 = <INR1R>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + } +} +print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; +close (INR1R); + + + + +open(INR2R, $read2_row) or die ("Can't open $read2_row\n"); +$nbread=0; +$nbbase =0; +while (my $line1=<INR2R>){ + my $line2 = <INR2R>; + my $line3 = <INR2R>; + my $line4 = <INR2R>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + } +} +print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; +close (INR2R); + + + + + +open(INR1TP1, $read1_trimmed_part1) or die ("Can't open $read1_trimmed_part1\n"); +$nbread=0; +$nbbase =0; +while (my $line1=<INR1TP1>){ + my $line2 = <INR1TP1>; + my $line3 = <INR1TP1>; + my $line4 = <INR1TP1>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + else { + print STDERR "$line1\n$line2\n"; + } + } +} +close (INR1TP1); +open(INR1TP2, $read1_trimmed_part2) or die ("Can't open $read1_trimmed_part2\n"); +while (my $line1=<INR1TP2>){ + my $line2 = <INR1TP2>; + my $line3 = <INR1TP2>; + my $line4 = <INR1TP2>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + else { + print STDERR "$line1\n$line2\n"; + } + } +} +close (INR1TP2); +print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; + + + + +open(INR2TP1, $read2_trimmed_part1) or die ("Can't open $read2_trimmed_part1\n"); +$nbread=0; +$nbbase =0; +while (my $line1=<INR2TP1>){ + my $line2 = <INR2TP1>; + my $line3 = <INR2TP1>; + my $line4 = <INR2TP1>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + else { + print STDERR "$line1\n$line2\n"; + } + } +} +close (INR2TP2); +open(INR2TP2, $read2_trimmed_part2) or die ("Can't open $read2_trimmed_part2\n"); +while (my $line1=<INR2TP2>){ + my $line2 = <INR2TP2>; + my $line3 = <INR2TP2>; + my $line4 = <INR2TP2>; + if ($line1 =~ /^@/){ + $nbread++; + if ($line2=~/([ATGCNX]+)/i){ + $nbbase += length($1); + } + else { + print STDERR "$line1\n$line2\n"; + } + } +} +close (INR2TP2); +print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n"; + + + + +print "\nSAM row\n"; +open(SAMP1, $sam_row_part1) or die ("Can't open $sam_row_part1\n"); +my %bitscore; +while (my $line=<SAMP1>){ + if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ + my @fields = split(/\s+/,$line); + my $bit = $fields[1]; + if ($bitscore{$bit}){ + $bitscore{$bit}++; + } + else { + $bitscore{$bit}=1; + } + } +} +close (SAMP1); +open(SAMP2, $sam_row_part2) or die ("Can't open $sam_row_part2\n"); +while (my $line=<SAMP2>){ + if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ + my @fields = split(/\s+/,$line); + my $bit = $fields[1]; + if ($bitscore{$bit}){ + $bitscore{$bit}++; + } + else { + $bitscore{$bit}=1; + } + } +} +close (SAMP2); +print "bitscore\t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $key,"\t*\t"; +} +print "\n"; +print " number \t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $bitscore{$key},"\t*\t"; +} +print "\n"; + + + + +print "\nSAM filtered\n"; +open(SAMFP1, $sam_filtered_part1) or die ("Can't open $sam_filtered_part1\n"); +undef %bitscore; +while (my $line=<SAMFP1>){ + if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ + my @fields = split(/\s+/,$line); + my $bit = $fields[1]; + if ($bitscore{$bit}){ + $bitscore{$bit}++; + } + else { + $bitscore{$bit}=1; + } + } +} +close (SAMFP1); +open(SAMFP2, $sam_filtered_part2) or die ("Can't open $sam_filtered_part2\n"); +while (my $line=<SAMFP2>){ + if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){ + my @fields = split(/\s+/,$line); + my $bit = $fields[1]; + if ($bitscore{$bit}){ + $bitscore{$bit}++; + } + else { + $bitscore{$bit}=1; + } + } +} +close (SAMFP2); +print "bitscore\t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $key,"\t*\t"; +} +print "\n"; +print " number \t"; +foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) { + print $bitscore{$key},"\t*\t"; +} +print "\n"; + + + + +print "\nMPILEUP variant\n"; +open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n"); +my $nbvariant=0; +while (my $line=<MPV>){ + my @fields = split(/\s+/,$line); + if ($#fields >= 4){ + my $match = $fields[4]; + $match =~ s/\$//g; #the read start at this position + $match =~ s/\^.//g; #the read end at this position followed by quality char + if ($match =~/[ACGTNacgtn]+/){ + $nbvariant++; + } + } + else { + #print STDERR "Erreur : $line\n"; + } +} +print "Variant detected :\t$nbvariant\n"; +close (MPV); + + + + + + +print "\nMPILEUP filtered without dubious position\n"; +open(LF, $list_filtered) or die ("Can't open $list_filtered\n"); +$nbvariant=0; +while (my $line=<LF>){ + $nbvariant++; +} +print "Variant selected :\t$nbvariant\n"; +close (LF); + + + + + +print "\nMPILEUP filtered without dubious position and BLAST\n"; +open(BFP1, $blast_filtered_part1) or die ("Can't open $blast_filtered_part1\n"); +$nbvariant=0; +while (my $line=<BFP1>){ + $nbvariant++; +} +close (BFP1); +open(BFP2, $blast_filtered_part2) or die ("Can't open $blast_filtered_part2\n"); +while (my $line=<BFP2>){ + $nbvariant++; +} +close (BFP2); +print "Variant selected :\t$nbvariant\n"; + + + + + +print "\nSNP selected after mpileup filtering : \t"; +open(SNP, $snp_selected) or die ("Can't open $snp_selected\n"); +$nbvariant=0; +while (my $line=<SNP>){ + $nbvariant++; +} + +print "$nbvariant\n"; +close (SNP); + + + + + + + +