Mercurial > repos > mcharles > rapsosnp
view rapsodyn/PileupVariant.pl @ 15:56d328bce3a7 draft default tip
Uploaded
author | mcharles |
---|---|
date | Thu, 29 Jan 2015 08:54:06 -0500 |
parents | 0a6c1cfe4dc8 |
children |
line wrap: on
line source
#!/usr/bin/perl #V1.0.1 added log, option parameters use strict; use warnings; use Getopt::Long; #v1.1.0 choose exclusion file #v1.0.2 manage empty files #v1.0.1 bug correction #V1.0.1 added log, option parameters my $input_pileup_file; my $output_pileup_file; my $input_exclusion_file; my $log_file; my $nb_base_covered=0; my $nb_variant=0; my $nb_variant_conserved=0; my $do_exclusion=0; my $nb_exclusion=0; my $nb_excluded=0; my %exclusion; GetOptions ( "input_pileup_file=s" => \$input_pileup_file, "input_exclusion_file=s" => \$input_exclusion_file, "log_file=s" => \$log_file ) or die("Error in command line arguments\n"); open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n"); if ( -z IN){ exit(0); } if ($input_exclusion_file){ open (EX,$input_exclusion_file) or die ("Can't open $input_exclusion_file\n"); if ( -z EX){ } else { $do_exclusion=1; while (my $line=<EX>){ $nb_exclusion++; chomp($line); my @fields = split(/\s+/,$line); if (($fields[0])&&($fields[1])){ $exclusion{"$fields[0]\t$fields[1]"}=1; } else { print STDERR "Error formatting in Exclusion File\nExclusion file lines should be 'chromosome number' TAB 'position'\n$line\n",$fields[0],"\n",$fields[1],"\n"; } } } close (EX); } #Extraction des variants my $nb_line=0; while (my $line=<IN>){ #print $line; if ($line !~ /^\s*$/){ my @fields = split(/\s+/,$line); if ($fields[4]){ $nb_base_covered++; my $pile = $fields[4]; $pile =~ s/\$//g; #the read start at this position $pile =~ s/\^.//g; #the read end at this position followed by quality char if ($fields[4]=~/[ATGCN]/i){ #Indel are +/-\d[ATGCatgc]+ and SNP are [ATGCatgc]+ so [ATGCN]/i detection cover indel and snp $nb_variant++; if (($do_exclusion==1)&&($exclusion{"$fields[0]\t$fields[1]"})){ } else { print $line; $nb_variant_conserved++; } } } elsif ($fields[3]==0){ } else { print STDERR "Error in pileup format\nPileup result expected at column 5\n$line\n"; } } #print $line; } close(IN); open (LF,">$log_file") or die("Can't open $log_file\n"); print LF "\n####\t Variant extraction \n"; print LF "Position covered\t:\t$nb_base_covered\n"; print LF "Variant detected\t:\t$nb_variant\n"; print LF "Variant conserved\t:\t$nb_variant_conserved\n"; close (LF);