Mercurial > repos > mcharles > rapsosnp
diff rapsodyn/PileupVariant.pl @ 10:0a6c1cfe4dc8 draft
Uploaded
author | mcharles |
---|---|
date | Mon, 19 Jan 2015 04:33:21 -0500 |
parents | 3f7b0788a1c4 |
children |
line wrap: on
line diff
--- a/rapsodyn/PileupVariant.pl Mon Oct 20 05:58:31 2014 -0400 +++ b/rapsodyn/PileupVariant.pl Mon Jan 19 04:33:21 2015 -0500 @@ -3,39 +3,94 @@ use strict; use warnings; use Getopt::Long; +#v1.1.0 choose exclusion file +#v1.0.2 manage empty files +#v1.0.1 bug correction +#V1.0.1 added log, option parameters my $input_pileup_file; my $output_pileup_file; +my $input_exclusion_file; my $log_file; my $nb_base_covered=0; my $nb_variant=0; +my $nb_variant_conserved=0; +my $do_exclusion=0; +my $nb_exclusion=0; +my $nb_excluded=0; +my %exclusion; GetOptions ( "input_pileup_file=s" => \$input_pileup_file, +"input_exclusion_file=s" => \$input_exclusion_file, "log_file=s" => \$log_file ) or die("Error in command line arguments\n"); open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n"); +if ( -z IN){ + exit(0); +} + +if ($input_exclusion_file){ + open (EX,$input_exclusion_file) or die ("Can't open $input_exclusion_file\n"); + if ( -z EX){ + + } + else { + $do_exclusion=1; + while (my $line=<EX>){ + $nb_exclusion++; + chomp($line); + my @fields = split(/\s+/,$line); + if (($fields[0])&&($fields[1])){ + $exclusion{"$fields[0]\t$fields[1]"}=1; + } + else { + print STDERR "Error formatting in Exclusion File\nExclusion file lines should be 'chromosome number' TAB 'position'\n$line\n",$fields[0],"\n",$fields[1],"\n"; + } + } + + } + close (EX); +} #Extraction des variants my $nb_line=0; while (my $line=<IN>){ #print $line; - $nb_base_covered++; - $line =~ s/\$//g; #the read start at this position - $line =~ s/\^.//g; #the read end at this position followed by quality char + if ($line !~ /^\s*$/){ + my @fields = split(/\s+/,$line); + if ($fields[4]){ + $nb_base_covered++; + my $pile = $fields[4]; + $pile =~ s/\$//g; #the read start at this position + $pile =~ s/\^.//g; #the read end at this position followed by quality char + if ($fields[4]=~/[ATGCN]/i){ #Indel are +/-\d[ATGCatgc]+ and SNP are [ATGCatgc]+ so [ATGCN]/i detection cover indel and snp + $nb_variant++; + if (($do_exclusion==1)&&($exclusion{"$fields[0]\t$fields[1]"})){ + + } + else { + print $line; + $nb_variant_conserved++; + } + } + } + elsif ($fields[3]==0){ + } + else { + print STDERR "Error in pileup format\nPileup result expected at column 5\n$line\n"; + } + } #print $line; - my @field = split(/\s+/,$line); - if ($field[4]=~/[ATGCN]/i){ - print $line; - $nb_variant++; - } + } close(IN); open (LF,">$log_file") or die("Can't open $log_file\n"); print LF "\n####\t Variant extraction \n"; -print LF "Position covered :\t$nb_base_covered\n"; -print LF "Variant detected :\t$nb_variant\n"; +print LF "Position covered\t:\t$nb_base_covered\n"; +print LF "Variant detected\t:\t$nb_variant\n"; +print LF "Variant conserved\t:\t$nb_variant_conserved\n"; close (LF);