Mercurial > repos > mcharles > rapsosnp
view rapsodyn/CreateMatrix.pl @ 15:56d328bce3a7 draft default tip
Uploaded
author | mcharles |
---|---|
date | Thu, 29 Jan 2015 08:54:06 -0500 |
parents | 93e6f2af1ce2 |
children |
line wrap: on
line source
#!/usr/bin/perl #V1.0.0 use strict; use warnings; use Getopt::Long; my $input_pileup_file; my $input_variant_unique_file; my $input_name; GetOptions ( "input_name=s" => \$input_name, "input_pileup_file=s" => \$input_pileup_file, "input_variant_unique_file=s" => \$input_variant_unique_file ) or die("Error in command line arguments\n"); print "Chrom\tPos\tRef\t$input_name\n"; my %variant_unique; open (VU,$input_variant_unique_file) or die ("Can't open variant unique file\n"); while (my $line=<VU>){ if ($line!~/^\s*$/){ my @fields = split (/\t+/,$line); my $chr; my $pos; if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ $chr = $1; } else { print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; exit(0); } if ($fields[1]=~/^\s*(\d+)\s*$/){ $pos = $1; } else { print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; exit(0); } if ($#fields>=4){ $variant_unique{"$chr#$pos"}=$fields[4]; } else { print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; } } } close VU; my %covered; my $compt=0; open (PU,$input_pileup_file) or die ("Can't open pileup file\n"); while (my $line=<PU>){ #$compt++; #if ($compt>200000){last;} if ($line!~/^\s*$/){ my @fields = split (/\t+/,$line); my $chr; my $pos; my $ref; my $depth; my $pile; if ($#fields>=4){ if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ $chr = $1; } else { print STDERR "Unable to detect chromosome in pileup file\n$line\n"; exit(0); } if ($fields[1]=~/^\s*(\d+)\s*$/){ $pos = $1; } else { print STDERR "Unable to detect position in pileup file\n$line\n"; exit(0); } if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){ $ref = $1; } else { print STDERR "Unable to detect reference base in pileup file\n$line\n"; exit(0); } if ($fields[3]=~/^\s*(\d+)\s*$/i){ $depth = $1; } else { print STDERR "Unable to detect depth in pileup file\n$line\n"; exit(0); } $pile = $fields[4]; if ($variant_unique{"$chr#$pos"}) { $pile = $variant_unique{"$chr#$pos"}; $pile =~ s/\$//g; #the read start at this position $pile =~ s/\^.//g; #the read end at this position followed by quality char my $ori_pile = $pile; my @detail = split(//,$pile); my $current_in=""; my $current_del=""; my $current_length=0; my $noindel_pile=""; my @IN; my @DEL; $noindel_pile = $pile; while ($noindel_pile =~ /\+(\d+)/){ my $length = $1; $noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ; push(@IN,$1); #print "test : $pile / $noindel_pile\n"; } while ($noindel_pile =~ /\-(\d+)/){ my $length = $1; $noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ; push(@DEL,$1); #print "test : $pile / $noindel_pile\n"; } my %variant_type; my $indel_pile = $ori_pile; $variant_type{"IN"} = ($indel_pile =~ s/\+//gi); $variant_type{"DEL"} = ($indel_pile =~ s/\-//gi); my $base_pile = $noindel_pile; $variant_type{"A"} = ($base_pile =~ s/a//gi); $variant_type{"T"} = ($base_pile =~ s/t//gi); $variant_type{"C"} = ($base_pile =~ s/c//gi); $variant_type{"G"} = ($base_pile =~ s/g//gi); $variant_type{"N"} = ($base_pile =~ s/n//gi); my $top_number=0; my $top_variant=""; foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){ if ($variant_type{$key} > 0){ if ($variant_type{$key} > $top_number){ $top_number = $variant_type{$key}; $top_variant = $key; } elsif ($variant_type{$key} == $top_number){ $top_variant .= "/$key"; } } } print "$chr\t$pos\t$ref\t$top_variant\n"; } else { print "$chr\t$pos\t$ref\t$ref\n"; } } else { print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; } } } close PU;