| 
0
 | 
     1 #!/usr/bin/perl
 | 
| 
 | 
     2 #V1.0.0
 | 
| 
 | 
     3 use strict;
 | 
| 
 | 
     4 use warnings;
 | 
| 
 | 
     5 use Getopt::Long;
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 my $input_pileup_file;
 | 
| 
 | 
     8 my $input_mpileupFiltred_file;
 | 
| 
 | 
     9 my $input_name;
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 GetOptions (
 | 
| 
 | 
    12 "input_name=s" => \$input_name,
 | 
| 
 | 
    13 "input_pileup_file=s" => \$input_pileup_file,
 | 
| 
 | 
    14 "input_mpileupFiltred_file=s" => \$input_mpileupFiltred_file
 | 
| 
 | 
    15 ) or die("Error in command line arguments\n");
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 print "Chrom\tPos\tRef\t$input_name\n";
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 my %mpileupFiltred;
 | 
| 
 | 
    21 open (VU,$input_mpileupFiltred_file) or die ("Can't open mpileupFiltred file\n");
 | 
| 
 | 
    22 while (my $line=<VU>){
 | 
| 
 | 
    23 	if ($line!~/^\s*$/){
 | 
| 
 | 
    24 		my @fields = split (/\t+/,$line);
 | 
| 
 | 
    25 		my $chr;
 | 
| 
 | 
    26 		my $pos;
 | 
| 
 | 
    27 		if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
 | 
| 
 | 
    28 			$chr = $1;
 | 
| 
 | 
    29 		}
 | 
| 
 | 
    30 		else {
 | 
| 
 | 
    31 			print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
 | 
| 
 | 
    32 			exit(0);
 | 
| 
 | 
    33 		}
 | 
| 
 | 
    34 		if ($fields[1]=~/^\s*(\d+)\s*$/){
 | 
| 
 | 
    35 			$pos = $1;
 | 
| 
 | 
    36 		}
 | 
| 
 | 
    37 		else {
 | 
| 
 | 
    38 			print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
 | 
| 
 | 
    39 			exit(0);
 | 
| 
 | 
    40 		}
 | 
| 
 | 
    41 		if ($#fields>=4){
 | 
| 
 | 
    42 			$mpileupFiltred{"$chr#$pos"}=$fields[4];
 | 
| 
 | 
    43 		}
 | 
| 
 | 
    44 		else {
 | 
| 
 | 
    45 			print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";	
 | 
| 
 | 
    46 		}
 | 
| 
 | 
    47 	}
 | 
| 
 | 
    48 }
 | 
| 
 | 
    49 close VU;
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 my %covered;
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 my $compt=0;
 | 
| 
 | 
    54 open (PU,$input_pileup_file) or die ("Can't open pileup file\n");
 | 
| 
 | 
    55 while (my $line=<PU>){
 | 
| 
 | 
    56 	#$compt++;
 | 
| 
 | 
    57 	#if ($compt>200000){last;}
 | 
| 
 | 
    58 	if ($line!~/^\s*$/){
 | 
| 
 | 
    59 		my @fields = split (/\t+/,$line);
 | 
| 
 | 
    60 		my $chr;
 | 
| 
 | 
    61 		my $pos;
 | 
| 
 | 
    62 		my $ref;
 | 
| 
 | 
    63 		my $depth;
 | 
| 
 | 
    64 		my $pile;
 | 
| 
 | 
    65 		
 | 
| 
 | 
    66 		if ($#fields>=4){
 | 
| 
 | 
    67 			if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
 | 
| 
 | 
    68 				$chr = $1;
 | 
| 
 | 
    69 			}
 | 
| 
 | 
    70 			else {
 | 
| 
 | 
    71 				print STDERR "Unable to detect chromosome in pileup file\n$line\n";
 | 
| 
 | 
    72 				exit(0);
 | 
| 
 | 
    73 			}
 | 
| 
 | 
    74 			if ($fields[1]=~/^\s*(\d+)\s*$/){
 | 
| 
 | 
    75 				$pos = $1;
 | 
| 
 | 
    76 			}
 | 
| 
 | 
    77 			else {
 | 
| 
 | 
    78 				print STDERR "Unable to detect position in pileup file\n$line\n";
 | 
| 
 | 
    79 				exit(0);
 | 
| 
 | 
    80 			}
 | 
| 
 | 
    81 			
 | 
| 
 | 
    82 			if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
 | 
| 
 | 
    83 				$ref = $1;
 | 
| 
 | 
    84 			}
 | 
| 
 | 
    85 			else {
 | 
| 
 | 
    86 				print STDERR "Unable to detect reference base in pileup file\n$line\n";
 | 
| 
 | 
    87 				exit(0);
 | 
| 
 | 
    88 			}
 | 
| 
 | 
    89 			
 | 
| 
 | 
    90 			if ($fields[3]=~/^\s*(\d+)\s*$/i){
 | 
| 
 | 
    91 				$depth = $1;
 | 
| 
 | 
    92 			}
 | 
| 
 | 
    93 			else {
 | 
| 
 | 
    94 				print STDERR "Unable to detect depth in pileup file\n$line\n";
 | 
| 
 | 
    95 				exit(0);
 | 
| 
 | 
    96 			}
 | 
| 
 | 
    97 			
 | 
| 
 | 
    98 			$pile = $fields[4];
 | 
| 
 | 
    99 			$pile =~ s/\$//g; #the read start at this position
 | 
| 
 | 
   100 			$pile =~ s/\^.//g; #the read end at this position followed by quality char
 | 
| 
 | 
   101 				
 | 
| 
 | 
   102 			if ($mpileupFiltred{"$chr#$pos"}) {
 | 
| 
 | 
   103 				$pile = $mpileupFiltred{"$chr#$pos"};
 | 
| 
 | 
   104 				$pile =~ s/\$//g; #the read start at this position
 | 
| 
 | 
   105 				$pile =~ s/\^.//g; #the read end at this position followed by quality char
 | 
| 
 | 
   106 				my $ori_pile = $pile;
 | 
| 
 | 
   107 				
 | 
| 
 | 
   108 				my @detail = split(//,$pile);
 | 
| 
 | 
   109 				my $current_in="";
 | 
| 
 | 
   110 				my $current_del="";
 | 
| 
 | 
   111 				my $current_length=0;
 | 
| 
 | 
   112 				my $noindel_pile="";
 | 
| 
 | 
   113 				my @IN;
 | 
| 
 | 
   114 				my @DEL;
 | 
| 
 | 
   115 				
 | 
| 
 | 
   116 				$noindel_pile = $pile;
 | 
| 
 | 
   117 				while ($noindel_pile =~ /\+(\d+)/){
 | 
| 
 | 
   118 					my $length = $1;
 | 
| 
 | 
   119 					$noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ;
 | 
| 
 | 
   120 					push(@IN,$1);
 | 
| 
 | 
   121 					#print "test : $pile / $noindel_pile\n";
 | 
| 
 | 
   122 				}
 | 
| 
 | 
   123 				while ($noindel_pile =~ /\-(\d+)/){
 | 
| 
 | 
   124 					my $length = $1;
 | 
| 
 | 
   125 					$noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ;
 | 
| 
 | 
   126 					push(@DEL,$1);
 | 
| 
 | 
   127 					#print "test : $pile / $noindel_pile\n";
 | 
| 
 | 
   128 				}
 | 
| 
 | 
   129 				
 | 
| 
 | 
   130 				my %variant_type;
 | 
| 
 | 
   131 				
 | 
| 
 | 
   132 				my $indel_pile = $ori_pile;
 | 
| 
 | 
   133 				$variant_type{"IN"} = ($indel_pile =~ s/\+//gi);
 | 
| 
 | 
   134 				$variant_type{"DEL"} = ($indel_pile =~ s/\-//gi);
 | 
| 
 | 
   135 				
 | 
| 
 | 
   136 				my $base_pile = $noindel_pile;
 | 
| 
 | 
   137 				$variant_type{"A"} = ($base_pile =~ s/a//gi);
 | 
| 
 | 
   138 				$variant_type{"T"} = ($base_pile =~ s/t//gi);
 | 
| 
 | 
   139 				$variant_type{"C"} = ($base_pile =~ s/c//gi);
 | 
| 
 | 
   140 				$variant_type{"G"} = ($base_pile =~ s/g//gi);
 | 
| 
 | 
   141 				$variant_type{"N"} = ($base_pile =~ s/n//gi);
 | 
| 
 | 
   142 				
 | 
| 
 | 
   143 				my $top_number=0;
 | 
| 
 | 
   144 				my $top_variant="";
 | 
| 
 | 
   145 				foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){
 | 
| 
 | 
   146 					if ($variant_type{$key} > 0){
 | 
| 
 | 
   147 						if ($variant_type{$key} > $top_number){
 | 
| 
 | 
   148 							$top_number = $variant_type{$key};
 | 
| 
 | 
   149 							$top_variant = $key;
 | 
| 
 | 
   150 						}
 | 
| 
 | 
   151 						elsif ($variant_type{$key} == $top_number){
 | 
| 
 | 
   152 							$top_variant .= "/$key";
 | 
| 
 | 
   153 						}
 | 
| 
 | 
   154 					}
 | 
| 
 | 
   155 					
 | 
| 
 | 
   156 				}
 | 
| 
 | 
   157 				print "$chr\t$pos\t$ref\t$top_variant\n";
 | 
| 
 | 
   158 				
 | 
| 
 | 
   159 			}
 | 
| 
 | 
   160 			elsif (($pile=~/[\+\-]/)||($pile=~/[ATGCN]/i)){
 | 
| 
 | 
   161 				print "$chr\t$pos\t$ref\tX\n";
 | 
| 
 | 
   162 			}
 | 
| 
 | 
   163 			else {
 | 
| 
 | 
   164 				print "$chr\t$pos\t$ref\t$ref\n";
 | 
| 
 | 
   165 			}
 | 
| 
 | 
   166 		}
 | 
| 
 | 
   167 		else {
 | 
| 
 | 
   168 			print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
 | 
| 
 | 
   169 		}
 | 
| 
 | 
   170 	}
 | 
| 
 | 
   171 }
 | 
| 
 | 
   172 close PU;
 | 
| 
 | 
   173 
 | 
| 
 | 
   174 
 |