10
|
1 #!/usr/bin/perl
|
|
2 #V1.0.0
|
|
3 use strict;
|
|
4 use warnings;
|
|
5 use Getopt::Long;
|
|
6
|
|
7 my $input_pileup_file;
|
|
8 my $input_variant_unique_file;
|
14
|
9 my $input_name;
|
10
|
10
|
|
11 GetOptions (
|
14
|
12 "input_name=s" => \$input_name,
|
10
|
13 "input_pileup_file=s" => \$input_pileup_file,
|
|
14 "input_variant_unique_file=s" => \$input_variant_unique_file
|
|
15 ) or die("Error in command line arguments\n");
|
|
16
|
|
17
|
14
|
18 print "Chrom\tPos\tRef\t$input_name\n";
|
10
|
19
|
14
|
20 my %variant_unique;
|
|
21 open (VU,$input_variant_unique_file) or die ("Can't open variant unique file\n");
|
|
22 while (my $line=<VU>){
|
|
23 if ($line!~/^\s*$/){
|
|
24 my @fields = split (/\t+/,$line);
|
|
25 my $chr;
|
|
26 my $pos;
|
|
27 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
|
|
28 $chr = $1;
|
|
29 }
|
|
30 else {
|
|
31 print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
|
|
32 exit(0);
|
|
33 }
|
|
34 if ($fields[1]=~/^\s*(\d+)\s*$/){
|
|
35 $pos = $1;
|
|
36 }
|
|
37 else {
|
|
38 print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
|
|
39 exit(0);
|
|
40 }
|
|
41 if ($#fields>=4){
|
|
42 $variant_unique{"$chr#$pos"}=$fields[4];
|
|
43 }
|
|
44 else {
|
|
45 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
|
|
46 }
|
|
47 }
|
|
48 }
|
|
49 close VU;
|
|
50
|
|
51 my %covered;
|
10
|
52
|
14
|
53 my $compt=0;
|
|
54 open (PU,$input_pileup_file) or die ("Can't open pileup file\n");
|
|
55 while (my $line=<PU>){
|
|
56 #$compt++;
|
|
57 #if ($compt>200000){last;}
|
|
58 if ($line!~/^\s*$/){
|
|
59 my @fields = split (/\t+/,$line);
|
|
60 my $chr;
|
|
61 my $pos;
|
|
62 my $ref;
|
|
63 my $depth;
|
|
64 my $pile;
|
|
65
|
|
66 if ($#fields>=4){
|
|
67 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
|
|
68 $chr = $1;
|
|
69 }
|
|
70 else {
|
|
71 print STDERR "Unable to detect chromosome in pileup file\n$line\n";
|
|
72 exit(0);
|
|
73 }
|
|
74 if ($fields[1]=~/^\s*(\d+)\s*$/){
|
|
75 $pos = $1;
|
|
76 }
|
|
77 else {
|
|
78 print STDERR "Unable to detect position in pileup file\n$line\n";
|
|
79 exit(0);
|
|
80 }
|
|
81
|
|
82 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
|
|
83 $ref = $1;
|
|
84 }
|
|
85 else {
|
|
86 print STDERR "Unable to detect reference base in pileup file\n$line\n";
|
|
87 exit(0);
|
|
88 }
|
|
89
|
|
90 if ($fields[3]=~/^\s*(\d+)\s*$/i){
|
|
91 $depth = $1;
|
|
92 }
|
|
93 else {
|
|
94 print STDERR "Unable to detect depth in pileup file\n$line\n";
|
|
95 exit(0);
|
|
96 }
|
|
97
|
|
98 $pile = $fields[4];
|
|
99
|
|
100
|
|
101 if ($variant_unique{"$chr#$pos"}) {
|
|
102 $pile = $variant_unique{"$chr#$pos"};
|
|
103 $pile =~ s/\$//g; #the read start at this position
|
|
104 $pile =~ s/\^.//g; #the read end at this position followed by quality char
|
|
105 my $ori_pile = $pile;
|
|
106
|
|
107 my @detail = split(//,$pile);
|
|
108 my $current_in="";
|
|
109 my $current_del="";
|
|
110 my $current_length=0;
|
|
111 my $noindel_pile="";
|
|
112 my @IN;
|
|
113 my @DEL;
|
|
114
|
|
115 $noindel_pile = $pile;
|
|
116 while ($noindel_pile =~ /\+(\d+)/){
|
|
117 my $length = $1;
|
|
118 $noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ;
|
|
119 push(@IN,$1);
|
|
120 #print "test : $pile / $noindel_pile\n";
|
|
121 }
|
|
122 while ($noindel_pile =~ /\-(\d+)/){
|
|
123 my $length = $1;
|
|
124 $noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ;
|
|
125 push(@DEL,$1);
|
|
126 #print "test : $pile / $noindel_pile\n";
|
|
127 }
|
|
128
|
|
129 my %variant_type;
|
|
130
|
|
131 my $indel_pile = $ori_pile;
|
|
132 $variant_type{"IN"} = ($indel_pile =~ s/\+//gi);
|
|
133 $variant_type{"DEL"} = ($indel_pile =~ s/\-//gi);
|
|
134
|
|
135 my $base_pile = $noindel_pile;
|
|
136 $variant_type{"A"} = ($base_pile =~ s/a//gi);
|
|
137 $variant_type{"T"} = ($base_pile =~ s/t//gi);
|
|
138 $variant_type{"C"} = ($base_pile =~ s/c//gi);
|
|
139 $variant_type{"G"} = ($base_pile =~ s/g//gi);
|
|
140 $variant_type{"N"} = ($base_pile =~ s/n//gi);
|
|
141
|
|
142 my $top_number=0;
|
|
143 my $top_variant="";
|
|
144 foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){
|
|
145 if ($variant_type{$key} > 0){
|
|
146 if ($variant_type{$key} > $top_number){
|
|
147 $top_number = $variant_type{$key};
|
|
148 $top_variant = $key;
|
|
149 }
|
|
150 elsif ($variant_type{$key} == $top_number){
|
|
151 $top_variant .= "/$key";
|
|
152 }
|
|
153 }
|
|
154
|
|
155 }
|
|
156 print "$chr\t$pos\t$ref\t$top_variant\n";
|
|
157
|
|
158 }
|
|
159 else {
|
|
160 print "$chr\t$pos\t$ref\t$ref\n";
|
|
161 }
|
|
162 }
|
|
163 else {
|
|
164 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
|
|
165 }
|
|
166 }
|
|
167 }
|
|
168 close PU;
|
|
169
|
|
170
|