Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/CreateMatrix.pl @ 14:93e6f2af1ce2 draft
Uploaded
author | mcharles |
---|---|
date | Mon, 26 Jan 2015 18:10:52 -0500 |
parents | 0a6c1cfe4dc8 |
children |
comparison
equal
deleted
inserted
replaced
13:827da1a9a326 | 14:93e6f2af1ce2 |
---|---|
3 use strict; | 3 use strict; |
4 use warnings; | 4 use warnings; |
5 use Getopt::Long; | 5 use Getopt::Long; |
6 | 6 |
7 my $input_pileup_file; | 7 my $input_pileup_file; |
8 my $input_variant_file; | |
9 my $input_variant_unique_file; | 8 my $input_variant_unique_file; |
9 my $input_name; | |
10 | 10 |
11 GetOptions ( | 11 GetOptions ( |
12 "input_name=s" => \$input_name, | |
12 "input_pileup_file=s" => \$input_pileup_file, | 13 "input_pileup_file=s" => \$input_pileup_file, |
13 "input_variant_file=s" => \$input_variant_file, | |
14 "input_variant_unique_file=s" => \$input_variant_unique_file | 14 "input_variant_unique_file=s" => \$input_variant_unique_file |
15 ) or die("Error in command line arguments\n"); | 15 ) or die("Error in command line arguments\n"); |
16 | 16 |
17 | 17 |
18 print "Chrom\tPos\tRef\t$input_name\n"; | |
18 | 19 |
19 print "$input_pileup_file\n$input_variant_file\n$input_variant_unique_file\n"; | 20 my %variant_unique; |
21 open (VU,$input_variant_unique_file) or die ("Can't open variant unique file\n"); | |
22 while (my $line=<VU>){ | |
23 if ($line!~/^\s*$/){ | |
24 my @fields = split (/\t+/,$line); | |
25 my $chr; | |
26 my $pos; | |
27 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ | |
28 $chr = $1; | |
29 } | |
30 else { | |
31 print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; | |
32 exit(0); | |
33 } | |
34 if ($fields[1]=~/^\s*(\d+)\s*$/){ | |
35 $pos = $1; | |
36 } | |
37 else { | |
38 print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; | |
39 exit(0); | |
40 } | |
41 if ($#fields>=4){ | |
42 $variant_unique{"$chr#$pos"}=$fields[4]; | |
43 } | |
44 else { | |
45 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; | |
46 } | |
47 } | |
48 } | |
49 close VU; | |
20 | 50 |
51 my %covered; | |
52 | |
53 my $compt=0; | |
54 open (PU,$input_pileup_file) or die ("Can't open pileup file\n"); | |
55 while (my $line=<PU>){ | |
56 #$compt++; | |
57 #if ($compt>200000){last;} | |
58 if ($line!~/^\s*$/){ | |
59 my @fields = split (/\t+/,$line); | |
60 my $chr; | |
61 my $pos; | |
62 my $ref; | |
63 my $depth; | |
64 my $pile; | |
65 | |
66 if ($#fields>=4){ | |
67 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ | |
68 $chr = $1; | |
69 } | |
70 else { | |
71 print STDERR "Unable to detect chromosome in pileup file\n$line\n"; | |
72 exit(0); | |
73 } | |
74 if ($fields[1]=~/^\s*(\d+)\s*$/){ | |
75 $pos = $1; | |
76 } | |
77 else { | |
78 print STDERR "Unable to detect position in pileup file\n$line\n"; | |
79 exit(0); | |
80 } | |
81 | |
82 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){ | |
83 $ref = $1; | |
84 } | |
85 else { | |
86 print STDERR "Unable to detect reference base in pileup file\n$line\n"; | |
87 exit(0); | |
88 } | |
89 | |
90 if ($fields[3]=~/^\s*(\d+)\s*$/i){ | |
91 $depth = $1; | |
92 } | |
93 else { | |
94 print STDERR "Unable to detect depth in pileup file\n$line\n"; | |
95 exit(0); | |
96 } | |
97 | |
98 $pile = $fields[4]; | |
99 | |
100 | |
101 if ($variant_unique{"$chr#$pos"}) { | |
102 $pile = $variant_unique{"$chr#$pos"}; | |
103 $pile =~ s/\$//g; #the read start at this position | |
104 $pile =~ s/\^.//g; #the read end at this position followed by quality char | |
105 my $ori_pile = $pile; | |
106 | |
107 my @detail = split(//,$pile); | |
108 my $current_in=""; | |
109 my $current_del=""; | |
110 my $current_length=0; | |
111 my $noindel_pile=""; | |
112 my @IN; | |
113 my @DEL; | |
114 | |
115 $noindel_pile = $pile; | |
116 while ($noindel_pile =~ /\+(\d+)/){ | |
117 my $length = $1; | |
118 $noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ; | |
119 push(@IN,$1); | |
120 #print "test : $pile / $noindel_pile\n"; | |
121 } | |
122 while ($noindel_pile =~ /\-(\d+)/){ | |
123 my $length = $1; | |
124 $noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ; | |
125 push(@DEL,$1); | |
126 #print "test : $pile / $noindel_pile\n"; | |
127 } | |
128 | |
129 my %variant_type; | |
130 | |
131 my $indel_pile = $ori_pile; | |
132 $variant_type{"IN"} = ($indel_pile =~ s/\+//gi); | |
133 $variant_type{"DEL"} = ($indel_pile =~ s/\-//gi); | |
134 | |
135 my $base_pile = $noindel_pile; | |
136 $variant_type{"A"} = ($base_pile =~ s/a//gi); | |
137 $variant_type{"T"} = ($base_pile =~ s/t//gi); | |
138 $variant_type{"C"} = ($base_pile =~ s/c//gi); | |
139 $variant_type{"G"} = ($base_pile =~ s/g//gi); | |
140 $variant_type{"N"} = ($base_pile =~ s/n//gi); | |
141 | |
142 my $top_number=0; | |
143 my $top_variant=""; | |
144 foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){ | |
145 if ($variant_type{$key} > 0){ | |
146 if ($variant_type{$key} > $top_number){ | |
147 $top_number = $variant_type{$key}; | |
148 $top_variant = $key; | |
149 } | |
150 elsif ($variant_type{$key} == $top_number){ | |
151 $top_variant .= "/$key"; | |
152 } | |
153 } | |
154 | |
155 } | |
156 print "$chr\t$pos\t$ref\t$top_variant\n"; | |
157 | |
158 } | |
159 else { | |
160 print "$chr\t$pos\t$ref\t$ref\n"; | |
161 } | |
162 } | |
163 else { | |
164 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; | |
165 } | |
166 } | |
167 } | |
168 close PU; | |
169 | |
170 |