annotate rapsodyn/CreateMatrix.pl @ 14:93e6f2af1ce2 draft

Uploaded
author mcharles
date Mon, 26 Jan 2015 18:10:52 -0500
parents 0a6c1cfe4dc8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
2 #V1.0.0
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
3 use strict;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
4 use warnings;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
5 use Getopt::Long;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
6
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
7 my $input_pileup_file;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
8 my $input_variant_unique_file;
14
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
9 my $input_name;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
11 GetOptions (
14
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
12 "input_name=s" => \$input_name,
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
13 "input_pileup_file=s" => \$input_pileup_file,
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
14 "input_variant_unique_file=s" => \$input_variant_unique_file
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
15 ) or die("Error in command line arguments\n");
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
16
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
17
14
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
18 print "Chrom\tPos\tRef\t$input_name\n";
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
19
14
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
20 my %variant_unique;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
21 open (VU,$input_variant_unique_file) or die ("Can't open variant unique file\n");
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
22 while (my $line=<VU>){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
23 if ($line!~/^\s*$/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
24 my @fields = split (/\t+/,$line);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
25 my $chr;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
26 my $pos;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
27 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
28 $chr = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
29 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
30 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
31 print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
32 exit(0);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
33 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
34 if ($fields[1]=~/^\s*(\d+)\s*$/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
35 $pos = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
36 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
37 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
38 print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
39 exit(0);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
40 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
41 if ($#fields>=4){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
42 $variant_unique{"$chr#$pos"}=$fields[4];
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
43 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
44 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
45 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
46 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
47 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
48 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
49 close VU;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
50
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
51 my %covered;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
52
14
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
53 my $compt=0;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
54 open (PU,$input_pileup_file) or die ("Can't open pileup file\n");
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
55 while (my $line=<PU>){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
56 #$compt++;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
57 #if ($compt>200000){last;}
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
58 if ($line!~/^\s*$/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
59 my @fields = split (/\t+/,$line);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
60 my $chr;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
61 my $pos;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
62 my $ref;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
63 my $depth;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
64 my $pile;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
65
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
66 if ($#fields>=4){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
67 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
68 $chr = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
69 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
70 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
71 print STDERR "Unable to detect chromosome in pileup file\n$line\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
72 exit(0);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
73 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
74 if ($fields[1]=~/^\s*(\d+)\s*$/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
75 $pos = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
76 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
77 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
78 print STDERR "Unable to detect position in pileup file\n$line\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
79 exit(0);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
80 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
81
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
82 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
83 $ref = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
84 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
85 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
86 print STDERR "Unable to detect reference base in pileup file\n$line\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
87 exit(0);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
88 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
89
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
90 if ($fields[3]=~/^\s*(\d+)\s*$/i){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
91 $depth = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
92 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
93 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
94 print STDERR "Unable to detect depth in pileup file\n$line\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
95 exit(0);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
96 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
97
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
98 $pile = $fields[4];
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
99
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
100
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
101 if ($variant_unique{"$chr#$pos"}) {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
102 $pile = $variant_unique{"$chr#$pos"};
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
103 $pile =~ s/\$//g; #the read start at this position
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
104 $pile =~ s/\^.//g; #the read end at this position followed by quality char
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
105 my $ori_pile = $pile;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
106
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
107 my @detail = split(//,$pile);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
108 my $current_in="";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
109 my $current_del="";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
110 my $current_length=0;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
111 my $noindel_pile="";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
112 my @IN;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
113 my @DEL;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
114
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
115 $noindel_pile = $pile;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
116 while ($noindel_pile =~ /\+(\d+)/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
117 my $length = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
118 $noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
119 push(@IN,$1);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
120 #print "test : $pile / $noindel_pile\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
121 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
122 while ($noindel_pile =~ /\-(\d+)/){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
123 my $length = $1;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
124 $noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
125 push(@DEL,$1);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
126 #print "test : $pile / $noindel_pile\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
127 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
128
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
129 my %variant_type;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
130
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
131 my $indel_pile = $ori_pile;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
132 $variant_type{"IN"} = ($indel_pile =~ s/\+//gi);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
133 $variant_type{"DEL"} = ($indel_pile =~ s/\-//gi);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
134
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
135 my $base_pile = $noindel_pile;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
136 $variant_type{"A"} = ($base_pile =~ s/a//gi);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
137 $variant_type{"T"} = ($base_pile =~ s/t//gi);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
138 $variant_type{"C"} = ($base_pile =~ s/c//gi);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
139 $variant_type{"G"} = ($base_pile =~ s/g//gi);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
140 $variant_type{"N"} = ($base_pile =~ s/n//gi);
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
141
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
142 my $top_number=0;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
143 my $top_variant="";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
144 foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
145 if ($variant_type{$key} > 0){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
146 if ($variant_type{$key} > $top_number){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
147 $top_number = $variant_type{$key};
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
148 $top_variant = $key;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
149 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
150 elsif ($variant_type{$key} == $top_number){
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
151 $top_variant .= "/$key";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
152 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
153 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
154
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
155 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
156 print "$chr\t$pos\t$ref\t$top_variant\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
157
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
158 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
159 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
160 print "$chr\t$pos\t$ref\t$ref\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
161 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
162 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
163 else {
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
164 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
165 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
166 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
167 }
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
168 close PU;
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
169
93e6f2af1ce2 Uploaded
mcharles
parents: 10
diff changeset
170