comparison rapsodyn/CreateMatrix.pl @ 14:93e6f2af1ce2 draft

Uploaded
author mcharles
date Mon, 26 Jan 2015 18:10:52 -0500
parents 0a6c1cfe4dc8
children
comparison
equal deleted inserted replaced
13:827da1a9a326 14:93e6f2af1ce2
3 use strict; 3 use strict;
4 use warnings; 4 use warnings;
5 use Getopt::Long; 5 use Getopt::Long;
6 6
7 my $input_pileup_file; 7 my $input_pileup_file;
8 my $input_variant_file;
9 my $input_variant_unique_file; 8 my $input_variant_unique_file;
9 my $input_name;
10 10
11 GetOptions ( 11 GetOptions (
12 "input_name=s" => \$input_name,
12 "input_pileup_file=s" => \$input_pileup_file, 13 "input_pileup_file=s" => \$input_pileup_file,
13 "input_variant_file=s" => \$input_variant_file,
14 "input_variant_unique_file=s" => \$input_variant_unique_file 14 "input_variant_unique_file=s" => \$input_variant_unique_file
15 ) or die("Error in command line arguments\n"); 15 ) or die("Error in command line arguments\n");
16 16
17 17
18 print "Chrom\tPos\tRef\t$input_name\n";
18 19
19 print "$input_pileup_file\n$input_variant_file\n$input_variant_unique_file\n"; 20 my %variant_unique;
21 open (VU,$input_variant_unique_file) or die ("Can't open variant unique file\n");
22 while (my $line=<VU>){
23 if ($line!~/^\s*$/){
24 my @fields = split (/\t+/,$line);
25 my $chr;
26 my $pos;
27 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
28 $chr = $1;
29 }
30 else {
31 print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
32 exit(0);
33 }
34 if ($fields[1]=~/^\s*(\d+)\s*$/){
35 $pos = $1;
36 }
37 else {
38 print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n";
39 exit(0);
40 }
41 if ($#fields>=4){
42 $variant_unique{"$chr#$pos"}=$fields[4];
43 }
44 else {
45 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
46 }
47 }
48 }
49 close VU;
20 50
51 my %covered;
52
53 my $compt=0;
54 open (PU,$input_pileup_file) or die ("Can't open pileup file\n");
55 while (my $line=<PU>){
56 #$compt++;
57 #if ($compt>200000){last;}
58 if ($line!~/^\s*$/){
59 my @fields = split (/\t+/,$line);
60 my $chr;
61 my $pos;
62 my $ref;
63 my $depth;
64 my $pile;
65
66 if ($#fields>=4){
67 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
68 $chr = $1;
69 }
70 else {
71 print STDERR "Unable to detect chromosome in pileup file\n$line\n";
72 exit(0);
73 }
74 if ($fields[1]=~/^\s*(\d+)\s*$/){
75 $pos = $1;
76 }
77 else {
78 print STDERR "Unable to detect position in pileup file\n$line\n";
79 exit(0);
80 }
81
82 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
83 $ref = $1;
84 }
85 else {
86 print STDERR "Unable to detect reference base in pileup file\n$line\n";
87 exit(0);
88 }
89
90 if ($fields[3]=~/^\s*(\d+)\s*$/i){
91 $depth = $1;
92 }
93 else {
94 print STDERR "Unable to detect depth in pileup file\n$line\n";
95 exit(0);
96 }
97
98 $pile = $fields[4];
99
100
101 if ($variant_unique{"$chr#$pos"}) {
102 $pile = $variant_unique{"$chr#$pos"};
103 $pile =~ s/\$//g; #the read start at this position
104 $pile =~ s/\^.//g; #the read end at this position followed by quality char
105 my $ori_pile = $pile;
106
107 my @detail = split(//,$pile);
108 my $current_in="";
109 my $current_del="";
110 my $current_length=0;
111 my $noindel_pile="";
112 my @IN;
113 my @DEL;
114
115 $noindel_pile = $pile;
116 while ($noindel_pile =~ /\+(\d+)/){
117 my $length = $1;
118 $noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ;
119 push(@IN,$1);
120 #print "test : $pile / $noindel_pile\n";
121 }
122 while ($noindel_pile =~ /\-(\d+)/){
123 my $length = $1;
124 $noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ;
125 push(@DEL,$1);
126 #print "test : $pile / $noindel_pile\n";
127 }
128
129 my %variant_type;
130
131 my $indel_pile = $ori_pile;
132 $variant_type{"IN"} = ($indel_pile =~ s/\+//gi);
133 $variant_type{"DEL"} = ($indel_pile =~ s/\-//gi);
134
135 my $base_pile = $noindel_pile;
136 $variant_type{"A"} = ($base_pile =~ s/a//gi);
137 $variant_type{"T"} = ($base_pile =~ s/t//gi);
138 $variant_type{"C"} = ($base_pile =~ s/c//gi);
139 $variant_type{"G"} = ($base_pile =~ s/g//gi);
140 $variant_type{"N"} = ($base_pile =~ s/n//gi);
141
142 my $top_number=0;
143 my $top_variant="";
144 foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){
145 if ($variant_type{$key} > 0){
146 if ($variant_type{$key} > $top_number){
147 $top_number = $variant_type{$key};
148 $top_variant = $key;
149 }
150 elsif ($variant_type{$key} == $top_number){
151 $top_variant .= "/$key";
152 }
153 }
154
155 }
156 print "$chr\t$pos\t$ref\t$top_variant\n";
157
158 }
159 else {
160 print "$chr\t$pos\t$ref\t$ref\n";
161 }
162 }
163 else {
164 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n";
165 }
166 }
167 }
168 close PU;
169
170