annotate hgvs_add_missing_sample_coverage @ 0:a3129cb0af43 default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 13:02:25 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
1 #!/usr/bin/env perl
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
2
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
3 use strict;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
4 use warnings;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
5
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
6 # This script adds sample names to the last column of an HGVS file if the coverage for that position was insufficient to make the call the other samples had.
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
7 @ARGV == 8 or die "Usage: $0 <hgvs_in.txt> <hgvs_out.txt> <sample name1> <sample1.bam> <homo FN coverage cutoff> <het coverage FN cutoff> <min map quality> <min base quality>\n";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
8 my $hgvs_in_file = shift @ARGV;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
9 my $hgvs_out_file = shift @ARGV;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
10 my $sample_name = shift @ARGV;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
11 my $bam_file = shift @ARGV;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
12 my $homo_fn = shift @ARGV;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
13 my $het_fn = shift @ARGV;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
14 my $min_maq = shift @ARGV; # 30 (a.k.a. p < 0.001)
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
15 my $min_baq = shift @ARGV; # 20 (a.k.a. p < 0.01)
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
16
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
17 open(HGVS, $hgvs_in_file)
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
18 or die "Cannot open $hgvs_in_file for reading: $!\n";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
19 my $header_line = <HGVS>;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
20 chomp $header_line;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
21 my @headers = split /\t/, $header_line;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
22 my ($chr_column, $pos_column, $ref_column, $alt_column, $zygosity_column, $alt_cnt_column, $tot_cnt_column, $srcs_column);
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
23 for(my $i = 0; $i <= $#headers; $i++){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
24 if($headers[$i] eq "Chr"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
25 $chr_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
26 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
27 elsif($headers[$i] eq "DNA From"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
28 $pos_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
29 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
30 elsif($headers[$i] eq "Ref base"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
31 $ref_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
32 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
33 elsif($headers[$i] eq "Obs base"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
34 $alt_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
35 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
36 elsif($headers[$i] eq "Zygosity"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
37 $zygosity_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
38 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
39 elsif($headers[$i] eq "Variant Reads"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
40 $alt_cnt_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
41 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
42 elsif($headers[$i] eq "Total Reads"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
43 $tot_cnt_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
44 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
45 elsif($headers[$i] eq "Sources"){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
46 $srcs_column = $i;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
47 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
48 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
49 die "Could not find header Chr in $hgvs_in_file, aborting." if not defined $chr_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
50 die "Could not find header 'DNA From' in $hgvs_in_file, aborting." if not defined $pos_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
51 die "Could not find header 'Ref base' in $hgvs_in_file, aborting." if not defined $ref_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
52 die "Could not find header 'Obs base' in $hgvs_in_file, aborting." if not defined $alt_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
53 die "Could not find header 'Variant Reads' in $hgvs_in_file, aborting." if not defined $alt_cnt_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
54 die "Could not find header 'Total Reads' in $hgvs_in_file, aborting." if not defined $tot_cnt_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
55 die "Could not find header Zygosity in $hgvs_in_file, aborting." if not defined $zygosity_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
56 die "Could not find header Sources in $hgvs_in_file, aborting." if not defined $srcs_column;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
57 open(OUT, ">$hgvs_out_file")
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
58 or die "Cannot open $hgvs_out_file for writing: $!\n";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
59 print OUT $header_line, "\n"; # header line
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
60 my ($last_mpileup, $last_key);
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
61 while(<HGVS>){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
62 chomp;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
63 my @F = split /\t/, $_;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
64 if(index($F[$#F], $sample_name) != -1){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
65 print OUT "$_\n"; # keep line as-is
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
66 next;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
67 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
68 else{
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
69 # Not in a call right now
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
70 # If there are reads supporting the call, add the sample name to the list (last column)
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
71 # If there are no reads supporting the call, but coverage is below threshold, put a ~ in front of the sample name
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
72 my $chr = $F[$chr_column];
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
73 my $pos = $F[$pos_column];
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
74 my $ref = $F[$ref_column];
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
75 my $alt = $F[$alt_column];
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
76
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
77 # some tools report SNPs with leading or trailing matched ref seqs, so trim as necessary
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
78 if(length($ref) > 1 and length($ref) == length($alt)){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
79 while(length($ref) and substr($ref, 0, 1) eq substr($alt, 0, 1)){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
80 substr($ref, 0, 1) = "";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
81 substr($alt, 0, 1) = "";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
82 $pos++;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
83 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
84 while(length($ref) and substr($ref, -1) eq substr($alt, -1)){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
85 substr($ref, -1) = "";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
86 substr($alt, -1) = "";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
87 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
88 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
89
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
90 # (todo: what about compound het?)
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
91 my $zygosity = $F[$zygosity_column];
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
92 # see if the coverage is below a called zygosity threshold
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
93 my $end = $pos;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
94 # comment out next bit if we assume that reads have all indels left aligned
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
95 #if($alt ne "NA" and $ref ne "NA" and length($alt) > length($ref)){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
96 # $end += length($alt) - length($ref);
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
97 #}
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
98 my $mpileup;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
99 if(defined $last_key and $last_key eq "$chr:$pos:$ref:$alt"){ # used cached response if this is a repeated input line
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
100 $mpileup = $last_mpileup;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
101 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
102 else{
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
103 $mpileup = `samtools mpileup -q $min_maq -Q $min_baq -r "$chr:$pos-$end" $bam_file 2>&1`;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
104 $last_mpileup = $mpileup;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
105 $last_key = "$chr:$pos:$ref:$alt";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
106 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
107
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
108 $ref = "" if $ref eq "NA";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
109 $alt = "" if $alt eq "NA";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
110 # lines look something like chr10 74100774 N 164 G$ggGgg$GgGgGgggggG$ggggggggGggGGGgg...
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
111 # deletions are -1N, while insertions are +1g
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
112 for my $line (split /\n/, $mpileup){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
113 my @M = split /\t/, $line;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
114 next if $M[0] ne $chr; # header or message stuff
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
115 my $depth = $M[3];
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
116 if($depth == 0){ # No reads at all
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
117 $F[$srcs_column] .= "; ~$sample_name"; # add this sample to the last column
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
118 $F[$zygosity_column] .= "; low"; # zygosity unknown
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
119 $F[$alt_cnt_column] .= "; 0"; # alt count
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
120 $F[$tot_cnt_column] .= "; 0"; # tot count
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
121 #print OUT join("\t", @F), "\n";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
122 last;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
123 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
124 my %base_count;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
125 while($M[4] =~ /[ACGT]|\^.|(-\d+|\+\d+)/gi){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
126 my $call = uc($&); # upper case version of letter
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
127 if($1){ # call is -1n, or +3gcc, etc.
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
128 if(substr($1, 0, 1) eq "+"){ # insertion
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
129 my $offset = substr($1, 1);
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
130 $call .= uc(substr($M[4], pos($M[4]), $offset)); # append the letters (not captures in the regex above) to the number
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
131 pos($M[4]) += $offset; # skip the global search ahead of the letters just consumed
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
132 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
133 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
134 elsif($call eq '$' or length($call) > 1){ # the weird ^Y kind of call
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
135 next; # ignore
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
136 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
137 $base_count{$call}++;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
138 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
139 my $base_count = 0;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
140 if(length($ref) == 1 and length($alt) == 1){ #SNP
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
141 if(exists $base_count{$alt}){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
142 $base_count = $base_count{$alt};
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
143 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
144 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
145 elsif(length($ref) > length($alt)){ #del
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
146 my $del_length = length($alt)-length($ref); # e.g. -2
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
147 if(exists $base_count{$del_length}){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
148 $base_count = $base_count{$del_length};
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
149 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
150 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
151 elsif(length($ref) < length($alt)){ #ins
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
152 my $ins_call = "+".(length($alt)-length($ref)).substr($alt, length($ref)); # e.g. +3AGT
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
153 if(exists $base_count{$ins_call}){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
154 $base_count = $base_count{$ins_call};
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
155 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
156 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
157 else{
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
158 # MNP variant?
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
159 #warn "Cannot check $ref -> $alt: not a SNP insertion or deletion\n";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
160 next;
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
161 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
162 if(defined $base_count and $base_count and $depth/($base_count+1) < $het_fn){ # at least the min het prop (rounded up) of bases agreeing with the (missed in this sample) alt call
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
163 $F[$srcs_column] .= "; +$sample_name"; # add this sample to the last column
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
164 $F[$zygosity_column] .= $base_count/$depth < 0.78 ? "; heterozygote" : ($depth < $het_fn ? "; low" : "; homozygote"); # zygosity
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
165 $F[$alt_cnt_column] .= "; ".$base_count; # alt count
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
166 $F[$tot_cnt_column] .= "; $depth"; # tot count
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
167 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
168 elsif($zygosity =~ /het/ and $depth < $het_fn){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
169 $F[$srcs_column].= "; ~$sample_name";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
170 $F[$zygosity_column] .= $depth < $homo_fn ? "; low" : "; heterozygote"; # zygosity
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
171 $F[$alt_cnt_column] .= "; $base_count";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
172 $F[$tot_cnt_column] .= "; $depth"; # tot count
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
173 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
174 elsif($depth < $homo_fn){
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
175 $F[$srcs_column].= "; ~$sample_name";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
176 $F[$zygosity_column] .= "; low"; # zygosity
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
177 $F[$alt_cnt_column] .= "; $base_count";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
178 $F[$tot_cnt_column] .= "; $depth"; # tot count
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
179 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
180 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
181 print OUT join("\t", @F), "\n";
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
182 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
183 }
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
184 close(HGVS);
a3129cb0af43 initial commit
Yusuf Ali <ali@yusuf.email>
parents:
diff changeset
185 close(OUT);