Mercurial > repos > miller-lab > snp_analysis_conversion
comparison master2pg.pl @ 2:35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
author | cathy |
---|---|
date | Tue, 28 May 2013 17:54:02 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:1d8b23a21735 | 2:35c20b109be5 |
---|---|
#!/usr/bin/perl -w
use strict;

#convert from master variant file to pgSnp
#
# usage: master2pg.pl [indel addRef] infile.masterVar > outfile.pgSnp
#
#   indel   convert ins/del/sub/complex records instead of SNPs
#   addRef  append field 7 of the input row (the ref allele) as a last column
#
# Input is tab separated.  Lines starting with '#' or '>' and blank lines are
# skipped.  The fields used, by 0-based index:
#   2,3,4   copied unchanged into the first three output columns
#   5       zygosity (anything containing "hom"/"het", or exactly "hap")
#   6       varType  (snp, ins, del, sub, complex)
#   7       ref allele (only printed with addRef)
#   8,9     the two allele sequences
#   10,11   per-allele scores
#   16,17   per-allele frequencies
#           NOTE(review): the meaning of 10/11 and 16/17 is taken from the
#           variable names used for them; confirm against the masterVar
#           version being converted.
#
# Each output row is:
#   field2  field3  field4  alleles  alleleCount  freq(s)  score(s)  [ref]

# Return the value unless it is undefined or only whitespace; otherwise
# return nothing (undef when assigned to a scalar).
sub _non_blank {
    my ($value) = @_;
    return unless defined $value;
    return if $value =~ /^\s*$/;
    return $value;
}

# Return the value, or '' when it is undefined, so that short input rows do
# not trigger "uninitialized value" warnings under -w.
sub _or_empty {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Build one frequency or score column.
#   $count == 1: the first value, or '0' when it is missing/empty.
#   $count == 2: "first,second" with empty strings replaced by 0, or '0,0'
#                when either value is undefined.
sub _allele_values {
    my ($count, $first, $second) = @_;
    if ($count == 1) {
        return (defined $first && $first ne '') ? $first : '0';
    }
    return '0,0' unless defined $first && defined $second;
    return join(',', map { $_ eq '' ? 0 : $_ } ($first, $second));
}

# Work out the allele string and allele count for a hom/het non-SNP record.
# $allele1/$allele2 must already be undef when blank.  Returns
# (alleleString, count), or an empty list when the record cannot be
# represented and should be skipped.
sub _diploid_indel_allele {
    my ($zygosity, $varType, $allele1, $allele2) = @_;
    my $have1 = defined $allele1;
    my $have2 = defined $allele2;

    if ($varType eq 'ins') {
        if ($have1 && $have2) {
            return ($allele1, 1) if $allele1 eq $allele2;
            # a '?' allele is unknown sequence, nothing usable to report
            return ("$allele1/$allele2", 2)
                if $allele1 ne '?' && $allele2 ne '?';
            return;
        }
        return ("$allele2/-", 2) if $have2;
        return ("$allele1/-", 2) if $have1;
        return;
    }

    if ($varType eq 'del') {
        return ('-', 1) if !$have1 && !$have2;    #homozygous deletion
        return if $have1 && $have2;
        return (($have2 ? $allele2 : $allele1) . '/-', 2);
    }

    if ($varType eq 'sub') {    #multiple nt substitutions
        my $seq1 = _or_empty($allele1);
        my $seq2 = _or_empty($allele2);
        return ($seq1, 1) if $seq1 eq $seq2;
        # NOTE(review): a blank allele on one side yields "/SEQ" or "SEQ/"
        # here; kept as is because the intended representation is unclear.
        return ("$seq1/$seq2", 2);
    }

    if ($varType eq 'complex') {    #treat same as multi-nt sub
        if ($zygosity =~ /het-alt/) {
            $allele1 = '-' unless $have1;
            $allele2 = '-' unless $have2;
        }
        return unless defined $allele1 && defined $allele2;
        return ($allele1, 1) if $allele1 eq $allele2;
        return ("$allele1/$allele2", 2);
    }

    return;
}

# Convert one split input row into one output line.
#   $f        array ref of the tab-separated fields of the row
#   $snpsOnly true: convert only 'snp' records; false: only indel-type ones
#   $addRef   true: append the ref allele (field 7) as a final column
# Returns the output line including its trailing newline, or nothing when
# the row produces no output.
sub convert_fields {
    my ($f, $snpsOnly, $addRef) = @_;

    my $zygosity = $f->[5];
    return unless $zygosity;    #WHAT? most likely still zipped?
    my $varType = _or_empty($f->[6]);

    my ($allele, $count);
    if ($zygosity =~ /(?:hom|het)/) {    #haploid chrX and chrY?
        if ($snpsOnly) {
            return unless $varType eq 'snp';
            my $seq1 = _or_empty($f->[8]);
            my $seq2 = _or_empty($f->[9]);
            # identical alleles are reported once (should be homozygous)
            ($allele, $count) =
                $seq1 eq $seq2 ? ($seq1, 1) : ("$seq1/$seq2", 2);
        }
        else {
            my $seq1 = _non_blank($f->[8]);
            my $seq2 = _non_blank($f->[9]);
            ($allele, $count) =
                _diploid_indel_allele($zygosity, $varType, $seq1, $seq2);
            return unless $allele;
        }
    }
    elsif ($zygosity eq 'hap') {
        if ($snpsOnly) {
            return unless $varType eq 'snp';
            $allele = $f->[8];
        }
        else {
            return unless $varType =~ /(?:del|ins|sub)/;
            if ($varType eq 'del') {
                $allele = '-';    #deletion
            }
            elsif ($varType eq 'ins' || $varType eq 'sub') {
                $allele = _non_blank($f->[8]);
            }
        }
        return unless $allele;
        $count = 1;
    }
    else {
        return;
    }

    # Frequencies always come from fields 16/17 and scores from 10/11, for
    # haploid SNPs as well, so the two columns mean the same thing in every
    # row of the output.
    my $freq = _allele_values($count, $f->[16], $f->[17]);
    my $sc   = _allele_values($count, $f->[10], $f->[11]);

    my @cols = ($f->[2], $f->[3], $f->[4], $allele, $count, $freq, $sc);
    push @cols, _or_empty($f->[7]) if $addRef;
    return join("\t", @cols) . "\n";
}

# Command-line entry point: parse the optional flags, read the input file
# and print the converted rows on STDOUT.  Dies when the input cannot be
# opened or closed.  Returns the process exit status.
sub main {
    my @args = @_;

    my $snpsOnly = 1;    #flag for if doing SNPs or indels
    my $addRef   = 0;    #flag whether to append ref allele
    if (@args && $args[0] eq 'indel')  { shift @args; $snpsOnly = 0; }
    if (@args && $args[0] eq 'addRef') { shift @args; $addRef   = 1; }
    if (!@args) {
        print STDERR "usage: master2pg.pl [indel addRef] infile.masterVar > outfile.pgSnp\n";
        return 0;
    }

    my $in = shift @args;
    my $fh;
    if ($in eq '-') {
        # '-' keeps meaning "read standard input"
        $fh = \*STDIN;
    }
    else {
        # three-argument open: the file name is never parsed for a mode or
        # a pipe, whatever characters it contains
        open($fh, '<', $in) or die "Couldn't open input file $in, $!\n";
    }

    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^#/;
        next if $line =~ /^>/;        #headers
        next if $line =~ /^\s*$/;
        my @f = split(/\t/, $line);
        my $row = convert_fields(\@f, $snpsOnly, $addRef);
        print $row if defined $row;
    }

    close $fh or die "Couldn't close $in, $!\n";
    return 0;
}

# Run only when executed as a script, so the file can also be loaded with
# require/do in order to call convert_fields() directly.
exit main(@ARGV) unless caller;

1;