comparison master2pg.pl @ 2:35c20b109be5

Retrying upload with "bare" tarball (i.e. one without a top containing directory).
author cathy
date Tue, 28 May 2013 17:54:02 -0400
parents
children
comparison
equal deleted inserted replaced
1:1d8b23a21735 2:35c20b109be5
#!/usr/bin/perl -w
use strict;
use warnings;

# master2pg.pl - convert from master variant file to pgSnp.
#
# usage: master2pg.pl [indel addRef] infile.masterVar > outfile.pgSnp
#
#   indel   optional first word: emit ins/del/sub/complex calls instead of SNPs
#   addRef  optional next word: append the reference allele (input column 7)
#           as an extra trailing output column
#
# Input is tab separated. Columns are addressed by 0-based index:
#   2,3,4  copied verbatim as the first three output columns
#   5      zygosity (anything containing hom/het, or exactly 'hap')
#   6      varType  (snp, ins, del, sub, complex)
#   7      reference allele (only used with addRef)
#   8,9    allele 1 / allele 2 sequence
#   10,11  per-allele values written to the last (score) output column
#   16,17  per-allele values written to the frequency output column
#          NOTE(review): 16/17 are presumably per-allele read counts in the
#          masterVar layout this was written for - confirm against the
#          format version of the input files.
#
# Output columns: col2, col3, col4, alleles ("A" or "A/G"), allele count,
# frequency list, score list [, reference].

# Run the conversion only when executed as a script, so the helper subs
# below can be loaded (require/do) and exercised without side effects.
unless (caller) {
    main(@ARGV);
    exit;
}

# Parse the command line, read the input file line by line and print one
# pgSnp record to STDOUT for every variant line that can be converted.
# Dies if the input file cannot be opened or closed.
sub main {
    my @args = @_;

    my $snpsOnly = 1;    #flag for if doing SNPs or indels
    my $addRef   = 0;    #flag whether to append ref allele
    if (@args && $args[0] eq 'indel')  { shift @args; $snpsOnly = 0; }
    if (@args && $args[0] eq 'addRef') { shift @args; $addRef   = 1; }
    if (!@args) {
        print STDERR "usage: master2pg.pl [indel addRef] infile.masterVar > outfile.pgSnp\n";
        exit;
    }

    my $in = shift @args;
    my $fh;
    if ($in eq '-') {
        # The former two-argument open() treated '-' as STDIN; keep that.
        $fh = \*STDIN;
    }
    else {
        # Three-argument open: the file name is never parsed for a mode or
        # pipe character.
        open($fh, '<', $in) or die "Couldn't open input file $in, $!\n";
    }

    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^#/;       #comments
        next if $line =~ /^>/;       #headers
        next if $line =~ /^\s*$/;    #blank lines
        my @f = split(/\t/, $line);
        my $record = _pgsnp_line(\@f, $snpsOnly, $addRef);
        if (defined $record) { print $record; }
    }

    close $fh or die "Couldn't close $in, $!\n";
    return;
}

# Convert one split input line into a pgSnp output line.
#
#   $fields    array ref of the tab separated input columns
#   $snpsOnly  true: only varType 'snp' is converted; false: only indels
#   $addRef    true: append input column 7 as a trailing output column
#
# Returns the output line including the trailing newline, or undef when the
# line is skipped (no zygosity, unhandled zygosity/varType combination, or
# no allele string could be built).
sub _pgsnp_line {
    my ($fields, $snpsOnly, $addRef) = @_;
    my @f = @{$fields};

    #WHAT? most likely still zipped?
    return unless $f[5];
    my $zygosity = $f[5];
    # Short rows may lack the varType column; treat that as "no type".
    my $varType = defined $f[6] ? $f[6] : '';

    my ($allele, $count);
    if ($zygosity =~ /(hom|het)/) {    #haploid chrX and chrY?
        if ($snpsOnly) {
            return unless $varType eq 'snp';
            # Diploid SNPs are always printed, even with an empty allele.
            ($allele, $count) = _diploid_snp_allele($f[8], $f[9]);
        }
        else {
            ($allele, $count)
                = _diploid_indel_allele($zygosity, $varType, $f[8], $f[9]);
            return unless $allele;
        }
    }
    elsif ($zygosity eq 'hap') {
        $count = 1;
        if ($snpsOnly) {
            return unless $varType eq 'snp';
            $allele = $f[8];
        }
        else {
            return unless $varType =~ /(del|ins|sub)/;
            $allele = _haploid_indel_allele($varType, $f[8]);
        }
        return unless $allele;
    }
    else {
        return;
    }

    # Columns 10/11 always feed the score field and 16/17 the frequency
    # field. The haploid SNP case used to have the two swapped, unlike every
    # other case in this script.
    my $sc   = _per_allele_value($f[10], $f[11], $count);
    my $freq = _per_allele_value($f[16], $f[17], $count);

    my @cols = (@f[2, 3, 4], $allele, $count, $freq, $sc);
    if ($addRef) { push @cols, $f[7]; }
    # Missing trailing columns print as empty strings.
    return join("\t", map { defined $_ ? $_ : '' } @cols) . "\n";
}

# Allele string and allele count for a diploid SNP.
# Equal alleles (should be homozygous) give ("X", 1), otherwise ("X/Y", 2).
# Missing alleles are treated as empty strings.
sub _diploid_snp_allele {
    my ($first, $second) = @_;
    $first  = '' unless defined $first;
    $second = '' unless defined $second;
    return ($first, 1) if $first eq $second;
    return ("$first/$second", 2);
}

# Allele string and allele count for a diploid ins/del/sub/complex call.
# Blank (empty or whitespace-only) alleles count as missing. Returns
# (undef, count) when no allele string can be built for the combination.
sub _diploid_indel_allele {
    my ($zygosity, $varType, $first, $second) = @_;
    undef $first  if defined $first  && $first  =~ /^\s*$/;
    undef $second if defined $second && $second =~ /^\s*$/;
    my $both = defined $first && defined $second;

    if ($varType eq 'ins') {
        return ($first, 1) if $both && $first eq $second;
        # Unknown ('?') alleles are only accepted when both are identical.
        return ("$first/$second", 2)
            if $both && $first ne '?' && $second ne '?';
        return ("$second/-", 2) if !defined $first  && defined $second;
        return ("$first/-",  2) if defined $first   && !defined $second;
        return (undef, 2);
    }
    if ($varType eq 'del') {
        return ('-', 1) if !defined $first && !defined $second;    #homozygous deletion
        return ("$second/-", 2) if !defined $first;
        return ("$first/-",  2) if !defined $second;
        return (undef, 2);    # both alleles present: not converted
    }
    if ($varType eq 'sub') {    #multiple nt substitutions
        my $one = defined $first  ? $first  : '';
        my $two = defined $second ? $second : '';
        # Both missing yields an undefined allele, so nothing is printed.
        return ($first, 1) if $one eq $two;
        # NOTE(review): a single missing allele yields "X/" or "/X" here,
        # not "X/-" as for ins/del - kept as in the original; confirm intent.
        return ("$one/$two", 2);
    }
    if ($varType eq 'complex') {    #treat same as multi-nt sub
        if ($zygosity =~ /het-alt/) {
            $first  = '-' unless defined $first;
            $second = '-' unless defined $second;
        }
        if (defined $first && defined $second) {
            return ($first, 1) if $first eq $second;
            return ("$first/$second", 2);
        }
        return (undef, 2);
    }
    return (undef, 2);
}

# Allele string for a haploid ins/del/sub call; the allele count is always 1.
# A deletion is '-'; ins and sub use the allele sequence; returns undef when
# the sequence is missing or blank, or the varType is none of the three.
sub _haploid_indel_allele {
    my ($varType, $first) = @_;
    return '-' if $varType eq 'del';    #deletion
    return unless $varType eq 'ins' || $varType eq 'sub';
    return if !defined $first || $first =~ /^\s*$/;
    return $first;
}

# Build the frequency or score field from the two per-allele columns.
# Empty columns count as 0. With one allele the first column is used
# (default '0'); with two alleles both columns are joined with a comma, and
# '0,0' is used unless both are present.
sub _per_allele_value {
    my ($first, $second, $count) = @_;
    $first  = 0 if defined $first  && $first  eq '';
    $second = 0 if defined $second && $second eq '';
    if ($count == 1) {
        return defined $first ? $first : '0';
    }
    return (defined $first && defined $second) ? "$first,$second" : '0,0';
}

1;