view master2pg.pl @ 2:35c20b109be5

Retrying upload with "bare" tarball (i.e. one without a top containing directory).
author cathy
date Tue, 28 May 2013 17:54:02 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl -w
use strict;

#convert from master variant file to pgSnp
my $snpsOnly = 1; #flag for if doing SNPs or indels
my $addRef = 0;   #flag whether to append ref allele
if (@ARGV && $ARGV[0] eq 'indel') { shift @ARGV; $snpsOnly = 0; }
if (@ARGV && $ARGV[0] eq 'addRef') { shift @ARGV; $addRef = 1; }
if (!@ARGV) {
   print STDERR "usage: master2pg.pl [indel addRef] infile.masterVar > outfile.pgSnp\n";
   exit;
}
my $in = shift @ARGV;
open(FH, $in) or die "Couldn't open input file $in, $!\n";

while (<FH>) {
   chomp;
   if (/^#/) { next; }
   if (/^>/) { next; } #headers
   if (/^\s*$/) { next; } 
   my @f = split(/\t/);
   if (!$f[5]) { next; } #WHAT? most likely still zipped?
   if ($f[5] =~ /(hom|het)/) { #zygosity #haploid chrX and chrY?
      #only get snps for now
      if ($snpsOnly && $f[6] eq 'snp') { #varType
         my $a;
         my $c = 2;
         my $freq = 0; #set defaults
         my $sc = 0;
         if ($f[8] eq $f[9]) { #should be homozygous?
            $a = $f[8];
            $c = 1;
         }else {
            $a = "$f[8]/$f[9]";
            $sc = "0,0";  #set defaults in case not included
            $freq = '0,0';
         }
         if (defined $f[10] && $f[10] eq '') { $f[10] = 0; }
         if (defined $f[11] && $f[11] eq '') { $f[11] = 0; }
         if (defined $f[10] && $c == 1) {
            $sc = $f[10];
         }elsif (defined $f[10] && defined $f[11] && $c == 2) {
            $sc = "$f[10],$f[11]";
         }
         if (defined $f[16] && $f[16] eq '') { $f[16] = 0; }
         if (defined $f[17] && $f[17] eq '') { $f[17] = 0; }
         if (defined $f[16] && $c == 1) {
            $freq = $f[16];
         }elsif (defined $f[16] && defined $f[17] && $c == 2) {
            $freq = "$f[16],$f[17]";
         }
         print "$f[2]\t$f[3]\t$f[4]\t$a\t$c\t$freq\t$sc";
         if ($addRef) { print "\t$f[7]\n"; }
         else { print "\n"; }
      }elsif (!$snpsOnly) {
         if ($f[8] =~ /^\s*$/) { undef $f[8]; }
         if ($f[9] =~ /^\s*$/) { undef $f[9]; }
         my $a;
         my $c = 2;
         #do indels
         if ($f[6] eq "ins") {
            if (defined $f[8] && defined $f[9] && $f[8] eq $f[9]) { $a = $f[8]; $c = 1; }
            elsif (defined $f[8] && defined $f[9] && $f[8] ne '?' && $f[9] ne '?') { 
               $a = "$f[8]/$f[9]";
            }elsif (!defined $f[8] && defined $f[9]) {
               $a = "$f[9]/-";
            }elsif (defined $f[8] && !defined $f[9]) {
               $a = "$f[8]/-";
            }
         }elsif ($f[6] eq "del") {
            if (!defined $f[8] && !defined $f[9]) {
               $a = '-'; #homozygous deletion
               $c = 1;
            }elsif (!defined $f[8] && defined $f[9]) {
               $a = "$f[9]/-";
            }elsif (defined $f[8] && !defined $f[9]) {
               $a = "$f[8]/-";
            }            
         }elsif ($f[6] eq "sub") { #multiple nt substitutions
            if ($f[8] eq $f[9]) {
               $a = $f[8];
               $c = 1;
            }else {
               $a = "$f[8]/$f[9]";
            }
         }elsif ($f[6] eq "complex") { #treat same as multi-nt sub
            if ($f[5] =~ /het-alt/ && !defined $f[8]) { $f[8] = '-'; }
            if ($f[5] =~ /het-alt/ && !defined $f[9]) { $f[9] = '-'; }
            if (defined $f[8] && defined $f[9] && $f[8] eq $f[9]) {
               $c = 1;
               $a = $f[8];
            }elsif (defined $f[8] && defined $f[9]) {
               $a = "$f[8]/$f[9]";
            }
         }
         my $sc = '0';
         my $freq = '0';
         if ($c == 2) { $sc = '0,0'; $freq = '0,0'; }
         if (defined $f[10] && $f[10] eq '') { $f[10] = 0; }
         if (defined $f[11] && $f[11] eq '') { $f[11] = 0; }
         if (defined $f[10] && $c == 1) {
            $sc = $f[10];
         }elsif (defined $f[10] && defined $f[11] && $c == 2) {
            $sc = "$f[10],$f[11]";
         }
         if (defined $f[16] && $f[16] eq '') { $f[16] = 0; }
         if (defined $f[17] && $f[17] eq '') { $f[17] = 0; }
         if (defined $f[16] && $c == 1) {
            if ($f[16] eq '') { $f[16] = 0; }
            $freq = $f[16];
         }elsif (defined $f[16] && defined $f[17] && $c == 2) {
            $freq = "$f[16],$f[17]";
         }
         if ($a) {
            print "$f[2]\t$f[3]\t$f[4]\t$a\t$c\t$freq\t$sc";
            if ($addRef) { print "\t$f[7]\n"; }
            else { print "\n"; }
         }
      }
   }elsif ($f[5] eq 'hap' && $f[6] eq 'snp' && $snpsOnly) {
      my $c = 1;
      my $freq = '0';
      if (defined $f[10] && $f[10] ne '') { $freq = $f[10]; }
      my $sc = '0';
      if (defined $f[16] && $f[16] ne '') { $sc = $f[16]; }
      if ($f[8]) {
         print "$f[2]\t$f[3]\t$f[4]\t$f[8]\t$c\t$freq\t$sc";
         if ($addRef) { print "\t$f[7]\n"; }
         else { print "\n"; }
      }   
   }elsif ($f[5] eq 'hap' && !$snpsOnly && $f[6] =~ /(del|ins|sub)/) {
      if ($f[8] =~ /^\s*$/) { undef $f[8]; }
      my $a;
      my $c = 1;
      #do indels
      if ($f[6] eq "ins") {
         $a = $f[8]; 
      }elsif ($f[6] eq "del") {
         $a = '-'; #deletion
      }elsif ($f[6] eq "sub") { #multiple nt substitutions
         $a = $f[8];
      }
      my $sc = '0';
      my $freq = '0';
      if (defined $f[10] && $f[10] ne '') { $sc = $f[10]; }
      if (defined $f[16] && $f[16] ne '') { $freq = $f[16]; }
      if ($a) {
         print "$f[2]\t$f[3]\t$f[4]\t$a\t$c\t$freq\t$sc";
         if ($addRef) { print "\t$f[7]\n"; }
         else { print "\n"; }
      }
   }
}

close FH or die "Couldn't close $in, $!\n";

exit;