diff dividePgSnpAlleles.pl @ 2:35c20b109be5

Retrying upload with "bare" tarball (i.e. one without a top containing directory).
author cathy
date Tue, 28 May 2013 17:54:02 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dividePgSnpAlleles.pl	Tue May 28 17:54:02 2013 -0400
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+use strict;
+
+#divide the alleles and their information into separate columns for pgSnp-like
+#files. Keep any additional columns beyond the pgSnp ones.
+#reads from stdin, writes to stdout
+my $ref;
+my $in;
+if (@ARGV && $ARGV[0] =~ /-ref=(\d+)/) {
+   $ref = $1 -1;
+   if ($ref == -1) { undef $ref; }
+   shift @ARGV;
+}
+if (@ARGV) { 
+   $in = shift @ARGV;
+}
+
+open(FH, $in) or die "Couldn't open $in, $!\n";
+while (<FH>) {
+   chomp;
+   my @f = split(/\t/);
+   if ($f[0] =~ /^\d+$/ && $f[1] =~ /chr/) { #has bin column shift list
+      shift @f; #remove bin
+   }
+   my @a = split(/\//, $f[3]);
+   my @fr = split(/,/, $f[5]);
+   my @sc = split(/,/, $f[6]);
+   if ($f[4] == 1) { #homozygous add N, 0, 0
+      if ($ref) { push(@a, $f[$ref]); }
+      else { push(@a, "N"); }
+      push(@fr, 0);
+      push(@sc, 0);
+   }
+   if ($f[4] > 2) { next; } #skip those with more than 2 alleles
+   print "$f[0]\t$f[1]\t$f[2]\t$a[0]\t$fr[0]\t$sc[0]\t$a[1]\t$fr[1]\t$sc[1]";
+   if (scalar @f > 7) {
+      splice(@f, 0, 7); #remove first 7
+      print "\t", join("\t", @f), "\n";
+   }else { print "\n"; }
+}
+close FH;
+
+exit;
+