Mercurial > repos > miller-lab > snp_analysis_conversion
annotate pgSnp2gd_snp.pl @ 3:edf12470a1a6 default tip
Bugfix from Belinda, in vcf2pgSnp.pl
author | Cathy Riemer <cathy+hg@bx.psu.edu> |
---|---|
date | Thu, 19 Mar 2015 12:06:34 -0400 |
parents | 35c20b109be5 |
children |
rev | line source |
---|---|
2
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
1 #!/usr/bin/perl -w |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
2 use strict; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
3 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
4 #convert from pgSnp file to snp table (Webb format?) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
5 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
6 #snp table format: |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
7 #1. chr |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
8 #2. position (0 based) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
9 #3. ref allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
10 #4. second allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
11 #5. overall quality |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
12 #foreach individual (6-9, 10-13, ...) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
13 #a. count of allele in 3 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
14 #b. count of allele in 4 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
15 #c. genotype call (-1, or count of ref allele) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
16 #d. quality of genotype call (quality of non-ref allele from masterVar) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
17 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
18 if (!@ARGV) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
19 print "usage: pgSnp2gd_snp.pl file.pgSnp[.gz|.bz2] [-tab=snpTable.txt -addColsOnly -build=hg19 -name=na -ref=#1based -chr=#1based ] > newSnpTable.txt\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
20 exit; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
21 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
22 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
23 my $in = shift @ARGV; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
24 my $tab; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
25 my $tabOnly; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
26 my $build; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
27 my $name; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
28 my $ref; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
29 my $binChr = 1; #position of chrom column, indicates if bin is added |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
30 foreach (@ARGV) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
31 if (/-tab=(.*)/) { $tab = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
32 elsif (/-addColsOnly/) { $tabOnly = 1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
33 elsif (/-build=(.*)/) { $build = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
34 elsif (/-name=(.*)/) { $name = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
35 elsif (/-ref=(\d+)/) { $ref = $1 - 1; } #go to index |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
36 elsif (/-chr=(\d+)/) { $binChr = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
37 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
38 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
39 if ($binChr == 2 && $ref) { $ref--; } #shift over by 1, we will delete bin |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
40 if ((!$tab or !$tabOnly) && !$ref) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
41 print "Error the reference allele must be in a column in the file if not just adding to a previous SNP table.\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
42 exit; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
43 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
44 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
45 #WARNING loads snp table in memory, this could take > 1G ram |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
46 my %old; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
47 my $colcnt = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
48 my @head; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
49 if ($tab) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
50 open(FH, $tab) or die "Couldn't open $tab, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
51 while (<FH>) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
52 chomp; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
53 if (/^#/) { push(@head, $_); next; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
54 my @f = split(/\t/); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
55 $old{"$f[0]:$f[1]"} = join("\t", @f); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
56 $colcnt = scalar @f; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
57 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
58 close FH or die "Couldn't close $tab, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
59 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
60 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
61 if ($in =~ /.gz$/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
62 open(FH, "zcat $in |") or die "Couldn't open $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
63 }elsif ($in =~ /.bz2$/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
64 open(FH, "bzcat $in |") or die "Couldn't open $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
65 }else { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
66 open(FH, $in) or die "Couldn't open $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
67 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
68 prepHeader(); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
69 if (@head) { #keep old header, add new? |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
70 print join("\n", @head), "\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
71 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
72 while (<FH>) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
73 chomp; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
74 if (/^#/) { next; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
75 if (/^\s*$/) { next; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
76 my @f = split(/\t/); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
77 if ($binChr == 2) { #must have a bin column prepended on the beginning |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
78 shift @f; #delete it |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
79 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
80 if (!$f[3]) { next; } #WHAT? most likely still zipped? |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
81 if ($f[4] > 2) { next; } #can only do cases of 2 alleles |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
82 if ($f[2] == $f[1] or $f[2] - $f[1] != 1) { next; } #no indels |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
83 if ($f[3] =~ /-/) { next; } #no indels |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
84 #if creating a new table need the reference allele in a column |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
85 if (%old && $old{"$f[0]:$f[1]"}) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
86 my @o = split(/\t/, $old{"$f[0]:$f[1]"}); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
87 my $freq = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
88 my $freq2 = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
89 my $sc; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
90 my $g = 1; #genotype == ref allele count |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
91 if ($f[4] == 1) { #should be homozygous |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
92 if ($f[3] eq $o[2]) { $g = 2; $freq = $f[5]; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
93 elsif ($f[3] eq $o[3]) { $g = 0; $freq2 = $f[5]; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
94 else { next; } #doesn't match either allele, skip |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
95 $sc = $f[6]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
96 }else { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
97 my $a = 0; #index of a alleles, freq, scores |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
98 my $b = 1; #same for b |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
99 my @all = split(/\//, $f[3]); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
100 if ($o[2] ne $all[0] && $o[2] ne $all[1]) { next; } #must match one |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
101 if ($o[3] ne $all[0] && $o[3] ne $all[1]) { next; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
102 if ($o[2] eq $all[1]) { #switch indexes |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
103 $a = 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
104 $b = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
105 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
106 my @fr = split(/,/, $f[5]); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
107 $freq = $fr[$a]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
108 $freq2 = $fr[$b]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
109 my @s = split(/,/, $f[6]); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
110 $sc = $s[$b]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
111 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
112 #print old |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
113 print $old{"$f[0]:$f[1]"}; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
114 #add new columns |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
115 print "\t$freq\t$freq2\t$g\t$sc\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
116 $old{"$f[0]:$f[1]"} = ''; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
117 }elsif (!$tabOnly) { #new table, or don't have this SNP |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
118 #need reference allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
119 if ($f[3] !~ /$f[$ref]/ && $f[4] == 2) { next; } #no reference allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
120 my $freq = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
121 my $freq2 = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
122 my $sc; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
123 my $g = 1; #genotype == ref allele count |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
124 my $alt; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
125 if ($f[4] == 1) { #should be homozygous |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
126 if ($f[3] eq $f[$ref]) { $g = 2; $freq = $f[5]; $alt = 'N'; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
127 else { $g = 0; $freq2 = $f[5]; $alt = $f[3]; } #matches alternate |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
128 $sc = $f[6]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
129 }else { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
130 my $a = 0; #index of a alleles, freq, scores |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
131 my $b = 1; #same for b |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
132 my @all = split(/\//, $f[3]); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
133 if ($f[$ref] ne $all[0] && $f[$ref] ne $all[1]) { next; } #must match one |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
134 if ($f[$ref] eq $all[1]) { #switch indexes |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
135 $a = 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
136 $b = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
137 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
138 my @fr = split(/,/, $f[5]); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
139 $freq = $fr[$a]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
140 $freq2 = $fr[$b]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
141 my @s = split(/,/, $f[6]); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
142 $sc = $s[$b]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
143 $alt = $all[$b]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
144 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
145 #print initial columns |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
146 print "$f[0]\t$f[1]\t$f[$ref]\t$alt\t-1"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
147 #pad for other individuals if needed |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
148 my $i = 5; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
149 while ($i < $colcnt) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
150 print "\t-1\t-1\t-1\t-1"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
151 $i += 4; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
152 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
153 #add new columns |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
154 print "\t$freq\t$freq2\t$g\t$sc\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
155 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
156 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
157 close FH or die "Couldn't close $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
158 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
159 #if adding to a snp table, now we need to finish those not in the latest set |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
160 foreach my $k (keys %old) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
161 if ($old{$k} ne '') { #not printed yet |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
162 print $old{$k}, "\t-1\t-1\t-1\t-1\n"; #plus blank for this one |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
163 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
164 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
165 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
166 exit; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
167 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
168 #parse old header and add or create new |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
169 sub prepHeader { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
170 if (!$build) { $build = 'hg19'; } #set default |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
171 my @cnames; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
172 my @ind; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
173 my $n; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
174 if (@head) { #parse previous header |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
175 my $h = join("", @head); #may split between lines |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
176 if ($h =~ /"column_names":\[(.*?)\]/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
177 my @t = split(/,/, $1); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
178 foreach (@t) { s/"//g; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
179 @cnames = @t; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
180 $n = $cnames[$#cnames]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
181 $n =~ s/Q//; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
182 $n++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
183 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
184 if ($h =~ /"dbkey":"(.*?)"/) { $build = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
185 if ($h =~ /"individuals":\[(.*)\]/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
186 my $t = $1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
187 $t =~ s/\]\].*/]/; #remove if there is more categories |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
188 @ind = split(/,/, $t); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
189 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
190 }else { #start new header |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
191 @cnames = ("chr", "pos", "A", "B", "Q"); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
192 $n = 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
193 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
194 #add current |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
195 if (!$name) { $name= 'na'; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
196 my $stcol = $colcnt + 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
197 if ($stcol == 1) { $stcol = 6; } #move past initial columns |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
198 push(@ind, "[\"$name\",$stcol]"); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
199 push(@cnames, "${n}A", "${n}B", "${n}G", "${n}Q"); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
200 #reassign head |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
201 undef @head; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
202 foreach (@cnames) { $_ = "\"$_\""; } #quote name |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
203 $head[0] = "#{\"column_names\":[" . join(",", @cnames) . "],"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
204 $head[1] = "#\"individuals\":[" . join(",", @ind) . "],"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
205 $head[2] = "#\"dbkey\":\"$build\",\"pos\":2,\"rPos\":2,\"ref\":1,\"scaffold\":1,\"species\":\"$build\"}"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
206 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
207 ####End |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
208 |