Mercurial > repos > miller-lab > snp_analysis_conversion
annotate gd_snp2vcf.pl @ 3:edf12470a1a6 default tip
Bugfix from Belinda, in vcf2pgSnp.pl
author | Cathy Riemer <cathy+hg@bx.psu.edu> |
---|---|
date | Thu, 19 Mar 2015 12:06:34 -0400 |
parents | 35c20b109be5 |
children |
rev | line source |
---|---|
2
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
1 #!/usr/bin/perl -w |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
2 use strict; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
3 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
4 #convert from gd_snp file to vcf file (with dbSNP fields) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
5 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
6 #gd_snp table format: |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
7 #1. chr |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
8 #2. position (0 based) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
9 #3. ref allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
10 #4. second allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
11 #5. overall quality |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
12 #foreach individual (6-9, 10-13, ...) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
13 #a. count of allele in 3 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
14 #b. count of allele in 4 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
15 #c. genotype call (-1, or count of ref allele) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
16 #d. quality of genotype call (quality of non-ref allele from masterVar) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
17 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
18 if (!@ARGV) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
19 print "usage: gd_snp2vcf.pl file.gd_snp[.gz|.bz2] -geno=8[,12:16,20...] -handle=HANDLE -batch=BATCHNAME -ref=REFERENCEID [-bioproj=XYZ -biosamp=ABC -pop=POPID[,POPID2...] -chrCol=9 -posCol=9 ] > snpsForSubmission.vcf\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
20 exit; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
21 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
22 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
23 my $in = shift @ARGV; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
24 my $genoCols = ''; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
25 my $handle; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
26 my $batch; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
27 my $bioproj; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
28 my $biosamp; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
29 my $ref; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
30 my $pop; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
31 my $cr = 0; #allow to use alternate reference? |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
32 my $cp = 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
33 my $meta; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
34 foreach (@ARGV) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
35 if (/-geno=([0-9,]+)/) { $genoCols .= "$1:"; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
36 elsif (/-handle=(.*)/) { $handle = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
37 elsif (/-batch=(.*)/) { $batch = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
38 elsif (/-bioproj=(.*)/) { $bioproj = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
39 elsif (/-biosamp=(.*)/) { $biosamp = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
40 elsif (/-ref=(.*)/) { $ref = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
41 elsif (/-population=(\S+)/) { $pop = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
42 elsif (/-chrCol=(\d+)/) { $cr = $1 - 1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
43 elsif (/-posCol=(\d+)/) { $cp = $1 - 1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
44 elsif (/-metaOut=(.*)/) { $meta = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
45 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
46 if ($cr < 0 or $cp < 0) { die "ERROR the column numbers should be 1 based.\n"; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
47 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
48 #remove trailing delimiters |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
49 $genoCols =~ s/,:/:/g; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
50 $genoCols =~ s/[,:]$//; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
51 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
52 my @gnc = split(/,|:/, $genoCols); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
53 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
54 if ($in =~ /.gz$/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
55 open(FH, "zcat $in |") or die "Couldn't open $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
56 }elsif ($in =~ /.bz2$/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
57 open(FH, "bzcat $in |") or die "Couldn't open $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
58 }else { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
59 open(FH, $in) or die "Couldn't open $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
60 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
61 my @head = prepHeader(); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
62 if (@head) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
63 print join("\n", @head), "\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
64 #now column headers |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
65 print "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
66 if (defined $pop) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
67 $pop =~ s/,$//; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
68 my $t = $pop; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
69 $t =~ s/,/\t/g; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
70 print "\tFORMAT\t$t"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
71 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
72 print "\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
73 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
74 while (<FH>) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
75 chomp; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
76 if (/^#/) { next; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
77 if (/^\s*$/) { next; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
78 my @f = split(/\t/); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
79 #vcf columns: chrom pos id ref alt qual filter info |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
80 # info must have VRT=[0-9] 1==SNV 2=indel 6=NoVariation 8=MNV ... |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
81 my $vrt = 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
82 if ($f[2] !~ /^[ACTG]$/ or $f[3] !~ /^[ACTG]$/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
83 die "Sorry this can only do SNV's at this time\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
84 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
85 if (scalar @gnc == 1) { #single genotype column |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
86 if (!defined $f[4] or $f[4] == -1) { $f[4] = '.'; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
87 if ($f[$gnc[0]-1] == 2) { $vrt = 6; } #reference match |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
88 print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
89 #TODO? put read counts in comment? |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
90 }elsif ($pop) { #do as population |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
91 my @cols; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
92 foreach my $gp (split(/:/,$genoCols)) { #foreach population |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
93 my @g = split(/,/, $gp); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
94 my $totChrom = 2*(scalar @g); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
95 my $totRef = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
96 foreach my $i (@g) { if ($f[$i-1] == -1) { next; } $totRef += $f[$i-1]; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
97 if ($totChrom == $totRef) { $vrt = 6; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
98 if ($totRef > $totChrom) { die "ERROR likely the wrong column was chosen for genotype\n"; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
99 my $altCnt = $totChrom - $totRef; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
100 push(@cols, "$totChrom:$altCnt"); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
101 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
102 print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\tNA:AC\t", join("\t", @cols), "\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
103 }else { #leave allele counts off |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
104 my $totChrom = 2*(scalar @gnc); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
105 my $totRef = 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
106 foreach my $i (@gnc) { if ($f[$i-1] == -1) { next; } $totRef += $f[$i-1]; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
107 if ($totChrom == $totRef) { $vrt = 6; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
108 print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
109 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
110 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
111 close FH or die "Couldn't close $in, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
112 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
113 if ($meta) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
114 open(FH, ">", $meta) or die "Couldn't open $meta, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
115 print FH "TYPE: CONT\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
116 "HANDLE: $handle\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
117 "NAME: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
118 "FAX: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
119 "TEL: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
120 "EMAIL: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
121 "LAB: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
122 "INST: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
123 "ADDR: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
124 "||\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
125 "TYPE: METHOD\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
126 "HANDLE: $handle\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
127 "ID: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
128 "METHOD_CLASS: Sequence\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
129 "TEMPLATE_TYPE: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
130 "METHOD:\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
131 "||\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
132 if ($pop) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
133 my @p = split(/,/, $pop); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
134 foreach my $t (@p) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
135 print FH |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
136 "TYPE: POPULATION\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
137 "HANDLE: $handle\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
138 "ID: $t\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
139 "POPULATION: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
140 "||\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
141 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
142 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
143 print FH "TYPE: SNPASSAY\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
144 "HANDLE: $handle\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
145 "BATCH: $batch\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
146 "MOLTYPE: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
147 "METHOD: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
148 "ORGANISM: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
149 "||\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
150 "TYPE: SNPPOPUSE | SNPINDUSE\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
151 "HANDLE: $handle\n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
152 "BATCH: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
153 "METHOD: \n", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
154 "||\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
155 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
156 close FH or die "Couldn't close $meta, $!\n"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
157 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
158 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
159 exit 0; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
160 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
161 #parse old header and add or create new |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
162 sub prepHeader { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
163 my @h; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
164 $h[0] = '##fileformat=VCFv4.1'; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
165 my ($day, $mo, $yr) = (localtime)[3,4,5]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
166 $mo++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
167 $yr+=1900; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
168 $h[1] = '##fileDate=' . "$yr$mo$day"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
169 $h[2] = "##handle=$handle"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
170 $h[3] = "##batch=$batch"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
171 my $i = 4; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
172 if ($bioproj) { $h[$i] = "##bioproject_id=$bioproj"; $i++; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
173 if ($biosamp) { $h[$i] = "##biosample_id=$biosamp"; $i++; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
174 $h[$i] = "##reference=$ref"; ##reference=GCF_999999.99 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
175 #$i++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
176 #$h[$i] = '##INFO=<ID=LID, Number=1,Type=string, Description="Unique local variation ID or name for display. The LID provided here combined with the handle must be unique for a particular submitter.">' |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
177 $i++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
178 $h[$i] = '##INFO=<ID=VRT,Number=1,Type=Integer,Description="Variation type,1 - SNV: single nucleotide variation,2 - DIV: deletion/insertion variation,3 - HETEROZYGOUS: variable, but undefined at nucleotide level,4 - STR: short tandem repeat (microsatellite) variation, 5 - NAMED: insertion/deletion variation of named repetitive element,6 - NO VARIATON: sequence scanned for variation, but none observed,7 - MIXED: cluster contains submissions from 2 or more allelic classes (not used) ,8 - MNV: multiple nucleotide variation with all eles of common length greater than 1,9 - Exception">'; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
179 #sometimes have allele freqs? |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
180 if (defined $pop) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
181 $i++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
182 $h[$i] = "##FORMAT=<ID=NA,Number=1,Type=Integer,Description=\"Number of alleles for the population.\""; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
183 $i++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
184 $h[$i] = '##FORMAT=<ID=AC,Number=.,Type=Integer,Description="Allele count for each alternate allele.">'; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
185 my @p = split(/,/, $pop); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
186 foreach my $t (@p) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
187 $i++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
188 $h[$i] = "##population_id=$t"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
189 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
190 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
191 #PMID? |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
192 ##INFO=<ID=PMID,Number=.,Type=Integer,Description="PubMed ID linked to variation if available."> |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
193 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
194 return @h; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
195 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
196 ####End |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
197 |