annotate gd_snp2vcf.pl @ 3:edf12470a1a6 default tip

Bugfix from Belinda, in vcf2pgSnp.pl
author Cathy Riemer <cathy+hg@bx.psu.edu>
date Thu, 19 Mar 2015 12:06:34 -0400
parents 35c20b109be5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
1 #!/usr/bin/perl -w
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
2 use strict;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
3
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
4 #convert from gd_snp file to vcf file (with dbSNP fields)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
5
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
6 #gd_snp table format:
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
7 #1. chr
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
8 #2. position (0 based)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
9 #3. ref allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
10 #4. second allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
11 #5. overall quality
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
12 #foreach individual (6-9, 10-13, ...)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
13 #a. count of allele in 3
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
14 #b. count of allele in 4
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
15 #c. genotype call (-1, or count of ref allele)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
16 #d. quality of genotype call (quality of non-ref allele from masterVar)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
17
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
18 if (!@ARGV) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
19 print "usage: gd_snp2vcf.pl file.gd_snp[.gz|.bz2] -geno=8[,12:16,20...] -handle=HANDLE -batch=BATCHNAME -ref=REFERENCEID [-bioproj=XYZ -biosamp=ABC -pop=POPID[,POPID2...] -chrCol=9 -posCol=9 ] > snpsForSubmission.vcf\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
20 exit;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
21 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
22
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
23 my $in = shift @ARGV;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
24 my $genoCols = '';
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
25 my $handle;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
26 my $batch;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
27 my $bioproj;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
28 my $biosamp;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
29 my $ref;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
30 my $pop;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
31 my $cr = 0; #allow to use alternate reference?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
32 my $cp = 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
33 my $meta;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
34 foreach (@ARGV) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
35 if (/-geno=([0-9,]+)/) { $genoCols .= "$1:"; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
36 elsif (/-handle=(.*)/) { $handle = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
37 elsif (/-batch=(.*)/) { $batch = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
38 elsif (/-bioproj=(.*)/) { $bioproj = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
39 elsif (/-biosamp=(.*)/) { $biosamp = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
40 elsif (/-ref=(.*)/) { $ref = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
41 elsif (/-population=(\S+)/) { $pop = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
42 elsif (/-chrCol=(\d+)/) { $cr = $1 - 1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
43 elsif (/-posCol=(\d+)/) { $cp = $1 - 1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
44 elsif (/-metaOut=(.*)/) { $meta = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
45 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
46 if ($cr < 0 or $cp < 0) { die "ERROR the column numbers should be 1 based.\n"; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
47
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
48 #remove trailing delimiters
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
49 $genoCols =~ s/,:/:/g;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
50 $genoCols =~ s/[,:]$//;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
51
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
52 my @gnc = split(/,|:/, $genoCols);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
53
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
54 if ($in =~ /.gz$/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
55 open(FH, "zcat $in |") or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
56 }elsif ($in =~ /.bz2$/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
57 open(FH, "bzcat $in |") or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
58 }else {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
59 open(FH, $in) or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
60 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
61 my @head = prepHeader();
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
62 if (@head) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
63 print join("\n", @head), "\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
64 #now column headers
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
65 print "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
66 if (defined $pop) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
67 $pop =~ s/,$//;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
68 my $t = $pop;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
69 $t =~ s/,/\t/g;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
70 print "\tFORMAT\t$t";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
71 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
72 print "\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
73 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
74 while (<FH>) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
75 chomp;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
76 if (/^#/) { next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
77 if (/^\s*$/) { next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
78 my @f = split(/\t/);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
79 #vcf columns: chrom pos id ref alt qual filter info
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
80 # info must have VRT=[0-9] 1==SNV 2=indel 6=NoVariation 8=MNV ...
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
81 my $vrt = 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
82 if ($f[2] !~ /^[ACTG]$/ or $f[3] !~ /^[ACTG]$/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
83 die "Sorry this can only do SNV's at this time\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
84 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
85 if (scalar @gnc == 1) { #single genotype column
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
86 if (!defined $f[4] or $f[4] == -1) { $f[4] = '.'; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
87 if ($f[$gnc[0]-1] == 2) { $vrt = 6; } #reference match
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
88 print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
89 #TODO? put read counts in comment?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
90 }elsif ($pop) { #do as population
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
91 my @cols;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
92 foreach my $gp (split(/:/,$genoCols)) { #foreach population
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
93 my @g = split(/,/, $gp);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
94 my $totChrom = 2*(scalar @g);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
95 my $totRef = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
96 foreach my $i (@g) { if ($f[$i-1] == -1) { next; } $totRef += $f[$i-1]; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
97 if ($totChrom == $totRef) { $vrt = 6; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
98 if ($totRef > $totChrom) { die "ERROR likely the wrong column was chosen for genotype\n"; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
99 my $altCnt = $totChrom - $totRef;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
100 push(@cols, "$totChrom:$altCnt");
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
101 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
102 print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\tNA:AC\t", join("\t", @cols), "\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
103 }else { #leave allele counts off
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
104 my $totChrom = 2*(scalar @gnc);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
105 my $totRef = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
106 foreach my $i (@gnc) { if ($f[$i-1] == -1) { next; } $totRef += $f[$i-1]; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
107 if ($totChrom == $totRef) { $vrt = 6; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
108 print "$f[$cr]\t$f[$cp]\t$f[$cr];$f[$cp]\t$f[2]\t$f[3]\t$f[4]\t.\tVRT=$vrt\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
109 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
110 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
111 close FH or die "Couldn't close $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
112
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
113 if ($meta) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
114 open(FH, ">", $meta) or die "Couldn't open $meta, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
115 print FH "TYPE: CONT\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
116 "HANDLE: $handle\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
117 "NAME: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
118 "FAX: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
119 "TEL: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
120 "EMAIL: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
121 "LAB: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
122 "INST: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
123 "ADDR: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
124 "||\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
125 "TYPE: METHOD\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
126 "HANDLE: $handle\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
127 "ID: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
128 "METHOD_CLASS: Sequence\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
129 "TEMPLATE_TYPE: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
130 "METHOD:\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
131 "||\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
132 if ($pop) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
133 my @p = split(/,/, $pop);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
134 foreach my $t (@p) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
135 print FH
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
136 "TYPE: POPULATION\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
137 "HANDLE: $handle\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
138 "ID: $t\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
139 "POPULATION: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
140 "||\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
141 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
142 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
143 print FH "TYPE: SNPASSAY\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
144 "HANDLE: $handle\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
145 "BATCH: $batch\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
146 "MOLTYPE: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
147 "METHOD: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
148 "ORGANISM: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
149 "||\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
150 "TYPE: SNPPOPUSE | SNPINDUSE\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
151 "HANDLE: $handle\n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
152 "BATCH: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
153 "METHOD: \n",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
154 "||\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
155
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
156 close FH or die "Couldn't close $meta, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
157 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
158
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
159 exit 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
160
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
161 #parse old header and add or create new
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
162 sub prepHeader {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
163 my @h;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
164 $h[0] = '##fileformat=VCFv4.1';
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
165 my ($day, $mo, $yr) = (localtime)[3,4,5];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
166 $mo++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
167 $yr+=1900;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
168 $h[1] = '##fileDate=' . "$yr$mo$day";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
169 $h[2] = "##handle=$handle";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
170 $h[3] = "##batch=$batch";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
171 my $i = 4;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
172 if ($bioproj) { $h[$i] = "##bioproject_id=$bioproj"; $i++; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
173 if ($biosamp) { $h[$i] = "##biosample_id=$biosamp"; $i++; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
174 $h[$i] = "##reference=$ref"; ##reference=GCF_999999.99
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
175 #$i++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
176 #$h[$i] = '##INFO=<ID=LID, Number=1,Type=string, Description="Unique local variation ID or name for display. The LID provided here combined with the handle must be unique for a particular submitter.">'
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
177 $i++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
178 $h[$i] = '##INFO=<ID=VRT,Number=1,Type=Integer,Description="Variation type,1 - SNV: single nucleotide variation,2 - DIV: deletion/insertion variation,3 - HETEROZYGOUS: variable, but undefined at nucleotide level,4 - STR: short tandem repeat (microsatellite) variation, 5 - NAMED: insertion/deletion variation of named repetitive element,6 - NO VARIATON: sequence scanned for variation, but none observed,7 - MIXED: cluster contains submissions from 2 or more allelic classes (not used) ,8 - MNV: multiple nucleotide variation with all eles of common length greater than 1,9 - Exception">';
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
179 #sometimes have allele freqs?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
180 if (defined $pop) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
181 $i++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
182 $h[$i] = "##FORMAT=<ID=NA,Number=1,Type=Integer,Description=\"Number of alleles for the population.\"";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
183 $i++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
184 $h[$i] = '##FORMAT=<ID=AC,Number=.,Type=Integer,Description="Allele count for each alternate allele.">';
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
185 my @p = split(/,/, $pop);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
186 foreach my $t (@p) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
187 $i++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
188 $h[$i] = "##population_id=$t";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
189 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
190 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
191 #PMID?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
192 ##INFO=<ID=PMID,Number=.,Type=Integer,Description="PubMed ID linked to variation if available.">
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
193
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
194 return @h;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
195 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
196 ####End
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
197