annotate Contra/scripts/vcf_out.py @ 0:7564f3b1e675

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:31:43 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
1 # ----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
3 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
4 # > Source License <
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
5 # This file is part of CONTRA.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
6 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
7 # CONTRA is free software: you can redistribute it and/or modify
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
8 # it under the terms of the GNU General Public License as published by
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
10 # (at your option) any later version.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
11 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
12 # CONTRA is distributed in the hope that it will be useful,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
15 # GNU General Public License for more details.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
16 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
17 # You should have received a copy of the GNU General Public License
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
19 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
20 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
21 #-----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
22 # Last Updated : 09 Apr 2011 11:00AM
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
23
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
def vcf_out(inF, outF):
    """Convert a FASTA-like CNV region file into a minimal VCF file.

    The input file is consumed as consecutive pairs of lines:

    * a region line of the form ``>CHROM:START-END:ADJ_P_VALUE``
    * a line whose text (minus the newline) is written to the REF column

    One VCF record is written per pair.  QUAL is the adjusted p-value
    converted to ``-10 * log10(p)`` and rounded to three decimals; a
    p-value of zero or below yields a QUAL of ``0.0``.  A trailing line
    without a partner produces no record and is not parsed.

    Parameters:
        inF:  path of the region file to read.
        outF: path of the VCF file to create (overwritten if it exists).

    Raises:
        IndexError: a region line lacks the ``:``/``-`` separated fields.
        ValueError: the p-value field of a region line is not a number.
    """
    import math

    # The Python 2-only ``file`` builtin is gone in Python 3; a plain
    # ``open`` in a ``with`` block works on both and closes the handle.
    with open(inF) as source:
        lines = source.readlines()

    # ``with`` guarantees the output is flushed and closed even when a
    # malformed region line raises part-way through.
    with open(outF, "w") as vcf:
        # header
        # NOTE(review): the ##INFO/##ALT lines below lack a closing '>' and
        # the column names carry trailing spaces.  The text is kept
        # byte-for-byte so existing consumers see identical output; confirm
        # against the VCF specification before tightening it.
        vcf.write("##fileformat=VCFv4.0\n")
        vcf.write("##reference=1000GenomesPilot-NCBI36\n")
        vcf.write('##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"\n')
        vcf.write('##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"\n')
        vcf.write('##ALT=<ID=CNV,Description="Copy number variable region"\n')
        vcf.write("#CHROM \tPOS \tID \tREF \tALT \tQUAL \tFILTER \tINFO\n")

        # Even-indexed lines are region lines, odd-indexed lines carry REF.
        # zip() stops at the shorter slice, so an unpaired final line is
        # skipped instead of being parsed for nothing.
        for region_line, ref_line in zip(lines[0::2], lines[1::2]):
            fields = region_line.strip(">\n").split(":")
            chrom = fields[0]

            p_value = float(fields[2])
            if p_value <= 0:
                # log() is undefined here.  A float keeps the printed QUAL
                # as "0.0" on Python 3, where round(int, n) returns an int.
                qual_value = 0.0
            else:
                qual_value = -10 * math.log(p_value, 10)
            qual = str(round(qual_value, 3))

            span = fields[1].split("-")
            start = span[0]
            end = span[1]

            ref = ref_line.strip("\n")
            vcf.write(chrom + "\t" + start + "\t" + "." + "\t" + ref + "\t")
            vcf.write("<CNV>" + "\t" + qual + "\t" + "PASS" + "\t")
            vcf.write("SVTYPE=CNV;END=" + end + "\n")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
61
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
62
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
63
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
64