0
|
1 # ----------------------------------------------------------------------#
|
|
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
|
|
3 #
|
|
4 # > Source License <
|
|
5 # This file is part of CONTRA.
|
|
6 #
|
|
7 # CONTRA is free software: you can redistribute it and/or modify
|
|
8 # it under the terms of the GNU General Public License as published by
|
|
9 # the Free Software Foundation, either version 3 of the License, or
|
|
10 # (at your option) any later version.
|
|
11 #
|
|
12 # CONTRA is distributed in the hope that it will be useful,
|
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 # GNU General Public License for more details.
|
|
16 #
|
|
17 # You should have received a copy of the GNU General Public License
|
|
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
|
|
19 #
|
|
20 #
|
|
21 #-----------------------------------------------------------------------#
|
|
22 # Last Updated : 09 Apr 2011 11:00AM
|
|
23
|
|
def vcf_out(inF, outF):
    """Convert a two-line-per-region listing into a minimal VCF 4.0 file.

    The input file is read as consecutive pairs of lines:

    * a region line of the form ``>CHROM:START-END:PVALUE`` (the leading
      ``>`` and the trailing newline are stripped before parsing), and
    * a line whose text is written verbatim into the REF column.

    Each pair becomes one ``<CNV>`` record.  QUAL is the p-value on a
    phred-like scale, ``-10 * log10(PVALUE)`` rounded to three decimals;
    p-values of zero or below are reported as ``0.0`` because their
    logarithm is undefined.

    Args:
        inF: Path of the input listing.
        outF: Path of the VCF file to create (overwritten if present).

    Raises:
        IndexError: If a region line lacks the ``:`` or ``-`` separated
            fields described above.
        ValueError: If the p-value field is not a number.
    """
    import math

    # The Python 2-only ``file`` builtin is gone; a context manager also
    # guarantees the input handle is released.
    with open(inF) as source:
        lines = source.readlines()

    header = (
        "##fileformat=VCFv4.0\n"
        "##reference=1000GenomesPilot-NCBI36\n"
        # Structured meta lines must be closed with '>'.
        '##INFO=<ID=SVTYPE,Number=1,Type=String,'
        'Description="Type of structural variant">\n'
        '##INFO=<ID=END,Number=1,Type=Integer,'
        'Description="End position of the variant described in this record">\n'
        '##ALT=<ID=CNV,Description="Copy number variable region">\n'
        # Column names are separated by bare tabs (no padding spaces).
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"
    )

    with open(outF, "w") as vcf:
        vcf.write(header)

        # Even-indexed lines describe the region, odd-indexed lines carry
        # the REF text.  zip() drops an unpaired trailing line (for example
        # a blank line at end of file) instead of failing while parsing it.
        for region_line, ref_line in zip(lines[0::2], lines[1::2]):
            fields = region_line.strip(">\n").split(":")
            chrom = fields[0]
            p_value = float(fields[2])

            if p_value <= 0:
                # Float (not int) zero so the column always reads "0.0".
                qual = 0.0
            else:
                qual = -10 * math.log10(p_value)

            bounds = fields[1].split("-")
            start = bounds[0]
            end = bounds[1]
            ref = ref_line.strip("\n")

            record = [
                chrom,
                start,
                ".",
                ref,
                "<CNV>",
                str(round(qual, 3)),
                "PASS",
                "SVTYPE=CNV;END=" + end,
            ]
            vcf.write("\t".join(record) + "\n")
|
|
61
|
|
62
|
|
63
|
|
64
|