comparison vcf2hrdetect.py @ 6:ad8853ee9909 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
author artbio
date Sun, 14 Feb 2021 23:59:42 +0000
parents
children 7dcf61950215
comparison
equal deleted inserted replaced
5:bd4135caa3fa 6:ad8853ee9909
"""Convert a structural-variant VCF into a five-column breakpoint table.

Usage: vcf2hrdetect.py <input.vcf> <output.tsv>

The output is tab-separated with the header ``chr1 pos1 chr2 pos2 type``.
Records whose ID ends in ``_1`` are treated as the first mate of a breakend
pair and reported as ``INV`` (mate on the same chromosome) or ``TRA`` (mate
on another chromosome).  Every other record whose ID contains ``_`` is
dropped.  Records whose ID has no ``_`` are reported with the ``END`` and
``SVTYPE`` values taken from their INFO column.
"""
import sys

OUTPUT_HEADER = "chr1\tpos1\tchr2\tpos2\ttype\n"

# Number of fixed columns a VCF data line must have (CHROM .. INFO).
MIN_VCF_COLUMNS = 8


def _parse_mate(alt):
    """Return ``(chrom, pos)`` of the mate encoded in a breakend ALT string.

    The mate location is the text between the first pair of ``]`` brackets
    or, failing that, ``[`` brackets (e.g. ``N]chr2:400]`` -> ``chr2``,
    ``400``).

    Raises:
        ValueError: if the ALT string has no bracket or no ``chrom:pos`` part.
    """
    for bracket in ("]", "["):
        if bracket in alt:
            # Split on the last colon so contig names containing ':' survive.
            chrom, sep, pos = alt.split(bracket)[1].rpartition(":")
            if not sep:
                raise ValueError("no chrom:pos in breakend ALT %r" % alt)
            return chrom, pos
    raise ValueError("ALT %r is not in breakend notation" % alt)


def _parse_info(info):
    """Return the ``key=value`` entries of an INFO column as a dict.

    Keys are matched exactly, so ``CIEND`` is never mistaken for ``END``.
    Value-less flags are ignored; the first occurrence of a key wins.
    """
    parsed = {}
    for entry in info.split(";"):
        key, sep, value = entry.partition("=")
        if sep:
            parsed.setdefault(key, value)
    return parsed


def read_vcf_records(lines):
    """Index VCF data lines by their ID column.

    Args:
        lines: iterable of text lines (an open file or a list of strings).

    Returns:
        dict mapping the ID (third column) to the list of tab-separated
        fields of that line.  Header lines (starting with ``#``) and blank
        lines are skipped; a repeated ID keeps the last line seen.

    Raises:
        ValueError: if a data line has fewer than eight columns.
    """
    records = {}
    for number, raw in enumerate(lines, start=1):
        # Strip only the line terminator: a final line without a newline
        # must not lose its last character.
        line = raw.rstrip("\r\n")
        if not line or line.startswith("#"):
            continue
        fields = line.split("\t")
        if len(fields) < MIN_VCF_COLUMNS:
            raise ValueError(
                "line %d: expected at least %d columns, found %d"
                % (number, MIN_VCF_COLUMNS, len(fields))
            )
        records[fields[2]] = fields
    return records


def build_sv_table(records):
    """Build the output rows from records indexed by ID.

    Args:
        records: dict as returned by :func:`read_vcf_records`.

    Returns:
        dict mapping an event ID to ``[chr1, pos1, chr2, pos2, type]``.
        Breakend-derived rows come first, followed by the other rows, each
        group in input order.

    Raises:
        ValueError: if a ``_1`` record has a malformed ALT, or a record
            without ``_`` in its ID lacks ``END`` or ``SVTYPE`` in INFO.
    """
    table = {}

    # First mate of each breakend pair: the "_1" suffix is removed to get
    # the event ID, and the mate position is read from the ALT column.
    for sv_id, fields in records.items():
        if not sv_id.endswith("_1"):
            continue
        mate_chrom, mate_pos = _parse_mate(fields[4])
        sv_type = "INV" if fields[0] == mate_chrom else "TRA"
        table[sv_id[:-2]] = [fields[0], fields[1], mate_chrom, mate_pos, sv_type]

    # Remaining events: any ID containing "_" is left out here, so only
    # the records not handled as breakends are described via INFO.
    for sv_id, fields in records.items():
        if "_" in sv_id:
            continue
        info = _parse_info(fields[7])
        missing = [key for key in ("END", "SVTYPE") if key not in info]
        if missing:
            raise ValueError(
                "record %r: INFO lacks %s" % (sv_id, ", ".join(missing))
            )
        table[sv_id] = [fields[0], fields[1], fields[0], info["END"], info["SVTYPE"]]

    return table


def main(argv=None):
    """Read the VCF named by the first argument, write the table to the second.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.  Arguments past the second are ignored.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: vcf2hrdetect.py <input.vcf> <output.tsv>")

    with open(args[0], "r") as vcf:
        records = read_vcf_records(vcf)
    table = build_sv_table(records)

    # The output is opened only after conversion succeeded, so a malformed
    # input does not leave a partial file behind.
    with open(args[1], "w") as out:
        out.write(OUTPUT_HEADER)
        out.writelines("\t".join(row) + "\n" for row in table.values())


if __name__ == "__main__":
    main()