annotate vcf2hrdetect.py @ 6:ad8853ee9909 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
author artbio
date Sun, 14 Feb 2021 23:59:42 +0000
parents
children 7dcf61950215
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ad8853ee9909 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
artbio
parents:
diff changeset
import sys


def _read_records(lines):
    """Return a dict mapping each VCF record ID (3rd column) to its fields.

    Lines starting with "#" and empty lines are skipped. When several
    records share an ID, the last one wins (same as the original script).
    """
    records = dict()
    for line in lines:
        # Strip only the line terminator: slicing with [:-1] would eat the
        # last real character of a final line that has no trailing newline.
        line = line.rstrip("\r\n")
        if not line or line[0] == "#":
            continue
        fields = line.split("\t")
        records[fields[2]] = fields
    return records


def _mate_locus(alt, rec_id):
    """Return (chrom, pos) of the mate breakend named in a bracketed ALT.

    The locus is the text between the first pair of "]" or "[" characters,
    e.g. "N]chr2:100]" or "[chr2:100[N" both give ("chr2", "100").

    Raises:
        ValueError: if ALT has no bracket or the locus has no ":".
    """
    if "]" in alt:
        bracket = "]"
    elif "[" in alt:
        bracket = "["
    else:
        # Without this guard the coordinates of the previously processed
        # record would be silently reused (or a NameError raised).
        raise ValueError(
            "record {0}: ALT {1!r} has no breakend bracket".format(rec_id, alt)
        )
    locus = alt.split(bracket)[1]
    # Split on the LAST colon so contig names containing ":" stay intact.
    chrom, sep, pos = locus.rpartition(":")
    if not sep:
        raise ValueError(
            "record {0}: cannot parse mate locus in ALT {1!r}".format(
                rec_id, alt)
        )
    return chrom, pos


def _info_value(info, key, rec_id):
    """Return the value of ``key`` from a ";"-separated INFO string.

    Keys are matched exactly at the start of an entry, so looking up "END"
    never picks up "CIEND=" or "CIEND95=".

    Raises:
        ValueError: if no ``key=`` entry is present.
    """
    prefix = key + "="
    for entry in info.split(";"):
        if entry.startswith(prefix):
            return entry[len(prefix):]
    raise ValueError(
        "record {0}: INFO has no {1} entry: {2!r}".format(rec_id, key, info)
    )


def vcf_to_rows(lines):
    """Convert VCF lines into [chr1, pos1, chr2, pos2, type] rows.

    Args:
        lines: iterable of VCF text lines (an open file works).

    Returns:
        A list of 5-item lists of strings. Rows built from "<id>_1"
        breakend records come first ("INV" when both ends are on the same
        chromosome, "TRA" otherwise), followed by one row per record whose
        ID contains no "_" (positions from POS and INFO END, type from
        INFO SVTYPE). Any other ID containing "_" (e.g. the "_2" mates) is
        ignored.

    Raises:
        ValueError: on a "_1" record whose ALT cannot be parsed, or a plain
            record lacking END/SVTYPE in INFO.
    """
    records = _read_records(lines)
    table = dict()

    # Pass 1: paired breakends, described once through their "_1" member.
    for rec_id, fields in records.items():
        if not rec_id.endswith("_1"):
            continue
        chrom, pos = fields[0], fields[1]
        mate_chrom, mate_pos = _mate_locus(fields[4], rec_id)
        sv_type = "INV" if mate_chrom == chrom else "TRA"
        # Keyed by the ID without its "_1" suffix, as before.
        table[rec_id[:-2]] = [chrom, pos, mate_chrom, mate_pos, sv_type]

    # Pass 2: only sv that are not of type TRA or INV (IDs without "_").
    for rec_id, fields in records.items():
        if "_" in rec_id:
            continue
        info = fields[7]
        table[rec_id] = [
            fields[0],
            fields[1],
            fields[0],
            _info_value(info, "END", rec_id),
            _info_value(info, "SVTYPE", rec_id),
        ]
    return list(table.values())


def main(vcf_path, table_path):
    """Read the VCF at ``vcf_path`` and write the 5-column table.

    The output starts with the header "chr1 pos1 chr2 pos2 type"
    (tab-separated) followed by one tab-separated line per row. Both files
    are closed on exit, including when an error occurs.
    """
    with open(vcf_path, 'r') as handle:
        rows = vcf_to_rows(handle)
    # The output is opened only after parsing succeeded, so a malformed
    # input does not leave a truncated table behind.
    with open(table_path, 'w') as out:
        out.write("chr1\tpos1\tchr2\tpos2\ttype\n")
        for row in rows:
            out.write("\t".join(row) + "\n")


if __name__ == "__main__":
    main(sys.argv[1], sys.argv[2])