diff vcf2hrdetect.py @ 6:ad8853ee9909 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
author artbio
date Sun, 14 Feb 2021 23:59:42 +0000
parents
children 7dcf61950215
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf2hrdetect.py	Sun Feb 14 23:59:42 2021 +0000
@@ -0,0 +1,41 @@
+import sys
+
+handle = open(sys.argv[1], 'r')
+vcfdict = dict()
+tabdict = dict()
+for line in handle:
+    if line[0] == "#":
+        continue
+    else:
+        tabline = line[:-1].split("\t")
+        vcfdict[tabline[2]] = tabline
+for id in vcfdict.keys():
+    if "_1" in id:
+        newid = id[:-2]
+        pointbreak = vcfdict[id][4]
+        if "]" in pointbreak:
+            coordbreak = pointbreak.split("]")[1].split(":")[1]
+            chrom = pointbreak.split("]")[1].split(":")[0]
+        elif "[" in pointbreak:
+            coordbreak = pointbreak.split("[")[1].split(":")[1]
+            chrom = pointbreak.split("[")[1].split(":")[0]
+        if vcfdict[id][0] == chrom:
+            tabdict[newid] = [chrom, vcfdict[id][1], chrom, coordbreak, "INV"]
+        else:
+            tabdict[newid] = [vcfdict[id][0], vcfdict[id][1],
+                              chrom, coordbreak, "TRA"]
+for id in list(vcfdict):
+    if "_" in id:
+        del(vcfdict[id])
+for id in vcfdict.keys():  # only sv that are not of type TRA or INV
+    chr1 = vcfdict[id][0]
+    chr2 = vcfdict[id][0]
+    pos1 = vcfdict[id][1]
+    pos2 = vcfdict[id][7].split("END=")[1].split(";")[0]
+    type = vcfdict[id][7].split("SVTYPE=")[1].split(";")[0]
+    tabdict[id] = [chr1, pos1, chr2, pos2, type]
+out = open(sys.argv[2], 'w')
+out.write("chr1\tpos1\tchr2\tpos2\ttype\n")
+for key in tabdict:
+    line = "\t".join(tabdict[key]) + "\n"
+    out.write(line)