Mercurial > repos > artbio > lumpy_smoove
annotate vcf2hrdetect.py @ 8:b4dec06d8fc6 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 51dc6c56c7d95fc229ffee958354211cd454fd36"
author | artbio |
---|---|
date | Sun, 09 May 2021 17:09:33 +0000 |
parents | ad8853ee9909 |
children | 7dcf61950215 |
rev | line source |
---|---|
6
ad8853ee9909
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
artbio
parents:
diff
changeset
|
1 import sys |
ad8853ee9909
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
artbio
parents:
diff
changeset
|
2 |
ad8853ee9909
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
artbio
parents:
diff
changeset
|
# Header line written at the top of the output table.
OUTPUT_HEADER = "chr1\tpos1\tchr2\tpos2\ttype\n"


def parse_vcf_records(lines):
    """Index the data lines of a VCF by the value of their third column.

    Lines starting with "#" and blank lines are ignored. Each remaining line
    is split on tabs and stored under its third field (the VCF ID column).
    When two lines share an ID, the later one replaces the earlier one.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        dict mapping ID -> list of tab-separated fields, in input order.

    Raises:
        IndexError: a data line has fewer than three columns.
    """
    records = {}
    for line in lines:
        # Strip only the line terminator: slicing off the last character
        # would eat real data when the final line has no trailing newline.
        row = line.rstrip("\r\n")
        if not row or row.startswith("#"):
            continue
        fields = row.split("\t")
        records[fields[2]] = fields
    return records


def parse_breakend_mate(alt):
    """Extract the mate locus from a bracketed breakend ALT string.

    The text between the first pair of "]" (checked first) or "[" brackets
    is read as "<chrom>:<pos>".

    Args:
        alt: content of the ALT column, e.g. "N]chr2:900]".

    Returns:
        (chrom, pos) as strings, or None when ``alt`` has no bracket or the
        bracketed text has no ":" separator.
    """
    for bracket in ("]", "["):
        if bracket in alt:
            locus = alt.split(bracket)[1]
            # Split on the LAST colon so contig names that themselves
            # contain ":" are kept whole.
            chrom, sep, pos = locus.rpartition(":")
            if not sep:
                return None
            return chrom, pos
    return None


def parse_info(info):
    """Split a ";"-separated INFO string into a {key: value} dict.

    Entries without "=" (flags) map to an empty string. When a key occurs
    more than once, the first occurrence is kept.
    """
    parsed = {}
    for entry in info.split(";"):
        key, _, value = entry.partition("=")
        parsed.setdefault(key, value)
    return parsed


def build_sv_table(records):
    """Convert indexed VCF records into [chr1, pos1, chr2, pos2, type] rows.

    Two passes are made over ``records`` (which is not modified):

    1. Records whose ID ends in "_1" are treated as the first side of a
       breakend pair. The mate locus is read from the ALT column; the row is
       labelled "INV" when the mate is on the same chromosome as the record
       and "TRA" otherwise, and is stored under the ID without its "_1"
       suffix. Records whose ALT cannot be parsed as a breakend are skipped.
    2. Records whose ID contains no "_" take chr2 = chr1, pos2 from the
       INFO "END" key and the type from the INFO "SVTYPE" key.

    All other IDs containing "_" (for example the "_2" mates) are ignored.

    Args:
        records: dict as returned by ``parse_vcf_records``.

    Returns:
        dict mapping output ID -> list of five strings; breakend-derived rows
        come first, followed by the remaining records.

    Raises:
        ValueError: a non-breakend record lacks END or SVTYPE in INFO.
    """
    table = {}

    for sv_id, fields in records.items():
        # The suffix is sliced off below, so test for a suffix rather than
        # for "_1" anywhere in the ID.
        if not sv_id.endswith("_1"):
            continue
        mate = parse_breakend_mate(fields[4])
        if mate is None:
            # No usable mate locus; any ID containing "_" is excluded from
            # the second pass as well, so the record is simply dropped.
            continue
        mate_chrom, mate_pos = mate
        sv_type = "INV" if fields[0] == mate_chrom else "TRA"
        table[sv_id[:-2]] = [fields[0], fields[1],
                             mate_chrom, mate_pos, sv_type]

    for sv_id, fields in records.items():
        if "_" in sv_id:
            continue
        info = parse_info(fields[7])
        # Look keys up exactly: a substring search for "END=" would also
        # match inside "CIEND=".
        if "END" not in info or "SVTYPE" not in info:
            raise ValueError(
                "record %s has no END and/or SVTYPE in its INFO column"
                % sv_id)
        table[sv_id] = [fields[0], fields[1], fields[0],
                        info["END"], info["SVTYPE"]]

    return table


def main(argv=None):
    """Read the VCF at argv[1] and write the tab-separated table to argv[2].

    Args:
        argv: argument list in ``sys.argv`` layout; defaults to ``sys.argv``.
    """
    import sys

    args = sys.argv if argv is None else argv
    with open(args[1], 'r') as handle:
        records = parse_vcf_records(handle)
    table = build_sv_table(records)
    # The output file is opened only after parsing succeeded, so a failed
    # run does not leave a partial table behind.
    with open(args[2], 'w') as out:
        out.write(OUTPUT_HEADER)
        for row in table.values():
            out.write("\t".join(row) + "\n")


if __name__ == "__main__":
    main()