annotate filter_multihit_paf.py @ 129:d08080933718 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 6d8a6a308c219c112dbfc09fe48ad462746d6fb0
author fubar
date Mon, 07 Oct 2024 08:55:19 +0000
parents fbabf7498471
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
127
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
1 # bed for each multimatch paf
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
2 # idea from https://github.com/marbl/MashMap/blob/master/scripts/denovo_repeat_annotation.py
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
3 # adds filter for >1 match and #matches as a score
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
4 # ross lazarus october 6 2024
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
5
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
6 from os import sys
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
7
fbabf7498471 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff changeset
# 0-based column indexes into a whitespace-split PAF row.
# "1" is the first (query) interval of a hit, "2" the second (target) one.
CHROMOSOMECOL1 = 0
STARTCOL1 = 2
ENDCOL1 = 3
STRAND = 4  # not used by the filter; kept to document the row layout
CHROMOSOMECOL2 = 5
STARTCOL2 = 7
ENDCOL2 = 8
IDENTITY = 9  # not used by the filter; kept to document the row layout


def collect_repeats(paf_lines, min_len):
    """Return BED rows for intervals that take part in more than one hit.

    Args:
        paf_lines: iterable of PAF text lines (an open file works).
        min_len: minimum length of the first interval for a hit to count.
            Hits on the same sequence must also have both their starts and
            their ends at least ``1.5 * min_len`` apart, which drops
            self-hits and near-diagonal hits.

    Returns:
        A list of ``(chrom, start, end, name, score)`` tuples of strings in
        first-seen order. ``name`` is the ``chrom~start`` key of the first
        interval and ``score`` is the number of hits recorded for that key
        plus the sizes of every multiply-hit second interval it touches.

    Rejected short hits, every key, and every reported hit list are echoed
    to stdout, as the original script did.
    """
    hits_by_first = {}  # "chrom~start" of interval 1 -> keys of interval 2
    hits_by_second = {}  # "chrom~start" of interval 2 -> keys of interval 1
    first_lens = {}  # "chrom~start" of interval 1 -> interval length
    min_offset = 1.5 * min_len

    for line in paf_lines:
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of dying with IndexError.
            continue
        chromosome1 = fields[CHROMOSOMECOL1]
        start1 = int(fields[STARTCOL1])
        end1 = int(fields[ENDCOL1])
        chromosome2 = fields[CHROMOSOMECOL2]
        start2 = int(fields[STARTCOL2])
        end2 = int(fields[ENDCOL2])

        off_diagonal = chromosome1 != chromosome2 or (
            abs(start1 - start2) >= min_offset
            and abs(end1 - end2) >= min_offset
        )
        if not off_diagonal:
            continue
        if end1 - start1 + 1 < min_len:
            # NOTE(review): the listing this was recovered from carries no
            # indentation, so this echo may instead belong to the
            # off-diagonal test above - confirm against the repository copy.
            print(line)
            continue

        h1key = "%s~%d" % (chromosome1, start1)
        h2key = "%s~%d" % (chromosome2, start2)
        hits_by_first.setdefault(h1key, []).append(h2key)
        # Record the length on every hit, including the first one, so a key
        # reported only because its target is shared still gets a real end
        # coordinate rather than end == start.
        first_lens[h1key] = abs(end1 - start1)
        hits_by_second.setdefault(h2key, []).append(h1key)

    repeat_list = []
    for key, targets in hits_by_first.items():
        print(key)
        nk1 = len(targets)
        nk2 = 0
        shared = []
        for target_key in targets:
            sources = hits_by_second.get(target_key, [])
            if len(sources) > 1:
                nk2 += len(sources)
                shared.append(",".join(sources))
        if nk1 > 1 or nk2 > 1:
            print(targets, "->", ",".join(shared))
            # Split on the last "~" only: sequence names may contain "~".
            chrom, start = key.rsplit("~", 1)
            end = int(start) + first_lens[key]
            repeat_list.append(
                (chrom, start, "%d" % end, key, "%d" % (nk1 + nk2))
            )
    return repeat_list


def main(argv):
    """Command-line entry point: ``script.py in.paf min_len out.bed``."""
    if len(argv) < 4:
        sys.exit("usage: %s <in.paf> <min_len> <out.bed>" % argv[0])
    with open(argv[1]) as paf:
        rows = collect_repeats(paf, int(argv[2]))
    with open(argv[3], "w") as bed:
        for row in rows:
            bed.write("\t".join(row))
            bed.write("\n")


if __name__ == "__main__":
    main(sys.argv)