Mercurial > repos > fubar > jbrowse2
annotate filter_multihit_paf.py @ 129:d08080933718 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 6d8a6a308c219c112dbfc09fe48ad462746d6fb0
author | fubar |
---|---|
date | Mon, 07 Oct 2024 08:55:19 +0000 |
parents | fbabf7498471 |
children |
rev | line source |
---|---|
127
fbabf7498471
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 116b1a4bbd62251ad552306df2dc8aa8f46c6721
fubar
parents:
diff
changeset
|
"""Write one BED interval for each multi-hit query interval in a PAF file.

Idea from
https://github.com/marbl/MashMap/blob/master/scripts/denovo_repeat_annotation.py
Adds a filter for >1 match and reports the number of matches as the score.

Usage: filter_multihit_paf.py <input.paf> <min_length> <output.bed>

ross lazarus october 6 2024
"""

import sys

# Zero-based column indexes into a whitespace-split input row.
CHROMOSOMECOL1 = 0
STARTCOL1 = 2
ENDCOL1 = 3
STRAND = 4  # not read below; kept to document the row layout
CHROMOSOMECOL2 = 5
STARTCOL2 = 7
ENDCOL2 = 8
IDENTITY = 9  # not read below; kept to document the row layout


def collect_hits(lines, min_len):
    """Index the usable alignments found in *lines*.

    A row is kept when the two intervals are on different sequences, or
    when both their starts and their ends are at least ``1.5 * min_len``
    apart, and the first interval spans at least *min_len* positions.
    Blank rows are ignored.

    Args:
        lines: iterable of whitespace-separated alignment rows.
        min_len: minimum length (int) of the first interval.

    Returns:
        A ``(hits1, hits2, lens1)`` tuple of dicts keyed by
        ``"<sequence>~<start>"``: ``hits1`` maps each first-interval key to
        the second-interval keys it aligned to, ``hits2`` is the reverse
        mapping, and ``lens1`` holds the length of the most recent
        alignment seen for every key in ``hits1``.

    Raises:
        IndexError, ValueError: on a non-blank row that has too few columns
            or non-integer coordinates.
    """
    min_gap = 1.5 * min_len  # hoisted: constant for the whole file
    hits1 = {}
    hits2 = {}
    lens1 = {}
    for line in lines:
        row = line.split()
        if not row:
            continue  # tolerate blank lines instead of raising IndexError
        chrom1 = row[CHROMOSOMECOL1]
        start1 = int(row[STARTCOL1])
        end1 = int(row[ENDCOL1])
        chrom2 = row[CHROMOSOMECOL2]
        start2 = int(row[STARTCOL2])
        end2 = int(row[ENDCOL2])

        off_diagonal = chrom1 != chrom2 or (
            abs(start1 - start2) >= min_gap and abs(end1 - end2) >= min_gap
        )
        if not off_diagonal:
            continue
        if end1 - start1 + 1 < min_len:
            # NOTE(review): indentation was not recoverable from the source
            # this was restored from; the echo of skipped rows is assumed to
            # belong to the minimum-length test - confirm.
            print(line)
            continue

        key1 = "%s~%d" % (chrom1, start1)
        key2 = "%s~%d" % (chrom2, start2)
        hits1.setdefault(key1, []).append(key2)
        hits2.setdefault(key2, []).append(key1)
        # Record the length for every hit, not only from the second hit on;
        # otherwise single-hit keys are later written as zero-length
        # intervals.
        lens1[key1] = abs(end1 - start1)
    return hits1, hits2, lens1


def find_repeats(hits1, hits2, lens1):
    """Build BED rows for first-interval keys involved in more than one hit.

    For each key in *hits1* the score is the number of its own hits plus,
    for every partner that itself has more than one hit, that partner's hit
    count. A row is produced when either component exceeds one. Progress is
    echoed to stdout.

    Args:
        hits1: first-interval key -> list of second-interval keys.
        hits2: second-interval key -> list of first-interval keys.
        lens1: first-interval key -> interval length.

    Returns:
        List of ``(sequence, start, end, key, score)`` tuples of strings.
    """
    rows = []
    for key1, partners in hits1.items():
        print(key1)
        n_direct = len(partners)
        n_shared = 0
        shared = []
        for key2 in partners:
            back_hits = hits2.get(key2, [])
            if len(back_hits) > 1:
                n_shared += len(back_hits)
                shared.append(",".join(back_hits))
        if n_direct > 1 or n_shared > 1:
            print(partners, "->", ",".join(shared))
            # rsplit so a "~" inside the sequence name cannot break the key.
            chrom, start = key1.rsplit("~", 1)
            end = int(start) + lens1.get(key1, 0)
            rows.append(
                (chrom, start, "%d" % end, key1, "%d" % (n_direct + n_shared))
            )
    return rows


def write_bed(rows, path):
    """Write *rows* (tuples of strings) to *path*, tab-separated."""
    with open(path, "w") as out:
        for row in rows:
            out.write("\t".join(row))
            out.write("\n")


def main(argv=None):
    """Run the filter: ``argv`` is ``[prog, input.paf, min_length, out.bed]``."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        sys.exit("usage: %s <input.paf> <min_length> <output.bed>" % argv[0])
    min_len = int(argv[2])
    with open(argv[1]) as paf:
        hits1, hits2, lens1 = collect_hits(paf, min_len)
    write_bed(find_repeats(hits1, hits2, lens1), argv[3])


if __name__ == "__main__":
    main()