annotate PDAUG_Sequence_Network/PDAUG_Sequence_Network.py @ 7:44df12617f37 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 32b9c48c81639a81be24bb3e2f48dc0a81c0deca"
author jay
date Sun, 09 Jan 2022 03:33:52 +0000
parents e59674e3a391
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
1 import Levenshtein
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
2 import matplotlib.pyplot as plt
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
3 import networkx as nx
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
4 import os
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
5
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
6
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
def _read_fasta(path):
    """Parse a FASTA file into parallel lists of header lines and sequences.

    Lines starting with ``>`` open a new record; all following non-blank
    lines up to the next header are concatenated into that record's
    sequence, so multi-line records stay aligned with their header.

    Raises:
        ValueError: if sequence data appears before the first header.
    """
    names = []
    chunks = []
    with open(path) as handle:
        for raw in handle:
            line = raw.strip()
            if not line:
                # Blank lines carry no data and must not become sequences.
                continue
            if line.startswith(">"):
                names.append(line)
                chunks.append([])
            elif chunks:
                chunks[-1].append(line)
            else:
                raise ValueError(
                    "sequence data found before the first '>' header in %s" % path
                )
    return names, ["".join(parts) for parts in chunks]


def SeqSimilarityNetwork(InFile, OutFile, threshold=0.4):
    """Draw a sequence similarity network for the records of a FASTA file.

    Every record becomes a node (keyed by its full header line). Two nodes
    are joined by an edge, weighted by ``Levenshtein.ratio`` of their
    sequences, when that ratio is at least ``threshold``. The graph is laid
    out with ``nx.spring_layout`` and saved to ``OutFile`` with axes hidden.

    Args:
        InFile: path to the input FASTA file.
        OutFile: path the figure is written to via ``plt.savefig``.
        threshold: minimum similarity ratio for an edge (default 0.4).

    Raises:
        ValueError: if the file has sequence data before any header.

    Note:
        Each unordered pair is compared once and sequences are not compared
        with themselves, so nodes no longer carry a self-loop edge; all
        records are still added as nodes explicitly.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from itertools import combinations

    names, sequences = _read_fasta(InFile)

    G = nx.Graph()
    # Add nodes up front so records with no similar partner are still drawn.
    G.add_nodes_from(names)

    for (name_a, seq_a), (name_b, seq_b) in combinations(zip(names, sequences), 2):
        score = Levenshtein.ratio(seq_a, seq_b)
        if score >= threshold:
            G.add_edge(name_a, name_b, weight=float(score))

    # Draw on a dedicated figure and always release it, so repeated calls
    # neither overlay earlier plots nor accumulate open figures.
    fig = plt.figure()
    try:
        pos = nx.spring_layout(G)
        nx.draw_networkx_nodes(G, pos, node_size=10)
        # Every edge in G already satisfies the threshold, so draw them all.
        nx.draw_networkx_edges(G, pos, width=1)
        plt.axis('off')

        plt.savefig(OutFile)
    finally:
        plt.close(fig)
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
38
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
39
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
40
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
if __name__ == "__main__":

    # Command-line entry point: read a FASTA file and write the similarity
    # network figure to the requested output path.
    import argparse

    parser = argparse.ArgumentParser(
        description="Draw a sequence similarity network from a FASTA file."
    )

    # The input is parsed as FASTA ('>' header lines followed by sequence).
    parser.add_argument("-I", "--InFile", required=True, help="Path to input FASTA file")
    # The output is an image written by matplotlib; the default is a PNG.
    parser.add_argument("-O", "--OutFile", required=False, default="out.png", help="Path of the output network image")
    args = parser.parse_args()

    SeqSimilarityNetwork(args.InFile, args.OutFile)
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
52
e59674e3a391 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
jay
parents:
diff changeset
53