Mercurial > repos > galaxyp > proteore_venn_diagram
annotate venn_diagram.py @ 0:57f01ca855cd draft default tip
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
author | galaxyp |
---|---|
date | Sat, 12 Jun 2021 18:06:28 +0000 |
parents | |
children |
rev | line source |
---|---|
0
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
2 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
3 import argparse |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
4 import csv |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
5 import json |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
6 import os |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
7 import re |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
8 from itertools import combinations |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
9 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
10 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
12 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
13 ######################################################################## |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
14 # FUNCTIONS |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
15 ######################################################################## |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
16 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
17 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
def isnumber(format, n):
    """
    Check if a string is written as an integer or as a float

    format: "int" or "float"; any other value makes the check fail.
    n: the string to test.

    "int" accepts an optionally negative integer without leading zero,
    which excludes "0" (column numbers are validated with it and are
    1-based). "float" accepts an optionally negative decimal number with
    an optional fractional part, e.g. "5", "0.5" or "-12.25".

    Returns True when n matches the requested format, False otherwise.
    """
    # \Z (and not $) so that a trailing newline is not silently accepted.
    patterns = {
        "int": r"^-?[1-9][0-9]*\Z",
        # The dot is escaped: unescaped it matched any character ("1x5").
        "float": r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?\Z",
    }
    pattern = patterns.get(format)
    if pattern is None:
        return False
    return re.match(pattern, n) is not None
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
33 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
34 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
def input_to_dict(inputs):
    """
    Parse input and return the IDs and the name of each list/file

    inputs: one sequence per list to compare. Position 0 is either the
    path of a tab-separated file or the IDs themselves, position 1 the
    name of the list and position 2 the input type. When the type is
    "file", position 3 tells whether the first line is a header ("true")
    and position 4 is the 1-based column holding the IDs (e.g. "c1").

    Returns (comp_dict, title_dict): both are keyed by a one-letter label
    ("A" to "F", in input order); comp_dict maps it to the set of IDs and
    title_dict to the list name. Cells may hold several IDs separated by
    ";"; empty IDs and "NA" are dropped.

    Raises ValueError when more lists are given than there are labels,
    when the column number is malformed or lower than 1, or when a
    non-empty line of a file does not have the requested column.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    if len(inputs) > len(labels):
        raise ValueError("At most %d lists can be compared, %d were given"
                         % (len(labels), len(inputs)))

    comp_dict = {}
    title_dict = {}
    for label, entry in zip(labels, inputs):
        source = entry[0]
        name = entry[1]
        input_type = entry[2]
        title_dict[label] = name

        if input_type == "file":
            header = entry[3]
            ncol = entry[4]

            # Check if column number is in right form
            number = ncol.replace("c", "")
            if not isnumber("int", number) or int(number) < 1:
                # A negative number would index from the end of the row.
                raise ValueError("Please fill in the right format of column number")  # noqa 501
            column = int(number) - 1

            with open(source, "r") as handle:
                rows = list(csv.reader(handle, delimiter="\t"))

            # gets ids from defined column
            first_row = 1 if header == "true" else 0
            cells = []
            for line_number, row in enumerate(rows[first_row:],
                                              first_row + 1):
                if not row:
                    # blank line in the file
                    continue
                if len(row) <= column:
                    raise ValueError("Line %d of %s has no column %d"
                                     % (line_number, source, column + 1))
                cells.append(row[column])
        else:
            # IDs given directly, separated by whitespace
            cells = source.split()

        # Split multi-ID cells; strip before testing for emptiness so
        # that whitespace-only items are not kept as an empty ID.
        ids = set()
        for cell in cells:
            for item in cell.split(";"):
                item = item.strip()
                if item:
                    ids.add(item)
        ids.discard('NA')
        comp_dict[label] = ids

    return comp_dict, title_dict
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
79 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
80 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
def intersect(comp_dict):
    """
    Calculate the intersections of input

    comp_dict: dictionary mapping each list label to its set of IDs.

    Generator. For every non-empty combination of labels (by increasing
    size, labels sorted) it yields a tuple (group, intersected,
    difference) where group is the tuple of labels, intersected the list
    of IDs present in all lists of the group and difference the list of
    those IDs that are in none of the other lists. When the group holds
    every label there is no other list, so difference equals
    intersected.
    """
    names = sorted(comp_dict)
    for size in range(1, len(names) + 1):
        for group in combinations(names, size):
            intersected = set.intersection(*(comp_dict[k] for k in group))
            others = [name for name in names if name not in group]
            if others:
                difference = intersected.difference(
                    *(comp_dict[k] for k in others))
            else:
                # Nothing to subtract: every shared ID is exclusive to
                # the group (this is also the single-list case).
                difference = intersected
            yield group, list(intersected), list(difference)
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
95 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
96 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
def diagram(comp_dict, title_dict):
    """
    Build the data structure used for the jvenn diagram plot

    comp_dict: dictionary mapping each list label to its set of IDs.
    title_dict: dictionary mapping each list label to its name.

    Returns a dictionary with three entries, the last two being keyed by
    the concatenated labels of each region (e.g. "A", "AB"):
    "name" (label -> list name), "data" (region -> sorted IDs) and
    "values" (region -> number of IDs). A region holds the IDs found
    only in the lists of its group; the region of all lists holds their
    intersection.
    """
    result = {}
    result["name"] = {}
    for k in comp_dict:
        result["name"][k] = title_dict[k]

    result["data"] = {}
    result["values"] = {}
    nb_lists = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        # The group of all lists is tested first: with a single input
        # list the group has length 1 AND covers every list, and its
        # region must be the list itself.
        if len(group) == nb_lists:
            members = intersected
        else:
            members = difference
        region = "".join(group)
        result["data"][region] = sorted(members)
        result["values"][region] = len(members)

    return result
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
120 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
121 # Write intersections of input to text output file |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
122 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
123 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
def write_text_venn(json_result):
    """
    Write intersections of input to text output file

    json_result: dictionary with a "name" entry (list label -> list
    name) and a "data" entry (region label -> list of IDs), a region
    label being the concatenation of one-character list labels.

    Writes "venn_diagram_text_output.tsv" in the current directory: one
    column per non-empty region, headed by the name of the list or by
    the names of the lists of the region joined with "_", followed by
    the IDs of the region ('NA' excluded). Shorter columns are padded
    with empty cells. json_result is left untouched.
    """
    list_names = json_result["name"]

    # Work on copies: the lists of json_result are shared with the
    # caller and must not receive the padding added below.
    result = {}
    for key, ids in json_result["data"].items():
        if ids != []:
            result[key] = [item for item in ids if item != 'NA']

    # max() of an empty sequence raises, hence the fallback
    nb_lines_max = max([len(ids) for ids in result.values()] or [0])

    # construct tsv, one line per region
    lines = []
    for key, ids in result.items():
        # get list names associated to each column
        if key in list_names:
            header = list_names[key]
        else:
            header = "_".join([list_names[k] for k in key])
        padding = [''] * (nb_lines_max - len(ids))
        lines.append([header] + ids + padding)

    # transpose tsv
    lines = zip(*lines)

    with open("venn_diagram_text_output.tsv", "w") as output:
        tsv_output = csv.writer(output, delimiter='\t')
        tsv_output.writerows(lines)
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
156 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
157 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
def write_summary(summary_file, inputs):
    """
    Paste json string into template file

    summary_file: path of the HTML file to write.
    inputs: description of the lists to compare, handed over unchanged
    to input_to_dict.

    Computes the diagram data, writes the text output through
    write_text_venn, then copies "jvenn_template.html" (looked up next
    to this script) to summary_file with every "###JVENN_DATA###"
    placeholder replaced by the JSON settings of the diagram.
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # Serialised once, and before the text export, so that nothing the
    # export does to the lists of `data` can end up in the HTML page.
    # NOTE(review): the IDs come from user files and are pasted as is in
    # the template; check whether the template needs "</" to be escaped.
    json_data = json.dumps(to_replace)

    write_text_venn(data)

    template_path = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # `with` closes both files even when reading or writing fails
    with open(template_path) as FH_summary_tpl:
        with open(summary_file, "w") as FH_summary_out:
            for line in FH_summary_tpl:
                if "###JVENN_DATA###" in line:
                    line = line.replace("###JVENN_DATA###", json_data)
                FH_summary_out.write(line)
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
182 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
183 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
def process(args):
    """
    Run the tool from the parsed command line arguments

    args: object with a `summary` attribute (path of the HTML output)
    and an `input` attribute (description of the lists to compare).
    """
    summary_path = args.summary
    lists_to_compare = args.input
    write_summary(summary_path, lists_to_compare)
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
186 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
187 |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
188 ##################################################################### |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
189 # MAIN |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
190 ##################################################################### |
57f01ca855cd
"planemo upload commit 47d779aa1de5153673ac8bb1e37c9730210cbb5d"
galaxyp
parents:
diff
changeset
|
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(
        description='Compare up to six lists of IDs and draw their Venn '
                    'diagram with jvenn')
    group_input = parser.add_argument_group('Inputs')
    # Each occurrence of --input describes one list; the values are
    # read by position in input_to_dict.
    group_input.add_argument('--input', nargs="+", action="append",
                             required=True,
                             help="One list to compare (repeat the option "
                                  "for each list): a tabular file or the "
                                  "IDs themselves, the name of the list "
                                  "and the input type; when the type is "
                                  "'file', also 'true' if the file has a "
                                  "header line and the column holding the "
                                  "IDs (e.g. c1).")
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument('--summary', default="summary.html",
                              help="The HTML file containing the graphs. "
                                   "[Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process(args)