Mercurial > repos > proteore > proteore_venn_diagram
comparison venn_diagram.py @ 11:958eb2ea02aa draft default tip
"planemo upload commit 00cab92333b0338cb86ff78b5bbc1b6d26621012-dirty"
author | proteore |
---|---|
date | Wed, 12 May 2021 16:48:05 +0000 |
parents | e744a43171ff |
children |
comparison
equal
deleted
inserted
replaced
10:e744a43171ff | 11:958eb2ea02aa |
---|---|
1 #!/usr/bin/env python2.7 | 1 #!/usr/bin/env python |
2 | 2 |
3 import argparse | |
4 import csv | |
5 import json | |
3 import os | 6 import os |
4 import sys | 7 import re |
5 import json | |
6 import operator | |
7 import argparse | |
8 import re, csv | |
9 from itertools import combinations | 8 from itertools import combinations |
10 | 9 |
10 | |
11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) | 11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) |
12 | 12 |
13 ################################################################################################################################################## | 13 ######################################################################## |
14 # FUNCTIONS | 14 # FUNCTIONS |
15 ################################################################################################################################################## | 15 ######################################################################## |
16 | 16 |
17 | |
17 def isnumber(format, n): | 18 def isnumber(format, n): |
18 """ | 19 """ |
19 Check if an element is integer or float | 20 Check if an element is integer or float |
20 """ | 21 """ |
21 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") | 22 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") |
27 test = re.match(float_format, n) | 28 test = re.match(float_format, n) |
28 if test: | 29 if test: |
29 return True | 30 return True |
30 else: | 31 else: |
31 return False | 32 return False |
32 | 33 |
34 | |
33 def input_to_dict(inputs): | 35 def input_to_dict(inputs): |
34 """ | 36 """ |
35 Parse input and return a dictionary of name and data of each lists/files | 37 Parse input and return a dictionary of name and data of each lists/files |
36 """ | 38 """ |
37 comp_dict = {} | 39 comp_dict = {} |
38 title_dict = {} | 40 title_dict = {} |
39 c = ["A", "B", "C", "D", "E", "F"] | 41 c = ["A", "B", "C", "D", "E", "F"] |
40 for i in range(len(inputs)): | 42 for i in range(len(inputs)): |
41 input_file = inputs[i][0] | 43 input_file = inputs[i][0] |
42 name = inputs[i][1] | 44 name = inputs[i][1] |
43 input_type = inputs[i][2] | 45 input_type = inputs[i][2] |
44 title = c[i] | 46 title = c[i] |
45 title_dict[title] = name | 47 title_dict[title] = name |
46 ids = set() | 48 ids = set() |
47 if input_type == "file": | 49 if input_type == "file": |
48 header = inputs[i][3] | 50 header = inputs[i][3] |
49 ncol = inputs[i][4] | 51 ncol = inputs[i][4] |
50 with open(input_file,"r") as handle : | 52 with open(input_file, "r") as handle: |
51 file_content = csv.reader(handle,delimiter="\t") | 53 file_content = csv.reader(handle, delimiter="\t") |
52 file_content = list(file_content) #csv object to list | 54 file_content = list(file_content) # csv object to list |
53 | 55 |
54 # Check if column number is in right form | 56 # Check if column number is in right form |
55 if isnumber("int", ncol.replace("c", "")): | 57 if isnumber("int", ncol.replace("c", "")): |
56 if header == "true": | 58 if header == "true": |
57 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # gets ids from defined column | 59 # gets ids from defined column |
60 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # noqa 501 | |
61 | |
58 else: | 62 else: |
59 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] | 63 file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] # noqa 501 |
60 else: | 64 else: |
61 raise ValueError("Please fill in the right format of column number") | 65 raise ValueError("Please fill in the right format of column number") # noqa 501 |
62 else: | 66 else: |
63 ids = set() | 67 ids = set() |
64 file_content = inputs[i][0].split() | 68 file_content = inputs[i][0].split() |
65 file_content = [x.split(";") for x in file_content] | 69 file_content = [x.split(";") for x in file_content] |
66 | 70 |
67 file_content = [item.strip() for sublist in file_content for item in sublist if item != ''] #flat list of list of lists, remove empty items | 71 # flat list of list of lists, remove empty items |
72 file_content = [item.strip() for sublist in file_content for item in sublist if item != ''] # noqa 501 | |
68 ids.update(file_content) | 73 ids.update(file_content) |
69 if 'NA' in ids : ids.remove('NA') | 74 if 'NA' in ids: |
75 ids.remove('NA') | |
70 comp_dict[title] = ids | 76 comp_dict[title] = ids |
71 | 77 |
72 return comp_dict, title_dict | 78 return comp_dict, title_dict |
73 | 79 |
80 | |
74 def intersect(comp_dict): | 81 def intersect(comp_dict): |
75 """ | 82 """ |
76 Calculate the intersections of input | 83 Calculate the intersections of input |
77 """ | 84 """ |
78 names = set(comp_dict) | 85 names = set(comp_dict) |
81 others = set() | 88 others = set() |
82 [others.add(name) for name in names if name not in group] | 89 [others.add(name) for name in names if name not in group] |
83 difference = [] | 90 difference = [] |
84 intersected = set.intersection(*(comp_dict[k] for k in group)) | 91 intersected = set.intersection(*(comp_dict[k] for k in group)) |
85 if len(others) > 0: | 92 if len(others) > 0: |
86 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) | 93 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) # noqa 501 |
87 yield group, list(intersected), list(difference) | 94 yield group, list(intersected), list(difference) |
95 | |
88 | 96 |
89 def diagram(comp_dict, title_dict): | 97 def diagram(comp_dict, title_dict): |
90 """ | 98 """ |
91 Create json string for jvenn diagram plot | 99 Create json string for jvenn diagram plot |
92 """ | 100 """ |
93 result = {} | 101 result = {} |
94 result["name"] = {} | 102 result["name"] = {} |
95 for k in comp_dict.keys(): | 103 for k in comp_dict.keys(): |
96 result["name"][k] = title_dict[k] | 104 result["name"][k] = title_dict[k] |
97 | 105 |
98 result["data"] = {} | 106 result["data"] = {} |
99 result["values"] = {} | 107 result["values"] = {} |
100 for group, intersected, difference in intersect(comp_dict): | 108 for group, intersected, difference in intersect(comp_dict): |
101 if len(group) == 1: | 109 if len(group) == 1: |
102 result["data"]["".join(group)] = difference | 110 result["data"]["".join(group)] = sorted(difference) |
103 result["values"]["".join(group)] = len(difference) | 111 result["values"]["".join(group)] = len(difference) |
104 elif len(group) > 1 and len(group) < len(comp_dict): | 112 elif len(group) > 1 and len(group) < len(comp_dict): |
105 result["data"]["".join(group)] = difference | 113 result["data"]["".join(group)] = sorted(difference) |
106 result["values"]["".join(group)] = len(difference) | 114 result["values"]["".join(group)] = len(difference) |
107 elif len(group) == len(comp_dict): | 115 elif len(group) == len(comp_dict): |
108 result["data"]["".join(group)] = intersected | 116 result["data"]["".join(group)] = sorted(intersected) |
109 result["values"]["".join(group)] = len(intersected) | 117 result["values"]["".join(group)] = len(intersected) |
110 | 118 |
111 return result | 119 return result |
112 | 120 |
113 #Write intersections of input to text output file | 121 # Write intersections of input to text output file |
122 | |
123 | |
114 def write_text_venn(json_result): | 124 def write_text_venn(json_result): |
115 lines = [] | 125 lines = [] |
116 result = dict((k, v) for k, v in json_result["data"].iteritems() if v != []) | 126 result = dict((k, v) for k, v in json_result["data"].items() if v != []) # noqa 501 |
117 for key in result : | 127 for key in result: |
118 if 'NA' in result[key] : result[key].remove("NA") | 128 if 'NA' in result[key]: |
119 list_names = dict((k, v) for k, v in json_result["name"].iteritems() if v != []) | 129 result[key].remove("NA") |
130 | |
131 list_names = dict((k, v) for k, v in json_result["name"].items() if v != []) # noqa 501 | |
120 nb_lines_max = max(len(v) for v in result.values()) | 132 nb_lines_max = max(len(v) for v in result.values()) |
121 | 133 |
122 #get list names associated to each column | 134 # get list names associated to each column |
123 column_dict = {} | 135 column_dict = {} |
124 for key in result : | 136 for key in result: |
125 if key in list_names : | 137 if key in list_names: |
126 column_dict[key] = list_names[key] | 138 column_dict[key] = list_names[key] |
127 else : | 139 else: |
128 keys= list(key) | 140 keys = list(key) |
129 column_dict[key] = "_".join([list_names[k] for k in keys]) | 141 column_dict[key] = "_".join([list_names[k] for k in keys]) |
130 | 142 |
131 #construct tsv | 143 # construct tsv |
132 for key in result : | 144 for key in result: |
133 line = result[key] | 145 line = result[key] |
134 if len(line) < nb_lines_max : | 146 if len(line) < nb_lines_max: |
135 line.extend(['NA']*(nb_lines_max-len(line))) | 147 line.extend(['']*(nb_lines_max-len(line))) |
136 line = [column_dict[key]] + line #add header | 148 line = [column_dict[key]] + line # add header |
137 lines.append(line) | 149 lines.append(line) |
138 #transpose tsv | 150 # transpose tsv |
139 lines=zip(*lines) | 151 lines = zip(*lines) |
140 | 152 |
141 with open("venn_diagram_text_output.tsv", "w") as output: | 153 with open("venn_diagram_text_output.tsv", "w") as output: |
142 tsv_output = csv.writer(output, delimiter='\t') | 154 tsv_output = csv.writer(output, delimiter='\t') |
143 tsv_output.writerows(lines) | 155 tsv_output.writerows(lines) |
144 | 156 |
157 | |
145 def write_summary(summary_file, inputs): | 158 def write_summary(summary_file, inputs): |
146 """ | 159 """ |
147 Paste json string into template file | 160 Paste json string into template file |
148 """ | 161 """ |
149 a, b = input_to_dict(inputs) | 162 a, b = input_to_dict(inputs) |
150 data = diagram(a, b) | 163 data = diagram(a, b) |
151 write_text_venn(data) | 164 write_text_venn(data) |
152 | 165 |
153 to_replace = { | 166 to_replace = { |
154 "series": [data], | 167 "series": [data], |
155 "displayStat": "true", | 168 "displayStat": "true", |
156 "displaySwitch": "true", | 169 "displaySwitch": "true", |
157 "shortNumber": "true", | 170 "shortNumber": "true", |
158 } | 171 } |
159 | 172 |
160 FH_summary_tpl = open(os.path.join(CURRENT_DIR, "jvenn_template.html")) | 173 FH_summary_tpl = open(os.path.join(CURRENT_DIR, "jvenn_template.html")) |
161 FH_summary_out = open(summary_file, "w" ) | 174 FH_summary_out = open(summary_file, "w") |
162 for line in FH_summary_tpl: | 175 for line in FH_summary_tpl: |
163 if "###JVENN_DATA###" in line: | 176 if "###JVENN_DATA###" in line: |
164 line = line.replace("###JVENN_DATA###", json.dumps(to_replace)) | 177 line = line.replace("###JVENN_DATA###", json.dumps(to_replace)) |
165 FH_summary_out.write(line) | 178 FH_summary_out.write(line) |
166 | 179 |
167 FH_summary_out.close() | 180 FH_summary_out.close() |
168 FH_summary_tpl.close() | 181 FH_summary_tpl.close() |
169 | 182 |
183 | |
170 def process(args): | 184 def process(args): |
171 write_summary(args.summary, args.input) | 185 write_summary(args.summary, args.input) |
172 | 186 |
173 | 187 |
174 ################################################################################################################################################## | 188 ##################################################################### |
175 # MAIN | 189 # MAIN |
176 ################################################################################################################################################## | 190 ##################################################################### |
177 if __name__ == '__main__': | 191 if __name__ == '__main__': |
178 # Parse parameters | 192 # Parse parameters |
179 parser = argparse.ArgumentParser(description='Filters an abundance file') | 193 parser = argparse.ArgumentParser(description='Filters an abundance file') |
180 group_input = parser.add_argument_group( 'Inputs' ) | 194 group_input = parser.add_argument_group('Inputs') |
181 group_input.add_argument('--input', nargs="+", action="append", required=True, help="The input tabular file.") | 195 group_input.add_argument('--input', nargs="+", action="append", |
182 group_output = parser.add_argument_group( 'Outputs' ) | 196 required=True, help="The input tabular file.") |
183 group_output.add_argument('--summary', default="summary.html", help="The HTML file containing the graphs. [Default: %(default)s]") | 197 group_output = parser.add_argument_group('Outputs') |
198 group_output.add_argument('--summary', default="summary.html", | |
199 help="The HTML file containing the graphs. \ | |
200 [Default: %(default)s]") | |
184 args = parser.parse_args() | 201 args = parser.parse_args() |
185 | 202 |
186 # Process | 203 # Process |
187 process( args ) | 204 process(args) |