comparison venn_diagram.py @ 11:958eb2ea02aa draft default tip

"planemo upload commit 00cab92333b0338cb86ff78b5bbc1b6d26621012-dirty"
author proteore
date Wed, 12 May 2021 16:48:05 +0000
parents e744a43171ff
children
comparison
equal deleted inserted replaced
10:e744a43171ff 11:958eb2ea02aa
1 #!/usr/bin/env python2.7 1 #!/usr/bin/env python
2 2
3 import argparse
4 import csv
5 import json
3 import os 6 import os
4 import sys 7 import re
5 import json
6 import operator
7 import argparse
8 import re, csv
9 from itertools import combinations 8 from itertools import combinations
10 9
10
11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
12 12
13 ################################################################################################################################################## 13 ########################################################################
14 # FUNCTIONS 14 # FUNCTIONS
15 ################################################################################################################################################## 15 ########################################################################
16 16
17
17 def isnumber(format, n): 18 def isnumber(format, n):
18 """ 19 """
19 Check if an element is integer or float 20 Check if an element is integer or float
20 """ 21 """
21 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") 22 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$")
27 test = re.match(float_format, n) 28 test = re.match(float_format, n)
28 if test: 29 if test:
29 return True 30 return True
30 else: 31 else:
31 return False 32 return False
32 33
34
def _column_index(ncol):
    """
    Convert a column designation such as "c3" (or "3") to a 0-based index.

    Raises ValueError when the designation is not a positive integer.
    """
    digits = ncol.replace("c", "")
    if not re.match(r"^[1-9][0-9]*$", digits):
        raise ValueError("Please fill in the right format of column number")
    return int(digits) - 1


def _read_column(path, index, skip_header):
    """
    Return the raw cells found at position `index` of a tab-separated file.
    """
    with open(path, "r") as handle:
        rows = list(csv.reader(handle, delimiter="\t"))
    if skip_header:
        rows = rows[1:]
    # csv.reader yields [] for blank lines and short lists for truncated
    # rows: such rows carry no id in the requested column, so skip them
    # instead of failing with IndexError.
    return [row[index] for row in rows if len(row) > index]


def input_to_dict(inputs):
    """
    Parse input and return a dictionary of name and data of each lists/files

    Each entry of `inputs` is a sequence laid out as
        [file_or_text, name, input_type]                  (pasted list)
        [file_or_text, name, "file", header, column]      (tabular file)
    where `header` is "true" when the first row must be skipped and
    `column` designates the column to read (e.g. "c1").

    Entries are labelled "A", "B", ... in order. Ids are split on ";"
    (and on whitespace for pasted lists), stripped, and empty or 'NA'
    ids are discarded.

    Returns a tuple (comp_dict, title_dict):
        comp_dict  maps each label to the set of ids of that entry
        title_dict maps each label to the user supplied name

    Raises ValueError when more entries than labels are given or when the
    column designation is not a positive integer.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    if len(inputs) > len(labels):
        raise ValueError(
            "At most %d lists can be compared" % len(labels))

    comp_dict = {}
    title_dict = {}
    for label, entry in zip(labels, inputs):
        source, name, input_type = entry[0], entry[1], entry[2]
        title_dict[label] = name

        if input_type == "file":
            header, ncol = entry[3], entry[4]
            cells = _read_column(source, _column_index(ncol),
                                 header == "true")
        else:
            # pasted text: ids are separated by whitespace and/or ";"
            cells = source.split()

        ids = set()
        for cell in cells:
            for item in cell.split(";"):
                # strip BEFORE testing for emptiness so that blank
                # fragments such as "Q1; " never produce an empty id
                item = item.strip()
                if item and item != 'NA':
                    ids.add(item)
        comp_dict[label] = ids

    return comp_dict, title_dict
73 79
80
74 def intersect(comp_dict): 81 def intersect(comp_dict):
75 """ 82 """
76 Calculate the intersections of input 83 Calculate the intersections of input
77 """ 84 """
78 names = set(comp_dict) 85 names = set(comp_dict)
81 others = set() 88 others = set()
82 [others.add(name) for name in names if name not in group] 89 [others.add(name) for name in names if name not in group]
83 difference = [] 90 difference = []
84 intersected = set.intersection(*(comp_dict[k] for k in group)) 91 intersected = set.intersection(*(comp_dict[k] for k in group))
85 if len(others) > 0: 92 if len(others) > 0:
86 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) 93 difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) # noqa 501
87 yield group, list(intersected), list(difference) 94 yield group, list(intersected), list(difference)
95
88 96
def diagram(comp_dict, title_dict):
    """
    Build the data structure plotted by jvenn

    comp_dict maps each list label to its set of ids and title_dict maps
    the same labels to their display names.

    Returns a dict (serialised to JSON by the caller) with three keys:
        "name"   label -> display name
        "data"   region -> sorted ids belonging to that region
        "values" region -> number of ids in that region
    where a region is the concatenation of the labels of the lists it
    involves (e.g. "AB").
    """
    result = {}
    result["name"] = {label: title_dict[label] for label in comp_dict}
    result["data"] = {}
    result["values"] = {}

    total = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        size = len(group)
        if size == total:
            # Region shared by every list: there is no other list to
            # subtract, so the plain intersection is the region. This is
            # tested first so that a single input list reports its own
            # ids instead of the empty "difference".
            members = intersected
        elif 0 < size < total:
            # ids common to the group and absent from all other lists
            members = difference
        else:
            continue

        region = "".join(group)
        result["data"][region] = sorted(members)
        result["values"][region] = len(members)

    return result
112 120
113 #Write intersections of input to text output file 121 # Write intersections of input to text output file
122
123
def write_text_venn(json_result):
    """
    Write intersections of input to text output file

    json_result is a dict with a "name" entry (list label -> display name)
    and a "data" entry (region -> list of ids). Every non-empty region
    becomes one column of "venn_diagram_text_output.tsv" (written in the
    current directory): the first row holds the column titles, shorter
    columns are padded with empty cells. A region involving several lists
    (e.g. "AB") is titled with the names of its lists joined by "_".

    json_result is left untouched; when no region has any id the output
    file is created empty.
    """
    list_names = json_result["name"]

    columns = []
    for key, members in json_result["data"].items():
        if not members:
            continue
        # Build a new list: the lists in json_result are serialised by the
        # caller afterwards and must not receive the padding cells.
        ids = [member for member in members if member != 'NA']
        if key in list_names:
            title = list_names[key]
        else:
            # get list names associated to each label of the region
            title = "_".join([list_names[k] for k in key])
        columns.append([title] + ids)

    # pad every column to the same height so they can be transposed
    height = max(len(column) for column in columns) if columns else 0
    for column in columns:
        column.extend([''] * (height - len(column)))

    with open("venn_diagram_text_output.tsv", "w") as output:
        tsv_output = csv.writer(output, delimiter='\t')
        # transpose: columns -> rows
        tsv_output.writerows(zip(*columns))
144 156
157
def write_summary(summary_file, inputs):
    """
    Paste json string into template file

    Builds the comparison of `inputs` (see input_to_dict), writes the text
    table (write_text_venn) and copies "jvenn_template.html", located next
    to this script, to `summary_file` with every "###JVENN_DATA###"
    placeholder replaced by the JSON description of the diagram.
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)
    write_text_venn(data)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # serialise once instead of once per matching template line
    # NOTE(review): the JSON is inserted verbatim in the HTML; if the
    # placeholder sits inside a <script> element, names or ids containing
    # "</script>" would break the page - confirm against the template.
    payload = json.dumps(to_replace)

    template_path = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # context managers close both files even if the copy fails midway
    with open(template_path) as FH_summary_tpl:
        with open(summary_file, "w") as FH_summary_out:
            for line in FH_summary_tpl:
                FH_summary_out.write(
                    line.replace("###JVENN_DATA###", payload))
169 182
183
def process(args):
    """
    Run the comparison described by the parsed command line arguments:
    args.input holds the lists to compare, args.summary the path of the
    HTML report to write.
    """
    summary_path = args.summary
    input_specs = args.input
    write_summary(summary_path, input_specs)
172 186
173 187
174 ################################################################################################################################################## 188 #####################################################################
175 # MAIN 189 # MAIN
176 ################################################################################################################################################## 190 #####################################################################
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(
        description='Compare up to six lists of identifiers and build '
                    'a jvenn Venn diagram of their intersections')
    group_input = parser.add_argument_group('Inputs')
    # --input is repeated once per list; each occurrence collects the
    # values read positionally by input_to_dict()
    group_input.add_argument(
        '--input', nargs="+", action="append", required=True,
        help="One list to compare, given as: <file or text> <name> <type> "
             "[<header> <column>]. When <type> is 'file', <header> "
             "('true' to skip the first row) and <column> (e.g. c1) "
             "are required.")
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument(
        '--summary', default="summary.html",
        help="The HTML file containing the graphs. "
             "[Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process(args)