Mercurial > repos > workflow4metabolomics > kmd_hmdb_data_plot
comparison kmd_hmdb_plot_generator.py @ 0:59c8bad5f6bc draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/blob/master/tools/kmd_hmdb_data_plot/ commit 7fa454b6a4268b89fe18043e8dd10f30a7b4c7ca
author | workflow4metabolomics |
---|---|
date | Tue, 29 Aug 2023 09:45:16 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:59c8bad5f6bc |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 import csv | |
4 import itertools | |
5 import os | |
6 | |
7 import click | |
8 | |
9 import plotly.express | |
10 import plotly.graph_objects | |
11 | |
12 __version__ = "1.0.0" | |
13 | |
14 | |
15 @click.group() | |
16 def cli(): | |
17 pass | |
18 | |
19 | |
20 @cli.command(help="") | |
21 @click.option( | |
22 "--version", | |
23 is_flag=True, | |
24 default=False, | |
25 ) | |
26 @click.option( | |
27 "--input", | |
28 default="./test.tsv", | |
29 help="Provide the mz-ratio." | |
30 ) | |
31 @click.option( | |
32 "--output", | |
33 default="./test.html", | |
34 help="Provide the database." | |
35 ) | |
36 @click.option( | |
37 "--x-column", | |
38 default=["nominal_mass"], | |
39 multiple=True, | |
40 help="Provide the column names for the X axis.", | |
41 ) | |
42 @click.option( | |
43 "--y-column", | |
44 default=["kendricks_mass_defect"], | |
45 multiple=True, | |
46 help="Provide the column names for the Y axis.", | |
47 ) | |
48 @click.option( | |
49 "--annotation-column", | |
50 multiple=True, | |
51 default=[ | |
52 "metabolite_name", | |
53 "chemical_formula", | |
54 ], | |
55 help="Provide the columns name for the annotation." | |
56 ) | |
57 def plot(*args, **kwargs): | |
58 | |
59 if kwargs.pop("version"): | |
60 print(__version__) | |
61 exit(0) | |
62 | |
63 input_path = kwargs.pop("input") | |
64 data = read_input(input_path, kwargs) | |
65 fig = build_fig(*data) | |
66 build_html_plot(fig, kwargs.get("output")) | |
67 | |
68 | |
69 def read_input(path: str, kwargs: {}): | |
70 if not os.path.exists(path): | |
71 raise ValueError(f"The path '{path}' does not exist.") | |
72 sep = detect_sep(path) | |
73 with open(path) as csv_file: | |
74 line_generator = csv.reader(csv_file, delimiter=sep) | |
75 first_line = next(line_generator) | |
76 all_lines = list(line_generator) | |
77 hover_names = ( | |
78 "metabolite_name", | |
79 "chemical_formula", | |
80 ) | |
81 annotation_indexes = get_index_of(first_line, hover_names) | |
82 ( | |
83 x_index, | |
84 y_index, | |
85 x_column, | |
86 y_column, | |
87 ) = get_indexes_names( | |
88 first_line, | |
89 list(kwargs.get("x_column")), | |
90 list(kwargs.get("y_column")), | |
91 ) | |
92 x_lists = [[] for i in range(len(x_index))] | |
93 y_lists = [[] for i in range(len(y_index))] | |
94 x_column = list(map(first_line.__getitem__, x_index)) | |
95 y_column = list(map(first_line.__getitem__, y_index)) | |
96 trace_names = [ | |
97 f"f({x_column[i]}) = {y_column[i]}" | |
98 for i in range(len(x_index)) | |
99 ] | |
100 hover_names = kwargs["annotation_column"] | |
101 annotation_indexes = [ | |
102 get_index_of(first_line, column)[0] | |
103 for column in hover_names | |
104 ] | |
105 hover_names = list(map(first_line.__getitem__, annotation_indexes)) | |
106 annotations = list() | |
107 for line in all_lines: | |
108 for i in range(len(x_index)): | |
109 x_lists[i].append(float(line[x_index[i]])) | |
110 y_lists[i].append(float(line[y_index[i]])) | |
111 annotations.append("<br>".join( | |
112 f"{hover_names[hover_index]}: {line[index]}" | |
113 for hover_index, index in enumerate(annotation_indexes) | |
114 )) | |
115 return x_lists, y_lists, annotations, trace_names | |
116 | |
117 | |
118 def get_indexes_names(first_line, x_column, y_column): | |
119 x_column, y_column = map(list, zip(*itertools.product(x_column, y_column))) | |
120 x_index = get_index_of(first_line, x_column) | |
121 y_index = get_index_of(first_line, y_column) | |
122 for i in range(len(x_index))[::-1]: | |
123 if x_index[i] == y_index[i]: | |
124 del x_index[i], x_column[i], y_index[i], y_column[i], | |
125 return ( | |
126 x_index, | |
127 y_index, | |
128 x_column, | |
129 y_column, | |
130 ) | |
131 | |
132 | |
133 def get_index_of(first_line, column): | |
134 if isinstance(column, (tuple, list)): | |
135 return [get_index_of(first_line, x)[0] for x in list(column)] | |
136 try: | |
137 return [int(column) - 1] | |
138 except ValueError: | |
139 return [first_line.index(column)] | |
140 | |
141 | |
142 def build_fig(x_lists, y_lists, annotations, trace_names): | |
143 fig = plotly.express.scatter() | |
144 for i in range(len(x_lists)): | |
145 fig.add_trace( | |
146 plotly.graph_objects.Scatter( | |
147 name=trace_names[i], | |
148 x=x_lists[i], | |
149 y=y_lists[i], | |
150 hovertext=annotations, | |
151 mode="markers", | |
152 ) | |
153 ) | |
154 return fig | |
155 | |
156 | |
157 def detect_sep(tabular_file: str) -> str: | |
158 with open(tabular_file, "r") as file: | |
159 first_line = file.readline() | |
160 if len(first_line.split(',')) > len(first_line.split('\t')): | |
161 return ',' | |
162 return '\t' | |
163 | |
164 | |
165 def build_html_plot(fig, output: str): | |
166 return plotly.offline.plot( | |
167 fig, | |
168 filename=output, | |
169 auto_open=False, | |
170 ) | |
171 | |
172 | |
173 if __name__ == "__main__": | |
174 cli() |