Mercurial > repos > lain > ms_to_peakforest_it
server.py @ 0:b58b229c4cbf draft
planemo upload commit 523a9c8df173302ad38e9f15e7d82eab01736551-dirty

author:   lain
date:     Fri, 03 Mar 2023 14:10:24 +0000
parents:  (none)
children: 7e3085fc60c1

#!/usr/bin/env python3

import atexit
import csv
import http.server
import json
import logging
import os
import re
import shutil
import socketserver
import sys
import tempfile
import yaml

TAB_LIST_PLACEHOLDER = "TAB_LIST_PLACEHOLDER"
MS_PEAK_VALUES_PLACEHOLDER = "MS_PEAK_VALUES_PLACEHOLDER"
COMPOUND_NAME_PLACEHOLDER = "COMPOUND_NAME_PLACEHOLDER"
TAB_INDEX_PLACEHOLDER = "TAB_INDEX_PLACEHOLDER"
EMBED_JS_PLACEHOLDER = "EMBED_JS"
ACTIVE_TAB_PLACEHOLDER = "ACTIVE_TAB_PLACEHOLDER"
ADD_SPECTRUM_FORM = "ADD_SPECTRUM_FORM"
PRODUCE_JSON_PLACEHOLDER = "PRODUCE_JSON_PLACEHOLDER"

COMPOUND_REF = "compound-ref"
COMPOUND_MIX = "compound-mix"

END_MS_PEAK_VALUES_PLACEHOLDER = " ]"
MS_DATA_COLUMN_NUMBER = 9
DEFAULT_MS_PEAK_VALUES = (
    "[\n"
    + (" [" + ','.join([' ""'] * MS_DATA_COLUMN_NUMBER) + "],\n") * 17
    + END_MS_PEAK_VALUES_PLACEHOLDER
)
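
# For illustration, DEFAULT_MS_PEAK_VALUES expands to a JS-style array
# literal of 17 empty 9-column rows:
#
#   [
#    [ "", "", "", "", "", "", "", "", ""],
#    ... (17 identical rows) ...
#    ]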

FRAGNOT_HEADER = {
    "m/z": "fragment_mz",
    "absolute_intensity": "abs_intensity",
    "relative_intensity": "rel_intensity",
    "theo_mass": "",
    "delta_ppm": "ppm",
    "rdbequiv": "",
    "composition": "",
    "attribution": "fragment",
}
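
# FRAGNOT_HEADER maps fragnot column names to the internal names used
# downstream; columns mapped to "" are read but effectively discarded
# (they all collapse into a single throwaway "" key in fragnot_extractor).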

MS_2_SNOOP_HEADER = {
    "name": str,
    "inchikey": str,
    "composition": str,
    "fragment": str,
    "fragment_mz": str,
    "ppm": str,
    "fileid": str,
    "correlation": str,
    "abs_intensity": lambda x: float(x) * 100,
    "rel_intensity": lambda x: float(x) * 100,
    "valid_corelation": str,
}
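
# Each MS_2_SNOOP_HEADER entry maps a column (by position) to the converter
# applied to its raw value; absolute and relative intensities are rescaled
# from fractions to percentages.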


class ConfigException(ValueError):
    """
    An exception raised when something went wrong in the config and we
    cannot continue - e.g. when there's no token for peakforest.
    """

class YAMLConfig(dict):

    """
    Dictionary that handles keys with dots in them:
        test["truc.chose"]
    is equivalent to
        test["truc"]["chose"]
    Assignment works too.
    Adds the possibility to use placeholders:
        --- yaml
        test: {{ truc.chose }}
        truc:
          chose: bidule
        ---
    Here, test's value is "bidule".
    """

    def __init__(self, *args, **kwargs):
        meta_conf = kwargs.pop("__meta_config__", {})
        self._debug = meta_conf.get("__debug__", False)
        self._stream_name = meta_conf.get("__debug_stream__", "stdout")
        self._debug_stream = getattr(sys, self._stream_name)
        self._only_root_debug = meta_conf.get("__only_root_debug__", False)
        if "__root__" in kwargs:
            if self._only_root_debug:
                self._debug = False
            self._name = kwargs.pop("__name__")
            self._debugger("Is not root config.")
            self._root = kwargs.pop("__root__")
        else:
            self._name = "root"
            self._debugger("Is root config.")
            self._root = self
        super().__init__(*args, **kwargs)
        for key, value in self.copy().items():
            if isinstance(value, dict) and not isinstance(value, YAMLConfig):
                self._debugger(f"Parsing sub-config for {key}")
                self[key] = self._propagate(value, key)
        self._replace_placeholders(self)
        self._extract_defaults()

    def _propagate(self, sub_dict, name):
        if isinstance(sub_dict, dict) and not isinstance(sub_dict, self.__class__):
            return YAMLConfig(
                **sub_dict,
                __name__=name,
                __root__=self._root,
                __meta_config__={
                    "__debug__": self._debug,
                    "__debug_stream__": self._stream_name,
                    "__only_root_debug__": self._only_root_debug,
                }
            )
        return sub_dict

    def _debugger(self, message):
        if self._debug:
            self._debug_stream.write(f"[{self._name}]: {message}\n")
            self._debug_stream.flush()

    def __getattr__(self, attr):
        if attr in self:
            return self[attr]
        if '.' in attr:
            attr, sub = attr.split('.', 1)
            return getattr(getattr(self, attr), sub)
        return super().__getattribute__(attr)

    def _replace_placeholders(self, subpart):
        self._debugger("Replacing placeholders...")
        for sub_key, sub_item in subpart.copy().items():
            if isinstance(sub_item, str):
                for placeholder in re.findall("{{ (?P<placeholder>.*?) }}", sub_item):
                    if placeholder not in self._root:
                        self._debugger(f"Could not find replacement for {placeholder}")
                        continue
                    replacement = self._root[placeholder]
                    if isinstance(replacement, str):
                        self._debugger(f"Found placeholder: {placeholder} -> {replacement}")
                        sub_item = sub_item.replace(
                            "{{ " + placeholder + " }}",
                            replacement
                        )
                    else:
                        self._debugger(f"Found placeholder: {placeholder} -> {replacement.__class__.__name__}")
                        sub_item = self._propagate(replacement, placeholder)
                    dict.__setitem__(subpart, sub_key, sub_item)
            elif isinstance(sub_item, dict):
                super().__setitem__(sub_key, self._propagate(sub_item, sub_key))

    def _extract_defaults(self):
        if self._root is not self:
            return
        if "defaults" not in self:
            self._debugger("No defaults here.")
            return
        if "arguments" not in self:
            self._debugger("Arguments creation...")
            self["arguments"] = self._propagate({}, "arguments")
        self._debugger("Populating arguments with defaults values")
        for key, value in self.defaults.items():
            if key not in self:
                if isinstance(value, dict):
                    value = self._propagate(value, key)
                self.arguments[key] = value
                self._debugger(f"Default {key} = {value}")

    def __setitem__(self, key, value):
        if isinstance(value, dict):
            value = self._propagate(value, key)
        if "." not in key:
            return super().__setitem__(key, value)
        key, subkey = key.rsplit(".", 1)
        self[key][subkey] = value

    def __getitem__(self, key):
        if super().__contains__(key):
            return super().__getitem__(key)
        if "." not in key:
            return super().__getitem__(key)
        current = self
        while "." in key:
            key, subkey = key.split(".", 1)
            current = current[key]
            key = subkey
        if subkey not in current:
            current[subkey] = self._propagate({}, subkey)
        result = current[subkey]
        return result

    def __contains__(self, key):
        if "." not in key:
            return super().__contains__(key)
        key, subkey = key.split(".", 1)
        if not super().__contains__(key):
            return False
        return subkey in self[key]

    def copy(self):
        return {
            key: (
                value if not isinstance(value, dict)
                else value.copy()
            ) for key, value in self.items()
        }

class YAMLParameters(YAMLConfig):

    """
    Parses parameters from the command line and puts them
    in the config.
    Uses the config to know which parameters are recognized,
    to know the metadata (author, version),
    which parameter is a flag or is optional, the help strings, etc.
    Assigns a default short option if not defined in the "shortcuts"
    section of the config file.
    CLI config must be in the root section "parameters":
        ---
        parameters:
          mandatory:
            input: input file path
          flags:
            help: Show this help
          optional:
            method: "default is {{ defaults.method }}"
          meta:
            author: Lain Pavot
            version: 1.1.0
          shortcuts:
            help: h
          ## will autogenerate -i for input and -m for method
        ---
    Default parameters are searched in the "default" root section.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._errors = list()
        if not self.parameters.shortcuts:
            self.parameters["shortcuts"] = YAMLConfig()
        self._mandatory = self.parameters.mandatory
        self._optional = self.parameters.optional
        self._flags = {
            flag: False
            for flag in self.parameters.flags
        }
        self._all_params = self._optional.copy()
        self._all_params.update(self._mandatory)
        self._all_params.update(self._flags)
        self._small_params = dict()
        self._determine_small_params()

    @property
    def in_error(self):
        return bool(self._errors)

    @property
    def sorted_keys(self):
        return sorted(self._all_params.keys())

    @property
    def sorted_items(self):
        return sorted(self._all_params.items())

    def _determine_small_params(self, verbose=False):
        self._small_params = (self.parameters.shortcuts or {}).copy()
        chars = list(map(chr, range(97, 123))) + list(map(chr, range(65, 91)))
        all_params = self._all_params.copy()
        for long, short in self._small_params.items():
            chars.remove(short)
            del all_params[long]
        for param in all_params.copy().keys():
            for operation in (
                lambda x: x[0],                             ## select first char
                lambda x: x.split('-', 1)[-1][0],           ## first char after -
                lambda x: x.split('_', 1)[-1][0],           ## first char after _
                lambda x: x.split('.', 1)[-1][0],           ## first char after .
                lambda x: x[0].upper(),                     ## select first char
                lambda x: x.split('-', 1)[-1][0].upper(),   ## first char after -
                lambda x: x.split('_', 1)[-1][0].upper(),   ## first char after _
                lambda x: x.split('.', 1)[-1][0].upper(),   ## first char after .
                lambda x: chars[0],                         ## first letter in the alphabet
            ):
                char = operation(param)
                if char not in self._small_params.values():
                    self._small_params[param] = char
                    chars.remove(char)
                    del all_params[param]
                    break

    def _get_parameter_index(self, parameter, original):
        if f"--{parameter}" in sys.argv:
            return sys.argv.index(f"--{parameter}")
        parameter = self._small_params[original]
        if f"-{parameter}" in sys.argv:
            return sys.argv.index(f"-{parameter}")
        return None

    def as_parameter(self, string):
        return (
            string
            .replace('.', '-')
            .replace('_', '-')
        )
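
    # e.g. as_parameter("logging.file.path") == "logging-file-path", so such
    # a key would be matched on the command line as --logging-file-path.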

    def show_version(self):
        print(self.parameters.meta.version)

    def show_help(self):
        parameters = [
            f"-{self._small_params[arg]}|--{self.as_parameter(arg)} {arg}"
            for arg in self._mandatory
        ] + [
            f"[-{self._small_params[arg]}|--{self.as_parameter(arg)} {arg}]"
            for arg in self._optional
        ] + [
            f"[-{self._small_params[arg]}|--{self.as_parameter(arg)}]"
            for arg in self._flags
        ]
        print(
            f"Usage: {__file__} " + ' '.join(parameters)
            + "\n\n"
            + '\n'.join(
                f" -{self._small_params[args]}|--{self.as_parameter(args)}: {help_str}"
                for args, help_str in self.sorted_items
            )
            + "\n\n"
            + '\n'.join(
                f"{key}: {value}"
                for key, value in self.parameters.meta.items()
            )
        )
        sys.exit(0)

    def parse_args(self):
        errors = list()
        for kind in ("mandatory", "optional", "flags"):
            keys = list(sorted(getattr(self, f"_{kind}").keys()))
            for original_param, actual_param in zip(
                keys,
                map(self.as_parameter, keys),
            ):
                if original_param in self.defaults:
                    self.arguments[original_param] = self.defaults[original_param]
                elif kind == "flags":
                    self.arguments[original_param] = False
                parser = getattr(self, f"parse_{kind}")
                if (error := parser(original_param, actual_param)):
                    errors.append(error)
        self._errors = errors
        return self

    def parse_mandatory(self, original, actual):
        if (index := self._get_parameter_index(actual, original)) is None:
            return f"The parameter --{actual} is mandatory."
        if index == len(sys.argv) - 1:
            return f"The parameter --{actual} needs a value."
        self.arguments[original] = sys.argv[index + 1]

    def parse_optional(self, original, actual):
        if (index := self._get_parameter_index(actual, original)) is None:
            return
        if index == len(sys.argv) - 1:
            return f"The parameter --{actual} needs a value."
        self.arguments[original] = sys.argv[index + 1]

    def parse_flags(self, original, actual):
        if (index := self._get_parameter_index(actual, original)) is None:
            return
        self.arguments[original] = True

def parse_config(**kwargs):
    """
    Opens the config file, parses it with pyyaml's safe loader,
    and tries to extract and apply as much information and as many
    directives as possible from the config:
    - token retrieval
    - workdir management
    - tempfile management
    """
    root_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(root_dir, "config.yml")) as config_file:
        config = YAMLConfig(
            **yaml.load(config_file.read(), Loader=yaml.SafeLoader),
            **kwargs
        )

    if not config.token.value:
        if config.token.use_file:
            if (not os.path.exists(path := config.token.file_path)):
                raise ConfigException("Missing token value or token file.")
            with open(path) as token_file:
                config.token["value"] = token_file.read()
        elif config.defaults.peakforest.token:
            config.token["value"] = config.defaults.peakforest.token

    if config.workdir.create_tmp:
        tmp_dir = tempfile.mkdtemp()
        atexit.register(lambda: shutil.rmtree(tmp_dir))
    else:
        tmp_dir = tempfile.gettempdir()
    config.workdir["tmp_dir"] = tmp_dir

    config["root_dir"] = root_dir
    config["tab_list"] = []
    config["form_template"] = os.path.join(root_dir, config.templates.form)
    config["meta_template"] = os.path.join(root_dir, config.templates.main)
    config["js_template"] = os.path.join(root_dir, config.templates.js)
    config["tab_list_template"] = os.path.join(root_dir, config.templates.tab_list)
    config["placeholders"] = dict()
    config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = DEFAULT_MS_PEAK_VALUES
    config.placeholders[TAB_INDEX_PLACEHOLDER] = "1"
    config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "active"
    config.placeholders[ADD_SPECTRUM_FORM] = ""
    config.placeholders[EMBED_JS_PLACEHOLDER] = ""
    config.placeholders[TAB_LIST_PLACEHOLDER] = ""
    config.placeholders["DEFAULT_MIN_MZ"] = "50"
    config.placeholders["DEFAULT_MAX_MZ"] = "500"
    config.placeholders["DEFAULT_RESOLUTION_LOW"] = ""
    config.placeholders["DEFAULT_RESOLUTION_HIGH"] = "selected=\"selected\""
    config.placeholders["DEFAULT_RESOLUTION_UNSET"] = ""
    config.placeholders["DEFAULT_MIN_RT"] = "0.9"
    config.placeholders["DEFAULT_MAX_RT"] = "1.4"
    return config

def parse_parameters(config):
    """
    Parses the command line and checks that the provided values are
    acceptable/usable. Raises an error if not.
    """
    parameters = YAMLParameters(**config)
    parameters.parse_args()

    parameters["json_result"] = []

    get_logger(parameters)

    arguments = parameters.arguments
    if arguments.help:
        parameters.show_help()
        sys.exit(0)

    if arguments.version:
        parameters.show_version()
        sys.exit(0)

    if parameters.in_error:
        raise ValueError(
            "Some errors occurred during parameters extraction: \n"
            + '\n'.join(parameters._errors)
        )

    if arguments.sample_type == COMPOUND_MIX:
        parameters["form_template"] = os.path.join(
            parameters["root_dir"],
            parameters.templates.form_mix
        )
        parameters["meta_template"] = os.path.join(
            parameters["root_dir"],
            parameters.templates.main_mix
        )
    elif arguments.sample_type == COMPOUND_REF:
        parameters["form_template"] = os.path.join(
            parameters["root_dir"],
            parameters.templates.form_ref
        )
        parameters["meta_template"] = os.path.join(
            parameters["root_dir"],
            parameters.templates.main_ref
        )

    arguments["produce_json"] = (
        "output_json" in arguments
        and arguments["output_json"] != ""
    )
    if arguments.produce_json:
        parameters.placeholders[PRODUCE_JSON_PLACEHOLDER] = "true"
        parameters["json_result"] = []
        arguments["output_json"] = os.path.abspath(arguments["output_json"])
        atexit.register(save_json, parameters)
    else:
        parameters.placeholders[PRODUCE_JSON_PLACEHOLDER] = "false"

    if arguments.run_dry_html:
        arguments["do_run_dry"] = True
        parameters.generated["html"] = os.path.abspath(arguments.run_dry_html)

    if arguments.run_dry_js:
        arguments["do_run_dry"] = True
        parameters.generated["js"] = os.path.abspath(arguments.run_dry_js)

    if arguments.do_run_dry:
        parameters.logger.info("Dry run. Server will not be run.")
        if arguments.run_dry_html:
            parameters.logger.info(f"HTML file will be put in {arguments.run_dry_html}")
        if arguments.run_dry_js:
            parameters.logger.info(f"JS file will be put in {arguments.run_dry_js}")

    if arguments.peakforest.token:
        config.token["value"] = arguments.peakforest.token
    if not config.token.value:
        raise ConfigException(
            "No token provided. We will not be able to connect to peakforest."
        )

    if os.path.exists(arguments.input):
        single_file = True
        file_paths = [arguments.input]
    else:
        path_list = arguments.input.split(',')
        if all(map(os.path.exists, path_list)):
            single_file = False
            file_paths = path_list
        else:
            raise ValueError(
                "Some files cannot be found: "
                + ', '.join(
                    path for path in path_list
                    if not os.path.exists(path)
                )
            )
    arguments["input"] = list(map(os.path.abspath, file_paths))

    if single_file:
        arguments["name"] = [arguments.name]
        arguments["raw_metadata"] = [arguments.raw_metadata]
        parameters.logger.info(f"Single file processing: {arguments.input}")
    else:
        parameters.logger.info("Multiple file processing:")
        arguments["raw_metadata"] = arguments.raw_metadata.split(
            arguments.raw_metadata_sep
        )
        if not arguments.name:
            arguments["name"] = arguments["raw_metadata"]
        else:
            arguments["name"] = arguments.name.split(',')
        ## check lengths before indexing the three lists below
        if (
            len(arguments.name) != len(arguments.raw_metadata)
            or len(arguments.name) != len(arguments.input)
        ):
            raise ValueError(
                "name, raw_metadata and input parameters have different lengths: \n"
                f"input is {len(arguments.input)} elements long, "
                f"raw_metadata is {len(arguments.raw_metadata)} elements long "
                f"and name is {len(arguments.name)} elements long."
            )
        for i in range(len(arguments.name)):
            parameters.logger.info(f" - file: {arguments.input[i]}")
            parameters.logger.info(f" - name: {arguments.name[i]}")
            parameters.logger.info(f" - metadata: {arguments.raw_metadata[i]}")
            parameters.logger.info(" ")
    if arguments.spectrum_type == "LC_MS":
        arguments["scan_type"] = "ms"
    elif arguments.spectrum_type == "LC_MSMS":
        arguments["scan_type"] = "ms2"
    if arguments.method == "test":
        if arguments.spectrum_type == "LC_MS":
            arguments["method"] = "cf_pfem_urine_qtof"
        else:
            arguments["method"] = "cf_pfem_urine_method1_qtof-msms"
    if arguments["sample_type"] == COMPOUND_MIX:
        check_mix_compound_files(parameters)
    more_info_in_logs(parameters)
    return parameters

def check_mix_compound_files(parameters):
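    """
    Checks that compound-mix metadata/file names follow the
    `<run>-<sample>_...` convention (e.g. `1-1_mixA.tsv`, `1-2_mixA.tsv`,
    hypothetical names): a single run number shared by all files, and
    sample numbers covering 1..N with no duplicates and no gaps.
    """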
    arguments = parameters.arguments
    try:
        numbarz = [
            list(map(int, os.path.basename(metadata).split("_", 1)[0].split("-")))
            for metadata in arguments.raw_metadata
        ]
    except ValueError:
        parameters.logger.error(
            "Metadata/file names do not start with `[0-9]+-[0-9]+_.*`. "
            "This is necessary in the case of a compound mix."
        )
        sys.exit(-1)
    runs, samples = zip(*numbarz)
    if not all(runs[0] == i for i in runs[1:]):
        parameters.logger.error(
            "Run numbers in metadata/file names are not identical. "
            "You mixed some files."
        )
        sys.exit(-1)
    length = len(samples)
    if list(sorted(samples)) != list(range(1, length + 1)):
        if not all(samples.count(i) == 1 for i in samples):
            parameters.logger.error("Some samples are duplicated.")
        else:
            parameters.logger.error("Some sample files are missing.")
        sys.exit(-1)

def more_info_in_logs(config):
    arguments = config.arguments
    if arguments.embed_js:
        config.logger.info("JS will be embedded in the HTML page to form an HTML bundle.")
    else:
        config.logger.info("JS files are separate and need to be served.")
    config.logger.info("Chosen parameters:")
    config.logger.info(f" - method: {arguments.method}")
    config.logger.info(f" - peakforest instance: {arguments.peakforest.url}")
    config.logger.info(f" - polarity: {arguments.polarity}")
    config.logger.info(f" - spectrum type: {arguments.spectrum_type}")
    config.logger.info(f" - scan type: {arguments.scan_type}")
    config.logger.info(f" - produce JSON: {arguments.produce_json}")
    config.logger.info(f" - sample type: {arguments.sample_type}")

def process_all_files(config):
    """
    For each file and its metadata, reads and processes them,
    then fills the meta HTML template file with the whole result.
    """
    arguments = config.arguments
    extra_defaults = [
        process_fragnot_metadata(metadata, config)
        for metadata in arguments.raw_metadata
    ]
    for i, name in enumerate(arguments.name):
        extra_defaults[i]["name"] = name

    if not extra_defaults:
        extra_defaults = [{}] * len(arguments.input)

    index = 0
    for input_path, extra_default in zip(arguments.input, extra_defaults):
        config.logger.info(f"Processing file at {input_path}...")
        current_defaults = arguments.copy()
        current_defaults.update(extra_default)
        if config.arguments.verbose:
            config.logger.info(
                "[VERBOSE] Defaults for current file: "
                + ';'.join(f"{key}={value}" for key, value in current_defaults.items())
            )
        tsv_content, tsv_data_extractor = read_input(input_path, config)
        index = process_tsv(
            tsv_content,
            tsv_data_extractor,
            config,
            defaults_data=current_defaults,
            index=index + 1,
        )
    if arguments.embed_js:
        config.logger.info("Embedding JS in HTML file... ")
        for index in range(len(config.tab_list)):
            config.placeholders[EMBED_JS_PLACEHOLDER] += "<script type='text/javascript'>"
            with open(f"add-one-spectrum-{index+1}.js") as js_file:
                config.placeholders[EMBED_JS_PLACEHOLDER] += js_file.read()
            config.placeholders[EMBED_JS_PLACEHOLDER] += "</script>"
            config.placeholders[EMBED_JS_PLACEHOLDER] += "\n"
            config.logger.info(f" - add-one-spectrum-{index+1}.js embedded.")
        config.placeholders[TAB_LIST_PLACEHOLDER] = "\n".join(config.tab_list)
    else:
        config.placeholders[EMBED_JS_PLACEHOLDER] += "<script type='text/javascript'>"
        config.placeholders[EMBED_JS_PLACEHOLDER] += "</script>"
        config.placeholders[EMBED_JS_PLACEHOLDER] += "\n".join(
            [""] + [
                " " * 12 + f"<script src=\"./add-one-spectrum-{index+1}.js\"></script>"
                for index in range(len(config.tab_list))
            ]
        )
        config.placeholders[EMBED_JS_PLACEHOLDER] += "\n"
        config.placeholders[TAB_LIST_PLACEHOLDER] = "\n".join(config.tab_list)

    fill_template("meta_template", "pf_path", config)

def fill_template(
    template_name,
    output_name,
    config,
    additional_placeholders=dict()
):
    """
    Fills a template, replacing its placeholders.
    Either writes the result to the given output file, or returns it
    if output_name is None.
    """
    template_path = config[template_name]
    config.logger.debug(f"Filling template {template_name} at {template_path}...")
    with open(template_path) as template_file:
        template_content = template_file.read()
    placeholders = config.placeholders.copy()
    placeholders.update(additional_placeholders)
    for placeholder, replacement in placeholders.items():
        if not placeholder.startswith(config.templates.placeholders.start):
            placeholder = placeholder.join((
                config.templates.placeholders.start,
                config.templates.placeholders.stop
            ))
        template_content = template_content.replace(placeholder, replacement)
    if output_name is None:
        config.logger.debug("Returning template content")
        return template_content
    output_path = config[output_name]
    if "{{ index }}" in output_path:
        index_value = additional_placeholders["{{ index }}"]
        config.logger.debug(f"Changing index value for {index_value}")
        output_path = output_path.replace("{{ index }}", index_value)
    config.logger.debug(f"Full output path {output_path}")
    with open(output_path, "w") as output_file:
        output_file.write(template_content)

def read_input(input_path, config):
    """
    Reads a tsv file and determines its processor, based on its header.
    """
    with open(input_path) as input_file:
        config.logger.info(f"Reading {input_path}...")
        tsv_file = csv.reader(input_file, delimiter='\t')
        header = next(tsv_file)
        tsv_file = list(tsv_file)
        config.logger.info(f"Header is: {', '.join(header)}")
        if header == list(FRAGNOT_HEADER):
            config.logger.info("Fragnot recognized.")
            processor = fragnot_extractor
            return uniformize_fragnot(tsv_file, header), processor
        else:
            config.logger.info("MS2Snoop recognized.")
            processor = ms2snoop_extractor
            return uniformize_ms2snoop(tsv_file, header), processor

def uniformize_fragnot(content, header):
    """
    Sorts fragnot data so rows always appear in the same order.
    """
    return sorted(content, key=lambda x: (float(x[0]), float(x[4])))

def uniformize_ms2snoop(content, header):
    """
    Sorts ms2snoop data so rows always appear in the same order.
    """
    return sorted(content, key=lambda x: (x[0], float(x[4])))

def process_fragnot_metadata(raw_metadata, config):
    """
    Tries to extract information from the metadata provided by fragnot
    file names.
    Heavily based on regexes defined in the config file.
    """
    regex = config.regex.copy()
    del regex["values"]
    result = {}
    config.logger.info(f"Extracting info from {raw_metadata}...")
    count = 0
    for name, expression in regex.items():
        if (match := re.search(expression, raw_metadata)):
            result[name] = match[name]
            count += 1
            did = "+ did"
        else:
            did = "- did not"
        if config.arguments.verbose:
            config.logger.info(f" {did} match {expression}")
    config.logger.info(f"{count} useful pieces of information extracted.")
    return result

def process_tsv(
    tsv_content,
    tsv_data_extractor,
    config,
    defaults_data={},
    index=1
):
    """
    Processes one tsv file, containing one or multiple compounds.
    Creates the peak table for each compound.
    """
    tsv_content = list(tsv_content)
    current_name, ms_data = get_ms_data(
        tsv_content[0],
        tsv_data_extractor,
        defaults_data,
        config
    )
    ms_peak_table = []
    config.logger.info(f"Processing compound {current_name}...")

    for line in tsv_content:
        name, new_ms_data = get_ms_data(line, tsv_data_extractor, defaults_data, config)
        if name != current_name:
            new_compound(current_name, index, ms_data, config, ms_peak_table)
            current_name = name
            index += 1
            config.logger.info(f"Processing compound {current_name}...")
            ms_peak_table = []
            ms_data = new_ms_data
        ms_peak_table.append(
            ", ".join(
                f'"{value}"' if value not in ("na", "NA")
                else '""'
                for value in (
                    new_ms_data["fragment_mz"],
                    new_ms_data["abs_intensity"],
                    new_ms_data["rel_intensity"],
                    new_ms_data["ppm"],
                    new_ms_data["composition"],
                    new_ms_data["fragment"],
                    str(new_ms_data["valid_corelation"] == "TRUE").lower(),
                    "true" if new_ms_data.get("correlation") == "1" else "false"
                )
            )
        )
    new_compound(current_name, index, ms_data, config, ms_peak_table)
    return index

def get_ms_data(line, extractor, defaults, config):
    ms_data = defaults.copy()
    ms_data.update(extractor(config, *line))
    return ms_data["name"], ms_data

def new_compound(name, index, ms_data, config, ms_peak_table):
    """
    Aggregates information to form the peak table,
    adds the compound to the tab list,
    creates the js file for this tab.
    """
    if len([x for x in ms_peak_table if x.split(", ")[7] == "\"true\""]) > 1:
        for i in range(len(ms_peak_table)):
            ms_peak_table[i] = ", ".join(
                ms_peak_table[i].split(", ")[:-1] + ["\"false\""]
            )
    config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = f"""[
        {','.join('[' + line + ']' for line in ms_peak_table)}
    ]"""
    tab_list = fill_template(
        "tab_list_template",
        None,
        config, {
            COMPOUND_NAME_PLACEHOLDER: name,
            TAB_INDEX_PLACEHOLDER: str(index),
        })
    config.tab_list.append(tab_list)
    create_js_file(index, ms_data, config)
    config.placeholders[ADD_SPECTRUM_FORM] += fill_template(
        "form_template",
        None,
        config,
        {TAB_INDEX_PLACEHOLDER: str(index)},
    )
    if index == 1:
        config.placeholders[ACTIVE_TAB_PLACEHOLDER] = ""

def fragnot_extractor(config, *line):
    """
    Fragnot processor - extracts one fragnot line of content and
    produces a uniform output.
    """
    fragnot_data = {
        FRAGNOT_HEADER[header]: line[i].strip()
        for i, header in enumerate(FRAGNOT_HEADER)
    }
    fragnot_data["composition"] = "unknown"
    fragnot_data["valid_corelation"] = config.arguments.validation
    return fragnot_data

def ms2snoop_extractor(config, *line):
    """
    MS2Snoop processor - extracts one ms2snoop line of content and
    produces a uniform output.
    """
    ms2snoop_data = {
        header: MS_2_SNOOP_HEADER[header](line[i])
        for i, header in enumerate(MS_2_SNOOP_HEADER)
    }
    return ms2snoop_data

def create_js_file(index, ms_data, config):
    """
    Fills the js template file for one tab (compound).
    """
    if (method := ms_data["method"]):
        method = f'"{method}"'
    else:
        method = "null"
    if config.arguments.verbose:
        config.logger.info(
            "[VERBOSE] "
            + ';'.join(f"{key}={value}" for key, value in ms_data.items())
        )
    fill_template(
        "js_template",
        "js_file",
        config,
        {
            TAB_INDEX_PLACEHOLDER: str(index),
            "INCHIKEY_PLACEHOLDER": ms_data["inchikey"],
            "DEFAULT_DATA": f"""{{
                name: "{ms_data["name"]}",
                inchikey: "{ms_data["inchikey"]}",
                method: {method},
                spectrum_type: "{ms_data["spectrum_type"]}",
                scan_type: "{ms_data["scan_type"]}",
                polarity: "{ms_data["polarity"]}",
                resolution: "{ms_data["resolution"]}",
                sample_type: "{ms_data["sample_type"]}",
            }}""",
            "{{ index }}": str(index)
        },
    )

def prepare_workplace(config):
    """
    Prepares the directory we will work in.
    """
    if config.workdir.work_in_tmp:
        os.chdir(config.workdir.tmp_dir)
        config.logger.info(f"Moving to {os.getcwd()}")
    if config.workdir.generate_in_tmp:
        gen_dir = config.workdir.tmp_dir
    else:
        gen_dir = tempfile.gettempdir()
    config.workdir.tmp_dir = gen_dir
    shutil.copy(os.path.join(config["root_dir"], "common.js"), gen_dir)
    config.logger.info(f"Outputs will be generated in {config.workdir.tmp_dir}")
    return gen_dir

def get_handler_for(directory, config):
    """
    Generates the handler class for the directory we provide.
    """
    config["json_result"] = [{} for _ in config.tab_list]

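    # The handler below serves the generated files. POST merges the per-tab
    # JSON results sent by the page into config["json_result"] and persists
    # them via save_json(); GET /quit serves the page one last time, then
    # stops the whole process.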
    class HTTPHandler(http.server.SimpleHTTPRequestHandler):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs, directory=directory)

        def do_POST(self):
            content_length = int(self.headers.get("Content-Length"))
            json_bytes = self.rfile.read(content_length).decode("utf-8")
            json_list = json.loads(json_bytes)
            for i, obj in enumerate(json_list):
                print(obj)
                if obj:
                    config["json_result"][i] = obj
            save_json(config)
            self.send_head()
            self.wfile.write(json_bytes.encode("utf-8"))
            return

        def do_GET(self):
            if self.path == "/quit":
                self.path = "/"
                super().do_GET()
                sys.exit(0)
            self.path = os.path.join(directory, self.path)
            if self.path == "/":
                self.path = config.generated.html
            return super().do_GET()

    return HTTPHandler


def save_json(config):
    json_string = json.dumps(config["json_result"])
    print(json_string)
    with open(config.arguments.output_json, "w") as json_file:
        json_file.write(json_string)

def run_server(config):
    """
    Prepares and runs the server, with the handler for the given directory.
    """
    ip, port = config.network.ip, config.network.port
    config.logger.debug(f"IP and port: {ip}:{port}")
    socketserver.TCPServer.allow_reuse_address = True
    config.logger.debug("Allow reuse address.")
    handler = get_handler_for(config.workdir.tmp_dir, config)
    config.logger.debug(f"Created server handler for {config.workdir.tmp_dir}")
    config.logger.debug(
        f"Content of directory {config.workdir.tmp_dir}: "
        + "\n"
        + '\n'.join(sorted(
            f" - {path}" for path in os.listdir(config.workdir.tmp_dir)
        ))
    )
    config.logger.debug("Creating TCP server...")
    server = socketserver.TCPServer((ip, port), handler)
    if ip == "0.0.0.0":
        displayed_ip = "localhost"
    else:
        displayed_ip = ip
    config.logger.debug("Serving...")
    print()
    print(f"http://{displayed_ip}:{port}")
    server.serve_forever()

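# get_logger() runs twice: a first pass (dummy=False) creates and configures
# the logger, then it recursively calls itself with dummy=True so the same
# branches are replayed purely to log, via the now-configured logger, which
# setup choices were made.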
def get_logger(config, dummy=False):
    dummy_log = lambda msg: dummy and config.logger.info(msg)
    arguments = config.arguments
    if not dummy:
        logger = logging.getLogger(__file__)
    if arguments.debug:
        dummy_log("Output debug info.")
        level = logging.DEBUG
    else:
        level = logging.INFO
    if not dummy:
        logger.setLevel(level)
        formatter = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(message)s"
        )
    if arguments.logging.std == "err":
        dummy_log("Handler added to output logs in stderr.")
        if not dummy:
            handler = logging.StreamHandler(sys.stderr)
            handler.setLevel(level)
            handler.setFormatter(formatter)
            logger.addHandler(handler)
    elif arguments.logging.std == "out":
        dummy_log("Handler added to output logs in stdout.")
        if not dummy:
            handler = logging.StreamHandler(sys.stdout)
            handler.setLevel(level)
            handler.setFormatter(formatter)
            logger.addHandler(handler)
    else:
        dummy_log("Logs will be output to neither stderr nor stdout.")
    if (path := arguments.logging.file.path):
        dummy_log(f"Add log file: {arguments.logging.file.path}.")
        if not arguments.logging.file.append:
            dummy_log("Log file content cleaned.")
            with open(path, "w"):
                pass
        else:
            dummy_log("Logs appended to log file.")
        if not dummy:
            file_handler = logging.FileHandler(filename=path)
            file_handler.setLevel(level)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
    if not dummy:
        config["logger"] = logger
        starting_sequence(logger)
        get_logger(config, dummy=True)
        return logger

def starting_sequence(logger):
    logger.info("*bip* *bop*")
    logger.info("starting...")
    logger.info("program...")
    logger.info("MS2PF is running...")
    logger.info("*bip* *bop* am a robot")
    atexit.register(stopping_sequence, logger)

def stopping_sequence(logger):
    logger.info("*bip* *bop*")
    logger.info("ending...")
    logger.info("program...")
    logger.info("MS2PF is shutting down...")
    logger.info("...robot")
    logger.info("*bip* *bop*")
    logger.info("shutdown")
    logger.info("...")

if __name__ == "__main__":

    base_config = parse_config()
    config = parse_parameters(base_config)

    """
    The config contains the result of the parsed config file.
    """
    arguments = config.arguments

    config.logger.info(f"Starting MS2PF from {os.getcwd()}")

    gen_dir = prepare_workplace(config)

    config["pf_path"] = os.path.join(gen_dir, config.generated.html)
    config.logger.info(f"HTML output file will be {config.pf_path}")
    config["js_file"] = os.path.join(gen_dir, config.generated.js)
    config.logger.info(f"JS output files will look like {config.js_file}")
    config.placeholders["PF_URL_PLACEHOLDER"] = arguments.peakforest.url
    config.placeholders["PF_TOKEN_PLACEHOLDER"] = (
        arguments.peakforest.token
        or config.token.value
    )
    if (token := config.placeholders.PF_TOKEN_PLACEHOLDER):
        config.logger.info(f"Using a token for authentication - length: {len(token)}")
    else:
        config.logger.info("No token provided for peakforest authentication.")

    process_all_files(config)

    if not arguments.do_run_dry:
        config.logger.debug("Running the server.")
        if arguments.firefox or arguments.chromium:
            config.logger.debug("A browser will be opened on the served page.")
            import threading
            import time
            if arguments.firefox:
                browser = "firefox"
            else:
                browser = "chromium"
            if (ip := config.network.ip) == "0.0.0.0":
                ip = "localhost"
            address = f"http://{ip}:{config.network.port}"
            threading.Thread(
                target=lambda: (
                    time.sleep(1),
                    os.system(f"{browser} {address}")
                ),
                daemon=True
            ).start()
        run_server(config)
    else:
        config.logger.debug("Server not run.")