comparison server.py @ 0:b58b229c4cbf draft

planemo upload commit 523a9c8df173302ad38e9f15e7d82eab01736551-dirty
author lain
date Fri, 03 Mar 2023 14:10:24 +0000
parents
children 7e3085fc60c1
1 #!/usr/bin/env python3
2
3 import atexit
4 import csv
5 import http.server
6 import json
7 import logging
8 import os
9 import re
10 import shutil
11 import socketserver
12 import sys
13 import tempfile
14 import yaml
15
16 TAB_LIST_PLACEHOLDER = "TAB_LIST_PLACEHOLDER"
17 MS_PEAK_VALUES_PLACEHOLDER = "MS_PEAK_VALUES_PLACEHOLDER"
18 COMPOUND_NAME_PLACEHOLDER = "COMPOUND_NAME_PLACEHOLDER"
19 TAB_INDEX_PLACEHOLDER = "TAB_INDEX_PLACEHOLDER"
20 EMBED_JS_PLACEHOLDER = "EMBED_JS"
21 ACTIVE_TAB_PLACEHOLDER = "ACTIVE_TAB_PLACEHOLDER"
22 ADD_SPECTRUM_FORM = "ADD_SPECTRUM_FORM"
23 PRODUCE_JSON_PLACEHOLDER = "PRODUCE_JSON_PLACEHOLDER"
24
25 COMPOUND_REF = "compound-ref"
26 COMPOUND_MIX = "compound-mix"
27
28 END_MS_PEAK_VALUES_PLACEHOLDER = " ]"
29 MS_DATA_COLUMN_NUMBER = 9
30 DEFAULT_MS_PEAK_VALUES = (
31 "[\n"
32 + (" [" + ','.join([' ""'] * MS_DATA_COLUMN_NUMBER) + "],\n") * 17
33 + END_MS_PEAK_VALUES_PLACEHOLDER
34 )
35
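## For reference, the expression above expands to a JS-style array literal
## of 17 identical rows, each holding MS_DATA_COLUMN_NUMBER (9) empty
## strings -- the blank peak table every page starts with:
##   [
##    [ "", "", "", "", "", "", "", "", ""],
##    ... (17 rows in total)
##    ]
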
36 FRAGNOT_HEADER = {
37 "m/z": "fragment_mz",
38 "absolute_intensity": "abs_intensity",
39 "relative_intensity": "rel_intensity",
40 "theo_mass": "",
41 "delta_ppm": "ppm",
42 "rdbequiv": "",
43 "composition": "",
44 "attribution": "fragment",
45 }
46
47 MS_2_SNOOP_HEADER = {
48 "name": str,
49 "inchikey": str,
50 "composition": str,
51 "fragment": str,
52 "fragment_mz": str,
53 "ppm": str,
54 "fileid": str,
55 "correlation": str,
56 "abs_intensity": lambda x:float(x) * 100,
57 "rel_intensity": lambda x:float(x) * 100,
58 "valid_corelation": str
59 }
60
61
62 class ConfigException(ValueError):
63 """
64 An exception raised when something went wrong in the config and we
65 cannot continue, e.g. when there is no token for peakforest.
66 """
67
68 class YAMLConfig(dict):
69
70 """
71 Dictionary that handles keys with dots in them:
72 test["truc.chose"]
73 is equivalent to
74 test["truc"]["chose"]
75 Assignment works too.
76 Add the possibility to use placeholders:
77 --- yaml
78 test: {{ truc.chose }}
79 truc:
80 chose: bidule
81 ---
82 here, test's value is "bidule"
83 """
84
85 def __init__(self, *args, **kwargs):
86 meta_conf = kwargs.pop("__meta_config__", {})
87 self._debug = meta_conf.get("__debug__", False)
88 self._stream_name = meta_conf.get("__debug_stream__", "stdout")
89 self._debug_stream = getattr(sys, self._stream_name)
90 self._only_root_debug = meta_conf.get("__only_root_debug__", False)
91 if "__root__" in kwargs:
92 if self._only_root_debug:
93 self._debug = False
94 self._name = kwargs.pop("__name__")
95 self._debugger("Is not root config.")
96 self._root = kwargs.pop("__root__")
97 else:
98 self._name = "root"
99 self._debugger("Is root config.")
100 self._root = self
101 super().__init__(*args, **kwargs)
102 for key, value in self.copy().items():
103 if isinstance(value, dict) and not isinstance(value, YAMLConfig):
104 self._debugger(f"Parsing sub-config for {key}")
105 self[key] = self._propagate(value, key)
106 self._replace_placeholders(self)
107 self._extract_defaults()
108
109 def _propagate(self, sub_dict, name):
110 if isinstance(sub_dict, dict) and not isinstance(sub_dict, self.__class__):
111 return YAMLConfig(
112 **sub_dict,
113 __name__=name,
114 __root__=self._root,
115 __meta_config__={
116 "__debug__": self._debug,
117 "__debug_stream__": self._stream_name,
118 "__only_root_debug__": self._only_root_debug,
119 }
120 )
121 return sub_dict
122
123 def _debugger(self, message):
124 if self._debug:
125 self._debug_stream.write(f"[{self._name}]: {message}\n")
126 self._debug_stream.flush()
127
128 def __getattr__(self, attr):
129 if attr in self:
130 return self[attr]
131 if '.' in attr:
132 attr, sub = attr.split('.', 1)
133 return getattr(getattr(self, attr), sub)
134 return super().__getattribute__(attr)
135
136 def _replace_placeholders(self, subpart):
137 self._debugger("Replacing placeholders...")
138 for sub_key, sub_item in subpart.copy().items():
139 if isinstance(sub_item, str):
140 for placeholder in re.findall("{{ (?P<placeholder>.*?) }}", sub_item):
141 if placeholder not in self._root:
142 self._debugger(f"Could not find replacement for {placeholder}")
143 continue
144 replacement = self._root[placeholder]
145 if isinstance(replacement, str):
146 self._debugger(f"Found placeholder: {placeholder} -> {replacement}")
147 sub_item = sub_item.replace(
148 "{{ " + placeholder + " }}",
149 replacement
150 )
151 else:
152 self._debugger(f"Found placeholder: {placeholder} -> {replacement.__class__.__name__}")
153 sub_item = self._propagate(replacement, placeholder)
154 dict.__setitem__(subpart, sub_key, sub_item)
155 elif isinstance(sub_item, dict):
156 super().__setitem__(sub_key, self._propagate(sub_item, sub_key))
157
158 def _extract_defaults(self):
159 if self._root is not self:
160 return
161 if "defaults" not in self:
162 self._debugger("No defaults here.")
163 return
164 if "arguments" not in self:
165 self._debugger("Arguments creation...")
166 self["arguments"] = self._propagate({}, "arguments")
167 self._debugger("Populating arguments with defaults values")
168 for key, value in self.defaults.items():
169 if key not in self:
170 if isinstance(value, dict):
171 value = self._propagate(value, key)
172 self.arguments[key] = value
173 self._debugger(f"Default {key} = {value}")
174
175 def __setitem__(self, key, value):
176 if isinstance(value, dict):
177 value = self._propagate(value, key)
178 if "." not in key:
179 return super().__setitem__(key, value)
180 curent = self
181 key, subkey = key.rsplit(".", 1)
182 self[key][subkey] = value
183
184 def __getitem__(self, key):
185 if super().__contains__(key):
186 return super().__getitem__(key)
187 if "." not in key:
188 return super().__getitem__(key)
189 curent = self
190 while "." in key:
191 key, subkey = key.split(".", 1)
192 curent = curent[key]
193 key = subkey
194 if subkey not in curent:
195 curent[subkey] = self._propagate({}, subkey)
196 result = curent[subkey]
197 return result
198
199 def __contains__(self, key):
200 if "." not in key:
201 return super().__contains__(key)
202 key, subkey = key.split(".", 1)
203 if not super().__contains__(key):
204 return False
205 return subkey in self[key]
206
207 def copy(self):
208 return {
209 key: (
210 value if not isinstance(value, dict)
211 else value.copy()
212 ) for key, value in self.items()
213 }
214
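## --- illustrative example (editor's sketch, not part of the tool) -----
## A minimal demonstration of the dotted-key access and placeholder
## resolution described in the docstring above; the keys and values are
## invented for the example.
def _demo_yaml_config():  ## hypothetical helper, never called by MS2PF
    config = YAMLConfig(
        test="{{ truc.chose }}",
        truc={"chose": "bidule"},
    )
    assert config["truc.chose"] == "bidule"  ## dotted read
    assert config.truc.chose == "bidule"     ## attribute-style read
    assert config.test == "bidule"           ## placeholder was resolved
    config["truc.machin"] = 42               ## dotted assignment
    assert config["truc"]["machin"] == 42
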
215 class YAMLParameters(YAMLConfig):
216
217 """
218 Parses parameters from the command line and puts them
219 in the config.
220 Uses the config to know which parameters are recognized,
221 to know the metadata (author, version),
222 which parameters are flags or optional, the help strings, etc.
223 Assigns a default short option if none is defined in the "shortcuts"
224 section of the config file.
225 CLI config must be in the root section "parameters":
226 ---
227 parameters:
228 mandatory:
229 input: input file path
230 flags:
231 help: Show this help
232 optional:
233 method: "default is {{ defaults.method }}"
234 meta:
235 author: Lain Pavot
236 version: 1.1.0
237 shortcuts:
238 help: h
239 ## will autogenerate -i for input and -m for method
240 ---
241 Default parameters are looked up in the "defaults" root section.
242 """
243
244 def __init__(self, *args, **kwargs):
245 super().__init__(*args, **kwargs)
246 self._errors = list()
247 if not self.parameters.shortcuts:
248 self.parameters["shortcuts"] = YAMLConfig()
249 self._mandatory = self.parameters.mandatory
250 self._optional = self.parameters.optional
251 self._flags = {
252 flag: False
253 for flag in self.parameters.flags
254 }
255 self._all_params = self._optional.copy()
256 self._all_params.update(self._mandatory)
257 self._all_params.update(self._flags)
258 self._small_params = dict()
259 self._determine_small_params()
260
261 @property
262 def in_error(self):
263 return bool(self._errors)
264
265 @property
266 def sorted_keys(self):
267 return sorted(self._all_params.keys())
268
269 @property
270 def sorted_items(self):
271 return sorted(self._all_params.items())
272
273 def _determine_small_params(self, verbose=False):
274 self._small_params = (self.parameters.shortcuts or {}).copy()
275 chars = list(map(chr, range(97, 123))) + list(map(chr, range(65, 91)))
276 all_params = self._all_params.copy()
277 for long, short in self._small_params.items():
278 chars.remove(short)
279 del all_params[long]
280 for param in all_params.copy().keys():
281 for operation in (
282 lambda x:x[0], ## select first char
283 lambda x:x.split('-', 1)[-1][0], ## first char after -
284 lambda x:x.split('_', 1)[-1][0], ## first char after _
285 lambda x:x.split('.', 1)[-1][0], ## first char after .
286 lambda x:x[0].upper(), ## first char, uppercased
287 lambda x:x.split('-', 1)[-1][0].upper(), ## first char after -, uppercased
288 lambda x:x.split('_', 1)[-1][0].upper(), ## first char after _, uppercased
289 lambda x:x.split('.', 1)[-1][0].upper(), ## first char after ., uppercased
290 lambda x: chars[0], ## first unused letter of the alphabet
291 ):
292 char = operation(param)
293 if char not in self._small_params.values():
294 self._small_params[param] = char
295 chars.remove(char)
296 del all_params[param]
297 break
298
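## Illustrative run of the fallback chain above (iteration order depends
## on the config, so the exact letters may differ): with the explicit
## shortcut help -> h and the parameters "input" then "inchikey",
## "input" takes -i (first char) and "inchikey" falls through to -I
## (first char, uppercased), because 'i' is already taken.
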
299 def _get_parameter_index(self, parameter, original):
300 if f"--{parameter}" in sys.argv:
301 return sys.argv.index(f"--{parameter}")
302 parameter = self._small_params[original]
303 if f"-{parameter}" in sys.argv:
304 return sys.argv.index(f"-{parameter}")
305 return None
306
307 def as_parameter(self, string):
308 return (
309 string
310 .replace('.', '-')
311 .replace('_', '-')
312 )
313
314 def show_version(self):
315 print(self.parameters.meta.version)
316
317 def show_help(self):
318 parameters = [
319 f"-{self._small_params[arg]}|--{self.as_parameter(arg)} {arg}"
320 for arg in self._mandatory
321 ] + [
322 f"[-{self._small_params[arg]}|--{self.as_parameter(arg)} {arg}]"
323 for arg in self._optional
324 ] + [
325 f"[-{self._small_params[arg]}|--{self.as_parameter(arg)}]"
326 for arg in self._flags
327 ]
328 print(
329 f"Usage: {__file__} " + ' '.join(parameters)
330 + "\n\n"
331 + '\n'.join(
332 f" -{self._small_params[args]}|--{self.as_parameter(args)}: {help_str}"
333 for args, help_str in self.sorted_items
334 )
335 + "\n\n"
336 + '\n'.join(
337 f"{key}: {value}"
338 for key, value in self.parameters.meta.items()
339 )
340 )
341 sys.exit(0)
342
343 def parse_args(self):
344 errors = list()
345 for kind in ("mandatory", "optional", "flags"):
346 keys = list(sorted(getattr(self, f"_{kind}").keys()))
347 for original_param, actual_param in zip(
348 keys,
349 map(self.as_parameter, keys),
350 ):
351 if original_param in self.defaults:
352 self.arguments[original_param] = self.defaults[original_param]
353 elif kind == "flags":
354 self.arguments[original_param] = False
355 parser = getattr(self, f"parse_{kind}")
356 if (error := parser(original_param, actual_param)):
357 errors.append(error)
358 self._errors = errors
359 return self
360
361 def parse_mandatory(self, original, actual):
362 if (index := self._get_parameter_index(actual, original)) is None:
363 return f"The parameter --{actual} is mandatory."
364 if index == len(sys.argv) - 1:
365 return f"The parameter --{actual} needs a value."
366 self.arguments[original] = sys.argv[index + 1]
367
368 def parse_optional(self, original, actual):
369 if (index := self._get_parameter_index(actual, original)) is None:
370 return
371 if index == len(sys.argv) - 1:
372 return f"The parameter --{actual} needs a value."
373 self.arguments[original] = sys.argv[index + 1]
374
375 def parse_flags(self, original, actual):
376 if (index := self._get_parameter_index(actual, original)) is None:
377 return
378 self.arguments[original] = True
379
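## Illustrative invocation (parameter names borrowed from the docstring's
## example config; the real set comes from the "parameters" section of
## config.yml):
##   python3 server.py --input /path/to/file.tsv --method some-method
##   python3 server.py -h                  ## flags take no value
## parse_args() then exposes the values as config.arguments.input, etc.
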
380 def parse_config(**kwargs):
381 """
382 opens the config file, parses it with pyyaml's safe loader,
383 and tries to extract and apply as many directives as possible
384 from the config:
385 - token retrieval
386 - workdir management
387 - tempfile management
388 """
389 root_dir = os.path.dirname(os.path.abspath(__file__))
390 with open(os.path.join(root_dir, "config.yml")) as config_file:
391 config = YAMLConfig(
392 **yaml.load(config_file.read(), Loader=yaml.SafeLoader),
393 **kwargs
394 )
395
396 if not config.token.value:
397 if config.token.use_file:
398 if (not os.path.exists(path := config.token.file_path)):
399 raise ConfigException("Missing token value or token file.")
400 with open(path) as token_file:
401 config.token["value"] = token_file.read()
402 elif config.defaults.peakforest.token:
403 config.token["value"] = config.defaults.peakforest.token
404
405 if config.workdir.create_tmp:
406 tmp_dir = tempfile.mkdtemp()
407 atexit.register(lambda:shutil.rmtree(tmp_dir))
408 else:
409 tmp_dir = tempfile.gettempdir()
410 config.workdir["tmp_dir"] = tmp_dir
411
412 config["root_dir"] = root_dir
413 config["tab_list"] = []
414 config["form_template"] = os.path.join(root_dir, config.templates.form)
415 config["meta_template"] = os.path.join(root_dir, config.templates.main)
416 config["js_template"] = os.path.join(root_dir, config.templates.js)
417 config["tab_list_template"] = os.path.join(root_dir, config.templates.tab_list)
418 config["placeholders"] = dict()
419 config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = DEFAULT_MS_PEAK_VALUES
420 config.placeholders[TAB_INDEX_PLACEHOLDER] = "1"
421 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "active"
422 config.placeholders[ADD_SPECTRUM_FORM] = ""
423 config.placeholders[EMBED_JS_PLACEHOLDER] = ""
424 config.placeholders[TAB_LIST_PLACEHOLDER] = ""
425 config.placeholders["DEFAULT_MIN_MZ"] = "50"
426 config.placeholders["DEFAULT_MAX_MZ"] = "500"
427 config.placeholders["DEFAULT_RESOLUTION_LOW"] = ""
428 config.placeholders["DEFAULT_RESOLUTION_HIGH"] = "selected=\"selected\""
429 config.placeholders["DEFAULT_RESOLUTION_UNSET"] = ""
430 config.placeholders["DEFAULT_MIN_RT"] = "0.9"
431 config.placeholders["DEFAULT_MAX_RT"] = "1.4"
432 return config
433
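## A minimal config.yml sketch covering the keys this function reads
## (section and key names come from the code above; the values, notably
## the template paths, are placeholders rather than the shipped defaults):
##   token:
##     value: ""
##     use_file: true
##     file_path: /path/to/token.txt
##   workdir:
##     create_tmp: true
##   templates:
##     form: templates/form.html
##     main: templates/main.html
##     js: templates/add-one-spectrum.js
##     tab_list: templates/tab-list.html
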
434 def parse_parameters(config):
435 """
436 parses the command line and checks that provided values are acceptable/usable.
437 Raises an error if not.
438 """
439 parameters = YAMLParameters(**config)
440 parameters.parse_args()
441
442 parameters["json_result"] = []
443
444 get_logger(parameters)
445
446 arguments = parameters.arguments
447 if arguments.help:
448 parameters.show_help()
449 sys.exit(0)
450
451 if arguments.version:
452 parameters.show_version()
453 sys.exit(0)
454
455 if parameters.in_error:
456 raise ValueError(
457 "Some errors occured during parameters extraction: \n"
458 + '\n'.join(parameters.errors)
459 )
460
461 if arguments.sample_type == COMPOUND_MIX:
462 parameters["form_template"] = os.path.join(
463 parameters["root_dir"],
464 parameters.templates.form_mix
465 )
466 parameters["meta_template"] = os.path.join(
467 parameters["root_dir"],
468 parameters.templates.main_mix
469 )
470 elif arguments.sample_type == COMPOUND_REF:
471 parameters["form_template"] = os.path.join(
472 parameters["root_dir"],
473 parameters.templates.form_ref
474 )
475 parameters["meta_template"] = os.path.join(
476 parameters["root_dir"],
477 parameters.templates.main_ref
478 )
479
480 arguments["produce_json"] = (
481 "output_json" in arguments
482 and arguments["output_json"] != ""
483 )
484 if arguments.produce_json:
485 parameters.placeholders[PRODUCE_JSON_PLACEHOLDER] = "true"
486 parameters.json_result = []
487 arguments["output_json"] = os.path.abspath(arguments["output_json"])
488 atexit.register(save_json, parameters)
489 else:
490 parameters.placeholders[PRODUCE_JSON_PLACEHOLDER] = "false"
491
492 if arguments.run_dry_html:
493 arguments["do_run_dry"] = True
494 parameters.generated["html"] = os.path.abspath(arguments.run_dry_html)
495
496 if arguments.run_dry_js:
497 arguments["do_run_dry"] = True
498 parameters.generated["js"] = os.path.abspath(arguments.run_dry_js)
499
500 if arguments.do_run_dry:
501 parameters.logger.info("Dry run. Server will not be run.")
502 if arguments.run_dry_html:
503 parameters.logger.info(f"HTML file will be put in {arguments.run_dry_html}")
504 if arguments.run_dry_js:
505 parameters.logger.info(f"JS file will be put in {arguments.run_dry_js}")
506
507 if arguments.peakforest.token:
508 config.token["value"] = arguments.peakforest.token
509 if not config.token.value:
510 raise ConfigException(
511 "No token provided. We will not be able to connect to peakforest."
512 )
513
514 if os.path.exists(arguments.input):
515 single_file = True
516 file_paths = [arguments.input]
517 else:
518 path_list = arguments.input.split(',')
519 if all(map(os.path.exists, path_list)):
520 single_file = False
521 file_paths = path_list
522 else:
523 raise ValueError(
524 f"Some files cannot be found: "
525 + ', '.join(
526 path for path in path_list
527 if not os.path.exists(path)
528 )
529 )
530 arguments["input"] = list(map(os.path.abspath, file_paths))
531
532 if single_file:
533 arguments["name"] = [arguments.name]
534 arguments["raw_metadata"] = [arguments.raw_metadata]
535 parameters.logger.info(f"Single file processing: {arguments.input}")
536 else:
537 parameters.logger.info(f"Multiple file processing:")
538 arguments["raw_metadata"] = arguments.raw_metadata.split(
539 arguments.raw_metadata_sep
540 )
541 if not arguments.name:
542 arguments["name"] = arguments["raw_metadata"]
543 else:
544 arguments["name"] = arguments.name.split(',')
545 for i in range(len(arguments.name)):
546 parameters.logger.info(f" - file: {arguments.input[i]}")
547 parameters.logger.info(f" - name: {arguments.name[i]}")
548 parameters.logger.info(f" - metadata: {arguments.raw_metadata[i]}")
549 parameters.logger.info(f" ")
550 if (
551 len(arguments.name) != len(arguments.raw_metadata)
552 or len(arguments.name) != len(arguments.input)
553 ):
554 raise ValueError(
555 "name, raw_metadata and input parameters have different lengths: \n"
556 f"input is {len(arguments.input)} elements long, "
557 f"raw_metadata is {len(arguments.raw_metadata)} elements long "
558 f"and name is {len(arguments.name)} elements long."
559 )
560 if arguments.spectrum_type == "LC_MS":
561 arguments["scan_type"] = "ms"
562 elif arguments.spectrum_type == "LC_MSMS":
563 arguments["scan_type"] = "ms2"
564 if arguments.method == "test":
565 if arguments.spectrum_type == "LC_MS":
566 arguments["method"] = "cf_pfem_urine_qtof"
567 else:
568 arguments["method"] = "cf_pfem_urine_method1_qtof-msms"
569 if arguments["sample_type"] == COMPOUND_MIX:
570 check_mix_compound_files(parameters)
571 more_info_in_logs(parameters)
572 return parameters
573
574 def check_mix_compound_files(parameters):
575 arguments = parameters.arguments
576 try:
577 numbarz = [
578 list(map(int, os.path.basename(metadata).split("_", 1)[0].split("-")))
579 for metadata in arguments.raw_metadata
580 ]
581 except ValueError:
582 parameters.logger.error(
583 "Metadata/file names does not start with `[0-9]+-[0-9]+_.*` . "
584 "This is necessary in the case of compounds mix."
585 )
586 sys.exit(-1)
587 runs, samples = zip(*numbarz)
588 if not all(runs[0] == i for i in runs[1:]):
589 parameters.logger.error(
590 "Run numbers in metadata/file names are not identical. "
591 "You mixed some files."
592 )
593 sys.exit(-1)
594 length = len(samples)
595 if list(sorted(samples)) != list(range(1, length+1)):
596 if not all(samples.count(i) == 1 for i in samples):
597 parameters.logger.error("Some samples are duplicated. ")
598 else:
599 parameters.logger.error("Some sample files are missing. ")
600 sys.exit(-1)
601
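## Illustrative file naming for a compound mix (following the
## `[0-9]+-[0-9]+_.*` pattern checked above; the compound names are
## invented): run 2 with three samples could be
##   2-1_alanine.tsv, 2-2_leucine.tsv, 2-3_valine.tsv
## i.e. the same run number everywhere and sample numbers forming the
## sequence 1..N with no duplicate and no gap.
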
602 def more_info_in_logs(config):
603 arguments = config.arguments
604 if arguments.embed_js:
605 config.logger.info(f"JS will be embed in HTML page to form a HTML bundle.")
606 else:
607 config.logger.info(f"JS are separated files, needed to be served.")
608 config.logger.info(f"Choosen parameters:")
609 config.logger.info(f" - method: {arguments.method}")
610 config.logger.info(f" - peakforest instance: {arguments.peakforest.url}")
611 config.logger.info(f" - polarity: {arguments.polarity}")
612 config.logger.info(f" - spectrum type: {arguments.spectrum_type}")
613 config.logger.info(f" - scan type: {arguments.scan_type}")
614 config.logger.info(f" - produce JSON: {arguments.produce_json}")
615 config.logger.info(f" - sample type: {arguments.sample_type}")
616
617 def process_all_files(config):
618 """
619 reads and processes each file and its metadata,
620 then fills the meta HTML template file with the whole result.
621 """
622 arguments = config.arguments
623 extra_defaults = [
624 process_fragnot_metadata(metadata, config)
625 for metadata in arguments.raw_metadata
626 ]
627 for i, name in enumerate(arguments.name):
628 extra_defaults[i]["name"] = name
629
630 if not extra_defaults:
631 extra_defaults = [{}] * len(arguments.input)
632
633 index = 0
634 for input_path, extra_default in zip(arguments.input, extra_defaults):
635 config.logger.info(f"Processing file at {input_path}...")
636 curent_defaults = arguments.copy()
637 curent_defaults.update(extra_default)
638 if config.arguments.verbose:
639 config.logger.info(
640 "[VERBOSE] Defaults for curent file: "
641 + ';'.join(f"{key}={value}" for key, value in curent_defaults.items())
642 )
643 tsv_content, tsv_data_extractor = read_input(input_path, config)
644 index = process_tsv(
645 tsv_content,
646 tsv_data_extractor,
647 config,
648 defaults_data = curent_defaults,
649 index = index+1,
650 )
651 if arguments.embed_js:
652 config.logger.info(f"Embeding JS in HTML file... ")
653 for index in range(len(config.tab_list)):
654 config.placeholders[EMBED_JS_PLACEHOLDER] += "<script type='text/javascript'>"
655 with open(f"add-one-spectrum-{index+1}.js") as js_file:
656 config.placeholders[EMBED_JS_PLACEHOLDER] += js_file.read()
657 config.placeholders[EMBED_JS_PLACEHOLDER] += "</script>"
658 config.placeholders[EMBED_JS_PLACEHOLDER] += "\n"
659 config.logger.info(f" - add-one-spectrum-{index+1}.js embedded.")
660 config.placeholders[TAB_LIST_PLACEHOLDER] = "\n".join(config.tab_list)
661 else:
662 config.placeholders[EMBED_JS_PLACEHOLDER] += "<script type='text/javascript'>"
663 config.placeholders[EMBED_JS_PLACEHOLDER] += "</script>"
664 config.placeholders[EMBED_JS_PLACEHOLDER] += "\n".join(
665 [""] + [
666 " "*12 + f"<script src=\"./add-one-spectrum-{index+1}.js\"></script>"
667 for index in range(len(config.tab_list))
668 ]
669 )
670 config.placeholders[EMBED_JS_PLACEHOLDER] += "\n"
671 config.placeholders[TAB_LIST_PLACEHOLDER] = "\n".join(config.tab_list)
672
673 fill_template("meta_template", "pf_path", config)
674
675 def fill_template(
676 template_name,
677 output_name,
678 config,
679 additional_placeholders=dict()
680 ):
681 """
682 Fills a template, replacing the placeholders.
683 Either writes the result to the given file, or returns it if output_name is None.
684 """
685 template_path = config[template_name]
686 config.logger.debug(f"Filling template {template_name} at {template_path}...")
687 with open(template_path) as template_file:
688 template_content = template_file.read()
689 placeholders = config.placeholders.copy()
690 placeholders.update(additional_placeholders)
691 for placeholder, replacement in placeholders.items():
692 if not placeholder.startswith(config.templates.placeholders.start):
693 placeholder = placeholder.join((
694 config.templates.placeholders.start,
695 config.templates.placeholders.stop
696 ))
697 template_content = template_content.replace(placeholder, replacement)
698 if output_name is None:
699 config.logger.debug(f"Returning template content")
700 return template_content
701 output_path = config[output_name]
702 if "{{ index }}" in output_path:
703 index_value = additional_placeholders["{{ index }}"]
704 config.logger.debug(f"Setting index value to {index_value}")
705 output_path = output_path.replace("{{ index }}", index_value)
706 config.logger.debug(f"Full output path {output_path}")
707 with open(output_path, "w") as output_file:
708 output_file.write(template_content)
709
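## Illustrative behaviour of the wrapping above, assuming the markers are
## configured as start="{{ " and stop=" }}" (the real values live under
## templates.placeholders in config.yml): a bare key such as
## "DEFAULT_MIN_MZ" is first wrapped to "{{ DEFAULT_MIN_MZ }}" and then
## substituted, so templates always spell placeholders in wrapped form.
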
710 def read_input(input_path, config):
711 """
712 reads a tsv file and determines its processor, based on its header.
713 """
714 with open(input_path) as input_file:
715 config.logger.info(f"Reading {input_path}...")
716 tsv_file = csv.reader(input_file, delimiter='\t')
717 header = next(tsv_file)
718 tsv_file = list(tsv_file)
719 config.logger.info(f"Header is: {', '.join(header)}")
720 if header == list(FRAGNOT_HEADER):
721 config.logger.info(f"Fragnot recognized.")
722 processor = fragnot_extractor
723 return uniformize_fragnot(tsv_file, header), processor
724 else:
725 config.logger.info(f"MS2Snoop recognized.")
726 processor = ms2snoop_extractor
727 return uniformize_ms2snoop(tsv_file, header), processor
728
729 def uniformize_fragnot(content, header):
730 """
731 sorts fragnot data so it always appears in the same order
732 """
733 return sorted(content, key=lambda x:(float(x[0]), float(x[4])))
734
735 def uniformize_ms2snoop(content, header):
736 """
737 sorts ms2snoop data so it always appears in the same order
738 """
739 return sorted(content, key=lambda x:(x[0], float(x[4])))
740
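## Note on the sort keys above: for fragnot, column 0 is m/z and column 4
## is delta_ppm (see FRAGNOT_HEADER), both compared numerically; for
## MS2Snoop, column 0 is the compound name and column 4 is fragment_mz
## (see MS_2_SNOOP_HEADER), so rows end up grouped by compound first.
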
741 def process_fragnot_metadata(raw_metadata, config):
742 """
743 Tries to extract information from the metadata provided by fragnot
744 file names.
745 Heavily based on the regexes defined in the config file.
746 """
747 regex = config.regex.copy()
748 del regex["values"]
749 result = {}
750 config.logger.info(f"Extracting info from {raw_metadata}...")
751 count = 0
752 for name, expression in regex.items():
753 if (match := re.search(expression, raw_metadata)):
754 result[name] = match[name]
755 count += 1
756 did = "+ did"
757 else:
758 did = "- did not"
759 if config.arguments.verbose:
760 config.logger.info(f" {did} match {expression}")
761 config.logger.info(f"{count} useful pieces of information extracted.")
762 return result
763
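## Illustrative "regex" entry (the real patterns live under the "regex"
## section of config.yml; this one is invented). Each expression must
## define a named group matching its own config key, because the code
## below reads match[name]:
##   regex:
##     polarity: "(?P<polarity>positive|negative)"
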
764 def process_tsv(
765 tsv_content,
766 tsv_data_extractor,
767 config,
768 defaults_data={},
769 index=1
770 ):
771 """
772 processes one tsv file, containing one or multiple compounds,
773 and creates the peak table for each compound.
774 """
775 tsv_content = list(tsv_content)
776 curent_name, ms_data = get_ms_data(
777 tsv_content[0],
778 tsv_data_extractor,
779 defaults_data,
780 config
781 )
782 _, second_ms_data = get_ms_data(
783 tsv_content[1],
784 tsv_data_extractor,
785 defaults_data,
786 config
787 )
788 ms_peak_table = []
789 config.logger.info(f"Processing compound {curent_name}...")
790
791 for line in tsv_content:
792 name, new_ms_data = get_ms_data(line, tsv_data_extractor, defaults_data, config)
793 if name != curent_name:
794 new_compound(curent_name, index, ms_data, config, ms_peak_table)
795 curent_name = name
796 index += 1
797 config.logger.info(f"Processing compound {curent_name}...")
798 ms_peak_table = []
799 ms_data = new_ms_data
800 ms_peak_table.append(
801 ", ".join(
802 f'"{value}"' if value not in ("na", "NA")
803 else '""'
804 for value in (
805 ms_data["fragment_mz"],
806 ms_data["abs_intensity"],
807 ms_data["rel_intensity"],
808 ms_data["ppm"],
809 ms_data["composition"],
810 ms_data["fragment"],
811 str(ms_data["valid_corelation"] == "TRUE").lower(),
812 "true" if ms_data.get("correlation") == "1" else "false"
813 )
814 )
815 )
816 new_compound(curent_name, index, ms_data, config, ms_peak_table)
817 return index
818
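## The loop above relies on the rows having been sorted by compound name
## (see uniformize_fragnot/uniformize_ms2snoop): lines accumulate into
## ms_peak_table until the name changes, at which point new_compound()
## flushes the finished peak table into a new tab and the index moves on
## to the next compound.
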
819 def get_ms_data(line, extractor, defaults, config):
820 ms_data = defaults.copy()
821 ms_data.update(extractor(config, *line))
822 return ms_data["name"], ms_data
823
824 def new_compound(name, index, ms_data, config, ms_peak_table):
825 """
826 aggregates information to form the peak table,
827 adds the compound to the tab list,
828 creates the js file for this tab
829 """
830 if len([x for x in ms_peak_table if x.split(", ")[7] == "\"true\""]) > 1:
831 for i in range(len(ms_peak_table)):
832 ms_peak_table[i] = ", ".join(
833 ms_peak_table[i].split(", ")[:-1] + ["\"false\""]
834 )
835 config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = f"""[
836 {','.join('['+line+']' for line in ms_peak_table)}
837 ]"""
838 tab_list = fill_template(
839 "tab_list_template",
840 None,
841 config, {
842 COMPOUND_NAME_PLACEHOLDER: name,
843 TAB_INDEX_PLACEHOLDER: str(index),
844 })
845 config.tab_list.append(tab_list)
846 create_js_file(index, ms_data, config)
847 config.placeholders[ADD_SPECTRUM_FORM] += fill_template(
848 "form_template",
849 None,
850 config,
851 {TAB_INDEX_PLACEHOLDER: str(index)},
852 )
853 if index == 1:
854 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = ""
855
856 def fragnot_extractor(config, *line):
857 """
858 Fragnot processor - extracts one fragnot line of content and
859 produces a uniformised output.
860 """
861 fragnot_data = {
862 FRAGNOT_HEADER[header]: line[i].strip()
863 for i, header in enumerate(FRAGNOT_HEADER)
864 }
865 fragnot_data["composition"] = "unknown"
866 fragnot_data["valid_corelation"] = config.arguments.validation
867 return fragnot_data
868
869 def ms2snoop_extractor(config, *line):
870 """
871 MS2Snoop processor - extracts one ms2snoop line of content and
872 produces a uniformised output.
873 """
874 ms2snoop_data = {
875 header: MS_2_SNOOP_HEADER[header](line[i])
876 for i, header in enumerate(MS_2_SNOOP_HEADER)
877 }
878 return ms2snoop_data
879
880 def create_js_file(index, ms_data, config):
881 """
882 fills the js template file for one tab (compound)
883 """
884 if (method := ms_data["method"]):
885 method = f'"{method}"'
886 else:
887 method = "null"
888 if config.arguments.verbose:
889 config.logger.info(
890 "[VERBOSE] "
891 + ';'.join(f"{key}={value}" for key, value in ms_data.items())
892 )
893 fill_template(
894 "js_template",
895 "js_file",
896 config,
897 {
898 TAB_INDEX_PLACEHOLDER: str(index),
899 "INCHIKEY_PLACEHOLDER": ms_data["inchikey"],
900 "DEFAULT_DATA": f"""{{
901 name: "{ms_data["name"]}",
902 inchikey: "{ms_data["inchikey"]}",
903 method: {method},
904 spectrum_type: "{ms_data["spectrum_type"]}",
905 scan_type: "{ms_data["scan_type"]}",
906 polarity: "{ms_data["polarity"]}",
907 resolution: "{ms_data["resolution"]}",
908 sample_type: "{ms_data["sample_type"]}",
909 }}""",
910 "{{ index }}": str(index)
911 },
912 )
913
914 def prepare_workplace(config):
915 """
916 prepares the directory we will work in.
917 """
918 if config.workdir.work_in_tmp:
919 os.chdir(config.workdir.tmp_dir)
920 config.logger.info(f"Moving to {os.getcwd()}")
921 if config.workdir.generate_in_tmp:
922 gen_dir = config.workdir.tmp_dir
923 else:
924 gen_dir = tempfile.gettempdir()
925 config.workdir.tmp_dir = gen_dir
926 shutil.copy(os.path.join(config["root_dir"], "common.js"), gen_dir)
927 config.logger.info(f"Outputs will be generated in {config.workdir.tmp_dir}")
928 return gen_dir
929
930 def get_handler_for(directory, config):
931 """
932 generates the handler class for the directory we provide.
933 """
934 config["json_result"] = [{}] * len(config.tab_list)
935
936 class HTTPHandler(http.server.SimpleHTTPRequestHandler):
937
938 def __init__(self, *args, **kwargs):
939 super().__init__(*args, **kwargs, directory=directory)
940
941 def do_POST(self):
942 content_length = int(self.headers.get("Content-Length"))
943 json_bytes = self.rfile.read(content_length).decode("utf-8")
944 json_list = json.loads(json_bytes)
945 for i, obj in enumerate(json_list):
946 print(obj)
947 if obj:
948 config["json_result"][i] = obj
949 save_json(config)
950 self.send_head()
951 self.wfile.write(json_bytes.encode("utf-8"))
952 return
953
954 def do_GET(self):
955 if self.path == "/quit":
956 self.path = "/"
957 super().do_GET()
958 sys.exit(0)
959 self.path = os.path.join(directory, self.path)
960 if self.path == "/":
961 self.path = config.generated.html
962 return super().do_GET()
963
964 return HTTPHandler
965
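## Illustrative interaction with the handler above (host and port are
## whatever config.network defines; the JSON body is invented):
##   the page JS POSTs the per-tab results as a JSON list, e.g.
##     curl --data '[{"name": "compound1"}, {}]' http://localhost:8080/
##   while GET /quit serves one last page and then stops the process.
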
966
967 def save_json(config):
968 json_string = json.dumps(config["json_result"])
969 print(json_string)
970 with open(config.arguments.output_json, "w") as json_file:
971 json_file.write(json_string)
972
973 def run_server(config):
974 """
975 prepare and runs the server, with the handler for the given directory
976 """
977 ip, port = config.network.ip, config.network.port
978 config.logger.debug(f"IP and port: {ip}:{port}")
979 socketserver.TCPServer.allow_reuse_address = True
980 config.logger.debug(f"Allow reuse adress.")
981 handler = get_handler_for(config.workdir.tmp_dir, config)
982 config.logger.debug(f"Created server handler for {config.workdir.tmp_dir}")
983 config.logger.debug(
984 f"Content of directory {config.workdir.tmp_dir}: "
985 + "\n"
986 + '\n'.join(sorted(
987 f" - {path}"for path in os.listdir(config.workdir.tmp_dir)
988 ))
989 )
990 config.logger.debug(f"Creating TCP server...")
991 server = socketserver.TCPServer((ip, port), handler)
992 if ip == "0.0.0.0":
993 displayed_ip = "localhost"
994 else:
995 displayed_ip = ip
996 config.logger.debug(f"Serving...")
997 print()
998 print(f"http://{displayed_ip}:{port}")
999 server.serve_forever()
1000
1001 def get_logger(config, dummy=False):
1002 dummy_log = lambda msg:dummy and config.logger.info(msg)
1003 arguments = config.arguments
1004 if not dummy:
1005 logger = logging.getLogger(__file__)
1006 if arguments.debug:
1007 dummy_log(f"Output debug info.")
1008 level = logging.DEBUG
1009 else:
1010 level = logging.INFO
1011 if not dummy:
1012 logger.setLevel(level)
1013 formatter = logging.Formatter(
1014 "%(asctime)s - %(levelname)s - %(message)s"
1015 )
1016 if arguments.logging.std == "err":
1017 dummy_log(f"Handler added to output logs in stderr.")
1018 if not dummy:
1019 handler = logging.StreamHandler(sys.stderr)
1020 handler.setLevel(level)
1021 handler.setFormatter(formatter)
1022 logger.addHandler(handler)
1023 elif arguments.logging.std == "out":
1024 dummy_log(f"Handler added to output logs in stdout.")
1025 if not dummy:
1026 handler = logging.StreamHandler(sys.stdout)
1027 handler.setLevel(level)
1028 handler.setFormatter(formatter)
1029 logger.addHandler(handler)
1030 else:
1031 dummy_log(f"Logs will not be output in stderr not stdout.")
1032 if (path := arguments.logging.file.path):
1033 dummy_log(f"Add log file: {arguments.logging.file.path}.")
1034 if not arguments.logging.file.append:
1035 dummy_log(f"Log file content cleaned.")
1036 with open(path, "w"): pass
1037 else:
1038 dummy_log(f"Logs appended to log file.")
1039 if not dummy:
1040 file_handler = logging.FileHandler(filename=path)
1041 file_handler.setLevel(level)
1042 file_handler.setFormatter(formatter)
1043 logger.addHandler(file_handler)
1044 if not dummy:
1045 config["logger"] = logger
1046 starting_sequence(logger)
1047 get_logger(config, dummy=True)
1048 return logger
1049
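## A minimal logging sketch for the arguments read above, to be placed
## under the "defaults" root section (copied into arguments at startup);
## key names come from this function, values are placeholders:
##   logging:
##     std: err            ## "err", "out", or anything else for neither
##     file:
##       path: /tmp/ms2pf.log
##       append: false     ## false truncates the log file at startup
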
1050 def starting_sequence(logger):
1051 logger.info("*bip* *bop*")
1052 logger.info("starting...")
1053 logger.info("program...")
1054 logger.info("MS2PF is running...")
1055 logger.info("*bip* *bop* am a robot")
1056 atexit.register(stoping_sequence, logger)
1057
1058 def stoping_sequence(logger):
1059 logger.info("*bip* *bop*")
1060 logger.info("ending...")
1061 logger.info("program...")
1062 logger.info("MS2PF is shuting down...")
1063 logger.info("...robot")
1064 logger.info("*bip* *bop*")
1065 logger.info("shutdown")
1066 logger.info("...")
1067
1068 if __name__ == "__main__":
1069
1070 base_config = parse_config()
1071 config = parse_parameters(base_config)
1072
1073 """
1074 The config contains the result of the parsed config file.
1075 """
1076 arguments = config.arguments
1077
1078 config.logger.info(f"Starting MS2PF from {os.getcwd()}")
1079
1080 gen_dir = prepare_workplace(config)
1081
1082 config["pf_path"] = os.path.join(gen_dir, config.generated.html)
1083 config.logger.info(f"HTML output file will be {config.pf_path}")
1084 config["js_file"] = os.path.join(gen_dir, config.generated.js)
1085 config.logger.info(f"JS output files will like {config.js_file}")
1086 config.placeholders["PF_URL_PLACEHOLDER"] = arguments.peakforest.url
1087 config.placeholders["PF_TOKEN_PLACEHOLDER"] = (
1088 arguments.peakforest.token
1089 or config.token.value
1090 )
1091 if (token := config.placeholders.PF_TOKEN_PLACEHOLDER):
1092 config.logger.info(f"Using a token for authentification - length: {len(token)}")
1093 else:
1094 config.logger.info(f"No token provided for peakforest authentification.")
1095
1096 process_all_files(config)
1097
1098 if not arguments.do_run_dry:
1099 config.logger.debug(f"Running the server.")
1100 if arguments.firefox or arguments.chromium:
1101 config.logger.debug(f"Running the server.")
1102 import threading
1103 import time
1104 if arguments.firefox:
1105 browser = "firefox"
1106 else:
1107 browser = "chromium"
1108 if (ip := config.network.ip) == "0.0.0.0":
1109 ip = "localhost"
1110 address = f"http://{ip}:{config.network.port}"
1111 threading.Thread(
1112 target=lambda:(
1113 time.sleep(1),
1114 os.system(f"{browser} {address}")
1115 ),
1116 daemon=True
1117 ).start()
1118 run_server(config)
1119 else:
1120 config.logger.debug(f"Server not run.")