Mercurial > repos > lain > ms_to_peakforest_it
comparison server.py @ 1:7e3085fc60c1 draft default tip
master branch Updating
author | lain |
---|---|
date | Wed, 30 Aug 2023 14:21:18 +0000 |
parents | b58b229c4cbf |
children |
comparison
equal
deleted
inserted
replaced
0:b58b229c4cbf | 1:7e3085fc60c1 |
---|---|
38 "absolute_intensity": "abs_intensity", | 38 "absolute_intensity": "abs_intensity", |
39 "relative_intensity": "rel_intensity", | 39 "relative_intensity": "rel_intensity", |
40 "theo_mass": "", | 40 "theo_mass": "", |
41 "delta_ppm": "ppm", | 41 "delta_ppm": "ppm", |
42 "rdbequiv": "", | 42 "rdbequiv": "", |
43 "composition": "", | 43 "composition": "fragment", |
44 "attribution": "fragment", | 44 "attribution": "composition", |
45 } | 45 } |
46 | 46 |
47 MS_2_SNOOP_HEADER = { | 47 MS_2_SNOOP_HEADER = { |
48 "name": str, | 48 "name": str, |
49 "inchikey": str, | 49 "inchikey": str, |
50 "composition": str, | 50 # "composition": str, |
51 "composition": lambda *args:"", | |
51 "fragment": str, | 52 "fragment": str, |
52 "fragment_mz": str, | 53 "fragment_mz": str, |
53 "ppm": str, | 54 "ppm": str, |
54 "fileid": str, | 55 "fileid": str, |
55 "correlation": str, | 56 "correlation": str, |
56 "abs_intensity": lambda x:float(x) * 100, | 57 "abs_intensity": lambda x:float(x), # * 100, |
57 "rel_intensity": lambda x:float(x) * 100, | 58 "rel_intensity": lambda x:float(x) * 100 if x != "" else "", |
58 "valid_corelation": str | 59 "valid_corelation": str |
59 } | 60 } |
60 | 61 |
61 | 62 |
62 class ConfigException(ValueError): | 63 class ConfigException(ValueError): |
231 help: Show this help | 232 help: Show this help |
232 optional: | 233 optional: |
233 method: "default is {{ defaults.method }}" | 234 method: "default is {{ defaults.method }}" |
234 meta: | 235 meta: |
235 author: Lain Pavot | 236 author: Lain Pavot |
236 version: 1.1.0 | 237 version: 1.2.0 |
237 shortcuts: | 238 shortcuts: |
238 help: h | 239 help: h |
239 ## will autogenerate -i for input and -m for method | 240 ## will autogenerate -i for input and -m for method |
240 --- | 241 --- |
241 default parameters are searched in the "default" root section. | 242 default parameters are searched in the "default" root section. |
420 config.placeholders[TAB_INDEX_PLACEHOLDER] = "1" | 421 config.placeholders[TAB_INDEX_PLACEHOLDER] = "1" |
421 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "active" | 422 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "active" |
422 config.placeholders[ADD_SPECTRUM_FORM] = "" | 423 config.placeholders[ADD_SPECTRUM_FORM] = "" |
423 config.placeholders[EMBED_JS_PLACEHOLDER] = "" | 424 config.placeholders[EMBED_JS_PLACEHOLDER] = "" |
424 config.placeholders[TAB_LIST_PLACEHOLDER] = "" | 425 config.placeholders[TAB_LIST_PLACEHOLDER] = "" |
425 config.placeholders["DEFAULT_MIN_MZ"] = "50" | 426 # config.placeholders["DEFAULT_MIN_MZ"] = "50" |
426 config.placeholders["DEFAULT_MAX_MZ"] = "500" | 427 # config.placeholders["DEFAULT_MAX_MZ"] = "500" |
427 config.placeholders["DEFAULT_RESOLUTION_LOW"] = "" | 428 config.placeholders["DEFAULT_RESOLUTION_LOW"] = "" |
428 config.placeholders["DEFAULT_RESOLUTION_HIGH"] = "selected=\"selected\"" | 429 config.placeholders["DEFAULT_RESOLUTION_HIGH"] = "selected=\"selected\"" |
429 config.placeholders["DEFAULT_RESOLUTION_UNSET"] = "" | 430 config.placeholders["DEFAULT_RESOLUTION_UNSET"] = "" |
430 config.placeholders["DEFAULT_MIN_RT"] = "0.9" | |
431 config.placeholders["DEFAULT_MAX_RT"] = "1.4" | |
432 return config | 431 return config |
433 | 432 |
434 def parse_parameters(config): | 433 def parse_parameters(config): |
435 """ | 434 """ |
436 parses command line and checks provided values are acceptable/usable. | 435 parses command line and checks provided values are acceptable/usable. |
456 raise ValueError( | 455 raise ValueError( |
457 "Some errors occured during parameters extraction: \n" | 456 "Some errors occured during parameters extraction: \n" |
458 + '\n'.join(parameters.errors) | 457 + '\n'.join(parameters.errors) |
459 ) | 458 ) |
460 | 459 |
460 parameters.placeholders["DEFAULT_MIN_RT"] = str(arguments.rt_min) | |
461 parameters.placeholders["DEFAULT_MAX_RT"] = str(arguments.rt_max) | |
462 | |
461 if arguments.sample_type == COMPOUND_MIX: | 463 if arguments.sample_type == COMPOUND_MIX: |
462 parameters["form_template"] = os.path.join( | 464 parameters["form_template"] = os.path.join( |
463 parameters["root_dir"], | 465 parameters["root_dir"], |
464 parameters.templates.form_mix | 466 parameters.templates.form_mix |
465 ) | 467 ) |
466 parameters["meta_template"] = os.path.join( | 468 parameters["meta_template"] = os.path.join( |
467 parameters["root_dir"], | 469 parameters["root_dir"], |
468 parameters.templates.main_mix | 470 parameters.templates.main_mix |
469 ) | 471 ) |
470 elif arguments.sample_type == COMPOUND_REF: | 472 else: # elif arguments.sample_type == COMPOUND_REF: |
471 parameters["form_template"] = os.path.join( | 473 parameters["form_template"] = os.path.join( |
472 parameters["root_dir"], | 474 parameters["root_dir"], |
473 parameters.templates.form_ref | 475 parameters.templates.form_ref |
474 ) | 476 ) |
475 parameters["meta_template"] = os.path.join( | 477 parameters["meta_template"] = os.path.join( |
564 if arguments.method == "test": | 566 if arguments.method == "test": |
565 if arguments.spectrum_type == "LC_MS": | 567 if arguments.spectrum_type == "LC_MS": |
566 arguments["method"] = "cf_pfem_urine_qtof" | 568 arguments["method"] = "cf_pfem_urine_qtof" |
567 else: | 569 else: |
568 arguments["method"] = "cf_pfem_urine_method1_qtof-msms" | 570 arguments["method"] = "cf_pfem_urine_method1_qtof-msms" |
571 arguments["method"] = "toulouse-metatoul-agromix__ft-esi__msms" | |
569 if arguments["sample_type"] == COMPOUND_MIX: | 572 if arguments["sample_type"] == COMPOUND_MIX: |
570 check_mix_compound_files(parameters) | 573 check_mix_compound_files(parameters) |
571 more_info_in_logs(parameters) | 574 more_info_in_logs(parameters) |
572 return parameters | 575 return parameters |
573 | 576 |
577 numbarz = [ | 580 numbarz = [ |
578 list(map(int, os.path.basename(metadata).split("_", 1)[0].split("-"))) | 581 list(map(int, os.path.basename(metadata).split("_", 1)[0].split("-"))) |
579 for metadata in arguments.raw_metadata | 582 for metadata in arguments.raw_metadata |
580 ] | 583 ] |
581 except ValueError: | 584 except ValueError: |
582 parameters.logger.error( | 585 ## file does not start with `[0-9]+-[0-9]+_.*`: probably |
583 "Metadata/file names does not start with `[0-9]+-[0-9]+_.*` . " | 586 ## a ms2snoop file. |
584 "This is necessary in the case of compounds mix." | 587 return |
585 ) | 588 # parameters.logger.error( |
586 sys.exit(-1) | 589 # "Metadata/file names does not start with `[0-9]+-[0-9]+_.*` . " |
590 # "This is necessary in the case of compounds mix." | |
591 # ) | |
592 # sys.exit(-1) | |
587 runs, samples = zip(*numbarz) | 593 runs, samples = zip(*numbarz) |
588 if not all(runs[0] == i for i in runs[1:]): | 594 if not all(runs[0] == i for i in runs[1:]): |
589 parameters.logger.error( | 595 parameters.logger.error( |
590 "Run numbers in metadata/file names are not identical. " | 596 "Run numbers in metadata/file names are not identical. " |
591 "You mixed some files." | 597 "You mixed some files." |
804 for value in ( | 810 for value in ( |
805 ms_data["fragment_mz"], | 811 ms_data["fragment_mz"], |
806 ms_data["abs_intensity"], | 812 ms_data["abs_intensity"], |
807 ms_data["rel_intensity"], | 813 ms_data["rel_intensity"], |
808 ms_data["ppm"], | 814 ms_data["ppm"], |
815 ms_data["fragment"], | |
809 ms_data["composition"], | 816 ms_data["composition"], |
810 ms_data["fragment"], | |
811 str(ms_data["valid_corelation"] == "TRUE").lower(), | 817 str(ms_data["valid_corelation"] == "TRUE").lower(), |
812 "true" if ms_data.get("correlation") == "1" else "false" | 818 "true" if ms_data.get("correlation") == "1" else "false" |
813 ) | 819 ) |
814 ) | 820 ) |
815 ) | 821 ) |
825 """ | 831 """ |
826 aggregates informations to form the peak table, | 832 aggregates informations to form the peak table, |
827 adds the compound to the tab list, | 833 adds the compound to the tab list, |
828 creates the js file for this tab | 834 creates the js file for this tab |
829 """ | 835 """ |
830 if len([x for x in ms_peak_table if x.split(", ")[7] == "\"true\""]) > 1: | 836 ignore_multiple_parent_ion(ms_peak_table) |
831 for i in range(len(ms_peak_table)): | 837 determine_min_max_mz(ms_peak_table) |
832 ms_peak_table[i] = ", ".join( | 838 guess_relative_intensities(ms_peak_table) |
833 ms_peak_table[i].split(", ")[:-1] + [", \"false\""] | 839 accept_all_fragments_if_all_false(ms_peak_table) |
834 ) | |
835 config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = f"""[ | 840 config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = f"""[ |
836 {','.join('['+line+']' for line in ms_peak_table)} | 841 {','.join('['+line+']' for line in ms_peak_table)} |
837 ]""" | 842 ]""" |
838 tab_list = fill_template( | 843 tab_list = fill_template( |
839 "tab_list_template", | 844 "tab_list_template", |
851 {TAB_INDEX_PLACEHOLDER: str(index)}, | 856 {TAB_INDEX_PLACEHOLDER: str(index)}, |
852 ) | 857 ) |
853 if index == 1: | 858 if index == 1: |
854 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "" | 859 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "" |
855 | 860 |
def determine_min_max_mz(ms_peak_table, placeholders=None):
    """
    Publishes the smallest and the largest m/z of the peak table as
    the DEFAULT_MIN_MZ and DEFAULT_MAX_MZ placeholders.

    The m/z is read from the first cell of each row; rows are strings
    made of double-quoted cells separated by ", ".

    ms_peak_table: list of row strings.
    placeholders: mapping that receives the two values (as strings).
        When omitted, the placeholders of the module-level `config`
        are used, which is what existing callers rely on.

    An empty table leaves the placeholders untouched instead of
    failing on min()/max() of an empty list.
    Raises ValueError if a first cell cannot be converted to float.
    """
    if not ms_peak_table:
        return
    if placeholders is None:
        ## historical behaviour: write into the global configuration
        placeholders = config.placeholders
    mz_list = [
        float(row.split(", ")[0].strip("\""))
        for row in ms_peak_table
    ]
    placeholders["DEFAULT_MIN_MZ"] = str(min(mz_list))
    placeholders["DEFAULT_MAX_MZ"] = str(max(mz_list))
868 | |
def guess_relative_intensities(ms_peak_table):
    """
    Computes the relative intensities from the absolute ones when the
    table provides no relative intensity at all.

    Column 1 holds the absolute intensity and column 2 the relative
    one. If the relative intensity of every row is empty, column 2 is
    replaced in place by `absolute / greatest absolute * 100`.

    The table is left untouched when:
      - it is empty;
      - at least one relative intensity is already present;
      - the greatest absolute intensity is 0 (nothing to scale to).

    Raises ValueError if an absolute intensity is not a number.
    """
    if not ms_peak_table:
        return
    rows = [row.split(", ") for row in ms_peak_table]
    if any(cells[2].strip("\"") != "" for cells in rows):
        return
    absolute_intensities = [
        float(cells[1].strip("\""))
        for cells in rows
    ]
    greatest = max(absolute_intensities)
    if greatest == 0:
        ## would divide by zero: no meaningful percentage exists
        return
    replace_ms_table_value(
        ms_peak_table,
        2,
        [intensity / greatest * 100 for intensity in absolute_intensities],
    )
884 | |
def ignore_multiple_parent_ion(ms_peak_table):
    """
    Un-flags every precursor when more than one row claims to be the
    precursor (parent ion).

    Column 7 of each row holds the precursor flag as a double-quoted
    "true"/"false" cell. The table is modified in place.

    Raises IndexError if a row has fewer than 8 cells.
    """
    precursor_count = sum(
        1 for row in ms_peak_table
        if row.split(", ")[7].strip("\"") == "true"
    )
    if precursor_count > 1:
        ## if more than one is the precursor, then none is the precursor.
        ## The bare word is given: replace_ms_table_value adds the
        ## surrounding double quotes to string values itself.
        replace_ms_table_value(ms_peak_table, 7, "false")
892 | |
def accept_all_fragments_if_all_false(ms_peak_table):
    """
    Marks every fragment as valid when none of them is.

    Column 6 of each row holds the validity flag as a double-quoted
    "true"/"false" cell. If every row is flagged "false", the whole
    column is set to "true", in place.

    An empty table is left untouched: there is no row to update.
    Raises IndexError if a row has fewer than 7 cells.
    """
    if not ms_peak_table:
        return
    if any(
        row.split(", ")[6].strip("\"") != "false"
        for row in ms_peak_table
    ):
        return
    replace_ms_table_value(ms_peak_table, 6, "true")
899 | |
def replace_ms_table_value(ms_peak_table, index, value, sep=", "):
    """
    Replaces, in place, the cell number `index` of every row.

    ms_peak_table: list of row strings, cells separated by `sep`.
    index: zero-based position of the cell to replace.
    value:
      - a list provides one value per row; items are converted with
        str() and written as is (no quotes added);
      - a str is wrapped in double quotes and used for every row;
      - anything else is converted with str() and used for every row.
    sep: cell separator.

    The cells before `index` are taken from the left of each row and
    the cells after it from the right, the number of trailing cells
    being measured on the first row.

    An empty table is left untouched.
    Raises IndexError if `value` is a list shorter than the table.
    """
    length = len(ms_peak_table)
    if not length:
        return
    if isinstance(value, list):
        values = [str(item) for item in value]
    elif isinstance(value, str):
        values = [f'"{value}"'] * length
    else:
        values = [str(value)] * length
    trailing = ms_peak_table[0].count(sep) - index
    for i, row in enumerate(ms_peak_table):
        head = row.split(sep, index)[:index]
        ## a slice starting at -0 would return the whole row: when the
        ## last cell is the one replaced, nothing must follow it.
        tail = row.rsplit(sep, trailing)[-trailing:] if trailing > 0 else []
        ms_peak_table[i] = sep.join((*head, values[i], *tail))
918 | |
856 def fragnot_extractor(config, *line): | 919 def fragnot_extractor(config, *line): |
857 """ | 920 """ |
858 Fragnot processor - extracts one fragnot line of content and | 921 Fragnot processor - extracts one fragnot line of content and |
859 produces a uniformised output. | 922 produces a uniformised output. |
860 """ | 923 """ |
861 fragnot_data = { | 924 fragnot_data = { |
862 FRAGNOT_HEADER[header]: line[i].strip() | 925 FRAGNOT_HEADER[header]: line[i].strip() |
863 for i, header in enumerate(FRAGNOT_HEADER) | 926 for i, header in enumerate(FRAGNOT_HEADER) |
864 } | 927 } |
865 fragnot_data["composition"] = "unknown" | 928 # fragnot_data["composition"] = "unknown" |
866 fragnot_data["valid_corelation"] = config.arguments.validation | 929 fragnot_data["valid_corelation"] = config.arguments.validation |
867 return fragnot_data | 930 return fragnot_data |
868 | 931 |
869 def ms2snoop_extractor(config, *line): | 932 def ms2snoop_extractor(config, *line): |
870 """ | 933 """ |
921 if config.workdir.generate_in_tmp: | 984 if config.workdir.generate_in_tmp: |
922 gen_dir = config.workdir.tmp_dir | 985 gen_dir = config.workdir.tmp_dir |
923 else: | 986 else: |
924 gen_dir = tempfile.gettempdir() | 987 gen_dir = tempfile.gettempdir() |
925 config.workdir.tmp_dir = gen_dir | 988 config.workdir.tmp_dir = gen_dir |
926 shutil.copy(os.path.join(config["root_dir"], "common.js"), gen_dir) | 989 shutil.copy(os.path.join(config["root_dir"], "src", "common.js"), gen_dir) |
927 config.logger.info(f"Outputs will be generated in {config.workdir.tmp_dir}") | 990 config.logger.info(f"Outputs will be generated in {config.workdir.tmp_dir}") |
928 return gen_dir | 991 return gen_dir |
929 | 992 |
930 def get_hander_for(directory, config): | 993 def get_hander_for(directory, config): |
931 """ | 994 """ |
939 super().__init__(*args, **kwargs, directory=directory) | 1002 super().__init__(*args, **kwargs, directory=directory) |
940 | 1003 |
941 def do_POST(self): | 1004 def do_POST(self): |
942 content_length = int(self.headers.get("Content-Length")) | 1005 content_length = int(self.headers.get("Content-Length")) |
943 json_bytes = self.rfile.read(content_length).decode("utf-8") | 1006 json_bytes = self.rfile.read(content_length).decode("utf-8") |
944 json_list = json.loads(json_bytes) | 1007 # json_list = json.loads(json_bytes) |
945 for i, obj in enumerate(json_list): | 1008 # for i, obj in enumerate(json_list): |
946 print(obj) | 1009 # print(obj) |
947 if obj: | 1010 # if obj: |
948 config["json_result"][i] = obj | 1011 # config["json_result"][i] = obj |
1012 json_obj = json.loads(json_bytes) | |
1013 config["json_result"][json_obj["index"]] = json_obj["object"] | |
949 save_json(config) | 1014 save_json(config) |
950 self.send_head() | 1015 self.send_head() |
951 self.wfile.write(json_bytes.encode("utf-8")) | 1016 self.wfile.write(json_bytes.encode("utf-8")) |
952 return | 1017 return |
953 | 1018 |
972 | 1037 |
973 def run_server(config): | 1038 def run_server(config): |
974 """ | 1039 """ |
975 prepare and runs the server, with the handler for the given directory | 1040 prepare and runs the server, with the handler for the given directory |
976 """ | 1041 """ |
977 ip, port = config.network.ip, config.network.port | 1042 ip, port = config.arguments.ip, int(config.arguments.port) |
978 config.logger.debug(f"IP and port: {ip}:{port}") | 1043 config.logger.debug(f"IP and port: {ip}:{port}") |
979 socketserver.TCPServer.allow_reuse_address = True | 1044 socketserver.TCPServer.allow_reuse_address = True |
980 config.logger.debug(f"Allow reuse adress.") | 1045 config.logger.debug(f"Allow reuse adress.") |
981 handler = get_hander_for(config.workdir.tmp_dir, config) | 1046 handler = get_hander_for(config.workdir.tmp_dir, config) |
982 config.logger.debug(f"Created server handler for {config.workdir.tmp_dir}") | 1047 config.logger.debug(f"Created server handler for {config.workdir.tmp_dir}") |
1064 logger.info("*bip* *bop*") | 1129 logger.info("*bip* *bop*") |
1065 logger.info("shutdown") | 1130 logger.info("shutdown") |
1066 logger.info("...") | 1131 logger.info("...") |
1067 | 1132 |
1068 if __name__ == "__main__": | 1133 if __name__ == "__main__": |
1134 print(os.listdir("test-data")) | |
1135 | |
1136 if not os.path.exists("config.yml"): | |
1137 shutil.copy("config.default.yml", "config.yml") | |
1069 | 1138 |
1070 base_config = parse_config() | 1139 base_config = parse_config() |
1071 config = parse_parameters(base_config) | 1140 config = parse_parameters(base_config) |
1072 | 1141 |
1073 """ | 1142 """ |
1074 The config contains result of the parsed config file. | 1143 The config contains result of the parsed config file. |
1075 """ | 1144 """ |
1076 arguments = config.arguments | 1145 arguments = config.arguments |
1146 | |
1147 if arguments.pid: | |
1148 print(arguments.pid) | |
1149 with open(arguments.pid, "w") as pid_file: | |
1150 pid_file.write(str(os.getpid())) | |
1151 atexit.register(lambda:os.unlink(arguments.pid)) | |
1077 | 1152 |
1078 config.logger.info(f"Starting MS2PF from {os.getcwd()}") | 1153 config.logger.info(f"Starting MS2PF from {os.getcwd()}") |
1079 | 1154 |
1080 gen_dir = prepare_workplace(config) | 1155 gen_dir = prepare_workplace(config) |
1081 | 1156 |