Mercurial > repos > lain > ms_to_peakforest_it
comparison server.py @ 1:7e3085fc60c1 draft default tip
master branch Updating
| author | lain |
|---|---|
| date | Wed, 30 Aug 2023 14:21:18 +0000 |
| parents | b58b229c4cbf |
| children | |
comparison
Legend: equal, deleted, inserted, replaced
| 0:b58b229c4cbf | 1:7e3085fc60c1 |
|---|---|
| 38 "absolute_intensity": "abs_intensity", | 38 "absolute_intensity": "abs_intensity", |
| 39 "relative_intensity": "rel_intensity", | 39 "relative_intensity": "rel_intensity", |
| 40 "theo_mass": "", | 40 "theo_mass": "", |
| 41 "delta_ppm": "ppm", | 41 "delta_ppm": "ppm", |
| 42 "rdbequiv": "", | 42 "rdbequiv": "", |
| 43 "composition": "", | 43 "composition": "fragment", |
| 44 "attribution": "fragment", | 44 "attribution": "composition", |
| 45 } | 45 } |
| 46 | 46 |
| 47 MS_2_SNOOP_HEADER = { | 47 MS_2_SNOOP_HEADER = { |
| 48 "name": str, | 48 "name": str, |
| 49 "inchikey": str, | 49 "inchikey": str, |
| 50 "composition": str, | 50 # "composition": str, |
| 51 "composition": lambda *args:"", | |
| 51 "fragment": str, | 52 "fragment": str, |
| 52 "fragment_mz": str, | 53 "fragment_mz": str, |
| 53 "ppm": str, | 54 "ppm": str, |
| 54 "fileid": str, | 55 "fileid": str, |
| 55 "correlation": str, | 56 "correlation": str, |
| 56 "abs_intensity": lambda x:float(x) * 100, | 57 "abs_intensity": lambda x:float(x), # * 100, |
| 57 "rel_intensity": lambda x:float(x) * 100, | 58 "rel_intensity": lambda x:float(x) * 100 if x != "" else "", |
| 58 "valid_corelation": str | 59 "valid_corelation": str |
| 59 } | 60 } |
| 60 | 61 |
| 61 | 62 |
| 62 class ConfigException(ValueError): | 63 class ConfigException(ValueError): |
| 231 help: Show this help | 232 help: Show this help |
| 232 optional: | 233 optional: |
| 233 method: "default is {{ defaults.method }}" | 234 method: "default is {{ defaults.method }}" |
| 234 meta: | 235 meta: |
| 235 author: Lain Pavot | 236 author: Lain Pavot |
| 236 version: 1.1.0 | 237 version: 1.2.0 |
| 237 shortcuts: | 238 shortcuts: |
| 238 help: h | 239 help: h |
| 239 ## will autogenerate -i for input and -m for method | 240 ## will autogenerate -i for input and -m for method |
| 240 --- | 241 --- |
| 241 default parameters are searched in the "default" root section. | 242 default parameters are searched in the "default" root section. |
| 420 config.placeholders[TAB_INDEX_PLACEHOLDER] = "1" | 421 config.placeholders[TAB_INDEX_PLACEHOLDER] = "1" |
| 421 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "active" | 422 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "active" |
| 422 config.placeholders[ADD_SPECTRUM_FORM] = "" | 423 config.placeholders[ADD_SPECTRUM_FORM] = "" |
| 423 config.placeholders[EMBED_JS_PLACEHOLDER] = "" | 424 config.placeholders[EMBED_JS_PLACEHOLDER] = "" |
| 424 config.placeholders[TAB_LIST_PLACEHOLDER] = "" | 425 config.placeholders[TAB_LIST_PLACEHOLDER] = "" |
| 425 config.placeholders["DEFAULT_MIN_MZ"] = "50" | 426 # config.placeholders["DEFAULT_MIN_MZ"] = "50" |
| 426 config.placeholders["DEFAULT_MAX_MZ"] = "500" | 427 # config.placeholders["DEFAULT_MAX_MZ"] = "500" |
| 427 config.placeholders["DEFAULT_RESOLUTION_LOW"] = "" | 428 config.placeholders["DEFAULT_RESOLUTION_LOW"] = "" |
| 428 config.placeholders["DEFAULT_RESOLUTION_HIGH"] = "selected=\"selected\"" | 429 config.placeholders["DEFAULT_RESOLUTION_HIGH"] = "selected=\"selected\"" |
| 429 config.placeholders["DEFAULT_RESOLUTION_UNSET"] = "" | 430 config.placeholders["DEFAULT_RESOLUTION_UNSET"] = "" |
| 430 config.placeholders["DEFAULT_MIN_RT"] = "0.9" | |
| 431 config.placeholders["DEFAULT_MAX_RT"] = "1.4" | |
| 432 return config | 431 return config |
| 433 | 432 |
| 434 def parse_parameters(config): | 433 def parse_parameters(config): |
| 435 """ | 434 """ |
| 436 parses command line and checks provided values are acceptable/usable. | 435 parses command line and checks provided values are acceptable/usable. |
| 456 raise ValueError( | 455 raise ValueError( |
| 457 "Some errors occured during parameters extraction: \n" | 456 "Some errors occured during parameters extraction: \n" |
| 458 + '\n'.join(parameters.errors) | 457 + '\n'.join(parameters.errors) |
| 459 ) | 458 ) |
| 460 | 459 |
| 460 parameters.placeholders["DEFAULT_MIN_RT"] = str(arguments.rt_min) | |
| 461 parameters.placeholders["DEFAULT_MAX_RT"] = str(arguments.rt_max) | |
| 462 | |
| 461 if arguments.sample_type == COMPOUND_MIX: | 463 if arguments.sample_type == COMPOUND_MIX: |
| 462 parameters["form_template"] = os.path.join( | 464 parameters["form_template"] = os.path.join( |
| 463 parameters["root_dir"], | 465 parameters["root_dir"], |
| 464 parameters.templates.form_mix | 466 parameters.templates.form_mix |
| 465 ) | 467 ) |
| 466 parameters["meta_template"] = os.path.join( | 468 parameters["meta_template"] = os.path.join( |
| 467 parameters["root_dir"], | 469 parameters["root_dir"], |
| 468 parameters.templates.main_mix | 470 parameters.templates.main_mix |
| 469 ) | 471 ) |
| 470 elif arguments.sample_type == COMPOUND_REF: | 472 else: # elif arguments.sample_type == COMPOUND_REF: |
| 471 parameters["form_template"] = os.path.join( | 473 parameters["form_template"] = os.path.join( |
| 472 parameters["root_dir"], | 474 parameters["root_dir"], |
| 473 parameters.templates.form_ref | 475 parameters.templates.form_ref |
| 474 ) | 476 ) |
| 475 parameters["meta_template"] = os.path.join( | 477 parameters["meta_template"] = os.path.join( |
| 564 if arguments.method == "test": | 566 if arguments.method == "test": |
| 565 if arguments.spectrum_type == "LC_MS": | 567 if arguments.spectrum_type == "LC_MS": |
| 566 arguments["method"] = "cf_pfem_urine_qtof" | 568 arguments["method"] = "cf_pfem_urine_qtof" |
| 567 else: | 569 else: |
| 568 arguments["method"] = "cf_pfem_urine_method1_qtof-msms" | 570 arguments["method"] = "cf_pfem_urine_method1_qtof-msms" |
| 571 arguments["method"] = "toulouse-metatoul-agromix__ft-esi__msms" | |
| 569 if arguments["sample_type"] == COMPOUND_MIX: | 572 if arguments["sample_type"] == COMPOUND_MIX: |
| 570 check_mix_compound_files(parameters) | 573 check_mix_compound_files(parameters) |
| 571 more_info_in_logs(parameters) | 574 more_info_in_logs(parameters) |
| 572 return parameters | 575 return parameters |
| 573 | 576 |
| 577 numbarz = [ | 580 numbarz = [ |
| 578 list(map(int, os.path.basename(metadata).split("_", 1)[0].split("-"))) | 581 list(map(int, os.path.basename(metadata).split("_", 1)[0].split("-"))) |
| 579 for metadata in arguments.raw_metadata | 582 for metadata in arguments.raw_metadata |
| 580 ] | 583 ] |
| 581 except ValueError: | 584 except ValueError: |
| 582 parameters.logger.error( | 585 ## file does not start with `[0-9]+-[0-9]+_.*`: probably |
| 583 "Metadata/file names does not start with `[0-9]+-[0-9]+_.*` . " | 586 ## a ms2snoop file. |
| 584 "This is necessary in the case of compounds mix." | 587 return |
| 585 ) | 588 # parameters.logger.error( |
| 586 sys.exit(-1) | 589 # "Metadata/file names does not start with `[0-9]+-[0-9]+_.*` . " |
| 590 # "This is necessary in the case of compounds mix." | |
| 591 # ) | |
| 592 # sys.exit(-1) | |
| 587 runs, samples = zip(*numbarz) | 593 runs, samples = zip(*numbarz) |
| 588 if not all(runs[0] == i for i in runs[1:]): | 594 if not all(runs[0] == i for i in runs[1:]): |
| 589 parameters.logger.error( | 595 parameters.logger.error( |
| 590 "Run numbers in metadata/file names are not identical. " | 596 "Run numbers in metadata/file names are not identical. " |
| 591 "You mixed some files." | 597 "You mixed some files." |
| 804 for value in ( | 810 for value in ( |
| 805 ms_data["fragment_mz"], | 811 ms_data["fragment_mz"], |
| 806 ms_data["abs_intensity"], | 812 ms_data["abs_intensity"], |
| 807 ms_data["rel_intensity"], | 813 ms_data["rel_intensity"], |
| 808 ms_data["ppm"], | 814 ms_data["ppm"], |
| 815 ms_data["fragment"], | |
| 809 ms_data["composition"], | 816 ms_data["composition"], |
| 810 ms_data["fragment"], | |
| 811 str(ms_data["valid_corelation"] == "TRUE").lower(), | 817 str(ms_data["valid_corelation"] == "TRUE").lower(), |
| 812 "true" if ms_data.get("correlation") == "1" else "false" | 818 "true" if ms_data.get("correlation") == "1" else "false" |
| 813 ) | 819 ) |
| 814 ) | 820 ) |
| 815 ) | 821 ) |
| 825 """ | 831 """ |
| 826 aggregates informations to form the peak table, | 832 aggregates informations to form the peak table, |
| 827 adds the compound to the tab list, | 833 adds the compound to the tab list, |
| 828 creates the js file for this tab | 834 creates the js file for this tab |
| 829 """ | 835 """ |
| 830 if len([x for x in ms_peak_table if x.split(", ")[7] == "\"true\""]) > 1: | 836 ignore_multiple_parent_ion(ms_peak_table) |
| 831 for i in range(len(ms_peak_table)): | 837 determine_min_max_mz(ms_peak_table) |
| 832 ms_peak_table[i] = ", ".join( | 838 guess_relative_intensities(ms_peak_table) |
| 833 ms_peak_table[i].split(", ")[:-1] + [", \"false\""] | 839 accept_all_fragments_if_all_false(ms_peak_table) |
| 834 ) | |
| 835 config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = f"""[ | 840 config.placeholders[MS_PEAK_VALUES_PLACEHOLDER] = f"""[ |
| 836 {','.join('['+line+']' for line in ms_peak_table)} | 841 {','.join('['+line+']' for line in ms_peak_table)} |
| 837 ]""" | 842 ]""" |
| 838 tab_list = fill_template( | 843 tab_list = fill_template( |
| 839 "tab_list_template", | 844 "tab_list_template", |
| 851 {TAB_INDEX_PLACEHOLDER: str(index)}, | 856 {TAB_INDEX_PLACEHOLDER: str(index)}, |
| 852 ) | 857 ) |
| 853 if index == 1: | 858 if index == 1: |
| 854 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "" | 859 config.placeholders[ACTIVE_TAB_PLACEHOLDER] = "" |
| 855 | 860 |
| 861 def determine_min_max_mz(ms_peak_table): | |
| 862 mz_list = [ | |
| 863 float(ms_peak_table[i].split(", ")[0].strip("\"")) | |
| 864 for i in range(len(ms_peak_table)) | |
| 865 ] | |
| 866 config.placeholders["DEFAULT_MIN_MZ"] = str(min(mz_list)) | |
| 867 config.placeholders["DEFAULT_MAX_MZ"] = str(max(mz_list)) | |
| 868 | |
| 869 def guess_relative_intensities(ms_peak_table): | |
| 870 if all( | |
| 871 ms_peak_table[i].split(", ")[2].strip("\"") == "" | |
| 872 for i in range(len(ms_peak_table)) | |
| 873 ): | |
| 874 absolute_intensities = [ | |
| 875 float(ms_peak_table[i].split(", ")[1].strip("\"")) | |
| 876 for i in range(len(ms_peak_table)) | |
| 877 ] | |
| 878 greatest = float(max(absolute_intensities)) | |
| 879 relative_intensities = [ | |
| 880 intensity / greatest * 100 | |
| 881 for intensity in absolute_intensities | |
| 882 ] | |
| 883 replace_ms_table_value(ms_peak_table, 2, relative_intensities) | |
| 884 | |
| 885 def ignore_multiple_parent_ion(ms_peak_table): | |
| 886 if len([ | |
| 887 None for x in ms_peak_table | |
| 888 if x.split(", ")[7].strip("\"") == "true" | |
| 889 ]) > 1: | |
| 890 ## if more than one is the precursor, then none is the precursor | |
| 891 replace_ms_table_value(ms_peak_table, 7, "\"false\"") | |
| 892 | |
| 893 def accept_all_fragments_if_all_false(ms_peak_table): | |
| 894 if all( | |
| 895 ms_peak_table[i].split(", ")[6].strip("\"") == "false" | |
| 896 for i in range(len(ms_peak_table)) | |
| 897 ): | |
| 898 replace_ms_table_value(ms_peak_table, 6, "true") | |
| 899 | |
| 900 def replace_ms_table_value(ms_peak_table, index, value, sep=", "): | |
| 901 length = len(ms_peak_table) | |
| 902 if not isinstance(value, list): | |
| 903 if isinstance(value, str): | |
| 904 value = [value.join('""')] * length | |
| 905 else: | |
| 906 value = [str(value)] * length | |
| 907 if not isinstance(value[0], str): | |
| 908 value = [str(x) for x in value] | |
| 909 count = ms_peak_table[0].count(sep) | |
| 910 endindex = count - index | |
| 911 neg_endindex = -endindex | |
| 912 for i in range(length): | |
| 913 ms_peak_table[i] = sep.join(( | |
| 914 *ms_peak_table[i].split(sep, index)[:index], | |
| 915 value[i], | |
| 916 *ms_peak_table[i].rsplit(sep, endindex)[neg_endindex:] | |
| 917 )) | |
| 918 | |
| 856 def fragnot_extractor(config, *line): | 919 def fragnot_extractor(config, *line): |
| 857 """ | 920 """ |
| 858 Fragnot processor - extracts one fragnot line of content and | 921 Fragnot processor - extracts one fragnot line of content and |
| 859 produces a uniformised output. | 922 produces a uniformised output. |
| 860 """ | 923 """ |
| 861 fragnot_data = { | 924 fragnot_data = { |
| 862 FRAGNOT_HEADER[header]: line[i].strip() | 925 FRAGNOT_HEADER[header]: line[i].strip() |
| 863 for i, header in enumerate(FRAGNOT_HEADER) | 926 for i, header in enumerate(FRAGNOT_HEADER) |
| 864 } | 927 } |
| 865 fragnot_data["composition"] = "unknown" | 928 # fragnot_data["composition"] = "unknown" |
| 866 fragnot_data["valid_corelation"] = config.arguments.validation | 929 fragnot_data["valid_corelation"] = config.arguments.validation |
| 867 return fragnot_data | 930 return fragnot_data |
| 868 | 931 |
| 869 def ms2snoop_extractor(config, *line): | 932 def ms2snoop_extractor(config, *line): |
| 870 """ | 933 """ |
| 921 if config.workdir.generate_in_tmp: | 984 if config.workdir.generate_in_tmp: |
| 922 gen_dir = config.workdir.tmp_dir | 985 gen_dir = config.workdir.tmp_dir |
| 923 else: | 986 else: |
| 924 gen_dir = tempfile.gettempdir() | 987 gen_dir = tempfile.gettempdir() |
| 925 config.workdir.tmp_dir = gen_dir | 988 config.workdir.tmp_dir = gen_dir |
| 926 shutil.copy(os.path.join(config["root_dir"], "common.js"), gen_dir) | 989 shutil.copy(os.path.join(config["root_dir"], "src", "common.js"), gen_dir) |
| 927 config.logger.info(f"Outputs will be generated in {config.workdir.tmp_dir}") | 990 config.logger.info(f"Outputs will be generated in {config.workdir.tmp_dir}") |
| 928 return gen_dir | 991 return gen_dir |
| 929 | 992 |
| 930 def get_hander_for(directory, config): | 993 def get_hander_for(directory, config): |
| 931 """ | 994 """ |
| 939 super().__init__(*args, **kwargs, directory=directory) | 1002 super().__init__(*args, **kwargs, directory=directory) |
| 940 | 1003 |
| 941 def do_POST(self): | 1004 def do_POST(self): |
| 942 content_length = int(self.headers.get("Content-Length")) | 1005 content_length = int(self.headers.get("Content-Length")) |
| 943 json_bytes = self.rfile.read(content_length).decode("utf-8") | 1006 json_bytes = self.rfile.read(content_length).decode("utf-8") |
| 944 json_list = json.loads(json_bytes) | 1007 # json_list = json.loads(json_bytes) |
| 945 for i, obj in enumerate(json_list): | 1008 # for i, obj in enumerate(json_list): |
| 946 print(obj) | 1009 # print(obj) |
| 947 if obj: | 1010 # if obj: |
| 948 config["json_result"][i] = obj | 1011 # config["json_result"][i] = obj |
| 1012 json_obj = json.loads(json_bytes) | |
| 1013 config["json_result"][json_obj["index"]] = json_obj["object"] | |
| 949 save_json(config) | 1014 save_json(config) |
| 950 self.send_head() | 1015 self.send_head() |
| 951 self.wfile.write(json_bytes.encode("utf-8")) | 1016 self.wfile.write(json_bytes.encode("utf-8")) |
| 952 return | 1017 return |
| 953 | 1018 |
| 972 | 1037 |
| 973 def run_server(config): | 1038 def run_server(config): |
| 974 """ | 1039 """ |
| 975 prepare and runs the server, with the handler for the given directory | 1040 prepare and runs the server, with the handler for the given directory |
| 976 """ | 1041 """ |
| 977 ip, port = config.network.ip, config.network.port | 1042 ip, port = config.arguments.ip, int(config.arguments.port) |
| 978 config.logger.debug(f"IP and port: {ip}:{port}") | 1043 config.logger.debug(f"IP and port: {ip}:{port}") |
| 979 socketserver.TCPServer.allow_reuse_address = True | 1044 socketserver.TCPServer.allow_reuse_address = True |
| 980 config.logger.debug(f"Allow reuse adress.") | 1045 config.logger.debug(f"Allow reuse adress.") |
| 981 handler = get_hander_for(config.workdir.tmp_dir, config) | 1046 handler = get_hander_for(config.workdir.tmp_dir, config) |
| 982 config.logger.debug(f"Created server handler for {config.workdir.tmp_dir}") | 1047 config.logger.debug(f"Created server handler for {config.workdir.tmp_dir}") |
| 1064 logger.info("*bip* *bop*") | 1129 logger.info("*bip* *bop*") |
| 1065 logger.info("shutdown") | 1130 logger.info("shutdown") |
| 1066 logger.info("...") | 1131 logger.info("...") |
| 1067 | 1132 |
| 1068 if __name__ == "__main__": | 1133 if __name__ == "__main__": |
| 1134 print(os.listdir("test-data")) | |
| 1135 | |
| 1136 if not os.path.exists("config.yml"): | |
| 1137 shutil.copy("config.default.yml", "config.yml") | |
| 1069 | 1138 |
| 1070 base_config = parse_config() | 1139 base_config = parse_config() |
| 1071 config = parse_parameters(base_config) | 1140 config = parse_parameters(base_config) |
| 1072 | 1141 |
| 1073 """ | 1142 """ |
| 1074 The config contains result of the parsed config file. | 1143 The config contains result of the parsed config file. |
| 1075 """ | 1144 """ |
| 1076 arguments = config.arguments | 1145 arguments = config.arguments |
| 1146 | |
| 1147 if arguments.pid: | |
| 1148 print(arguments.pid) | |
| 1149 with open(arguments.pid, "w") as pid_file: | |
| 1150 pid_file.write(str(os.getpid())) | |
| 1151 atexit.register(lambda:os.unlink(arguments.pid)) | |
| 1077 | 1152 |
| 1078 config.logger.info(f"Starting MS2PF from {os.getcwd()}") | 1153 config.logger.info(f"Starting MS2PF from {os.getcwd()}") |
| 1079 | 1154 |
| 1080 gen_dir = prepare_workplace(config) | 1155 gen_dir = prepare_workplace(config) |
| 1081 | 1156 |
