# HG changeset patch # User workflow4metabolomics # Date 1693302316 0 # Node ID 59c8bad5f6bcf5d064e92ecdeb37003165434f59 planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/blob/master/tools/kmd_hmdb_data_plot/ commit 7fa454b6a4268b89fe18043e8dd10f30a7b4c7ca diff -r 000000000000 -r 59c8bad5f6bc kmd_hmdb_data_plot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kmd_hmdb_data_plot.xml Tue Aug 29 09:45:16 2023 +0000 @@ -0,0 +1,176 @@ + + + retrieves data from KMD HMDB API and produce plot and tsv file + + + macro.xml + 1.0.0 + 0 + + + topic_0091 + + + operation_3803 + + + python + pandas + plotly + kmd_hmdb_api_client + + + '$output' +#end if +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 59c8bad5f6bc kmd_hmdb_interrogator.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kmd_hmdb_interrogator.py Tue Aug 29 09:45:16 2023 +0000 @@ -0,0 +1,328 @@ +#!/usr/bin/env python3 + +import csv +import operator + +import click + +import kmd_hmdb_api_client.client +from kmd_hmdb_api_client.api.default import ( + api_annotation_get, + api_compound_find, + api_taxonomy_get, +) + +__version__ = "1.0.0" + + +kmd_hmdb_client = kmd_hmdb_api_client.client.Client( + "https://kmd-hmdb-rest-api.metabolomics-chopin.e-metabohub.fr", + verify_ssl=False, + timeout=500, +) + +find_compound = ( + lambda *args, **kwargs: + api_compound_find.sync(*args, **kwargs, client=kmd_hmdb_client) +) +get_taxonomy = ( + lambda *args, **kwargs: + api_taxonomy_get.sync(*args, **kwargs, client=kmd_hmdb_client) +) +get_annotation = ( + lambda *args, **kwargs: + api_annotation_get.sync(*args, **kwargs, client=kmd_hmdb_client) +) + +positive_adducts = [ + "M+H", + "M+2H", + "M+H+NH4", + "M+H+Na", + "M+H+K", + "M+ACN+2H", + "M+2Na", + "M+H-2H2O", + "M+H-H2O", + "M+NH4", + "M+Na", + "M+CH3OH+H", + "M+K", + "M+ACN+H", + "M+2Na-H", + "M+IsoProp+H", + "M+ACN+Na", + "M+2K+H", + "M+DMSO+H", + "M+2ACN+H", + "2M+H", + "2M+NH4", + "2M+Na", + "2M+K", +] + +negative_adducts = [ + "M-H", + "M-2H", + "M-H2O-H", + "M+Cl", + "M+FA-H", + "M+Hac-H", + "M-H+HCOONa", + "M+Br", + "M+TFA-H", + "2M-H", + "2M+FA-H", + "2M+Hac-H", +] + +adduct_choices = positive_adducts + negative_adducts + +taxonomy_column_choices = [ + "class", + "kingdom", + "molecular_framework", + "sub_class", + "super_class", + "id", +] + +annotation_column_choices = [ + "adduct", + "kendricks_mass", + "kendricks_mass_defect", + "monisotopic_molecular_weight", + "nominal_mass", + "polarity", + "annotation_id", +] + +compound_column_choices = [ + + "database", + "metabolite_name", + "chemical_formula", + "hmdb_id", + "inchikey", + "compound_id", +] + annotation_column_choices + + +@click.group() +def cli(): + pass + + +@cli.command(help="") +@click.option( + "--version", + is_flag=True, +) +@click.option( + "--mz-ratio", + default=[303.05], + show_default=True, + multiple=True, + help="Provide the mz-ratio." +) +@click.option( + "--database", + default=["farid"], + show_default=True, + multiple=True, + help="Provide the database." +) +@click.option( + "--mass-tolerance", + default=10.5, + show_default=True, + help="Provide the mass-tolerance." +) +@click.option( + "--adducts", + default=["M+H"], + type=click.Choice(adduct_choices), + multiple=True, + show_default=True, + show_choices=False, + help="Provide the adducts." +) +@click.option( + "--columns", + default=compound_column_choices[:], + type=click.Choice(compound_column_choices), + multiple=True, + show_default=True, + show_choices=False, + help="Provide the outputed columns." +) +@click.option( + "--output-path", + help="Provide the output path." +) +def compound(*args, **kwargs): + + if kwargs.pop("version"): + print(__version__) + exit(0) + + adducts = kwargs.pop("adducts") + polarity = get_polarity(adducts) + + other_kwargs, compound_kwargs = build_kwargs( + adducts=adducts, + polarity=polarity, + **kwargs + ) + columns = other_kwargs["columns"] + result = find_compound(**compound_kwargs) + result = explode_compounds( + result, + with_annotations=any(map( + columns.__contains__, + annotation_column_choices + )) + ) + check_columns_in_result(result, columns) + output_csv_result( + result, + columns, + other_kwargs.get("output_path"), + delimiter="\t", + ) + + +def explode_compounds(result, with_annotations): + if with_annotations: + return [{ + "database": cpd.database, + "metabolite_name": cpd.metabolite_name, + "chemical_formula": cpd.chemical_formula, + "hmdb_id": cpd.hmdb_id, + "inchikey": cpd.inchikey, + "compound_id": cpd.id, + "adduct": annotation.name, + "kendricks_mass": annotation.kendricks_mass, + "kendricks_mass_defect": annotation.kendricks_mass_defect, + "monisotopic_molecular_weight": + annotation.monisotopic_molecular_weight, + "nominal_mass": annotation.nominal_mass, + "polarity": annotation.polarity, + "annotation_id": annotation.id, + } + for cpd in result + for annotation in cpd.annotations + ] + else: + return [{ + "database": cpd.database, + "metabolite_name": cpd.metabolite_name, + "chemical_formula": cpd.chemical_formula, + "hmdb_id": cpd.hmdb_id, + "inchikey": cpd.inchikey, + "compound_id": cpd.id, + } + for cpd in result + ] + + +@cli.command(help="") +@click.option( + "--id", + type=int, + help="Provide the wanted annotation's id." +) +@click.option( + "--columns", + default=annotation_column_choices[:], + type=click.Choice(annotation_column_choices), + multiple=True, + show_default=True, + show_choices=False, + help="Provide the outputed columns." +) +@click.option( + "--output-path", + help="Provide the output path." +) +def annotation(*args, **kwargs): + result = get_annotation(id=kwargs.pop("id")) + result = [result] + columns = kwargs["columns"] + check_columns_in_result(result, columns) + output_csv_result( + result, + columns, + kwargs.get("output_path") + ) + + +def get_polarity(adducts): + if any(map(positive_adducts.__contains__, adducts)): + return "positive" + if any(map(negative_adducts.__contains__, adducts)): + return "negative" + # polarity = [] + # if any(map(positive_adducts.__contains__, adducts)): + # polarity.append("positive") + # if any(map(negative_adducts.__contains__, adducts)): + # polarity.append("negative") + + +def build_kwargs(**kwargs): + for original, replacement in ( + ("database", "database_list"), + ("polarity", "polarity_list"), + ): + if original in kwargs: + kwargs[replacement] = kwargs.pop(original) + other_kwargs = { + other_arg: kwargs.pop(other_arg) + for other_arg in ("columns", "output_path", "with_annotations") + if other_arg in kwargs + } + return other_kwargs, kwargs + + +def check_columns_in_result(result, columns): + if not result: + return + if not isinstance(result[0], dict): + result = [item.to_dict() for item in result] + keys = result[0].keys() + missing = [ + column for column in columns + if column not in keys + ] + if missing: + if len(missing) == 1: + raise ValueError( + f"Could not find the column {missing[0]} in the results." + ) + else: + raise ValueError( + "Could not find any of the columns " + + ','.join(missing) + + " in the results." + ) + + +def output_csv_result(result, columns, output_path, **csv_parameters): + if not output_path: + raise ValueError("Missing output path. Cannot output CSV results.") + with open(output_path, mode="w", newline='') as output_file: + writer = csv.writer(output_file, **csv_parameters) + write_result(result, columns, writer) + + +def write_result(result, columns, writer): + getters = list(map(operator.itemgetter, columns)) + writer.writerow(columns) + writer.writerows( + (getter(compound) for getter in getters) + for compound in result + ) + + +if __name__ == "__main__": + cli() diff -r 000000000000 -r 59c8bad5f6bc kmd_hmdb_plot_generator.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kmd_hmdb_plot_generator.py Tue Aug 29 09:45:16 2023 +0000 @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 + +import csv +import itertools +import os + +import click + +import plotly.express +import plotly.graph_objects + +__version__ = "1.0.0" + + +@click.group() +def cli(): + pass + + +@cli.command(help="") +@click.option( + "--version", + is_flag=True, + default=False, +) +@click.option( + "--input", + default="./test.tsv", + help="Provide the mz-ratio." +) +@click.option( + "--output", + default="./test.html", + help="Provide the database." +) +@click.option( + "--x-column", + default=["nominal_mass"], + multiple=True, + help="Provide the column names for the X axis.", +) +@click.option( + "--y-column", + default=["kendricks_mass_defect"], + multiple=True, + help="Provide the column names for the Y axis.", +) +@click.option( + "--annotation-column", + multiple=True, + default=[ + "metabolite_name", + "chemical_formula", + ], + help="Provide the columns name for the annotation." +) +def plot(*args, **kwargs): + + if kwargs.pop("version"): + print(__version__) + exit(0) + + input_path = kwargs.pop("input") + data = read_input(input_path, kwargs) + fig = build_fig(*data) + build_html_plot(fig, kwargs.get("output")) + + +def read_input(path: str, kwargs: {}): + if not os.path.exists(path): + raise ValueError(f"The path '{path}' does not exist.") + sep = detect_sep(path) + with open(path) as csv_file: + line_generator = csv.reader(csv_file, delimiter=sep) + first_line = next(line_generator) + all_lines = list(line_generator) + hover_names = ( + "metabolite_name", + "chemical_formula", + ) + annotation_indexes = get_index_of(first_line, hover_names) + ( + x_index, + y_index, + x_column, + y_column, + ) = get_indexes_names( + first_line, + list(kwargs.get("x_column")), + list(kwargs.get("y_column")), + ) + x_lists = [[] for i in range(len(x_index))] + y_lists = [[] for i in range(len(y_index))] + x_column = list(map(first_line.__getitem__, x_index)) + y_column = list(map(first_line.__getitem__, y_index)) + trace_names = [ + f"f({x_column[i]}) = {y_column[i]}" + for i in range(len(x_index)) + ] + hover_names = kwargs["annotation_column"] + annotation_indexes = [ + get_index_of(first_line, column)[0] + for column in hover_names + ] + hover_names = list(map(first_line.__getitem__, annotation_indexes)) + annotations = list() + for line in all_lines: + for i in range(len(x_index)): + x_lists[i].append(float(line[x_index[i]])) + y_lists[i].append(float(line[y_index[i]])) + annotations.append("
".join( + f"{hover_names[hover_index]}: {line[index]}" + for hover_index, index in enumerate(annotation_indexes) + )) + return x_lists, y_lists, annotations, trace_names + + +def get_indexes_names(first_line, x_column, y_column): + x_column, y_column = map(list, zip(*itertools.product(x_column, y_column))) + x_index = get_index_of(first_line, x_column) + y_index = get_index_of(first_line, y_column) + for i in range(len(x_index))[::-1]: + if x_index[i] == y_index[i]: + del x_index[i], x_column[i], y_index[i], y_column[i], + return ( + x_index, + y_index, + x_column, + y_column, + ) + + +def get_index_of(first_line, column): + if isinstance(column, (tuple, list)): + return [get_index_of(first_line, x)[0] for x in list(column)] + try: + return [int(column) - 1] + except ValueError: + return [first_line.index(column)] + + +def build_fig(x_lists, y_lists, annotations, trace_names): + fig = plotly.express.scatter() + for i in range(len(x_lists)): + fig.add_trace( + plotly.graph_objects.Scatter( + name=trace_names[i], + x=x_lists[i], + y=y_lists[i], + hovertext=annotations, + mode="markers", + ) + ) + return fig + + +def detect_sep(tabular_file: str) -> str: + with open(tabular_file, "r") as file: + first_line = file.readline() + if len(first_line.split(',')) > len(first_line.split('\t')): + return ',' + return '\t' + + +def build_html_plot(fig, output: str): + return plotly.offline.plot( + fig, + filename=output, + auto_open=False, + ) + + +if __name__ == "__main__": + cli() diff -r 000000000000 -r 59c8bad5f6bc macro.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macro.xml Tue Aug 29 09:45:16 2023 +0000 @@ -0,0 +1,170 @@ + + + 303.05 + 10.0 + M+H + HMDB + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "get_data" in str(what['to_do']) + + + + + + + + + "produce_plot" in str(what['to_do']) + + + + \ No newline at end of file diff -r 000000000000 -r 59c8bad5f6bc test-data/get_data_tol_0.01.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_data_tol_0.01.tsv Tue Aug 29 09:45:16 2023 +0000 @@ -0,0 +1,15 @@ +database metabolite_name chemical_formula hmdb_id inchikey compound_id adduct kendricks_mass kendricks_mass_defect monisotopic_molecular_weight nominal_mass polarity annotation_id +hmdb 5-(3',5'-Dihydroxyphenyl)-gamma-valerolactone-O-sulphate-O-methyl C12H14O7S HMDB0060031 FXGBBWWEXQWRKV-UHFFFAOYSA-N 193796 M+H 302.715 0.28509 303.053 303.0 positive 3982213 +hmdb Quercetin C15H10O7 HMDB0005794 REFJWTPEDVJJIY-UHFFFAOYSA-N 40965 M+H 302.712 0.288457 303.05 303.0 positive 4379351 +hmdb 8-Chloroinosine C10H11ClN4O5 HMDB0247428 ROPMUQKCJYNROP-UHFFFAOYSA-N 130732 M+H 302.711 0.289311 303.049 303.0 positive 4548699 +hmdb 5-((p-Hydroxybenzylidene)amino)-3-methylisothiazolo(5,4-d)pyrimidine-4,6(5H,7H)-dione C13H10N4O3S HMDB0253558 ALZDMJPUQGYCAX-UHFFFAOYSA-N 68215 M+H 302.716 0.283753 303.055 303.0 positive 4993233 +hmdb 2',4',5,7,8-Pentahydroxyisoflavone C15H10O7 HMDB0033264 LOLNVJIGYUJCIY-UHFFFAOYSA-N 101970 M+H 302.712 0.288457 303.05 303.0 positive 5292330 +hmdb 2-(2-Nitroimidazol-1-yl)-N-(2,2,3,3,3-pentafluoropropyl)acetamide C8H7F5N4O3 HMDB0251710 JGGDSDPOPRWSCX-UHFFFAOYSA-N 9228 M+H 302.713 0.28728 303.051 303.0 positive 7593628 +hmdb 5,6,7,3',4'-Pentahydroxyisoflavone C15H10O7 HMDB0041687 BIDDAFIPYBBDES-UHFFFAOYSA-N 134953 M+H 302.712 0.288457 303.05 303.0 positive 8100148 +hmdb Morin C15H10O7 HMDB0030796 YXOLAZRVSSWPPT-UHFFFAOYSA-N 141800 M+H 302.712 0.288457 303.05 303.0 positive 8184605 +hmdb Tricetin C15H10O7 HMDB0029620 ARSRJFRKVXALTF-UHFFFAOYSA-N 181210 M+H 302.712 0.288457 303.05 303.0 positive 8412749 +hmdb 9-(2,6-Dioxo-3H-purin-9-yl)-3H-purine-2,6-dione C10H6N8O4 HMDB0257773 LLFQXBCTHVBLEI-UHFFFAOYSA-N 108799 M+H 302.72 0.279918 303.058 303.0 positive 8782069 +hmdb 6-Hydroxyluteolin C15H10O7 HMDB0036632 VYAKIUWQLHRZGK-UHFFFAOYSA-N 74622 M+H 302.712 0.288457 303.05 303.0 positive 9521790 +hmdb Pollenin A C15H10O7 HMDB0303704 ZDOTZEDNGNPOEW-UHFFFAOYSA-N 3105 M+H 302.712 0.288457 303.05 303.0 positive 9722226 +hmdb 5,7,8,3',4'-Pentahydroxyisoflavone C15H10O7 HMDB0041689 USQGZNXXBDCNQF-UHFFFAOYSA-N 15604 M+H 302.712 0.288457 303.05 303.0 positive 9958013 +hmdb {2-methoxy-4-[(5-oxooxolan-2-yl)methyl]phenyl}oxidanesulfonic acid C12H14O7S HMDB0127769 FYRRHCSCZYSADR-UHFFFAOYSA-N 51990 M+H 302.715 0.285089 303.053 303.0 positive 10166087