# HG changeset patch
# User workflow4metabolomics
# Date 1693302316 0
# Node ID 59c8bad5f6bcf5d064e92ecdeb37003165434f59
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/blob/master/tools/kmd_hmdb_data_plot/ commit 7fa454b6a4268b89fe18043e8dd10f30a7b4c7ca
diff -r 000000000000 -r 59c8bad5f6bc kmd_hmdb_data_plot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kmd_hmdb_data_plot.xml Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,176 @@
+
+
+ retrieves data from the KMD HMDB API and produces a plot and a TSV file
+
+
+ macro.xml
+ 1.0.0
+ 0
+
+
+ topic_0091
+
+
+ operation_3803
+
+
+ python
+ pandas
+ plotly
+ kmd_hmdb_api_client
+
+
+ '$output'
+#end if
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 59c8bad5f6bc kmd_hmdb_interrogator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kmd_hmdb_interrogator.py Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,328 @@
+#!/usr/bin/env python3
+
+import csv
+import operator
+
+import click
+
+import kmd_hmdb_api_client.client
+from kmd_hmdb_api_client.api.default import (
+ api_annotation_get,
+ api_compound_find,
+ api_taxonomy_get,
+)
+
+__version__ = "1.0.0"
+
+
+# Shared API client used by every request helper below.
+# NOTE(review): verify_ssl=False turns off TLS certificate verification for
+# all requests made through this client -- confirm this is intentional.
+kmd_hmdb_client = kmd_hmdb_api_client.client.Client(
+    "https://kmd-hmdb-rest-api.metabolomics-chopin.e-metabohub.fr",
+    verify_ssl=False,
+    timeout=500,
+)
+
+
+def find_compound(*args, **kwargs):
+    """Call ``api_compound_find.sync`` with the shared client.
+
+    All positional and keyword arguments are forwarded unchanged.
+    """
+    return api_compound_find.sync(*args, **kwargs, client=kmd_hmdb_client)
+
+
+def get_taxonomy(*args, **kwargs):
+    """Call ``api_taxonomy_get.sync`` with the shared client.
+
+    All positional and keyword arguments are forwarded unchanged.
+    """
+    return api_taxonomy_get.sync(*args, **kwargs, client=kmd_hmdb_client)
+
+
+def get_annotation(*args, **kwargs):
+    """Call ``api_annotation_get.sync`` with the shared client.
+
+    All positional and keyword arguments are forwarded unchanged.
+    """
+    return api_annotation_get.sync(*args, **kwargs, client=kmd_hmdb_client)
+
+# Adduct names for which get_polarity() reports the "positive" polarity.
+positive_adducts = [
+    "M+H",
+    "M+2H",
+    "M+H+NH4",
+    "M+H+Na",
+    "M+H+K",
+    "M+ACN+2H",
+    "M+2Na",
+    "M+H-2H2O",
+    "M+H-H2O",
+    "M+NH4",
+    "M+Na",
+    "M+CH3OH+H",
+    "M+K",
+    "M+ACN+H",
+    "M+2Na-H",
+    "M+IsoProp+H",
+    "M+ACN+Na",
+    "M+2K+H",
+    "M+DMSO+H",
+    "M+2ACN+H",
+    "2M+H",
+    "2M+NH4",
+    "2M+Na",
+    "2M+K",
+]
+
+# Adduct names for which get_polarity() reports the "negative" polarity.
+negative_adducts = [
+    "M-H",
+    "M-2H",
+    "M-H2O-H",
+    "M+Cl",
+    "M+FA-H",
+    "M+Hac-H",
+    "M-H+HCOONa",
+    "M+Br",
+    "M+TFA-H",
+    "2M-H",
+    "2M+FA-H",
+    "2M+Hac-H",
+]
+
+# Every value accepted by the ``--adducts`` option of the compound command.
+adduct_choices = positive_adducts + negative_adducts
+
+# Taxonomy column names; not referenced by the commands visible in this file.
+taxonomy_column_choices = [
+    "class",
+    "kingdom",
+    "molecular_framework",
+    "sub_class",
+    "super_class",
+    "id",
+]
+
+# Values accepted (and used as default) by ``annotation --columns``; also the
+# per-annotation keys produced by explode_compounds(). The spellings
+# ("kendricks", "monisotopic") match the attribute names read from the API
+# objects there, so they must not be "corrected" here alone.
+annotation_column_choices = [
+    "adduct",
+    "kendricks_mass",
+    "kendricks_mass_defect",
+    "monisotopic_molecular_weight",
+    "nominal_mass",
+    "polarity",
+    "annotation_id",
+]
+
+# Values accepted (and used as default) by ``compound --columns``: the
+# compound-level keys followed by all annotation-level keys. The order is the
+# default column order of the written file.
+compound_column_choices = [
+
+    "database",
+    "metabolite_name",
+    "chemical_formula",
+    "hmdb_id",
+    "inchikey",
+    "compound_id",
+] + annotation_column_choices
+
+
+# Root click command group of this script; the ``compound`` and
+# ``annotation`` subcommands register on it through ``@cli.command``.
+@click.group()
+def cli():
+    pass
+
+
+@cli.command(help="")
+@click.option(
+    "--version",
+    is_flag=True,
+)
+@click.option(
+    "--mz-ratio",
+    default=[303.05],
+    show_default=True,
+    multiple=True,
+    help="Provide the mz-ratio."
+)
+@click.option(
+    "--database",
+    default=["farid"],
+    show_default=True,
+    multiple=True,
+    help="Provide the database."
+)
+@click.option(
+    "--mass-tolerance",
+    default=10.5,
+    show_default=True,
+    help="Provide the mass-tolerance."
+)
+@click.option(
+    "--adducts",
+    default=["M+H"],
+    type=click.Choice(adduct_choices),
+    multiple=True,
+    show_default=True,
+    show_choices=False,
+    help="Provide the adducts."
+)
+@click.option(
+    "--columns",
+    default=compound_column_choices[:],
+    type=click.Choice(compound_column_choices),
+    multiple=True,
+    show_default=True,
+    show_choices=False,
+    help="Provide the outputed columns."
+)
+@click.option(
+    "--output-path",
+    help="Provide the output path."
+)
+def compound(*args, **kwargs):
+    """Search compounds through the API and write them as a tab-separated file.
+
+    click supplies the options as keyword arguments: ``version``,
+    ``mz_ratio``, ``database``, ``mass_tolerance``, ``adducts``, ``columns``
+    and ``output_path``. With ``--version`` the script version is printed
+    and the process exits with status 0 without querying anything.
+
+    Raises:
+        ValueError: if a requested column is missing from the results, or
+            if no output path was provided.
+    """
+    if kwargs.pop("version"):
+        print(__version__)
+        # The bare ``exit`` helper is injected by the ``site`` module and is
+        # not guaranteed to exist; raising SystemExit is always available.
+        raise SystemExit(0)
+
+    adducts = kwargs.pop("adducts")
+    polarity = get_polarity(adducts)
+
+    # Split the options into those meant for this script (columns, output
+    # path) and those forwarded to the API call.
+    other_kwargs, compound_kwargs = build_kwargs(
+        adducts=adducts,
+        polarity=polarity,
+        **kwargs
+    )
+    columns = other_kwargs["columns"]
+    result = find_compound(**compound_kwargs)
+    # Only produce one row per annotation when at least one annotation-level
+    # column was requested; otherwise one row per compound is enough.
+    result = explode_compounds(
+        result,
+        with_annotations=any(
+            column in columns
+            for column in annotation_column_choices
+        )
+    )
+    check_columns_in_result(result, columns)
+    output_csv_result(
+        result,
+        columns,
+        other_kwargs.get("output_path"),
+        delimiter="\t",
+    )
+
+
+def explode_compounds(result, with_annotations):
+    """Flatten API compound objects into plain dictionaries.
+
+    Args:
+        result: iterable of compound objects exposing ``database``,
+            ``metabolite_name``, ``chemical_formula``, ``hmdb_id``,
+            ``inchikey``, ``id`` and, when needed, ``annotations``.
+        with_annotations: when true, return one dictionary per
+            (compound, annotation) pair, carrying the annotation fields as
+            well; when false, return one dictionary per compound.
+
+    Returns:
+        A list of dictionaries keyed by output column name.
+    """
+    def compound_fields(cpd):
+        return {
+            "database": cpd.database,
+            "metabolite_name": cpd.metabolite_name,
+            "chemical_formula": cpd.chemical_formula,
+            "hmdb_id": cpd.hmdb_id,
+            "inchikey": cpd.inchikey,
+            "compound_id": cpd.id,
+        }
+
+    def annotation_fields(annotation):
+        return {
+            "adduct": annotation.name,
+            "kendricks_mass": annotation.kendricks_mass,
+            "kendricks_mass_defect": annotation.kendricks_mass_defect,
+            "monisotopic_molecular_weight":
+                annotation.monisotopic_molecular_weight,
+            "nominal_mass": annotation.nominal_mass,
+            "polarity": annotation.polarity,
+            "annotation_id": annotation.id,
+        }
+
+    if not with_annotations:
+        return [compound_fields(cpd) for cpd in result]
+    # Compound keys come first, annotation keys after, for every pair.
+    return [
+        {**compound_fields(cpd), **annotation_fields(annotation)}
+        for cpd in result
+        for annotation in cpd.annotations
+    ]
+
+
+@cli.command(help="")
+@click.option(
+    "--id",
+    type=int,
+    help="Provide the wanted annotation's id."
+)
+@click.option(
+    "--columns",
+    default=annotation_column_choices[:],
+    type=click.Choice(annotation_column_choices),
+    multiple=True,
+    show_default=True,
+    show_choices=False,
+    help="Provide the outputed columns."
+)
+@click.option(
+    "--output-path",
+    help="Provide the output path."
+)
+def annotation(*args, **kwargs):
+    """Fetch a single annotation by id and write it as a CSV file.
+
+    click supplies the options as keyword arguments: ``id``, ``columns``
+    and ``output_path``.
+
+    Raises:
+        ValueError: if the API returned nothing for the id, if a requested
+            column is missing from the result, or if no output path was
+            provided.
+    """
+    annotation_id = kwargs.pop("id")
+    result = get_annotation(id=annotation_id)
+    if result is None:
+        # NOTE(review): assumes the client signals "nothing usable" with
+        # None -- confirm against the API client.
+        raise ValueError(
+            f"No annotation was returned for the id {annotation_id}."
+        )
+    # write_result() reads every row by column name, so rows must be plain
+    # dictionaries; API objects are converted with to_dict(), the same way
+    # check_columns_in_result() inspects them.
+    rows = [
+        item if isinstance(item, dict) else item.to_dict()
+        for item in [result]
+    ]
+    columns = kwargs["columns"]
+    check_columns_in_result(rows, columns)
+    # NOTE(review): no delimiter is passed, so this file is comma-separated
+    # while the compound command writes tab-separated output -- confirm.
+    output_csv_result(
+        rows,
+        columns,
+        kwargs.get("output_path")
+    )
+
+
+def get_polarity(adducts):
+    """Return the polarity implied by the given adduct names.
+
+    Returns ``"positive"`` as soon as any adduct is listed in
+    ``positive_adducts``, otherwise ``"negative"`` if any is listed in
+    ``negative_adducts``, otherwise ``None``.
+
+    NOTE(review): a selection mixing positive and negative adducts yields
+    only ``"positive"`` -- confirm that this is what the API call expects.
+    """
+    for label, known_adducts in (
+        ("positive", positive_adducts),
+        ("negative", negative_adducts),
+    ):
+        if any(adduct in known_adducts for adduct in adducts):
+            return label
+    return None
+
+
+def build_kwargs(**kwargs):
+    """Split keyword arguments into script-level and API-level groups.
+
+    ``database`` and ``polarity`` are renamed to ``database_list`` and
+    ``polarity_list``. ``columns``, ``output_path`` and ``with_annotations``
+    are moved out into their own dictionary.
+
+    Returns:
+        A ``(other_kwargs, kwargs)`` tuple: the extracted script-level
+        arguments first, everything that remains second.
+    """
+    renamed = {
+        "database": "database_list",
+        "polarity": "polarity_list",
+    }
+    for old_name, new_name in renamed.items():
+        if old_name in kwargs:
+            kwargs[new_name] = kwargs.pop(old_name)
+
+    other_kwargs = {}
+    for name in ("columns", "output_path", "with_annotations"):
+        if name in kwargs:
+            other_kwargs[name] = kwargs.pop(name)
+    return other_kwargs, kwargs
+
+
+def check_columns_in_result(result, columns):
+    """Ensure every requested column exists in the first result row.
+
+    An empty ``result`` is accepted without any check. Rows that are not
+    dictionaries are converted with their ``to_dict()`` method before the
+    first one is inspected.
+
+    Raises:
+        ValueError: naming the column(s) absent from the first row.
+    """
+    if not result:
+        return
+    if isinstance(result[0], dict):
+        rows = result
+    else:
+        rows = [item.to_dict() for item in result]
+    available = rows[0].keys()
+    missing = [name for name in columns if name not in available]
+    if not missing:
+        return
+    if len(missing) == 1:
+        message = f"Could not find the column {missing[0]} in the results."
+    else:
+        message = (
+            "Could not find any of the columns "
+            + ','.join(missing)
+            + " in the results."
+        )
+    raise ValueError(message)
+
+
+def output_csv_result(result, columns, output_path, **csv_parameters):
+    """Write ``result`` to ``output_path`` through a ``csv.writer``.
+
+    Args:
+        result: rows handed to write_result().
+        columns: column names handed to write_result().
+        output_path: destination file path; it is opened for writing.
+        **csv_parameters: forwarded unchanged to ``csv.writer``.
+
+    Raises:
+        ValueError: if ``output_path`` is empty or ``None``.
+    """
+    if output_path:
+        # newline='' leaves line-ending handling to the csv writer.
+        with open(output_path, "w", newline='') as handle:
+            csv_writer = csv.writer(handle, **csv_parameters)
+            write_result(result, columns, csv_writer)
+        return
+    raise ValueError("Missing output path. Cannot output CSV results.")
+
+
+def write_result(result, columns, writer):
+    """Write a header row, then one row per item of ``result``.
+
+    Args:
+        result: iterable of mappings indexable by every name in ``columns``.
+        columns: column names, in output order; also written as the header.
+        writer: object offering ``writerow`` and ``writerows``.
+    """
+    writer.writerow(columns)
+    writer.writerows(
+        [row[name] for name in columns]
+        for row in result
+    )
+
+
+# Run the click command group only when executed as a script.
+if __name__ == "__main__":
+    cli()
diff -r 000000000000 -r 59c8bad5f6bc kmd_hmdb_plot_generator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kmd_hmdb_plot_generator.py Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+
+import csv
+import itertools
+import os
+
+import click
+
+import plotly.express
+import plotly.graph_objects
+
+__version__ = "1.0.0"
+
+
+# Root click command group of this script; the ``plot`` subcommand registers
+# on it through ``@cli.command``.
+@click.group()
+def cli():
+    pass
+
+
+@cli.command(help="")
+@click.option(
+    "--version",
+    is_flag=True,
+    default=False,
+)
+@click.option(
+    "--input",
+    default="./test.tsv",
+    help="Provide the path of the input CSV/TSV file."
+)
+@click.option(
+    "--output",
+    default="./test.html",
+    help="Provide the path of the output HTML plot."
+)
+@click.option(
+    "--x-column",
+    default=["nominal_mass"],
+    multiple=True,
+    help="Provide the column names for the X axis.",
+)
+@click.option(
+    "--y-column",
+    default=["kendricks_mass_defect"],
+    multiple=True,
+    help="Provide the column names for the Y axis.",
+)
+@click.option(
+    "--annotation-column",
+    multiple=True,
+    default=[
+        "metabolite_name",
+        "chemical_formula",
+    ],
+    help="Provide the columns name for the annotation."
+)
+def plot(*args, **kwargs):
+    """Read a delimited file and write a scatter plot of it.
+
+    click supplies the options as keyword arguments: ``version``, ``input``,
+    ``output``, ``x_column``, ``y_column`` and ``annotation_column``. With
+    ``--version`` the script version is printed and the process exits with
+    status 0 without reading anything.
+    """
+    if kwargs.pop("version"):
+        print(__version__)
+        # The bare ``exit`` helper is injected by the ``site`` module and is
+        # not guaranteed to exist; raising SystemExit is always available.
+        raise SystemExit(0)
+
+    input_path = kwargs.pop("input")
+    # read_input() returns (x_lists, y_lists, annotations, trace_names),
+    # which is exactly the positional signature of build_fig().
+    data = read_input(input_path, kwargs)
+    fig = build_fig(*data)
+    build_html_plot(fig, kwargs.get("output"))
+
+
+def read_input(path: str, kwargs: dict):
+    """Load the values to plot and their hover texts from a delimited file.
+
+    Args:
+        path: path of a comma- or tab-separated file whose first row is the
+            header.
+        kwargs: options of the ``plot`` command; ``x_column``, ``y_column``
+            and ``annotation_column`` are read. Each entry is a header name
+            or a 1-based column number.
+
+    Returns:
+        ``(x_lists, y_lists, annotations, trace_names)``: one list of floats
+        per trace for each axis, one hover text per data row, and one name
+        per trace.
+
+    Raises:
+        ValueError: if ``path`` does not exist, if the file is empty, or if
+            a plotted cell cannot be converted to ``float``.
+    """
+    if not os.path.exists(path):
+        raise ValueError(f"The path '{path}' does not exist.")
+    sep = detect_sep(path)
+    # newline="" leaves line-ending handling to the csv module.
+    with open(path, newline="") as csv_file:
+        reader = csv.reader(csv_file, delimiter=sep)
+        try:
+            first_line = next(reader)
+        except StopIteration:
+            raise ValueError(f"The file '{path}' is empty.") from None
+        # Blank lines are read as empty rows; they hold nothing to plot.
+        all_lines = [line for line in reader if line]
+
+    x_index, y_index, _, _ = get_indexes_names(
+        first_line,
+        list(kwargs.get("x_column")),
+        list(kwargs.get("y_column")),
+    )
+    # Names are taken from the header so that columns selected by number
+    # are still labelled with their real name.
+    trace_names = [
+        f"f({first_line[x]}) = {first_line[y]}"
+        for x, y in zip(x_index, y_index)
+    ]
+    annotation_indexes = [
+        get_index_of(first_line, column)[0]
+        for column in kwargs["annotation_column"]
+    ]
+    hover_names = [first_line[index] for index in annotation_indexes]
+
+    x_lists = [[] for _ in x_index]
+    y_lists = [[] for _ in y_index]
+    annotations = []
+    for line in all_lines:
+        for xs, ys, x, y in zip(x_lists, y_lists, x_index, y_index):
+            xs.append(float(line[x]))
+            ys.append(float(line[y]))
+        # One "name: value" entry per annotation column, separated by an
+        # HTML line break so each entry sits on its own hover line.
+        # NOTE(review): the separator literal was unreadable in the patch
+        # as received; "<br>" is the assumed original -- confirm.
+        annotations.append("<br>".join(
+            f"{name}: {line[index]}"
+            for name, index in zip(hover_names, annotation_indexes)
+        ))
+    return x_lists, y_lists, annotations, trace_names
+
+
+def get_indexes_names(first_line, x_column, y_column):
+    """Pair every X column with every Y column and resolve their indexes.
+
+    Pairs in which X and Y resolve to the same column are dropped.
+
+    Args:
+        first_line: header row of the input file.
+        x_column: header names or 1-based column numbers for the X axis.
+        y_column: header names or 1-based column numbers for the Y axis.
+
+    Returns:
+        ``(x_index, y_index, x_column, y_column)``: four lists of equal
+        length, one entry per kept pair. All four are empty when either
+        input is empty or when every pair was dropped.
+    """
+    x_index, y_index, x_names, y_names = [], [], [], []
+    for x_name, y_name in itertools.product(x_column, y_column):
+        x_position = get_index_of(first_line, x_name)[0]
+        y_position = get_index_of(first_line, y_name)[0]
+        if x_position == y_position:
+            # Plotting a column against itself carries no information.
+            continue
+        x_index.append(x_position)
+        y_index.append(y_position)
+        x_names.append(x_name)
+        y_names.append(y_name)
+    return (
+        x_index,
+        y_index,
+        x_names,
+        y_names,
+    )
+
+
+def get_index_of(first_line, column):
+    """Resolve a column designation to 0-based header index(es).
+
+    Args:
+        first_line: header row, as a list of column names.
+        column: a header name, a 1-based column number (``int`` or numeric
+            string), or a list/tuple of those.
+
+    Returns:
+        A list of 0-based indexes: a single-element list for a single
+        designation, one element per item for a list/tuple.
+
+    Raises:
+        ValueError: if the designation is neither a valid 1-based column
+            number nor a name present in ``first_line``.
+    """
+    if isinstance(column, (tuple, list)):
+        return [get_index_of(first_line, item)[0] for item in column]
+    try:
+        position = int(column)
+    except ValueError:
+        position = None
+    # A number is only a column number when it points inside the header;
+    # "0" or negative values would otherwise silently wrap around to the
+    # last columns.
+    if position is not None and 1 <= position <= len(first_line):
+        return [position - 1]
+    try:
+        return [first_line.index(column)]
+    except ValueError:
+        raise ValueError(
+            f"Could not find the column {column!r}: it is neither a column "
+            f"number between 1 and {len(first_line)} nor a header name."
+        ) from None
+
+
+def build_fig(x_lists, y_lists, annotations, trace_names):
+    """Build a figure holding one marker trace per X/Y pair.
+
+    Args:
+        x_lists: X values, one list per trace.
+        y_lists: Y values, one list per trace, aligned with ``x_lists``.
+        annotations: hover texts, passed unchanged to every trace.
+        trace_names: trace names, aligned with ``x_lists``.
+
+    Returns:
+        The figure created by ``plotly.express.scatter()`` with the traces
+        added to it.
+    """
+    fig = plotly.express.scatter()
+    for position, x_values in enumerate(x_lists):
+        trace = plotly.graph_objects.Scatter(
+            name=trace_names[position],
+            x=x_values,
+            y=y_lists[position],
+            hovertext=annotations,
+            mode="markers",
+        )
+        fig.add_trace(trace)
+    return fig
+
+
+def detect_sep(tabular_file: str) -> str:
+    """Guess the field separator from the first line of a file.
+
+    Returns ``','`` when the first line holds strictly more commas than
+    tabs, and a tab character otherwise (including for an empty file).
+    """
+    with open(tabular_file, "r") as handle:
+        header = handle.readline()
+    # Splitting on a separator yields one more field than there are
+    # separators, so comparing the counts compares the field numbers.
+    return ',' if header.count(',') > header.count('\t') else '\t'
+
+
+def build_html_plot(fig, output: str):
+    """Write ``fig`` to the file ``output`` via ``plotly.offline.plot``.
+
+    The ``auto_open=False`` flag is passed along with the file name.
+    Returns whatever ``plotly.offline.plot`` returns.
+    """
+    plot_options = {
+        "filename": output,
+        "auto_open": False,
+    }
+    return plotly.offline.plot(fig, **plot_options)
+
+
+# Run the click command group only when executed as a script.
+if __name__ == "__main__":
+    cli()
diff -r 000000000000 -r 59c8bad5f6bc macro.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macro.xml Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,170 @@
+
+
+ 303.05
+ 10.0
+ M+H
+ HMDB
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "get_data" in str(what['to_do'])
+
+
+
+
+
+
+
+
+ "produce_plot" in str(what['to_do'])
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 59c8bad5f6bc test-data/get_data_tol_0.01.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_data_tol_0.01.tsv Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,15 @@
+database metabolite_name chemical_formula hmdb_id inchikey compound_id adduct kendricks_mass kendricks_mass_defect monisotopic_molecular_weight nominal_mass polarity annotation_id
+hmdb 5-(3',5'-Dihydroxyphenyl)-gamma-valerolactone-O-sulphate-O-methyl C12H14O7S HMDB0060031 FXGBBWWEXQWRKV-UHFFFAOYSA-N 193796 M+H 302.715 0.28509 303.053 303.0 positive 3982213
+hmdb Quercetin C15H10O7 HMDB0005794 REFJWTPEDVJJIY-UHFFFAOYSA-N 40965 M+H 302.712 0.288457 303.05 303.0 positive 4379351
+hmdb 8-Chloroinosine C10H11ClN4O5 HMDB0247428 ROPMUQKCJYNROP-UHFFFAOYSA-N 130732 M+H 302.711 0.289311 303.049 303.0 positive 4548699
+hmdb 5-((p-Hydroxybenzylidene)amino)-3-methylisothiazolo(5,4-d)pyrimidine-4,6(5H,7H)-dione C13H10N4O3S HMDB0253558 ALZDMJPUQGYCAX-UHFFFAOYSA-N 68215 M+H 302.716 0.283753 303.055 303.0 positive 4993233
+hmdb 2',4',5,7,8-Pentahydroxyisoflavone C15H10O7 HMDB0033264 LOLNVJIGYUJCIY-UHFFFAOYSA-N 101970 M+H 302.712 0.288457 303.05 303.0 positive 5292330
+hmdb 2-(2-Nitroimidazol-1-yl)-N-(2,2,3,3,3-pentafluoropropyl)acetamide C8H7F5N4O3 HMDB0251710 JGGDSDPOPRWSCX-UHFFFAOYSA-N 9228 M+H 302.713 0.28728 303.051 303.0 positive 7593628
+hmdb 5,6,7,3',4'-Pentahydroxyisoflavone C15H10O7 HMDB0041687 BIDDAFIPYBBDES-UHFFFAOYSA-N 134953 M+H 302.712 0.288457 303.05 303.0 positive 8100148
+hmdb Morin C15H10O7 HMDB0030796 YXOLAZRVSSWPPT-UHFFFAOYSA-N 141800 M+H 302.712 0.288457 303.05 303.0 positive 8184605
+hmdb Tricetin C15H10O7 HMDB0029620 ARSRJFRKVXALTF-UHFFFAOYSA-N 181210 M+H 302.712 0.288457 303.05 303.0 positive 8412749
+hmdb 9-(2,6-Dioxo-3H-purin-9-yl)-3H-purine-2,6-dione C10H6N8O4 HMDB0257773 LLFQXBCTHVBLEI-UHFFFAOYSA-N 108799 M+H 302.72 0.279918 303.058 303.0 positive 8782069
+hmdb 6-Hydroxyluteolin C15H10O7 HMDB0036632 VYAKIUWQLHRZGK-UHFFFAOYSA-N 74622 M+H 302.712 0.288457 303.05 303.0 positive 9521790
+hmdb Pollenin A C15H10O7 HMDB0303704 ZDOTZEDNGNPOEW-UHFFFAOYSA-N 3105 M+H 302.712 0.288457 303.05 303.0 positive 9722226
+hmdb 5,7,8,3',4'-Pentahydroxyisoflavone C15H10O7 HMDB0041689 USQGZNXXBDCNQF-UHFFFAOYSA-N 15604 M+H 302.712 0.288457 303.05 303.0 positive 9958013
+hmdb {2-methoxy-4-[(5-oxooxolan-2-yl)methyl]phenyl}oxidanesulfonic acid C12H14O7S HMDB0127769 FYRRHCSCZYSADR-UHFFFAOYSA-N 51990 M+H 302.715 0.285089 303.053 303.0 positive 10166087