Mercurial > repos > workflow4metabolomics > kmd_hmdb_data_plot

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmd_hmdb_data_plot.xml	Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,176 @@
+<tool id="kmd_hmdb_data_plot" name="KMD HMDB Data Plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
+    <description>
+        retrieves data from the KMD HMDB API and produces a plot and a tsv file
+    </description>
+    <macros>
+        <import>macro.xml</import>
+        <token name="@TOOL_VERSION@">1.0.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <edam_topics>
+        <edam_topic>topic_0091</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_3803</edam_operation>
+    </edam_operations>
+    <requirements>
+        <requirement type="package" version="3.10">python</requirement>
+        <requirement type="package" version="2.0.3">pandas</requirement>
+        <requirement type="package" version="5.15.0">plotly</requirement>
+        <requirement type="package" version="1.0.1">kmd_hmdb_api_client</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+<![CDATA[
+## Step 1 (optional): query the API and write the TSV to '$output_path'.
+## "get_data" is a substring of both the "get_data" and the
+## "get_data_and_produce_plot" option values, so this branch runs for both.
+#if "get_data" in str($what.to_do)
+    python3 '$__tool_directory__/kmd_hmdb_interrogator.py' compound
+        --mz-ratio '$what.mz_ratio'
+        --database '$what.database'
+        --mass-tolerance '$what.mass_tolerance'
+        ## --adducts is repeated once per selected adduct.
+        #for adduct in $what.adducts
+            --adducts '$adduct'
+        #end for
+        --output-path '$output_path'
+#end if
+
+## Chain the two commands only when both steps are requested.
+#if $what.to_do == "get_data_and_produce_plot"
+    &&
+#end if
+
+## Step 2 (optional): build the HTML plot. "produce_plot" is a substring of
+## both the "produce_plot" and the "get_data_and_produce_plot" option values.
+#if "produce_plot" in str($what.to_do)
+    python3 '$__tool_directory__/kmd_hmdb_plot_generator.py' plot
+
+    ## Plot the TSV produced by step 1 when it ran, else the user's dataset.
+    #if "get_data" in str($what.to_do)
+        --input '$output_path'
+    #else
+        --input '$what.tsv_input'
+    #end if
+
+    ## The column options below are only emitted when a selection exists;
+    ## otherwise the script's own defaults apply.
+    #if $what.x_columns
+        #for x in $what.x_columns
+          --x-column '$x'
+        #end for
+    #end if
+
+    #if $what.y_columns
+        #for y in $what.y_columns
+          --y-column '$y'
+        #end for
+    #end if
+
+    #if $what.annotation_columns
+        #for annotation_column in $what.annotation_columns
+            --annotation-column '$annotation_column'
+        #end for
+    #end if
+
+      --output 'out.html'
+
+    ## The plot is written to a local file first, then copied to the dataset.
+    && cat 'out.html' > '$output'
+#end if
+]]>
+    </command>
+
+    <inputs>
+        <conditional name="what">
+            <param name="to_do" type="select"
+                label="What to do"
+            >
+                <option value="get_data">Only get data</option>
+                <option value="produce_plot">Only produce plot</option>
+                <option value="get_data_and_produce_plot" selected="true">
+                    Get data from database + Produce Plot
+                </option>
+            </param>
+            <when value="produce_plot">
+                <expand macro="produce_plot_inputs" />
+                <expand macro="not_get_data" />
+            </when>
+            <when value="get_data">
+                <expand macro="get_data_inputs" />
+                <expand macro="not_produce_plot" />
+            </when>
+            <when value="get_data_and_produce_plot">
+                <expand macro="get_data_inputs" />
+                <expand macro="not_produce_plot" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <expand macro="get_data_outputs" />
+        <expand macro="produce_plot_outputs" />
+    </outputs>
+
+    <tests>
+        <test>
+            <!-- #1 get_data with tolerance = 0.01 -->
+            <param name="to_do" value="get_data" />
+            <param name="mass_tolerance" value="0.01" />
+            <param name="mz_ratio" value="303.05" />
+            <param name="database" value="hmdb" />
+            <param name="adducts" value="M+H" />
+            <output name="output_path" file="get_data_tol_0.01.tsv" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+This tool includes two utilities.
+One retrieves data from the KMD HMDB API, formatted as a tsv file,
+and the other plots those data in a plotly graph.
+
+By default, the plot shows the KMD (Kendrick mass defect) as a
+function of the nominal mass of the compound.
+
+Compounds are retrieved using a query with the following default parameters:
+ - mz defaults to @DEFAULT_MZ@
+ - mz tolerance defaults to @DEFAULT_TOLERENCE@
+ - adduct list defaults to "@DEFAULT_ADDUCT@"
+ - database defaults to "@DEFAULT_DATABASE@", and possible values are one of:
+
+   - KMD Metabolites
+   - HMDB
+
+Those two utilities are usable independently, or sequentially in galaxy.
+
+Multiple X / Y values in the X/Y column selection can be
+selected to produce a lot of graphs at once.
+
+For example, imagine you have 5 columns in your tsv file,
+and those columns are named: A, B, C, D and E.
+
+If you choose X = [A, C] and Y = [B],
+then you will get two graphs (in one single HTML file) with:
+
+ - f(A) = B
+ - f(C) = B
+
+But if you choose multiple values for both X and Y, you get
+all combinations of columns X and Y. For example, if you
+select X = [A, B] and Y = [C, D, E], then you will get six
+graphs on the same plot:
+
+ - f(A) = C
+ - f(A) = D
+ - f(A) = E
+ - f(B) = C
+ - f(B) = D
+ - f(B) = E
+
+All the traces of those graphs can be toggled on and off in the HTML page.
+So don't hesitate to select a lot of parameters for X and Y!
+
+]]>
+    </help>
+    <citations></citations>
+    <creator>
+        <person
+            honorificPrefix="Mx."
+            givenName="Lain"
+            familyName="Pavot"
+            email="lain.pavot@inrae.fr"
+            identifier="https://orcid.org/0009-0007-1841-4358"
+        />
+    </creator>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmd_hmdb_interrogator.py	Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,328 @@
+#!/usr/bin/env python3
+
+import csv
+import operator
+
+import click
+
+import kmd_hmdb_api_client.client
+from kmd_hmdb_api_client.api.default import (
+    api_annotation_get,
+    api_compound_find,
+    api_taxonomy_get,
+)
+
+__version__ = "1.0.0"
+
+
+# Single API client shared by every endpoint wrapper below; it is created
+# once, when the module is imported.
+# NOTE(review): verify_ssl=False turns off TLS certificate verification for
+# every request made through this client - confirm this is still required.
+kmd_hmdb_client = kmd_hmdb_api_client.client.Client(
+    "https://kmd-hmdb-rest-api.metabolomics-chopin.e-metabohub.fr",
+    verify_ssl=False,
+    timeout=500,
+)
+
+
+def find_compound(*args, **kwargs):
+    """Forward to ``api_compound_find.sync`` using the shared client."""
+    return api_compound_find.sync(*args, client=kmd_hmdb_client, **kwargs)
+
+
+def get_taxonomy(*args, **kwargs):
+    """Forward to ``api_taxonomy_get.sync`` using the shared client."""
+    return api_taxonomy_get.sync(*args, client=kmd_hmdb_client, **kwargs)
+
+
+def get_annotation(*args, **kwargs):
+    """Forward to ``api_annotation_get.sync`` using the shared client."""
+    return api_annotation_get.sync(*args, client=kmd_hmdb_client, **kwargs)
+
+# Adduct names for which get_polarity() reports the "positive" polarity.
+positive_adducts = [
+    "M+H",
+    "M+2H",
+    "M+H+NH4",
+    "M+H+Na",
+    "M+H+K",
+    "M+ACN+2H",
+    "M+2Na",
+    "M+H-2H2O",
+    "M+H-H2O",
+    "M+NH4",
+    "M+Na",
+    "M+CH3OH+H",
+    "M+K",
+    "M+ACN+H",
+    "M+2Na-H",
+    "M+IsoProp+H",
+    "M+ACN+Na",
+    "M+2K+H",
+    "M+DMSO+H",
+    "M+2ACN+H",
+    "2M+H",
+    "2M+NH4",
+    "2M+Na",
+    "2M+K",
+]
+
+# Adduct names for which get_polarity() reports the "negative" polarity.
+negative_adducts = [
+    "M-H",
+    "M-2H",
+    "M-H2O-H",
+    "M+Cl",
+    "M+FA-H",
+    "M+Hac-H",
+    "M-H+HCOONa",
+    "M+Br",
+    "M+TFA-H",
+    "2M-H",
+    "2M+FA-H",
+    "2M+Hac-H",
+]
+
+# Every value accepted by the --adducts option of the `compound` command.
+adduct_choices = positive_adducts + negative_adducts
+
+# Taxonomy column names. Not referenced by the other code visible in this
+# file.
+taxonomy_column_choices = [
+    "class",
+    "kingdom",
+    "molecular_framework",
+    "sub_class",
+    "super_class",
+    "id",
+]
+
+# Per-annotation column names: choices of `annotation --columns`, and the
+# columns whose presence makes `compound` expand compounds per annotation.
+# The spellings ("kendricks", "monisotopic") are the keys produced by
+# explode_compounds() and the names written in the output header, so they
+# must be kept as they are.
+annotation_column_choices = [
+    "adduct",
+    "kendricks_mass",
+    "kendricks_mass_defect",
+    "monisotopic_molecular_weight",
+    "nominal_mass",
+    "polarity",
+    "annotation_id",
+]
+
+# Compound-level column names followed by the per-annotation ones: choices
+# and default value of `compound --columns`.
+compound_column_choices = [
+
+    "database",
+    "metabolite_name",
+    "chemical_formula",
+    "hmdb_id",
+    "inchikey",
+    "compound_id",
+] + annotation_column_choices
+
+
+# Root click group: the sub-commands of this script are registered on it
+# through @cli.command. A comment is used rather than a docstring because
+# click would display a docstring as the group's help text.
+@click.group()
+def cli():
+    pass
+
+
+@cli.command(help="")
+@click.option(
+    "--version",
+    is_flag=True,
+)
+@click.option(
+    "--mz-ratio",
+    default=[303.05],
+    show_default=True,
+    multiple=True,
+    help="Provide the mz-ratio."
+)
+@click.option(
+    "--database",
+    default=["farid"],
+    show_default=True,
+    multiple=True,
+    help="Provide the database."
+)
+@click.option(
+    "--mass-tolerance",
+    default=10.5,
+    show_default=True,
+    help="Provide the mass-tolerance."
+)
+@click.option(
+    "--adducts",
+    default=["M+H"],
+    type=click.Choice(adduct_choices),
+    multiple=True,
+    show_default=True,
+    show_choices=False,
+    help="Provide the adducts."
+)
+@click.option(
+    "--columns",
+    default=compound_column_choices[:],
+    type=click.Choice(compound_column_choices),
+    multiple=True,
+    show_default=True,
+    show_choices=False,
+    help="Provide the outputed columns."
+)
+@click.option(
+    "--output-path",
+    help="Provide the output path."
+)
+def compound(*args, **kwargs):
+    """Query the compound-find endpoint and write the result as a TSV file.
+
+    The click options arrive in ``kwargs``. ``--version`` prints the script
+    version and exits. Otherwise the polarity is derived from the adducts,
+    the remaining options are split between output settings and API
+    arguments, the API is queried, and the selected columns are written
+    tab-separated to ``--output-path``.
+
+    Raises:
+        ValueError: if the API returns no result, if a requested column is
+            absent from the result, or if no output path was given.
+    """
+
+    if kwargs.pop("version"):
+        print(__version__)
+        # SystemExit is raised directly: the exit() builtin is injected by
+        # the `site` module for interactive use and is not always defined.
+        raise SystemExit(0)
+
+    adducts = kwargs.pop("adducts")
+    polarity = get_polarity(adducts)
+
+    other_kwargs, compound_kwargs = build_kwargs(
+        adducts=adducts,
+        polarity=polarity,
+        **kwargs
+    )
+    columns = other_kwargs["columns"]
+    result = find_compound(**compound_kwargs)
+    if result is None:
+        # Presumably the client yields None when the response could not be
+        # parsed (TODO confirm); fail with a readable message instead of a
+        # "NoneType is not iterable" error further down.
+        raise ValueError(
+            "The KMD HMDB API returned no result for this query."
+        )
+    # One row per annotation is only needed when at least one
+    # annotation-level column was requested.
+    with_annotations = any(
+        column in columns
+        for column in annotation_column_choices
+    )
+    result = explode_compounds(result, with_annotations=with_annotations)
+    check_columns_in_result(result, columns)
+    output_csv_result(
+        result,
+        columns,
+        other_kwargs.get("output_path"),
+        delimiter="\t",
+    )
+
+
+def explode_compounds(result, with_annotations):
+    """Flatten compound objects into a list of plain row dictionaries.
+
+    Args:
+        result: iterable of compound objects exposing the attributes read
+            below (``database``, ``metabolite_name``, ..., ``annotations``).
+        with_annotations: when true, one row is produced per
+            (compound, annotation) pair and carries the annotation fields;
+            otherwise one row is produced per compound.
+
+    Returns:
+        A list of dictionaries keyed by output column name.
+    """
+
+    def compound_fields(cpd):
+        # Compound-level part of a row, rebuilt for every row.
+        return {
+            "database": cpd.database,
+            "metabolite_name": cpd.metabolite_name,
+            "chemical_formula": cpd.chemical_formula,
+            "hmdb_id": cpd.hmdb_id,
+            "inchikey": cpd.inchikey,
+            "compound_id": cpd.id,
+        }
+
+    if not with_annotations:
+        return [compound_fields(cpd) for cpd in result]
+
+    rows = []
+    for cpd in result:
+        for annotation in cpd.annotations:
+            row = compound_fields(cpd)
+            row["adduct"] = annotation.name
+            row["kendricks_mass"] = annotation.kendricks_mass
+            row["kendricks_mass_defect"] = annotation.kendricks_mass_defect
+            row["monisotopic_molecular_weight"] = (
+                annotation.monisotopic_molecular_weight
+            )
+            row["nominal_mass"] = annotation.nominal_mass
+            row["polarity"] = annotation.polarity
+            row["annotation_id"] = annotation.id
+            rows.append(row)
+    return rows
+
+
+@cli.command(help="")
+@click.option(
+    "--id",
+    type=int,
+    help="Provide the wanted annotation's id."
+)
+@click.option(
+    "--columns",
+    default=annotation_column_choices[:],
+    type=click.Choice(annotation_column_choices),
+    multiple=True,
+    show_default=True,
+    show_choices=False,
+    help="Provide the outputed columns."
+)
+@click.option(
+    "--output-path",
+    help="Provide the output path."
+)
+def annotation(*args, **kwargs):
+    """Fetch one annotation by id and write the selected columns to a file.
+
+    The click options arrive in ``kwargs`` (``id``, ``columns``,
+    ``output_path``).
+    """
+    # NOTE(review): --id is not required, so `id` may be None here.
+    result = get_annotation(id=kwargs.pop("id"))
+    # The helpers below work on a list of rows.
+    result = [result]
+    columns = kwargs["columns"]
+    # NOTE(review): the row is the object returned by the API, not a dict.
+    # Confirm that it exposes the requested column names; explode_compounds()
+    # reads `name` and `id` on annotations to fill "adduct" and
+    # "annotation_id".
+    check_columns_in_result(result, columns)
+    # No delimiter is passed, so csv.writer's default (a comma) is used here,
+    # whereas the `compound` command passes a tab.
+    output_csv_result(
+        result,
+        columns,
+        kwargs.get("output_path")
+    )
+
+
+def get_polarity(adducts):
+    """Return the polarity matching the given adducts.
+
+    Returns "positive" as soon as one adduct belongs to
+    ``positive_adducts``, else "negative" if one belongs to
+    ``negative_adducts``, else ``None``. Positive adducts take precedence
+    when both kinds are present.
+    """
+    for label, known_adducts in (
+        ("positive", positive_adducts),
+        ("negative", negative_adducts),
+    ):
+        if any(adduct in known_adducts for adduct in adducts):
+            return label
+    return None
+
+
+def build_kwargs(**kwargs):
+    """Split keyword arguments into output settings and API arguments.
+
+    ``database`` and ``polarity`` are renamed to ``database_list`` and
+    ``polarity_list``. ``columns``, ``output_path`` and
+    ``with_annotations`` are moved to a separate dictionary.
+
+    Returns:
+        A ``(other_kwargs, kwargs)`` tuple: the extracted settings, then
+        the remaining (renamed) arguments.
+    """
+    renames = {
+        "database": "database_list",
+        "polarity": "polarity_list",
+    }
+    for old_name, new_name in renames.items():
+        if old_name in kwargs:
+            kwargs[new_name] = kwargs.pop(old_name)
+
+    other_kwargs = {}
+    for name in ("columns", "output_path", "with_annotations"):
+        if name in kwargs:
+            other_kwargs[name] = kwargs.pop(name)
+    return other_kwargs, kwargs
+
+
+def check_columns_in_result(result, columns):
+    """Check that every requested column exists in the first result row.
+
+    An empty ``result`` is accepted as is. When the first row is not a
+    dict, every row is converted through its ``to_dict()`` method before
+    the keys of the first row are inspected.
+
+    Raises:
+        ValueError: naming the missing column(s).
+    """
+    if not result:
+        return
+    rows = result
+    if not isinstance(rows[0], dict):
+        rows = [item.to_dict() for item in rows]
+    available = rows[0].keys()
+    absent = [name for name in columns if name not in available]
+    if not absent:
+        return
+    if len(absent) == 1:
+        message = f"Could not find the column {absent[0]} in the results."
+    else:
+        message = (
+            "Could not find any of the columns "
+            + ','.join(absent)
+            + " in the results."
+        )
+    raise ValueError(message)
+
+
+def output_csv_result(result, columns, output_path, **csv_parameters):
+    """Write ``result`` to ``output_path`` through a ``csv.writer``.
+
+    ``csv_parameters`` are forwarded to ``csv.writer`` (for example
+    ``delimiter``).
+
+    Raises:
+        ValueError: if ``output_path`` is empty or None.
+    """
+    if not output_path:
+        raise ValueError("Missing output path. Cannot output CSV results.")
+    # newline='' leaves line endings to the csv module.
+    with open(output_path, mode="w", newline='') as handle:
+        write_result(result, columns, csv.writer(handle, **csv_parameters))
+
+
+def write_result(result, columns, writer):
+    """Write a header line, then one line per item of ``result``.
+
+    Args:
+        result: iterable of rows. A row is either a dict or an object
+            exposing ``to_dict()``, the same two shapes that
+            check_columns_in_result() accepts.
+        columns: names of the columns to write, in output order.
+        writer: a ``csv.writer``-like object.
+
+    Raises:
+        KeyError: if a row lacks one of the requested columns.
+    """
+    writer.writerow(columns)
+    for item in result:
+        # Objects with to_dict() are converted first, so the columns can be
+        # looked up by key on them as well.
+        if not isinstance(item, dict) and hasattr(item, "to_dict"):
+            item = item.to_dict()
+        writer.writerow([item[column] for column in columns])
+
+
+if __name__ == "__main__":
+    cli()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmd_hmdb_plot_generator.py	Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+
+import csv
+import itertools
+import os
+
+import click
+
+import plotly.express
+import plotly.graph_objects
+
+__version__ = "1.0.0"
+
+
+# Root click group: the `plot` sub-command is registered on it through
+# @cli.command. A comment is used rather than a docstring because click
+# would display a docstring as the group's help text.
+@click.group()
+def cli():
+    pass
+
+
+@cli.command(help="")
+@click.option(
+    "--version",
+    is_flag=True,
+    default=False,
+)
+@click.option(
+    "--input",
+    default="./test.tsv",
+    help="Provide the path of the tabular (tsv or csv) input file."
+)
+@click.option(
+    "--output",
+    default="./test.html",
+    help="Provide the path of the HTML output file."
+)
+@click.option(
+    "--x-column",
+    default=["nominal_mass"],
+    multiple=True,
+    help="Provide the column names for the X axis.",
+)
+@click.option(
+    "--y-column",
+    default=["kendricks_mass_defect"],
+    multiple=True,
+    help="Provide the column names for the Y axis.",
+)
+@click.option(
+    "--annotation-column",
+    multiple=True,
+    default=[
+        "metabolite_name",
+        "chemical_formula",
+    ],
+    help="Provide the columns name for the annotation."
+)
+def plot(*args, **kwargs):
+    """Read a tabular file and write an HTML plotly scatter plot.
+
+    The click options arrive in ``kwargs``. ``--version`` prints the script
+    version and exits. Otherwise the input file is read, one trace is built
+    per (x, y) column pair, and the figure is written to ``--output``.
+    """
+
+    if kwargs.pop("version"):
+        print(__version__)
+        # SystemExit is raised directly: the exit() builtin is injected by
+        # the `site` module for interactive use and is not always defined.
+        raise SystemExit(0)
+
+    input_path = kwargs.pop("input")
+    data = read_input(input_path, kwargs)
+    fig = build_fig(*data)
+    build_html_plot(fig, kwargs.get("output"))
+
+
+def read_input(path: str, kwargs: dict):
+    """Read the tabular file and collect the series to plot.
+
+    Args:
+        path: path of the tsv/csv file; its first line is the header.
+        kwargs: options of the `plot` command. The keys read here are
+            ``x_column``, ``y_column`` and ``annotation_column``, each an
+            iterable of column names or 1-based column numbers.
+
+    Returns:
+        A ``(x_lists, y_lists, annotations, trace_names)`` tuple. The
+        first two hold one list of floats per (x, y) column pair,
+        ``annotations`` holds one hover text per data row, and
+        ``trace_names`` one label per pair.
+
+    Raises:
+        ValueError: if the path does not exist, the file is empty, a column
+            cannot be found, or a plotted cell is not a number.
+    """
+    if not os.path.exists(path):
+        raise ValueError(f"The path '{path}' does not exist.")
+    sep = detect_sep(path)
+    # newline='' is what the csv module expects from the files it reads.
+    with open(path, newline='') as csv_file:
+        line_generator = csv.reader(csv_file, delimiter=sep)
+        try:
+            first_line = next(line_generator)
+        except StopIteration:
+            raise ValueError(f"The file '{path}' is empty.") from None
+        all_lines = list(line_generator)
+
+    x_index, y_index, _, _ = get_indexes_names(
+        first_line,
+        list(kwargs.get("x_column")),
+        list(kwargs.get("y_column")),
+    )
+    # Names are read back from the header so that columns given by number
+    # are labelled with their header name.
+    x_column = [first_line[index] for index in x_index]
+    y_column = [first_line[index] for index in y_index]
+    trace_names = [
+        f"f({x_name}) = {y_name}"
+        for x_name, y_name in zip(x_column, y_column)
+    ]
+
+    # Only the requested annotation columns are looked up: a file without
+    # the default "metabolite_name" / "chemical_formula" columns is fine
+    # as long as other annotation columns are requested.
+    annotation_indexes = [
+        get_index_of(first_line, column)[0]
+        for column in kwargs["annotation_column"]
+    ]
+    hover_names = [first_line[index] for index in annotation_indexes]
+
+    x_lists = [[] for _ in x_index]
+    y_lists = [[] for _ in y_index]
+    annotations = []
+    for line in all_lines:
+        if not line:
+            # csv.reader yields [] for a blank line (e.g. a trailing one).
+            continue
+        for i in range(len(x_index)):
+            x_lists[i].append(float(line[x_index[i]]))
+            y_lists[i].append(float(line[y_index[i]]))
+        annotations.append("<br>".join(
+            f"{hover_names[hover_index]}: {line[index]}"
+            for hover_index, index in enumerate(annotation_indexes)
+        ))
+    return x_lists, y_lists, annotations, trace_names
+
+
+def get_indexes_names(first_line, x_column, y_column):
+    """Pair every X column with every Y column and resolve their indexes.
+
+    Pairs whose X and Y resolve to the same column are dropped.
+
+    Args:
+        first_line: header of the tabular file (list of column names).
+        x_column: list of X columns (names or 1-based numbers).
+        y_column: list of Y columns (names or 1-based numbers).
+
+    Returns:
+        A ``(x_index, y_index, x_column, y_column)`` tuple of four lists of
+        equal length, one entry per kept pair. All four are empty when
+        ``x_column`` or ``y_column`` is empty.
+    """
+    pairs = list(itertools.product(x_column, y_column))
+    if not pairs:
+        # zip(*[]) yields nothing, so the unpacking below would fail.
+        return [], [], [], []
+    x_column, y_column = map(list, zip(*pairs))
+    x_index = get_index_of(first_line, x_column)
+    y_index = get_index_of(first_line, y_column)
+    kept = [
+        position for position in range(len(x_index))
+        if x_index[position] != y_index[position]
+    ]
+    return (
+        [x_index[position] for position in kept],
+        [y_index[position] for position in kept],
+        [x_column[position] for position in kept],
+        [y_column[position] for position in kept],
+    )
+
+
+def get_index_of(first_line, column):
+    """Resolve a column designation into 0-based header indexes.
+
+    Args:
+        first_line: header of the tabular file (list of column names).
+        column: a column name, a 1-based column number (int or numeric
+            string), or a list/tuple of those.
+
+    Returns:
+        A list of 0-based indexes: one entry for a single designation, one
+        entry per item for a list/tuple.
+
+    Raises:
+        ValueError: if a name is absent from the header, or if a number is
+            outside ``1..len(first_line)``.
+    """
+    if isinstance(column, (tuple, list)):
+        return [get_index_of(first_line, item)[0] for item in column]
+    try:
+        position = int(column)
+    except ValueError:
+        # Not a number: the value is a header name.
+        try:
+            return [first_line.index(column)]
+        except ValueError:
+            raise ValueError(
+                f"Could not find the column '{column}' in the header."
+            ) from None
+    # Without this check, 0 and negative numbers would silently select
+    # columns counted from the end of the header.
+    if not 1 <= position <= len(first_line):
+        raise ValueError(
+            f"Column number {position} is out of range: "
+            f"the header has {len(first_line)} column(s)."
+        )
+    return [position - 1]
+
+
+def build_fig(x_lists, y_lists, annotations, trace_names):
+    """Build a plotly figure holding one marker trace per series.
+
+    ``x_lists[i]``, ``y_lists[i]`` and ``trace_names[i]`` describe trace
+    ``i``; ``annotations`` is used as hover text for every trace.
+    """
+    figure = plotly.express.scatter()
+    for position, x_values in enumerate(x_lists):
+        trace = plotly.graph_objects.Scatter(
+            name=trace_names[position],
+            x=x_values,
+            y=y_lists[position],
+            hovertext=annotations,
+            mode="markers",
+        )
+        figure.add_trace(trace)
+    return figure
+
+
+def detect_sep(tabular_file: str) -> str:
+    """Guess the field separator from the first line of the file.
+
+    Returns ',' when the first line holds more commas than tabs, and a tab
+    otherwise (including on a tie or an empty file).
+    """
+    with open(tabular_file, "r") as handle:
+        header = handle.readline()
+    comma_count = header.count(',')
+    tab_count = header.count('\t')
+    return ',' if comma_count > tab_count else '\t'
+
+
+def build_html_plot(fig, output: str):
+    """Write ``fig`` to the HTML file ``output`` without opening a browser.
+
+    Returns whatever ``plotly.offline.plot`` returns.
+    """
+    plot_options = {"filename": output, "auto_open": False}
+    return plotly.offline.plot(fig, **plot_options)
+
+
+if __name__ == "__main__":
+    cli()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macro.xml	Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,170 @@
+<macros>
+
+  <token name="@DEFAULT_MZ@">303.05</token>
+  <token name="@DEFAULT_TOLERENCE@">10.0</token>
+  <token name="@DEFAULT_ADDUCT@">M+H</token>
+  <token name="@DEFAULT_DATABASE@">HMDB</token>
+
+  <xml name="get_data_inputs">
+    <param argument="--mz-ratio" type="float" min="1" max="1000" value="@DEFAULT_MZ@"
+      help="
+        The database will give us all compounds with m/z =
+        m/zRatio±MassTolerance and we will plot those
+        compounds.
+        Choose the mass tolerance according to this information.
+      "
+      label="M/z Ratio"
+    />
+
+    <param argument="--mass-tolerance" type="float" value="@DEFAULT_TOLERENCE@"
+      help="
+        The database will give us all compounds with m/z =
+        m/zRatio±MassTolerance and we will plot those
+        compounds.
+        Choose the mass tolerance according to this information.
+      "
+      label="Mass Tolerance"
+    />
+
+    <param argument="--database" type="select" value="@DEFAULT_DATABASE@"
+      label="Which database to interrogate"
+      help="
+        Those databases are sub-databases of KMD-HMDB Metabolites
+        and contain compounds with their KMD.
+      "
+    >
+      <option selected="true" value="hmdb">HMDB</option>
+      <option value="farid">KMD Metabolites</option>
+    </param>
+
+    <param argument="--adducts" type="select" multiple="true"
+      label="Adducts"
+      help="Which adducts to retrieve"
+    >
+      <option selected="true" value="M+H">M+H</option>
+      <option value="M+2H">M+2H</option>
+      <option value="M+H+NH4">M+H+NH4</option>
+      <option value="M+H+Na">M+H+Na</option>
+      <option value="M+H+K">M+H+K</option>
+      <option value="M+ACN+2H">M+ACN+2H</option>
+      <option value="M+2Na">M+2Na</option>
+      <option value="M+H-2H2O">M+H-2H2O</option>
+      <option value="M+H-H2O">M+H-H2O</option>
+      <option value="M+NH4">M+NH4</option>
+      <option value="M+Na">M+Na</option>
+      <option value="M+CH3OH+H">M+CH3OH+H</option>
+      <option value="M+K">M+K</option>
+      <option value="M+ACN+H">M+ACN+H</option>
+      <option value="M+2Na-H">M+2Na-H</option>
+      <option value="M+IsoProp+H">M+IsoProp+H</option>
+      <option value="M+ACN+Na">M+ACN+Na</option>
+      <option value="M+2K+H">M+2K+H</option>
+      <option value="M+DMSO+H">M+DMSO+H</option>
+      <option value="M+2ACN+H">M+2ACN+H</option>
+      <option value="2M+H">2M+H</option>
+      <option value="2M+NH4">2M+NH4</option>
+      <option value="2M+Na">2M+Na</option>
+      <option value="2M+K">2M+K</option>
+      <option value="M-H">M-H</option>
+      <option value="M-2H">M-2H</option>
+      <option value="M-H2O-H">M-H2O-H</option>
+      <option value="M+Cl">M+Cl</option>
+      <option value="M+FA-H">M+FA-H</option>
+      <option value="M+Hac-H">M+Hac-H</option>
+      <option value="M-H+HCOONa">M-H+HCOONa</option>
+      <option value="M+Br">M+Br</option>
+      <option value="M+TFA-H">M+TFA-H</option>
+      <option value="2M-H">2M-H</option>
+      <option value="2M+FA-H">2M+FA-H</option>
+      <option value="2M+Hac-H">2M+Hac-H</option>
+    </param>
+  </xml>
+
+  <xml name="produce_plot_inputs">
+    <param name="tsv_input" type="data" format="tsv"
+      help="Tabular file to use to produce the plot."
+      label="A Tabular Input File"
+    />
+    <param
+      name="annotation_columns"
+      type="data_column"
+      data_ref="tsv_input"
+      use_header_names="true"
+      multiple="true"
+      optional="true"
+      help="Select columns to show when a point of the graph is hovered"
+      label="Annotation columns"
+    />
+    <param
+      name="x_columns"
+      type="data_column"
+      data_ref="tsv_input"
+      use_header_names="true"
+      optional="true"
+      multiple="true"
+      help="
+        Select one or multiple column to use as X values
+        to generate the graph. See the help section to better
+        understand the usage of multiple values
+      "
+      label="X Axis"
+    />
+    <param
+      name="y_columns"
+      type="data_column"
+      data_ref="tsv_input"
+      use_header_names="true"
+      optional="true"
+      multiple="true"
+      help="
+        Select one or multiple column to use as Y values
+        to generate the graph. See the help section to better
+        understand the usage of multiple values
+      "
+      label="Y Axis"
+    />
+  </xml>
+
+  <xml name="not_get_data">
+    <param name="mz_ratio" value="unknown" type="hidden" />
+    <param name="mass_tolerance" value="unknown" type="hidden" />
+    <param name="database" value="unknown" type="hidden" />
+    <param name="adducts" value="unknown" type="hidden" />
+  </xml>
+
+  <xml name="not_produce_plot">
+    <param name="tsv_input" value="unknown" type="hidden" />
+    <param name="annotation_columns" type="hidden" />
+    <param name="x_columns" type="hidden" />
+    <param name="y_columns" type="hidden" />
+  </xml>
+
+  <xml name="get_data_outputs">
+    <data name="output_path" format="tsv"
+      label="tsv - ${tool.name} on ${what.mz_ratio}±${what.mass_tolerance} - ${what.database}"
+    >
+      <filter>"get_data" in str(what['to_do'])</filter>
+      <actions>
+        <action name="column_names" type="metadata"
+          default="database,metabolite_name,chemical_formula,hmdb_id,inchikey,compound_id,adduct,kendricks_mass,kendricks_mass_defect,monisotopic_molecular_weight,nominal_mass,polarity,annotation_id"
+        />
+      </actions>
+    </data>
+  </xml>
+
+  <xml name="produce_plot_outputs">
+    <!--
+      HTML plot output, only created when the selected action contains
+      "produce_plot". The label shows the query parameters when data were
+      fetched from the API, and otherwise the name of the input dataset,
+      whose parameter is named "tsv_input".
+    -->
+    <data name="output" format="html"
+      label="html - ${tool.name} on ${
+      ' - '
+      + str($what['mz_ratio'])
+      + '±' + str($what['mass_tolerance'])
+      + ' - ' + str($what['database'])
+      if 'get_data' in str($what['to_do'])
+      else ''' ' ''' + $what.tsv_input.name + ''' ' '''
+    }"
+    >
+      <filter>"produce_plot" in str(what['to_do'])</filter>
+    </data>
+  </xml>
+
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_data_tol_0.01.tsv	Tue Aug 29 09:45:16 2023 +0000
@@ -0,0 +1,15 @@
+database	metabolite_name	chemical_formula	hmdb_id	inchikey	compound_id	adduct	kendricks_mass	kendricks_mass_defect	monisotopic_molecular_weight	nominal_mass	polarity	annotation_id
+hmdb	5-(3',5'-Dihydroxyphenyl)-gamma-valerolactone-O-sulphate-O-methyl	C12H14O7S	HMDB0060031	FXGBBWWEXQWRKV-UHFFFAOYSA-N	193796	M+H	302.715	0.28509	303.053	303.0	positive	3982213
+hmdb	Quercetin	C15H10O7	HMDB0005794	REFJWTPEDVJJIY-UHFFFAOYSA-N	40965	M+H	302.712	0.288457	303.05	303.0	positive	4379351
+hmdb	8-Chloroinosine	C10H11ClN4O5	HMDB0247428	ROPMUQKCJYNROP-UHFFFAOYSA-N	130732	M+H	302.711	0.289311	303.049	303.0	positive	4548699
+hmdb	5-((p-Hydroxybenzylidene)amino)-3-methylisothiazolo(5,4-d)pyrimidine-4,6(5H,7H)-dione	C13H10N4O3S	HMDB0253558	ALZDMJPUQGYCAX-UHFFFAOYSA-N	68215	M+H	302.716	0.283753	303.055	303.0	positive	4993233
+hmdb	2',4',5,7,8-Pentahydroxyisoflavone	C15H10O7	HMDB0033264	LOLNVJIGYUJCIY-UHFFFAOYSA-N	101970	M+H	302.712	0.288457	303.05	303.0	positive	5292330
+hmdb	2-(2-Nitroimidazol-1-yl)-N-(2,2,3,3,3-pentafluoropropyl)acetamide	C8H7F5N4O3	HMDB0251710	JGGDSDPOPRWSCX-UHFFFAOYSA-N	9228	M+H	302.713	0.28728	303.051	303.0	positive	7593628
+hmdb	5,6,7,3',4'-Pentahydroxyisoflavone	C15H10O7	HMDB0041687	BIDDAFIPYBBDES-UHFFFAOYSA-N	134953	M+H	302.712	0.288457	303.05	303.0	positive	8100148
+hmdb	Morin	C15H10O7	HMDB0030796	YXOLAZRVSSWPPT-UHFFFAOYSA-N	141800	M+H	302.712	0.288457	303.05	303.0	positive	8184605
+hmdb	Tricetin	C15H10O7	HMDB0029620	ARSRJFRKVXALTF-UHFFFAOYSA-N	181210	M+H	302.712	0.288457	303.05	303.0	positive	8412749
+hmdb	9-(2,6-Dioxo-3H-purin-9-yl)-3H-purine-2,6-dione	C10H6N8O4	HMDB0257773	LLFQXBCTHVBLEI-UHFFFAOYSA-N	108799	M+H	302.72	0.279918	303.058	303.0	positive	8782069
+hmdb	6-Hydroxyluteolin	C15H10O7	HMDB0036632	VYAKIUWQLHRZGK-UHFFFAOYSA-N	74622	M+H	302.712	0.288457	303.05	303.0	positive	9521790
+hmdb	Pollenin A	C15H10O7	HMDB0303704	ZDOTZEDNGNPOEW-UHFFFAOYSA-N	3105	M+H	302.712	0.288457	303.05	303.0	positive	9722226
+hmdb	5,7,8,3',4'-Pentahydroxyisoflavone	C15H10O7	HMDB0041689	USQGZNXXBDCNQF-UHFFFAOYSA-N	15604	M+H	302.712	0.288457	303.05	303.0	positive	9958013
+hmdb	{2-methoxy-4-[(5-oxooxolan-2-yl)methyl]phenyl}oxidanesulfonic acid	C12H14O7S	HMDB0127769	FYRRHCSCZYSADR-UHFFFAOYSA-N	51990	M+H	302.715	0.285089	303.053	303.0	positive	10166087