changeset 0:33158d21324d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_pangolin_data commit 902cce06e30ffe8ccba5dc0c3b704eb39fb4c611
author iuc
date Wed, 20 Jul 2022 21:02:43 +0000
parents
children c22f6c820658
files data_manager/macros.xml data_manager/pangolin_data_dm.py data_manager/pangolin_data_dm.xml data_manager_conf.xml test-data/data_compatibility.csv test-data/pangolin_assignment.loc test-data/pangolin_constellations.loc test-data/pangolin_data.loc tool-data/pangolin_assignment.loc tool-data/pangolin_constellations.loc tool-data/pangolin_data.loc tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 13 files changed, 591 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/macros.xml	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,29 @@
+<macros>
+    <xml name="download_conditional" token_dt_name="db_name" token_database_name="db-name" token_software_name="pangolin">
+        <conditional name="@DT_NAME@">
+            <param name="enable" type="select" label="For @DATABASE_NAME@">
+                <option value="latest" selected="true">Download latest version</option>
+                <option value="versions">Download specific versions</option>
+                <option value="no">Don't download @DATABASE_NAME@</option>
+            </param>
+            <when value="latest">
+            </when>
+            <when value="versions">
+                <param name="compatibility_file" 
+                        type="data" format="csv"
+                        label="Version compatibility file"
+                        help="CSV dataset describing compatibility between @SOFTWARE_NAME@ releases and databases; typically, you would want to download fresh from https://raw.githubusercontent.com/cov-lineages/pangolin/master/pangolin/data/data_compatibility.csv" />
+                <param name="versions" type="select" multiple="true" label="Select @DATABASE_NAME@ release">
+                    <options from_dataset="compatibility_file" separator="," >
+                        <column name="name" index="1"/>
+                        <column name="value" index="1"/>
+                        <column name="min_@SOFTWARE_NAME@_version" index="3"/>
+                        <filter type="static_value" column="0" value="@DATABASE_NAME@"/>
+                    </options>
+                    </param>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/pangolin_data_dm.py	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+
+import argparse
+import datetime
+import json
+import operator
+import pathlib
+import shutil
+import subprocess
+import sys
+import tempfile
+from io import StringIO
+from typing import Generator, TextIO
+
+import requests
+
+
+def parse_date(d: str) -> datetime.datetime:
+    """Turn a GitHub API timestamp (or a plain YYYY-MM-DD date) into a naive datetime."""
+    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
+    try:
+        return datetime.datetime.strptime(d, timestamp_format)
+    except ValueError:
+        # not a full timestamp, so it has to be a bare date; anything else raises ValueError
+        return datetime.datetime.strptime(d, "%Y-%m-%d")
+
+
+def get_model_list(package: str) -> Generator[dict, None, None]:
+    """Yield the published, non-prerelease GitHub releases of cov-lineages/<package>.
+
+    Pages through the releases endpoint until an empty page is returned. Each item
+    is a dict with tag_name, name, date (parsed to a datetime) and tarball_url.
+    """
+    url = f"https://api.github.com/repos/cov-lineages/{package}/releases"
+    page_num = 0
+    while True:
+        page_num += 1
+        response = requests.get(url, params={"page": page_num}, timeout=60)
+        response.raise_for_status()  # 4xx/5xx raise here; no endless loop on other statuses
+        release_list_chunk = response.json()
+        if not release_list_chunk:
+            return  # past the last page of results
+        for e in release_list_chunk:
+            if e["prerelease"]:
+                continue
+            yield dict(
+                tag_name=e["tag_name"], name=e["name"],
+                date=parse_date(e["published_at"]), tarball_url=e["tarball_url"],
+            )
+
+
+def download_and_unpack(
+    dependency: str, release: str, output_directory: str
+) -> pathlib.Path:
+    """Install one release of a cov-lineages repository and move it into place.
+
+    pip installs the repository at the `release` tag into a scratch directory; the
+    package directory it creates (the repository name with "-" replaced by "_") is
+    then moved to output_directory/<package directory>/<release>, which is returned.
+    A failing pip run raises subprocess.CalledProcessError.
+    """
+    package_dir_name = dependency.replace("-", "_")
+    destination = pathlib.Path(output_directory) / package_dir_name / release
+    source_url = f"git+https://github.com/cov-lineages/{dependency}.git@{release}"
+    install_options = ["install", "--upgrade", "--target"]
+    with tempfile.TemporaryDirectory() as scratch:
+        # pip puts the package in scratch/<package directory>; everything else that
+        # lands in scratch is thrown away together with the temporary directory
+        subprocess.run(
+            [sys.executable, "-m", "pip", *install_options, scratch, source_url],
+            check=True,
+        )
+        unpacked = pathlib.Path(scratch) / package_dir_name
+        shutil.move(str(unpacked), str(destination))
+    return destination
+
+
+def fetch_compatibility_info(
+    package_name: str,
+    url: str = "https://raw.githubusercontent.com/cov-lineages/pangolin/master/pangolin/data/data_compatibility.csv",
+) -> dict[str, str]:
+    """Download the compatibility CSV; return {version: min tool version} for package_name."""
+    response = requests.get(url, timeout=60)
+    if response.status_code != 200:
+        # best effort: an unavailable matrix is reported as "no compatibility info"
+        return {}
+    return read_compatibility_info(StringIO(response.text), package_name)
+
+
+def read_compatibility_info(
+    input_file: TextIO, package_name: str
+) -> dict[str, str]:
+    """Map each listed version of package_name to the minimum tool version it needs.
+
+    Rows are data_source,version,min_pangolin_version,min_scorpio_version. Rows for
+    other data sources, or too short to hold the needed column, are skipped.
+    """
+    # constellations is versioned against scorpio, the other packages against pangolin
+    column = 3 if package_name == "constellations" else 2
+    rows = (line.strip().split(",") for line in input_file)
+    return {
+        f[1]: f[column] for f in rows if f[0] == package_name and len(f) > column
+    }
+
+
+def comma_split(args: str) -> list[str]:
+    return args.split(sep=",")  # argparse `type=` hook: "a,b" -> ["a", "b"]
+
+
+def git_lfs_install():
+    """
+    Run 'git-lfs install', which is needed once after installing git-lfs and before
+    cloning a repository that uses Git LFS. Approach taken from the pangolin repo.
+    On failure the error and git-lfs's stderr are reported and the script exits.
+    """
+    try:
+        subprocess.run(
+            ["git-lfs", "install"],
+            check=True,
+            capture_output=True,
+        )
+    except subprocess.CalledProcessError as e:
+        stderr = e.stderr.decode("utf-8")
+        sys.stderr.write(f"Error: {e}:\n{stderr}\n")
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    # Entry point: download the requested releases of one pangolin database and record
+    # them in the JSON data table cache (note: --overwrite is parsed but never used).
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--latest", default=False, action="store_true")
+    parser.add_argument("--version_compatibility_file", type=argparse.FileType())
+    parser.add_argument("--versions", type=comma_split)
+    parser.add_argument("--overwrite", default=False, action="store_true")
+    parser.add_argument("--known_revisions", type=comma_split)
+    parser.add_argument("datatable_name")
+    parser.add_argument("datatable_cache_filename")
+    parser.add_argument("galaxy_config")
+    args = parser.parse_args()
+    if not args.latest and None in (args.version_compatibility_file, args.versions):
+        parser.error("--version_compatibility_file and --versions are required without --latest")
+
+    if args.datatable_name == "pangolin_data":
+        package_name = "pangolin-data"
+        min_version_key = "min_pangolin_version"
+    elif args.datatable_name == "pangolin_constellations":
+        package_name = "constellations"
+        min_version_key = "min_scorpio_version"
+    elif args.datatable_name == "pangolin_assignment":
+        package_name = "pangolin-assignment"
+        min_version_key = "min_pangolin_version"
+        git_lfs_install()
+    else:
+        sys.exit(f"Unknown data table {args.datatable_name}")
+
+    with open(args.galaxy_config) as fh:
+        config = json.load(fh)
+
+    output_directory = config.get("output_data", [{}])[0].get("extra_files_path", None)
+
+    try:
+        with open(args.datatable_cache_filename) as fh:
+            data_manager_dict = json.load(fh)
+    except IOError:
+        # on the first run this file doesn't exist
+        data_manager_dict = {}
+
+    # keep tables written by previous runs and make sure this run's table has a row list
+    table_rows = data_manager_dict.setdefault("data_tables", {}).setdefault(
+        args.datatable_name, []
+    )
+
+    # known-revisions is populated from the matching Galaxy data table by the wrapper
+    existing_release_tags = set(args.known_revisions or [])
+    if args.latest:
+        compatibility = fetch_compatibility_info(package_name)
+        latest_release = None
+        for candidate in get_model_list(package_name):
+            # choose the first release for which we have compatibility info
+            version = candidate["tag_name"].lstrip("v.")
+            if version in compatibility:
+                candidate[min_version_key] = compatibility[version]
+                latest_release = candidate
+                break
+        if latest_release is None:
+            # no release listed, or none of them covered by the compatibility matrix
+            sys.exit(f"No {package_name} release with compatibility information found.")
+        if latest_release["tag_name"] in existing_release_tags:
+            releases = []
+        else:
+            releases = [latest_release]
+    else:
+        compatibility = read_compatibility_info(
+            args.version_compatibility_file, package_name
+        )
+        downloadable_releases = get_model_list(package_name)
+        # release tags carry a leading "v" that the requested versions do not have
+        releases_wanted = set(args.versions) - {
+            tag.lstrip("v.") for tag in existing_release_tags
+        }
+        releases = []
+        for release in downloadable_releases:
+            version = release["tag_name"].lstrip("v.")
+            if version in releases_wanted:
+                if version in compatibility:
+                    # only add the releases for which we have compatibility info
+                    release[min_version_key] = compatibility[version]
+                    releases.append(release)
+                    releases_wanted.remove(version)
+                    if not releases_wanted:
+                        # we've found all the releases we want
+                        break
+        if releases_wanted:
+            missing_releases = " ".join(releases_wanted)
+            sys.exit(
+                f"Some of the requested releases ({missing_releases}) are not available."
+            )
+
+    for release in releases:
+        # the returned path already starts with output_directory, so it is used as is
+        path = download_and_unpack(package_name, release["tag_name"], output_directory)
+        table_rows.append(
+            {
+                "value": release["tag_name"],
+                "description": release["name"],
+                min_version_key: release[min_version_key],
+                "date": release["date"].isoformat(),  # ISO 8601 is easily sortable
+                "path": str(path),
+            }
+        )
+    table_rows.sort(key=operator.itemgetter("value"), reverse=True)
+    with open(args.datatable_cache_filename, "w") as fh:
+        json.dump(data_manager_dict, fh, indent=2, sort_keys=True)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/pangolin_data_dm.xml	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,141 @@
+<tool id="data_manager_pangolin_data" name="pangolin-data data manager" version="0.0.1+galaxy0" tool_type="manage_data" profile="20.01">
+    <requirements>
+        <requirement type="package">python</requirement>
+        <requirement type="package" version="22.1.2">pip</requirement>
+        <requirement type="package" version="2.28.1">requests</requirement>
+        <requirement type="package" version="2.37.1">git</requirement>
+        <requirement type="package" version="3.2.0">git-lfs</requirement>
+        <requirement type="package" version="3.1.27">gitpython</requirement>
+    </requirements>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <command detect_errors="exit_code"><![CDATA[
+    #set $dt_names = []
+    #if str($pangolin_data.enable) == "latest" or str($pangolin_data.enable) == "versions" 
+        #set $dt_names = $dt_names + ["pangolin_data"]
+    #end if
+    #if str($pangolin_constellations.enable) == "latest" or str($pangolin_constellations.enable) == "versions"
+        #set $dt_names = $dt_names + ["pangolin_constellations"]
+    #end if
+    #if str($pangolin_assignment.enable) == "latest" or str($pangolin_assignment.enable) == "versions"
+        #set $dt_names = $dt_names + ["pangolin_assignment"]
+    #end if
+    #for $dt_name in $dt_names
+        #set $data_table = $__app__.tool_data_tables.get($dt_name)
+        #if $data_table is not None and len($data_table.get_fields()) != 0
+            #set $known_revisions = '--known_revisions=' + ','.join([row[0] for row in $data_table.get_fields()])
+        #else
+            #set $known_revisions = ''
+        #end if
+        python '$__tool_directory__/pangolin_data_dm.py'
+            $known_revisions
+            #if $getVar($dt_name).enable == "latest"
+                --latest
+            #else if $getVar($dt_name).enable == "versions"
+                --version_compatibility_file '$getVar($dt_name).compatibility_file'
+                --versions=$getVar($dt_name).versions
+            #end if
+            '$dt_name'
+            datatable_cache.json
+            '${output_file}' &&
+        #end for
+        cp datatable_cache.json '${output_file}'
+    ]]></command>
+    <inputs>
+        <expand macro="download_conditional" dt_name="pangolin_data" database_name="pangolin-data" software_name="pangolin" />
+        <expand macro="download_conditional" dt_name="pangolin_constellations" database_name="constellations" software_name="scorpio" />
+        <expand macro="download_conditional" dt_name="pangolin_assignment" database_name="pangolin-assignment" software_name="pangolin" />
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <conditional name="pangolin_data">
+                <param name="enable" value="versions" />
+                <param name="compatibility_file" ftype="csv" value="data_compatibility.csv" />
+                <param name="versions" value="1.12" />
+            </conditional>
+            <conditional name="pangolin_constellations">
+                <param name="enable" value="versions" />
+                <param name="compatibility_file" ftype="csv" value="data_compatibility.csv" />
+                <param name="versions" value="0.1.10" />
+            </conditional>
+            <conditional name="pangolin_assignment">
+                <param name="enable" value="no" />
+            </conditional>
+            <output name="output_file">
+                <assert_contents>
+                    <has_text text="pangolin-data v1.12" />
+                    <has_text text='"description": "constellations release v0.1.10"' />
+                    <has_text text='"min_scorpio_version": "0.3.17"' />
+                    <not_has_text text='"description": "pangolin-assignment' />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="pangolin_data">
+                <param name="enable" value="no" />
+            </conditional>
+            <conditional name="pangolin_constellations">
+                <param name="enable" value="no" />
+            </conditional>
+            <conditional name="pangolin_assignment">
+                <param name="enable" value="versions" />
+                <param name="compatibility_file" ftype="csv" value="data_compatibility.csv" />
+                <param name="versions" value="1.12" />
+            </conditional>
+            <output name="output_file">
+                <assert_contents>
+                    <has_text text='"value": "v1.12"' />
+                    <has_text text='"min_pangolin_version": "4"' />
+                    <not_has_text text='"description": "constellations' />
+                    <not_has_text text='"description": "pangolin-data' />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_failure="true">
+            <conditional name="pangolin_data">
+                <param name="enable" value="no" />
+            </conditional>
+            <conditional name="pangolin_constellations">
+                <param name="enable" value="no" />
+                <param name="compatibility_file" ftype="csv" value="data_compatibility.csv" />
+                <param name="versions" value="0.0.40" />
+            </conditional>
+            <conditional name="pangolin_assignment">
+                <param name="enable" value="no" />
+            </conditional>
+        </test>
+    </tests>
+    <help><![CDATA[
+        This data manager fetches models (from the pangolin-data_,
+        pangolin-assignment_ and constellations_ repositories) for the
+        pangolin_ SARS-CoV-2 lineage typing tool and updates the pangolin_data,
+        pangolin_assignment and pangolin_constellations data tables.
+
+        The default is to fetch the latest version of the databases. If specific versions of databases
+        need to be installed, a version compatibility matrix needs to be provided to help choose
+        the versions available. This matrix is stored in a CSV file in the pangolin repository
+        and can be downloaded at this link_. When loading this file into Galaxy, the csv datatype
+        needs to be selected, because the automatic format detection code detects this file as type txt.
+
+        **Note** that this data manager will only download database versions listed in the file
+        mentioned above. While effort is taken to ensure that this file is updated with each pangolin
+        database release, if that has not happened, the user of this data manager will need to provide
+        their own version compatibility matrix file in order to load the latest database release into Galaxy.
+
+        The data manager will read the existing data tables and not re-download or replace databases
+        that are already present in those data tables.
+
+        .. _pangolin-data: https://github.com/cov-lineages/pangolin-data
+        .. _pangolin-assignment: https://github.com/cov-lineages/pangolin-assignment
+        .. _constellations: https://github.com/cov-lineages/constellations
+        .. _pangolin: https://github.com/cov-lineages/pangolin
+        .. _link: https://raw.githubusercontent.com/cov-lineages/pangolin/master/pangolin/data/data_compatibility.csv
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/ve/veab064</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,56 @@
+<?xml version="1.0"?>
+<data_managers>    
+    <data_manager tool_file="data_manager/pangolin_data_dm.xml" id="data_manager_pangolin_data">
+        <data_table name="pangolin_data">
+            <output>
+                <column name="value" />
+                <column name="description" />
+                <column name="min_pangolin_version" />
+                <column name="date" />
+                <column name="path" output_ref="output_file" >
+                    <!-- note: 'value' is a release tag that the Python script takes from the GitHub releases list, it is not free-form user input -->
+                    <move type="directory" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">pangolin_data/#echo str($value)#</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/pangolin_data/#echo str($value)#</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+        <data_table name="pangolin_constellations">
+            <output>
+                <column name="value" />
+                <column name="description" />
+                <column name="min_scorpio_version" />
+                <column name="date" />
+                <column name="path" output_ref="output_file" >
+                    <!-- note: 'value' is a release tag that the Python script takes from the GitHub releases list, it is not free-form user input -->
+                    <move type="directory" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">pangolin_constellations/#echo str($value)#</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/pangolin_constellations/#echo str($value)#</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+        <data_table name="pangolin_assignment">
+            <output>
+                <column name="value" />
+                <column name="description" />
+                <column name="min_pangolin_version" />
+                <column name="date" />
+                <column name="path" output_ref="output_file" >
+                    <!-- note: 'value' is a release tag that the Python script takes from the GitHub releases list, it is not free-form user input -->
+                    <move type="directory" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">pangolin_assignment/#echo str($value)#</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/pangolin_assignment/#echo str($value)#</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/data_compatibility.csv	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,37 @@
+data_source,version,min_pangolin_version,min_scorpio_version
+pangolin-data,1.12,4,
+pangolin-data,1.11,4,
+pangolin-data,1.9,4,
+pangolin-data,1.8,4,
+pangolin-data,1.6,4,
+pangolin-data,1.3,4,
+pangolin-data,1.2.133,4,
+pangolin-data,1.2.127,4,
+pangolin-assignment,1.12,4,
+pangolin-assignment,1.11,4,
+pangolin-assignment,1.9,4,
+pangolin-assignment,1.8,4,
+pangolin-assignment,1.6,4,
+pangolin-assignment,1.3,4,
+pangolin-assignment,1.2.133,4,
+constellations,0.1.10,,0.3.17
+constellations,0.1.9,,0.3.17
+constellations,0.1.8,,0.3.17
+constellations,0.1.7,,0.3.17
+constellations,0.1.6,,0.3.16
+constellations,0.1.5,,0.3.16
+constellations,0.1.4,,0.3.16
+constellations,0.1.3,,0.3.16
+constellations,0.1.2,,0.3.16
+constellations,0.1.1,,0.3.16
+constellations,0.1.0,,0.3.16
+constellations,0.0.30,,0.3.14
+constellations,0.0.29,,0.3.15
+constellations,0.0.28,,0.3.15
+constellations,0.0.27,,0.3.14
+constellations,0.0.26,,0.3.14
+constellations,0.0.25,,0.3.14
+constellations,0.0.24,,0.3.14
+constellations,0.0.23,,0.3.14
+constellations,0.0.22,,0.3.14
+constellations,0.0.21,,0.3.14
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pangolin_assignment.loc	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,10 @@
+# this is a tab separated file describing the location of pangolin_assignment databases used for the
+# pangolin SARS-CoV-2 lineage typing tool
+#
+# the columns are:
+# value  description min_pangolin_version	date	path
+#
+# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_assignment data
+#
+# for example
+#v1.11	pangolin-assignment v1.11	4	2022-06-29T19:09:03	/srv/galaxy/tool-data/pangolin_assignment/v1.11
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pangolin_constellations.loc	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,10 @@
+# this is a tab separated file describing the location of constellations databases used for the
+# scorpio part of the pangolin SARS-CoV-2 lineage typing tool
+#
+# the columns are:
+# value  description min_scorpio_version	date	path
+#
+# min_scorpio_version is the minimum scorpio tool version that is needed to read the constellations data
+#
+# for example
+#v0.1.10	constellations release v0.1.10	0.3.17	2022-05-05T13:14:56	/srv/galaxy/tool-data/pangolin_constellations/v0.1.10
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pangolin_data.loc	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,10 @@
+# this is a tab separated file describing the location of pangolin_data databases used for the
+# pangolin SARS-CoV-2 lineage typing tool
+#
+# the columns are:
+# value  description min_pangolin_version	date	path
+#
+# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_data
+#
+# for example
+#v1.11	pangolin-data v1.11	4	2022-06-29T15:57:17	/srv/galaxy/tool-data/pangolin_data/v1.11
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pangolin_assignment.loc	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,10 @@
+# this is a tab separated file describing the location of pangolin_assignment databases used for the
+# pangolin SARS-CoV-2 lineage typing tool
+#
+# the columns are:
+# value  description min_pangolin_version	date	path
+#
+# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_assignment data
+#
+# for example
+#v1.11	pangolin-assignment v1.11	4	2022-06-29T19:09:03	/srv/galaxy/tool-data/pangolin_assignment/v1.11
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pangolin_constellations.loc	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,10 @@
+# this is a tab separated file describing the location of constellations databases used for the
+# scorpio part of the pangolin SARS-CoV-2 lineage typing tool
+#
+# the columns are:
+# value  description min_scorpio_version	date	path
+#
+# min_scorpio_version is the minimum scorpio tool version that is needed to read the constellations data
+#
+# for example
+#v0.1.10	constellations release v0.1.10	0.3.17	2022-05-05T13:14:56	/srv/galaxy/tool-data/pangolin_constellations/v0.1.10
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pangolin_data.loc	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,10 @@
+# this is a tab separated file describing the location of pangolin_data databases used for the
+# pangolin SARS-CoV-2 lineage typing tool
+#
+# the columns are:
+# value  description min_pangolin_version	date	path
+#
+# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_data
+#
+# for example
+#v1.11	pangolin-data v1.11	4	2022-06-29T15:57:17	/srv/galaxy/tool-data/pangolin_data/v1.11
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <table name="pangolin_data" comment_char="#" allow_duplicate_entries="False">
+        <!-- min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_data -->
+        <columns>value, description, min_pangolin_version, date, path</columns>
+        <file path="tool-data/pangolin_data.loc" />
+    </table>
+    <table name="pangolin_constellations" comment_char="#" allow_duplicate_entries="False">
+        <!-- min_scorpio_version is the minimum scorpio tool version that is needed to read the constellations data -->
+        <columns>value, description, min_scorpio_version, date, path</columns>
+        <file path="tool-data/pangolin_constellations.loc" />
+    </table>    
+    <table name="pangolin_assignment" comment_char="#" allow_duplicate_entries="False">
+        <!-- min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_assignment data -->
+        <columns>value, description, min_pangolin_version, date, path</columns>
+        <file path="tool-data/pangolin_assignment.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed Jul 20 21:02:43 2022 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <table name="pangolin_data" comment_char="#" allow_duplicate_entries="False">
+        <!-- min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_data -->
+        <columns>value, description, min_pangolin_version, date, path</columns>
+        <file path="${__HERE__}/test-data/pangolin_data.loc" />
+    </table>
+    <table name="pangolin_constellations" comment_char="#" allow_duplicate_entries="False">
+        <!-- min_scorpio_version is the minimum scorpio tool version that is needed to read the constellations data -->
+        <columns>value, description, min_scorpio_version, date, path</columns>
+        <file path="${__HERE__}/test-data/pangolin_constellations.loc" />
+    </table>    
+    <table name="pangolin_assignment" comment_char="#" allow_duplicate_entries="False">
+        <!-- min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_assignment data -->
+        <columns>value, description, min_pangolin_version, date, path</columns>
+        <file path="${__HERE__}/test-data/pangolin_assignment.loc" />
+    </table>
+</tables>