# HG changeset patch
# User iuc
# Date 1658350963 0
# Node ID 33158d21324d0a2f96bfb8977464a2b65345556d
# planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_pangolin_data commit 902cce06e30ffe8ccba5dc0c3b704eb39fb4c611
# diff -r 000000000000 -r 33158d21324d data_manager/macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/macros.xml Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file
# diff -r 000000000000 -r 33158d21324d data_manager/pangolin_data_dm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/pangolin_data_dm.py Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,234 @@
#!/usr/bin/env python
"""Galaxy data manager script for the pangolin reference data packages.

Depending on the requested data table, releases of the cov-lineages
``pangolin-data``, ``pangolin-assignment`` or ``constellations`` package are
installed with pip into the job's ``extra_files_path`` and one row per release
is appended to the data manager JSON file given on the command line.
"""

import argparse
import csv
import datetime
import json
import operator
import pathlib
import shutil
import subprocess
import sys
import tempfile
from io import StringIO
from typing import Generator, Iterable, Optional, TextIO

# NOTE: ``requests`` is imported inside the two functions that talk to the
# network so that the pure parsing/selection helpers in this module can be
# imported (and tested) without the HTTP stack being installed.

# Seconds to wait for a remote server; requests never times out by default.
REQUEST_TIMEOUT = 60

# data table name -> (cov-lineages package, name of the minimum-version column)
DATA_TABLES = {
    "pangolin_data": ("pangolin-data", "min_pangolin_version"),
    "pangolin_constellations": ("constellations", "min_scorpio_version"),
    "pangolin_assignment": ("pangolin-assignment", "min_pangolin_version"),
}


def parse_date(d: str) -> datetime.datetime:
    """Parse a publication date from the GitHub API or from user input.

    Accepts ``YYYY-MM-DDTHH:MM:SSZ`` or plain ``YYYY-MM-DD`` and returns a
    naive ``datetime``.

    Raises:
        ValueError: if *d* matches neither format.
    """
    try:
        return datetime.datetime.strptime(d, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        return datetime.datetime.strptime(d, "%Y-%m-%d")


def get_model_list(package: str) -> Generator[dict, None, None]:
    """Yield the non-prerelease GitHub releases of ``cov-lineages/<package>``.

    Pages through the GitHub releases API until an empty page is returned.
    Each yielded dict has the keys ``tag_name``, ``name``, ``date`` (a
    ``datetime``) and ``tarball_url``.

    Raises:
        requests.HTTPError: on a 4xx/5xx response.
        RuntimeError: on any other non-200 response (previously such a
            response made the loop request the next page forever).
    """
    import requests

    url = f"https://api.github.com/repos/cov-lineages/{package}/releases"
    page_num = 0
    while True:
        page_num += 1
        response = requests.get(url + f"?page={page_num}", timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            response.raise_for_status()
            raise RuntimeError(
                f"Unexpected HTTP status {response.status_code} from {response.url}"
            )
        release_list_chunk = json.loads(response.text)
        if not release_list_chunk:
            # past the last page of results
            return
        for e in release_list_chunk:
            if e["prerelease"]:
                continue
            yield dict(
                tag_name=e["tag_name"],
                name=e["name"],
                date=parse_date(e["published_at"]),
                tarball_url=e["tarball_url"],
            )


def download_and_unpack(
    dependency: str, release: str, output_directory: str
) -> pathlib.Path:
    """Install one release of *dependency* with pip and move it into place.

    The package is installed from its GitHub repository at tag *release* into
    a temporary ``--target`` directory; the installed package directory
    (dependency name with ``-`` replaced by ``_``) is then moved to
    ``output_directory/<package dir>/<release>``.

    Returns:
        The final location of the unpacked package.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    url = f"git+https://github.com/cov-lineages/{dependency}.git@{release}"
    dependency_package_name = dependency.replace("-", "_")
    output_path = pathlib.Path(output_directory) / dependency_package_name / release
    with tempfile.TemporaryDirectory() as tmpdir:
        pip_command = [
            sys.executable,
            "-m",
            "pip",
            "install",
            "--upgrade",
            "--target",
            tmpdir,
            url,
        ]
        # output is saved in tmpdir/dependency, final output needs to be
        # in output_directory/dependency/release
        subprocess.run(pip_command, check=True)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(
            str(pathlib.Path(tmpdir) / dependency_package_name), str(output_path)
        )
    return output_path


def fetch_compatibility_info(
    package_name: str,
    url: str = "https://raw.githubusercontent.com/cov-lineages/pangolin/master/pangolin/data/data_compatibility.csv",
) -> dict[str, str]:
    """Download the compatibility table and extract the rows for *package_name*.

    Returns:
        A ``{version: minimum tool version}`` mapping, or an empty dict when
        the server does not answer with HTTP 200. Callers must treat an empty
        result as "no compatibility information available".
    """
    import requests

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        return read_compatibility_info(StringIO(response.text), package_name)
    return {}


def read_compatibility_info(
    input_file: TextIO, package_name: str
) -> dict[str, str]:
    """Read ``{version: minimum tool version}`` for *package_name* from a CSV.

    Rows are ``data_source,version,min_pangolin_version,min_scorpio_version``.
    For ``constellations`` the fourth column is used, for pangolin-data and
    pangolin-assignment the third. Rows for other packages (including the
    header), blank lines and rows too short to hold the needed column are
    skipped.
    """
    value_column = 3 if package_name == "constellations" else 2
    compatibility = {}
    for fields in csv.reader(input_file):
        if len(fields) <= value_column or fields[0].strip() != package_name:
            continue
        compatibility[fields[1].strip()] = fields[value_column].strip()
    return compatibility


def comma_split(args: str) -> list[str]:
    """Split a comma separated option value, dropping blanks and padding."""
    return [item.strip() for item in args.split(",") if item.strip()]


def git_lfs_install():
    """
    'git-lfs install' must be run after installing git-lfs and before cloning a repo
    that uses Git LFS. Code taken from pangolin repo.

    Exits the process with status -1 if git-lfs is missing or fails.
    """
    try:
        subprocess.run(
            ["git-lfs", "install"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError as e:
        # the executable itself is missing; there is no captured stderr to show
        sys.stderr.write(f"Error: {e}:\ngit-lfs is not installed\n")
        sys.exit(-1)
    except subprocess.CalledProcessError as e:
        stderr = e.stderr.decode("utf-8", errors="replace")
        sys.stderr.write(f"Error: {e}:\n{stderr}\n")
        sys.exit(-1)


def tag_to_version(tag_name: str) -> str:
    """Turn a release tag such as ``v1.12`` into the bare version ``1.12``.

    ``lstrip`` removes every leading ``v`` or ``.`` character, so a tag
    written as ``v.1.12`` yields ``1.12`` as well.
    """
    return tag_name.lstrip("v.")


def select_latest_release(
    releases: Iterable[dict], compatibility: dict[str, str], min_version_key: str
) -> Optional[dict]:
    """Return the first release in *releases* that has compatibility info.

    The matching release dict gets ``min_version_key`` set from
    *compatibility*. Returns ``None`` when *releases* is empty or no release
    is listed in *compatibility*.
    """
    for release in releases:
        version = tag_to_version(release["tag_name"])
        if version in compatibility:
            release[min_version_key] = compatibility[version]
            return release
    return None


def select_requested_releases(
    releases: Iterable[dict],
    compatibility: dict[str, str],
    min_version_key: str,
    releases_wanted: Iterable[str],
) -> tuple[list[dict], set[str]]:
    """Pick the releases whose version is wanted and has compatibility info.

    Returns:
        ``(selected, missing)``: the chosen release dicts (each with
        ``min_version_key`` filled in) and the wanted versions that were not
        found or have no compatibility info.
    """
    missing = set(releases_wanted)
    selected: list[dict] = []
    if not missing:
        # nothing to look for, so don't consume (i.e. download) the release list
        return selected, missing
    for release in releases:
        version = tag_to_version(release["tag_name"])
        # only add the releases for which we have compatibility info
        if version in missing and version in compatibility:
            release[min_version_key] = compatibility[version]
            selected.append(release)
            missing.remove(version)
            if not missing:
                # we've found all the releases we want
                break
    return selected, missing


def build_table_entry(
    release: dict, min_version_key: str, path: pathlib.Path
) -> dict[str, str]:
    """Build the data table row for one downloaded release.

    *path* is the directory returned by ``download_and_unpack``. It already
    includes the output directory, so it is stored as is rather than being
    joined onto the output directory a second time.
    """
    return {
        "value": release["tag_name"],
        "description": release["name"],
        min_version_key: release[min_version_key],
        "date": release["date"].isoformat(),  # ISO 8601 is easily sortable
        "path": str(path),
    }


def main(argv: Optional[list[str]] = None) -> None:
    """Command line entry point; *argv* defaults to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--latest", default=False, action="store_true")
    parser.add_argument("--version_compatibility_file", type=argparse.FileType())
    parser.add_argument("--versions", type=comma_split)
    # NOTE(review): --overwrite is accepted but nothing in this script reads it.
    parser.add_argument("--overwrite", default=False, action="store_true")
    parser.add_argument("--known_revisions", type=comma_split)
    parser.add_argument("datatable_name")
    parser.add_argument("datatable_cache_filename")
    parser.add_argument("galaxy_config")
    args = parser.parse_args(argv)

    if args.datatable_name not in DATA_TABLES:
        sys.exit(f"Unknown data table {args.datatable_name}")
    package_name, min_version_key = DATA_TABLES[args.datatable_name]

    if not args.latest:
        # fail with a usage message instead of a TypeError further down
        if args.versions is None:
            parser.error("--versions is required unless --latest is given")
        if args.version_compatibility_file is None:
            parser.error(
                "--version_compatibility_file is required unless --latest is given"
            )

    if args.datatable_name == "pangolin_assignment":
        git_lfs_install()

    with open(args.galaxy_config) as fh:
        config = json.load(fh)

    output_directory = config.get("output_data", [{}])[0].get("extra_files_path", None)

    try:
        with open(args.datatable_cache_filename) as fh:
            data_manager_dict = json.load(fh)
    except OSError:
        # on the first run this file doesn't exist
        data_manager_dict = {}

    if "data_tables" not in data_manager_dict:
        # got no entry for data tables, start from scratch
        data_manager_dict = {"data_tables": {}}
    # a data_tables entry from a previous run may lack this specific table
    table = data_manager_dict["data_tables"].setdefault(args.datatable_name, [])

    # known-revisions is populated from the Galaxy `pangolin_data` data table by the wrapper
    existing_release_tags = set(args.known_revisions or ())

    if args.latest:
        compatibility = fetch_compatibility_info(package_name)
        # choose the first release for which we have compatibility info
        latest_release = select_latest_release(
            get_model_list(package_name), compatibility, min_version_key
        )
        if latest_release is None:
            sys.exit(
                f"No release of {package_name} with compatibility information was found."
            )
        if latest_release["tag_name"] in existing_release_tags:
            releases = []
        else:
            releases = [latest_release]
    else:
        with args.version_compatibility_file as compatibility_file:
            compatibility = read_compatibility_info(compatibility_file, package_name)
        releases_wanted = set(args.versions) - {
            tag_to_version(tag) for tag in existing_release_tags
        }
        releases, still_wanted = select_requested_releases(
            get_model_list(package_name), compatibility, min_version_key, releases_wanted
        )
        if still_wanted:
            missing_releases = " ".join(sorted(still_wanted))
            sys.exit(
                f"Some of the requested releases ({missing_releases}) are not available."
            )

    if releases and output_directory is None:
        sys.exit("No extra_files_path found in the Galaxy job configuration.")

    for release in releases:
        release_path = download_and_unpack(
            package_name, release["tag_name"], output_directory
        )
        table.append(build_table_entry(release, min_version_key, release_path))

    # NOTE(review): this is a plain string sort, so "v1.9" is placed ahead of
    # "v1.12"; confirm that no consumer relies on newest-first order here.
    table.sort(key=operator.itemgetter("value"), reverse=True)
    with open(args.datatable_cache_filename, "w") as fh:
        json.dump(data_manager_dict, fh, indent=2, sort_keys=True)


if __name__ == "__main__":
    main()

# diff -r 000000000000 -r 33158d21324d data_manager/pangolin_data_dm.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/pangolin_data_dm.xml Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,141 @@ + + + python + pip + requests + git + git-lfs + gitpython + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/ve/veab064 + +
# diff -r 000000000000 -r 33158d21324d data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,56 @@ + + + + + + + + + + + + + ${path} + pangolin_data/#echo str($value)# + + ${GALAXY_DATA_MANAGER_DATA_PATH}/pangolin_data/#echo str($value)# + abspath + + + + + + + + + + + + + ${path} + pangolin_constellations/#echo str($value)# + + ${GALAXY_DATA_MANAGER_DATA_PATH}/pangolin_constellations/#echo str($value)# + abspath + + + + + + + + + + + + + ${path} + pangolin_assignment/#echo str($value)# + + ${GALAXY_DATA_MANAGER_DATA_PATH}/pangolin_assignment/#echo str($value)# + abspath + + + + +
# diff -r
000000000000 -r 33158d21324d test-data/data_compatibility.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data_compatibility.csv Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,37 @@ +data_source,version,min_pangolin_version,min_scorpio_version +pangolin-data,1.12,4, +pangolin-data,1.11,4, +pangolin-data,1.9,4, +pangolin-data,1.8,4, +pangolin-data,1.6,4, +pangolin-data,1.3,4, +pangolin-data,1.2.133,4, +pangolin-data,1.2.127,4, +pangolin-assignment,1.12,4, +pangolin-assignment,1.11,4, +pangolin-assignment,1.9,4, +pangolin-assignment,1.8,4, +pangolin-assignment,1.6,4, +pangolin-assignment,1.3,4, +pangolin-assignment,1.2.133,4, +constellations,0.1.10,,0.3.17 +constellations,0.1.9,,0.3.17 +constellations,0.1.8,,0.3.17 +constellations,0.1.7,,0.3.17 +constellations,0.1.6,,0.3.16 +constellations,0.1.5,,0.3.16 +constellations,0.1.4,,0.3.16 +constellations,0.1.3,,0.3.16 +constellations,0.1.2,,0.3.16 +constellations,0.1.1,,0.3.16 +constellations,0.1.0,,0.3.16 +constellations,0.0.30,,0.3.14 +constellations,0.0.29,,0.3.15 +constellations,0.0.28,,0.3.15 +constellations,0.0.27,,0.3.14 +constellations,0.0.26,,0.3.14 +constellations,0.0.25,,0.3.14 +constellations,0.0.24,,0.3.14 +constellations,0.0.23,,0.3.14 +constellations,0.0.22,,0.3.14 +constellations,0.0.21,,0.3.14 diff -r 000000000000 -r 33158d21324d test-data/pangolin_assignment.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pangolin_assignment.loc Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,10 @@ +# this is a tab separated file describing the location of pangolin_data databases used for the +# pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_pangolin_version date path +# +# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_data +# +# for example +#v1.11 pangolin-assignment v1.11 4 2022-06-29T19:09:03 /srv/galaxy/tool-data/pangolin_assignment/v1.11 diff -r 000000000000 -r 33158d21324d 
test-data/pangolin_constellations.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pangolin_constellations.loc Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,10 @@ +# this is a tab separated file describing the location of constellations databases used for the +# scorpio part of the pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_scorpio_version date path +# +# min_scorpio_version is the minimum scorpio tool major version that is needed to read the constellations data +# +# for example +#v0.1.10 constellations release v0.1.10 0.3.17 2022-05-05T13:14:56 /srv/galaxy/tool-data/pangolin_constellations/v0.1.10 diff -r 000000000000 -r 33158d21324d test-data/pangolin_data.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pangolin_data.loc Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,10 @@ +# this is a tab separated file describing the location of pangolin_data databases used for the +# pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_pangolin_version date path +# +# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_data +# +# for example +#v1.11 pangolin-data v1.11 4 2022-06-29T15:57:17 /srv/galaxy/tool-data/pangolin_data/v1.11 diff -r 000000000000 -r 33158d21324d tool-data/pangolin_assignment.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pangolin_assignment.loc Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,10 @@ +# this is a tab separated file describing the location of pangolin_assignment databases used for the +# pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_pangolin_version date path +# +# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_assignment data +# +# for example +#v1.11 pangolin-assignment v1.11 4 2022-06-29T19:09:03 /srv/galaxy/tool-data/pangolin_assignment/v1.11 diff -r 000000000000 -r 33158d21324d
tool-data/pangolin_constellations.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pangolin_constellations.loc Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,10 @@ +# this is a tab separated file describing the location of constellations databases used for the +# scorpio part of the pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_scorpio_version date path +# +# min_scorpio_version is the minimum scorpio tool major version that is needed to read the constellations data +# +# for example +#v0.1.10 constellations release v0.1.10 0.3.17 2022-05-05T13:14:56 /srv/galaxy/tool-data/pangolin_constellations/v0.1.10 diff -r 000000000000 -r 33158d21324d tool-data/pangolin_data.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pangolin_data.loc Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,10 @@ +# this is a tab separated file describing the location of pangolin_data databases used for the +# pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_pangolin_version date path +# +# min_pangolin_version is the minimum pangolin tool major version that is needed to read the pangolin_data +# +# for example +#v1.11 pangolin-data v1.11 4 2022-06-29T15:57:17 /srv/galaxy/tool-data/pangolin_data/v1.11 diff -r 000000000000 -r 33158d21324d tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,17 @@ + + + + value, description, min_pangolin_version, date, path + +
+ + + value, description, min_scorpio_version, date, path + +
+ + + value, description, min_pangolin_version, date, path + +
+
diff -r 000000000000 -r 33158d21324d tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed Jul 20 21:02:43 2022 +0000 @@ -0,0 +1,17 @@ + + + + value, description, min_pangolin_version, date, path + +
+ + + value, description, min_scorpio_version, date, path + +
+ + + value, description, min_pangolin_version, date, path + +
+