Mercurial > repos > iuc > data_manager_pangolearn
changeset 4:6e24e79d3d69 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_pangolearn commit fd2566abd51c88847437d38a5abea8703b8ee034"
author | iuc |
---|---|
date | Tue, 05 Apr 2022 18:40:07 +0000 |
parents | df30a2f1db55 |
children | 29c738066906 |
files | data_manager/pangolearn_dm.py data_manager/pangolearn_dm.xml |
diffstat | 2 files changed, 46 insertions(+), 55 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/pangolearn_dm.py Mon Aug 30 13:44:01 2021 +0000 +++ b/data_manager/pangolearn_dm.py Tue Apr 05 18:40:07 2022 +0000 @@ -12,53 +12,43 @@ import requests -def extract_date(tag_str): - parts = tag_str.split("_") - assert len(parts) < 3, "expected maximum of two parts, got " + str(parts) - # there are tags like: 2021-07-07-2 - parts[0] = "-".join(parts[0].split("-")[:3]) - tag_date = datetime.datetime.strptime(parts[0], "%Y-%m-%d") - if len(parts) == 2: - version = int(parts[1]) - assert ( - version < 24 * 60 - ) # because the code stores versions as minutes of the day, it can't handle versions > 1440 - tag_date += datetime.timedelta(minutes=version) - return tag_date - - def get_model_list( existing_release_tags, - url="https://api.github.com/repos/cov-lineages/pangoLEARN/releases", + url="https://api.github.com/repos/cov-lineages/pangoLEARN/releases" ): - response = requests.get(url) - if response.status_code == 200: - release_list = json.loads(response.text) - release_info = [ - dict( - tag_name=e["tag_name"], - name=e["name"], - date=extract_date(e["tag_name"]), - tarball_url=e["tarball_url"], - ) - for e in release_list - if e["tag_name"] not in existing_release_tags - ] - return release_info - else: - response.raise_for_status() + page_num = 0 + while True: + page_num += 1 + response = requests.get(url + f'?page={page_num}') + if response.status_code == 200: + release_list_chunk = json.loads(response.text) + if not release_list_chunk: + # past the last page of results + return + for e in release_list_chunk: + if e["tag_name"] in existing_release_tags: + continue + if e["prerelease"]: + continue + yield dict( + tag_name=e["tag_name"], + name=e["name"], + date=parse_date(e["tag_name"]), + tarball_url=e["tarball_url"], + ) + else: + response.raise_for_status() def filter_by_date(existing_release_tags, start_date=None, end_date=None): - release_list = get_model_list(existing_release_tags) - return [ - element - for element in release_list - if not ( - (end_date is not None and element["date"] > end_date) - or (start_date is not None and element["date"] < start_date) - ) - ] + ret = [] + for release in get_model_list(existing_release_tags): + if start_date and release["date"] < start_date: + break + if not end_date or release["date"] <= end_date: + ret.append(release) + + return ret def download_and_unpack(url, output_directory): @@ -84,7 +74,9 @@ def parse_date(d): - return datetime.datetime.strptime(d, "%Y-%m-%d") + # Tries to parse the first 10 chars of d as a date, which currently + # succeeds for all pangolearn model releases. + return datetime.datetime.strptime(d[:10], "%Y-%m-%d") if __name__ == "__main__": @@ -101,9 +93,9 @@ args = parser.parse_args() if args.testmode: - releases = filter_by_date(start_date=args.start_date, end_date=args.end_date) + releases = filter_by_date([], start_date=args.start_date, end_date=args.end_date) for release in releases: - print(release["tag_name"], release["tarball_url"].split("/")[-1]) + print(release["tag_name"], release["tarball_url"].split("/")[-1], release["date"]) sys.exit(0) with open(args.galaxy_datamanager_filename) as fh: @@ -129,7 +121,7 @@ else: existing_release_tags = set() if args.latest: - releases = [get_model_list(existing_release_tags)[0]] + releases = [next(get_model_list(existing_release_tags))] else: releases = filter_by_date( existing_release_tags, start_date=args.start_date, end_date=args.end_date @@ -140,22 +132,21 @@ if release["tag_name"] not in existing_release_tags ] for release in releases_to_download: - tag = download_and_unpack(release["tarball_url"], output_directory) - release_date = parse_date(tag) + fname = download_and_unpack(release["tarball_url"], output_directory) if args.pangolearn_format_version is not None: version = args.pangolearn_format_version else: # 2021-05-27 was the first release of pangoLEARN for pangolin 3, which changed DB format - if release_date >= datetime.datetime(2021, 5, 27): + if release["date"] >= datetime.datetime(2021, 5, 27): version = '3.0' else: version = '1.0' data_manager_dict["data_tables"][args.datatable_name].append( dict( - value=tag, + value=release["tag_name"], description=release["name"], format_version=version, - path=output_directory + "/" + tag, + path=output_directory + "/" + fname, ) ) data_manager_dict["data_tables"][args.datatable_name].sort(
--- a/data_manager/pangolearn_dm.xml Mon Aug 30 13:44:01 2021 +0000 +++ b/data_manager/pangolearn_dm.xml Tue Apr 05 18:40:07 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="data_manager_pangolearn" name="PANGOlearn data manager" version="0.0.3" tool_type="manage_data" profile="20.01"> +<tool id="data_manager_pangolearn" name="PANGOlearn data manager" version="0.0.3+galaxy1" tool_type="manage_data" profile="20.01"> <requirements> <requirement type="package" version="3.8">python</requirement> <requirement type="package" version="2.24.0">requests</requirement> @@ -15,7 +15,7 @@ --end_date '$release.end_date' #end if #end if - 'pangolearn' + 'pangolearn' '${output_file}' ]]></command> <inputs> @@ -52,7 +52,7 @@ <has_text text="pangoLEARN data release 2021-04-01"/> <has_text text='"format_version": "1.0"'/> </assert_contents> - </output> + </output> </test> <test> <conditional name="release"> @@ -64,8 +64,8 @@ <assert_contents> <has_text text='"format_version": "3.0"'/> </assert_contents> - </output> - </test> + </output> + </test> </tests> <help><![CDATA[ This data managers fetches models (from the pangoLEARN_ repository) for the pangolin_