changeset 4:6e24e79d3d69 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_pangolearn commit fd2566abd51c88847437d38a5abea8703b8ee034"
author iuc
date Tue, 05 Apr 2022 18:40:07 +0000
parents df30a2f1db55
children 29c738066906
files data_manager/pangolearn_dm.py data_manager/pangolearn_dm.xml
diffstat 2 files changed, 46 insertions(+), 55 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/pangolearn_dm.py	Mon Aug 30 13:44:01 2021 +0000
+++ b/data_manager/pangolearn_dm.py	Tue Apr 05 18:40:07 2022 +0000
@@ -12,53 +12,43 @@
 import requests
 
 
-def extract_date(tag_str):
-    parts = tag_str.split("_")
-    assert len(parts) < 3, "expected maximum of two parts, got " + str(parts)
-    # there are tags like: 2021-07-07-2
-    parts[0] = "-".join(parts[0].split("-")[:3])
-    tag_date = datetime.datetime.strptime(parts[0], "%Y-%m-%d")
-    if len(parts) == 2:
-        version = int(parts[1])
-        assert (
-            version < 24 * 60
-        )  # because the code stores versions as minutes of the day, it can't handle versions > 1440
-        tag_date += datetime.timedelta(minutes=version)
-    return tag_date
-
-
 def get_model_list(
     existing_release_tags,
-    url="https://api.github.com/repos/cov-lineages/pangoLEARN/releases",
+    url="https://api.github.com/repos/cov-lineages/pangoLEARN/releases"
 ):
-    response = requests.get(url)
-    if response.status_code == 200:
-        release_list = json.loads(response.text)
-        release_info = [
-            dict(
-                tag_name=e["tag_name"],
-                name=e["name"],
-                date=extract_date(e["tag_name"]),
-                tarball_url=e["tarball_url"],
-            )
-            for e in release_list
-            if e["tag_name"] not in existing_release_tags
-        ]
-        return release_info
-    else:
-        response.raise_for_status()
+    page_num = 0
+    while True:
+        page_num += 1
+        response = requests.get(url + f'?page={page_num}')
+        if response.status_code == 200:
+            release_list_chunk = json.loads(response.text)
+            if not release_list_chunk:
+                # past the last page of results
+                return
+            for e in release_list_chunk:
+                if e["tag_name"] in existing_release_tags:
+                    continue
+                if e["prerelease"]:
+                    continue
+                yield dict(
+                    tag_name=e["tag_name"],
+                    name=e["name"],
+                    date=parse_date(e["tag_name"]),
+                    tarball_url=e["tarball_url"],
+                )
+        else:
+            response.raise_for_status()
 
 
 def filter_by_date(existing_release_tags, start_date=None, end_date=None):
-    release_list = get_model_list(existing_release_tags)
-    return [
-        element
-        for element in release_list
-        if not (
-            (end_date is not None and element["date"] > end_date)
-            or (start_date is not None and element["date"] < start_date)
-        )
-    ]
+    ret = []
+    for release in get_model_list(existing_release_tags):
+        if start_date and release["date"] < start_date:
+            break
+        if not end_date or release["date"] <= end_date:
+            ret.append(release)
+
+    return ret
 
 
 def download_and_unpack(url, output_directory):
@@ -84,7 +74,9 @@
 
 
 def parse_date(d):
-    return datetime.datetime.strptime(d, "%Y-%m-%d")
+    # Tries to parse the first 10 chars of d as a date, which currently
+    # succeeds for all pangolearn model releases.
+    return datetime.datetime.strptime(d[:10], "%Y-%m-%d")
 
 
 if __name__ == "__main__":
@@ -101,9 +93,9 @@
     args = parser.parse_args()
 
     if args.testmode:
-        releases = filter_by_date(start_date=args.start_date, end_date=args.end_date)
+        releases = filter_by_date([], start_date=args.start_date, end_date=args.end_date)
         for release in releases:
-            print(release["tag_name"], release["tarball_url"].split("/")[-1])
+            print(release["tag_name"], release["tarball_url"].split("/")[-1], release["date"])
         sys.exit(0)
 
     with open(args.galaxy_datamanager_filename) as fh:
@@ -129,7 +121,7 @@
     else:
         existing_release_tags = set()
     if args.latest:
-        releases = [get_model_list(existing_release_tags)[0]]
+        releases = [next(get_model_list(existing_release_tags))]
     else:
         releases = filter_by_date(
             existing_release_tags, start_date=args.start_date, end_date=args.end_date
@@ -140,22 +132,21 @@
         if release["tag_name"] not in existing_release_tags
     ]
     for release in releases_to_download:
-        tag = download_and_unpack(release["tarball_url"], output_directory)
-        release_date = parse_date(tag)
+        fname = download_and_unpack(release["tarball_url"], output_directory)
         if args.pangolearn_format_version is not None:
             version = args.pangolearn_format_version
         else:
             # 2021-05-27 was the first release of pangoLEARN for pangolin 3, which changed DB format
-            if release_date >= datetime.datetime(2021, 5, 27):
+            if release["date"] >= datetime.datetime(2021, 5, 27):
                 version = '3.0'
             else:
                 version = '1.0'
         data_manager_dict["data_tables"][args.datatable_name].append(
             dict(
-                value=tag,
+                value=release["tag_name"],
                 description=release["name"],
                 format_version=version,
-                path=output_directory + "/" + tag,
+                path=output_directory + "/" + fname,
             )
         )
     data_manager_dict["data_tables"][args.datatable_name].sort(
--- a/data_manager/pangolearn_dm.xml	Mon Aug 30 13:44:01 2021 +0000
+++ b/data_manager/pangolearn_dm.xml	Tue Apr 05 18:40:07 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="data_manager_pangolearn" name="PANGOlearn data manager" version="0.0.3" tool_type="manage_data" profile="20.01">
+<tool id="data_manager_pangolearn" name="PANGOlearn data manager" version="0.0.3+galaxy1" tool_type="manage_data" profile="20.01">
     <requirements>
         <requirement type="package" version="3.8">python</requirement>
         <requirement type="package" version="2.24.0">requests</requirement>
@@ -15,7 +15,7 @@
                 --end_date '$release.end_date'
             #end if
         #end if
-        'pangolearn' 
+        'pangolearn'
         '${output_file}'
     ]]></command>
     <inputs>
@@ -52,7 +52,7 @@
                     <has_text text="pangoLEARN data release 2021-04-01"/>
                     <has_text text='"format_version": "1.0"'/>
                 </assert_contents>
-            </output>        
+            </output>
         </test>
         <test>
             <conditional name="release">
@@ -64,8 +64,8 @@
                 <assert_contents>
                     <has_text text='"format_version": "3.0"'/>
                 </assert_contents>
-            </output>        
-        </test>        
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
         This data managers fetches models (from the pangoLEARN_ repository) for the pangolin_