data_manager_fetch_mapseq_db.py @ 0:dbf2735e8480 (draft)
planemo upload for repository https://github.com/galaxyproject/tools-iuc/ commit 66e797aaa79b92c282a8127260cdfd5702207e35
| author | iuc |
|---|---|
| date | Wed, 13 Sep 2023 19:54:19 +0000 |
| parents | |
| children | 4cd97cc67061 |
comparison of -1:000000000000 with 0:dbf2735e8480
```python
#!/usr/bin/env python

import argparse
import json
import os
import shutil
import tarfile
from datetime import datetime

import wget

DB_paths = {
    "mgnify_lsu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/silva_lsu-20200130.tar.gz",
    "mgnify_ssu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/silva_ssu-20200130.tar.gz",
    "mgnify_its_unite": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/UNITE-20200214.tar.gz",
    "mgnify_its_itsonedb": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/ITSoneDB-20200214.tar.gz",
    "test_lsu": "https://zenodo.org/record/8205348/files/test_lsu.tar.gz",
}

DB_names = {
    "mgnify_lsu": "MGnify LSU (v5.0.7) - silva_lsu-20200130",
    "mgnify_ssu": "MGnify SSU (v5.0.7) - silva_ssu-20200130",
    "mgnify_its_unite": "MGnify ITS UNITE (v5.0.7) - UNITE-20200214",
    "mgnify_its_itsonedb": "MGnify ITS ITSoneDB (v5.0.7) - ITSoneDB-20200214",
    "test_lsu": "Trimmed LSU Test DB",
}


def download_untar_store(url, tmp_path, dest_path):
    """
    Download a tar.gz file containing one folder,
    extract that folder and move its content into dest_path
    """

    extract_path = os.path.join(tmp_path, "extract")

    os.makedirs(tmp_path, exist_ok=True)

    # download the archive into the temporary directory and extract it
    filename = wget.download(url, out=tmp_path)
    tarfile_path = os.path.join(tmp_path, filename)
    with tarfile.open(tarfile_path) as tar:
        tar.extractall(extract_path)

    # the archive is expected to contain exactly one top-level folder
    if len(os.listdir(extract_path)) > 1:
        raise ValueError("More than one folder in the archive, aborting!")

    for folder in os.listdir(extract_path):
        folder_path = os.path.join(extract_path, folder)

        print(f"Copying data to {dest_path}")
        shutil.copytree(folder_path, dest_path)
        print("Done!")

    shutil.rmtree(tmp_path)


def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument("--out", dest="output", action="store", help="JSON filename")
    parser.add_argument("--version", dest="version", action="store", help="Version of the DB")
    parser.add_argument("--database-type", dest="db_type", action="store", help="DB type")
    parser.add_argument(
        "--test",
        action="store_true",
        help="option to test the script with a lightweight test database",
    )

    args = parser.parse_args()

    # The output file of a DM is a JSON containing args that can be used by the DM.
    # Most tools mainly use these args to find the extra_files_path for the DM,
    # which can be used to store the DB data.
    with open(args.output) as fh:
        params = json.load(fh)

    print(params)

    workdir = params["output_data"][0]["extra_files_path"]
    os.mkdir(workdir)

    time = datetime.utcnow().strftime("%Y-%m-%d")
    db_value = f"{args.db_type}_from_{time}"

    # output paths
    db_path = os.path.join(workdir, db_value)
    tmp_path = os.path.join(workdir, "tmp")

    # in test mode, swap in the small test database
    if args.test:
        url = DB_paths["test_lsu"]
    else:
        url = DB_paths[args.db_type]

    # download and unpack the data
    download_untar_store(url, tmp_path, db_path)

    db_name = DB_names[args.db_type]
    # update the data manager JSON and write it to file
    data_manager_entry = {
        "data_tables": {
            "mapseq_db": {
                "value": db_value,
                "name": f"{db_name} downloaded at {time}",
                "version": args.version,
                "path": db_path,
            }
        }
    }

    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)


if __name__ == "__main__":
    main()
```
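
For reference, a minimal sketch of the JSON handshake described in the comment in main(): Galaxy seeds the --out file with a params document before the script runs, and the script overwrites that same file with a data table entry. All paths and values below are hypothetical, shown for an imagined mgnify_lsu run.

```python
import json

# What Galaxy writes to the --out file before the script runs; the
# script only reads `extra_files_path` (hypothetical path).
params = {"output_data": [{"extra_files_path": "/galaxy/tool-data/mapseq/files"}]}

# What the script writes back: one row for the `mapseq_db` data table
# (hypothetical values for an mgnify_lsu run on 2023-09-13).
data_manager_entry = {
    "data_tables": {
        "mapseq_db": {
            "value": "mgnify_lsu_from_2023-09-13",
            "name": "MGnify LSU (v5.0.7) - silva_lsu-20200130 downloaded at 2023-09-13",
            "version": "5.0.7",
            "path": "/galaxy/tool-data/mapseq/files/mgnify_lsu_from_2023-09-13",
        }
    }
}

print(json.dumps(data_manager_entry, indent=2, sort_keys=True))
```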

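And a sketch of a local end-to-end test run, assuming the script is saved as data_manager_fetch_mapseq_db.py in the working directory, the `wget` package is installed, and Zenodo is reachable; Galaxy normally performs these steps itself.

```python
import json
import os
import subprocess
import tempfile

workdir = tempfile.mkdtemp()
out_json = os.path.join(workdir, "mapseq.json")

# seed the --out file with a minimal params document; the "db" folder
# must not exist yet, since the script creates it with os.mkdir
with open(out_json, "w") as fh:
    json.dump({"output_data": [{"extra_files_path": os.path.join(workdir, "db")}]}, fh)

# --test downloads the trimmed Zenodo LSU database instead of the full
# MGnify reference data, so the run stays small
subprocess.run(
    [
        "python", "data_manager_fetch_mapseq_db.py",
        "--out", out_json,
        "--database-type", "test_lsu",
        "--version", "1.0",
        "--test",
    ],
    check=True,
)

# the same file now holds the data table entry written by the script
with open(out_json) as fh:
    print(json.load(fh)["data_tables"]["mapseq_db"])
```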