Mercurial > repos > iuc > data_manager_interproscan
comparison data_manager/interproscan.py @ 0:e93e32359b67 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_interproscan commit 2f5d27a375fcc2e8d77914b3d9e402a9e2df2d97"
| author | iuc |
|---|---|
| date | Mon, 15 Nov 2021 17:21:22 +0000 |
| parents | |
| children | 0db4f153d86d |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e93e32359b67 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import hashlib | |
| 5 import json | |
| 6 import operator | |
| 7 import os | |
| 8 import re | |
| 9 import shutil | |
| 10 import subprocess | |
| 11 import sys | |
| 12 import tarfile | |
| 13 | |
| 14 import requests | |
| 15 | |
| 16 | |
# Base URL of the GitHub REST API for the InterProScan repository; the
# 'tags' endpoint is appended to it to enumerate the available releases.
GH_REPO_API = 'https://api.github.com/repos/ebi-pf-team/interproscan/'

# Location of the full data tarball and of its published MD5 checksum.
# Both are fetched over HTTPS: a checksum retrieved over the same cleartext
# channel as the archive would offer no protection against tampering.
MD5_URL = 'https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/{version}/interproscan-{version}-64-bit.tar.gz.md5'
DATA_URL = 'https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/{version}/interproscan-{version}-64-bit.tar.gz'

# For tests: download a smaller archive containing *some* data
PARTIAL_URL = 'https://github.com/ebi-pf-team/interproscan/archive/{version}.tar.gz'
| 23 | |
| 24 | |
def list_tags(url=None):
    """Return the sorted InterProScan release tags published on GitHub.

    Queries the GitHub tags API (``GH_REPO_API + 'tags'`` when *url* is not
    given) and follows the ``next`` pagination links until the last page.
    Only tag names shaped like ``5.52-86.0`` are kept.

    Args:
        url: Optional API URL of the first page to fetch.

    Returns:
        The matching tag names as a list, sorted in ascending string order.

    Raises:
        requests.HTTPError: if GitHub answers with an error status.
        requests.Timeout: if GitHub does not answer within the timeout.
    """
    if not url:
        url = GH_REPO_API + 'tags'

    tags = []
    while url:
        # Without a timeout, requests waits indefinitely on a stalled server.
        resp = requests.get(url=url, timeout=60)
        # An error payload is a JSON object rather than a list of tags, so
        # fail here with the real HTTP error instead of a confusing
        # TypeError while iterating over it.
        resp.raise_for_status()

        tags.extend(
            tag['name']
            for tag in resp.json()
            if re.match(r"^[0-9]\.[0-9]{2}-[0-9]{2}\.[0-9]$", tag['name'])
        )

        # resp.links is parsed from the Link header; no 'next' entry means
        # this was the last page.
        url = resp.links.get('next', {}).get('url')

    return sorted(tags)
| 42 | |
| 43 | |
def download_file(url, dest):
    """Stream the resource at *url* into the local file *dest*.

    The body is requested in streaming mode and written to disk in
    8192-byte chunks. The HTTP status is checked before *dest* is opened,
    so an error response raises without creating the file.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(dest, 'wb') as out_file:
            out_file.writelines(response.iter_content(chunk_size=8192))
| 50 | |
| 51 | |
def _md5_of_file(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *path*, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _run_initial_setup(data_dir):
    """Run InterProScan's initial_setup.py with data.directory set to *data_dir*."""
    launcher = shutil.which("interproscan.sh")
    if launcher is None:
        raise RuntimeError("Could not find 'interproscan.sh' in PATH")
    # initial_setup.py and interproscan.properties are looked up next to the
    # real (symlink-resolved) location of interproscan.sh.
    install_dir = os.path.dirname(os.path.realpath(launcher))

    # Write a temp properties file in work dir
    with open(os.path.join(install_dir, 'interproscan.properties'), 'r') as prop:
        prop_content = prop.read()
    # A callable replacement keeps backslashes in the path from being
    # interpreted as regex escape sequences.
    prop_content = re.sub(
        r'^data\.directory=.*$',
        lambda _match: 'data.directory=%s' % data_dir,
        prop_content,
        flags=re.M,
    )
    with open('interproscan.properties', 'w') as prop:
        prop.write(prop_content)

    # Run the index command. The child inherits stdout/stderr, so its output
    # is shown directly and there is nothing to capture and re-print.
    cmd_args = [os.path.join(install_dir, 'initial_setup.py')]
    proc = subprocess.run(cmd_args, shell=False)
    if proc.returncode:
        print("Error running initial_setup.py.", file=sys.stderr)
        sys.exit(proc.returncode)


def main():
    """Download InterProScan data and register it in a Galaxy data table.

    Command line: ``[--partial] [-v VERSION] datatable_name
    galaxy_datamanager_filename``. The JSON file named by the last argument
    is read for the output directory and existing data tables, and is
    overwritten at the end with the updated data table content.

    Steps: pick the requested (or latest) release tag, fetch the published
    MD5, download and extract the tarball, move the data into
    ``<output>/data``, run ``initial_setup.py`` and append the new entry to
    the data table.

    Raises:
        RuntimeError: on a missing output directory, an unknown version, an
            invalid or mismatching MD5, or a missing ``interproscan.sh``.
        SystemExit: with the return code of ``initial_setup.py`` if it fails.
    """
    parser = argparse.ArgumentParser(description='Download data for InterProScan')
    parser.add_argument('--partial', dest='partial', action='store_true', help='Only download a small subset of data (for testing)')
    parser.add_argument('-v', '--version', help='Specify an InterProScan version (default: latest)')
    parser.add_argument("datatable_name")
    parser.add_argument("galaxy_datamanager_filename")

    args = parser.parse_args()

    with open(args.galaxy_datamanager_filename) as fh:
        config = json.load(fh)

    # Fail with a clear message instead of os.mkdir(None) / IndexError when
    # the config has no usable output entry.
    output_data = config.get("output_data") or [{}]
    output_directory = output_data[0].get("extra_files_path", None)
    if not output_directory:
        raise RuntimeError("No 'extra_files_path' found in '%s'" % args.galaxy_datamanager_filename)

    data_manager_dict = {"data_tables": config.get("data_tables", {})}
    table_rows = data_manager_dict["data_tables"].setdefault(args.datatable_name, [])

    os.mkdir(output_directory)
    data_dir = os.path.join(output_directory, 'data')

    all_tags = list_tags()

    if args.version:
        if args.version not in all_tags:
            raise RuntimeError("Version '%s' is not valid" % args.version)
        tag = args.version
    else:
        if not all_tags:
            raise RuntimeError("No InterProScan version found")
        tag = all_tags[-1]

    print("Will download data for InterProScan version: %s" % tag)

    print("Getting MD5 checksum:")
    md5 = requests.get(url=MD5_URL.format(version=tag)).text
    if not re.match(r"^([a-fA-F\d]{32}) interproscan-[0-9]\.[0-9]{2}-[0-9]{2}\.[0-9]-64-bit.tar.gz$", md5):
        raise RuntimeError("Got invalid MD5 from the InterProScan FTP server: '%s'" % md5)
    print("%s" % md5)

    if args.partial:
        print("Downloading partial data tarball...")
        source_url = PARTIAL_URL.format(version=tag)
        dest_tar = os.path.join(output_directory, source_url.split('/')[-1])
        download_file(source_url, dest_tar)
    else:
        print("Downloading data tarball...")
        source_url = DATA_URL.format(version=tag)
        dest_tar = os.path.join(output_directory, source_url.split('/')[-1])
        download_file(source_url, dest_tar)

        # The published checksum describes the full tarball only; the
        # partial archive comes from a different server and cannot match it.
        print("Finished, now checking md5...")
        md5_computed = _md5_of_file(dest_tar)
        if not md5.startswith(md5_computed):
            raise RuntimeError("MD5 check failed: computed '%s', expected '%s'" % (md5_computed, md5))

    print("Ok, now extracting data...")
    # NOTE(review): members are extracted as-is; this relies on the upstream
    # archives being trusted.
    with tarfile.open(dest_tar, "r:gz") as tar:
        tar.extractall(output_directory)

    extracted_dir = os.path.join(output_directory, 'interproscan-%s' % tag)
    if args.partial:
        print("Moving partial data files around...")
        shutil.move(os.path.join(extracted_dir, 'core/jms-implementation/support-mini-x86-32/data/'), data_dir)
    else:
        print("Moving data files around...")
        # NOTE(review): this moves the whole extracted tree to 'data';
        # confirm that this is the layout data.directory is expected to have.
        shutil.move(extracted_dir, data_dir)

    print("Done, removing tarball and unneeded files...")
    os.remove(dest_tar)
    # After a full download the extracted tree has already been moved away,
    # so only clean up what is actually left behind.
    if os.path.isdir(extracted_dir):
        shutil.rmtree(extracted_dir)

    print("Running initial_setup.py (index hmm models)...")
    _run_initial_setup(data_dir)

    table_rows.append(
        {
            "value": tag,
            "description": "InterProScan %s" % tag,
            "interproscan_version": tag,
            "path": output_directory,
        }
    )

    print("Saving data table content...")

    table_rows.sort(key=operator.itemgetter("value"), reverse=True)
    with open(args.galaxy_datamanager_filename, "w") as fh:
        json.dump(data_manager_dict, fh, indent=2, sort_keys=True)

    print("Finished.")
| 157 | |
| 158 | |
# Only run the download when executed as a script, not when imported.
if __name__ == "__main__":
    main()
