Mercurial > repos > scottx611x > data_manager_fetch_gene_annotation
changeset 47:3a02b8ab423a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_gene_annotation/ commit 02d2967f77e3fa5a18aea63dc84aa9ab418dc165"
author | iuc |
---|---|
date | Sun, 22 Nov 2020 12:48:13 +0000 |
parents | 9346d2955707 |
children | e4b92f2ca217 |
files | data_manager/data_manager.py data_manager/gene_annotation_fetcher.xml |
diffstat | 2 files changed, 94 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager.py Tue Apr 04 18:07:39 2017 -0400 +++ b/data_manager/data_manager.py Sun Nov 22 12:48:13 2020 +0000 @@ -1,12 +1,84 @@ +# -*- coding: utf-8 -*- import argparse +import bz2 import datetime +import gzip import json import os +import shutil import sys import uuid +import zipfile -import requests -from requests.exceptions import ContentDecodingError + +# Nice solution to opening compressed files (zip/bz2/gz) transparently +# https://stackoverflow.com/a/13045892/638445 + +class CompressedFile(object): + magic = None + file_type = None + mime_type = None + proper_extension = None + + def __init__(self, f): + # f is an open file or file like object + self.f = f + self.accessor = self.open() + + @classmethod + def is_magic(self, data): + return data.startswith(self.magic) + + def open(self): + return None + + +class ZIPFile(CompressedFile): + magic = '\x50\x4b\x03\x04' + file_type = 'zip' + mime_type = 'compressed/zip' + + def open(self): + return zipfile.ZipFile(self.f) + + +class BZ2File(CompressedFile): + magic = '\x42\x5a\x68' + file_type = 'bz2' + mime_type = 'compressed/bz2' + + def open(self): + return bz2.BZ2File(self.f) + + +class GZFile(CompressedFile): + magic = '\x1f\x8b\x08' + file_type = 'gz' + mime_type = 'compressed/gz' + + def open(self): + return gzip.GzipFile(self.f) + + +# factory function to create a suitable instance for accessing files +def get_compressed_file(filename): + with open(filename, 'rb') as f: + start_of_file = f.read(1024) + f.seek(0) + for cls in (ZIPFile, BZ2File, GZFile): + if cls.is_magic(start_of_file): + f.close() + return cls(filename) + + return None + + +try: + # For Python 3.0 and later + from urllib.request import urlretrieve +except ImportError: + # Fall back to Python 2's urllib2 + from urllib import urlretrieve def url_download(url): @@ -16,36 +88,33 @@ :returns: name of downloaded gene annotation file :raises: ContentDecodingError, IOError """ - response = requests.get(url=url, stream=True) # Generate file_name - file_name = response.url.split("/")[-1] + file_name = url.split('/')[-1] + + try: + # download URL (FTP and HTTP work, probably local and data too) + urlretrieve(url, file_name) - block_size = 10 * 1024 * 1024 # 10MB chunked download - with open(file_name, 'w+') as f: - try: - # Good to note here that requests' iter_content() will - # automatically handle decoding "gzip" and "deflate" encoding - # formats - for buf in response.iter_content(block_size): - f.write(buf) - except (ContentDecodingError, IOError) as e: - sys.stderr.write("Error occured downloading reference file: %s" - % e) + # uncompress file if needed + cf = get_compressed_file(file_name) + if cf is not None: + uncompressed_file_name = os.path.splitext(file_name)[0] + with open(uncompressed_file_name, 'w+') as uncompressed_file: + shutil.copyfileobj(cf.accessor, uncompressed_file) os.remove(file_name) - + file_name = uncompressed_file_name + except IOError as e: + sys.stderr.write('Error occured downloading reference file: %s' % e) + os.remove(file_name) return file_name def main(): parser = argparse.ArgumentParser(description='Create data manager JSON.') - parser.add_argument('--out', dest='output', action='store', - help='JSON filename') - parser.add_argument('--name', dest='name', action='store', - default=uuid.uuid4(), help='Data table entry unique ID' - ) - parser.add_argument('--url', dest='url', action='store', - help='Url to download gtf file from') + parser.add_argument('--out', dest='output', action='store', help='JSON filename') + parser.add_argument('--name', dest='name', action='store', default=uuid.uuid4(), help='Data table entry unique ID') + parser.add_argument('--url', dest='url', action='store', help='Url to download gtf file from') args = parser.parse_args() @@ -66,8 +135,8 @@ } } - with open(os.path.join(args.output), "w+") as f: - f.write(json.dumps(data_manager_entry)) + with open(os.path.join(args.output), 'w+') as fh: + json.dump(data_manager_entry, fh, sort_keys=True) if __name__ == '__main__':
--- a/data_manager/gene_annotation_fetcher.xml Tue Apr 04 18:07:39 2017 -0400 +++ b/data_manager/gene_annotation_fetcher.xml Sun Nov 22 12:48:13 2020 +0000 @@ -1,9 +1,6 @@ <?xml version="1.0"?> <tool id="gene_annotation_fetcher_data_manager" name="Gene Annotation Fetch" tool_type="manage_data" version="1.0.1"> <description>gene annotation fetcher</description> - <requirements> - <requirement type="package" version="2.13.0">requests</requirement> - </requirements> <command detect_errors="exit_code"> <![CDATA[ python '$__tool_directory__/data_manager.py' --out '${out_file}'