data_manager_fetch_gene_annotation: data_manager/data

comparison data_manager/data_manager.py @ 47:3a02b8ab423a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_gene_annotation/ commit 02d2967f77e3fa5a18aea63dc84aa9ab418dc165"

author	iuc
date	Sun, 22 Nov 2020 12:48:13 +0000
parents	9346d2955707
children

comparison

equal deleted inserted replaced

-:9346d2955707
+:3a02b8ab423a
+# -*- coding: utf-8 -*-
 import argparse
+import bz2
 import datetime
+import gzip
 import json
 import os
+import shutil
 import sys
 import uuid
+import zipfile
-import requests
-from requests.exceptions import ContentDecodingError
+# Nice solution to opening compressed files (zip/bz2/gz) transparently
+# https://stackoverflow.com/a/13045892/638445
class CompressedFile(object):
    """Base class for transparently opening one kind of compressed file.

    Subclasses set ``magic`` to the byte signature found at the start of
    their format and override :meth:`open` to return the accessor object
    used to read the wrapped file.
    """

    # Signature expected at the very start of the file; None on the base
    # class, which therefore never matches anything.
    magic = None
    # Short format name and mime type, filled in by subclasses.
    file_type = None
    mime_type = None
    proper_extension = None

    def __init__(self, f):
        # f is whatever the subclass' open() hands to its decompression
        # library (an open file, a file like object or a path).
        self.f = f
        self.accessor = self.open()

    @classmethod
    def is_magic(cls, data):
        """Tell whether *data* starts with this class' magic signature.

        :param data: leading content of a file, normally read in binary mode
        :returns: True if the signature matches, False otherwise (including
            when the class defines no signature)
        """
        magic = cls.magic
        if magic is None:
            return False
        # A signature written as a text literal cannot be compared with
        # bytes on Python 3; latin-1 maps each code point 0-255 to the same
        # byte value, so the signature is preserved exactly.
        if isinstance(data, bytes) and not isinstance(magic, bytes):
            magic = magic.encode('latin-1')
        return data.startswith(magic)

    def open(self):
        """Return the accessor for ``self.f``; the base class has none."""
        return None
class ZIPFile(CompressedFile):
    """CompressedFile flavour for zip archives."""

    # Bytes literal: the signature is compared against data read from a file
    # opened in binary mode, which is ``bytes`` on Python 3.
    magic = b'\x50\x4b\x03\x04'
    file_type = 'zip'
    mime_type = 'compressed/zip'

    def open(self):
        """Open ``self.f`` as a :class:`zipfile.ZipFile` and return it."""
        # NOTE(review): a ZipFile is an archive handle, not a byte stream;
        # its read() takes a member name rather than a size. Callers that
        # stream the accessor (e.g. via shutil.copyfileobj) should confirm
        # zip input actually works.
        return zipfile.ZipFile(self.f)
class BZ2File(CompressedFile):
    """CompressedFile flavour for bzip2 files."""

    # Bytes literal ("BZh"): the signature is compared against data read
    # from a file opened in binary mode, which is ``bytes`` on Python 3.
    magic = b'\x42\x5a\x68'
    file_type = 'bz2'
    mime_type = 'compressed/bz2'

    def open(self):
        """Open ``self.f`` with :class:`bz2.BZ2File` and return the reader."""
        return bz2.BZ2File(self.f)
class GZFile(CompressedFile):
    """CompressedFile flavour for gzip files."""

    # Bytes literal: the signature is compared against data read from a file
    # opened in binary mode, which is ``bytes`` on Python 3.
    magic = b'\x1f\x8b\x08'
    file_type = 'gz'
    mime_type = 'compressed/gz'

    def open(self):
        """Open ``self.f`` with :class:`gzip.GzipFile` and return the reader."""
        # GzipFile's first positional argument is a filename, so self.f is
        # expected to be a path here.
        return gzip.GzipFile(self.f)
+# factory function to create a suitable instance for accessing files
def get_compressed_file(filename):
    """Return a CompressedFile accessor matching *filename*, if any.

    The first 1024 bytes of the file are read in binary mode and tested
    against the magic signature of each supported format, in the order
    zip, bz2, gz.

    :param filename: path of the file to inspect
    :type filename: str.
    :returns: an instance of the first matching CompressedFile subclass,
        built from *filename*, or None when no signature matches
    """
    with open(filename, 'rb') as f:
        start_of_file = f.read(1024)

    # The probe handle is closed at this point, so the accessor created
    # below opens the path on its own without a second handle lingering.
    for cls in (ZIPFile, BZ2File, GZFile):
        if cls.is_magic(start_of_file):
            return cls(filename)

    return None
+try:
+# For Python 3.0 and later
+from urllib.request import urlretrieve
+except ImportError:
+# Fall back to Python 2's urllib
+from urllib import urlretrieve
 def url_download(url):
 """Attempt to download gene annotation file from a given url
 :param url: full url to gene annotation file
 :type url: str.
 :returns: name of downloaded gene annotation file
 :raises: ContentDecodingError, IOError
 """
-response = requests.get(url=url, stream=True)
 # Generate file_name
-file_name = response.url.split("/")[-1]
+file_name = url.split('/')[-1]
-block_size = 10 * 1024 * 1024  # 10MB chunked download
+try:
-with open(file_name, 'w+') as f:
+# download URL (FTP and HTTP work, probably local and data too)
-try:
+urlretrieve(url, file_name)
-# Good to note here that requests' iter_content() will
-# automatically handle decoding "gzip" and "deflate" encoding
+# uncompress file if needed
-# formats
+cf = get_compressed_file(file_name)
-for buf in response.iter_content(block_size):
+if cf is not None:
-f.write(buf)
+uncompressed_file_name = os.path.splitext(file_name)[0]
-except (ContentDecodingError, IOError) as e:
+with open(uncompressed_file_name, 'w+') as uncompressed_file:
-sys.stderr.write("Error occured downloading reference file: %s"
+shutil.copyfileobj(cf.accessor, uncompressed_file)
-% e)
 os.remove(file_name)
+file_name = uncompressed_file_name
+except IOError as e:
+sys.stderr.write('Error occured downloading reference file: %s' % e)
+os.remove(file_name)
 return file_name
 def main():
 parser = argparse.ArgumentParser(description='Create data manager JSON.')
-parser.add_argument('--out', dest='output', action='store',
+parser.add_argument('--out', dest='output', action='store', help='JSON filename')
-help='JSON filename')
+parser.add_argument('--name', dest='name', action='store', default=uuid.uuid4(), help='Data table entry unique ID')
-parser.add_argument('--name', dest='name', action='store',
+parser.add_argument('--url', dest='url', action='store', help='Url to download gtf file from')
-default=uuid.uuid4(), help='Data table entry unique ID'
-)
-parser.add_argument('--url', dest='url', action='store',
-help='Url to download gtf file from')
 args = parser.parse_args()
 work_dir = os.getcwd()
 'path': os.path.join(work_dir, gene_annotation_file_name)
 }
 }
 }
-with open(os.path.join(args.output), "w+") as f:
+with open(os.path.join(args.output), 'w+') as fh:
-f.write(json.dumps(data_manager_entry))
+json.dump(data_manager_entry, fh, sort_keys=True)
 if __name__ == '__main__':
 main()

Mercurial > repos > scottx611x > data_manager_fetch_gene_annotation

comparison data_manager/data_manager.py @ 47:3a02b8ab423a draft