Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/galaxy/util/checkers.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/galaxy/util/checkers.py Thu May 14 16:47:39 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,192 +0,0 @@ -import gzip -import re -import sys -import tarfile -import zipfile - -from six import BytesIO -from six.moves import filter - -from galaxy import util -from galaxy.util.image_util import image_type - -if sys.version_info < (3, 3): - gzip.GzipFile.read1 = gzip.GzipFile.read # workaround for https://bugs.python.org/issue12591 - try: - import bz2file as bz2 - except ImportError: - # If bz2file is unavailable, just fallback to not having pbzip2 support. - import bz2 -else: - import bz2 - -HTML_CHECK_LINES = 100 - - -def check_html(file_path, chunk=None): - if chunk is None: - temp = open(file_path, mode='rb') - elif hasattr(chunk, "splitlines"): - temp = chunk.splitlines() - else: - temp = chunk - regexp1 = re.compile(r"<A\s+[^>]*HREF[^>]+>", re.I) - regexp2 = re.compile(r"<IFRAME[^>]*>", re.I) - regexp3 = re.compile(r"<FRAMESET[^>]*>", re.I) - regexp4 = re.compile(r"<META[\W][^>]*>", re.I) - regexp5 = re.compile(r"<SCRIPT[^>]*>", re.I) - lineno = 0 - # TODO: Potentially reading huge lines into string here, this should be - # reworked. - for line in temp: - line = util.unicodify(line) - lineno += 1 - matches = regexp1.search(line) or regexp2.search(line) or regexp3.search(line) or regexp4.search(line) or regexp5.search(line) - if matches: - if chunk is None: - temp.close() - return True - if HTML_CHECK_LINES and (lineno > HTML_CHECK_LINES): - break - if chunk is None: - temp.close() - return False - - -def check_binary(name, file_path=True): - # Handles files if file_path is True or text if file_path is False - if file_path: - temp = open(name, "rb") - else: - temp = BytesIO(name) - try: - return util.is_binary(temp.read(1024)) - finally: - temp.close() - - -def check_gzip(file_path, check_content=True): - # This method returns a tuple of booleans representing ( is_gzipped, is_valid ) - # Make sure we have a gzipped file - try: - with open(file_path, "rb") as temp: - magic_check = temp.read(2) - if magic_check != util.gzip_magic: - return (False, False) - except Exception: - return (False, False) - # We support some binary data types, so check if the compressed binary file is valid - # If the file is Bam, it should already have been detected as such, so we'll just check - # for sff format. - try: - with gzip.open(file_path, 'rb') as fh: - header = fh.read(4) - if header == b'.sff': - return (True, True) - except Exception: - return(False, False) - - if not check_content: - return (True, True) - - CHUNK_SIZE = 2 ** 15 # 32Kb - gzipped_file = gzip.GzipFile(file_path, mode='rb') - chunk = gzipped_file.read(CHUNK_SIZE) - gzipped_file.close() - # See if we have a compressed HTML file - if check_html(file_path, chunk=chunk): - return (True, False) - return (True, True) - - -def check_bz2(file_path, check_content=True): - try: - with open(file_path, "rb") as temp: - magic_check = temp.read(3) - if magic_check != util.bz2_magic: - return (False, False) - except Exception: - return(False, False) - - if not check_content: - return (True, True) - - CHUNK_SIZE = 2 ** 15 # reKb - bzipped_file = bz2.BZ2File(file_path, mode='rb') - chunk = bzipped_file.read(CHUNK_SIZE) - bzipped_file.close() - # See if we have a compressed HTML file - if check_html(file_path, chunk=chunk): - return (True, False) - return (True, True) - - -def check_zip(file_path, check_content=True, files=1): - if not zipfile.is_zipfile(file_path): - return (False, False) - - if not check_content: - return (True, True) - - CHUNK_SIZE = 2 ** 15 # 32Kb - chunk = None - for filect, member in enumerate(iter_zip(file_path)): - handle, name = member - chunk = handle.read(CHUNK_SIZE) - if chunk and check_html(file_path, chunk): - return (True, False) - if filect >= files: - break - return (True, True) - - -def is_bz2(file_path): - is_bz2, is_valid = check_bz2(file_path, check_content=False) - return is_bz2 - - -def is_gzip(file_path): - is_gzipped, is_valid = check_gzip(file_path, check_content=False) - return is_gzipped - - -def is_zip(file_path): - is_zipped, is_valid = check_zip(file_path, check_content=False) - return is_zipped - - -def is_single_file_zip(file_path): - for i, member in enumerate(iter_zip(file_path)): - if i > 1: - return False - return True - - -def is_tar(file_path): - return tarfile.is_tarfile(file_path) - - -def iter_zip(file_path): - with zipfile.ZipFile(file_path) as z: - for f in filter(lambda x: not x.endswith('/'), z.namelist()): - yield (z.open(f), f) - - -def check_image(file_path): - """ Simple wrapper around image_type to yield a True/False verdict """ - if image_type(file_path): - return True - return False - - -__all__ = ( - 'check_binary', - 'check_bz2', - 'check_gzip', - 'check_html', - 'check_image', - 'check_zip', - 'is_gzip', - 'is_bz2', - 'is_zip', -)