Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/galaxy/util/checkers.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 import bz2 | |
2 import gzip | |
3 import re | |
4 import tarfile | |
5 import zipfile | |
6 from io import ( | |
7 BytesIO, | |
8 StringIO | |
9 ) | |
10 | |
11 from galaxy import util | |
12 from galaxy.util.image_util import image_type | |
13 | |
# Upper bound on the number of lines check_html() reads from its input.
HTML_CHECK_LINES = 100
# Amount of data the checkers in this module read in one go.
CHUNK_SIZE = 1 << 15  # 32Kb
# Case-insensitive tag patterns; check_html() reports HTML as soon as any one
# of them matches a line.
HTML_REGEXPS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"<A\s+[^>]*HREF[^>]+>",
        r"<IFRAME[^>]*>",
        r"<FRAMESET[^>]*>",
        r"<META[\W][^>]*>",
        r"<SCRIPT[^>]*>",
    )
)
23 | |
24 | |
def check_html(name, file_path=True):
    """
    Tell whether a file, or a piece of text, contains HTML markup.

    When ``file_path`` is true, ``name`` is opened as a UTF-8 text file;
    otherwise ``name`` itself is the text to inspect. At most
    ``HTML_CHECK_LINES`` lines are examined, each read capped at
    ``CHUNK_SIZE`` characters. Returns True as soon as one line matches any
    pattern in ``HTML_REGEXPS``; returns False otherwise, including when the
    input cannot be decoded.
    """
    if file_path:
        stream = open(name, encoding='utf-8')
    else:
        stream = StringIO(util.unicodify(name))
    with stream:
        try:
            for _ in range(HTML_CHECK_LINES):
                text = stream.readline(CHUNK_SIZE)
                if not text:
                    # End of input reached before the line budget ran out.
                    break
                for pattern in HTML_REGEXPS:
                    if pattern.search(text):
                        return True
        except UnicodeDecodeError:
            # Undecodable content is not treated as HTML.
            return False
    return False
46 | |
47 | |
def check_binary(name, file_path=True):
    """
    Return the verdict of ``util.is_binary`` on the first 1024 bytes of input.

    When ``file_path`` is true, ``name`` is a path opened in binary mode;
    otherwise ``name`` is the bytes content itself.
    """
    source = open(name, "rb") if file_path else BytesIO(name)
    with source:
        head = source.read(1024)
        return util.is_binary(head)
58 | |
59 | |
def check_gzip(file_path, check_content=True):
    """
    Return a tuple of booleans ``(is_gzipped, is_valid)`` for ``file_path``.

    ``(False, False)`` is returned when the file cannot be read, does not
    start with ``util.gzip_magic``, or cannot be decompressed. A gzipped
    ``.sff`` file is always reported as valid. When ``check_content`` is true,
    the first ``CHUNK_SIZE`` decompressed bytes are scanned with
    ``check_html`` and a compressed HTML file is reported as
    ``(True, False)``.
    """
    # Make sure we have a gzipped file: compare the two leading bytes.
    try:
        with open(file_path, "rb") as raw:
            signature = raw.read(2)
        if signature != util.gzip_magic:
            return (False, False)
    except Exception:
        return (False, False)

    # We support some binary data types, so check if the compressed binary
    # file is valid. If the file is Bam, it should already have been detected
    # as such, so we'll just check for sff format.
    try:
        with gzip.open(file_path, 'rb') as unpacked:
            is_sff = unpacked.read(4) == b'.sff'
    except Exception:
        return(False, False)
    if is_sff:
        return (True, True)

    if not check_content:
        return (True, True)

    with gzip.open(file_path, mode='rb') as unpacked:
        sample = unpacked.read(CHUNK_SIZE)
    # See if we have a compressed HTML file
    looks_like_html = check_html(sample, file_path=False)
    return (True, False) if looks_like_html else (True, True)
90 | |
91 | |
def check_bz2(file_path, check_content=True):
    """
    Return a tuple of booleans ``(is_bz2, is_valid)`` for ``file_path``.

    ``(False, False)`` is returned when the file cannot be read or its first
    three bytes differ from ``util.bz2_magic``. When ``check_content`` is
    true, the first ``CHUNK_SIZE`` decompressed bytes are scanned with
    ``check_html`` and a compressed HTML file is reported as
    ``(True, False)``.
    """
    try:
        with open(file_path, "rb") as raw:
            signature = raw.read(3)
        if signature != util.bz2_magic:
            return (False, False)
    except Exception:
        return(False, False)

    if not check_content:
        return (True, True)

    with bz2.BZ2File(file_path, mode='rb') as unpacked:
        sample = unpacked.read(CHUNK_SIZE)
    # See if we have a compressed HTML file
    looks_like_html = check_html(sample, file_path=False)
    return (True, False) if looks_like_html else (True, True)
110 | |
111 | |
def check_zip(file_path, check_content=True, files=1):
    """
    Return a tuple of booleans ``(is_zipped, is_valid)`` for ``file_path``.

    ``(False, False)`` is returned when ``zipfile.is_zipfile`` rejects the
    file. When ``check_content`` is false a zip file is reported as
    ``(True, True)`` without looking inside. Otherwise the first
    ``CHUNK_SIZE`` bytes of the leading members are scanned with
    ``check_html``; an archive with an HTML member among them is reported as
    ``(True, False)``.

    NOTE: the loop stops once the zero-based member index reaches ``files``,
    so up to ``files + 1`` members are inspected. That long-standing limit is
    kept unchanged here.
    """
    if not zipfile.is_zipfile(file_path):
        return (False, False)

    if not check_content:
        return (True, True)

    for filect, (handle, _name) in enumerate(iter_zip(file_path)):
        # iter_zip hands over an open member stream; close it as soon as the
        # sample has been read so handles do not pile up.
        with handle:
            chunk = handle.read(CHUNK_SIZE)
        if chunk and check_html(chunk, file_path=False):
            return (True, False)
        if filect >= files:
            break
    return (True, True)
128 | |
129 | |
def is_bz2(file_path):
    """Return the first flag of ``check_bz2``, with the content check skipped."""
    return check_bz2(file_path, check_content=False)[0]
133 | |
134 | |
def is_gzip(file_path):
    """Return the first flag of ``check_gzip``, with the content check skipped."""
    return check_gzip(file_path, check_content=False)[0]
138 | |
139 | |
def is_zip(file_path):
    """Return the first flag of ``check_zip``, with the content check skipped."""
    return check_zip(file_path, check_content=False)[0]
143 | |
144 | |
def is_single_file_zip(file_path):
    """
    Return True when the zip archive holds no more than one file member.

    Entries whose name ends with ``/`` (directories) are not counted. An
    archive with no file members at all is also reported as True, as before.
    ``zipfile.ZipFile`` raises if ``file_path`` is not a readable zip archive.

    The previous implementation compared a zero-based index with ``> 1`` and
    so reported archives with two files as single-file. It also opened every
    member just to count it, without closing the handles.
    """
    with zipfile.ZipFile(file_path) as archive:
        file_members = [
            member for member in archive.namelist() if not member.endswith('/')
        ]
    return len(file_members) <= 1
150 | |
151 | |
def is_tar(file_path):
    """Return the verdict of ``tarfile.is_tarfile`` for ``file_path``."""
    recognised = tarfile.is_tarfile(file_path)
    return recognised
154 | |
155 | |
def iter_zip(file_path):
    """
    Yield ``(handle, name)`` for every member of a zip archive whose name
    does not end with ``/``.

    ``handle`` is the stream returned by ``ZipFile.open`` for that member;
    this generator does not close it. The archive itself stays open for as
    long as the generator is being iterated.
    """
    with zipfile.ZipFile(file_path) as archive:
        for member_name in archive.namelist():
            if member_name.endswith('/'):
                # Directory entry: nothing to read.
                continue
            yield (archive.open(member_name), member_name)
160 | |
161 | |
def check_image(file_path):
    """ Reduce the result of image_type to a plain True/False verdict """
    return bool(image_type(file_path))
167 | |
168 | |
# Names exported by a star import of this module. is_tar, is_single_file_zip
# and iter_zip are not listed here and have to be imported by name.
__all__ = (
    'check_binary',
    'check_bz2',
    'check_gzip',
    'check_html',
    'check_image',
    'check_zip',
    'is_gzip',
    'is_bz2',
    'is_zip',
)