Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/galaxy/util/__init__.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Utility functions used systemwide. | |
4 | |
5 """ | |
6 from __future__ import absolute_import | |
7 | |
8 import binascii | |
9 import collections | |
10 import errno | |
11 import importlib | |
12 import json | |
13 import os | |
14 import random | |
15 import re | |
16 import shutil | |
17 import smtplib | |
18 import stat | |
19 import string | |
20 import sys | |
21 import tempfile | |
22 import threading | |
23 import time | |
24 import unicodedata | |
25 import xml.dom.minidom | |
26 from datetime import datetime | |
27 from email.mime.multipart import MIMEMultipart | |
28 from email.mime.text import MIMEText | |
29 from functools import partial | |
30 from hashlib import md5 | |
31 from os.path import relpath | |
32 | |
33 import requests | |
34 try: | |
35 import grp | |
36 except ImportError: | |
37 # For Pulsar on Windows (which does not use the function that uses grp) | |
38 grp = None | |
39 from boltons.iterutils import ( | |
40 default_enter, | |
41 remap, | |
42 ) | |
43 LXML_AVAILABLE = True | |
44 try: | |
45 from lxml import etree | |
46 except ImportError: | |
47 LXML_AVAILABLE = False | |
48 import xml.etree.ElementTree as etree | |
49 from requests.adapters import HTTPAdapter | |
50 from requests.packages.urllib3.util.retry import Retry | |
51 from six import binary_type, iteritems, PY2, string_types, text_type | |
52 from six.moves import ( | |
53 xrange, | |
54 zip | |
55 ) | |
56 from six.moves.urllib import parse as urlparse | |
57 | |
58 try: | |
59 import docutils.core as docutils_core | |
60 import docutils.writers.html4css1 as docutils_html4css1 | |
61 except ImportError: | |
62 docutils_core = None | |
63 docutils_html4css1 = None | |
64 | |
65 try: | |
66 import uwsgi | |
67 except ImportError: | |
68 uwsgi = None | |
69 | |
70 from .custom_logging import get_logger | |
71 from .inflection import English, Inflector | |
72 from .path import safe_contains, safe_makedirs, safe_relpath # noqa: F401 | |
73 | |
# Inflector used systemwide for pluralizing/singularizing words.
inflector = Inflector(English)

log = get_logger(__name__)
# Module-level re-entrant lock backing the @synchronized decorator below.
_lock = threading.RLock()

# Re-exported for convenience so callers can use galaxy.util.namedtuple.
namedtuple = collections.namedtuple

CHUNK_SIZE = 65536  # 64k

# Maximum length of text stored in database string columns
# (see shrink_and_unicodify / shrink_string_by_size below).
DATABASE_MAX_STRING_SIZE = 32768
DATABASE_MAX_STRING_SIZE_PRETTY = '32K'

# Magic-number prefixes used to sniff compressed content.
gzip_magic = b'\x1f\x8b'
bz2_magic = b'BZh'
DEFAULT_ENCODING = os.environ.get('GALAXY_DEFAULT_ENCODING', 'utf-8')
NULL_CHAR = b'\x00'
# Byte sequences whose presence marks data as binary (see is_binary).
BINARY_CHARS = [NULL_CHAR]
FILENAME_VALID_CHARS = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Common permission bit patterns (octal 644, 755 and 777 respectively).
RW_R__R__ = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
RWXR_XR_X = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
RWXRWXRWX = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

# Alias to whichever etree implementation was imported above (lxml or stdlib).
XML = etree.XML

defaultdict = collections.defaultdict
100 | |
101 | |
def remove_protocol_from_url(url):
    """Strip any leading protocol (``http://``, ``https://``, ...) and a
    trailing slash from *url*.

    *url* may be None, in which case None is returned unchanged.
    """
    if url is None:
        return None

    bare = url.split('://')[1] if url.find('://') > 0 else url
    return bare.rstrip('/')
115 | |
116 | |
def is_binary(value):
    """
    File is binary if it contains a null-byte by default (e.g. behavior of grep, etc.).
    This may fail for utf-16 files, but so would ASCII encoding.
    >>> is_binary( string.printable )
    False
    >>> is_binary( b'\\xce\\x94' )
    False
    >>> is_binary( b'\\x00' )
    True
    """
    data = smart_str(value)
    return any(marker in data for marker in BINARY_CHARS)
133 | |
134 | |
def is_uuid(value):
    """
    Return True if *value* (coerced to str) is a canonical lowercase UUID.

    The pattern is anchored at both ends; previously a string with trailing
    garbage after a valid UUID (e.g. "<uuid>deadbeef") was wrongly accepted
    because ``re.match`` only anchors at the start.

    >>> is_uuid( "123e4567-e89b-12d3-a456-426655440000" )
    True
    >>> is_uuid( "0x3242340298902834" )
    False
    """
    uuid_re = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z")
    return bool(uuid_re.match(str(value)))
148 | |
149 | |
def directory_hash_id(id):
    """
    Map an object id to a list of nested directory names.

    >>> directory_hash_id( 100 )
    ['000']
    >>> directory_hash_id( "90000" )
    ['090']
    >>> directory_hash_id("777777777")
    ['000', '777', '777']
    >>> directory_hash_id("135ee48a-4f51-470c-ae2f-ce8bd78799e6")
    ['1', '3', '5']
    """
    as_str = str(id)
    # Shortcut -- ids 0-999 go under ../000/
    if len(as_str) < 4:
        return ["000"]
    if is_uuid(as_str):
        # UUIDs: one directory level per leading character
        return list(as_str[:3])
    # Pad with zeros until a multiple of three
    padded = ((3 - len(as_str) % 3) * "0") + as_str
    # Drop the last three digits -- 1000 files per directory
    padded = padded[:-3]
    # Break into chunks of three
    return [padded[start:start + 3] for start in range(0, len(padded), 3)]
177 | |
178 | |
def get_charset_from_http_headers(headers, default=None):
    """Extract the charset from a Content-Type header, or return *default*."""
    content_type = headers.get('content-type', None)
    if content_type and 'charset=' in content_type:
        charset = content_type.split('charset=')[-1].split(';')[0].strip()
        if charset:
            return charset
    return default
186 | |
187 | |
def synchronized(func):
    """This wrapper will serialize access to 'func' to a single thread. Use it as a decorator."""
    def caller(*args, **kwargs):
        # The module-level RLock blocks until available and is always released.
        with _lock:
            return func(*args, **kwargs)
    return caller
197 | |
198 | |
def iter_start_of_line(fh, chunk_size=None):
    """
    Iterate over fh and call readline(chunk_size)
    """
    # Stop on the exact empty-string EOF sentinel (text-mode files).
    line = fh.readline(chunk_size)
    while line != "":
        yield line
        line = fh.readline(chunk_size)
205 | |
206 | |
def file_reader(fp, chunk_size=CHUNK_SIZE):
    """This generator yields the open fileobject in chunks (default 64k). Closes the file at the end"""
    chunk = fp.read(chunk_size)
    while chunk:
        yield chunk
        chunk = fp.read(chunk_size)
    fp.close()
215 | |
216 | |
def unique_id(KEY_SIZE=128):
    """
    Generates an unique id

    >>> ids = [ unique_id() for i in range(1000) ]
    >>> len(set(ids))
    1000
    """
    # md5 of KEY_SIZE random bits, returned as a 32-char hex digest.
    entropy = text_type(random.getrandbits(KEY_SIZE)).encode("UTF-8")
    return md5(entropy).hexdigest()
227 | |
228 | |
def parse_xml(fname, strip_whitespace=True, remove_comments=True):
    """Parse the XML file *fname* and return the parsed tree.

    :param fname: path of the XML file to parse
    :param strip_whitespace: strip leading/trailing whitespace from the
        ``text`` and ``tail`` of every element in the tree
    :param remove_comments: drop XML comments while parsing; only has an
        effect when lxml is available (stdlib etree always drops them)
    :raises IOError: when the file cannot be read (errno is filled in for
        lxml, which does not set it)
    """
    parser = None
    if remove_comments and LXML_AVAILABLE:
        # If using stdlib etree comments are always removed,
        # but lxml doesn't do this by default
        parser = etree.XMLParser(remove_comments=remove_comments)
    try:
        tree = etree.parse(fname, parser=parser)
        root = tree.getroot()
        if strip_whitespace:
            for elem in root.iter('*'):
                if elem.text is not None:
                    elem.text = elem.text.strip()
                if elem.tail is not None:
                    elem.tail = elem.tail.strip()
    except IOError as e:
        if e.errno is None and not os.path.exists(fname):
            # lxml doesn't set errno
            e.errno = errno.ENOENT
        raise
    except etree.ParseError:
        log.exception("Error parsing file %s", fname)
        raise
    return tree
254 | |
255 | |
def parse_xml_string(xml_string, strip_whitespace=True):
    """Parse an XML document from a string and return the root element."""
    try:
        root = etree.fromstring(xml_string)
    except ValueError as e:
        # lxml refuses unicode strings that carry an encoding declaration;
        # re-encode and retry only in that specific case.
        if 'strings with encoding declaration are not supported' not in unicodify(e):
            raise e
        root = etree.fromstring(xml_string.encode('utf-8'))
    if strip_whitespace:
        for node in root.iter('*'):
            if node.text is not None:
                node.text = node.text.strip()
            if node.tail is not None:
                node.tail = node.tail.strip()
    return root
271 | |
272 | |
def xml_to_string(elem, pretty=False):
    """
    Returns a string from an xml tree.
    """
    try:
        if elem is None:
            serialized = ''
        elif PY2:
            serialized = etree.tostring(elem, encoding='utf-8')
        else:
            serialized = etree.tostring(elem, encoding='unicode')
    except TypeError as e:
        # we assume this is a comment
        if hasattr(elem, 'text'):
            return u"<!-- %s -->\n" % elem.text
        raise e
    if serialized and pretty:
        pretty_string = xml.dom.minidom.parseString(serialized).toprettyxml(indent='    ')
        # Drop the blank/noise lines minidom emits between elements.
        return "\n".join(line for line in pretty_string.split('\n') if not re.match(r'^[\s\\nb\']*$', line))
    return serialized
295 | |
296 | |
def xml_element_compare(elem1, elem2):
    """Compare two XML elements (or their dict forms) for structural equality."""
    first = elem1 if isinstance(elem1, dict) else xml_element_to_dict(elem1)
    second = elem2 if isinstance(elem2, dict) else xml_element_to_dict(elem2)
    return first == second
303 | |
304 | |
def xml_element_list_compare(elem_list1, elem_list2):
    """Compare two lists of XML elements by their dict representations."""
    return list(map(xml_element_to_dict, elem_list1)) == list(map(xml_element_to_dict, elem_list2))
307 | |
308 | |
def xml_element_to_dict(elem):
    """Recursively convert an XML element into a nested dict.

    Attributes are stored under "@name" keys, text under "#text" when the
    element also has attributes or children, and repeated child tags become
    lists.

    Fixes a crash in the previous version: an element with child elements
    but no attributes initialized ``rval[elem.tag]`` to None and then tried
    to item-assign into it (TypeError). The container dict is now created
    whenever children are present.
    """
    rval = {}
    rval[elem.tag] = {} if elem.attrib else None

    children = list(elem)
    if children:
        # Children exist, so the value must be a dict even without attributes.
        if rval[elem.tag] is None:
            rval[elem.tag] = {}
        # Group converted children by tag so repeated tags become lists.
        grouped = {}
        for child_dict in map(xml_element_to_dict, children):
            for tag, converted in child_dict.items():
                grouped.setdefault(tag, []).append(converted)
        for tag, converted_list in grouped.items():
            if len(converted_list) == 1:
                rval[elem.tag][tag] = converted_list[0]
            else:
                rval[elem.tag][tag] = converted_list
    if elem.attrib:
        for attr_name, attr_value in elem.attrib.items():
            rval[elem.tag]["@%s" % attr_name] = attr_value

    if elem.text:
        text = elem.text.strip()
        if text and children or elem.attrib:
            rval[elem.tag]['#text'] = text
        else:
            rval[elem.tag] = text

    return rval
341 | |
342 | |
def pretty_print_xml(elem, level=0):
    """Indent an ElementTree element in place for readable output; returns *elem*."""
    pad = '    '
    indent_here = "\n" + level * pad
    if len(elem):
        # Container node: indent the text before the first child, recurse,
        # and normalize the tail.
        if not elem.text or not elem.text.strip():
            elem.text = indent_here + pad + pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent_here
        for child in elem:
            pretty_print_xml(child, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent_here
    else:
        # Leaf node: only adjust the tail (never for the root element).
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = indent_here + pad
    return elem
359 | |
360 | |
def get_file_size(value, default=None):
    """Best-effort size of *value*: a path, a file object with a ``name``
    attribute, or any seekable object. Returns *default* if all attempts fail.
    """
    # try built-in
    try:
        return os.path.getsize(value)
    except Exception:
        pass
    # try built-in on the name attribute
    try:
        return os.path.getsize(value.name)
    except Exception:
        pass
    # try tell() of end of object, restoring the current position
    try:
        position = value.tell()
        value.seek(0, 2)
        size = value.tell()
        value.seek(position)
        return size
    except Exception:
        return default
380 | |
381 | |
def shrink_stream_by_size(value, size, join_by=b"..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """
    Shrinks bytes read from `value` to `size`.

    `value` needs to implement tell/seek, so files need to be opened in binary mode.
    Returns unicode text with invalid characters replaced.

    :param value: binary file-like object supporting read/tell/seek
    :param size: maximum number of bytes to keep
    :param join_by: separator placed between the head and tail fragments
    :param left_larger: when the byte budget is odd, give the extra byte to the head
    :param beginning_on_size_error: if `size` is too small to fit `join_by` plus
        one byte on each side, fall back to just the first `size` bytes
    :param end_on_size_error: likewise, but fall back to the last `size` bytes
    :raises ValueError: when `size` is too small and neither fallback flag is set

    NOTE(review): the two size-error fallbacks return raw bytes without passing
    through unicodify(), unlike the normal paths -- confirm this is intended.
    """
    rval = b''
    join_by = smart_str(join_by)
    if get_file_size(value) > size:
        start = value.tell()
        len_join_by = len(join_by)
        min_size = len_join_by + 2
        if size < min_size:
            if beginning_on_size_error:
                rval = value.read(size)
                value.seek(start)
                return rval
            elif end_on_size_error:
                value.seek(-size, 2)
                rval = value.read(size)
                value.seek(start)
                return rval
            raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
        left_index = right_index = int((size - len_join_by) / 2)
        # Distribute the odd leftover byte to the requested side.
        if left_index + right_index + len_join_by < size:
            if left_larger:
                left_index += 1
            else:
                right_index += 1
        rval = value.read(left_index) + join_by
        # Seek relative to the end of the stream to read the tail fragment.
        value.seek(-right_index, 2)
        rval += value.read(right_index)
    else:
        # Stream already fits: read it whole, chunk by chunk.
        while True:
            data = value.read(CHUNK_SIZE)
            if not data:
                break
            rval += data
    return unicodify(rval)
422 | |
423 | |
def shrink_and_unicodify(stream):
    """Unicodify *stream* (dropping nulls) and truncate it to
    DATABASE_MAX_STRING_SIZE, keeping the beginning and end."""
    text = unicodify(stream, strip_null=True) or u''
    if len(text) <= DATABASE_MAX_STRING_SIZE:
        return text
    return shrink_string_by_size(text,
                                 DATABASE_MAX_STRING_SIZE,
                                 join_by="\n..\n",
                                 left_larger=True,
                                 beginning_on_size_error=True)
433 | |
434 | |
def shrink_string_by_size(value, size, join_by="..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """Truncate *value* to at most *size* characters, joining the kept head
    and tail with *join_by*.

    When *size* cannot fit *join_by* plus one character on each side, fall
    back to the head (or tail) alone if the corresponding flag is set,
    otherwise raise ValueError.
    """
    if len(value) <= size:
        return value
    sep_len = len(join_by)
    minimum = sep_len + 2
    if size < minimum:
        if beginning_on_size_error:
            return value[:size]
        if end_on_size_error:
            return value[-size:]
        raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, minimum))
    head = tail = int((size - sep_len) / 2)
    # Give the leftover odd character to the requested side.
    if head + tail + sep_len < size:
        if left_larger:
            head += 1
        else:
            tail += 1
    return "%s%s%s" % (value[:head], join_by, value[-tail:])
453 | |
454 | |
def pretty_print_time_interval(time=False, precise=False, utc=False):
    """
    Get a datetime object or a int() Epoch timestamp and return a
    pretty string like 'an hour ago', 'Yesterday', '3 months ago',
    'just now', etc

    :param time: int epoch timestamp, datetime, or ISO-8601 string;
        anything else yields the zero interval
    :param precise: return exact counts ("10 minutes ago") instead of
        coarse buckets ("today")
    :param utc: compare against UTC now instead of local now

    credit: http://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python
    """
    if utc:
        now = datetime.utcnow()
    else:
        now = datetime.now()
    if type(time) is int:
        diff = now - datetime.fromtimestamp(time)
    elif isinstance(time, datetime):
        diff = now - time
    elif isinstance(time, string_types):
        try:
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            # MySQL may not support microseconds precision
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S")
        diff = now - time
    else:
        diff = now - now
    second_diff = diff.seconds
    day_diff = diff.days

    if day_diff < 0:
        return ''

    if precise:
        if day_diff == 0:
            if second_diff < 10:
                return "just now"
            if second_diff < 60:
                return str(second_diff) + " seconds ago"
            if second_diff < 120:
                return "a minute ago"
            if second_diff < 3600:
                # Floor division: true division would yield e.g.
                # "2.5 minutes ago" on Python 3.
                return str(second_diff // 60) + " minutes ago"
            if second_diff < 7200:
                return "an hour ago"
            if second_diff < 86400:
                return str(second_diff // 3600) + " hours ago"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return str(day_diff) + " days ago"
        if day_diff < 31:
            return str(day_diff // 7) + " weeks ago"
        if day_diff < 365:
            return str(day_diff // 30) + " months ago"
        return str(day_diff // 365) + " years ago"
    else:
        if day_diff == 0:
            return "today"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return "less than a week"
        if day_diff < 31:
            return "less than a month"
        if day_diff < 365:
            return "less than a year"
        return "a few years ago"
520 | |
521 | |
def pretty_print_json(json_data, is_json_string=False):
    """Render *json_data* (an object, or a JSON string when
    *is_json_string* is True) as sorted, 4-space-indented JSON text."""
    data = json.loads(json_data) if is_json_string else json_data
    return json.dumps(data, sort_keys=True, indent=4)
526 | |
527 | |
# characters that are valid in sanitized text (see sanitize_text below)
valid_chars = set(string.ascii_letters + string.digits + " -=_.()/+*^,:?!")

# characters that are allowed but need to be escaped; restore_text() reverses
# this mapping, so each replacement token must stay unique
mapped_chars = {'>': '__gt__',
                '<': '__lt__',
                "'": '__sq__',
                '"': '__dq__',
                '[': '__ob__',
                ']': '__cb__',
                '{': '__oc__',
                '}': '__cc__',
                '@': '__at__',
                '\n': '__cn__',
                '\r': '__cr__',
                '\t': '__tc__',
                '#': '__pd__'}
545 | |
546 | |
def restore_text(text, character_map=mapped_chars):
    """Restores sanitized text by reversing the escape mapping."""
    if not text:
        return text
    restored = text
    for original_char, escaped in character_map.items():
        restored = restored.replace(escaped, original_char)
    return restored
554 | |
555 | |
def sanitize_text(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """
    Restricts the characters that are allowed in text; accepts both strings
    and lists of strings; non-string entities will be cast to strings.

    Fix: *invalid_character* is now forwarded to the helper -- previously it
    was silently dropped for the non-list case, so a caller-supplied
    replacement character was ignored.
    """
    if isinstance(text, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in text]
    if not isinstance(text, string_types):
        text = smart_str(text)
    return _sanitize_text_helper(text, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
566 | |
567 | |
def _sanitize_text_helper(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Restricts the characters that are allowed in a string"""

    def _replacement(c):
        # valid characters pass through; mapped ones are escaped;
        # everything else becomes the invalid_character marker
        if c in valid_characters:
            return c
        return character_map.get(c, invalid_character)

    return ''.join(_replacement(c) for c in text)
580 | |
581 | |
def sanitize_lists_to_string(values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Sanitize a value or (possibly nested) list of values into a single
    comma-joined sanitized string."""
    if not isinstance(values, list):
        return sanitize_text(values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    sanitized = [
        sanitize_lists_to_string(value,
                                 valid_characters=valid_characters,
                                 character_map=character_map,
                                 invalid_character=invalid_character)
        for value in values
    ]
    return ",".join(sanitized)
594 | |
595 | |
def sanitize_param(value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Clean incoming parameters (strings or lists)"""
    if isinstance(value, list):
        return [sanitize_text(item, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for item in value]
    if isinstance(value, string_types):
        return sanitize_text(value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    raise Exception('Unknown parameter type (%s)' % (type(value)))
604 | |
605 | |
# characters permitted in a sanitized filename component (see sanitize_for_filename)
valid_filename_chars = set(string.ascii_letters + string.digits + '_.')
# sanitized results that can never be used as a filename
invalid_filenames = ['', '.', '..']
608 | |
609 | |
def sanitize_for_filename(text, default=None):
    """
    Restricts the characters that are allowed in a filename portion; Returns default value or a unique id string if result is not a valid name.
    Method is overly aggressive to minimize possible complications, but a maximum length is not considered.
    """
    cleaned = ''.join(c if c in valid_filename_chars else '_' for c in text)
    if cleaned not in invalid_filenames:
        return cleaned
    if default is None:
        # fall back to a sanitized unique id
        return sanitize_for_filename(str(unique_id()))
    return default
627 | |
628 | |
def find_instance_nested(item, instances, match_key=None):
    """
    Recursively find instances from lists, dicts, tuples.

    `instances` should be a tuple of valid instances
    If match_key is given the key must match for an instance to be added to the list of found instances.
    """

    matches = []

    def visit(path, key, value):
        # Collect matching values; return (key, value) so remap keeps the
        # structure unchanged.
        if isinstance(value, instances):
            if match_key is None or match_key == key:
                matches.append(value)
        return key, value

    def enter(path, key, value):
        # Do not descend into values of the matched type(s) themselves.
        if isinstance(value, instances):
            return None, False
        return default_enter(path, key, value)

    # reraise_visit=False: skip items the visit callback cannot handle.
    remap(item, visit, reraise_visit=False, enter=enter)

    return matches
653 | |
654 | |
def mask_password_from_url(url):
    """
    Masks out passwords from connection urls like the database connection in galaxy.ini

    >>> mask_password_from_url( 'sqlite+postgresql://user:password@localhost/' )
    'sqlite+postgresql://user:********@localhost/'
    >>> mask_password_from_url( 'amqp://user:amqp@localhost' )
    'amqp://user:********@localhost'
    >>> mask_password_from_url( 'amqp://localhost')
    'amqp://localhost'
    """
    parts = urlparse.urlsplit(url)
    if not parts.password:
        return url
    if url.count(parts.password) == 1:
        return url.replace(parts.password, "********")
    # This can manipulate the input other than just masking password,
    # so the previous string replace method is preferred when the
    # password doesn't appear twice in the url
    masked_netloc = parts.netloc.replace("%s:%s" % (parts.username, parts.password), '%s:********' % parts.username)
    return urlparse.urlunsplit(parts._replace(netloc=masked_netloc))
677 | |
678 | |
def ready_name_for_url(raw_name):
    u""" General method to convert a string (i.e. object name) to a URL-ready
    slug.

    >>> ready_name_for_url( "My Cool Object" )
    'My-Cool-Object'
    >>> ready_name_for_url( "!My Cool Object!" )
    'My-Cool-Object'
    >>> ready_name_for_url( "Hello₩◎ґʟⅾ" )
    'Hello'
    """
    # Whitespace runs become a single '-'.
    slug = re.sub(r"\s+", "-", raw_name)
    # Drop everything that is not alphanumeric or '-'.
    slug = re.sub(r"[^a-zA-Z0-9\-]", "", slug)
    # Trim a single trailing '-'.
    return slug[:-1] if slug.endswith('-') else slug
699 | |
700 | |
def which(file):
    """Return the full path to *file* found on $PATH, or None.

    Like the POSIX ``which`` utility, except that (matching historical
    behavior) only existence is checked, not executability.

    Fixes: the previous version hard-coded ':' as the PATH separator and
    '/' as the path joiner (breaking Windows) and raised KeyError when
    PATH was unset.
    """
    # http://stackoverflow.com/questions/5226958/which-equivalent-function-in-python
    for path in os.environ.get("PATH", "").split(os.pathsep):
        candidate = os.path.join(path, file)
        if os.path.exists(candidate):
            return candidate

    return None
708 | |
709 | |
def in_directory(file, directory, local_path_module=os.path):
    """
    Return true, if the common prefix of both is equal to directory
    e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b.
    This function isn't used exclusively for security checks, but if it is
    used for such checks it is assumed that ``directory`` is a "trusted" path -
    supplied by Galaxy or by the admin and ``file`` is something generated by
    a tool, configuration, external web server, or user supplied input.

    local_path_module is used by Pulsar to check Windows paths while running on
    a POSIX-like system.

    >>> base_dir = tempfile.mkdtemp()
    >>> safe_dir = os.path.join(base_dir, "user")
    >>> os.mkdir(safe_dir)
    >>> good_file = os.path.join(safe_dir, "1")
    >>> with open(good_file, "w") as f: _ = f.write("hello")
    >>> in_directory(good_file, safe_dir)
    True
    >>> in_directory("/other/file/is/here.txt", safe_dir)
    False
    >>> unsafe_link = os.path.join(safe_dir, "2")
    >>> os.symlink("/other/file/bad.fasta", unsafe_link)
    >>> in_directory(unsafe_link, safe_dir)
    False
    """
    if local_path_module == os.path:
        # Resolve symlinks in the trusted directory before the containment check.
        directory = os.path.realpath(directory)
        contains = safe_contains
    else:
        # Pulsar: load the safe_contains variant matching the foreign path flavor.
        foreign_path = importlib.import_module('galaxy.util.path.%s' % local_path_module.__name__)
        contains = foreign_path.safe_contains
    return contains(directory, file)
742 | |
743 | |
def merge_sorted_iterables(operator, *iterables):
    """
    Merge individually sorted iterables into one sorted stream.
    *operator* maps an element to its sort key; ties prefer earlier iterables.

    >>> operator = lambda x: x
    >>> list( merge_sorted_iterables( operator, [1,2,3], [4,5] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [4, 5], [1,2,3] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [1, 4, 5], [2], [3] ) )
    [1, 2, 3, 4, 5]
    """
    head, rest = iterables[0], iterables[1:]
    if not rest:
        for element in head:
            yield element
        return
    # Recursively merge the remaining iterables, then merge the result
    # pairwise with the first one.
    merged_rest = merge_sorted_iterables(operator, *rest)
    for element in __merge_two_sorted_iterables(operator, iter(head), merged_rest):
        yield element


def __merge_two_sorted_iterables(operator, iterable1, iterable2):
    """Merge two sorted iterators; equal keys yield iterable1's element first."""
    _unset = object()
    pending1 = pending2 = _unset
    while True:
        try:
            if pending1 is _unset:
                pending1 = next(iterable1)
            if pending2 is _unset:
                pending2 = next(iterable2)
        except StopIteration:
            break
        if operator(pending2) < operator(pending1):
            yield pending2
            pending2 = _unset
        else:
            yield pending1
            pending1 = _unset
    # One side is exhausted: flush the held element, then drain the remainder.
    if pending1 is not _unset:
        yield pending1
    if pending2 is not _unset:
        yield pending2
    for element in iterable1:
        yield element
    for element in iterable2:
        yield element
795 | |
796 | |
class Params(object):
    """
    Stores and 'sanitizes' parameters. Alphanumeric characters and the
    non-alphanumeric ones that are deemed safe are let to pass through (see L{valid_chars}).
    Some non-safe characters are escaped to safe forms for example C{>} becomes C{__lt__}
    (see L{mapped_chars}). All other characters are replaced with C{X}.

    Operates on string or list values only (HTTP parameters).

    >>> values = { 'status':'on', 'symbols':[ 'alpha', '<>', '$rm&#!' ] }
    >>> par = Params(values)
    >>> par.status
    'on'
    >>> par.value == None # missing attributes return None
    True
    >>> par.get('price', 0)
    0
    >>> par.symbols # replaces unknown symbols with X
    ['alpha', '__lt____gt__', 'XrmX__pd__!']
    >>> sorted(par.flatten()) # flattening to a list
    [('status', 'on'), ('symbols', 'XrmX__pd__!'), ('symbols', '__lt____gt__'), ('symbols', 'alpha')]
    """

    # is NEVER_SANITIZE required now that sanitizing for tool parameters can be
    # controlled on a per parameter basis and occurs via InputValueWrappers?
    NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths']

    def __init__(self, params, sanitize=True):
        if not sanitize:
            self.__dict__.update(params)
            return
        for key, value in params.items():
            # Skip sanitization for None values and for parameters exempt by
            # name -- both ungrouped ("name") and grouped ("...|name") forms.
            exempt = (
                value is None
                or key in self.NEVER_SANITIZE
                or any(key.endswith("|%s" % name) for name in self.NEVER_SANITIZE)
            )
            self.__dict__[key] = value if exempt else sanitize_param(value)

    def flatten(self):
        """
        Creates a tuple list from a dict with a tuple/value pair for every value that is a list
        """
        pairs = []
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                pairs.extend((key, element) for element in value)
            else:
                pairs.append((key, value))
        return pairs

    def __getattr__(self, name):
        """This is here to ensure that we get None for non existing parameters"""
        return None

    def get(self, key, default):
        return self.__dict__.get(key, default)

    def __str__(self):
        return '%s' % self.__dict__

    def __len__(self):
        return len(self.__dict__)

    def __iter__(self):
        return iter(self.__dict__)

    def update(self, values):
        self.__dict__.update(values)
871 | |
872 | |
def rst_to_html(s, error=False):
    """Convert a blob of reStructuredText to HTML.

    :param s: reStructuredText source text
    :param error: if True, raise instead of logging docutils warnings
    :raises Exception: when docutils is not installed, or (with error=True)
        when docutils writes any non-whitespace warning output
    """
    log = get_logger("docutils")

    if docutils_core is None:
        raise Exception("Attempted to use rst_to_html but docutils unavailable.")

    class FakeStream(object):
        # Captures docutils' warning stream; escalates or logs each message.
        def write(self, str):
            if len(str) > 0 and not str.isspace():
                if error:
                    raise Exception(str)
                log.warning(str)

    settings_overrides = {
        "embed_stylesheet": False,
        "template": os.path.join(os.path.dirname(__file__), "docutils_template.txt"),
        "warning_stream": FakeStream(),
        "doctitle_xform": False,  # without option, very different rendering depending on
        # number of sections in help content.
    }

    return unicodify(docutils_core.publish_string(
        s, writer=docutils_html4css1.Writer(),
        settings_overrides=settings_overrides))
898 | |
899 | |
def xml_text(root, name=None):
    """Return text content of an element: attribute *name* if present and
    truthy, else the text of child element *name* (or of *root* itself when
    *name* is None). Newlines are removed and the result is stripped."""
    if name is None:
        elem = root
    else:
        # Try attribute first
        attr_value = root.get(name)
        if attr_value:
            return attr_value
        # Then try as element
        elem = root.find(name)
    if elem is None or not elem.text:
        # No luck, return empty string
        return ''
    return ''.join(elem.text.splitlines()).strip()
916 | |
917 | |
def parse_resource_parameters(resource_param_file):
    """Code shared between jobs and workflows for reading resource parameter configuration files.

    TODO: Allow YAML in addition to XML.
    """
    resource_parameters = {}
    if os.path.exists(resource_param_file):
        definitions_root = parse_xml(resource_param_file).getroot()
        for param_elem in definitions_root.findall("param"):
            resource_parameters[param_elem.get("name")] = param_elem

    return resource_parameters
932 | |
933 | |
# asbool implementation pulled from PasteDeploy
# Accepted lowercase spellings for boolean-ish strings; asbool() compares
# against these after strip().lower().
truthy = frozenset({'true', 'yes', 'on', 'y', 't', '1'})
falsy = frozenset({'false', 'no', 'off', 'n', 'f', '0'})
937 | |
938 | |
def asbool(obj):
    """Coerce *obj* to a bool (implementation pulled from PasteDeploy).

    Strings are matched (case-insensitively, stripped) against the module's
    ``truthy``/``falsy`` sets; anything else raises ValueError. Non-strings
    fall through to ``bool()``.
    """
    if not isinstance(obj, string_types):
        return bool(obj)
    lowered = obj.strip().lower()
    if lowered in truthy:
        return True
    if lowered in falsy:
        return False
    raise ValueError("String is not true/false: %r" % lowered)
949 | |
950 | |
def string_as_bool(string):
    """Return True when *string* spells an affirmative value (true/yes/on/1)."""
    return str(string).lower() in ('true', 'yes', 'on', '1')
956 | |
957 | |
def string_as_bool_or_none(string):
    """
    Returns True, None or False based on the argument:
        True if passed True, 'True', 'Yes', or 'On'
        None if passed None or 'None'
        False otherwise

    Note: string comparison is case-insensitive so lowercase versions of those
    function equivalently.
    """
    lowered = str(string).lower()
    if lowered in ('true', 'yes', 'on'):
        return True
    if lowered == 'none':
        return None
    return False
975 | |
976 | |
def listify(item, do_strip=False):
    """
    Make a single item a single item list.

    If *item* is a string, it is split on comma (``,``) characters to produce the list. Optionally, if *do_strip* is
    true, any extra whitespace around the split items is stripped.

    If *item* is a list it is returned unchanged. If *item* is a tuple, it is converted to a list and returned. If
    *item* evaluates to False, an empty list is returned.

    :type item: object
    :param item: object to make a list from
    :type do_strip: bool
    :param do_strip: strip whitespaces from around split items, if set to ``True``
    :rtype: list
    :returns: The input as a list
    """
    if not item:
        return []
    if isinstance(item, list):
        return item
    if isinstance(item, tuple):
        return list(item)
    if isinstance(item, string_types) and item.count(','):
        tokens = item.split(',')
        if do_strip:
            tokens = [token.strip() for token in tokens]
        return tokens
    return [item]
1007 | |
1008 | |
def commaify(amount):
    """Insert thousands separators into a numeric string, e.g. '1234567' -> '1,234,567'."""
    # Insert one comma per pass until the string stops changing.
    while True:
        with_comma = re.sub(r"^(-?\d+)(\d{3})", r'\g<1>,\g<2>', amount)
        if with_comma == amount:
            return with_comma
        amount = with_comma
1016 | |
1017 | |
def roundify(amount, sfs=2):
    """
    Take a number in string form and truncate to 'sfs' significant figures.
    """
    if len(amount) <= sfs:
        return amount
    # Keep the leading sfs digits and zero-pad back to the original length.
    return amount[:sfs].ljust(len(amount), '0')
1026 | |
1027 | |
def unicodify(value, encoding=DEFAULT_ENCODING, error='replace', strip_null=False):
    u"""
    Returns a Unicode string or None.

    :param value: object to coerce; ``None`` is passed through unchanged.
    :param encoding: codec used when decoding byte strings.
    :param error: error handler passed to the codec (default ``'replace'``).
    :param strip_null: when True, remove NUL (``'\\0'``) characters from the result.

    >>> assert unicodify(None) is None
    >>> assert unicodify('simple string') == u'simple string'
    >>> assert unicodify(3) == u'3'
    >>> assert unicodify(bytearray([115, 116, 114, 196, 169, 195, 177, 103])) == u'strĩñg'
    >>> assert unicodify(Exception(u'strĩñg')) == u'strĩñg'
    >>> assert unicodify('cómplǐcḁtëd strĩñg') == u'cómplǐcḁtëd strĩñg'
    >>> s = u'cómplǐcḁtëd strĩñg'; assert unicodify(s) == s
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1'), 'latin-1') == s
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1')) == u'l\ufffdt\ufffdn str\ufffd\ufffdg'
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1'), error='ignore') == u'ltn strg'
    >>> if PY2: assert unicodify(Exception(u'¼ cup of flour'.encode('latin-1')), error='ignore') == ' cup of flour'
    """
    if value is None:
        return value
    try:
        if isinstance(value, bytearray):
            # bytes(bytearray) gives a decodable bytes object on both py2/py3.
            value = bytes(value)
        elif not isinstance(value, string_types) and not isinstance(value, binary_type):
            # In Python 2, value is not an instance of basestring (i.e. str or unicode)
            # In Python 3, value is not an instance of bytes or str
            try:
                value = text_type(value)
            except Exception:
                value = str(value)
        # Now in Python 2, value is an instance of basestring, but may be not unicode
        # Now in Python 3, value is an instance of bytes or str
        if not isinstance(value, text_type):
            value = text_type(value, encoding, error)
    except Exception as e:
        msg = "Value '%s' could not be coerced to Unicode: %s('%s')" % (value, type(e).__name__, e)
        raise Exception(msg)
    if strip_null:
        return value.replace('\0', '')
    return value
1066 | |
1067 | |
def smart_str(s, encoding=DEFAULT_ENCODING, strings_only=False, errors='strict'):
    u"""
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.

    Adapted from an older, simpler version of django.utils.encoding.smart_str.

    >>> assert smart_str(None) == b'None'
    >>> assert smart_str(None, strings_only=True) is None
    >>> assert smart_str(3) == b'3'
    >>> assert smart_str(3, strings_only=True) == 3
    >>> s = b'a bytes string'; assert smart_str(s) == s
    >>> s = bytearray(b'a bytes string'); assert smart_str(s) == s
    >>> assert smart_str(u'a simple unicode string') == b'a simple unicode string'
    >>> assert smart_str(u'à strange ünicode ڃtring') == b'\\xc3\\xa0 strange \\xc3\\xbcnicode \\xda\\x83tring'
    >>> assert smart_str(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string', encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    >>> assert smart_str(bytearray(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string'), encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    """
    # strings_only short-circuits None and ints through unchanged.
    if strings_only and isinstance(s, (type(None), int)):
        return s
    if not isinstance(s, string_types) and not isinstance(s, (binary_type, bytearray)):
        # In Python 2, s is not an instance of basestring or bytearray
        # In Python 3, s is not an instance of str, bytes or bytearray
        s = str(s)
    # Now in Python 2, value is an instance of basestring or bytearray
    # Now in Python 3, value is an instance of str, bytes or bytearray
    if not isinstance(s, (binary_type, bytearray)):
        return s.encode(encoding, errors)
    elif s and encoding != DEFAULT_ENCODING:
        # Re-encode existing bytes from the default encoding to the target one.
        return s.decode(DEFAULT_ENCODING, errors).encode(encoding, errors)
    else:
        return s
1101 | |
1102 | |
def strip_control_characters(s):
    """Strip unicode control characters (category 'Cc') from a string."""
    kept = (c for c in unicodify(s) if unicodedata.category(c) != "Cc")
    return "".join(kept)
1106 | |
1107 | |
def strip_control_characters_nested(item):
    """Recursively strips control characters from lists, dicts, tuples."""

    def _clean(path, key, value):
        # Clean both the key and the value when they are strings.
        cleaned_key = strip_control_characters(key) if isinstance(key, string_types) else key
        cleaned_value = strip_control_characters(value) if isinstance(value, string_types) else value
        return cleaned_key, cleaned_value

    return remap(item, _clean)
1119 | |
1120 | |
def object_to_string(obj):
    """Hex-encode a bytes-like object (``b2a_hex`` is binascii's alias for ``hexlify``)."""
    return binascii.b2a_hex(obj)
1123 | |
1124 | |
def string_to_object(s):
    """Decode a hex string produced by ``object_to_string`` back to bytes."""
    return binascii.a2b_hex(s)
1127 | |
1128 | |
class ParamsWithSpecs(collections.defaultdict):
    """A defaultdict of parameter values driven by a dict of parameter specs.

    Each spec may define 'map' (a conversion callable), 'valid' (a predicate)
    and 'default' (returned for missing keys). Values are also exposed as
    attributes. Subclasses must implement the ``_param_*_error`` hooks to
    define error behavior.
    """

    def __init__(self, specs=None, params=None):
        # specs: name -> spec dict (may contain 'map', 'valid', 'default')
        # params: name -> raw value to convert/validate against specs
        self.specs = specs or dict()
        self.params = params or dict()
        for name, value in self.params.items():
            if name not in self.specs:
                self._param_unknown_error(name)
            if 'map' in self.specs[name]:
                try:
                    self.params[name] = self.specs[name]['map'](value)
                except Exception:
                    self._param_map_error(name, value)
            if 'valid' in self.specs[name]:
                # NOTE(review): 'valid' is applied to the pre-'map' value,
                # not the mapped one -- confirm this is intended.
                if not self.specs[name]['valid'](value):
                    self._param_vaildation_error(name, value)

        self.update(self.params)

    def __missing__(self, name):
        # Missing keys fall back to the spec-declared default.
        return self.specs[name]['default']

    def __getattr__(self, name):
        # Attribute-style access to parameter values.
        return self[name]

    def _param_unknown_error(self, name):
        # Hook: a parameter has no spec; subclasses must implement.
        raise NotImplementedError()

    def _param_map_error(self, name, value):
        # Hook: the 'map' callable raised; subclasses must implement.
        raise NotImplementedError()

    def _param_vaildation_error(self, name, value):
        # Hook: 'valid' predicate failed; subclasses must implement.
        # (sic: the misspelled name is kept -- subclasses override it as-is.)
        raise NotImplementedError()
1164 | |
1165 | |
def compare_urls(url1, url2, compare_scheme=True, compare_hostname=True, compare_path=True):
    """Compare two URLs component-wise.

    Returns False only when an enabled component is present in BOTH URLs and
    differs; missing components never cause a mismatch.
    """
    parsed1 = urlparse.urlparse(url1)
    parsed2 = urlparse.urlparse(url2)
    checks = (
        (compare_scheme, parsed1.scheme, parsed2.scheme),
        (compare_hostname, parsed1.hostname, parsed2.hostname),
        (compare_path, parsed1.path, parsed2.path),
    )
    for enabled, first, second in checks:
        if enabled and first and second and first != second:
            return False
    return True
1176 | |
1177 | |
def read_dbnames(filename):
    """ Read build names from file """
    # List subclass carrying the sentinel used when no build is specified.
    class DBNames(list):
        default_value = "?"
        default_name = "unspecified (?)"
    db_names = DBNames()
    try:
        ucsc_builds = {}
        man_builds = []  # assume these are integers
        name_to_db_base = {}
        if filename is None:
            # Should only be happening with the galaxy.tools.parameters.basic:GenomeBuildParameter docstring unit test
            filename = os.path.join('tool-data', 'shared', 'ucsc', 'builds.txt.sample')
        for line in open(filename):
            try:
                if line[0:1] == "#":
                    continue
                fields = line.replace("\r", "").replace("\n", "").split("\t")
                # Special case of unspecified build is at top of list
                if fields[0] == "?":
                    db_names.insert(0, (fields[0], fields[1]))
                    continue
                try:  # manual build (i.e. microbes)
                    int(fields[0])
                    man_builds.append((fields[1], fields[0]))
                except Exception:  # UCSC build
                    # e.g. 'hg19' -> base 'hg'; revisions grouped per base.
                    db_base = fields[0].rstrip('0123456789')
                    if db_base not in ucsc_builds:
                        ucsc_builds[db_base] = []
                        name_to_db_base[fields[1]] = db_base
                    # we want to sort within a species numerically by revision number
                    build_rev = re.compile(r'\d+$')
                    try:
                        build_rev = int(build_rev.findall(fields[0])[0])
                    except Exception:
                        build_rev = 0
                    ucsc_builds[db_base].append((build_rev, fields[0], fields[1]))
            except Exception:
                # Best-effort parsing: skip malformed lines.
                continue
        sort_names = sorted(name_to_db_base.keys())
        for name in sort_names:
            db_base = name_to_db_base[name]
            # Newest revision first within each species.
            ucsc_builds[db_base].sort()
            ucsc_builds[db_base].reverse()
            ucsc_builds[db_base] = [(build, name) for _, build, name in ucsc_builds[db_base]]
            db_names = DBNames(db_names + ucsc_builds[db_base])
        if len(db_names) > 1 and len(man_builds) > 0:
            # Manual (non-UCSC) builds are appended after a separator entry.
            db_names.append((db_names.default_value, '----- Additional Species Are Below -----'))
            man_builds.sort()
            man_builds = [(build, name) for name, build in man_builds]
            db_names = DBNames(db_names + man_builds)
    except Exception as e:
        log.error("ERROR: Unable to read builds file: %s", unicodify(e))
    if len(db_names) < 1:
        # Guarantee at least the 'unspecified' entry.
        db_names = DBNames([(db_names.default_value, db_names.default_name)])
    return db_names
1234 | |
1235 | |
def read_build_sites(filename, check_builds=True):
    """Read db-name-to-UCSC-site mappings from a tab-separated file.

    Each data line is ``name<TAB>url[<TAB>build1,build2,...]``; lines starting
    with '#' and malformed lines are skipped. This file should probably be
    merged with the builds file handled by ``read_dbnames``.
    """
    build_sites = []
    try:
        for line in open(filename):
            try:
                if line[0:1] == "#":
                    continue
                fields = line.replace("\r", "").replace("\n", "").split("\t")
                site_dict = {'name': fields[0], 'url': fields[1]}
                if check_builds:
                    site_dict['builds'] = fields[2].split(",")
                build_sites.append(site_dict)
            except Exception:
                # Best-effort parsing: skip malformed lines.
                continue
    except Exception:
        log.error("ERROR: Unable to read builds for site file %s", filename)
    return build_sites
1258 | |
1259 | |
def relativize_symlinks(path, start=None, followlinks=False):
    """Rewrite every symlink under *path* so its target is relative.

    Targets become relative to *start* when given, otherwise to the directory
    containing each link.
    """
    for root, _dirs, files in os.walk(path, followlinks=followlinks):
        rel_start = None
        for file_name in files:
            link_path = os.path.join(root, file_name)
            if not os.path.islink(link_path):
                continue
            target = os.readlink(link_path)
            if rel_start is None:
                # Compute the base directory once per walked directory.
                rel_start = root if start is None else start
            relative_target = relpath(target, rel_start)
            # Replace the link in place with the relative version.
            os.remove(link_path)
            os.symlink(relative_target, link_path)
1275 | |
1276 | |
def stringify_dictionary_keys(in_dict):
    """Return a new dict with every top-level key coerced to ``str``.

    Only the top level is converted (no recursion); unicode keys are not
    valid for expansion into keyword arguments on method calls.
    """
    return dict((str(key), value) for key, value in iteritems(in_dict))
1285 | |
1286 | |
def mkstemp_ln(src, prefix='mkstemp_ln_'):
    """
    From tempfile._mkstemp_inner, generate a hard link in the same dir with a
    random name. Created so we can persist the underlying file of a
    NamedTemporaryFile upon its closure.

    :param src: path of the existing file to hard-link
    :param prefix: prefix for the generated link name
    :returns: absolute path of the new hard link
    :raises IOError: if no unused candidate name is found
    """
    dir = os.path.dirname(src)
    # NOTE: relies on tempfile's private candidate-name generator.
    names = tempfile._get_candidate_names()
    for seq in xrange(tempfile.TMP_MAX):
        name = next(names)
        file = os.path.join(dir, prefix + name)
        try:
            os.link(src, file)
            return (os.path.abspath(file))
        except OSError as e:
            # Name collision: keep trying; anything else is a real error.
            if e.errno == errno.EEXIST:
                continue  # try again
            raise
    raise IOError(errno.EEXIST, "No usable temporary file name found")
1306 | |
1307 | |
def umask_fix_perms(path, umask, unmasked_perms, gid=None):
    """
    umask-friendly permissions fixing

    :param path: filesystem path to fix
    :param umask: umask bits to clear from ``unmasked_perms``
    :param unmasked_perms: desired permission bits before applying the umask
    :param gid: when given, also chown the path to this group id
    """
    # Target mode: requested permissions with the umask bits removed.
    perms = unmasked_perms & ~umask
    try:
        st = os.stat(path)
    except OSError:
        log.exception('Unable to set permissions or group on %s', path)
        return
    # fix modes
    if stat.S_IMODE(st.st_mode) != perms:
        try:
            os.chmod(path, perms)
        except Exception as e:
            # Best-effort: log and continue rather than fail the caller.
            log.warning('Unable to honor umask (%s) for %s, tried to set: %s but mode remains %s, error was: %s' % (oct(umask),
                                                                                                                    path,
                                                                                                                    oct(perms),
                                                                                                                    oct(stat.S_IMODE(st.st_mode)),
                                                                                                                    unicodify(e)))
    # fix group
    if gid is not None and st.st_gid != gid:
        try:
            os.chown(path, -1, gid)
        except Exception as e:
            try:
                # Resolve gids to group entries for a friendlier log message.
                desired_group = grp.getgrgid(gid)
                current_group = grp.getgrgid(st.st_gid)
            except Exception:
                desired_group = gid
                current_group = st.st_gid
            log.warning('Unable to honor primary group (%s) for %s, group remains %s, error was: %s' % (desired_group,
                                                                                                        path,
                                                                                                        current_group,
                                                                                                        unicodify(e)))
1343 | |
1344 | |
def docstring_trim(docstring):
    """Trim indentation from a docstring. Taken from: http://www.python.org/dev/peps/pep-0257/"""
    if not docstring:
        return ''
    # Convert tabs to spaces (following the normal Python rules) and split.
    lines = docstring.expandtabs().splitlines()
    # Determine minimum indentation (first line doesn't count).
    indents = [len(line) - len(line.lstrip()) for line in lines[1:] if line.lstrip()]
    indent = min(indents) if indents else sys.maxsize
    # First line is stripped outright; remaining lines lose the common indent.
    trimmed = [lines[0].strip()]
    if indent < sys.maxsize:
        trimmed.extend(line[indent:].rstrip() for line in lines[1:])
    # Strip off trailing and leading blank lines.
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    return '\n'.join(trimmed)
1370 | |
1371 | |
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100 bytes'
    >>> nice_size(10000)
    '9.8 KB'
    >>> nice_size(1000000)
    '976.6 KB'
    >>> nice_size(100000000)
    '95.4 MB'
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    try:
        value = float(size)
    except Exception:
        return '??? bytes'
    sign = ''
    if value < 0:
        sign = '-'
        value = -value
    for exponent, unit in enumerate(units):
        if value < 1024 ** (exponent + 1):
            scaled = value / float(1024 ** exponent)
            if unit == 'bytes':
                # No decimals for plain bytes.
                return "%s%d bytes" % (sign, scaled)
            return "%s%.1f %s" % (sign, scaled, unit)
    # Larger than the biggest unit we know about.
    return '??? bytes'
1402 | |
1403 | |
def size_to_bytes(size):
    """
    Returns a number of bytes (as integer) if given a reasonably formatted string with the size

    >>> size_to_bytes('1024')
    1024
    >>> size_to_bytes('1.0')
    1
    >>> size_to_bytes('10 bytes')
    10
    >>> size_to_bytes('4k')
    4096
    >>> size_to_bytes('2.2 TB')
    2418925581107
    """
    # The following number regexp is based on https://stackoverflow.com/questions/385558/extract-float-double-value/385597#385597
    size_re = re.compile(r'(?P<number>(\d+(\.\d*)?|\.\d+)(e[+-]?\d+)?)\s*(?P<multiple>[eptgmk]?(b|bytes?)?)?$')
    match = size_re.match(size.lower())
    if match is None:
        raise ValueError("Could not parse string '%s'" % size)
    number = float(match.group("number"))
    multiple = match.group("multiple")
    # Bare numbers and plain byte suffixes need no scaling.
    if multiple == "" or multiple.startswith('b'):
        return int(number)
    # Map the unit prefix letter to its power of 1024.
    exponents = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    if multiple[0] in exponents:
        return int(number * 1024 ** exponents[multiple[0]])
    raise ValueError("Unknown multiplier '%s' in '%s'" % (multiple, size))
1448 | |
1449 | |
def send_mail(frm, to, subject, body, config, html=None):
    """
    Sends an email.

    :type frm: str
    :param frm: from address

    :type to: str
    :param to: to address

    :type subject: str
    :param subject: Subject line

    :type body: str
    :param body: Body text (should be plain text)

    :type config: object
    :param config: Galaxy configuration object

    :type html: str
    :param html: Alternative HTML representation of the body content. If
                 provided will convert the message to a MIMEMultipart. (Default 'None')
    """

    to = listify(to)
    if html:
        msg = MIMEMultipart('alternative')
    else:
        msg = MIMEText(body, 'plain', 'utf-8')

    msg['To'] = ', '.join(to)
    msg['From'] = frm
    msg['Subject'] = subject

    if config.smtp_server is None:
        # No SMTP configured: log the message instead of sending.
        log.error("Mail is not configured for this Galaxy instance.")
        log.info(msg)
        return

    if html:
        # Plain-text part first so HTML-capable clients prefer the HTML part.
        mp_text = MIMEText(body, 'plain', 'utf-8')
        mp_html = MIMEText(html, 'html', 'utf-8')
        msg.attach(mp_text)
        msg.attach(mp_html)

    smtp_ssl = asbool(getattr(config, 'smtp_ssl', False))
    if smtp_ssl:
        s = smtplib.SMTP_SSL(config.smtp_server)
    else:
        s = smtplib.SMTP(config.smtp_server)
    if not smtp_ssl:
        # Opportunistic STARTTLS on plain connections; tolerate servers
        # (and interpreters) that don't support it.
        try:
            s.starttls()
            log.debug('Initiated SSL/TLS connection to SMTP server: %s', config.smtp_server)
        except RuntimeError as e:
            log.warning('SSL/TLS support is not available to your Python interpreter: %s', unicodify(e))
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.warning('The server does not support the STARTTLS extension: %s', unicodify(e))
    if config.smtp_username and config.smtp_password:
        try:
            s.login(config.smtp_username, config.smtp_password)
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPAuthenticationError as e:
            log.error("The server didn't accept the username/password combination: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.error("No suitable authentication method was found: %s", unicodify(e))
            s.close()
            raise
    s.sendmail(frm, to, msg.as_string())
    s.quit()
1529 | |
1530 | |
def force_symlink(source, link_name):
    """Create a symlink at *link_name*, replacing any existing link/file there."""
    try:
        os.symlink(source, link_name)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise e
        # Something already exists at link_name: remove it and retry.
        os.remove(link_name)
        os.symlink(source, link_name)
1540 | |
1541 | |
def move_merge(source, target):
    """Move *source* to *target*, merging directory contents.

    Plain ``shutil.move`` places a moved directory *inside* an existing
    target directory; this keeps *target* as the destination: when both are
    directories, the children of *source* are recursively merged into
    *target*.
    """
    if not (os.path.isdir(source) and os.path.isdir(target)):
        # File move, or target doesn't exist yet: defer to shutil.
        return shutil.move(source, target)
    for entry in os.listdir(source):
        move_merge(os.path.join(source, entry), os.path.join(target, entry))
1553 | |
1554 | |
def safe_str_cmp(a, b):
    """Compare two equal-length strings in a timing-attack-resistant manner.

    Accumulates XOR differences over every character pair so the comparison
    time does not depend on where the first mismatch occurs.
    """
    if len(a) != len(b):
        return False
    acc = 0
    for ch_a, ch_b in zip(a, b):
        acc |= ord(ch_a) ^ ord(ch_b)
    return acc == 0
1564 | |
1565 | |
# Filesystem locations derived from this package's installed location
# (__path__[0] is this package's directory).
galaxy_root_path = os.path.join(__path__[0], os.pardir, os.pardir, os.pardir)
galaxy_samples_path = os.path.join(__path__[0], os.pardir, 'config', 'sample')
1568 | |
1569 | |
def galaxy_directory():
    """Return the absolute path of the Galaxy root directory."""
    root_path = os.path.abspath(galaxy_root_path)
    # Installed-package layout: the real root is one level above 'packages'.
    if os.path.basename(root_path) == "packages":
        root_path = os.path.dirname(root_path)
    return root_path
1575 | |
1576 | |
def galaxy_samples_directory():
    """Return the absolute path to Galaxy's sample config directory."""
    return os.path.abspath(galaxy_samples_path)
1579 | |
1580 | |
def config_directories_from_setting(directories_setting, galaxy_root=galaxy_root_path):
    """
    Parse the ``directories_setting`` into a list of relative or absolute
    filesystem paths that will be searched to discover plugins.

    :type galaxy_root: string
    :param galaxy_root: the root path of this galaxy installation
    :type directories_setting: string (default: None)
    :param directories_setting: the filesystem path (or paths)
        to search for plugins. Can be CSV string of paths. Will be treated as
        absolute if a path starts with '/', relative otherwise.
    :rtype: list of strings
    :returns: list of filesystem paths
    """
    found = []
    # listify('' / None) is [], so an empty setting yields an empty list.
    for raw_path in listify(directories_setting):
        candidate = raw_path.strip()
        # Relative paths are resolved against the Galaxy root.
        if not candidate.startswith('/'):
            candidate = os.path.join(galaxy_root, candidate)
        if not os.path.exists(candidate):
            log.warning('directory not found: %s', candidate)
            continue
        found.append(candidate)
    return found
1608 | |
1609 | |
def parse_int(value, min_val=None, max_val=None, default=None, allow_none=False):
    """Parse *value* as an int, clamped to ``[min_val, max_val]``.

    On a ValueError: return None when ``allow_none`` and either no default is
    set or the value is the literal string "None"; otherwise return
    ``default`` when one was provided, else re-raise.

    :raises ValueError: when the value is unparseable and no fallback applies
    """
    try:
        value = int(value)
        if min_val is not None and value < min_val:
            return min_val
        if max_val is not None and value > max_val:
            return max_val
        return value
    except ValueError:
        if allow_none:
            if default is None or value == "None":
                return None
        # BUGFIX: compare against None instead of truthiness, so a falsy
        # default such as 0 is honored rather than re-raising.
        if default is not None:
            return default
        raise
1626 | |
1627 | |
def parse_non_hex_float(s):
    r"""
    Parse string `s` into a float but throw a `ValueError` if the string is in
    the otherwise acceptable format `\d+e\d+` (e.g. 40000000000000e5.)

    This can be passed into `json.loads` to prevent a hex string in the above
    format from being incorrectly parsed as a float in scientific notation.

    >>> parse_non_hex_float( '123.4' )
    123.4
    >>> parse_non_hex_float( '2.45e+3' )
    2450.0
    >>> parse_non_hex_float( '2.45e-3' )
    0.00245
    >>> parse_non_hex_float( '40000000000000e5' )
    Traceback (most recent call last):
    ...
    ValueError: could not convert string to float: 40000000000000e5
    """
    value = float(s)
    # Parsed fine -- but reject an 'e' with no explicit sign, since that is
    # likely a hex digest rather than scientific notation.
    if 'e' in s and '+' not in s and '-' not in s:
        raise ValueError('could not convert string to float: ' + s)
    return value
1652 | |
1653 | |
def build_url(base_url, port=80, scheme='http', pathspec=None, params=None, doseq=False):
    """Assemble a URL from its parts.

    :param base_url: base URL; any query string it carries is merged into *params*
    :param port: included in the URL only when it is not 80
    :param scheme: URL scheme; must end up as http, https or ftp
    :param pathspec: optional list of path components appended to the URL
    :param params: optional dict of query parameters
    :param doseq: passed through to ``urlencode`` for sequence-valued params
    :returns: the assembled URL string
    """
    if params is None:
        params = dict()
    if pathspec is None:
        pathspec = []
    parsed_url = urlparse.urlparse(base_url)
    if scheme != 'http':
        # BUGFIX: ParseResult is an immutable namedtuple, so the previous
        # `parsed_url.scheme = scheme` raised AttributeError; use _replace.
        parsed_url = parsed_url._replace(scheme=scheme)
    assert parsed_url.scheme in ('http', 'https', 'ftp'), 'Invalid URL scheme: %s' % scheme
    if port != 80:
        url = '%s://%s:%d/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), int(port), parsed_url.path)
    else:
        url = '%s://%s/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), parsed_url.path.lstrip('/'))
    if len(pathspec) > 0:
        url = '%s/%s' % (url.rstrip('/'), '/'.join(pathspec))
    if parsed_url.query:
        # Merge any query parameters already present on base_url.
        for query_parameter in parsed_url.query.split('&'):
            key, value = query_parameter.split('=')
            params[key] = value
    if params:
        url += '?%s' % urlparse.urlencode(params, doseq=doseq)
    return url
1676 | |
1677 | |
def url_get(base_url, auth=None, pathspec=None, params=None, max_retries=5, backoff_factor=1):
    """Make contact with the uri provided and return any contents.

    Retries (with backoff) on HTTP 429 responses; raises for other HTTP
    error statuses.
    """
    full_url = build_url(base_url, pathspec=pathspec, params=params)
    session = requests.Session()
    retry_policy = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=[429])
    session.mount(base_url, HTTPAdapter(max_retries=retry_policy))
    response = session.get(full_url, auth=auth)
    response.raise_for_status()
    return response.text
1687 | |
1688 | |
def download_to_file(url, dest_file_path, timeout=30, chunk_size=2 ** 20):
    """Download a URL to a file in chunks."""
    with requests.get(url, timeout=timeout, stream=True) as response, open(dest_file_path, 'wb') as out:
        for chunk in response.iter_content(chunk_size):
            if chunk:  # skip keep-alive chunks
                out.write(chunk)
1695 | |
1696 | |
def get_executable():
    """Return a path to a Python interpreter suitable for launching subprocesses.

    When running under uWSGI, ``sys.executable`` is the uwsgi binary, so derive
    the interpreter from the configured virtualenv (or the binary's directory).
    """
    exe = sys.executable
    if not exe.endswith('uwsgi'):
        return exe
    virtualenv = None
    if uwsgi is not None:
        # uWSGI exposes the venv under several possible option names.
        for opt_name in ('home', 'virtualenv', 'venv', 'pyhome'):
            if opt_name in uwsgi.opt:
                virtualenv = unicodify(uwsgi.opt[opt_name])
                break
    if virtualenv is None and 'VIRTUAL_ENV' in os.environ:
        virtualenv = os.environ['VIRTUAL_ENV']
    if virtualenv is not None:
        exe = os.path.join(virtualenv, 'bin', 'python')
    else:
        exe = os.path.join(os.path.dirname(exe), 'python')
    if not os.path.exists(exe):
        # Last resort: rely on PATH lookup.
        exe = 'python'
    return exe
1715 | |
1716 | |
class ExecutionTimer(object):
    """Tracks wall-clock time since construction; str() renders elapsed milliseconds."""

    def __init__(self):
        # Start the clock at construction time.
        self.begin = time.time()

    def __str__(self):
        return "(%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since this timer was created."""
        return time.time() - self.begin
1728 | |
1729 | |
class StructuredExecutionTimer(object):
    """Timer with an id, a ``string.Template`` message and arbitrary tags."""

    def __init__(self, timer_id, template, **tags):
        # Start the clock and remember how to render the log message.
        self.begin = time.time()
        self.timer_id = timer_id
        self.template = template
        self.tags = tags

    def __str__(self):
        return self.to_str()

    def to_str(self, **kwd):
        """Render the message template (substituting *kwd* if given) plus elapsed ms."""
        if kwd:
            message = string.Template(self.template).safe_substitute(kwd)
        else:
            message = self.template
        return message + " (%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since this timer was created."""
        return time.time() - self.begin
1752 | |
1753 | |
if __name__ == '__main__':
    # Run this module's doctests when executed directly.
    import doctest
    doctest.testmod(sys.modules[__name__], verbose=False)