comparison planemo/lib/python3.7/site-packages/galaxy/util/__init__.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 # -*- coding: utf-8 -*-
2 """
3 Utility functions used systemwide.
4
5 """
6 from __future__ import absolute_import
7
8 import binascii
9 import collections
10 import errno
11 import importlib
12 import json
13 import os
14 import random
15 import re
16 import shutil
17 import smtplib
18 import stat
19 import string
20 import sys
21 import tempfile
22 import threading
23 import time
24 import unicodedata
25 import xml.dom.minidom
26 from datetime import datetime
27 from email.mime.multipart import MIMEMultipart
28 from email.mime.text import MIMEText
29 from functools import partial
30 from hashlib import md5
31 from os.path import relpath
32
33 import requests
34 try:
35 import grp
36 except ImportError:
37 # For Pulsar on Windows (which does not use the function that uses grp)
38 grp = None
39 from boltons.iterutils import (
40 default_enter,
41 remap,
42 )
43 LXML_AVAILABLE = True
44 try:
45 from lxml import etree
46 except ImportError:
47 LXML_AVAILABLE = False
48 import xml.etree.ElementTree as etree
49 from requests.adapters import HTTPAdapter
50 from requests.packages.urllib3.util.retry import Retry
51 from six import binary_type, iteritems, PY2, string_types, text_type
52 from six.moves import (
53 xrange,
54 zip
55 )
56 from six.moves.urllib import parse as urlparse
57
58 try:
59 import docutils.core as docutils_core
60 import docutils.writers.html4css1 as docutils_html4css1
61 except ImportError:
62 docutils_core = None
63 docutils_html4css1 = None
64
65 try:
66 import uwsgi
67 except ImportError:
68 uwsgi = None
69
70 from .custom_logging import get_logger
71 from .inflection import English, Inflector
72 from .path import safe_contains, safe_makedirs, safe_relpath # noqa: F401
73
# Inflector used to pluralize/singularize English words in messages.
inflector = Inflector(English)

log = get_logger(__name__)
# Module-wide re-entrant lock; used by the synchronized() decorator below.
_lock = threading.RLock()

# Re-exported for the convenience of importers of this module.
namedtuple = collections.namedtuple

CHUNK_SIZE = 65536  # 64k

# Maximum length (and its human-readable form) used when truncating strings
# bound for database text columns; see shrink_and_unicodify() below.
DATABASE_MAX_STRING_SIZE = 32768
DATABASE_MAX_STRING_SIZE_PRETTY = '32K'

# Magic bytes identifying gzip- and bzip2-compressed data.
gzip_magic = b'\x1f\x8b'
bz2_magic = b'BZh'
DEFAULT_ENCODING = os.environ.get('GALAXY_DEFAULT_ENCODING', 'utf-8')
NULL_CHAR = b'\x00'
# Bytes whose presence marks content as binary; see is_binary().
BINARY_CHARS = [NULL_CHAR]
FILENAME_VALID_CHARS = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Common permission bit sets: 0o644, 0o755 and 0o777 respectively.
RW_R__R__ = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
RWXR_XR_X = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
RWXRWXRWX = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

# Parse a string with whichever etree implementation was imported above.
XML = etree.XML

defaultdict = collections.defaultdict
100
101
def remove_protocol_from_url(url):
    """Strip a leading scheme (``http://``, ``https://``, etc.) from ``url``.

    ``url`` may be None, in which case it is returned unchanged. Trailing
    slashes are removed from the result.
    """
    if url is None:
        return url
    # Only strip when the scheme separator appears past the first character,
    # mirroring the original ``find('://') > 0`` behavior.
    stripped = url.split('://')[1] if url.find('://') > 0 else url
    return stripped.rstrip('/')
115
116
def is_binary(value):
    """
    File is binary if it contains a null-byte by default (e.g. behavior of grep, etc.).
    This may fail for utf-16 files, but so would ASCII encoding.
    >>> is_binary( string.printable )
    False
    >>> is_binary( b'\\xce\\x94' )
    False
    >>> is_binary( b'\\x00' )
    True
    """
    data = smart_str(value)
    # Binary when any of the marker byte sequences occurs in the content.
    return any(marker in data for marker in BINARY_CHARS)
133
134
def is_uuid(value):
    """
    This method returns True if value is a UUID, otherwise False.

    The whole value must be a lowercase, dash-separated UUID: ``re.match``
    only anchors at the start of the string, so an explicit ``\\Z`` anchor is
    required to reject values with trailing garbage after a valid UUID.

    >>> is_uuid( "123e4567-e89b-12d3-a456-426655440000" )
    True
    >>> is_uuid( "0x3242340298902834" )
    False
    """
    uuid_re = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z")
    return bool(uuid_re.match(str(value)))
148
149
def directory_hash_id(id):
    """

    >>> directory_hash_id( 100 )
    ['000']
    >>> directory_hash_id( "90000" )
    ['090']
    >>> directory_hash_id("777777777")
    ['000', '777', '777']
    >>> directory_hash_id("135ee48a-4f51-470c-ae2f-ce8bd78799e6")
    ['1', '3', '5']
    """
    as_string = str(id)
    # Shortcut -- ids 0-999 go under ../000/
    if len(as_string) < 4:
        return ["000"]
    if is_uuid(as_string):
        # UUIDs: one directory level per character of the first three.
        return [as_string[0], as_string[1], as_string[2]]
    # Pad with zeros until a multiple of three, then drop the last three
    # digits (1000 files per leaf directory) and cut into 3-char chunks.
    padded = ((3 - len(as_string) % 3) * "0") + as_string
    padded = padded[:-3]
    return [padded[pos:pos + 3] for pos in range(0, len(padded), 3)]
177
178
def get_charset_from_http_headers(headers, default=None):
    """Extract the charset from a Content-Type header value, or ``default``."""
    content_type = headers.get('content-type', None)
    if content_type and 'charset=' in content_type:
        # Take the token after 'charset=' up to any following ';'.
        charset = content_type.split('charset=')[-1].split(';')[0].strip()
        if charset:
            return charset
    return default
186
187
def synchronized(func):
    """This wrapper will serialize access to 'func' to a single thread. Use it as a decorator.

    The wrapped callable acquires the module-level re-entrant lock before
    delegating to ``func`` and always releases it afterwards.
    """
    from functools import wraps

    # wraps() preserves func's __name__/__doc__ so decorated functions stay
    # introspectable (the bare wrapper used to report as 'caller').
    @wraps(func)
    def caller(*params, **kparams):
        _lock.acquire(True)  # Wait
        try:
            return func(*params, **kparams)
        finally:
            _lock.release()
    return caller
197
198
def iter_start_of_line(fh, chunk_size=None):
    """
    Iterate over fh and call readline(chunk_size)
    """
    while True:
        line = fh.readline(chunk_size)
        if line == "":
            # readline() returns the empty string at EOF.
            break
        yield line
205
206
def file_reader(fp, chunk_size=CHUNK_SIZE):
    """This generator yields the open fileobject in chunks (default 64k). Closes the file at the end"""
    chunk = fp.read(chunk_size)
    while chunk:
        yield chunk
        chunk = fp.read(chunk_size)
    # Close only after normal exhaustion, matching the original behavior.
    fp.close()
215
216
def unique_id(KEY_SIZE=128):
    """
    Generates an unique id

    >>> ids = [ unique_id() for i in range(1000) ]
    >>> len(set(ids))
    1000
    """
    # Hash KEY_SIZE random bits to produce a fixed-width hex identifier.
    bits = random.getrandbits(KEY_SIZE)
    return md5(text_type(bits).encode("UTF-8")).hexdigest()
227
228
def parse_xml(fname, strip_whitespace=True, remove_comments=True):
    """Return an ElementTree parsed from the file ``fname``.

    :param fname: path of the XML file to parse
    :param strip_whitespace: if True, strip surrounding whitespace from the
        text and tail of every element
    :param remove_comments: if True and lxml is available, drop XML comments
        while parsing (the stdlib parser always removes them)
    :raises IOError: if the file cannot be read (errno is filled in for lxml)
    """
    parser = None
    if remove_comments and LXML_AVAILABLE:
        # If using stdlib etree comments are always removed,
        # but lxml doesn't do this by default
        parser = etree.XMLParser(remove_comments=remove_comments)
    try:
        tree = etree.parse(fname, parser=parser)
        root = tree.getroot()
        if strip_whitespace:
            # Trim surrounding whitespace from every element's text and tail.
            for elem in root.iter('*'):
                if elem.text is not None:
                    elem.text = elem.text.strip()
                if elem.tail is not None:
                    elem.tail = elem.tail.strip()
    except IOError as e:
        if e.errno is None and not os.path.exists(fname):
            # lxml doesn't set errno
            e.errno = errno.ENOENT
        raise
    except etree.ParseError:
        log.exception("Error parsing file %s", fname)
        raise
    return tree
254
255
def parse_xml_string(xml_string, strip_whitespace=True):
    """Parse ``xml_string`` and return the root element.

    Falls back to encoding the string as UTF-8 when the parser rejects a
    unicode string carrying an XML encoding declaration.
    """
    try:
        root = etree.fromstring(xml_string)
    except ValueError as e:
        if 'strings with encoding declaration are not supported' not in unicodify(e):
            raise e
        root = etree.fromstring(xml_string.encode('utf-8'))
    if strip_whitespace:
        # Trim surrounding whitespace from every element's text and tail.
        for node in root.iter('*'):
            if node.text is not None:
                node.text = node.text.strip()
            if node.tail is not None:
                node.tail = node.tail.strip()
    return root
271
272
def xml_to_string(elem, pretty=False):
    """
    Returns a string from an xml tree.

    :param elem: the element (or None) to serialize; None yields ''
    :param pretty: if True, re-indent the output and drop blank lines
    """
    try:
        if elem is not None:
            if PY2:
                # Python 2: tostring() with a codec name returns bytes.
                xml_str = etree.tostring(elem, encoding='utf-8')
            else:
                # Python 3: 'unicode' requests a native str result.
                xml_str = etree.tostring(elem, encoding='unicode')
        else:
            xml_str = ''
    except TypeError as e:
        # we assume this is a comment
        if hasattr(elem, 'text'):
            return u"<!-- %s -->\n" % elem.text
        else:
            raise e
    if xml_str and pretty:
        # Re-indent, then drop the whitespace-only lines toprettyxml emits.
        pretty_string = xml.dom.minidom.parseString(xml_str).toprettyxml(indent=' ')
        return "\n".join(line for line in pretty_string.split('\n') if not re.match(r'^[\s\\nb\']*$', line))
    return xml_str
295
296
def xml_element_compare(elem1, elem2):
    """Compare two XML elements through their dict representations."""
    dict1 = elem1 if isinstance(elem1, dict) else xml_element_to_dict(elem1)
    dict2 = elem2 if isinstance(elem2, dict) else xml_element_to_dict(elem2)
    return dict1 == dict2
303
304
def xml_element_list_compare(elem_list1, elem_list2):
    """Compare two sequences of XML elements through their dict representations."""
    return list(map(xml_element_to_dict, elem_list1)) == list(map(xml_element_to_dict, elem_list2))
307
308
def xml_element_to_dict(elem):
    """Convert an XML element (and its subtree) into a nested dict.

    Attributes are stored under ``@name`` keys, element text under
    ``#text`` (or directly as the value when the element has neither
    children nor attributes); repeated child tags become lists.
    """
    rval = {}
    if elem.attrib:
        rval[elem.tag] = {}
    else:
        rval[elem.tag] = None

    sub_elems = list(elem)
    if sub_elems:
        sub_elem_dict = dict()
        # Group converted children by tag so repeated tags become lists.
        for sub_sub_elem_dict in map(xml_element_to_dict, sub_elems):
            for key, value in iteritems(sub_sub_elem_dict):
                if key not in sub_elem_dict:
                    sub_elem_dict[key] = []
                sub_elem_dict[key].append(value)
        for key, value in iteritems(sub_elem_dict):
            if len(value) == 1:
                # Single occurrence: unwrap the one-element list.
                rval[elem.tag][key] = value[0]
            else:
                rval[elem.tag][key] = value
    if elem.attrib:
        for key, value in iteritems(elem.attrib):
            rval[elem.tag]["@%s" % key] = value

    if elem.text:
        text = elem.text.strip()
        # NOTE(review): parses as `(text and sub_elems) or elem.attrib`, so an
        # attribute-bearing element gets a '#text' entry even when the
        # stripped text is empty -- confirm this is intended.
        if text and sub_elems or elem.attrib:
            rval[elem.tag]['#text'] = text
        else:
            rval[elem.tag] = text

    return rval
341
342
def pretty_print_xml(elem, level=0):
    """Indent an XML tree in place for human-readable serialization.

    Only whitespace-only ``text``/``tail`` values are rewritten, so real
    content is never modified; returns the (mutated) ``elem``.
    """
    pad = ' '
    i = "\n" + level * pad
    if len(elem):
        # Element has children: indent its text and each child's tail.
        if not elem.text or not elem.text.strip():
            elem.text = i + pad + pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for e in elem:
            pretty_print_xml(e, level + 1)
        # Re-adjust the last child's tail to close at the parent level.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i + pad
    return elem
359
360
def get_file_size(value, default=None):
    """Return the size of ``value`` in bytes.

    ``value`` may be a filesystem path, an object with a ``name`` attribute
    naming a file, or a seekable file-like object. Returns ``default`` when
    the size cannot be determined.
    """
    # 1) treat value as a path
    try:
        return os.path.getsize(value)
    except Exception:
        pass
    # 2) treat value as a file object exposing its path via .name
    try:
        return os.path.getsize(value.name)
    except Exception:
        pass
    # 3) seek to the end and read the offset, restoring the position after
    try:
        position = value.tell()
        value.seek(0, 2)
        size = value.tell()
        value.seek(position)
        return size
    except Exception:
        return default
380
381
def shrink_stream_by_size(value, size, join_by=b"..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """
    Shrinks bytes read from `value` to `size`.

    `value` needs to implement tell/seek, so files need to be opened in binary mode.
    Returns unicode text with invalid characters replaced.

    :param join_by: bytes placed between the kept head and tail of the stream
    :param left_larger: when the kept byte count is odd, give the extra byte
        to the head (True) or the tail (False)
    :param beginning_on_size_error: if `size` cannot fit `join_by` plus one
        byte on each side, return the first `size` bytes instead of raising
    :param end_on_size_error: same, but return the last `size` bytes
    :raises ValueError: when `size` is too small and neither fallback is set
    """
    rval = b''
    join_by = smart_str(join_by)
    if get_file_size(value) > size:
        start = value.tell()
        len_join_by = len(join_by)
        min_size = len_join_by + 2
        if size < min_size:
            if beginning_on_size_error:
                rval = value.read(size)
                # NOTE(review): returns raw bytes here, unlike the
                # unicodify()'d return below -- confirm callers expect this.
                value.seek(start)
                return rval
            elif end_on_size_error:
                value.seek(-size, 2)
                rval = value.read(size)
                value.seek(start)
                return rval
            raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
        left_index = right_index = int((size - len_join_by) / 2)
        if left_index + right_index + len_join_by < size:
            # Odd remainder: give the spare byte to one side.
            if left_larger:
                left_index += 1
            else:
                right_index += 1
        # Keep the head, insert the separator, then seek to and keep the tail.
        rval = value.read(left_index) + join_by
        value.seek(-right_index, 2)
        rval += value.read(right_index)
    else:
        # Content already fits: read it all, in chunks.
        while True:
            data = value.read(CHUNK_SIZE)
            if not data:
                break
            rval += data
    return unicodify(rval)
422
423
def shrink_and_unicodify(stream):
    """Coerce ``stream`` to unicode (dropping NULs) and truncate it to
    ``DATABASE_MAX_STRING_SIZE`` characters when necessary."""
    text = unicodify(stream, strip_null=True) or u''
    if len(text) <= DATABASE_MAX_STRING_SIZE:
        return text
    return shrink_string_by_size(text,
                                 DATABASE_MAX_STRING_SIZE,
                                 join_by="\n..\n",
                                 left_larger=True,
                                 beginning_on_size_error=True)
433
434
def shrink_string_by_size(value, size, join_by="..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
    """Shrink ``value`` to at most ``size`` characters.

    The middle of the string is replaced by ``join_by``. When ``size`` is too
    small to fit ``join_by`` plus one character on each side, the size-error
    flags select keeping the beginning or the end; otherwise ValueError is
    raised.
    """
    if len(value) <= size:
        return value
    len_join_by = len(join_by)
    min_size = len_join_by + 2
    if size < min_size:
        if beginning_on_size_error:
            return value[:size]
        if end_on_size_error:
            return value[-size:]
        raise ValueError('With the provided join_by value (%s), the minimum size value is %i.' % (join_by, min_size))
    left_index = right_index = (size - len_join_by) // 2
    # Odd remainder: give the spare character to one side.
    if left_index + right_index + len_join_by < size:
        if left_larger:
            left_index += 1
        else:
            right_index += 1
    return "%s%s%s" % (value[:left_index], join_by, value[-right_index:])
453
454
def pretty_print_time_interval(time=False, precise=False, utc=False):
    """
    Get a datetime object or a int() Epoch timestamp and return a
    pretty string like 'an hour ago', 'Yesterday', '3 months ago',
    'just now', etc

    Counts use integer division so that Python 3 does not produce
    fractional output like '2.5 minutes ago'.

    credit: http://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python
    """
    if utc:
        now = datetime.utcnow()
    else:
        now = datetime.now()
    if type(time) is int:
        diff = now - datetime.fromtimestamp(time)
    elif isinstance(time, datetime):
        diff = now - time
    elif isinstance(time, string_types):
        try:
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            # MySQL may not support microseconds precision
            time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S")
        diff = now - time
    else:
        # Unsupported input type: treat as "no interval".
        diff = now - now
    second_diff = diff.seconds
    day_diff = diff.days

    if day_diff < 0:
        return ''

    if precise:
        if day_diff == 0:
            if second_diff < 10:
                return "just now"
            if second_diff < 60:
                return str(second_diff) + " seconds ago"
            if second_diff < 120:
                return "a minute ago"
            if second_diff < 3600:
                # // keeps the count integral on Python 3 (was /).
                return str(second_diff // 60) + " minutes ago"
            if second_diff < 7200:
                return "an hour ago"
            if second_diff < 86400:
                return str(second_diff // 3600) + " hours ago"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return str(day_diff) + " days ago"
        if day_diff < 31:
            return str(day_diff // 7) + " weeks ago"
        if day_diff < 365:
            return str(day_diff // 30) + " months ago"
        return str(day_diff // 365) + " years ago"
    else:
        if day_diff == 0:
            return "today"
        if day_diff == 1:
            return "yesterday"
        if day_diff < 7:
            return "less than a week"
        if day_diff < 31:
            return "less than a month"
        if day_diff < 365:
            return "less than a year"
        return "a few years ago"
520
521
def pretty_print_json(json_data, is_json_string=False):
    """Serialize ``json_data`` as indented, key-sorted JSON text.

    If ``is_json_string`` is True, ``json_data`` is first parsed from a
    JSON string.
    """
    data = json.loads(json_data) if is_json_string else json_data
    return json.dumps(data, sort_keys=True, indent=4)
526
527
# characters that are valid (pass through sanitize_text() unchanged)
valid_chars = set(string.ascii_letters + string.digits + " -=_.()/+*^,:?!")

# characters that are allowed but need to be escaped
# (see sanitize_text() and restore_text() below)
mapped_chars = {'>': '__gt__',
                '<': '__lt__',
                "'": '__sq__',
                '"': '__dq__',
                '[': '__ob__',
                ']': '__cb__',
                '{': '__oc__',
                '}': '__cc__',
                '@': '__at__',
                '\n': '__cn__',
                '\r': '__cr__',
                '\t': '__tc__',
                '#': '__pd__'}
545
546
def restore_text(text, character_map=mapped_chars):
    """Reverse the escaping applied by sanitize_text().

    Each escape token from ``character_map`` (e.g. ``__gt__``) is replaced
    by the character it stands for; falsy input is returned unchanged.
    """
    if not text:
        return text
    for char, escape_token in character_map.items():
        text = text.replace(escape_token, char)
    return text
554
555
def sanitize_text(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """
    Restricts the characters that are allowed in text; accepts both strings
    and lists of strings; non-string entities will be cast to strings.

    :param invalid_character: replacement used for characters that are
        neither valid nor mapped.
    """
    if isinstance(text, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in text]
    if not isinstance(text, string_types):
        text = smart_str(text)
    # Pass invalid_character through: previously it was dropped here and the
    # helper silently fell back to its own default 'X'.
    return _sanitize_text_helper(text, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
566
567
def _sanitize_text_helper(text, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Restricts the characters that are allowed in a string"""
    def _translate(c):
        if c in valid_characters:
            return c
        if c in character_map:
            return character_map[c]
        return invalid_character  # makes debugging easier
    return ''.join(_translate(c) for c in text)
580
581
def sanitize_lists_to_string(values, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Sanitize ``values``; lists are recursively sanitized and joined with commas."""
    if not isinstance(values, list):
        return sanitize_text(values, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    sanitized = [
        sanitize_lists_to_string(value,
                                 valid_characters=valid_characters,
                                 character_map=character_map,
                                 invalid_character=invalid_character)
        for value in values
    ]
    return ",".join(sanitized)
594
595
def sanitize_param(value, valid_characters=valid_chars, character_map=mapped_chars, invalid_character='X'):
    """Clean incoming parameters (strings or lists)"""
    if isinstance(value, list):
        return [sanitize_text(x, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character) for x in value]
    if isinstance(value, string_types):
        return sanitize_text(value, valid_characters=valid_characters, character_map=character_map, invalid_character=invalid_character)
    raise Exception('Unknown parameter type (%s)' % (type(value)))
604
605
# Characters allowed verbatim in filenames by sanitize_for_filename().
valid_filename_chars = set(string.ascii_letters + string.digits + '_.')
# Sanitized results that are unusable as filenames.
invalid_filenames = ['', '.', '..']
608
609
def sanitize_for_filename(text, default=None):
    """
    Restricts the characters that are allowed in a filename portion; Returns default value or a unique id string if result is not a valid name.
    Method is overly aggressive to minimize possible complications, but a maximum length is not considered.
    """
    sanitized = ''.join(c if c in valid_filename_chars else '_' for c in text)
    if sanitized not in invalid_filenames:
        return sanitized
    # Result is unusable ('', '.', '..'): fall back to default or a unique id.
    if default is None:
        return sanitize_for_filename(str(unique_id()))
    return default
627
628
def find_instance_nested(item, instances, match_key=None):
    """
    Recursively find instances from lists, dicts, tuples.

    `instances` should be a tuple of valid instances
    If match_key is given the key must match for an instance to be added to the list of found instances.
    """

    matches = []

    def visit(path, key, value):
        # Callback invoked by boltons' remap for each value; collects matches.
        if isinstance(value, instances):
            if match_key is None or match_key == key:
                matches.append(value)
        return key, value

    def enter(path, key, value):
        # Do not descend into values that are themselves matches.
        if isinstance(value, instances):
            return None, False
        return default_enter(path, key, value)

    # reraise_visit=False keeps the traversal going if visit() raises.
    remap(item, visit, reraise_visit=False, enter=enter)

    return matches
653
654
def mask_password_from_url(url):
    """
    Masks out passwords from connection urls like the database connection in galaxy.ini

    >>> mask_password_from_url( 'sqlite+postgresql://user:password@localhost/' )
    'sqlite+postgresql://user:********@localhost/'
    >>> mask_password_from_url( 'amqp://user:amqp@localhost' )
    'amqp://user:********@localhost'
    >>> mask_password_from_url( 'amqp://localhost')
    'amqp://localhost'
    """
    parts = urlparse.urlsplit(url)
    if not parts.password:
        return url
    if url.count(parts.password) == 1:
        # Plain string replacement is preferred: rebuilding the URL below can
        # manipulate the input beyond just masking the password.
        return url.replace(parts.password, "********")
    masked_netloc = parts.netloc.replace("%s:%s" % (parts.username, parts.password), '%s:********' % parts.username)
    return urlparse.urlunsplit(parts._replace(netloc=masked_netloc))
677
678
def ready_name_for_url(raw_name):
    u""" General method to convert a string (i.e. object name) to a URL-ready
    slug.

    >>> ready_name_for_url( "My Cool Object" )
    'My-Cool-Object'
    >>> ready_name_for_url( "!My Cool Object!" )
    'My-Cool-Object'
    >>> ready_name_for_url( "Hello₩◎ґʟⅾ" )
    'Hello'
    """

    # Collapse runs of whitespace to single dashes, then drop everything
    # that is not ASCII alphanumeric or a dash.
    slug = re.sub(r"[^a-zA-Z0-9\-]", "", re.sub(r"\s+", "-", raw_name))
    # Trim a single trailing dash, if present.
    return slug[:-1] if slug.endswith('-') else slug
699
700
def which(file):
    """Locate ``file`` on the directories listed in the PATH environment variable.

    Returns the full path of the first existing match, or None when not found.
    Based on http://stackoverflow.com/questions/5226958/which-equivalent-function-in-python
    """
    # os.pathsep/os.path.join instead of hard-coded ':' and '/' so the lookup
    # also works on non-POSIX platforms; .get() avoids a KeyError when PATH
    # is unset.
    for path in os.environ.get("PATH", "").split(os.pathsep):
        candidate = os.path.join(path, file)
        if os.path.exists(candidate):
            return candidate

    return None
708
709
def in_directory(file, directory, local_path_module=os.path):
    """
    Return true, if the common prefix of both is equal to directory
    e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b.
    This function isn't used exclusively for security checks, but if it is
    used for such checks it is assumed that ``directory`` is a "trusted" path -
    supplied by Galaxy or by the admin and ``file`` is something generated by
    a tool, configuration, external web server, or user supplied input.

    local_path_module is used by Pulsar to check Windows paths while running on
    a POSIX-like system.

    >>> base_dir = tempfile.mkdtemp()
    >>> safe_dir = os.path.join(base_dir, "user")
    >>> os.mkdir(safe_dir)
    >>> good_file = os.path.join(safe_dir, "1")
    >>> with open(good_file, "w") as f: _ = f.write("hello")
    >>> in_directory(good_file, safe_dir)
    True
    >>> in_directory("/other/file/is/here.txt", safe_dir)
    False
    >>> unsafe_link = os.path.join(safe_dir, "2")
    >>> os.symlink("/other/file/bad.fasta", unsafe_link)
    >>> in_directory(unsafe_link, safe_dir)
    False
    """
    if local_path_module != os.path:
        # Pulsar case: load the safe_contains variant matching the foreign
        # path module (e.g. ntpath) so Windows paths are checked correctly.
        _safe_contains = importlib.import_module('galaxy.util.path.%s' % local_path_module.__name__).safe_contains
    else:
        # Resolve symlinks in the trusted directory before containment check.
        directory = os.path.realpath(directory)
        _safe_contains = safe_contains
    return _safe_contains(directory, file)
742
743
def merge_sorted_iterables(operator, *iterables):
    """

    >>> operator = lambda x: x
    >>> list( merge_sorted_iterables( operator, [1,2,3], [4,5] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [4, 5], [1,2,3] ) )
    [1, 2, 3, 4, 5]
    >>> list( merge_sorted_iterables( operator, [1, 4, 5], [2], [3] ) )
    [1, 2, 3, 4, 5]
    """
    first_iterable = iterables[0]
    if len(iterables) == 1:
        # Single iterable: nothing to merge, just re-yield its elements.
        for element in first_iterable:
            yield element
        return
    # Merge the first iterable with the recursive merge of all the rest.
    remaining = merge_sorted_iterables(operator, *iterables[1:])
    for element in __merge_two_sorted_iterables(operator, iter(first_iterable), remaining):
        yield element
766
767
def __merge_two_sorted_iterables(operator, iterable1, iterable2):
    """Merge two sorted iterators, ordering elements by the key function ``operator``.

    Buffers one element from each iterator, repeatedly yielding the smaller;
    when either side is exhausted, the leftover buffered element and the
    remainder of the other iterator are flushed in order.
    """
    # Sentinel marking "no buffered element"; cannot collide with real values.
    unset = object()
    continue_merge = True
    next_1 = unset
    next_2 = unset
    while continue_merge:
        try:
            if next_1 is unset:
                next_1 = next(iterable1)
            if next_2 is unset:
                next_2 = next(iterable2)
            if operator(next_2) < operator(next_1):
                yield next_2
                next_2 = unset
            else:
                yield next_1
                next_1 = unset
        except StopIteration:
            # One side ran out; fall through to flush the leftovers below.
            continue_merge = False
    if next_1 is not unset:
        yield next_1
    if next_2 is not unset:
        yield next_2
    # At most one of these loops produces anything: the non-exhausted side.
    for el in iterable1:
        yield el
    for el in iterable2:
        yield el
795
796
class Params(object):
    """
    Stores and 'sanitizes' parameters. Alphanumeric characters and the
    non-alphanumeric ones that are deemed safe are let to pass through (see L{valid_chars}).
    Some non-safe characters are escaped to safe forms for example C{>} becomes C{__lt__}
    (see L{mapped_chars}). All other characters are replaced with C{X}.

    Operates on string or list values only (HTTP parameters).

    >>> values = { 'status':'on', 'symbols':[ 'alpha', '<>', '$rm&#!' ] }
    >>> par = Params(values)
    >>> par.status
    'on'
    >>> par.value == None # missing attributes return None
    True
    >>> par.get('price', 0)
    0
    >>> par.symbols # replaces unknown symbols with X
    ['alpha', '__lt____gt__', 'XrmX__pd__!']
    >>> sorted(par.flatten()) # flattening to a list
    [('status', 'on'), ('symbols', 'XrmX__pd__!'), ('symbols', '__lt____gt__'), ('symbols', 'alpha')]
    """

    # is NEVER_SANITIZE required now that sanitizing for tool parameters can be controlled on a per parameter basis and occurs via InputValueWrappers?
    NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths']

    def __init__(self, params, sanitize=True):
        # Parameters are stored directly in the instance __dict__, which is
        # what makes attribute-style access (e.g. par.status) work.
        if sanitize:
            for key, value in params.items():
                # sanitize check both ungrouped and grouped parameters by
                # name. Anything relying on NEVER_SANITIZE should be
                # changed to not require this and NEVER_SANITIZE should be
                # removed.
                if (value is not None and
                        key not in self.NEVER_SANITIZE and
                        True not in [key.endswith("|%s" % nonsanitize_parameter) for
                                     nonsanitize_parameter in self.NEVER_SANITIZE]):
                    self.__dict__[key] = sanitize_param(value)
                else:
                    self.__dict__[key] = value
        else:
            self.__dict__.update(params)

    def flatten(self):
        """
        Creates a tuple list from a dict with a tuple/value pair for every value that is a list
        """
        flat = []
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                # One (key, item) pair per list element.
                for v in value:
                    flat.append((key, v))
            else:
                flat.append((key, value))
        return flat

    def __getattr__(self, name):
        """This is here to ensure that we get None for non existing parameters"""
        return None

    def get(self, key, default):
        """Dict-style access with an explicit (required) default."""
        return self.__dict__.get(key, default)

    def __str__(self):
        return '%s' % self.__dict__

    def __len__(self):
        return len(self.__dict__)

    def __iter__(self):
        return iter(self.__dict__)

    def update(self, values):
        # Merge additional parameter values without sanitizing them.
        self.__dict__.update(values)
872
def rst_to_html(s, error=False):
    """Convert a blob of reStructuredText to HTML.

    :param error: if True, raise on any docutils warning instead of logging it
    :raises Exception: when docutils is not installed
    """
    log = get_logger("docutils")

    if docutils_core is None:
        raise Exception("Attempted to use rst_to_html but docutils unavailable.")

    class FakeStream(object):
        # Stand-in for docutils' warning stream: raises or logs each message.
        def write(self, str):
            if len(str) > 0 and not str.isspace():
                if error:
                    raise Exception(str)
                log.warning(str)

    settings_overrides = {
        "embed_stylesheet": False,
        "template": os.path.join(os.path.dirname(__file__), "docutils_template.txt"),
        "warning_stream": FakeStream(),
        "doctitle_xform": False,  # without option, very different rendering depending on
                                  # number of sections in help content.
    }

    return unicodify(docutils_core.publish_string(
        s, writer=docutils_html4css1.Writer(),
        settings_overrides=settings_overrides))
898
899
def xml_text(root, name=None):
    """Returns the text inside an element.

    When ``name`` is given, an attribute of that name takes precedence over
    a child element of that name; newlines in the text are collapsed and
    surrounding whitespace is stripped.
    """
    if name is None:
        elem = root
    else:
        # Try attribute first, then fall back to a child element.
        attr_val = root.get(name)
        if attr_val:
            return attr_val
        elem = root.find(name)
    if elem is not None and elem.text:
        return ''.join(elem.text.splitlines()).strip()
    # No luck, return empty string
    return ''
916
917
def parse_resource_parameters(resource_param_file):
    """Code shared between jobs and workflows for reading resource parameter configuration files.

    TODO: Allow YAML in addition to XML.
    """
    resource_parameters = {}
    if os.path.exists(resource_param_file):
        # Map each <param name="..."> element by its name attribute.
        root = parse_xml(resource_param_file).getroot()
        resource_parameters = {param.get("name"): param for param in root.findall("param")}
    return resource_parameters
932
933
# asbool implementation pulled from PasteDeploy
# Lowercase string values recognized by asbool() as True/False.
truthy = frozenset({'true', 'yes', 'on', 'y', 't', '1'})
falsy = frozenset({'false', 'no', 'off', 'n', 'f', '0'})
937
938
def asbool(obj):
    """Coerce ``obj`` to a bool, accepting common true/false strings.

    :raises ValueError: for strings not listed in ``truthy``/``falsy``.
    """
    if not isinstance(obj, string_types):
        return bool(obj)
    normalized = obj.strip().lower()
    if normalized in truthy:
        return True
    if normalized in falsy:
        return False
    raise ValueError("String is not true/false: %r" % normalized)
949
950
def string_as_bool(string):
    """Return True when ``string`` reads as a truthy flag ('true', 'yes', 'on', '1'), case-insensitively."""
    return str(string).lower() in ('true', 'yes', 'on', '1')
956
957
def string_as_bool_or_none(string):
    """
    Returns True, None or False based on the argument:
    True if passed True, 'True', 'Yes', or 'On'
    None if passed None or 'None'
    False otherwise

    Note: string comparison is case-insensitive so lowecase versions of those
    function equivalently.
    """
    value = str(string).lower()
    if value == 'none':
        return None
    return value in ('true', 'yes', 'on')
975
976
def listify(item, do_strip=False):
    """
    Make a single item a single item list.

    If *item* is a string, it is split on comma (``,``) characters to produce the list. Optionally, if *do_strip* is
    true, any extra whitespace around the split items is stripped.

    If *item* is a list it is returned unchanged. If *item* is a tuple, it is converted to a list and returned. If
    *item* evaluates to False, an empty list is returned.

    :type item: object
    :param item: object to make a list from
    :type do_strip: bool
    :param do_strip: strip whitespaces from around split items, if set to ``True``
    :rtype: list
    :returns: The input as a list
    """
    if not item:
        return []
    if isinstance(item, list):
        return item
    if isinstance(item, tuple):
        return list(item)
    if isinstance(item, string_types) and item.count(','):
        tokens = item.split(',')
        return [token.strip() for token in tokens] if do_strip else tokens
    # Anything else (including comma-free strings) becomes a one-item list.
    return [item]
1007
1008
def commaify(amount):
    """Insert thousands separators into a numeric string, e.g. '1234567' -> '1,234,567'."""
    # Repeatedly insert a comma before the last three digits of the leading
    # digit run until the string stops changing.
    while True:
        grouped = re.sub(r"^(-?\d+)(\d{3})", r'\g<1>,\g<2>', amount)
        if grouped == amount:
            return grouped
        amount = grouped
1016
1017
def roundify(amount, sfs=2):
    """
    Take a number in string form and truncate to 'sfs' significant figures.
    """
    if len(amount) <= sfs:
        return amount
    # Keep the first sfs digits and zero out the remainder.
    return amount[:sfs] + '0' * (len(amount) - sfs)
1026
1027
def unicodify(value, encoding=DEFAULT_ENCODING, error='replace', strip_null=False):
    u"""
    Returns a Unicode string or None.

    :param encoding: codec used when decoding byte strings
    :param error: error handler passed to the decoder (e.g. 'replace', 'ignore')
    :param strip_null: if True, remove NUL characters from the result

    >>> assert unicodify(None) is None
    >>> assert unicodify('simple string') == u'simple string'
    >>> assert unicodify(3) == u'3'
    >>> assert unicodify(bytearray([115, 116, 114, 196, 169, 195, 177, 103])) == u'strĩñg'
    >>> assert unicodify(Exception(u'strĩñg')) == u'strĩñg'
    >>> assert unicodify('cómplǐcḁtëd strĩñg') == u'cómplǐcḁtëd strĩñg'
    >>> s = u'cómplǐcḁtëd strĩñg'; assert unicodify(s) == s
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1'), 'latin-1') == s
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1')) == u'l\ufffdt\ufffdn str\ufffd\ufffdg'
    >>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1'), error='ignore') == u'ltn strg'
    >>> if PY2: assert unicodify(Exception(u'¼ cup of flour'.encode('latin-1')), error='ignore') == ' cup of flour'
    """
    if value is None:
        return value
    try:
        if isinstance(value, bytearray):
            value = bytes(value)
        elif not isinstance(value, string_types) and not isinstance(value, binary_type):
            # In Python 2, value is not an instance of basestring (i.e. str or unicode)
            # In Python 3, value is not an instance of bytes or str
            try:
                value = text_type(value)
            except Exception:
                value = str(value)
        # Now in Python 2, value is an instance of basestring, but may be not unicode
        # Now in Python 3, value is an instance of bytes or str
        if not isinstance(value, text_type):
            value = text_type(value, encoding, error)
    except Exception as e:
        msg = "Value '%s' could not be coerced to Unicode: %s('%s')" % (value, type(e).__name__, e)
        raise Exception(msg)
    if strip_null:
        # Drop NUL characters, which databases commonly reject.
        return value.replace('\0', '')
    return value
1066
1067
def smart_str(s, encoding=DEFAULT_ENCODING, strings_only=False, errors='strict'):
    u"""
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.

    Adapted from an older, simpler version of django.utils.encoding.smart_str.

    >>> assert smart_str(None) == b'None'
    >>> assert smart_str(None, strings_only=True) is None
    >>> assert smart_str(3) == b'3'
    >>> assert smart_str(3, strings_only=True) == 3
    >>> s = b'a bytes string'; assert smart_str(s) == s
    >>> s = bytearray(b'a bytes string'); assert smart_str(s) == s
    >>> assert smart_str(u'a simple unicode string') == b'a simple unicode string'
    >>> assert smart_str(u'à strange ünicode ڃtring') == b'\\xc3\\xa0 strange \\xc3\\xbcnicode \\xda\\x83tring'
    >>> assert smart_str(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string', encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    >>> assert smart_str(bytearray(b'\\xc3\\xa0n \\xc3\\xabncoded utf-8 string'), encoding='latin-1') == b'\\xe0n \\xebncoded utf-8 string'
    """
    if strings_only and isinstance(s, (type(None), int)):
        # Caller opted out of converting None/int: return them unchanged.
        return s
    if not isinstance(s, string_types) and not isinstance(s, (binary_type, bytearray)):
        # In Python 2, s is not an instance of basestring or bytearray
        # In Python 3, s is not an instance of str, bytes or bytearray
        s = str(s)
    # Now in Python 2, value is an instance of basestring or bytearray
    # Now in Python 3, value is an instance of str, bytes or bytearray
    if not isinstance(s, (binary_type, bytearray)):
        return s.encode(encoding, errors)
    elif s and encoding != DEFAULT_ENCODING:
        # Byte input is assumed to be UTF-8; transcode to the target encoding.
        return s.decode(DEFAULT_ENCODING, errors).encode(encoding, errors)
    else:
        return s
1101
1102
def strip_control_characters(s):
    """Strip unicode control characters (category "Cc") from a string."""
    text = unicodify(s)
    kept = [ch for ch in text if unicodedata.category(ch) != "Cc"]
    return "".join(kept)
1106
1107
def strip_control_characters_nested(item):
    """Recursively strips control characters from lists, dicts, tuples."""

    def clean(path, key, value):
        # Only string-typed keys/values are rewritten; everything else passes through.
        new_key = strip_control_characters(key) if isinstance(key, string_types) else key
        new_value = strip_control_characters(value) if isinstance(value, string_types) else value
        return new_key, new_value

    return remap(item, clean)
1119
1120
def object_to_string(obj):
    """Serialize a byte string to its hexadecimal representation."""
    encoded = binascii.hexlify(obj)
    return encoded
1123
1124
def string_to_object(s):
    """Inverse of :func:`object_to_string` -- decode a hex string back to bytes."""
    decoded = binascii.unhexlify(s)
    return decoded
1127
1128
class ParamsWithSpecs(collections.defaultdict):
    """Parameter dict validated and converted against a table of specs.

    Each spec entry may define ``default`` (returned for missing keys),
    ``map`` (a callable converting the raw value) and ``valid`` (a predicate
    applied to the raw value).  Problems are reported through the
    ``_param_*`` hooks, which subclasses are expected to override.
    """

    def __init__(self, specs=None, params=None):
        self.specs = specs or dict()
        self.params = params or dict()
        for name, raw_value in self.params.items():
            if name not in self.specs:
                self._param_unknown_error(name)
            spec = self.specs[name]
            if 'map' in spec:
                try:
                    self.params[name] = spec['map'](raw_value)
                except Exception:
                    self._param_map_error(name, raw_value)
            # NOTE: validation intentionally sees the raw, un-mapped value.
            if 'valid' in spec and not spec['valid'](raw_value):
                self._param_vaildation_error(name, raw_value)
        self.update(self.params)

    def __missing__(self, name):
        # Missing keys fall back to the spec-declared default.
        return self.specs[name]['default']

    def __getattr__(self, name):
        # Allow attribute-style access to parameter values.
        return self[name]

    def _param_unknown_error(self, name):
        raise NotImplementedError()

    def _param_map_error(self, name, value):
        raise NotImplementedError()

    def _param_vaildation_error(self, name, value):
        raise NotImplementedError()
1164
1165
def compare_urls(url1, url2, compare_scheme=True, compare_hostname=True, compare_path=True):
    """Return True when the selected components of the two URLs do not conflict.

    A component is only compared when both URLs actually define it.
    """
    parsed1 = urlparse.urlparse(url1)
    parsed2 = urlparse.urlparse(url2)
    checks = [
        (compare_scheme, parsed1.scheme, parsed2.scheme),
        (compare_hostname, parsed1.hostname, parsed2.hostname),
        (compare_path, parsed1.path, parsed2.path),
    ]
    for enabled, component1, component2 in checks:
        if enabled and component1 and component2 and component1 != component2:
            return False
    return True
1176
1177
def read_dbnames(filename):
    """ Read build names from file

    Returns a ``DBNames`` list of ``(build_key, display_name)`` tuples:
    the unspecified build ("?") first, then UCSC builds grouped by species
    (newest revision first), then manually-added builds.  Errors are logged
    and an "unspecified" fallback list is returned.
    """
    class DBNames(list):
        default_value = "?"
        default_name = "unspecified (?)"
    db_names = DBNames()
    try:
        ucsc_builds = {}
        man_builds = []  # assume these are integers
        name_to_db_base = {}
        if filename is None:
            # Should only be happening with the galaxy.tools.parameters.basic:GenomeBuildParameter docstring unit test
            filename = os.path.join('tool-data', 'shared', 'ucsc', 'builds.txt.sample')
        for line in open(filename):
            try:
                if line[0:1] == "#":
                    continue
                fields = line.replace("\r", "").replace("\n", "").split("\t")
                # Special case of unspecified build is at top of list
                if fields[0] == "?":
                    db_names.insert(0, (fields[0], fields[1]))
                    continue
                try:  # manual build (i.e. microbes)
                    int(fields[0])
                    man_builds.append((fields[1], fields[0]))
                except Exception:  # UCSC build
                    # Strip the trailing revision digits to group builds by species.
                    db_base = fields[0].rstrip('0123456789')
                    if db_base not in ucsc_builds:
                        ucsc_builds[db_base] = []
                        name_to_db_base[fields[1]] = db_base
                    # we want to sort within a species numerically by revision number
                    build_rev = re.compile(r'\d+$')
                    try:
                        build_rev = int(build_rev.findall(fields[0])[0])
                    except Exception:
                        build_rev = 0
                    ucsc_builds[db_base].append((build_rev, fields[0], fields[1]))
            except Exception:
                # Malformed lines (e.g. too few fields) are silently skipped.
                continue
        sort_names = sorted(name_to_db_base.keys())
        for name in sort_names:
            db_base = name_to_db_base[name]
            # Newest revision first within each species.
            ucsc_builds[db_base].sort()
            ucsc_builds[db_base].reverse()
            ucsc_builds[db_base] = [(build, name) for _, build, name in ucsc_builds[db_base]]
            db_names = DBNames(db_names + ucsc_builds[db_base])
        if len(db_names) > 1 and len(man_builds) > 0:
            # Visual separator between UCSC builds and manual builds.
            db_names.append((db_names.default_value, '----- Additional Species Are Below -----'))
            man_builds.sort()
            man_builds = [(build, name) for name, build in man_builds]
            db_names = DBNames(db_names + man_builds)
    except Exception as e:
        log.error("ERROR: Unable to read builds file: %s", unicodify(e))
    if len(db_names) < 1:
        # Guarantee at least the "unspecified" entry.
        db_names = DBNames([(db_names.default_value, db_names.default_name)])
    return db_names
1234
1235
def read_build_sites(filename, check_builds=True):
    """ read db names to ucsc mappings from file, this file should probably be merged with the one above """
    build_sites = []
    try:
        for line in open(filename):
            try:
                if line.startswith("#"):
                    continue
                fields = line.replace("\r", "").replace("\n", "").split("\t")
                # IndexError on short lines is caught below and the line skipped.
                site_dict = {'name': fields[0], 'url': fields[1]}
                if check_builds:
                    site_dict['builds'] = fields[2].split(",")
                build_sites.append(site_dict)
            except Exception:
                continue
    except Exception:
        log.error("ERROR: Unable to read builds for site file %s", filename)
    return build_sites
1258
1259
def relativize_symlinks(path, start=None, followlinks=False):
    """Rewrite every symlink under *path* as a relative symlink.

    Links are made relative to *start* when given, otherwise relative to
    the directory containing each link.
    """
    for root, _dirs, files in os.walk(path, followlinks=followlinks):
        rel_start = None
        for file_name in files:
            candidate = os.path.join(root, file_name)
            if not os.path.islink(candidate):
                continue
            target = os.readlink(candidate)
            if rel_start is None:
                rel_start = start if start is not None else root
            relative_target = relpath(target, rel_start)
            # Replace the link in place with its relative equivalent.
            os.remove(candidate)
            os.symlink(relative_target, candidate)
1275
1276
def stringify_dictionary_keys(in_dict):
    """Return a shallow copy of *in_dict* with its top-level keys coerced to str.

    Unicode keys are not valid for expansion into keyword arguments on
    method calls; values are left untouched and the conversion does not
    recurse into nested containers.
    """
    return dict((str(key), value) for key, value in iteritems(in_dict))
1285
1286
def mkstemp_ln(src, prefix='mkstemp_ln_'):
    """
    From tempfile._mkstemp_inner, generate a hard link in the same dir with a
    random name. Created so we can persist the underlying file of a
    NamedTemporaryFile upon its closure.

    :param src: path of an existing file to hard-link.
    :param prefix: prefix for the generated link name.
    :returns: absolute path of the newly created hard link.
    :raises IOError: if no unused candidate name could be found.
    """
    dir = os.path.dirname(src)
    # NOTE: relies on the private tempfile._get_candidate_names() generator.
    names = tempfile._get_candidate_names()
    for seq in xrange(tempfile.TMP_MAX):
        name = next(names)
        file = os.path.join(dir, prefix + name)
        try:
            os.link(src, file)
            return (os.path.abspath(file))
        except OSError as e:
            # Name collision: keep trying other candidates.
            if e.errno == errno.EEXIST:
                continue  # try again
            raise
    raise IOError(errno.EEXIST, "No usable temporary file name found")
1306
1307
def umask_fix_perms(path, umask, unmasked_perms, gid=None):
    """
    umask-friendly permissions fixing

    :param path: filesystem path whose mode (and optionally group) to adjust.
    :param umask: umask applied to ``unmasked_perms`` to get the target mode.
    :param unmasked_perms: desired permission bits before masking.
    :param gid: when given, also chown the path to this group id.

    All failures are logged rather than raised.
    """
    perms = unmasked_perms & ~umask
    try:
        st = os.stat(path)
    except OSError:
        log.exception('Unable to set permissions or group on %s', path)
        return
    # fix modes
    if stat.S_IMODE(st.st_mode) != perms:
        try:
            os.chmod(path, perms)
        except Exception as e:
            log.warning('Unable to honor umask (%s) for %s, tried to set: %s but mode remains %s, error was: %s' % (oct(umask),
                                                                                                                    path,
                                                                                                                    oct(perms),
                                                                                                                    oct(stat.S_IMODE(st.st_mode)),
                                                                                                                    unicodify(e)))
    # fix group
    if gid is not None and st.st_gid != gid:
        try:
            os.chown(path, -1, gid)
        except Exception as e:
            try:
                # Resolve numeric ids to group entries for a friendlier message.
                desired_group = grp.getgrgid(gid)
                current_group = grp.getgrgid(st.st_gid)
            except Exception:
                desired_group = gid
                current_group = st.st_gid
            log.warning('Unable to honor primary group (%s) for %s, group remains %s, error was: %s' % (desired_group,
                                                                                                        path,
                                                                                                        current_group,
                                                                                                        unicodify(e)))
1343
1344
def docstring_trim(docstring):
    """Trimming python doc strings. Taken from: http://www.python.org/dev/peps/pep-0257/"""
    if not docstring:
        return ''
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = docstring.expandtabs().splitlines()
    # Minimum indentation over all lines but the first, which never counts.
    indent = sys.maxsize
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            indent = min(indent, len(line) - len(stripped))
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indent < sys.maxsize:
        trimmed.extend(line[indent:].rstrip() for line in lines[1:])
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    return '\n'.join(trimmed)
1370
1371
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100 bytes'
    >>> nice_size(10000)
    '9.8 KB'
    >>> nice_size(1000000)
    '976.6 KB'
    >>> nice_size(100000000)
    '95.4 MB'
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    try:
        size = float(size)
    except Exception:
        return '??? bytes'
    sign = ''
    if size < 0:
        sign = '-'
        size = -size
    for exponent, unit in enumerate(units):
        # Find the first unit whose next step exceeds the magnitude.
        if size < 1024 ** (exponent + 1):
            scaled = size / float(1024 ** exponent)
            if unit == 'bytes':  # No decimals for bytes
                return "%s%d bytes" % (sign, scaled)
            return "%s%.1f %s" % (sign, scaled, unit)
    return '??? bytes'
1402
1403
def size_to_bytes(size):
    """
    Returns a number of bytes (as integer) if given a reasonably formatted string with the size

    >>> size_to_bytes('1024')
    1024
    >>> size_to_bytes('1.0')
    1
    >>> size_to_bytes('10 bytes')
    10
    >>> size_to_bytes('4k')
    4096
    >>> size_to_bytes('2.2 TB')
    2418925581107
    >>> size_to_bytes('.01 TB')
    10995116277
    >>> size_to_bytes('1.b')
    1
    >>> size_to_bytes('1.2E2k')
    122880
    """
    # The following number regexp is based on https://stackoverflow.com/questions/385558/extract-float-double-value/385597#385597
    size_re = re.compile(r'(?P<number>(\d+(\.\d*)?|\.\d+)(e[+-]?\d+)?)\s*(?P<multiple>[eptgmk]?(b|bytes?)?)?$')
    match = size_re.match(size.lower())
    if match is None:
        raise ValueError("Could not parse string '%s'" % size)
    number = float(match.group("number"))
    multiple = match.group("multiple")
    # A bare number or an explicit byte suffix needs no scaling.
    if multiple == "" or multiple.startswith('b'):
        return int(number)
    # Dispatch on the unit prefix letter instead of a long elif chain.
    exponents = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    prefix = multiple[0]
    if prefix in exponents:
        return int(number * 1024 ** exponents[prefix])
    raise ValueError("Unknown multiplier '%s' in '%s'" % (multiple, size))
1448
1449
def send_mail(frm, to, subject, body, config, html=None):
    """
    Sends an email.

    :type frm: str
    :param frm: from address

    :type to: str
    :param to: to address

    :type subject: str
    :param subject: Subject line

    :type body: str
    :param body: Body text (should be plain text)

    :type config: object
    :param config: Galaxy configuration object

    :type html: str
    :param html: Alternative HTML representation of the body content. If
        provided will convert the message to a MIMEMultipart. (Default 'None')
    """

    to = listify(to)
    if html:
        msg = MIMEMultipart('alternative')
    else:
        msg = MIMEText(body, 'plain', 'utf-8')

    msg['To'] = ', '.join(to)
    msg['From'] = frm
    msg['Subject'] = subject

    if config.smtp_server is None:
        # No SMTP server configured: log the message instead of sending it.
        log.error("Mail is not configured for this Galaxy instance.")
        log.info(msg)
        return

    if html:
        # Attach both representations; clients pick the richest one they support.
        mp_text = MIMEText(body, 'plain', 'utf-8')
        mp_html = MIMEText(html, 'html', 'utf-8')
        msg.attach(mp_text)
        msg.attach(mp_html)

    smtp_ssl = asbool(getattr(config, 'smtp_ssl', False))
    if smtp_ssl:
        s = smtplib.SMTP_SSL(config.smtp_server)
    else:
        s = smtplib.SMTP(config.smtp_server)
    if not smtp_ssl:
        # Opportunistic STARTTLS: downgrade gracefully if unsupported, but
        # abort on a broken HELO exchange.
        try:
            s.starttls()
            log.debug('Initiated SSL/TLS connection to SMTP server: %s', config.smtp_server)
        except RuntimeError as e:
            log.warning('SSL/TLS support is not available to your Python interpreter: %s', unicodify(e))
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.warning('The server does not support the STARTTLS extension: %s', unicodify(e))
    if config.smtp_username and config.smtp_password:
        try:
            s.login(config.smtp_username, config.smtp_password)
        except smtplib.SMTPHeloError as e:
            log.error("The server didn't reply properly to the HELO greeting: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPAuthenticationError as e:
            log.error("The server didn't accept the username/password combination: %s", unicodify(e))
            s.close()
            raise
        except smtplib.SMTPException as e:
            log.error("No suitable authentication method was found: %s", unicodify(e))
            s.close()
            raise
    s.sendmail(frm, to, msg.as_string())
    s.quit()
1529
1530
def force_symlink(source, link_name):
    """Create symlink ``link_name`` pointing at ``source``, replacing any
    existing file at ``link_name``.

    Any ``OSError`` other than ``EEXIST`` (e.g. permission denied) is
    propagated to the caller.
    """
    try:
        os.symlink(source, link_name)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Something already lives at link_name: remove it and re-link.
            os.remove(link_name)
            os.symlink(source, link_name)
        else:
            # Bare `raise` preserves the original traceback; `raise e` (the
            # previous code) would discard it on Python 2.
            raise
1540
1541
def move_merge(source, target):
    """Move *source* to *target*, merging directory contents when both exist.

    A plain ``shutil.move`` would nest ``source`` inside an existing ``target``
    directory; this variant instead moves each entry of ``source`` into
    ``target`` recursively, so ``target`` always ends up being the moved tree.
    """
    merge_needed = os.path.isdir(source) and os.path.exists(target) and os.path.isdir(target)
    if not merge_needed:
        return shutil.move(source, target)
    for entry in os.listdir(source):
        move_merge(os.path.join(source, entry), os.path.join(target, entry))
1553
1554
def safe_str_cmp(a, b):
    """safely compare two strings in a timing-attack-resistant manner
    """
    if len(a) != len(b):
        return False
    # Accumulate character differences without short-circuiting so the
    # comparison takes constant time for equal-length inputs.
    mismatch = 0
    for ch_a, ch_b in zip(a, b):
        mismatch |= ord(ch_a) ^ ord(ch_b)
    return mismatch == 0
1564
1565
# Root of the Galaxy installation, derived from this package's location
# (three directory levels above galaxy/util).
galaxy_root_path = os.path.join(__path__[0], os.pardir, os.pardir, os.pardir)
# Directory holding the sample configuration files shipped with Galaxy.
galaxy_samples_path = os.path.join(__path__[0], os.pardir, 'config', 'sample')
1568
1569
def galaxy_directory():
    """Return the absolute path of the Galaxy installation root.

    When running from a ``packages`` directory layout, step up one more
    level so the result is the repository root.
    """
    root = os.path.abspath(galaxy_root_path)
    if os.path.basename(root) == "packages":
        root = os.path.abspath(os.path.join(root, ".."))
    return root
1575
1576
def galaxy_samples_directory():
    """Return the absolute path of the shipped sample-config directory."""
    return os.path.abspath(galaxy_samples_path)
1579
1580
def config_directories_from_setting(directories_setting, galaxy_root=galaxy_root_path):
    """
    Parse the ``directories_setting`` into a list of relative or absolute
    filesystem paths that will be searched to discover plugins.

    :type galaxy_root: string
    :param galaxy_root: the root path of this galaxy installation
    :type directories_setting: string (default: None)
    :param directories_setting: the filesystem path (or paths)
        to search for plugins. Can be CSV string of paths. Will be treated as
        absolute if a path starts with '/', relative otherwise.
    :rtype: list of strings
    :returns: list of filesystem paths
    """
    if not directories_setting:
        return []

    found = []
    for raw_directory in listify(directories_setting):
        directory = raw_directory.strip()
        if not directory.startswith('/'):
            # Relative entries are resolved against the Galaxy root.
            directory = os.path.join(galaxy_root, directory)
        if os.path.exists(directory):
            found.append(directory)
        else:
            log.warning('directory not found: %s', directory)
    return found
1608
1609
def parse_int(value, min_val=None, max_val=None, default=None, allow_none=False):
    """Parse *value* as an int, clamped to ``[min_val, max_val]``.

    On a non-numeric *value*: return ``None`` when ``allow_none`` is set and
    either ``default`` is None or the literal string "None" was given;
    otherwise return ``default`` when one was supplied, else re-raise the
    ``ValueError``.
    """
    try:
        value = int(value)
        if min_val is not None and value < min_val:
            return min_val
        if max_val is not None and value > max_val:
            return max_val
        return value
    except ValueError:
        if allow_none:
            if default is None or value == "None":
                return None
        # Compare against None explicitly so falsy defaults such as 0 are
        # honored; the previous truthiness test re-raised for default=0.
        if default is not None:
            return default
        else:
            raise
1626
1627
def parse_non_hex_float(s):
    r"""
    Parse string `s` into a float but throw a `ValueError` if the string is in
    the otherwise acceptable format `\d+e\d+` (e.g. 40000000000000e5.)

    This can be passed into `json.loads` to prevent a hex string in the above
    format from being incorrectly parsed as a float in scientific notation.

    >>> parse_non_hex_float( '123.4' )
    123.4
    >>> parse_non_hex_float( '2.45e+3' )
    2450.0
    >>> parse_non_hex_float( '2.45e-3' )
    0.00245
    >>> parse_non_hex_float( '40000000000000e5' )
    Traceback (most recent call last):
    ...
    ValueError: could not convert string to float: 40000000000000e5
    """
    parsed = float(s)
    # Reject bare exponents that lack an explicit sign: those look more like
    # hex-ish identifiers than deliberate scientific notation.
    if 'e' in s and '+' not in s and '-' not in s:
        raise ValueError('could not convert string to float: ' + s)
    return parsed
1652
1653
def build_url(base_url, port=80, scheme='http', pathspec=None, params=None, doseq=False):
    """Assemble a URL from *base_url* plus extra path segments and query params.

    :param base_url: URL to start from; its query string, if any, is merged
        into *params*.
    :param port: appended to the netloc when not the default 80.
    :param scheme: overrides the parsed scheme when not 'http'.
    :param pathspec: optional list of extra path segments to append.
    :param params: optional dict of query parameters.
    :param doseq: passed through to ``urlencode`` for sequence values.
    """
    if params is None:
        params = dict()
    if pathspec is None:
        pathspec = []
    parsed_url = urlparse.urlparse(base_url)
    if scheme != 'http':
        # ParseResult is a namedtuple and therefore immutable; the previous
        # attribute assignment raised AttributeError. Use _replace instead.
        parsed_url = parsed_url._replace(scheme=scheme)
    assert parsed_url.scheme in ('http', 'https', 'ftp'), 'Invalid URL scheme: %s' % scheme
    if port != 80:
        url = '%s://%s:%d/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), int(port), parsed_url.path)
    else:
        url = '%s://%s/%s' % (parsed_url.scheme, parsed_url.netloc.rstrip('/'), parsed_url.path.lstrip('/'))
    if len(pathspec) > 0:
        url = '%s/%s' % (url.rstrip('/'), '/'.join(pathspec))
    if parsed_url.query:
        # Merge pre-existing query parameters from base_url into params.
        for query_parameter in parsed_url.query.split('&'):
            key, value = query_parameter.split('=')
            params[key] = value
    if params:
        url += '?%s' % urlparse.urlencode(params, doseq=doseq)
    return url
1676
1677
def url_get(base_url, auth=None, pathspec=None, params=None, max_retries=5, backoff_factor=1):
    """Make contact with the uri provided and return any contents.

    :param auth: optional auth object forwarded to ``requests``.
    :param pathspec: optional list of extra path segments.
    :param params: optional dict of query parameters.
    :param max_retries: total retries applied to HTTP 429 responses.
    :param backoff_factor: multiplier for the exponential retry backoff.
    :returns: response body as text.
    """
    full_url = build_url(base_url, pathspec=pathspec, params=params)
    s = requests.Session()
    # Retry (with backoff) only on 429 Too Many Requests responses.
    retries = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=[429])
    s.mount(base_url, HTTPAdapter(max_retries=retries))
    response = s.get(full_url, auth=auth)
    # Raise for any non-2xx final status.
    response.raise_for_status()
    return response.text
1687
1688
def download_to_file(url, dest_file_path, timeout=30, chunk_size=2 ** 20):
    """Download a URL to a file in chunks.

    :param timeout: seconds before the request times out.
    :param chunk_size: bytes written per iteration (default 1 MiB).
    """
    with requests.get(url, timeout=timeout, stream=True) as r, open(dest_file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size):
            # Skip keep-alive chunks, which arrive as empty bytes.
            if chunk:
                f.write(chunk)
1695
1696
def get_executable():
    """Return the path of a Python interpreter matching the current process.

    Under uWSGI ``sys.executable`` is the uwsgi binary itself, so look up the
    active virtualenv (from uWSGI options or ``$VIRTUAL_ENV``) and use its
    ``bin/python``; fall back to a python next to the binary, then to plain
    ``python`` on ``$PATH``.
    """
    exe = sys.executable
    if not exe.endswith('uwsgi'):
        return exe
    virtualenv = None
    if uwsgi is not None:
        for option in ('home', 'virtualenv', 'venv', 'pyhome'):
            if option in uwsgi.opt:
                virtualenv = unicodify(uwsgi.opt[option])
                break
    if virtualenv is None:
        virtualenv = os.environ.get('VIRTUAL_ENV')
    if virtualenv is not None:
        exe = os.path.join(virtualenv, 'bin', 'python')
    else:
        exe = os.path.join(os.path.dirname(exe), 'python')
    if not os.path.exists(exe):
        exe = 'python'
    return exe
1715
1716
class ExecutionTimer(object):
    """Simple wall-clock timer started at construction time."""

    def __init__(self):
        # Timestamp captured when the timer is created.
        self.begin = time.time()

    def __str__(self):
        milliseconds = self.elapsed * 1000
        return "(%0.3f ms)" % milliseconds

    @property
    def elapsed(self):
        """Seconds elapsed since this timer was created."""
        return time.time() - self.begin
1728
1729
class StructuredExecutionTimer(object):
    """Wall-clock timer carrying an id, a message template and tags.

    ``to_str`` substitutes keyword arguments into the template (via
    ``string.Template``) and appends the elapsed time in milliseconds.
    """

    def __init__(self, timer_id, template, **tags):
        # Capture the start time first so setup cost is included.
        self.begin = time.time()
        self.timer_id = timer_id
        self.template = template
        self.tags = tags

    def __str__(self):
        return self.to_str()

    def to_str(self, **kwd):
        message = string.Template(self.template).safe_substitute(kwd) if kwd else self.template
        return message + " (%0.3f ms)" % (self.elapsed * 1000)

    @property
    def elapsed(self):
        """Seconds elapsed since this timer was created."""
        return time.time() - self.begin
1752
1753
# Allow running this module directly to execute the doctests defined above.
if __name__ == '__main__':
    import doctest
    doctest.testmod(sys.modules[__name__], verbose=False)