comparison env/lib/python3.7/site-packages/boltons/fileutils.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # -*- coding: utf-8 -*-
2 """Virtually every Python programmer has used Python for wrangling
3 disk contents, and ``fileutils`` collects solutions to some of the
4 most commonly-found gaps in the standard library.
5 """
6
7 from __future__ import print_function
8
9 import os
10 import re
11 import sys
12 import stat
13 import errno
14 import fnmatch
15 from shutil import copy2, copystat, Error
16
17
18 __all__ = ['mkdir_p', 'atomic_save', 'AtomicSaver', 'FilePerms',
19 'iter_find_files', 'copytree']
20
21
# Permission masks, spelled as octal literals (accepted by Python 2.6+ and 3).
FULL_PERMS = 0o777  # rwx for user, group, and other (decimal 511)
RW_PERMS = 0o666  # rw for user, group, and other (decimal 438)
_SINGLE_FULL_PERM = 0o7  # rwx for a single class of user

# Python 3 has neither ``basestring`` nor ``unicode``; define stand-ins
# only when the builtin lookup fails.
try:
    basestring
except NameError:
    unicode = str
    basestring = (str, bytes)
30
31
def mkdir_p(path):
    """Create the directory *path*, along with any missing parents.

    Behaves like the ``mkdir -p`` shell command found on Linux/BSD
    (and also works on Windows): a directory that already exists is
    not treated as an error. Any other failure, including *path*
    existing as a non-directory, propagates as :exc:`OSError`.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        # only "already exists, and it is a directory" is tolerated
        already_a_dir = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise
    return
46
47
class FilePerms(object):
    """The :class:`FilePerms` type is used to represent standard POSIX
    filesystem permissions:

      * Read
      * Write
      * Execute

    Across three classes of user:

      * Owning (u)ser
      * Owner's (g)roup
      * Any (o)ther user

    This class assists with computing new permissions, as well as
    working with numeric octal ``777``-style and ``rwx``-style
    permissions. Currently it only considers the bottom 9 permission
    bits; it does not support sticky bits or more advanced permission
    systems.

    Args:
        user (str): A string in the 'rwx' format, omitting characters
            for which owning user's permissions are not provided.
        group (str): A string in the 'rwx' format, omitting characters
            for which owning group permissions are not provided.
        other (str): A string in the 'rwx' format, omitting characters
            for which owning other/world permissions are not provided.

    There are many ways to use :class:`FilePerms`:

    >>> FilePerms(user='rwx', group='xrw', other='wxr')  # note character order
    FilePerms(user='rwx', group='rwx', other='rwx')
    >>> int(FilePerms('r', 'r', ''))
    288
    >>> oct(288)[-3:]  # XXX Py3k
    '440'

    The integer value always tracks the current attributes, including
    when a permission is reduced after construction:

    >>> perms = FilePerms('rwx', 'rwx', 'rwx')
    >>> perms.user = 'r'
    >>> oct(int(perms))[-3:]
    '477'

    See also the :meth:`FilePerms.from_int` and
    :meth:`FilePerms.from_path` classmethods for useful alternative
    ways to construct :class:`FilePerms` objects.
    """
    # TODO: consider more than the lower 9 bits
    class _FilePermProperty(object):
        """Descriptor that validates and normalizes one ``rwx`` string
        and keeps the owner's ``_integer`` in sync with it.

        *attribute* is the name of the private attribute holding the
        normalized string; *offset* selects the 3-bit group within the
        integer mode (2 = user, 1 = group, 0 = other).
        """
        _perm_chars = 'rwx'
        _perm_set = frozenset('rwx')
        _perm_val = {'r': 4, 'w': 2, 'x': 1}  # bit values, also used to sort

        def __init__(self, attribute, offset):
            self.attribute = attribute
            self.offset = offset

        def __get__(self, fp_obj, type_=None):
            if fp_obj is None:
                return self  # accessed on the class, not an instance
            return getattr(fp_obj, self.attribute)

        def __set__(self, fp_obj, value):
            cur = getattr(fp_obj, self.attribute)
            if cur == value:
                return
            try:
                invalid_chars = set(str(value)) - self._perm_set
            except TypeError:
                raise TypeError('expected string, not %r' % value)
            if invalid_chars:
                raise ValueError('got invalid chars %r in permission'
                                 ' specification %r, expected empty string'
                                 ' or one or more of %r'
                                 % (invalid_chars, value, self._perm_chars))

            # dedupe and order as 'r', 'w', 'x' regardless of input order
            sort_key = lambda c: self._perm_val[c]
            new_value = ''.join(sorted(set(value),
                                       key=sort_key, reverse=True))
            setattr(fp_obj, self.attribute, new_value)
            self._update_integer(fp_obj, new_value)

        def _update_integer(self, fp_obj, value):
            shift = self.offset * 3
            mode = 0
            for symbol in value:
                mode |= self._perm_val[symbol] << shift
            # Clear this class's three bits before applying the new ones;
            # OR-ing alone would leave revoked permissions set.
            fp_obj._integer = (fp_obj._integer & ~(7 << shift)) | mode

    def __init__(self, user='', group='', other=''):
        self._user, self._group, self._other = '', '', ''
        self._integer = 0
        self.user = user
        self.group = group
        self.other = other

    @classmethod
    def from_int(cls, i):
        """Create a :class:`FilePerms` object from an integer. Only the
        lowest nine bits of *i* are considered.

        >>> FilePerms.from_int(0o644)  # note the leading zero-oh for octal
        FilePerms(user='rw', group='r', other='r')
        >>> FilePerms.from_int(0o044)  # empty leading classes stay empty
        FilePerms(user='', group='r', other='r')
        """
        key = ('', 'x', 'w', 'wx', 'r', 'rx', 'rw', 'rwx')
        # Always decode exactly three 3-bit groups (user, group, other).
        # Stopping at the highest non-zero group would shift group/other
        # permissions into the wrong positional arguments.
        parts = [key[(i >> shift) & 7] for shift in (6, 3, 0)]
        return cls(*parts)

    @classmethod
    def from_path(cls, path):
        """Make a new :class:`FilePerms` object based on the permissions
        assigned to the file or directory at *path*.

        Args:
            path (str): Filesystem path of the target file.

        Here's an example that holds true on most systems:

        >>> import tempfile
        >>> 'r' in FilePerms.from_path(tempfile.gettempdir()).user
        True
        """
        stat_res = os.stat(path)
        return cls.from_int(stat.S_IMODE(stat_res.st_mode))

    def __int__(self):
        return self._integer

    # Sphinx tip: attribute docstrings come after the attribute
    user = _FilePermProperty('_user', 2)
    "Stores the ``rwx``-formatted *user* permission."
    group = _FilePermProperty('_group', 1)
    "Stores the ``rwx``-formatted *group* permission."
    other = _FilePermProperty('_other', 0)
    "Stores the ``rwx``-formatted *other* permission."

    def __repr__(self):
        cn = self.__class__.__name__
        return ('%s(user=%r, group=%r, other=%r)'
                % (cn, self.user, self.group, self.other))
188 ####
189
190
# Flags used to create part files: read/write, create, and fail if the
# path already exists. Platform-specific flags are OR-ed in only where the
# ``os`` module defines them (a missing flag contributes 0).
_TEXT_OPENFLAGS = (os.O_RDWR | os.O_CREAT | os.O_EXCL
                   | getattr(os, 'O_NOINHERIT', 0)
                   | getattr(os, 'O_NOFOLLOW', 0))
# Binary mode additionally requests O_BINARY where it exists.
_BIN_OPENFLAGS = _TEXT_OPENFLAGS | getattr(os, 'O_BINARY', 0)
199
200
try:
    import fcntl
except ImportError:
    def set_cloexec(fd):
        "Dummy set_cloexec for platforms without fcntl support"
        return None
else:
    def set_cloexec(fd):
        """Make a best-effort :func:`fcntl.fcntl` call marking *fd* as
        close-on-exec, so future child processes do not inherit it.

        If the current descriptor flags cannot be read (``IOError``),
        the descriptor is left untouched.

        Implementation from the :mod:`tempfile` module.
        """
        try:
            current = fcntl.fcntl(fd, fcntl.F_GETFD, 0)
        except IOError:
            # flags unreadable: give up quietly
            return
        # flags read successfully; add FD_CLOEXEC and write them back
        fcntl.fcntl(fd, fcntl.F_SETFD, current | fcntl.FD_CLOEXEC)
        return
223
224
def atomic_save(dest_path, **kwargs):
    """Build and return an :class:`AtomicSaver` for *dest_path*.

    All keyword arguments are forwarded unchanged; see the
    :class:`AtomicSaver` documentation for details.
    """
    saver = AtomicSaver(dest_path, **kwargs)
    return saver
230
231
def path_to_unicode(path):
    """Return *path* as text.

    Text input is returned unchanged. Anything else is decoded with the
    filesystem encoding, falling back to the interpreter's default
    encoding when the filesystem encoding is not known.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if not isinstance(path, text_type):
        codec = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return path.decode(codec)
    return path
237
238
if os.name == 'nt':
    import ctypes
    from ctypes import c_wchar_p
    from ctypes.wintypes import DWORD, LPVOID

    _ReplaceFile = ctypes.windll.kernel32.ReplaceFile
    _ReplaceFile.argtypes = [c_wchar_p, c_wchar_p, c_wchar_p,
                             DWORD, LPVOID, LPVOID]

    def replace(src, dst):
        # argument names match the stdlib docs; the docstring is attached
        # after both platform variants are defined
        try:
            # ReplaceFile fails when the destination does not exist, so a
            # plain rename is attempted first
            os.rename(src, dst)
        except WindowsError as we:
            if we.errno != errno.EEXIST:
                raise
            # destination exists: fall through to ReplaceFile
        else:
            return

        src = path_to_unicode(src)
        dst = path_to_unicode(dst)
        ok = _ReplaceFile(c_wchar_p(dst), c_wchar_p(src),
                          None, 0, None, None)
        if not ok:
            raise OSError('failed to replace %r with %r' % (dst, src))
        return

    def atomic_rename(src, dst, overwrite=False):
        "Rename *src* to *dst*, replacing *dst* if *overwrite* is True"
        if not overwrite:
            os.rename(src, dst)
            return
        replace(src, dst)
        return
else:
    # wrapper func for cross compat + docs
    def replace(src, dst):
        # os.rename replaces an existing destination on unix, like os.replace
        return os.rename(src, dst)

    def atomic_rename(src, dst, overwrite=False):
        "Rename *src* to *dst*, replacing *dst* if *overwrite* is True"
        if not overwrite:
            # link() refuses to clobber an existing *dst*; the old name is
            # dropped only after the new link exists
            os.link(src, dst)
            os.unlink(src)
            return
        os.rename(src, dst)
        return


_atomic_rename = atomic_rename  # backwards compat

replace.__doc__ = """Similar to :func:`os.replace` in Python 3.3+,
this function will atomically create or replace the file at path
*dst* with the file at path *src*.

On Windows, this function uses the ReplaceFile API for maximum
possible atomicity on a range of filesystems.
"""
301
302
class AtomicSaver(object):
    """``AtomicSaver`` is a configurable `context manager`_ that provides
    a writable :class:`file` which will be moved into place as long as
    no exceptions are raised within the context manager's block. These
    "part files" are created in the same directory as the destination
    path to ensure atomic move operations (i.e., no cross-filesystem
    moves occur).

    Args:
        dest_path (str): The path where the completed file will be
            written.
        overwrite (bool): Whether to overwrite the destination file if
            it exists at completion time. Defaults to ``True``.
        file_perms (int): Integer representation of file permissions
            for the newly-created file. Defaults are, when the
            destination path already exists, to copy the permissions
            from the previous file, or if the file did not exist, to
            respect the user's configured `umask`_, usually resulting
            in octal 0644 or 0664.
        part_file (str): Name of the temporary *part_file*. Defaults
            to *dest_path* + ``.part``. Note that this argument is
            just the filename, and not the full path of the part
            file. To guarantee atomic saves, part files are always
            created in the same directory as the destination path.
        overwrite_part (bool): Whether to overwrite the *part_file*,
            should it exist at setup time. Defaults to ``False``,
            which results in an :exc:`OSError` being raised on
            pre-existing part files. Be careful of setting this to
            ``True`` in situations when multiple threads or processes
            could be writing to the same part file.
        rm_part_on_exc (bool): Remove *part_file* on exception cases.
            Defaults to ``True``, but ``False`` can be useful for
            recovery in some cases. Note that resumption is not
            automatic and by default an :exc:`OSError` is raised if
            the *part_file* exists.
        text_mode (bool): Open the part file in text mode (``'w+'``)
            instead of the default binary mode (``'w+b'``).
        buffering (int): Buffering argument passed to
            :func:`os.fdopen`. Defaults to ``-1``.

    Practically, the AtomicSaver serves a few purposes:

      * Avoiding overwriting an existing, valid file with a partially
        written one.
      * Providing a reasonable guarantee that a part file only has one
        writer at a time.
      * Optional recovery of partial data in failure cases.

    .. _context manager: https://docs.python.org/2/reference/compound_stmts.html#with
    .. _umask: https://en.wikipedia.org/wiki/Umask

    """
    _default_file_perms = RW_PERMS

    # TODO: option to abort if target file modify date has changed since start?
    def __init__(self, dest_path, **kwargs):
        self.dest_path = dest_path
        self.overwrite = kwargs.pop('overwrite', True)
        self.file_perms = kwargs.pop('file_perms', None)
        self.overwrite_part = kwargs.pop('overwrite_part', False)
        self.part_filename = kwargs.pop('part_file', None)
        self.rm_part_on_exc = kwargs.pop('rm_part_on_exc', True)
        self.text_mode = kwargs.pop('text_mode', False)  # for windows
        self.buffering = kwargs.pop('buffering', -1)
        if kwargs:
            # sorted() gives a plain, deterministic list on Python 2 and 3
            raise TypeError('unexpected kwargs: %r' % (sorted(kwargs),))

        self.dest_path = os.path.abspath(self.dest_path)
        self.dest_dir = os.path.dirname(self.dest_path)
        if not self.part_filename:
            # Derive the part path from the *absolute* destination so both
            # paths stay in the same directory even if the working
            # directory changes before setup() runs.
            self.part_path = self.dest_path + '.part'
        else:
            self.part_path = os.path.join(self.dest_dir, self.part_filename)
        self.mode = 'w+' if self.text_mode else 'w+b'
        self.open_flags = _TEXT_OPENFLAGS if self.text_mode else _BIN_OPENFLAGS

        self.part_file = None

    def _open_part_file(self):
        """Create the part file and store the open file object on
        ``self.part_file``.
        """
        do_chmod = True
        file_perms = self.file_perms
        if file_perms is None:
            try:
                # try to copy from file being replaced
                stat_res = os.stat(self.dest_path)
                file_perms = stat.S_IMODE(stat_res.st_mode)
            except (OSError, IOError):
                # default if no destination file exists
                file_perms = self._default_file_perms
                do_chmod = False  # respect the umask

        fd = os.open(self.part_path, self.open_flags, file_perms)
        set_cloexec(fd)
        self.part_file = os.fdopen(fd, self.mode, self.buffering)

        # if default perms are overridden by the user or previous dest_path
        # chmod away the effects of the umask
        if do_chmod:
            try:
                os.chmod(self.part_path, file_perms)
            except (OSError, IOError):
                self.part_file.close()
                raise
        return

    def setup(self):
        """Called on context manager entry (the :keyword:`with` statement),
        the ``setup()`` method creates the temporary file in the same
        directory as the destination file.

        ``setup()`` tests for a writable directory with rename permissions
        early, as the part file may not be written to immediately (not
        using :func:`os.access` because of the potential issues of
        effective vs. real privileges).

        If the caller is not using the :class:`AtomicSaver` as a
        context manager, this method should be called explicitly
        before writing.
        """
        if os.path.lexists(self.dest_path):
            if not self.overwrite:
                raise OSError(errno.EEXIST,
                              'Overwrite disabled and file already exists',
                              self.dest_path)
        if self.overwrite_part and os.path.lexists(self.part_path):
            os.unlink(self.part_path)
        self._open_part_file()
        return

    def __enter__(self):
        self.setup()
        return self.part_file

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.part_file.close()
        if exc_type:
            # the block failed: never move the part file into place
            if self.rm_part_on_exc:
                try:
                    os.unlink(self.part_path)
                except Exception:
                    pass  # avoid masking original error
            return
        try:
            atomic_rename(self.part_path, self.dest_path,
                          overwrite=self.overwrite)
        except OSError:
            if self.rm_part_on_exc:
                try:
                    os.unlink(self.part_path)
                except Exception:
                    pass  # avoid masking original error
            raise  # could not save destination file
        return
452
453
def iter_find_files(directory, patterns, ignored=None, include_dirs=False):
    """Returns a generator that yields file paths under a *directory*,
    matching *patterns* using `glob`_ syntax (e.g., ``*.txt``). Also
    supports *ignored* patterns.

    Patterns are matched against each entry's base name, not its full
    path. Within every directory visited, matching subdirectories (when
    requested) are yielded before matching files.

    Args:
        directory (str): Path that serves as the root of the
            search. Yielded paths will include this as a prefix.
        patterns (str or list): A single pattern or list of
            glob-formatted patterns to find under *directory*.
        ignored (str or list): A single pattern or list of
            glob-formatted patterns to ignore.
        include_dirs (bool): Whether to include directories that match
            patterns, as well. Defaults to ``False``.

    For example, finding Python files in the current directory:

    >>> _CUR_DIR = os.path.dirname(os.path.abspath(__file__))
    >>> filenames = sorted(iter_find_files(_CUR_DIR, '*.py'))
    >>> os.path.basename(filenames[-1])
    'urlutils.py'

    Or, Python files while ignoring emacs lockfiles:

    >>> filenames = iter_find_files(_CUR_DIR, '*.py', ignored='.#*')

    .. _glob: https://en.wikipedia.org/wiki/Glob_%28programming%29

    """
    try:
        string_types = basestring
    except NameError:
        string_types = (str, bytes)

    def _as_regex(globs):
        # one alternation covering every glob in the list
        return re.compile('|'.join([fnmatch.translate(g) for g in globs]))

    if isinstance(patterns, string_types):
        patterns = [patterns]
    wanted = _as_regex(patterns)

    if not ignored:
        ignored = []
    elif isinstance(ignored, string_types):
        ignored = [ignored]
    # With no ignore patterns this regex matches everything, so it is
    # consulted only when *ignored* is non-empty.
    unwanted = _as_regex(ignored)

    for root, dirs, files in os.walk(directory):
        candidates = (dirs + files) if include_dirs else files
        for basename in candidates:
            if not wanted.match(basename):
                continue
            if ignored and unwanted.match(basename):
                continue
            yield os.path.join(root, basename)
    return
508
509
def copy_tree(src, dst, symlinks=False, ignore=None):
    """The ``copy_tree`` function is an exact copy of the built-in
    :func:`shutil.copytree`, with one key difference: it will not
    raise an exception if part of the tree already exists. It achieves
    this by using :func:`mkdir_p`.

    Args:
        src (str): Path of the source directory to copy.
        dst (str): Destination path. Existing directories accepted.
        symlinks (bool): If ``True``, copy symlinks rather than their
            contents.
        ignore (callable): A callable that takes a path and directory
            listing, returning the files within the listing to be ignored.

    Raises:
        shutil.Error: After the whole tree has been attempted, if any
            entry failed to copy. Its argument is a list of
            ``(src, dst, reason)`` tuples.

    For more details, check out :func:`shutil.copytree` and
    :func:`shutil.copy2`.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    mkdir_p(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copy_tree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copy_tree so that we can
        # continue with other files
        except Error as e:
            errors.extend(e.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # ``WindowsError`` is only defined on Windows; referencing it
        # directly elsewhere would turn this handler into a NameError.
        try:
            windows_error = WindowsError
        except NameError:
            windows_error = None
        if windows_error is not None and isinstance(why, windows_error):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)


copytree = copy_tree  # alias for drop-in replacement of shutil
569
570
try:
    file
except NameError:
    file = object  # Python 3 has no ``file`` builtin


# like open(os.devnull) but with even fewer side effects
class DummyFile(file):
    """A file-like object that discards writes and always reads as empty.

    Nothing is opened on disk; *path* and *mode* are only recorded on the
    instance, and *buffering* is accepted and ignored. Most I/O methods
    raise :exc:`ValueError` once the object has been closed.

    Note that ``isatty`` is a plain ``False`` attribute here, not a
    method as on real file objects.
    """
    # TODO: raise ValueErrors on closed for all methods?
    # TODO: enforce read/write
    def __init__(self, path, mode='r', buffering=None):
        self.name = path
        self.mode = mode
        self.closed = False
        self.errors = None
        self.isatty = False
        self.encoding = None
        self.newlines = None
        self.softspace = 0

    def close(self):
        self.closed = True

    def fileno(self):
        return -1  # no real descriptor backs this object

    def flush(self):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return

    def next(self):
        raise StopIteration()

    def read(self, size=0):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return ''

    def readline(self, size=0):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return ''

    def readlines(self, size=0):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return []

    def seek(self, offset=0, whence=0):
        # accepts (and ignores) the arguments of a real ``seek`` call
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return

    def tell(self):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return 0

    def truncate(self, size=None):
        # accepts (and ignores) the optional size of a real ``truncate``
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return

    def write(self, string):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return

    def writelines(self, list_of_strings):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        return

    def __iter__(self):
        # iteration ends immediately via next()/__next__()
        return self

    def __next__(self):
        raise StopIteration()

    def __enter__(self):
        if self.closed:
            raise ValueError('I/O operation on a closed file')
        # return the object itself so ``with DummyFile(...) as f`` binds it
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        return
655
656
if __name__ == '__main__':
    # Small manual demo. The part file is opened in binary mode unless
    # text_mode=True is passed, so bytes are written; str would raise
    # TypeError on Python 3.
    with atomic_save('/tmp/final.txt') as f:
        f.write(b'rofl')
        f.write(b'\n')