comparison env/lib/python3.7/site-packages/scandir.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 """scandir, a better directory iterator and faster os.walk(), now in the Python 3.5 stdlib
2
3 scandir() is a generator version of os.listdir() that returns an
4 iterator over files in a directory, and also exposes the extra
5 information most OSes provide while iterating files in a directory
6 (such as type and stat information).
7
8 This module also includes a version of os.walk() that uses scandir()
9 to speed it up significantly.
10
11 See README.md or https://github.com/benhoyt/scandir for rationale and
12 docs, or read PEP 471 (https://www.python.org/dev/peps/pep-0471/) for
13 more details on its inclusion into Python 3.5
14
15 scandir is released under the new BSD 3-clause license. See
16 LICENSE.txt for the full license text.
17 """
18
19 from __future__ import division
20
21 from errno import ENOENT
22 from os import listdir, lstat, stat, strerror
23 from os.path import join, islink
24 from stat import S_IFDIR, S_IFLNK, S_IFREG
25 import collections
26 import sys
27
28 try:
29 import _scandir
30 except ImportError:
31 _scandir = None
32
33 try:
34 import ctypes
35 except ImportError:
36 ctypes = None
37
# Neither the compiled _scandir extension nor ctypes could be imported.
# Every platform branch further down selects the listdir()-based generic
# implementation in that case, so warn the user once at import time.
if _scandir is None and ctypes is None:
    import warnings
    warnings.warn("scandir can't find the compiled _scandir C module "
                  "or ctypes, using slow generic fallback")

# Package version string and the names exported by "from scandir import *".
__version__ = '1.10.0'
__all__ = ['scandir', 'walk']
45
# Windows FILE_ATTRIBUTE constants for interpreting the
# FIND_DATA.dwFileAttributes member. Each value is a single bit flag;
# this module tests them with "&" (hex equivalents given for reference).
FILE_ATTRIBUTE_ARCHIVE = 32  # 0x20
FILE_ATTRIBUTE_COMPRESSED = 2048  # 0x800
FILE_ATTRIBUTE_DEVICE = 64  # 0x40
FILE_ATTRIBUTE_DIRECTORY = 16  # 0x10
FILE_ATTRIBUTE_ENCRYPTED = 16384  # 0x4000
FILE_ATTRIBUTE_HIDDEN = 2  # 0x2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768  # 0x8000
FILE_ATTRIBUTE_NORMAL = 128  # 0x80
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192  # 0x2000
FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072  # 0x20000
FILE_ATTRIBUTE_OFFLINE = 4096  # 0x1000
FILE_ATTRIBUTE_READONLY = 1  # 0x1
FILE_ATTRIBUTE_REPARSE_POINT = 1024  # 0x400
FILE_ATTRIBUTE_SPARSE_FILE = 512  # 0x200
FILE_ATTRIBUTE_SYSTEM = 4  # 0x4
FILE_ATTRIBUTE_TEMPORARY = 256  # 0x100
FILE_ATTRIBUTE_VIRTUAL = 65536  # 0x10000

# True when running under any Python 3.x interpreter.
IS_PY3 = sys.version_info >= (3, 0)

if IS_PY3:
    # Lets the rest of the module call unicode('.') on both major versions.
    unicode = str  # Because Python <= 3.2 doesn't have u'unicode' syntax
70
71
class GenericDirEntry(object):
    """Directory entry that relies solely on os.stat() and os.lstat().

    No file-type information is supplied at construction time, so every
    type query stats the file; each stat result is fetched lazily and
    cached on the entry.
    """
    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self.name = name
        self._scandir_path = scandir_path
        self._path = None
        self._stat = None
        self._lstat = None

    @property
    def path(self):
        """Directory path joined with the entry name (computed once)."""
        cached = self._path
        if cached is None:
            cached = self._path = join(self._scandir_path, self.name)
        return cached

    def stat(self, follow_symlinks=True):
        """Return the cached stat result, or lstat result when
        follow_symlinks is false, fetching it on first use."""
        if not follow_symlinks:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat
        if self._stat is None:
            self._stat = stat(self.path)
        return self._stat

    # The three type checks are deliberately written out in full rather
    # than sharing a helper: on CPython the extra attribute lookup and
    # call would slow down systems that depend on this fallback class.
    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is a directory; False if it is missing."""
        try:
            mode = self.stat(follow_symlinks=follow_symlinks).st_mode
        except OSError as exc:
            if exc.errno == ENOENT:
                return False  # Path doesn't exist or is a broken symlink
            raise
        return mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is a regular file; False if missing."""
        try:
            mode = self.stat(follow_symlinks=follow_symlinks).st_mode
        except OSError as exc:
            if exc.errno == ENOENT:
                return False  # Path doesn't exist or is a broken symlink
            raise
        return mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link."""
        try:
            mode = self.stat(follow_symlinks=False).st_mode
        except OSError as exc:
            if exc.errno == ENOENT:
                return False  # Path doesn't exist
            raise
        return mode & 0o170000 == S_IFLNK

    def inode(self):
        """Return the inode number taken from the (cached) lstat result."""
        return self.stat(follow_symlinks=False).st_ino

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__
137
138
def _scandir_generic(path=unicode('.')):
    """Yield one GenericDirEntry per name returned by os.listdir(path).

    The directory is listed when iteration starts, not when this
    function is called, because this is a generator.
    """
    entry_names = listdir(path)
    for entry_name in entry_names:
        yield GenericDirEntry(path, entry_name)
145
146
# Placeholders for the optional implementations; the platform-specific
# sections of this module rebind them when one is available.
scandir_c = None
scandir_python = None

if not IS_PY3 or sys.platform != 'win32':
    # No extra argument checking needed: expose the generator directly.
    scandir_generic = _scandir_generic
else:
    def scandir_generic(path=unicode('.')):
        # On Python 3 under Windows bytes paths are rejected up front,
        # before any directory access happens.
        if isinstance(path, bytes):
            raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
        return _scandir_generic(path)
    scandir_generic.__doc__ = _scandir_generic.__doc__
159
160
161 if sys.platform == 'win32':
162 if ctypes is not None:
163 from ctypes import wintypes
164
# Various constants from windows.h
# INVALID_HANDLE_VALUE is what FindFirstFile's result is compared
# against to detect failure.
INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value
ERROR_FILE_NOT_FOUND = 2
ERROR_NO_MORE_FILES = 18
IO_REPARSE_TAG_SYMLINK = 0xA000000C

# Number of seconds between 1601-01-01 and 1970-01-01
SECONDS_BETWEEN_EPOCHS = 11644473600

kernel32 = ctypes.windll.kernel32

# ctypes wrappers for (wide string versions of) FindFirstFile,
# FindNextFile, and FindClose. argtypes/restype are declared explicitly
# so ctypes converts arguments and return values with the right types.
FindFirstFile = kernel32.FindFirstFileW
FindFirstFile.argtypes = [
    wintypes.LPCWSTR,
    ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
]
FindFirstFile.restype = wintypes.HANDLE

FindNextFile = kernel32.FindNextFileW
FindNextFile.argtypes = [
    wintypes.HANDLE,
    ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
]
FindNextFile.restype = wintypes.BOOL

FindClose = kernel32.FindClose
FindClose.argtypes = [wintypes.HANDLE]
FindClose.restype = wintypes.BOOL

# Lightweight stat-like record produced by find_data_to_stat(). Fields
# are positional, so their order here must match the order in which
# find_data_to_stat() passes values.
Win32StatResult = collections.namedtuple('Win32StatResult', [
    'st_mode',
    'st_ino',
    'st_dev',
    'st_nlink',
    'st_uid',
    'st_gid',
    'st_size',
    'st_atime',
    'st_mtime',
    'st_ctime',
    'st_atime_ns',
    'st_mtime_ns',
    'st_ctime_ns',
    'st_file_attributes',
])
212
def filetime_to_time(filetime):
    """Return a Win32 FILETIME as seconds relative to the Unix epoch.

    The high and low 32-bit halves are combined into a single tick
    count, scaled to seconds (10,000,000 ticks per second) and shifted
    by SECONDS_BETWEEN_EPOCHS.
    """
    ticks = (filetime.dwHighDateTime << 32) | filetime.dwLowDateTime
    seconds_since_1601 = ticks / 10000000
    return seconds_since_1601 - SECONDS_BETWEEN_EPOCHS
217
def _filetime_to_ns(filetime):
    """Convert Win32 FILETIME to integer nanoseconds since the Unix epoch."""
    ticks = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime
    # One tick is 100 ns. Staying in integer arithmetic keeps the result
    # exact; going through a float number of seconds cannot represent
    # present-day timestamps to 100 ns.
    return (ticks - SECONDS_BETWEEN_EPOCHS * 10000000) * 100


def find_data_to_stat(data):
    """Convert Win32 FIND_DATA struct to stat_result.

    Args:
        data: a WIN32_FIND_DATAW-like object exposing dwFileAttributes,
            dwReserved0, nFileSizeHigh/nFileSizeLow and the three
            FILETIME members.

    Returns:
        A Win32StatResult. The float st_*time fields are seconds since
        the Unix epoch; the st_*time_ns fields are exact integers derived
        directly from the FILETIME tick counts.
    """
    # First convert Win32 dwFileAttributes to st_mode
    attributes = data.dwFileAttributes
    st_mode = 0
    if attributes & FILE_ATTRIBUTE_DIRECTORY:
        st_mode |= S_IFDIR | 0o111
    else:
        st_mode |= S_IFREG
    if attributes & FILE_ATTRIBUTE_READONLY:
        st_mode |= 0o444
    else:
        st_mode |= 0o666
    if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
            data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
        # Symlink: clear the file-type bits set above, then mark as link.
        st_mode ^= st_mode & 0o170000
        st_mode |= S_IFLNK

    st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
    st_atime = filetime_to_time(data.ftLastAccessTime)
    st_mtime = filetime_to_time(data.ftLastWriteTime)
    st_ctime = filetime_to_time(data.ftCreationTime)

    # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
    # st_nlink, st_uid, st_gid
    return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
                           st_atime, st_mtime, st_ctime,
                           _filetime_to_ns(data.ftLastAccessTime),
                           _filetime_to_ns(data.ftLastWriteTime),
                           _filetime_to_ns(data.ftCreationTime),
                           attributes)
249
class Win32DirEntryPython(object):
    """Directory entry built from a Win32 FIND_DATA record.

    Type checks on non-symlinks are answered straight from the record's
    attribute flags; the stat-like tuple is only built on demand.
    """
    __slots__ = ('name', '_stat', '_lstat', '_find_data', '_scandir_path', '_path', '_inode')

    def __init__(self, scandir_path, name, find_data):
        self.name = name
        self._scandir_path = scandir_path
        self._find_data = find_data
        self._path = None
        self._inode = None
        self._stat = None
        self._lstat = None

    @property
    def path(self):
        """Directory path joined with the entry name (computed once)."""
        cached = self._path
        if cached is None:
            cached = self._path = join(self._scandir_path, self.name)
        return cached

    def stat(self, follow_symlinks=True):
        """Return a stat result for the entry, cached after first use."""
        if not follow_symlinks:
            if self._lstat is None:
                # Lazily convert to stat object, because it's slow
                # in Python, and often we only need is_dir() etc
                self._lstat = find_data_to_stat(self._find_data)
            return self._lstat

        if self._stat is not None:
            return self._stat
        if self.is_symlink():
            # Only a symlink needs a real, link-following stat() call
            self._stat = stat(self.path)
        else:
            # Not a symlink, so the lstat value doubles as the stat value
            if self._lstat is None:
                self._lstat = find_data_to_stat(self._find_data)
            self._stat = self._lstat
        return self._stat

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is (or, when following, links to) a
        directory."""
        if not self.is_symlink():
            flags = self._find_data.dwFileAttributes
            return flags & FILE_ATTRIBUTE_DIRECTORY != 0
        if not follow_symlinks:
            return False
        try:
            return self.stat().st_mode & 0o170000 == S_IFDIR
        except OSError as exc:
            if exc.errno != ENOENT:
                raise
            return False

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is (or, when following, links to) a
        regular file."""
        if not self.is_symlink():
            flags = self._find_data.dwFileAttributes
            return flags & FILE_ATTRIBUTE_DIRECTORY == 0
        if not follow_symlinks:
            return False
        try:
            return self.stat().st_mode & 0o170000 == S_IFREG
        except OSError as exc:
            if exc.errno != ENOENT:
                raise
            return False

    def is_symlink(self):
        """Return True if the record is a reparse point tagged as a symlink."""
        record = self._find_data
        if record.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT == 0:
            return False
        return record.dwReserved0 == IO_REPARSE_TAG_SYMLINK

    def inode(self):
        """Return st_ino from os.lstat(); the FIND_DATA record is not used
        for this, so the first call stats the file."""
        if self._inode is None:
            self._inode = lstat(self.path).st_ino
        return self._inode

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__
331
def win_error(error, filename):
    """Build (but do not raise) a WindowsError for a Win32 error code.

    The message comes from ctypes.FormatError(); the offending path is
    attached as the exception's filename attribute.
    """
    message = ctypes.FormatError(error)
    failure = WindowsError(error, message)
    failure.filename = filename
    return failure
336
def _scandir_python(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.

    Generator built on FindFirstFileW/FindNextFileW. A bytes path is
    decoded with 'mbcs' for the API call and the yielded names are
    encoded back to bytes. Raises the exception built by win_error()
    when a Win32 call fails.
    """
    # Call FindFirstFile and handle errors
    if isinstance(path, bytes):
        is_bytes = True
        filename = join(path.decode('mbcs', 'strict'), '*.*')
    else:
        is_bytes = False
        filename = join(path, '*.*')
    data = wintypes.WIN32_FIND_DATAW()
    data_p = ctypes.byref(data)
    handle = FindFirstFile(filename, data_p)
    if handle == INVALID_HANDLE_VALUE:
        # Read the error code immediately, before any other call is made.
        error = ctypes.GetLastError()
        if error == ERROR_FILE_NOT_FOUND:
            # No files, don't yield anything
            return
        raise win_error(error, path)

    # Call FindNextFile in a loop, stopping when no more files
    try:
        while True:
            # Skip '.' and '..' (current and parent directory), but
            # otherwise yield a Win32DirEntryPython for the record
            name = data.cFileName
            if name not in ('.', '..'):
                if is_bytes:
                    name = name.encode('mbcs', 'replace')
                yield Win32DirEntryPython(path, name, data)

            # A fresh struct per iteration: the entry just yielded keeps a
            # reference to the previous one.
            data = wintypes.WIN32_FIND_DATAW()
            data_p = ctypes.byref(data)
            success = FindNextFile(handle, data_p)
            if not success:
                error = ctypes.GetLastError()
                if error == ERROR_NO_MORE_FILES:
                    break
                raise win_error(error, path)
    finally:
        # Runs on normal exhaustion, on error, and when the generator is
        # closed early, so the search handle is always released.
        if not FindClose(handle):
            raise win_error(ctypes.GetLastError(), path)
380
if not IS_PY3:
    # Python 2: bytes paths are accepted, expose the generator directly.
    scandir_python = _scandir_python
else:
    def scandir_python(path=unicode('.')):
        # Python 3 rejects bytes paths up front, before any directory
        # access happens.
        if isinstance(path, bytes):
            raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
        return _scandir_python(path)
    scandir_python.__doc__ = _scandir_python.__doc__
389
# Pick the public scandir()/DirEntry pair: prefer the C extension, then
# the ctypes implementation, then the generic listdir() fallback.
if _scandir is None:
    if ctypes is None:
        scandir = scandir_generic
        DirEntry = GenericDirEntry
    else:
        scandir = scandir_python
        DirEntry = Win32DirEntryPython
else:
    scandir_c = _scandir.scandir
    DirEntry_c = _scandir.DirEntry
    scandir = scandir_c
    DirEntry = DirEntry_c
403
404
405 # Linux, OS X, and BSD implementation
406 elif sys.platform.startswith(('linux', 'darwin', 'sunos5')) or 'bsd' in sys.platform:
407 have_dirent_d_type = (sys.platform != 'sunos5')
408
409 if ctypes is not None and have_dirent_d_type:
410 import ctypes.util
411
# Handle type used for opendir()'s result and for the first argument of
# readdir_r()/closedir().
DIR_p = ctypes.c_void_p

# Rather annoying how the dirent struct is slightly different on each
# platform. The fields this module reads are d_name, d_type and d_ino;
# the remaining members only serve to put those at the right offsets.
class Dirent(ctypes.Structure):
    # The layout is chosen once, when the class body executes, from
    # sys.platform.
    if sys.platform.startswith('linux'):
        _fields_ = (
            ('d_ino', ctypes.c_ulong),
            ('d_off', ctypes.c_long),
            ('d_reclen', ctypes.c_ushort),
            ('d_type', ctypes.c_byte),
            ('d_name', ctypes.c_char * 256),
        )
    elif 'openbsd' in sys.platform:
        _fields_ = (
            ('d_ino', ctypes.c_uint64),
            ('d_off', ctypes.c_uint64),
            ('d_reclen', ctypes.c_uint16),
            ('d_type', ctypes.c_uint8),
            ('d_namlen', ctypes.c_uint8),
            ('__d_padding', ctypes.c_uint8 * 4),
            ('d_name', ctypes.c_char * 256),
        )
    else:
        # Every other platform that reaches this code (per the enclosing
        # platform check: darwin and the remaining BSDs).
        # NOTE(review): this layout is not verified here for every such
        # platform/architecture -- confirm against the target's dirent.h.
        _fields_ = (
            ('d_ino', ctypes.c_uint32),  # must be uint32, not ulong
            ('d_reclen', ctypes.c_ushort),
            ('d_type', ctypes.c_byte),
            ('d_namlen', ctypes.c_byte),
            ('d_name', ctypes.c_char * 256),
        )
443
# dirent d_type values that PosixDirEntry compares against. DT_UNKNOWN
# makes PosixDirEntry fall back to a stat call.
DT_UNKNOWN = 0
DT_DIR = 4
DT_REG = 8
DT_LNK = 10

Dirent_p = ctypes.POINTER(Dirent)
Dirent_pp = ctypes.POINTER(Dirent_p)

# use_errno=True makes ctypes keep a private copy of errno for calls
# through this library; posix_error() reads it via ctypes.get_errno().
libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
opendir = libc.opendir
opendir.argtypes = [ctypes.c_char_p]
opendir.restype = DIR_p

readdir_r = libc.readdir_r
readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp]
readdir_r.restype = ctypes.c_int

closedir = libc.closedir
closedir.argtypes = [DIR_p]
closedir.restype = ctypes.c_int

# Encoding used to convert between str paths/names and the bytes that
# the libc calls exchange.
file_system_encoding = sys.getfilesystemencoding()
466
class PosixDirEntry(object):
    """Directory entry built from a POSIX dirent record.

    The d_type recorded at construction answers type checks without a
    system call; a stat is made only when d_type is DT_UNKNOWN or a
    symlink has to be followed.
    """
    __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path', '_inode')

    def __init__(self, scandir_path, name, d_type, inode):
        self.name = name
        self._scandir_path = scandir_path
        self._d_type = d_type
        self._inode = inode
        self._path = None
        self._stat = None
        self._lstat = None

    @property
    def path(self):
        """Directory path joined with the entry name (computed once)."""
        cached = self._path
        if cached is None:
            cached = self._path = join(self._scandir_path, self.name)
        return cached

    def stat(self, follow_symlinks=True):
        """Return a stat result for the entry, cached after first use."""
        if not follow_symlinks:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat

        if self._stat is not None:
            return self._stat
        if self.is_symlink():
            self._stat = stat(self.path)
        else:
            # Not a symlink, so the lstat result doubles as the stat result
            if self._lstat is None:
                self._lstat = lstat(self.path)
            self._stat = self._lstat
        return self._stat

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is (or, when following, links to) a
        directory."""
        needs_stat = (self._d_type == DT_UNKNOWN or
                      (follow_symlinks and self.is_symlink()))
        if not needs_stat:
            return self._d_type == DT_DIR
        try:
            mode = self.stat(follow_symlinks=follow_symlinks).st_mode
        except OSError as exc:
            if exc.errno == ENOENT:
                return False
            raise
        return mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is (or, when following, links to) a
        regular file."""
        needs_stat = (self._d_type == DT_UNKNOWN or
                      (follow_symlinks and self.is_symlink()))
        if not needs_stat:
            return self._d_type == DT_REG
        try:
            mode = self.stat(follow_symlinks=follow_symlinks).st_mode
        except OSError as exc:
            if exc.errno == ENOENT:
                return False
            raise
        return mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link."""
        if self._d_type != DT_UNKNOWN:
            return self._d_type == DT_LNK
        try:
            mode = self.stat(follow_symlinks=False).st_mode
        except OSError as exc:
            if exc.errno == ENOENT:
                return False
            raise
        return mode & 0o170000 == S_IFLNK

    def inode(self):
        """Return the inode number passed in at construction."""
        return self._inode

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__
545
def posix_error(filename):
    """Build (but do not raise) an OSError from ctypes' saved errno.

    The message comes from os.strerror(); the offending path is attached
    as the exception's filename attribute.
    """
    code = ctypes.get_errno()
    failure = OSError(code, strerror(code))
    failure.filename = filename
    return failure
551
def scandir_python(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.

    Generator built on libc's opendir()/readdir_r()/closedir(). Yielded
    names have the same type (bytes or text) as ``path``.

    Raises:
        OSError: if the directory cannot be opened, read or closed.
    """
    # On Python 3 use the 'surrogateescape' handler, like os.fsencode()
    # and os.fsdecode(), so file names that are not valid in the file
    # system encoding round-trip instead of raising UnicodeError.
    # Python 2 has no such handler, so it keeps strict conversion.
    codec_errors = 'surrogateescape' if IS_PY3 else 'strict'

    if isinstance(path, bytes):
        opendir_path = path
        is_bytes = True
    else:
        opendir_path = path.encode(file_system_encoding, codec_errors)
        is_bytes = False
    dir_p = opendir(opendir_path)
    if not dir_p:
        raise posix_error(path)
    try:
        result = Dirent_p()
        while True:
            entry = Dirent()
            # readdir_r() reports failure through its return value (an
            # error number); it is not required to set errno, so build
            # the exception from the returned code.
            error_code = readdir_r(dir_p, entry, result)
            if error_code:
                exc = OSError(error_code, strerror(error_code))
                exc.filename = path
                raise exc
            if not result:
                # NULL result pointer: end of directory stream
                break
            name = entry.d_name
            if name not in (b'.', b'..'):
                if not is_bytes:
                    name = name.decode(file_system_encoding, codec_errors)
                yield PosixDirEntry(path, name, entry.d_type, entry.d_ino)
    finally:
        # Runs on exhaustion, on error, and when the generator is closed
        # early, so the directory stream is always released.
        if closedir(dir_p):
            raise posix_error(path)
581
# Pick the public scandir()/DirEntry pair: prefer the C extension, then
# the ctypes implementation (only when dirent carries d_type), then the
# generic listdir() fallback.
if _scandir is not None:
    scandir_c = _scandir.scandir
    DirEntry_c = _scandir.DirEntry
    scandir = scandir_c
    DirEntry = DirEntry_c
elif ctypes is None or not have_dirent_d_type:
    scandir = scandir_generic
    DirEntry = GenericDirEntry
else:
    scandir = scandir_python
    DirEntry = PosixDirEntry
595
596
597 # Some other system -- no d_type or stat information
598 else:
# Unrecognised platform: always use the listdir()-based implementation.
scandir = scandir_generic
DirEntry = GenericDirEntry
601
602
def _walk(top, topdown=True, onerror=None, followlinks=False):
    """Directory tree generator modelled on Python 3.5's os.walk().

    Yields (top, dirnames, filenames) tuples, using scandir() to
    classify entries. Errors from listing a directory are passed to
    ``onerror`` (when given) and that directory is skipped. Symlinked
    directories are only descended into when ``followlinks`` is true.
    """
    subdir_names = []
    other_names = []

    # We may lack read permission for top. Like os.walk, swallow the
    # error (after reporting it) rather than abort a long traversal
    # because of one unreadable directory.
    try:
        entries = scandir(top)
    except OSError as exc:
        if onerror is not None:
            onerror(exc)
        return

    while True:
        try:
            current = next(entries)
        except StopIteration:
            break
        except OSError as exc:
            if onerror is not None:
                onerror(exc)
            return

        try:
            entry_is_dir = current.is_dir()
        except OSError:
            # Same behaviour as os.path.isdir(): an entry whose type
            # cannot be determined is treated as a non-directory.
            entry_is_dir = False

        if entry_is_dir:
            subdir_names.append(current.name)
        else:
            other_names.append(current.name)

        if topdown or not entry_is_dir:
            continue

        # Bottom-up: recurse into the sub-directory right away, but skip
        # symlinks to directories unless followlinks is set.
        descend = followlinks
        if not descend:
            try:
                descend = not current.is_symlink()
            except OSError:
                # Same behaviour as os.path.islink(): treat the entry as
                # not being a symbolic link.
                descend = True

        if descend:
            for item in walk(current.path, topdown, onerror, followlinks):
                yield item

    if not topdown:
        # Bottom-up: the sub-directories were already yielded above
        yield top, subdir_names, other_names
        return

    # Top-down: yield first so the caller can prune subdir_names in place
    yield top, subdir_names, other_names

    for name in subdir_names:
        child_path = join(top, name)
        # Issue #23605: os.path.islink() is used instead of a cached
        # entry.is_symlink() result because the caller can replace the
        # directory entry during the yield above.
        if followlinks or not islink(child_path):
            for item in walk(child_path, topdown, onerror, followlinks):
                yield item
681
682
if not IS_PY3 and sys.platform == 'win32':
    # Fix for broken unicode handling on Windows on Python 2.x, see:
    # https://github.com/benhoyt/scandir/issues/54
    file_system_encoding = sys.getfilesystemencoding()

    def walk(top, topdown=True, onerror=None, followlinks=False):
        # Decode a bytes top path first so _walk() works on text.
        text_top = top.decode(file_system_encoding) if isinstance(top, bytes) else top
        return _walk(text_top, topdown, onerror, followlinks)
else:
    # Everywhere else the plain implementation is exported unchanged.
    walk = _walk