comparison env/lib/python3.7/site-packages/distlib/manifest.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2012-2013 Python Software Foundation.
4 # See LICENSE.txt and CONTRIBUTORS.txt.
5 #
6 """
7 Class representing the list of files in a distribution.
8
9 Equivalent to distutils.filelist, but fixes some problems.
10 """
11 import fnmatch
12 import logging
13 import os
14 import re
15 import sys
16
17 from . import DistlibException
18 from .compat import fsdecode
19 from .util import convert_path
20
21
22 __all__ = ['Manifest']
23
24 logger = logging.getLogger(__name__)
25
26 # Described as: a \ followed by some spaces + EOL. NOTE(review): as written, the regex matches a backslash, zero or more literal 'w' characters, then a newline -- confirm whether '\s' was intended.
27 _COLLAPSE_PATTERN = re.compile('\\\\w*\n', re.M)
28 _COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
29
30 #
31 # fnmatch.translate returns differently shaped regexes on different Python
32 # versions, so slightly different processing is needed for Python 2.7 and
33 # 3.2; the same handling was also required again from Python 3.6 onwards.
34 #
35 _PYTHON_VERSION = sys.version_info[:2]
36
class Manifest(object):
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.
    """

    def __init__(self, base=None):
        """
        Initialise an instance.

        :param base: The base directory to explore under. The current working
                     directory is used when this is omitted or empty.
        """
        self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
        self.prefix = self.base + os.sep
        # None means "not scanned yet"; findall() fills this in on demand.
        self.allfiles = None
        self.files = set()

    #
    # Public API
    #

    def findall(self):
        """Find all files under the base and set ``allfiles`` to the absolute
        pathnames of files found.

        Entries are classified with ``os.stat``, which follows symbolic links,
        so a link is treated as whatever it points to.
        """
        from stat import S_ISREG, S_ISDIR

        self.allfiles = allfiles = []
        stack = [self.base]
        pop = stack.pop
        push = stack.append

        while stack:
            root = pop()
            for name in os.listdir(root):
                fullname = os.path.join(root, name)

                # Avoid excess stat calls -- just one will do, thank you!
                mode = os.stat(fullname).st_mode
                if S_ISREG(mode):
                    allfiles.append(fsdecode(fullname))
                elif S_ISDIR(mode):
                    # os.stat never reports a link type (it resolves links),
                    # so an extra "is not a symlink" test here would always
                    # pass; directories are queued unconditionally.
                    push(fullname)

    def add(self, item):
        """
        Add a file to the manifest.

        :param item: The pathname to add. This can be relative to the base.
        """
        if not item.startswith(self.prefix):
            item = os.path.join(self.base, item)
        self.files.add(os.path.normpath(item))

    def add_many(self, items):
        """
        Add a list of files to the manifest.

        :param items: The pathnames to add. These can be relative to the base.
        """
        for item in items:
            self.add(item)

    def sorted(self, wantdirs=False):
        """
        Return sorted files in directory order.

        :param wantdirs: If true, the directories containing the files (up to
                         and including the base) are part of the result.
        :return: A list of pathnames ordered by (directory, name).
        """

        def add_dir(dirs, d):
            # Record ``d`` and every ancestor of it up to the base directory.
            dirs.add(d)
            logger.debug('add_dir added %s', d)
            if d != self.base:
                parent, _ = os.path.split(d)
                assert parent not in ('', '/')
                add_dir(dirs, parent)

        result = set(self.files)    # make a copy!
        if wantdirs:
            dirs = set()
            for f in result:
                add_dir(dirs, os.path.dirname(f))
            result |= dirs
        # Sorting (head, tail) pairs groups entries by containing directory.
        return [os.path.join(*path_tuple) for path_tuple in
                sorted(os.path.split(path) for path in result)]

    def clear(self):
        """Clear all collected files.

        ``allfiles`` goes back to ``None`` (its initial state) so that the
        next include re-scans the base directory. An empty list would look
        like a completed scan and make every later include match nothing.
        """
        self.files = set()
        self.allfiles = None

    def process_directive(self, directive):
        """
        Process a directive which either adds some files from ``allfiles`` to
        ``files``, or removes some files from ``files``.

        :param directive: The directive to process. This should be in a format
                     compatible with distutils ``MANIFEST.in`` files:

                     http://docs.python.org/distutils/sourcedist.html#commands
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dirpattern).
        action, patterns, thedir, dirpattern = self._parse_directive(directive)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        #
        # The pattern-based exclude actions deliberately stay silent when
        # nothing matches, so their results are not inspected.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning('no files found matching %r', pattern)

        elif action == 'exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=True)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning('no files found matching %r '
                                   'anywhere in distribution', pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=False)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=thedir):
                    logger.warning('no files found matching %r '
                                   'under directory %r', pattern, thedir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, prefix=thedir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dirpattern):
                logger.warning('no directories found matching %r',
                               dirpattern)

        elif action == 'prune':
            if not self._exclude_pattern(None, prefix=dirpattern):
                logger.warning('no previously-included directories found '
                               'matching %r', dirpattern)
        else:   # pragma: no cover
            # This should never happen, as it should be caught in
            # _parse_directive
            raise DistlibException(
                'invalid action %r' % action)

    #
    # Private API
    #

    def _parse_directive(self, directive):
        """
        Validate a directive.

        :param directive: The directive to validate.
        :return: A tuple of action, patterns, thedir, dir_patterns
        :raises DistlibException: If the directive is empty, names an unknown
                                  action, or has the wrong number of words
                                  for its action.
        """
        words = directive.split()
        if not words:
            # Without this, words[0] below would fail with a bare IndexError.
            raise DistlibException(
                'no action given in directive %r' % directive)

        if len(words) == 1 and words[0] not in ('include', 'exclude',
                                                'global-include',
                                                'global-exclude',
                                                'recursive-include',
                                                'recursive-exclude',
                                                'graft', 'prune'):
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = thedir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistlibException(
                    '%r expects <pattern1> <pattern2> ...' % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistlibException(
                    '%r expects <dir> <pattern1> <pattern2> ...' % action)

            thedir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistlibException(
                    '%r expects a single <dir_pattern>' % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistlibException('unknown action %r' % action)

        return action, patterns, thedir, dir_pattern

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: '*' and '?' do not match the path separator (os.sep).

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found.
        """
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.files.add(name)
                found = True
        return found

    def _exclude_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        Other parameters are the same as for '_include_pattern()', above.
        The set 'self.files' is modified in place. Return True if files are
        found.

        Useful for e.g. exclusion of SCM subdirs when packaging source
        distributions.
        """
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
        # Iterate over a snapshot: self.files is mutated inside the loop.
        for f in list(self.files):
            if pattern_re.search(f):
                self.files.remove(f)
                found = True
        return found

    def _translate_pattern(self, pattern, anchor=True, prefix=None,
                           is_regex=False):
        """Translate a shell-like wildcard pattern to a compiled regular
        expression.

        Return the compiled regex.  If 'is_regex' true,
        then 'pattern' is directly compiled to a regex (if it's a string)
        or just returned as-is (assumes it's a regex object).
        """
        if is_regex:
            if isinstance(pattern, str):
                return re.compile(pattern)
            else:
                return pattern

        if _PYTHON_VERSION > (3, 2):
            # ditch start and end characters: translating the one-character
            # glob '_' shows what text gets wrapped around a translated
            # pattern.
            start, _, end = self._glob_to_re('_').partition('_')

        if pattern:
            pattern_re = self._glob_to_re(pattern)
            if _PYTHON_VERSION > (3, 2):
                assert (pattern_re.startswith(start) and
                        pattern_re.endswith(end))
        else:
            pattern_re = ''

        # The base directory with a trailing separator, as a literal regex.
        base = re.escape(os.path.join(self.base, ''))
        if prefix is not None:
            # ditch end of pattern character
            if _PYTHON_VERSION <= (3, 2):
                empty_pattern = self._glob_to_re('')
                prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
            else:
                prefix_re = self._glob_to_re(prefix)
                assert prefix_re.startswith(start) and prefix_re.endswith(end)
                prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
            sep = os.sep
            if os.sep == '\\':
                # a backslash separator must be escaped inside the regex
                sep = r'\\'
            if _PYTHON_VERSION <= (3, 2):
                pattern_re = '^' + base + sep.join((prefix_re,
                                                    '.*' + pattern_re))
            else:
                # Strip the wrapper from the pattern, then re-wrap the whole
                # base + prefix + separator + anything + pattern expression.
                pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
                pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
                                                  pattern_re, end)
        else:  # no prefix -- respect anchor flag
            if anchor:
                if _PYTHON_VERSION <= (3, 2):
                    pattern_re = '^' + base + pattern_re
                else:
                    pattern_re = r'%s%s%s' % (start, base,
                                              pattern_re[len(start):])

        return re.compile(pattern_re)

    def _glob_to_re(self, pattern):
        """Translate a shell-like glob pattern to a regular expression.

        Return a string containing the regex.  Differs from
        'fnmatch.translate()' in that '*' does not match "special characters"
        (which are platform-specific).
        """
        pattern_re = fnmatch.translate(pattern)

        # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
        # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
        # and by extension they shouldn't match such "special characters" under
        # any OS.  So change all non-escaped dots in the RE to match any
        # character except the special characters (currently: just os.sep).
        sep = os.sep
        if os.sep == '\\':
            # we're using a regex to manipulate a regex, so we need
            # to escape the backslash twice
            sep = r'\\\\'
        escaped = r'\1[^%s]' % sep
        pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
        return pattern_re