Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/urllib3/util/url.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 from __future__ import absolute_import | |
2 import re | |
3 from collections import namedtuple | |
4 | |
5 from ..exceptions import LocationParseError | |
6 from ..packages import six | |
7 | |
8 | |
# Field names of the ``Url`` namedtuple, in positional order.
url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"]

# Normalization is restricted to HTTP(S) URLs. A URL without any scheme
# (None) is inferred by urllib3 to be http, so it is normalized as well.
NORMALIZABLE_SCHEMES = ("http", "https", None)

# Nearly all of the patterns below were derived from the
# 'rfc3986' module: https://github.com/python-hyper/rfc3986
PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
URI_RE = re.compile(
    r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"  # group 1: scheme
    r"(?://([^\\/?#]*))?"  # group 2: authority
    r"([^?#]*)"  # group 3: path
    r"(?:\?([^#]*))?"  # group 4: query
    r"(?:#(.*))?$",  # group 5: fragment
    flags=re.UNICODE | re.DOTALL,
)

IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
HEX_PAT = "[0-9A-Fa-f]{1,4}"
# ls32: the last 32 bits of an IPv6 address, either h16:h16 or dotted IPv4.
LS32_PAT = "(?:%s:%s|%s)" % (HEX_PAT, HEX_PAT, IPV4_PAT)
_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
# One template per alternative of the IPv6address grammar; the comment above
# each entry is the grammar rule the template encodes.
_variations = [
    #                            6( h16 ":" ) ls32
    "(?:%(hex)s:){6}%(ls32)s",
    #                       "::" 5( h16 ":" ) ls32
    "::(?:%(hex)s:){5}%(ls32)s",
    # [               h16 ] "::" 4( h16 ":" ) ls32
    "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
    # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
    # [ *4( h16 ":" ) h16 ] "::"              ls32
    "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
    # [ *5( h16 ":" ) h16 ] "::"              h16
    "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
    # [ *6( h16 ":" ) h16 ] "::"
    "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]

UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-~"
IPV6_PAT = "(?:%s)" % "|".join(template % _subs for template in _variations)
# A zone identifier is introduced by "%25" or a bare "%".
ZONE_ID_PAT = "".join(["(?:%25|%)(?:[", UNRESERVED_PAT, "]|%[a-fA-F0-9]{2})+"])
IPV6_ADDRZ_PAT = "".join([r"\[", IPV6_PAT, r"(?:", ZONE_ID_PAT, r")?\]"])
REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")

IPV4_RE = re.compile("^%s$" % IPV4_PAT)
IPV6_RE = re.compile("^%s$" % IPV6_PAT)
IPV6_ADDRZ_RE = re.compile("^%s$" % IPV6_ADDRZ_PAT)
# Slicing off the first and last two characters removes the escaped
# brackets (``\[`` and ``\]``) from the bracketed pattern.
BRACELESS_IPV6_ADDRZ_RE = re.compile("^%s$" % IPV6_ADDRZ_PAT[2:-2])
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")

# Groups: (userinfo, host, port).
SUBAUTHORITY_PAT = (
    u"^(?:(.*)@)?("
    + u"|".join((REG_NAME_PAT, IPV4_PAT, IPV6_ADDRZ_PAT))
    + u")(?::([0-9]{0,5}))?$"
)
SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, flags=re.UNICODE | re.DOTALL)

# Characters left unescaped in each URL component; each set extends the
# previous one.
UNRESERVED_CHARS = set(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
)
SUB_DELIM_CHARS = set("!$&'()*+,;=")
USERINFO_CHARS = UNRESERVED_CHARS.union(SUB_DELIM_CHARS, ":")
PATH_CHARS = USERINFO_CHARS.union("@/")
QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS.union("?")
80 | |
81 | |
class Url(namedtuple("Url", url_attrs)):
    """
    Data structure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    The constructor lower-cases the scheme and makes sure a non-empty path
    starts with ``/``. All other fields are stored exactly as given; host
    normalization is done by :func:`parse_url` before it builds the tuple.
    """

    __slots__ = ()

    def __new__(
        cls,
        scheme=None,
        auth=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
    ):
        # An empty or None path is kept as-is; only a non-empty relative
        # path gets the leading slash.
        if path and not path.startswith("/"):
            path = "/" + path
        if scheme is not None:
            scheme = scheme.lower()
        return super(Url, cls).__new__(
            cls, scheme, auth, host, port, path, query, fragment
        )

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or "/"

        if self.query is not None:
            uri += "?" + self.query

        return uri

    @property
    def netloc(self):
        """
        Network location including host and port.

        Returns ``None`` when there is no host, ``host:port`` when the port
        is truthy, and the bare host otherwise.
        """
        # Without a host there is no network location; formatting the port
        # anyway would yield the literal string "None:<port>".
        if self.host is None:
            return None
        if self.port:
            return "%s:%d" % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = u""

        # We test with "is not None" because empty strings (and a port of 0)
        # must still be emitted.
        if scheme is not None:
            url += scheme + u"://"
        if auth is not None:
            url += auth + u"@"
        if host is not None:
            url += host
        if port is not None:
            url += u":" + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += u"?" + query
        if fragment is not None:
            url += u"#" + fragment

        return url

    def __str__(self):
        return self.url
173 | |
174 | |
def split_first(s, delims):
    """
    .. deprecated:: 1.25

    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Delimiters may be longer than one character; the whole matched delimiter
    is removed from the second part.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)
        >>> split_first('a::b', ['::'])
        ('a', 'b', '::')

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            continue

        # Strict "<" keeps the earliest-listed delimiter when two delimiters
        # are found at the same position.
        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, "", None

    # Skip the full delimiter rather than a single character, so that
    # multi-character delimiters do not leak into the remainder.
    return s[:min_idx], s[min_idx + len(min_delim) :], min_delim
208 | |
209 | |
def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
    """Percent-encode every byte of ``component`` that is not in
    ``allowed_chars``, without double-encoding existing escapes.

    ``None`` is passed through untouched. Existing ``%XX`` escapes have their
    hex digits upper-cased. The result is decoded with ``encoding``.
    """
    if component is None:
        return component

    text = six.ensure_text(component)

    # Upper-case the hex digits of every well-formed escape and count them.
    text, escape_count = PERCENT_RE.subn(
        lambda found: found.group(0).upper(), text
    )

    raw = text.encode("utf-8", "surrogatepass")
    # If every '%' belongs to a well-formed escape, the component is taken to
    # be percent-encoded already and its '%' bytes are copied verbatim;
    # otherwise '%' is escaped like any other disallowed byte.
    keep_percent = escape_count == raw.count(b"%")

    result = bytearray()
    # Iterating a bytearray yields integers on both Python 2 and 3.
    for code in bytearray(raw):
        if keep_percent and code == 0x25:  # 0x25 is '%'
            result.append(code)
        elif code < 128 and chr(code) in allowed_chars:
            result.append(code)
        else:
            result.extend(("%%%02X" % code).encode("ascii"))

    return result.decode(encoding)
242 | |
243 | |
244 def _remove_path_dot_segments(path): | |
245 # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code | |
246 segments = path.split("/") # Turn the path into a list of segments | |
247 output = [] # Initialize the variable to use to store output | |
248 | |
249 for segment in segments: | |
250 # '.' is the current directory, so ignore it, it is superfluous | |
251 if segment == ".": | |
252 continue | |
253 # Anything other than '..', should be appended to the output | |
254 elif segment != "..": | |
255 output.append(segment) | |
256 # In this case segment == '..', if we can, we should pop the last | |
257 # element | |
258 elif output: | |
259 output.pop() | |
260 | |
261 # If the path starts with '/' and the output is empty or the first string | |
262 # is non-empty | |
263 if path.startswith("/") and (not output or output[0]): | |
264 output.insert(0, "") | |
265 | |
266 # If the path starts with '/.' or '/..' ensure we add one more empty | |
267 # string to add a trailing '/' | |
268 if path.endswith(("/.", "/..")): | |
269 output.append("") | |
270 | |
271 return "/".join(output) | |
272 | |
273 | |
def _normalize_host(host, scheme):
    """Return ``host`` normalized for a URL with the given ``scheme``.

    Falsy hosts and hosts of non-normalizable schemes are returned without
    case changes. Bracketed IPv6 literals are lower-cased and their zone
    identifier, if any, is re-encoded; IPv4 literals are returned unchanged;
    anything else is encoded label by label with ``_idna_encode``.
    """
    if not host:
        return host

    if isinstance(host, six.binary_type):
        host = six.ensure_str(host)

    if scheme not in NORMALIZABLE_SCHEMES:
        return host

    if IPV6_ADDRZ_RE.match(host):
        zone_match = ZONE_ID_RE.search(host)
        if not zone_match:
            return host.lower()

        zone_start, zone_end = zone_match.span(1)
        zone = host[zone_start:zone_end]

        # Drop the delimiter: "%25" when something follows it, otherwise a
        # single leading "%".
        if zone.startswith("%25") and zone != "%25":
            zone = zone[3:]
        else:
            zone = zone[1:]

        encoded_zone = "%" + _encode_invalid_chars(zone, UNRESERVED_CHARS)
        # Only the address part is lower-cased; the zone keeps its case.
        return host[:zone_start].lower() + encoded_zone + host[zone_end:]

    if IPV4_RE.match(host):
        return host

    encoded_labels = [_idna_encode(label) for label in host.split(".")]
    return six.ensure_str(b".".join(encoded_labels))
300 | |
301 | |
302 def _idna_encode(name): | |
303 if name and any([ord(x) > 128 for x in name]): | |
304 try: | |
305 import idna | |
306 except ImportError: | |
307 six.raise_from( | |
308 LocationParseError("Unable to parse URL without the 'idna' module"), | |
309 None, | |
310 ) | |
311 try: | |
312 return idna.encode(name.lower(), strict=True, std3_rules=True) | |
313 except idna.IDNAError: | |
314 six.raise_from( | |
315 LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None | |
316 ) | |
317 return name.lower().encode("ascii") | |
318 | |
319 | |
def _encode_target(target):
    """Percent-encodes a request target so that there are no invalid characters

    The target is matched against ``TARGET_RE`` (a path starting with ``/``,
    an optional ``?query`` and an optional ``#fragment``). The path and query
    are encoded and re-joined; the fragment is not captured and therefore not
    part of the result.

    :raises LocationParseError: if ``target`` does not match ``TARGET_RE``.
    """
    match = TARGET_RE.match(target)
    if match is None:
        # Report a parse error instead of failing with an AttributeError on
        # ``None.groups()``.
        raise LocationParseError("%r is not a valid request URI" % (target,))

    path, query = match.groups()
    encoded_target = _encode_invalid_chars(path, PATH_CHARS)
    query = _encode_invalid_chars(query, QUERY_CHARS)
    if query is not None:
        encoded_target += "?" + query
    return encoded_target
328 | |
329 | |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.
    This parser is RFC 3986 compliant.

    The parser logic and helper functions are based heavily on
    work done in the ``rfc3986`` module.

    :param str url: URL to parse into a :class:`.Url` namedtuple.
    :raises LocationParseError: if the URL cannot be parsed or its port is
        outside 0-65535.

    Partly backwards-compatible with :mod:`urlparse`.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """
    if not url:
        # Empty
        return Url()

    source_url = url
    # Without a scheme or a leading "/", treat the input as starting with an
    # authority so that "host:port" style strings parse as host and port.
    if not SCHEME_RE.search(url):
        url = "//" + url

    try:
        scheme, authority, path, query, fragment = URI_RE.match(url).groups()
        normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES

        if scheme:
            scheme = scheme.lower()

        if authority:
            # Split the userinfo off at the LAST "@" with a plain string
            # operation before applying the regex. Letting the regex's
            # greedy "(.*)@" group find the split can backtrack heavily on
            # authorities containing many "@" characters, because the host
            # alternative also accepts "@". The resulting auth/host/port
            # values are the same as a direct match of the whole authority.
            if "@" in authority:
                auth, _, host_port = authority.rpartition("@")
            else:
                auth, host_port = None, authority
            _, host, port = SUBAUTHORITY_RE.match(host_port).groups()
            if auth and normalize_uri:
                auth = _encode_invalid_chars(auth, USERINFO_CHARS)
            if port == "":
                port = None
        else:
            auth, host, port = None, None, None

        if port is not None:
            port = int(port)
            if not (0 <= port <= 65535):
                raise LocationParseError(url)

        host = _normalize_host(host, scheme)

        if normalize_uri and path:
            path = _remove_path_dot_segments(path)
            path = _encode_invalid_chars(path, PATH_CHARS)
        if normalize_uri and query:
            query = _encode_invalid_chars(query, QUERY_CHARS)
        if normalize_uri and fragment:
            fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS)

    except (ValueError, AttributeError):
        # AttributeError covers a failed regex match (``None.groups()``).
        return six.raise_from(LocationParseError(source_url), None)

    # For the sake of backwards compatibility we put empty
    # string values for path if there are any defined values
    # beyond the path in the URL.
    # TODO: Remove this when we break backwards compatibility.
    if not path:
        if query is not None or fragment is not None:
            path = ""
        else:
            path = None

    # Ensure that each part of the URL is a `str` for
    # backwards compatibility.
    if isinstance(url, six.text_type):
        ensure_func = six.ensure_text
    else:
        ensure_func = six.ensure_str

    def ensure_type(x):
        return x if x is None else ensure_func(x)

    return Url(
        scheme=ensure_type(scheme),
        auth=ensure_type(auth),
        host=ensure_type(host),
        port=port,
        path=ensure_type(path),
        query=ensure_type(query),
        fragment=ensure_type(fragment),
    )
423 | |
424 | |
def get_host(url):
    """
    Deprecated. Use :func:`parse_url` instead.

    Returns a ``(scheme, hostname, port)`` tuple for ``url``; the scheme
    falls back to ``"http"`` when the parsed URL has none.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme if parsed.scheme else "http"
    return scheme, parsed.hostname, parsed.port