Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/urllib3/util/url.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
| author | shellac |
|---|---|
| date | Thu, 14 May 2020 14:56:58 -0400 |
| parents | 26e78fe6e8c4 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:75ca89e9b81c | 2:6af9afd405e9 |
|---|---|
| 1 from __future__ import absolute_import | |
| 2 import re | |
| 3 from collections import namedtuple | |
| 4 | |
| 5 from ..exceptions import LocationParseError | |
| 6 from ..packages import six | |
| 7 | |
| 8 | |
# Field names, in order, of the ``Url`` namedtuple defined below.
url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"]

# We only want to normalize urls with an HTTP(S) scheme.
# urllib3 infers URLs without a scheme (None) to be http.
NORMALIZABLE_SCHEMES = ("http", "https", None)

# Almost all of these patterns were derived from the
# 'rfc3986' module: https://github.com/python-hyper/rfc3986

# A single percent-encoded octet: "%" followed by two hex digits.
PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
# Matches input that begins with "<scheme>:" or with "/". parse_url() prefixes
# "//" to anything that does NOT match so it is parsed authority-first.
SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
# Splits a URI reference into five groups, unpacked by parse_url() as
# (scheme, authority, path, query, fragment).
URI_RE = re.compile(
    r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"
    r"(?://([^\\/?#]*))?"
    r"([^?#]*)"
    r"(?:\?([^#]*))?"
    r"(?:#(.*))?$",
    re.UNICODE | re.DOTALL,
)

# Building blocks for IP-literal matching.
IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
HEX_PAT = "[0-9A-Fa-f]{1,4}"
LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT)
# Substitution values for the %-style templates in ``_variations``.
_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
# One template per IPv6 address form; the comment above each entry gives the
# grammar production it implements.
_variations = [
    # 6( h16 ":" ) ls32
    "(?:%(hex)s:){6}%(ls32)s",
    # "::" 5( h16 ":" ) ls32
    "::(?:%(hex)s:){5}%(ls32)s",
    # [ h16 ] "::" 4( h16 ":" ) ls32
    "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
    # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
    "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
    # [ *4( h16 ":" ) h16 ] "::" ls32
    "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
    # [ *5( h16 ":" ) h16 ] "::" h16
    "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
    # [ *6( h16 ":" ) h16 ] "::"
    "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]

# Characters accepted literally inside a zone ID (used in a regex character
# class, hence the escaped "-").
UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-~"
# Alternation of every IPv6 form listed in ``_variations``.
IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
# Zone ID: a "%25" or bare "%" delimiter followed by one or more allowed
# characters or percent-encoded octets.
ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
# Bracketed IPv6 literal with an optional zone ID.
IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
# Registered name: any run of characters other than []%:/?# or
# percent-encoded octets (may be empty).
REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
# Request target starting with "/": captures (path, query); a fragment is
# matched but not captured.
TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")

# Fully anchored matchers for whole-string host classification.
IPV4_RE = re.compile("^" + IPV4_PAT + "$")
IPV6_RE = re.compile("^" + IPV6_PAT + "$")
IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
# Same as IPV6_ADDRZ_RE without the brackets: [2:-2] slices off the leading
# "\[" and trailing "\]" (two characters each) from the pattern text.
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
# Captures (group 1) the zone ID that sits immediately before the closing "]".
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")

# Splits an authority into three groups, unpacked by parse_url() as
# (auth, host, port); the port is limited to at most five digits.
SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
    REG_NAME_PAT,
    IPV4_PAT,
    IPV6_ADDRZ_PAT,
)
SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL)

# Character sets handed to _encode_invalid_chars() as ``allowed_chars``; each
# set extends the previous one for the next URL component.
UNRESERVED_CHARS = set(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
)
SUB_DELIM_CHARS = set("!$&'()*+,;=")
USERINFO_CHARS = UNRESERVED_CHARS | SUB_DELIM_CHARS | {":"}
PATH_CHARS = USERINFO_CHARS | {"@", "/"}
QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"}
| 80 | |
| 81 | |
class Url(namedtuple("Url", url_attrs)):
    """
    Data structure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`. Both the scheme and host are normalized as they are
    both case-insensitive according to RFC 3986.

    The constructor itself lower-cases the scheme and makes sure a non-empty
    path starts with ``/``; every field defaults to ``None``.
    """

    __slots__ = ()

    def __new__(
        cls,
        scheme=None,
        auth=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
    ):
        # A non-empty path is always stored in absolute form.
        if path and not path.startswith("/"):
            path = "/" + path
        if scheme is not None:
            scheme = scheme.lower()
        return super(Url, cls).__new__(
            cls, scheme, auth, host, port, path, query, fragment
        )

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or "/"

        if self.query is not None:
            uri += "?" + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        # Compare against None (not truthiness) so that an explicit port 0 is
        # kept, exactly as the ``url`` property below does.
        if self.port is not None:
            return "%s:%d" % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = u""

        # We use "is not None" we want things to happen with empty strings (or 0 port)
        if scheme is not None:
            url += scheme + u"://"
        if auth is not None:
            url += auth + u"@"
        if host is not None:
            url += host
        if port is not None:
            url += u":" + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += u"?" + query
        if fragment is not None:
            url += u"#" + fragment

        return url

    def __str__(self):
        return self.url
| 173 | |
| 174 | |
def split_first(s, delims):
    """
    .. deprecated:: 1.25

    Split ``s`` at whichever of the given delimiters occurs earliest.

    Returns a 3-tuple ``(before, after, delimiter)``. When none of the
    delimiters occurs in ``s`` the result is ``(s, "", None)``.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Every delimiter is searched for separately, so the cost grows linearly
    with the number of delimiters.
    """
    # (position, delimiter) for each delimiter present in ``s``, kept in the
    # order the delimiters were supplied.
    located = [(s.find(d), d) for d in delims]
    hits = [(pos, d) for pos, d in located if pos >= 0]

    if not hits:
        return s, "", None

    # min() returns the first of several equally small entries, so an earlier
    # delimiter wins a tie.
    cut, delim = min(hits, key=lambda hit: hit[0])
    return s[:cut], s[cut + 1 :], delim
| 208 | |
| 209 | |
| 210 def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): | |
| 211 """Percent-encodes a URI component without reapplying | |
| 212 onto an already percent-encoded component. | |
| 213 """ | |
| 214 if component is None: | |
| 215 return component | |
| 216 | |
| 217 component = six.ensure_text(component) | |
| 218 | |
| 219 # Normalize existing percent-encoded bytes. | |
| 220 # Try to see if the component we're encoding is already percent-encoded | |
| 221 # so we can skip all '%' characters but still encode all others. | |
| 222 component, percent_encodings = PERCENT_RE.subn( | |
| 223 lambda match: match.group(0).upper(), component | |
| 224 ) | |
| 225 | |
| 226 uri_bytes = component.encode("utf-8", "surrogatepass") | |
| 227 is_percent_encoded = percent_encodings == uri_bytes.count(b"%") | |
| 228 encoded_component = bytearray() | |
| 229 | |
| 230 for i in range(0, len(uri_bytes)): | |
| 231 # Will return a single character bytestring on both Python 2 & 3 | |
| 232 byte = uri_bytes[i : i + 1] | |
| 233 byte_ord = ord(byte) | |
| 234 if (is_percent_encoded and byte == b"%") or ( | |
| 235 byte_ord < 128 and byte.decode() in allowed_chars | |
| 236 ): | |
| 237 encoded_component += byte | |
| 238 continue | |
| 239 encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper())) | |
| 240 | |
| 241 return encoded_component.decode(encoding) | |
| 242 | |
| 243 | |
| 244 def _remove_path_dot_segments(path): | |
| 245 # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code | |
| 246 segments = path.split("/") # Turn the path into a list of segments | |
| 247 output = [] # Initialize the variable to use to store output | |
| 248 | |
| 249 for segment in segments: | |
| 250 # '.' is the current directory, so ignore it, it is superfluous | |
| 251 if segment == ".": | |
| 252 continue | |
| 253 # Anything other than '..', should be appended to the output | |
| 254 elif segment != "..": | |
| 255 output.append(segment) | |
| 256 # In this case segment == '..', if we can, we should pop the last | |
| 257 # element | |
| 258 elif output: | |
| 259 output.pop() | |
| 260 | |
| 261 # If the path starts with '/' and the output is empty or the first string | |
| 262 # is non-empty | |
| 263 if path.startswith("/") and (not output or output[0]): | |
| 264 output.insert(0, "") | |
| 265 | |
| 266 # If the path starts with '/.' or '/..' ensure we add one more empty | |
| 267 # string to add a trailing '/' | |
| 268 if path.endswith(("/.", "/..")): | |
| 269 output.append("") | |
| 270 | |
| 271 return "/".join(output) | |
| 272 | |
| 273 | |
| 274 def _normalize_host(host, scheme): | |
| 275 if host: | |
| 276 if isinstance(host, six.binary_type): | |
| 277 host = six.ensure_str(host) | |
| 278 | |
| 279 if scheme in NORMALIZABLE_SCHEMES: | |
| 280 is_ipv6 = IPV6_ADDRZ_RE.match(host) | |
| 281 if is_ipv6: | |
| 282 match = ZONE_ID_RE.search(host) | |
| 283 if match: | |
| 284 start, end = match.span(1) | |
| 285 zone_id = host[start:end] | |
| 286 | |
| 287 if zone_id.startswith("%25") and zone_id != "%25": | |
| 288 zone_id = zone_id[3:] | |
| 289 else: | |
| 290 zone_id = zone_id[1:] | |
| 291 zone_id = "%" + _encode_invalid_chars(zone_id, UNRESERVED_CHARS) | |
| 292 return host[:start].lower() + zone_id + host[end:] | |
| 293 else: | |
| 294 return host.lower() | |
| 295 elif not IPV4_RE.match(host): | |
| 296 return six.ensure_str( | |
| 297 b".".join([_idna_encode(label) for label in host.split(".")]) | |
| 298 ) | |
| 299 return host | |
| 300 | |
| 301 | |
| 302 def _idna_encode(name): | |
| 303 if name and any([ord(x) > 128 for x in name]): | |
| 304 try: | |
| 305 import idna | |
| 306 except ImportError: | |
| 307 six.raise_from( | |
| 308 LocationParseError("Unable to parse URL without the 'idna' module"), | |
| 309 None, | |
| 310 ) | |
| 311 try: | |
| 312 return idna.encode(name.lower(), strict=True, std3_rules=True) | |
| 313 except idna.IDNAError: | |
| 314 six.raise_from( | |
| 315 LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None | |
| 316 ) | |
| 317 return name.lower().encode("ascii") | |
| 318 | |
| 319 | |
def _encode_target(target):
    """Percent-encodes a request target so that there are no invalid characters

    :param target: Request target; it has to match ``TARGET_RE`` (start with
        ``/``). Any fragment is dropped.
    :return: The encoded path, followed by ``?`` and the encoded query when a
        query is present.
    :raises LocationParseError: If ``target`` does not match ``TARGET_RE``.
    """
    match = TARGET_RE.match(target)
    if match is None:
        # Report an unparseable target the same way parse_url() does rather
        # than failing with an AttributeError on ``None.groups()``.
        raise LocationParseError(target)

    path, query = match.groups()
    target = _encode_invalid_chars(path, PATH_CHARS)
    query = _encode_invalid_chars(query, QUERY_CHARS)
    if query is not None:
        target += "?" + query
    return target
| 328 | |
| 329 | |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.
    This parser is RFC 3986 compliant.

    The parser logic and helper functions are based heavily on
    work done in the ``rfc3986`` module.

    :param str url: URL to parse into a :class:`.Url` namedtuple.
    :raises LocationParseError: If any step of splitting or normalizing the
        url raises ``ValueError`` or ``AttributeError`` (for example an
        authority that does not match, or a port above 65535).

    Partly backwards-compatible with :mod:`urlparse`.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """
    if not url:
        # Empty
        return Url()

    # Keep the caller's input for error reporting; ``url`` may be rewritten.
    source_url = url
    # Input that starts with neither "<scheme>:" nor "/" is treated as
    # beginning with an authority, so give it the "//" marker URI_RE expects.
    if not SCHEME_RE.search(url):
        url = "//" + url

    try:
        scheme, authority, path, query, fragment = URI_RE.match(url).groups()
        # Only scheme-less and http(s) URLs get their components normalized.
        normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES

        if scheme:
            scheme = scheme.lower()

        if authority:
            # A non-matching authority yields None here; the resulting
            # AttributeError is handled by the except clause below.
            auth, host, port = SUBAUTHORITY_RE.match(authority).groups()
            if auth and normalize_uri:
                auth = _encode_invalid_chars(auth, USERINFO_CHARS)
            # "host:" with nothing after the colon means no port at all.
            if port == "":
                port = None
        else:
            auth, host, port = None, None, None

        if port is not None:
            port = int(port)
            if not (0 <= port <= 65535):
                raise LocationParseError(url)

        host = _normalize_host(host, scheme)

        if normalize_uri and path:
            path = _remove_path_dot_segments(path)
            path = _encode_invalid_chars(path, PATH_CHARS)
        if normalize_uri and query:
            query = _encode_invalid_chars(query, QUERY_CHARS)
        if normalize_uri and fragment:
            fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS)

    except (ValueError, AttributeError):
        # Report the failure against the url exactly as the caller passed it.
        return six.raise_from(LocationParseError(source_url), None)

    # For the sake of backwards compatibility we put empty
    # string values for path if there are any defined values
    # beyond the path in the URL.
    # TODO: Remove this when we break backwards compatibility.
    if not path:
        if query is not None or fragment is not None:
            path = ""
        else:
            path = None

    # Ensure that each part of the URL is a `str` for
    # backwards compatibility.
    if isinstance(url, six.text_type):
        ensure_func = six.ensure_text
    else:
        ensure_func = six.ensure_str

    def ensure_type(x):
        # None marks an absent component and must stay None.
        return x if x is None else ensure_func(x)

    return Url(
        scheme=ensure_type(scheme),
        auth=ensure_type(auth),
        host=ensure_type(host),
        port=port,
        path=ensure_type(path),
        query=ensure_type(query),
        fragment=ensure_type(fragment),
    )
| 423 | |
| 424 | |
def get_host(url):
    """
    Deprecated. Use :func:`parse_url` instead.

    Returns a ``(scheme, hostname, port)`` tuple for ``url``; the scheme
    falls back to ``"http"`` when the url does not carry one.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or "http"
    return scheme, parsed.hostname, parsed.port
