Mercurial repository: shellac / guppy_basecaller
File: env/lib/python3.7/site-packages/requests/utils.py @ 5:9b1c78e6ba9c (draft, default, tip)
Commit message: "planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| field | value |
|---|---|
| author | shellac |
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children | (none) |
Contents of `requests/utils.py` as of changeset 5:9b1c78e6ba9c (compared against parent 4:79f47841a781):
```python
# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import codecs
import contextlib
import io
import os
import re
import socket
import struct
import sys
import tempfile
import warnings
import zipfile
from collections import OrderedDict

from .__version__ import __version__
from . import certs
# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment, Mapping)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()

DEFAULT_PORTS = {'http': 80, 'https': 443}


if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")  # mask dots
            test = test.replace("*", r".*")  # change glob sequence
            test = test.replace("?", r".")   # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns an internal sequence dictionary update."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no useable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
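```

A quick illustration of `super_len` (my sketch, not part of the module; assumes `requests` is importable): it reports the bytes remaining from the stream's current position, not the total size.

```python
from io import BytesIO

from requests.utils import super_len

buf = BytesIO(b"hello world")
print(super_len(buf))  # 11: nothing has been read yet
buf.read(6)
print(super_len(buf))  # 5: only the unread remainder counts
```

```python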
def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode`` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    zip_file = zipfile.ZipFile(archive)
    if member not in zip_file.namelist():
        return path

    # we have a valid zip archive and a valid member of that archive
    tmp = tempfile.gettempdir()
    extracted_path = os.path.join(tmp, *member.split('/'))
    if not os.path.exists(extracted_path):
        extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path
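```

A sketch of `extract_zipped_paths` in action (illustrative file names only; assumes a writable temp directory): a nonexistent path that points "into" a zip archive is swapped for an extracted copy, while an ordinary existing path passes through unchanged.

```python
import os
import tempfile
import zipfile

from requests.utils import extract_zipped_paths

archive = os.path.join(tempfile.gettempdir(), 'demo-bundle.zip')  # hypothetical archive
with zipfile.ZipFile(archive, 'w') as zf:
    zf.writestr('certs/ca.pem', 'dummy contents')

# The member path does not exist on disk, so it is extracted to the temp dir.
print(extract_zipped_paths(os.path.join(archive, 'certs', 'ca.pem')))
# An existing path is returned as-is.
print(extract_zipped_paths(tempfile.gettempdir()))
```

```python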
def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. Unless it can not be represented as such, return an
    OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value


def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)
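```

These two helpers round-trip between plain dicts and CookieJar objects; a minimal sketch (assuming `requests` is installed):

```python
from requests.cookies import cookiejar_from_dict
from requests.utils import add_dict_to_cookiejar, dict_from_cookiejar

jar = cookiejar_from_dict({'session': 'abc123'})
jar = add_dict_to_cookiejar(jar, {'theme': 'dark'})  # merges into the same jar
print(sorted(dict_from_cookiejar(jar).items()))
# [('session', 'abc123'), ('theme', 'dark')]
```

```python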
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    :param header: string
    :return: tuple containing content type and dictionary of
         parameters
    """

    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find("=")
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1:].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'
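```

How the charset lookup behaves on a few representative headers (example values of my own, not from the repo):

```python
from requests.utils import get_encoding_from_headers

print(get_encoding_from_headers({'content-type': 'text/html; charset=UTF-8'}))  # UTF-8
print(get_encoding_from_headers({'content-type': 'text/plain'}))                # ISO-8859-1, the text/* fallback
print(get_encoding_from_headers({'content-type': 'application/json'}))          # None
```

```python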
def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length
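```

`iter_slices` simply windows a string; for instance:

```python
from requests.utils import iter_slices

print(list(iter_slices('abcdefg', 3)))     # ['abc', 'def', 'g']
print(list(iter_slices('abcdefg', None)))  # ['abcdefg']: a falsy length means one slice
```

```python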
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
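```

The unquote/quote cycle in practice (illustrative URLs): `%41` decodes to the unreserved character `A` and is unescaped, while `%2F` (`/`) is reserved and left alone; bare spaces gain percent-escapes.

```python
from requests.utils import requote_uri, unquote_unreserved

print(unquote_unreserved('http://example.com/%41%2Fb'))  # http://example.com/A%2Fb
print(requote_uri('http://example.com/a path'))          # http://example.com/a%20path
```

```python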
def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))


def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True
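```

Behaviour of the small IPv4 helpers above (the values follow directly from the docstring examples):

```python
from requests.utils import address_in_network, dotted_netmask, is_valid_cidr

print(dotted_netmask(24))                                     # 255.255.255.0
print(address_in_network('192.168.1.1', '192.168.1.0/24'))    # True
print(address_in_network('192.168.1.1', '192.168.100.0/24'))  # False
print(is_valid_cidr('192.168.1.0/24'))                        # True
print(is_valid_cidr('192.168.1.0'))                           # False: no mask component
```

```python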
@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing"""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value
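```

A sketch of the context manager (the variable name is made up for the demo and assumed to be unset beforehand):

```python
import os

from requests.utils import set_environ

with set_environ('DEMO_PROXY_FLAG', 'on'):  # hypothetical variable name
    print(os.environ['DEMO_PROXY_FLAG'])    # on
print('DEMO_PROXY_FLAG' in os.environ)      # False: prior state restored
```

```python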
def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If the no_proxy entry was defined in plain IP notation
                    # instead of cidr notation and matches the IP of the index
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url of the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy
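```

`select_proxy` walks its key list from most to least specific, so a scheme+host entry wins over a bare scheme; a minimal sketch with made-up proxy URLs:

```python
from requests.utils import select_proxy

proxies = {
    'http://example.com': 'http://per-host-proxy:8080',  # hypothetical proxies
    'http': 'http://fallback-proxy:3128',
}
print(select_proxy('http://example.com/a', proxies))  # http://per-host-proxy:8080
print(select_proxy('http://other.org/b', proxies))    # http://fallback-proxy:3128
```

```python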
def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links
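```

Parsing a typical paginated `Link` header (the header value is invented for illustration):

```python
from requests.utils import parse_header_links

header = ('<https://api.example.com/items?page=2>; rel="next", '
          '<https://api.example.com/items?page=5>; rel="last"')
for link in parse_header_links(header):
    print(link['rel'], link['url'])
# next https://api.example.com/items?page=2
# last https://api.example.com/items?page=5
```

```python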
# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
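```

Null-byte positions in the first four bytes identify the UTF flavour, since the first two JSON characters are always ASCII; for example:

```python
from requests.utils import guess_json_utf

print(guess_json_utf('{"k": 1}'.encode('utf-8')))      # utf-8
print(guess_json_utf('{"k": 1}'.encode('utf-16-le')))  # utf-16-le
print(guess_json_utf('{"k": 1}'.encode('utf-16')))     # utf-16 (BOM present)
```

```python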
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))
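```

For instance (illustrative hosts):

```python
from requests.utils import prepend_scheme_if_needed

print(prepend_scheme_if_needed('example.com/path', 'http'))     # http://example.com/path
print(prepend_scheme_if_needed('https://example.com', 'http'))  # https://example.com (kept)
```

```python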
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth
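```

Credentials are percent-decoded on the way out, and a URL without credentials yields empty strings:

```python
from requests.utils import get_auth_from_url

print(get_auth_from_url('https://user:p%40ss@example.com/path'))  # ('user', 'p@ss')
print(get_auth_from_url('https://example.com/path'))              # ('', '')
```

```python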
# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))
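```

`urldefragauth` strips both the userinfo and the fragment while keeping the query; for example:

```python
from requests.utils import urldefragauth

print(urldefragauth('https://user:pass@example.com/path?q=1#frag'))
# https://example.com/path?q=1
```

```python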
def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")
```
