env/lib/python3.7/site-packages/requests/utils.py @ 5:9b1c78e6ba9c (draft, default, tip)

commit: "planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author: shellac
date:   Mon, 01 Jun 2020 08:59:25 -0400
parent: 79f47841a781
# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import codecs
import contextlib
import io
import os
import re
import socket
import struct
import sys
import tempfile
import warnings
import zipfile
from collections import OrderedDict

from .__version__ import __version__
from . import certs
# to_native_string is unused in this module, but imported for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment, Mapping)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()

DEFAULT_PORTS = {'http': 80, 'https': 443}


if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")  # mask dots
            test = test.replace("*", r".*")  # change glob sequence
            test = test.replace("?", r".")   # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns the given dict-like object as a sequence of (key, value) items;
    other objects are returned unchanged."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)

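# A minimal usage sketch (assumed, not part of the upstream module): super_len
# reports the number of bytes left to read from the current position, which is
# what Requests uses to compute a Content-Length for partially read bodies.
#
#   >>> from io import BytesIO
#   >>> buf = BytesIO(b'hello world')
#   >>> super_len(buf)
#   11
#   >>> buf.read(6)  # consume part of the stream
#   b'hello '
#   >>> super_len(buf)  # only the unread tail is counted
#   5
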
def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    zip_file = zipfile.ZipFile(archive)
    if member not in zip_file.namelist():
        return path

    # we have a valid zip archive and a valid member of that archive
    tmp = tempfile.gettempdir()
    extracted_path = os.path.join(tmp, *member.split('/'))
    if not os.path.exists(extracted_path):
        extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path

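# A hypothetical illustration (the .zip path below is invented): when a
# resource such as a CA bundle lives inside a zipped distribution, the joined
# path does not exist on disk, so the member is extracted to the temp
# directory and that extracted location is returned instead.
#
#   >>> extract_zipped_paths('/opt/app/deps.zip/certs/cacert.pem')  # doctest: +SKIP
#   '/tmp/certs/cacert.pem'
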
def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but the quoting scheme that
    browsers actually use.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value

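# A quick illustration (input value assumed): escaped quotes and backslashes
# inside a quoted header value are unescaped the way browsers expect.
#
#   >>> unquote_header_value('"hello \\"world\\""')
#   'hello "world"'
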
def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    :param header: string
    :return: tuple containing content type and dictionary of
        parameters
    """

    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find("=")
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1:].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict

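# A minimal sketch of the parsing behaviour (example values assumed): the
# media type is separated from its parameters, parameter names are
# lower-cased, and quotes/whitespace are stripped from parameter values.
#
#   >>> _parse_content_type_header('text/html; Charset="UTF-8"')
#   ('text/html', {'charset': 'UTF-8'})
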
def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'

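# Usage sketch (header values assumed): an explicit charset wins; a bare
# text/* content type falls back to ISO-8859-1, RFC 2616's default.
#
#   >>> get_encoding_from_headers({'content-type': 'application/json; charset=utf-8'})
#   'utf-8'
#   >>> get_encoding_from_headers({'content-type': 'text/plain'})
#   'ISO-8859-1'
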
def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length

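# A small illustration (values assumed): the final slice may be shorter, and
# a None or non-positive slice_length yields the whole string at once.
#
#   >>> list(iter_slices('abcde', 2))
#   ['ab', 'cd', 'e']
#   >>> list(iter_slices('abcde', None))
#   ['abcde']
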
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tries:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)

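# Illustration (example URI assumed): %7E decodes to the unreserved '~' and is
# unescaped, while %2F ('/') is reserved and stays percent-encoded.
#
#   >>> unquote_unreserved('http://example.com/%7Euser%2Fdocs')
#   'http://example.com/~user%2Fdocs'
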
def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)

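# A usage sketch (example URI assumed): characters that are illegal unquoted,
# such as spaces, get percent-encoded, while already-encoded sequences are
# kept consistent rather than double-encoded.
#
#   >>> requote_uri('http://example.com/a b?x=1')
#   'http://example.com/a%20b?x=1'
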
def address_in_network(ip, net):
    """Check whether an IP address belongs to a network subnet.

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))


def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True

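# A short doctest-style sketch (addresses assumed) tying the helpers above
# together: validate the CIDR notation first, then test membership.
#
#   >>> is_valid_cidr('192.168.1.0/24')
#   True
#   >>> is_valid_cidr('192.168.1.0')   # plain IP, no mask
#   False
#   >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#   True
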
@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'.

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing.
    """
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value

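# Usage sketch (variable name and value assumed): the override is visible only
# inside the with-block, and the previous state is restored afterwards even if
# the block raises.
#
#   >>> with set_environ('no_proxy', 'localhost,127.0.0.1'):
#   ...     os.environ['no_proxy']
#   'localhost,127.0.0.1'
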
def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If the no_proxy entry was defined in plain IP notation
                    # instead of cidr notation & matches the IP of the URL's host
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()

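# Behaviour sketch (hosts and environment assumed): when no_proxy covers the
# target host, an empty dict is returned even if http_proxy is set.
#
#   >>> with set_environ('http_proxy', 'http://proxy:3128'):
#   ...     get_environ_proxies('http://internal.local/', no_proxy='internal.local')
#   {}
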
def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy

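# Lookup-order sketch (proxy URLs assumed): the most specific key,
# scheme://host, wins over a bare scheme, which wins over 'all'.
#
#   >>> proxies = {'http://example.com': 'http://p1:8080', 'http': 'http://p2:8080'}
#   >>> select_proxy('http://example.com/path', proxies)
#   'http://p1:8080'
#   >>> select_proxy('http://other.com/', proxies)
#   'http://p2:8080'
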
def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    e.g. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links

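# Parsing sketch (header value assumed), e.g. for RFC 5988 pagination links:
#
#   >>> parse_header_links('<http://example.com/page2>; rel="next"')
#   [{'url': 'http://example.com/page2', 'rel': 'next'}]
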
# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None

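# Detection sketch (payloads assumed): the first four bytes of '{"' encoded in
# each scheme have a distinctive null-byte pattern.
#
#   >>> guess_json_utf(b'{"a": 1}')
#   'utf-8'
#   >>> guess_json_utf('{"a": 1}'.encode('utf-16-le'))
#   'utf-16-le'
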
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username, password.

    :rtype: (str, str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth

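# Two quick sketches (URLs assumed). A bare host gains a scheme, and
# percent-encoded credentials are extracted and unquoted:
#
#   >>> prepend_scheme_if_needed('example.com/path', 'http')
#   'http://example.com/path'
#   >>> get_auth_from_url('http://user:p%40ss@example.com/')
#   ('user', 'p@ss')
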
# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))

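# One-liner sketch (URL assumed): credentials and fragment are both dropped.
#
#   >>> urldefragauth('http://user:pass@example.com/path#section')
#   'http://example.com/path'
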
def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")