comparison lib/python3.8/site-packages/pip/_vendor/requests/utils.py @ 0:9e54283cc701 draft

"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author guerler
date Mon, 27 Jul 2020 03:47:31 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9e54283cc701
1 # -*- coding: utf-8 -*-
2
3 """
4 requests.utils
5 ~~~~~~~~~~~~~~
6
7 This module provides utility functions that are used within Requests
8 that are also useful for external consumption.
9 """
10
11 import codecs
12 import contextlib
13 import io
14 import os
15 import re
16 import socket
17 import struct
18 import sys
19 import tempfile
20 import warnings
21 import zipfile
22
23 from .__version__ import __version__
24 from . import certs
25 # to_native_string is unused here, but imported here for backwards compatibility
26 from ._internal_utils import to_native_string
27 from .compat import parse_http_list as _parse_list_header
28 from .compat import (
29 quote, urlparse, bytes, str, OrderedDict, unquote, getproxies,
30 proxy_bypass, urlunparse, basestring, integer_types, is_py3,
31 proxy_bypass_environment, getproxies_environment, Mapping)
32 from .cookies import cookiejar_from_dict
33 from .structures import CaseInsensitiveDict
34 from .exceptions import (
35 InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
36
37 NETRC_FILES = ('.netrc', '_netrc')
38
39 DEFAULT_CA_BUNDLE_PATH = certs.where()
40
41 DEFAULT_PORTS = {'http': 80, 'https': 443}
42
43
if sys.platform == 'win32':
    # On Windows, decide proxy bypass from the registry instead of doing
    # DNS lookups.

    def proxy_bypass_registry(host):
        """Check the Windows Internet Settings registry key to decide
        whether *host* should bypass the configured proxy.
        """
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            settings = winreg.OpenKey(
                winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable may be stored as REG_SZ or REG_DWORD; coerce to int.
            enabled = int(winreg.QueryValueEx(settings, 'ProxyEnable')[0])
            # ProxyOverride is almost always a string.
            override = winreg.QueryValueEx(settings, 'ProxyOverride')[0]
        except OSError:
            return False
        if not enabled or not override:
            return False

        # Each ';'-separated entry is either the special '<local>' marker
        # (match hostnames without a dot) or a glob pattern; translate the
        # glob into a regex and try to match the host against it.
        for entry in override.split(';'):
            if entry == '<local>':
                if '.' not in host:
                    return True
            pattern = entry.replace(".", r"\.")    # literal dots
            pattern = pattern.replace("*", r".*")  # glob star
            pattern = pattern.replace("?", r".")   # glob single char
            if re.match(pattern, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Environment proxy settings win over the registry when present.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        return proxy_bypass_registry(host)
96
97
def dict_to_sequence(d):
    """Return *d* as an iterable of key/value pairs when it is
    mapping-like, otherwise return it unchanged."""
    return d.items() if hasattr(d, 'items') else d
105
106
def super_len(o):
    """Best-effort number of bytes remaining to be read from *o*.

    Supports anything with ``__len__``, a ``len`` attribute, a real file
    descriptor (via fstat), or a seekable file-like object, and subtracts
    the current read position when one can be determined.
    """
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)
    elif hasattr(o, 'len'):
        total_length = o.len
    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # fstat reports the on-disk byte size; that only matches what
            # will actually be read if the file is open in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # Special descriptors such as stdin can refuse tell(); in that
            # case report zero remaining and let requests chunk the body.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO/BytesIO expose seek but no usable fileno, so
                # measure by seeking to the end and back.
                try:
                    o.seek(0, 2)
                    total_length = o.tell()

                    # restore the position so partially read file-like
                    # objects keep working
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
166
167
def get_netrc_auth(url, raise_errors=False):
    """Look up a (login, password) tuple for *url*'s host in the user's
    netrc file.

    Returns None when no netrc file exists, the host has no entry, or the
    lookup fails; with *raise_errors* true, parse/permission errors
    propagate instead of being swallowed.
    """
    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None
        for candidate in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{}'.format(candidate))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/requests/requests/issues/1846
                return
            if os.path.exists(loc):
                netrc_path = loc
                break

        # No netrc file at all: nothing to look up.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip any port number from the netloc; the encode dance keeps the
        # separator the same type (bytes vs text) as the URL itself.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Prefer the login field, falling back to the account field.
                login_i = 0 if _netrc[0] else 1
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # Parse error or permission problem reading the file: skip
            # netrc auth unless explicitly asked to raise.
            if raise_errors:
                raise
    except (ImportError, AttributeError):
        # AppEngine lacks pieces of the stdlib used above.
        pass
217
218
def guess_filename(obj):
    """Return the basename of *obj*'s ``name`` attribute when it looks
    like a real filesystem path (not a pseudo-name such as ``<stdin>``).
    """
    name = getattr(obj, 'name', None)
    if not name or not isinstance(name, basestring):
        return None
    if name[0] == '<' or name[-1] == '>':
        return None
    return os.path.basename(name)
225
226
def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.

    :param path: a filesystem path, possibly pointing inside a zip archive.
    :rtype: str
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip
    # archive; assume the rest of the path is the name of a member in it
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    # close the archive promptly instead of leaking the file handle
    with zipfile.ZipFile(archive) as zip_file:
        if member not in zip_file.namelist():
            return path

        # we have a valid zip archive and a valid member of that archive;
        # extract into the temp dir, reusing a previous extraction if present
        tmp = tempfile.gettempdir()
        extracted_path = os.path.join(tmp, *member.split('/'))
        if not os.path.exists(extracted_path):
            extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path
257
258
def from_key_val_list(value):
    """Coerce *value* into an :class:`OrderedDict` of key/value pairs.

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    # Scalars cannot be interpreted as a sequence of 2-tuples.
    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)
282
283
def to_key_val_list(value):
    """Coerce *value* into a list of (key, value) tuples.

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples.

    :rtype: list
    """
    if value is None:
        return None

    # Scalars cannot be interpreted as a sequence of 2-tuples.
    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)
309
310
311 # From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse a comma-separated list header as described by RFC 2068
    Section 2, honouring quoted-strings (which may themselves contain
    commas). Surrounding quotes are removed from quoted elements.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    elements = []
    for element in _parse_list_header(value):
        if element[:1] == element[-1:] == '"':
            element = unquote_header_value(element[1:-1])
        elements.append(element)
    return elements
341
342
343 # From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse a list of key=value pairs as described by RFC 2068 Section 2
    into a python dict. Keys with no value map to ``None``; quoted values
    are unquoted.

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]
    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, raw = item.split('=', 1)
        if raw[:1] == raw[-1:] == '"':
            raw = unquote_header_value(raw[1:-1])
        result[name] = raw
    return result
376
377
378 # From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquote a header value the way browsers do (reversal of
    :func:`quote_header_value`), which is deliberately laxer than the RFC.

    :param value: the header value to unquote.
    :rtype: str
    """
    # Unquoted values pass through untouched.
    if not (value and value[0] == value[-1] == '"'):
        return value

    # Strictly RFC-correct unquoting would break Internet Explorer, which
    # uploads files with names like "C:\foo\bar.txt".
    value = value[1:-1]

    # Leave UNC filenames (\\server\share) alone: collapsing the doubled
    # backslashes would mangle the leading '\\'. See #458.
    if is_filename and value[:2] == '\\\\':
        return value
    return value.replace('\\\\', '\\').replace('\\"', '"')
402
403
def dict_from_cookiejar(cj):
    """Return a name -> value dict of the cookies in *cj*.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """
    # Later cookies with the same name overwrite earlier ones, exactly as
    # sequential assignment would.
    return {cookie.name: cookie.value for cookie in cj}
417
418
def add_dict_to_cookiejar(cj, cookie_dict):
    """Insert the cookies from *cookie_dict* into the jar *cj*.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """
    return cookiejar_from_dict(cookie_dict, cj)
428
429
def get_encodings_from_content(content):
    """Return every encoding declared in *content* via a meta charset, a
    meta http-equiv pragma, or an XML declaration.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    found = charset_re.findall(content)
    found += pragma_re.findall(content)
    found += xml_re.findall(content)
    return found
448
449
450 def _parse_content_type_header(header):
451 """Returns content type and parameters from given header
452
453 :param header: string
454 :return: tuple containing content type and dictionary of
455 parameters
456 """
457
458 tokens = header.split(';')
459 content_type, params = tokens[0].strip(), tokens[1:]
460 params_dict = {}
461 items_to_strip = "\"' "
462
463 for param in params:
464 param = param.strip()
465 if param:
466 key, value = param, True
467 index_of_equals = param.find("=")
468 if index_of_equals != -1:
469 key = param[:index_of_equals].strip(items_to_strip)
470 value = param[index_of_equals + 1:].strip(items_to_strip)
471 params_dict[key.lower()] = value
472 return content_type, params_dict
473
474
def get_encoding_from_headers(headers):
    """Return the charset advertised by an HTTP header dict, if any.

    Falls back to ISO-8859-1 for text/* content with no explicit charset;
    returns None when no content-type header is present.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """
    content_type = headers.get('content-type')
    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'
494
495
def stream_decode_response_unicode(iterator, r):
    """Incrementally decode the byte chunks from *iterator* using the
    response's declared encoding; pass chunks through untouched when the
    response has no encoding."""
    if r.encoding is None:
        for chunk in iterator:
            yield chunk
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        decoded = decoder.decode(chunk)
        if decoded:
            yield decoded
    # Flush any bytes still buffered by the incremental decoder.
    tail = decoder.decode(b'', final=True)
    if tail:
        yield tail
512
513
def iter_slices(string, slice_length):
    """Yield successive substrings of *string* of length *slice_length*
    (the whole string at once when the length is None or non-positive)."""
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    if not string:
        return
    for pos in range(0, len(string), slice_length):
        yield string[pos:pos + slice_length]
522
523
def get_unicode_from_response(r):
    """Return the response content decoded to unicode.

    Tries the charset from the content-type header first, then falls back
    to a replacement-character decode (or the raw content when no encoding
    is known at all).

    :param r: Response object to get unicode content from.
    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # First attempt: the charset advertised in the content-type header.
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back: decode with replacement characters; a missing encoding
    # triggers TypeError and we give up and return the raw content.
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content
558
559
# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape percent-escapes of unreserved characters (RFC 3986) in
    *uri*, leaving reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        hex_pair = parts[i][0:2]
        if len(hex_pair) == 2 and hex_pair.isalnum():
            try:
                char = chr(int(hex_pair, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % hex_pair)

            if char in UNRESERVED_SET:
                parts[i] = char + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            # Not a full two-character escape; restore the '%' verbatim.
            parts[i] = '%' + parts[i]
    return ''.join(parts)
587
588
def requote_uri(uri):
    """Re-quote *uri* so that it is fully and consistently
    percent-encoded, via an unquote/quote cycle.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters, then quote only the
        # illegal ones (reserved, unreserved and '%' stay as-is).
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # The URI contains a malformed escape; quote it wholesale, making
        # sure stray '%'s are encoded so they cause no trouble downstream.
        return quote(uri, safe=safe_without_percent)
609
610
def address_in_network(ip, net):
    """Return whether dotted-quad *ip* falls inside CIDR network *net*.

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    net_addr, prefix_len = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(prefix_len))))[0]
    host = struct.unpack('=L', socket.inet_aton(ip))[0]
    network = struct.unpack('=L', socket.inet_aton(net_addr))[0] & netmask
    return (host & netmask) == (network & netmask)
624
625
def dotted_netmask(mask):
    """Convert a prefix length (/xx) to a dotted-quad netmask string.

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    # Set the top `mask` bits of a 32-bit word, keep the rest clear.
    bits = (0xffffffff << (32 - mask)) & 0xffffffff
    return socket.inet_ntoa(struct.pack('>I', bits))
635
636
def is_ipv4_address(string_ip):
    """Return True when *string_ip* parses as a dotted IPv4 address.

    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
        return True
    except socket.error:
        return False
646
647
def is_valid_cidr(string_network):
    """Very simple check that *string_network* looks like an a.b.c.d/nn
    CIDR block (mask between 1 and 32, address accepted by inet_aton).

    :rtype: bool
    """
    if string_network.count('/') != 1:
        return False

    addr, _, mask_str = string_network.partition('/')
    try:
        mask = int(mask_str)
    except ValueError:
        return False

    if not 1 <= mask <= 32:
        return False

    try:
        socket.inet_aton(addr)
    except socket.error:
        return False
    return True
670
671
@contextlib.contextmanager
def set_environ(env_name, value):
    """Temporarily set environment variable *env_name* to *value*.

    The previous value (or absence) is restored on exit. When *value* is
    None, the environment is left completely untouched.
    """
    should_set = value is not None
    if should_set:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if should_set:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value
692
693
def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    # Lowercase environment variables take priority over uppercase, which
    # matches the behaviour of other http tools (curl, wget).
    def get_proxy(key):
        return os.environ.get(key) or os.environ.get(key.upper())

    # When no_proxy was not passed explicitly, fall back to the environment.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # Compare the hostname against every no_proxy entry, both with and
        # without the port.
        entries = (
            entry for entry in no_proxy.replace(' ', '').split(',') if entry
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in entries:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # Plain (non-CIDR) IP entries must match exactly.
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for entry in entries:
                if parsed.hostname.endswith(entry) or host_with_port.endswith(entry):
                    # The URL matches something in no_proxy, so we don't
                    # want to apply the proxies on this URL.
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

        if bypass:
            return True

    return False
753
754
def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies (empty when the URL should
    bypass them).

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    return getproxies()
765
766
def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url being for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        # Hostless URLs (e.g. file://) can only match by scheme or 'all'.
        return proxies.get(urlparts.scheme, proxies.get('all'))

    # Most specific key wins: scheme+host, scheme, all+host, then all.
    for proxy_key in (
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ):
        if proxy_key in proxies:
            return proxies[proxy_key]
    return None
791
792
def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '{}/{}'.format(name, __version__)
800
801
def default_headers():
    """Build the default headers sent with every request.

    :rtype: requests.structures.CaseInsensitiveDict
    """
    headers = {
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    }
    return CaseInsensitiveDict(headers)
812
813
def parse_header_links(value):
    """Parse an RFC-style Link header into a list of dicts.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """
    links = []
    strip_chars = ' \'"'

    value = value.strip(strip_chars)
    if not value:
        return links

    for chunk in re.split(', *<', value):
        if ';' in chunk:
            url, params = chunk.split(';', 1)
        else:
            url, params = chunk, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, val = param.split('=')
            except ValueError:
                # Malformed parameter: stop processing this link's params.
                break
            link[key.strip(strip_chars)] = val.strip(strip_chars)

        links.append(link)

    return links
849
850
# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """Guess the UTF flavour of JSON bytes *data* from its BOM or from the
    position of null bytes in the first four bytes.

    :rtype: str
    """
    # JSON always starts with two ASCII characters, so the null-byte layout
    # of the first four bytes pins down the encoding; BOMs are checked first.
    head = data[:4]
    if head in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'  # BOM included
    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if head[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'  # BOM included

    nulls = head.count(_null)
    if nulls == 0:
        return 'utf-8'
    if nulls == 2:
        if head[::2] == _null2:  # 1st and 3rd bytes are null
            return 'utf-16-be'
        if head[1::2] == _null2:  # 2nd and 4th bytes are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nulls == 3:
        if head[:3] == _null3:
            return 'utf-32-be'
        if head[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
887
888
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    parsed = urlparse(url, new_scheme)
    netloc, path = parsed.netloc, parsed.path

    # urlparse is a finicky beast: it sometimes decides there is no netloc
    # and stores the host in path instead. Assume it is being over-cautious
    # and swap them back.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((parsed.scheme, netloc, path, parsed.params,
                       parsed.query, parsed.fragment))
904
905
def get_auth_from_url(url):
    """Extract the (username, password) pair embedded in *url*.

    Returns ('', '') when the URL carries no auth component.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        return (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        return ('', '')
920
921
# Compiled once at import time to avoid recompiling on every call.
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verify that a header value is a string without leading whitespace
    or return characters, preventing unintended header injection.

    :param header: tuple, in the format (name, value).
    :raises InvalidHeader: on a malformed or non-string value.
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))
946
947
def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    parsed = urlparse(url)
    netloc, path = parsed.netloc, parsed.path

    # see func:`prepend_scheme_if_needed` for the netloc/path swap
    if not netloc:
        netloc, path = path, netloc

    # Drop any user:password@ prefix from the netloc.
    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((parsed.scheme, netloc, path, parsed.params,
                       parsed.query, ''))
963
964
def rewind_body(prepared_request):
    """Seek the request body back to the position recorded before the
    first read, so it can be read again on redirect.

    :raises UnrewindableBodyError: when the body is not seekable, no valid
        position was recorded, or seeking fails.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is None or not isinstance(
            prepared_request._body_position, integer_types):
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")
    try:
        body_seek(prepared_request._body_position)
    except (IOError, OSError):
        raise UnrewindableBodyError("An error occurred when rewinding request "
                                    "body for redirect.")