comparison env/lib/python3.7/site-packages/requests_toolbelt/utils/deprecated.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
comparison
equal deleted inserted replaced
4:79f47841a781 5:9b1c78e6ba9c
1 # -*- coding: utf-8 -*-
2 """A collection of functions deprecated in requests.utils."""
3 import re
4 import sys
5
6 from requests import utils
7
# Each helper below is the bound ``findall`` of a compiled bytes pattern, so
# it takes a bytestring and returns the list of captured charset values
# (also bytes).

# <meta ... charset=VALUE>; the value may be quoted or bare. Case-insensitive.
find_charset = re.compile(
    br'<meta.*?charset=["\']*(.+?)["\'>]',
    flags=re.IGNORECASE,
).findall

# <meta ... content=charset=VALUE>; ``charset=`` has to follow ``content=``
# directly, separated only by optional quotes and an optional ``;``.
# Case-insensitive.
find_pragma = re.compile(
    br'<meta.*?content=["\']*;?charset=(.+?)["\'>]',
    flags=re.IGNORECASE,
).findall

# <?xml ... encoding=VALUE ...>; case-sensitive, and anchored so that only a
# declaration at the very start of the content is matched.
find_xml = re.compile(
    br'^<\?xml.*?encoding=["\']*(.+?)["\'>]',
).findall
19
20
def get_encodings_from_content(content):
    """Return encodings from given content string.

    The content is scanned with ``find_charset``, ``find_pragma`` and
    ``find_xml``, and the matches are returned in that order. Duplicates are
    not removed.

    .. code-block:: python

        import requests
        from requests_toolbelt.utils import deprecated

        r = requests.get(url)
        encodings = deprecated.get_encodings_from_content(r.content)

    :param content: bytestring to extract encodings from
    :type content: bytes
    :return: encodings detected in the provided content
    :rtype: list(str)
    """
    encodings = (find_charset(content) + find_pragma(content)
                 + find_xml(content))
    if (3, 0) <= sys.version_info < (4, 0):
        # The patterns capture arbitrary bytes, so a malformed document can
        # yield a "charset" that is not valid UTF-8. Decode leniently so such
        # input produces a (useless but harmless) name instead of raising
        # UnicodeDecodeError out of this function.
        encodings = [
            encoding.decode('utf8', 'replace') for encoding in encodings
        ]
    return encodings
42
43
def get_unicode_from_response(response):
    """Return the requested content back in unicode.

    This will first attempt to retrieve the encoding from the response
    headers. If that fails, it will use
    :func:`requests_toolbelt.utils.deprecated.get_encodings_from_content`
    to determine encodings from HTML elements.

    A candidate encoding that cannot be used, either because the content is
    not valid in it or because it is not a codec name Python recognises, is
    skipped and the next candidate is tried. If no candidate decodes the
    content strictly, the header encoding is retried with
    ``errors='replace'``, and as a last resort ``response.text`` is returned.

    .. code-block:: python

        import requests
        from requests_toolbelt.utils import deprecated

        r = requests.get(url)
        text = deprecated.get_unicode_from_response(r)

    :param response: Response object to get unicode content from.
    :type response: requests.models.Response
    :return: the decoded response body
    :rtype: str
    """
    # Everything that means "this candidate is unusable, try the next one":
    # - ValueError covers UnicodeError (bytes invalid for the codec) and
    #   malformed codec names such as ones with an embedded NUL;
    # - LookupError is raised for codec names Python does not know.
    # Encoding names come from the server, so none of these may escape.
    unusable = (LookupError, ValueError)

    tried_encodings = set()

    # Try charset from content-type
    encoding = utils.get_encoding_from_headers(response.headers)

    if encoding:
        try:
            return str(response.content, encoding)
        except unusable:
            tried_encodings.add(encoding.lower())

    encodings = get_encodings_from_content(response.content)

    for _encoding in encodings:
        _encoding = _encoding.lower()
        if _encoding in tried_encodings:
            # Already failed once; don't decode the whole body again.
            continue
        try:
            return str(response.content, _encoding)
        except unusable:
            tried_encodings.add(_encoding)

    # Fall back:
    if encoding:
        try:
            return str(response.content, encoding, errors='replace')
        except (TypeError,) + unusable:
            # Header encoding is unusable even leniently; defer to
            # response.text below.
            pass
    return response.text