Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/requests_toolbelt/utils/deprecated.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """A collection of functions deprecated in requests.utils.""" | |
3 import re | |
4 import sys | |
5 | |
6 from requests import utils | |
7 | |
# Pre-compiled, bytes-based matchers for the three places an HTML/XML
# document may declare its encoding.  Each name is bound to the pattern's
# ``findall`` method, so calling it returns a list of captured byte strings.

# <meta charset="...">
find_charset = re.compile(
    br'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I
).findall

# <meta ... content="charset=..."> (pragma form)
find_pragma = re.compile(
    br'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I
).findall

# <?xml ... encoding="..."?> -- must be at the very start of the content
# and is matched case-sensitively.
find_xml = re.compile(
    br'^<\?xml.*?encoding=["\']*(.+?)["\'>]'
).findall


def get_encodings_from_content(content):
    """Return encodings from given content string.

    .. code-block:: python

        import requests
        from requests_toolbelt.utils import deprecated

        r = requests.get(url)
        encodings = deprecated.get_encodings_from_content(r.content)

    The result lists ``<meta charset>`` matches first, then ``<meta>``
    pragma matches, then the XML declaration match.

    :param content: bytestring to extract encodings from
    :type content: bytes
    :return: encodings detected in the provided content
    :rtype: list(str)
    """
    encodings = (find_charset(content) + find_pragma(content)
                 + find_xml(content))
    if (3, 0) <= sys.version_info < (4, 0):
        # The captured names come straight from the (untrusted) document
        # and may contain arbitrary bytes.  Decode leniently so a garbage
        # charset value yields an unusable candidate name rather than a
        # UnicodeDecodeError for the caller.
        encodings = [
            encoding.decode('utf8', 'replace') for encoding in encodings
        ]
    return encodings
42 | |
43 | |
def get_unicode_from_response(response):
    """Return the requested content back in unicode.

    This will first attempt to retrieve the encoding from the response
    headers. If that fails, it will use
    :func:`requests_toolbelt.utils.deprecated.get_encodings_from_content`
    to determine encodings from HTML elements.

    If no candidate decodes the body cleanly, the header encoding is
    retried with ``errors='replace'``; as a last resort ``response.text``
    is returned.

    .. code-block:: python

        import requests
        from requests_toolbelt.utils import deprecated

        r = requests.get(url)
        text = deprecated.get_unicode_from_response(r)

    :param response: Response object to get unicode content from.
    :type response: requests.models.Response
    :return: the decoded response body
    """
    content = response.content
    # Lower-cased names of encodings that already failed, so a charset
    # declared in several places is only attempted once.
    tried_encodings = set()

    # Try charset from content-type
    encoding = utils.get_encoding_from_headers(response.headers)

    if encoding:
        try:
            return str(content, encoding)
        except (LookupError, ValueError):
            # ValueError is the parent of UnicodeError (undecodable
            # bytes); LookupError is raised for a codec name Python does
            # not know.  Either way this candidate is unusable.
            tried_encodings.add(encoding.lower())

    # Encoding names found in the body are supplied by the remote document,
    # so an unknown or malformed name must be skipped, not raised.
    for _encoding in get_encodings_from_content(content):
        _encoding = _encoding.lower()
        if _encoding in tried_encodings:
            continue
        try:
            return str(content, _encoding)
        except (LookupError, ValueError):
            tried_encodings.add(_encoding)

    # Fall back:
    if encoding:
        try:
            return str(content, encoding, errors='replace')
        except (LookupError, TypeError, ValueError):
            # The header encoding itself is unusable; defer to requests.
            pass
    return response.text