diff env/lib/python3.7/site-packages/requests_toolbelt/utils/deprecated.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.7/site-packages/requests_toolbelt/utils/deprecated.py	Sat May 02 07:14:21 2020 -0400
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+"""A collection of functions deprecated in requests.utils."""
+import re
+import sys
+
+from requests import utils
+
+# <meta ... charset=VALUE>: capture VALUE, minus any opening quotes.
+find_charset = re.compile(
+    br'<meta.*?charset=["\']*(.+?)["\'>]', re.I).findall
+
+# Pragma form: the content= value must start with an optional ';' + charset=.
+find_pragma = re.compile(
+    br'<meta.*?content=["\']*;?charset=(.+?)["\'>]', re.I).findall
+
+# XML declaration at the very start of the input; matched case-sensitively.
+find_xml = re.compile(
+    br'^<\?xml.*?encoding=["\']*(.+?)["\'>]').findall
+
+
+def get_encodings_from_content(content):
+    """Return the encodings declared inside a raw content bytestring.
+
+    .. code-block:: python
+
+        import requests
+        from requests_toolbelt.utils import deprecated
+
+        r = requests.get(url)
+        encodings = deprecated.get_encodings_from_content(r.content)
+
+    :param content: bytestring to extract encodings from
+    :type content: bytes
+    :return: encodings detected in the provided content
+    :rtype: list(str)
+    """
+    found = find_charset(content) + find_pragma(content) + find_xml(content)
+    if (3, 0) <= sys.version_info < (4, 0):
+        # Markup is untrusted: never raise on a non-UTF-8 charset value.
+        found = [name.decode('utf8', 'replace') for name in found]
+    return found
+
+
+def get_unicode_from_response(response):
+    """Return the requested content back in unicode.
+
+    This will first attempt to retrieve the encoding from the response
+    headers. If that fails, it will use
+    :func:`requests_toolbelt.utils.deprecated.get_encodings_from_content`
+    to determine encodings from HTML elements. Codec names Python does
+    not know are skipped; the last resort is ``response.text``.
+
+    .. code-block:: python
+
+        import requests
+        from requests_toolbelt.utils import deprecated
+
+        r = requests.get(url)
+        text = deprecated.get_unicode_from_response(r)
+
+    :param response: Response object to get unicode content from.
+    :type response: requests.models.Response
+    :return: decoded body text
+    :rtype: str
+    """
+    tried_encodings = set()
+    # Try charset from content-type
+    encoding = utils.get_encoding_from_headers(response.headers)
+    if encoding:
+        try:
+            return str(response.content, encoding)
+        except (UnicodeError, LookupError):  # LookupError: unknown codec
+            tried_encodings.add(encoding.lower())
+
+    # Then each encoding declared in the body, skipping known failures.
+    for _encoding in get_encodings_from_content(response.content):
+        _encoding = _encoding.lower()
+        if _encoding in tried_encodings:
+            continue
+        try:
+            return str(response.content, _encoding)
+        except (UnicodeError, LookupError):
+            tried_encodings.add(_encoding)
+
+    # Fall back: lossy decode with the header charset, else response.text.
+    if encoding:
+        try:
+            return str(response.content, encoding, errors='replace')
+        except (TypeError, LookupError):
+            pass
+    return response.text