Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/boltons/urlutils.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:d30785e31577 | 
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """:mod:`urlutils` is a module dedicated to one of software's most | |
| 3 versatile, well-aged, and beloved data structures: the URL, also known | |
| 4 as the `Uniform Resource Locator`_. | |
| 5 | |
| 6 Among other things, this module is a full reimplementation of URLs, | |
| 7 without any reliance on the :mod:`urlparse` or :mod:`urllib` standard | |
| 8 library modules. The centerpiece and top-level interface of urlutils | |
| 9 is the :class:`URL` type. Also featured is the :func:`find_all_links` | |
| 10 convenience function. Some low-level functions and constants are also | |
| 11 below. | |
| 12 | |
| 13 The implementations in this module are based heavily on `RFC 3986`_ and | |
| 14 `RFC 3987`_, and incorporate details from several other RFCs and `W3C | |
| 15 documents`_. | |
| 16 | |
| 17 .. _Uniform Resource Locator: https://en.wikipedia.org/wiki/Uniform_Resource_Locator | |
| 18 .. _RFC 3986: https://tools.ietf.org/html/rfc3986 | |
| 19 .. _RFC 3987: https://tools.ietf.org/html/rfc3987 | |
| 20 .. _W3C documents: https://www.w3.org/TR/uri-clarification/ | |
| 21 | |
| 22 """ | |
| 23 | |
| 24 import re | |
| 25 import socket | |
| 26 import string | |
| 27 from unicodedata import normalize | |
| 28 | |
# Py2/py3 compatibility shims.
# ``unicode`` is bound to the type of a ``u''`` literal, i.e. the
# interpreter's text type, so the rest of the module can use one name.
unicode = type(u'')
try:
    unichr
except NameError:
    # No ``unichr`` builtin on this interpreter; fall back to ``chr``.
    unichr = chr
| 34 | |
# The unreserved URI characters (per RFC 3986 Section 2.3)
_UNRESERVED_CHARS = frozenset('~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                              'abcdefghijklmnopqrstuvwxyz')

# URL parsing regex (based on RFC 3986 Appendix B, with modifications)
# Named groups: ``scheme``, ``_netloc_sep`` (the literal '//' when
# present), ``authority``, ``path``, ``query`` and ``fragment``. Every
# component except ``path`` is optional.
_URL_RE = re.compile(r'^((?P<scheme>[^:/?#]+):)?'
                     r'((?P<_netloc_sep>//)(?P<authority>[^/?#]*))?'
                     r'(?P<path>[^?#]*)'
                     r'(\?(?P<query>[^#]*))?'
                     r'(#(?P<fragment>.*))?')
| 45 | |
| 46 | |
# Lookup table used by unquote_to_bytes(): maps every two-character hex
# pair (as ASCII bytes, any mix of upper/lower case) to the single byte
# it denotes.
_HEX_CHAR_MAP = {
    (hi + lo).encode('ascii'): unichr(int(hi + lo, 16)).encode('charmap')
    for hi in string.hexdigits
    for lo in string.hexdigits
}
# Splits text into alternating non-ASCII / ASCII runs; the capturing
# group keeps the ASCII runs in the result of ``split``.
_ASCII_RE = re.compile('([\x00-\x7f]+)')
| 51 | |
| 52 | |
# This port list painstakingly curated by hand searching through
# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
# and
# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
#
# Maps each netloc-using scheme to its default port. A value of None
# means the scheme is registered as netloc-using but has no default
# port recorded here. Mutated at runtime by register_scheme().
SCHEME_PORT_MAP = {'acap': 674, 'afp': 548, 'dict': 2628, 'dns': 53,
                   'file': None, 'ftp': 21, 'git': 9418, 'gopher': 70,
                   'http': 80, 'https': 443, 'imap': 143, 'ipp': 631,
                   'ipps': 631, 'irc': 194, 'ircs': 6697, 'ldap': 389,
                   'ldaps': 636, 'mms': 1755, 'msrp': 2855, 'msrps': None,
                   'mtqp': 1038, 'nfs': 111, 'nntp': 119, 'nntps': 563,
                   'pop': 110, 'prospero': 1525, 'redis': 6379, 'rsync': 873,
                   'rtsp': 554, 'rtsps': 322, 'rtspu': 5005, 'sftp': 22,
                   'smb': 445, 'snmp': 161, 'ssh': 22, 'steam': None,
                   'svn': 3690, 'telnet': 23, 'ventrilo': 3784, 'vnc': 5900,
                   'wais': 210, 'ws': 80, 'wss': 443, 'xmpp': None}

# This list of schemes that don't use authorities is also from the link above.
# Also mutated at runtime by register_scheme().
NO_NETLOC_SCHEMES = set(['urn', 'about', 'bitcoin', 'blob', 'data', 'geo',
                         'magnet', 'mailto', 'news', 'pkcs11',
                         'sip', 'sips', 'tel'])
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
| 74 | |
# RFC 3986 section 2.2, Reserved Characters
_GEN_DELIMS = frozenset(u':/?#[]@')
_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
_ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS

# For each URL component there is a *_SAFE set (characters emitted
# as-is by the full-quote maps) and a *_DELIMS set (the reserved
# characters that are not safe in that component, which are the only
# ones encoded in minimal-quoting mode).
_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS
_USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE
_PATH_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u':@')
_PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE
_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u'/?')
_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
# NOTE: ``-`` binds tighter than ``|``, so this evaluates as
# _UNRESERVED_CHARS | (_FRAGMENT_SAFE - set(u'&=+')). The result is the
# same either way because none of '&', '=', '+' is an unreserved char.
_QUERY_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u'&=+')
_QUERY_DELIMS = _ALL_DELIMS - _QUERY_SAFE
| 88 | |
| 89 | |
class URLParseError(ValueError):
    """Raised when a URL cannot be parsed.

    Subclasses :exc:`ValueError`, so existing ``except ValueError``
    handlers keep working. Mostly raised on invalid ports and IPv6
    addresses.
    """
| 95 | |
| 96 | |
# Encoding assumed for byte strings handed to this module.
DEFAULT_ENCODING = 'utf8'


def to_unicode(obj):
    """Coerce *obj* to text.

    Byte strings are decoded with :data:`DEFAULT_ENCODING`. Anything
    else is passed to the text type's constructor.

    Bytes are handled explicitly because on Python 3 ``str(b'abc')``
    does not raise; it returns the repr ``"b'abc'"``, so relying on a
    :exc:`UnicodeDecodeError` fallback silently produces garbage there.

    Args:
        obj: A text string, byte string, or any object convertible to
            text.

    Returns:
        The text form of *obj*.

    Raises:
        UnicodeDecodeError: If *obj* is a byte string that is not valid
            in :data:`DEFAULT_ENCODING`.
    """
    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING)
    try:
        return unicode(obj)
    except UnicodeDecodeError:
        # Only reachable for non-bytes objects whose text conversion
        # attempts an implicit decode (py2); retry with the default.
        return unicode(obj, encoding=DEFAULT_ENCODING)
| 105 | |
| 106 | |
# regex from gruber via tornado
# doesn't support ipv6
# doesn't support mailto (netloc-less schemes)
# Group 1 is the whole candidate URL, group 2 the scheme (if any) and
# group 3 the run of one to three slashes after the scheme.
# NOTE(review): the ``&|"`` alternatives look like HTML entities that
# were collapsed by an HTML-unescaping step -- confirm against the
# upstream pattern before relying on the exact matching of '&' and '"'.
_FIND_ALL_URL_RE = re.compile(to_unicode(r"""\b((?:([\w-]+):(/{1,3})|www[.])(?:(?:(?:[^\s&()<>]|&|")*(?:[^!"#$%'()*+,.:;<=>?@\[\]^`{|}~\s]))|(?:\((?:[^\s&()]|&|")*\)))+)"""))
| 111 | |
| 112 | |
def find_all_links(text, with_text=False, default_scheme='https', schemes=()):
    """This function uses heuristics to search plain text for strings
    that look like URLs, returning a :class:`list` of :class:`URL`
    objects. It supports limiting the accepted schemes, and returning
    interleaved text as well.

    >>> find_all_links('Visit https://boltons.rtfd.org!')
    [URL(u'https://boltons.rtfd.org')]
    >>> find_all_links('Visit https://boltons.rtfd.org!', with_text=True)
    [u'Visit ', URL(u'https://boltons.rtfd.org'), u'!']

    Args:
        text (str): The text to search.

        with_text (bool): Whether or not to interleave plaintext blocks
            with the returned URL objects. Having all tokens can be
            useful for transforming the text, e.g., replacing links with
            HTML equivalents. Defaults to ``False``.

        default_scheme (str): Many URLs are written without the scheme
            component. This function can match a reasonable subset of
            those, provided *default_scheme* is set to a string. Set to
            ``False`` to disable matching scheme-less URLs. Defaults to
            ``'https'``.

        schemes (list): A list of strings that a URL's scheme must
            match in order to be included in the results. Defaults to
            empty, which matches all schemes.

    Returns:
        list: :class:`URL` objects in order of appearance. When
        *with_text* is true, the text between (and rejected as) links
        is interleaved as strings, with adjacent text runs merged.

    .. note:: Currently this function does not support finding IPv6
       addresses or URLs with netloc-less schemes, like mailto.

    """
    text = to_unicode(text)
    prev_end = 0
    ret = []
    _add = ret.append

    def _add_text(t):
        # Text only belongs in the output when the caller asked for it.
        # Routing every text fragment through here also guarantees that
        # two strings never end up adjacent in the result.
        if not with_text:
            return
        if ret and isinstance(ret[-1], unicode):
            ret[-1] += t
        else:
            _add(t)

    for match in _FIND_ALL_URL_RE.finditer(text):
        start, end = match.start(1), match.end(1)
        if prev_end < start:
            _add_text(text[prev_end:start])
        prev_end = end
        try:
            cur_url_text = match.group(0)
            cur_url = URL(cur_url_text)
            if not cur_url.scheme:
                if default_scheme:
                    cur_url = URL(default_scheme + '://' + cur_url_text)
                else:
                    # scheme-less matching disabled: treat as plain text
                    _add_text(text[start:end])
                    continue
            if schemes and cur_url.scheme not in schemes:
                # scheme filtered out: treat as plain text
                _add_text(text[start:end])
            else:
                _add(cur_url)
        except URLParseError:
            # currently this should only be hit with broken port
            # strings. the regex above doesn't support ipv6 addresses
            _add_text(text[start:end])

    tail = text[prev_end:]
    if tail:
        _add_text(tail)

    return ret
| 187 | |
| 188 | |
| 189 def _make_quote_map(safe_chars): | |
| 190 ret = {} | |
| 191 # v is included in the dict for py3 mostly, because bytestrings | |
| 192 # are iterables of ints, of course! | |
| 193 for i, v in zip(range(256), range(256)): | |
| 194 c = chr(v) | |
| 195 if c in safe_chars: | |
| 196 ret[c] = ret[v] = c | |
| 197 else: | |
| 198 ret[c] = ret[v] = '%{0:02X}'.format(i) | |
| 199 return ret | |
| 200 | |
| 201 | |
# Per-component percent-encoding tables, one per URL part, each built
# from that part's set of safe characters.
_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
_QUERY_PART_QUOTE_MAP = _make_quote_map(_QUERY_SAFE)
_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
| 206 | |
| 207 | |
def quote_path_part(text, full_quote=True):
    """Percent-encode a single segment of a URL path.

    With *full_quote* set (the default), *text* is NFC-normalized,
    encoded to UTF-8, and every byte outside the path-safe set is
    escaped. Otherwise only the characters in ``_PATH_DELIMS`` are
    escaped and everything else is passed through untouched.
    """
    if not full_quote:
        pieces = []
        for char in text:
            if char in _PATH_DELIMS:
                char = _PATH_PART_QUOTE_MAP[char]
            pieces.append(char)
        return u''.join(pieces)
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join(_PATH_PART_QUOTE_MAP[byte] for byte in encoded)
| 217 | |
| 218 | |
def quote_query_part(text, full_quote=True):
    """Percent-encode a single query string key or value.

    With *full_quote* set (the default), *text* is NFC-normalized,
    encoded to UTF-8, and every byte outside the query-safe set is
    escaped. Otherwise only the characters in ``_QUERY_DELIMS`` are
    escaped and everything else is passed through untouched.
    """
    if not full_quote:
        pieces = []
        for char in text:
            if char in _QUERY_DELIMS:
                char = _QUERY_PART_QUOTE_MAP[char]
            pieces.append(char)
        return u''.join(pieces)
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join(_QUERY_PART_QUOTE_MAP[byte] for byte in encoded)
| 228 | |
| 229 | |
def quote_fragment_part(text, full_quote=True):
    """Quote the fragment part of the URL. Fragments don't have
    subdelimiters, so the whole URL fragment can be passed.

    With *full_quote* set (the default), *text* is NFC-normalized,
    encoded to UTF-8, and every byte outside the fragment-safe set is
    escaped. Otherwise only the characters in ``_FRAGMENT_DELIMS`` are
    escaped and everything else is passed through untouched.
    """
    if not full_quote:
        pieces = []
        for char in text:
            if char in _FRAGMENT_DELIMS:
                char = _FRAGMENT_QUOTE_MAP[char]
            pieces.append(char)
        return u''.join(pieces)
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join(_FRAGMENT_QUOTE_MAP[byte] for byte in encoded)
| 239 | |
| 240 | |
def quote_userinfo_part(text, full_quote=True):
    """Quote special characters in either the username or password
    section of the URL. Note that userinfo in URLs is considered
    deprecated in many circles (especially browsers), and support for
    percent-encoded userinfo can be spotty.

    With *full_quote* set (the default), *text* is NFC-normalized,
    encoded to UTF-8, and every byte outside the userinfo-safe set is
    escaped. Otherwise only the characters in ``_USERINFO_DELIMS`` are
    escaped and everything else is passed through untouched.
    """
    if not full_quote:
        pieces = []
        for char in text:
            if char in _USERINFO_DELIMS:
                char = _USERINFO_PART_QUOTE_MAP[char]
            pieces.append(char)
        return u''.join(pieces)
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join(_USERINFO_PART_QUOTE_MAP[byte] for byte in encoded)
| 252 | |
| 253 | |
def unquote(string, encoding='utf-8', errors='replace'):
    """Percent-decode a string, by replacing %xx escapes with their
    single-character equivalent. The optional *encoding* and *errors*
    parameters specify how to decode percent-encoded sequences into
    Unicode characters, as accepted by the :meth:`bytes.decode()` method. By
    default, percent-encoded sequences are decoded with UTF-8, and
    invalid sequences are replaced by a placeholder character.

    >>> unquote(u'abc%20def')
    u'abc def'
    """
    if '%' not in string:
        # Bare attribute access: raises AttributeError for objects that
        # are not string-like instead of returning them silently.
        string.split
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'replace' if errors is None else errors
    # Splitting on a pattern with one capturing group yields an
    # odd-length list: non-ASCII text at even indexes, ASCII runs (the
    # only place escapes can live) at odd indexes.
    pieces = _ASCII_RE.split(string)
    out = pieces[:1]
    for ascii_run, following in zip(pieces[1::2], pieces[2::2]):
        out.append(unquote_to_bytes(ascii_run).decode(encoding, errors))
        out.append(following)
    return ''.join(out)
| 279 | |
| 280 | |
def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'."""
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, unicode):
        string = string.encode('utf-8')
    chunks = string.split(b'%')
    if len(chunks) == 1:
        # no escapes at all
        return string

    out = [chunks[0]]
    for chunk in chunks[1:]:
        decoded = _HEX_CHAR_MAP.get(chunk[:2])
        if decoded is None:
            # not a valid two-digit hex escape: keep the '%' literally
            out.extend((b'%', chunk))
        else:
            out.extend((decoded, chunk[2:]))
    return b''.join(out)
| 306 | |
| 307 | |
def register_scheme(text, uses_netloc=None, default_port=None):
    """Registers new scheme information, resulting in correct port and
    slash behavior from the URL object. There are dozens of standard
    schemes preregistered, so this function is mostly meant for
    proprietary internal customizations or stopgaps on missing
    standards information. If a scheme seems to be missing, please
    `file an issue`_!

    A scheme lives in exactly one registry: registering it as
    netloc-using removes it from the non-netloc set and vice versa, so
    a re-registration actually takes effect.

    Args:
        text (str): Text representing the scheme.
           (the 'http' in 'http://hatnote.com')
        uses_netloc (bool): Does the scheme support specifying a
           network host? For instance, "http" does, "mailto" does not.
           When left as ``None`` no registry is modified.
        default_port (int): The default port, if any, for netloc-using
           schemes.

    Raises:
        ValueError: If *default_port* cannot be converted to an integer,
            if a port is given for a non-netloc scheme, or if
            *uses_netloc* is not ``True``, ``False`` or ``None``.

    .. _file an issue: https://github.com/mahmoud/boltons/issues
    """
    text = text.lower()
    if default_port is not None:
        try:
            default_port = int(default_port)
        except ValueError:
            raise ValueError('default_port expected integer or None, not %r'
                             % (default_port,))

    if uses_netloc is True:
        SCHEME_PORT_MAP[text] = default_port
        # URL.uses_netloc consults SCHEME_PORT_MAP first, but keep the
        # two registries mutually exclusive regardless.
        NO_NETLOC_SCHEMES.discard(text)
    elif uses_netloc is False:
        if default_port is not None:
            raise ValueError('unexpected default port while specifying'
                             ' non-netloc scheme: %r' % default_port)
        NO_NETLOC_SCHEMES.add(text)
        # Without this, a stale SCHEME_PORT_MAP entry would keep winning
        # and the scheme would still be treated as netloc-using.
        SCHEME_PORT_MAP.pop(text, None)
    elif uses_netloc is not None:
        raise ValueError('uses_netloc expected True, False, or None')

    return
| 345 | |
| 346 | |
def resolve_path_parts(path_parts):
    """Normalize the URL path by resolving segments of '.' and '..',
    resulting in a dot-free path. See RFC 3986 section 5.2.4, Remove
    Dot Segments.

    Returns a new list; *path_parts* itself is not modified. When the
    input ends in a dot segment, an empty trailing segment is appended
    so the joined path keeps its trailing slash.
    """
    # TODO: what to do with multiple slashes
    resolved = []

    for segment in path_parts:
        if segment == u'.':
            continue
        if segment != u'..':
            resolved.append(segment)
            continue
        # '..': step up one level, but never pop the leading empty
        # segment that marks a rooted path (prevents unrooting)
        if len(resolved) > 1 or (resolved and resolved[0]):
            resolved.pop()

    last = list(path_parts[-1:])
    if last == [u'.'] or last == [u'..']:
        resolved.append(u'')

    return resolved
| 368 | |
| 369 | |
class cachedproperty(object):
    """A lazily computed attribute: like :class:`property`, but the
    wrapped method runs on access and its result is then stored on the
    instance.

    The value is written into the instance's ``__dict__`` under the
    wrapped function's name, so later lookups of that name find the
    stored value directly. The cache can be cleared with
    :func:`delattr` or by editing ``__dict__``.
    """
    def __init__(self, func):
        self.func = func
        self.__doc__ = func.__doc__

    def __get__(self, obj, objtype=None):
        # Class-level access returns the descriptor itself.
        if obj is None:
            return self
        result = self.func(obj)
        obj.__dict__[self.func.__name__] = result
        return result

    def __repr__(self):
        return '<%s func=%s>' % (self.__class__.__name__, self.func)
| 393 | |
| 394 | |
| 395 class URL(object): | |
| 396 r"""The URL is one of the most ubiquitous data structures in the | |
| 397 virtual and physical landscape. From blogs to billboards, URLs are | |
| 398 so common, that it's easy to overlook their complexity and | |
| 399 power. | |
| 400 | |
| 401 There are 8 parts of a URL, each with its own semantics and | |
| 402 special characters: | |
| 403 | |
| 404 * :attr:`~URL.scheme` | |
| 405 * :attr:`~URL.username` | |
| 406 * :attr:`~URL.password` | |
| 407 * :attr:`~URL.host` | |
| 408 * :attr:`~URL.port` | |
| 409 * :attr:`~URL.path` | |
| 410 * :attr:`~URL.query_params` (query string parameters) | |
| 411 * :attr:`~URL.fragment` | |
| 412 | |
| 413 Each is exposed as an attribute on the URL object. RFC 3986 offers | |
| 414 this brief structural summary of the main URL components:: | |
| 415 | |
| 416 foo://user:pass@example.com:8042/over/there?name=ferret#nose | |
| 417 \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/ | |
| 418 | | | | | | | | |
| 419 scheme userinfo host port path query fragment | |
| 420 | |
| 421 And here's how that example can be manipulated with the URL type: | |
| 422 | |
| 423 >>> url = URL('foo://example.com:8042/over/there?name=ferret#nose') | |
| 424 >>> print(url.host) | |
| 425 example.com | |
| 426 >>> print(url.get_authority()) | |
| 427 example.com:8042 | |
| 428 >>> print(url.qp['name']) # qp is a synonym for query_params | |
| 429 ferret | |
| 430 | |
| 431 URL's approach to encoding is that inputs are decoded as much as | |
| 432 possible, and data remains in this decoded state until re-encoded | |
| 433 using the :meth:`~URL.to_text()` method. In this way, it's similar | |
| 434 to Python's current approach of encouraging immediate decoding of | |
| 435 bytes to text. | |
| 436 | |
| 437 Note that URL instances are mutable objects. If an immutable | |
| 438 representation of the URL is desired, the string from | |
| 439 :meth:`~URL.to_text()` may be used. For an immutable, but | |
| 440 almost-as-featureful, URL object, check out the `hyperlink | |
| 441 package`_. | |
| 442 | |
| 443 .. _hyperlink package: https://github.com/mahmoud/hyperlink | |
| 444 | |
| 445 """ | |
| 446 | |
| 447 # public attributes (for comparison, see __eq__): | |
| 448 _cmp_attrs = ('scheme', 'uses_netloc', 'username', 'password', | |
| 449 'family', 'host', 'port', 'path', 'query_params', 'fragment') | |
| 450 | |
| 451 def __init__(self, url=''): | |
| 452 # TODO: encoding param. The encoding that underlies the | |
| 453 # percent-encoding is always utf8 for IRIs, but can be Latin-1 | |
| 454 # for other usage schemes. | |
| 455 ud = DEFAULT_PARSED_URL | |
| 456 if url: | |
| 457 if isinstance(url, URL): | |
| 458 url = url.to_text() # better way to copy URLs? | |
| 459 elif isinstance(url, bytes): | |
| 460 try: | |
| 461 url = url.decode(DEFAULT_ENCODING) | |
| 462 except UnicodeDecodeError as ude: | |
| 463 raise URLParseError('expected text or %s-encoded bytes.' | |
| 464 ' try decoding the url bytes and' | |
| 465 ' passing the result. (got: %s)' | |
| 466 % (DEFAULT_ENCODING, ude)) | |
| 467 ud = parse_url(url) | |
| 468 | |
| 469 _e = u'' | |
| 470 self.scheme = ud['scheme'] or _e | |
| 471 self._netloc_sep = ud['_netloc_sep'] or _e | |
| 472 self.username = (unquote(ud['username']) | |
| 473 if '%' in (ud['username'] or _e) else ud['username'] or _e) | |
| 474 self.password = (unquote(ud['password']) | |
| 475 if '%' in (ud['password'] or _e) else ud['password'] or _e) | |
| 476 self.family = ud['family'] | |
| 477 | |
| 478 if not ud['host']: | |
| 479 self.host = _e | |
| 480 else: | |
| 481 try: | |
| 482 self.host = ud['host'].encode("ascii") | |
| 483 except UnicodeEncodeError: | |
| 484 self.host = ud['host'] # already non-ascii text | |
| 485 else: | |
| 486 self.host = self.host.decode("idna") | |
| 487 | |
| 488 self.port = ud['port'] | |
| 489 self.path_parts = tuple([unquote(p) if '%' in p else p for p | |
| 490 in (ud['path'] or _e).split(u'/')]) | |
| 491 self._query = ud['query'] or _e | |
| 492 self.fragment = (unquote(ud['fragment']) | |
| 493 if '%' in (ud['fragment'] or _e) else ud['fragment'] or _e) | |
| 494 # TODO: possibly use None as marker for empty vs missing | |
| 495 return | |
| 496 | |
| 497 @classmethod | |
| 498 def from_parts(cls, scheme=None, host=None, path_parts=(), query_params=(), | |
| 499 fragment=u'', port=None, username=None, password=None): | |
| 500 """Build a new URL from parts. Note that the respective arguments are | |
| 501 not in the order they would appear in a URL: | |
| 502 | |
| 503 Args: | |
| 504 scheme (str): The scheme of a URL, e.g., 'http' | |
| 505 host (str): The host string, e.g., 'hatnote.com' | |
| 506 path_parts (tuple): The individual text segments of the | |
| 507 path, e.g., ('post', '123') | |
| 508 query_params (dict): An OMD, dict, or list of (key, value) | |
| 509 pairs representing the keys and values of the URL's query | |
| 510 parameters. | |
| 511 fragment (str): The fragment of the URL, e.g., 'anchor1' | |
| 512 port (int): The integer port of URL, automatic defaults are | |
| 513 available for registered schemes. | |
| 514 username (str): The username for the userinfo part of the URL. | |
| 515 password (str): The password for the userinfo part of the URL. | |
| 516 | |
| 517 Note that this method does relatively little | |
| 518 validation. :meth:`URL.to_text()` should be used to check if | |
| 519 any errors are produced while composing the final textual URL. | |
| 520 """ | |
| 521 ret = cls() | |
| 522 | |
| 523 ret.scheme = scheme | |
| 524 ret.host = host | |
| 525 ret.path_parts = tuple(path_parts) or (u'',) | |
| 526 ret.query_params.update(query_params) | |
| 527 ret.fragment = fragment | |
| 528 ret.port = port | |
| 529 ret.username = username | |
| 530 ret.password = password | |
| 531 | |
| 532 return ret | |
| 533 | |
| 534 @cachedproperty | |
| 535 def query_params(self): | |
| 536 """The parsed form of the query string of the URL, represented as a | |
| 537 :class:`~dictutils.OrderedMultiDict`. Also available as the | |
| 538 handy alias ``qp``. | |
| 539 | |
| 540 >>> url = URL('http://boltons.readthedocs.io/?utm_source=doctest&python=great') | |
| 541 >>> url.qp.keys() | |
| 542 [u'utm_source', u'python'] | |
| 543 """ | |
| 544 return QueryParamDict.from_text(self._query) | |
| 545 | |
| 546 qp = query_params | |
| 547 | |
| 548 @property | |
| 549 def path(self): | |
| 550 "The URL's path, in text form." | |
| 551 return u'/'.join([quote_path_part(p, full_quote=False) | |
| 552 for p in self.path_parts]) | |
| 553 | |
| 554 @path.setter | |
| 555 def path(self, path_text): | |
| 556 self.path_parts = tuple([unquote(p) if '%' in p else p | |
| 557 for p in to_unicode(path_text).split(u'/')]) | |
| 558 return | |
| 559 | |
| 560 @property | |
| 561 def uses_netloc(self): | |
| 562 """Whether or not a URL uses :code:`:` or :code:`://` to separate the | |
| 563 scheme from the rest of the URL depends on the scheme's own | |
| 564 standard definition. There is no way to infer this behavior | |
| 565 from other parts of the URL. A scheme either supports network | |
| 566 locations or it does not. | |
| 567 | |
| 568 The URL type's approach to this is to check for explicitly | |
| 569 registered schemes, with common schemes like HTTP | |
| 570 preregistered. This is the same approach taken by | |
| 571 :mod:`urlparse`. | |
| 572 | |
| 573 URL adds two additional heuristics if the scheme as a whole is | |
| 574 not registered. First, it attempts to check the subpart of the | |
| 575 scheme after the last ``+`` character. This adds intuitive | |
| 576 behavior for schemes like ``git+ssh``. Second, if a URL with | |
| 577 an unrecognized scheme is loaded, it will maintain the | |
| 578 separator it sees. | |
| 579 | |
| 580 >>> print(URL('fakescheme://test.com').to_text()) | |
| 581 fakescheme://test.com | |
| 582 >>> print(URL('mockscheme:hello:world').to_text()) | |
| 583 mockscheme:hello:world | |
| 584 | |
| 585 """ | |
| 586 default = self._netloc_sep | |
| 587 if self.scheme in SCHEME_PORT_MAP: | |
| 588 return True | |
| 589 if self.scheme in NO_NETLOC_SCHEMES: | |
| 590 return False | |
| 591 if self.scheme.split('+')[-1] in SCHEME_PORT_MAP: | |
| 592 return True | |
| 593 return default | |
| 594 | |
| 595 @property | |
| 596 def default_port(self): | |
| 597 """Return the default port for the currently-set scheme. Returns | |
| 598 ``None`` if the scheme is unrecognized. See | |
| 599 :func:`register_scheme` above. If :attr:`~URL.port` matches | |
| 600 this value, no port is emitted in the output of | |
| 601 :meth:`~URL.to_text()`. | |
| 602 | |
| 603 Applies the same '+' heuristic detailed in :meth:`URL.uses_netloc`. | |
| 604 """ | |
| 605 try: | |
| 606 return SCHEME_PORT_MAP[self.scheme] | |
| 607 except KeyError: | |
| 608 return SCHEME_PORT_MAP.get(self.scheme.split('+')[-1]) | |
| 609 | |
| 610 def normalize(self, with_case=True): | |
| 611 """Resolve any "." and ".." references in the path, as well as | |
| 612 normalize scheme and host casing. To turn off case | |
| 613 normalization, pass ``with_case=False``. | |
| 614 | |
| 615 More information can be found in `Section 6.2.2 of RFC 3986`_. | |
| 616 | |
| 617 .. _Section 6.2.2 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-6.2.2 | |
| 618 """ | |
| 619 self.path_parts = resolve_path_parts(self.path_parts) | |
| 620 | |
| 621 if with_case: | |
| 622 self.scheme = self.scheme.lower() | |
| 623 self.host = self.host.lower() | |
| 624 return | |
| 625 | |
| 626 def navigate(self, dest): | |
| 627 """Factory method that returns a _new_ :class:`URL` based on a given | |
| 628 destination, *dest*. Useful for navigating those relative | |
| 629 links with ease. | |
| 630 | |
| 631 The newly created :class:`URL` is normalized before being returned. | |
| 632 | |
| 633 >>> url = URL('http://boltons.readthedocs.io') | |
| 634 >>> url.navigate('en/latest/') | |
| 635 URL(u'http://boltons.readthedocs.io/en/latest/') | |
| 636 | |
| 637 Args: | |
| 638 dest (str): A string or URL object representing the destination | |
| 639 | |
| 640 More information can be found in `Section 5 of RFC 3986`_. | |
| 641 | |
| 642 .. _Section 5 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-5 | |
| 643 """ | |
| 644 orig_dest = None | |
| 645 if not isinstance(dest, URL): | |
| 646 dest, orig_dest = URL(dest), dest | |
| 647 if dest.scheme and dest.host: | |
| 648 # absolute URLs replace everything, but don't make an | |
| 649 # extra copy if we don't have to | |
| 650 return URL(dest) if orig_dest is None else dest | |
| 651 query_params = dest.query_params | |
| 652 | |
| 653 if dest.path: | |
| 654 if dest.path.startswith(u'/'): # absolute path | |
| 655 new_path_parts = list(dest.path_parts) | |
| 656 else: # relative path | |
| 657 new_path_parts = self.path_parts[:-1] + dest.path_parts | |
| 658 else: | |
| 659 new_path_parts = list(self.path_parts) | |
| 660 if not query_params: | |
| 661 query_params = self.query_params | |
| 662 | |
| 663 ret = self.from_parts(scheme=dest.scheme or self.scheme, | |
| 664 host=dest.host or self.host, | |
| 665 port=dest.port or self.port, | |
| 666 path_parts=new_path_parts, | |
| 667 query_params=query_params, | |
| 668 fragment=dest.fragment, | |
| 669 username=dest.username or self.username, | |
| 670 password=dest.password or self.password) | |
| 671 ret.normalize() | |
| 672 return ret | |
| 673 | |
| 674 def get_authority(self, full_quote=False, with_userinfo=False): | |
| 675 """Used by URL schemes that have a network location, | |
| 676 :meth:`~URL.get_authority` combines :attr:`username`, | |
| 677 :attr:`password`, :attr:`host`, and :attr:`port` into one | |
| 678 string, the *authority*, that is used for | |
| 679 connecting to a network-accessible resource. | |
| 680 | |
| 681 Used internally by :meth:`~URL.to_text()` and can be useful | |
| 682 for labeling connections. | |
| 683 | |
| 684 >>> url = URL('ftp://user@ftp.debian.org:2121/debian/README') | |
| 685 >>> print(url.get_authority()) | |
| 686 ftp.debian.org:2121 | |
| 687 >>> print(url.get_authority(with_userinfo=True)) | |
| 688 user@ftp.debian.org:2121 | |
| 689 | |
| 690 Args: | |
| 691 full_quote (bool): Whether or not to apply IDNA encoding. | |
| 692 Defaults to ``False``. | |
| 693 with_userinfo (bool): Whether or not to include username | |
| 694 and password, technically part of the | |
| 695 authority. Defaults to ``False``. | |
| 696 | |
| 697 """ | |
| 698 parts = [] | |
| 699 _add = parts.append | |
| 700 if self.username and with_userinfo: | |
| 701 _add(quote_userinfo_part(self.username)) | |
| 702 if self.password: | |
| 703 _add(':') | |
| 704 _add(quote_userinfo_part(self.password)) | |
| 705 _add('@') | |
| 706 if self.host: | |
| 707 if self.family == socket.AF_INET6: | |
| 708 _add('[') | |
| 709 _add(self.host) | |
| 710 _add(']') | |
| 711 elif full_quote: | |
| 712 _add(self.host.encode('idna').decode('ascii')) | |
| 713 else: | |
| 714 _add(self.host) | |
| 715 # TODO: 0 port? | |
| 716 if self.port and self.port != self.default_port: | |
| 717 _add(':') | |
| 718 _add(unicode(self.port)) | |
| 719 return u''.join(parts) | |
| 720 | |
| 721 def to_text(self, full_quote=False): | |
| 722 """Render a string representing the current state of the URL | |
| 723 object. | |
| 724 | |
| 725 >>> url = URL('http://listen.hatnote.com') | |
| 726 >>> url.fragment = 'en' | |
| 727 >>> print(url.to_text()) | |
| 728 http://listen.hatnote.com#en | |
| 729 | |
| 730 By setting the *full_quote* flag, the URL can either be fully | |
| 731 quoted or minimally quoted. The most common characteristic of | |
| 732 an encoded-URL is the presence of percent-encoded text (e.g., | |
| 733 %60). Unquoted URLs are more readable and suitable | |
| 734 for display, whereas fully-quoted URLs are more conservative | |
| 735 and generally necessary for sending over the network. | |
| 736 """ | |
| 737 scheme = self.scheme | |
| 738 path = u'/'.join([quote_path_part(p, full_quote=full_quote) | |
| 739 for p in self.path_parts]) | |
| 740 authority = self.get_authority(full_quote=full_quote, | |
| 741 with_userinfo=True) | |
| 742 query_string = self.query_params.to_text(full_quote=full_quote) | |
| 743 fragment = quote_fragment_part(self.fragment, full_quote=full_quote) | |
| 744 | |
| 745 parts = [] | |
| 746 _add = parts.append | |
| 747 if scheme: | |
| 748 _add(scheme) | |
| 749 _add(':') | |
| 750 if authority: | |
| 751 _add('//') | |
| 752 _add(authority) | |
| 753 elif (scheme and path[:2] != '//' and self.uses_netloc): | |
| 754 _add('//') | |
| 755 if path: | |
| 756 if scheme and authority and path[:1] != '/': | |
| 757 _add('/') | |
| 758 # TODO: i think this is here because relative paths | |
| 759 # with absolute authorities = undefined | |
| 760 _add(path) | |
| 761 if query_string: | |
| 762 _add('?') | |
| 763 _add(query_string) | |
| 764 if fragment: | |
| 765 _add('#') | |
| 766 _add(fragment) | |
| 767 return u''.join(parts) | |
| 768 | |
| 769 def __repr__(self): | |
| 770 cn = self.__class__.__name__ | |
| 771 return u'%s(%r)' % (cn, self.to_text()) | |
| 772 | |
| 773 def __str__(self): | |
| 774 return self.to_text() | |
| 775 | |
| 776 def __unicode__(self): | |
| 777 return self.to_text() | |
| 778 | |
| 779 def __eq__(self, other): | |
| 780 for attr in self._cmp_attrs: | |
| 781 if not getattr(self, attr) == getattr(other, attr, None): | |
| 782 return False | |
| 783 return True | |
| 784 | |
| 785 def __ne__(self, other): | |
| 786 return not self == other | |
| 787 | |
| 788 | |
# Use the socket module's inet_pton when it can be imported; otherwise
# define a ctypes-based replacement on top of the Winsock
# WSAStringToAddressA call.
try:
    from socket import inet_pton
except ImportError:
    # from https://gist.github.com/nnemkin/4966028
    import ctypes

    # Buffer that WSAStringToAddressA fills in; holds slots for both an
    # IPv4 (4-byte) and an IPv6 (16-byte) address.
    class _sockaddr(ctypes.Structure):
        _fields_ = [("sa_family", ctypes.c_short),
                    ("__pad1", ctypes.c_ushort),
                    ("ipv4_addr", ctypes.c_byte * 4),
                    ("ipv6_addr", ctypes.c_byte * 16),
                    ("__pad2", ctypes.c_ulong)]

    WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA
    WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA

    def inet_pton(address_family, ip_string):
        """Convert the textual address *ip_string* to packed bytes.

        Returns 4 bytes for ``socket.AF_INET`` and 16 bytes for
        ``socket.AF_INET6``. Raises :exc:`socket.error` when the
        conversion call reports failure or *address_family* is neither
        of those two.
        """
        addr = _sockaddr()
        ip_string = ip_string.encode('ascii')
        addr.sa_family = address_family
        addr_size = ctypes.c_int(ctypes.sizeof(addr))

        # a non-zero return value signals failure
        if WSAStringToAddressA(ip_string, address_family, None, ctypes.byref(addr), ctypes.byref(addr_size)) != 0:
            raise socket.error(ctypes.FormatError())

        if address_family == socket.AF_INET:
            return ctypes.string_at(addr.ipv4_addr, 4)
        if address_family == socket.AF_INET6:
            return ctypes.string_at(addr.ipv6_addr, 16)
        raise socket.error('unknown address family')
| 819 | |
| 820 | |
def parse_host(host):
    """\
    Low-level helper that classifies the host portion of a URL.

    Returns a ``(family, host)`` tuple. *family* is
    ``socket.AF_INET6`` for a bracketed IPv6 literal,
    ``socket.AF_INET`` for an IPv4 literal, and ``None`` for anything
    else. The returned host has the surrounding brackets removed. A
    falsy *host* yields ``(None, u'')``.

    Raises :class:`URLParseError` when a bracketed host containing a
    colon is not a valid IPv6 address.

    >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
    True
    >>> parse_host('[::1]') == (socket.AF_INET6, '::1')
    True
    >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
    True

    Odd doctest formatting above due to py3's switch from int to enums
    for :mod:`socket` constants.

    """
    if not host:
        return None, u''

    looks_like_ipv6 = (u':' in host
                       and host[0] == u'['
                       and host[-1] == u']')
    if looks_like_ipv6:
        host = host[1:-1]
        try:
            inet_pton(socket.AF_INET6, host)
        except socket.error as se:
            raise URLParseError('invalid IPv6 host: %r (%r)' % (host, se))
        except UnicodeEncodeError:
            # TODO: this can't be a real host right?
            # Falls through to the IPv4 check with brackets stripped.
            pass
        else:
            return socket.AF_INET6, host

    try:
        inet_pton(socket.AF_INET, host)
    except (socket.error, UnicodeEncodeError):
        return None, host  # not an IP
    return socket.AF_INET, host
| 859 | |
| 860 | |
def parse_url(url_text):
    """\
    Parse the text of a single URL into a dictionary. Used internally
    by the :class:`URL` type.

    The dictionary starts as the named groups of the module's URL
    regex and gains ``username``, ``password``, ``family``, ``host``
    and ``port`` entries derived from the authority.

    Note that "URL" has a very narrow, standards-based
    definition. While :func:`parse_url` may raise
    :class:`URLParseError` under a very limited number of conditions,
    such as non-integer port, a surprising number of strings are
    technically valid URLs. For instance, the text ``"url"`` is a
    valid URL, because it is a relative path.

    In short, do not expect this function to validate form inputs or
    other more colloquial usages of URLs.

    >>> res = parse_url('http://127.0.0.1:3000/?a=1')
    >>> sorted(res.keys())  # res is a basic dictionary
    ['_netloc_sep', 'authority', 'family', 'fragment', 'host', 'password', 'path', 'port', 'query', 'scheme', 'username']
    """
    url_text = unicode(url_text)
    match = _URL_RE.match(url_text)
    if match is None:
        raise URLParseError('could not parse url: %r' % url_text)
    gs = match.groupdict()

    authority = gs['authority']
    user = pw = None
    hostinfo = authority
    if authority:
        # Everything before the last '@' is userinfo.
        userinfo, at_sign, hostinfo = authority.rpartition('@')
        if at_sign:
            # TODO: empty userinfo error?
            user, _, pw = userinfo.partition(':')

    host = port = None
    if hostinfo:
        host, colon, port_str = hostinfo.partition(u':')
        if colon:
            if host and host[0] == u'[' and u']' in port_str:
                # Bracketed IPv6 literal: the first ':' split it in two,
                # so stitch the host back together up to the ']'.
                host_right, _, port_str = port_str.partition(u']')
                host = host + u':' + host_right + u']'
            if port_str and port_str[0] == u':':
                port_str = port_str[1:]

        # empty ports ok according to RFC 3986 6.2.3
        if port_str:
            try:
                port = int(port_str)
            except ValueError:
                raise URLParseError('expected integer for port, not %r'
                                    % port_str)

    family, host = parse_host(host)

    gs.update([('username', user),
               ('password', pw),
               ('family', family),
               ('host', host),
               ('port', port)])
    return gs
| 923 | |
| 924 | |
# Result of parsing the empty string, computed once at import time.
# NOTE(review): presumably the baseline of default field values for
# URL objects -- confirm against its users elsewhere in the module.
DEFAULT_PARSED_URL = parse_url('')
| 926 | |
| 927 | |
def parse_qsl(qs, keep_blank_values=True, encoding=DEFAULT_ENCODING):
    """
    Split the query string *qs* into a list of ``(key, value)`` pairs.

    Pairs are separated by ``&`` or ``;`` and empty pairs are skipped.
    ``+`` is turned into a space before each key and value is passed
    through ``unquote``. A pair with no value (``key`` or ``key=``)
    gets ``None`` as its value when *keep_blank_values* is true and is
    dropped otherwise. *encoding* is accepted but not used by this
    function.
    """
    ret = []
    # Treating ';' as '&' yields the same ordered pieces as splitting
    # on '&' first and then on ';'.
    for pair in qs.replace(';', '&').split('&'):
        if not pair:
            continue
        key, _, value = pair.partition('=')
        if not value and not keep_blank_values:
            continue
        key = unquote(key.replace('+', ' '))
        value = unquote(value.replace('+', ' ')) if value else None
        ret.append((key, value))
    return ret
| 948 | |
| 949 | |
| 950 """ | |
| 951 # What follows is the OrderedMultiDict from dictutils.py, circa | |
| 952 # 20161021, used for the QueryParamDict, toward the bottom. | |
| 953 """ | |
| 954 | |
try:
    from collections.abc import KeysView, ValuesView, ItemsView
except ImportError:
    from collections import KeysView, ValuesView, ItemsView

try:
    from itertools import izip_longest
except ImportError:
    from itertools import zip_longest as izip_longest

try:
    from typeutils import make_sentinel
    _MISSING = make_sentinel(var_name='_MISSING')
except ImportError:
    _MISSING = object()


# Index positions inside a linked-list cell. Cells built by
# OrderedMultiDict._insert are [prev_cell, next_cell, key, value].
PREV, NEXT, KEY, VALUE, SPREV, SNEXT = range(6)


class OrderedMultiDict(dict):
    """A MultiDict is a dictionary that can have multiple values per key
    and the OrderedMultiDict (OMD) is a MultiDict that retains
    original insertion order. Common use cases include:

      * handling query strings parsed from URLs
      * inverting a dictionary to create a reverse index (values to keys)
      * stacking data from multiple dictionaries in a non-destructive way

    The OrderedMultiDict constructor is identical to the built-in
    :class:`dict`, and overall the API constitutes an intuitive
    superset of the built-in type:

    >>> omd = OrderedMultiDict()
    >>> omd['a'] = 1
    >>> omd['b'] = 2
    >>> omd.add('a', 3)
    >>> omd.get('a')
    3
    >>> omd.getlist('a')
    [1, 3]

    Some non-:class:`dict`-like behaviors also make an appearance,
    such as support for :func:`reversed`:

    >>> list(reversed(omd))
    ['b', 'a']

    Note that unlike some other MultiDicts, this OMD gives precedence
    to the most recent value added. ``omd['a']`` refers to ``3``, not
    ``1``.

    >>> omd
    OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)])
    >>> omd.poplast('a')
    3
    >>> omd
    OrderedMultiDict([('a', 1), ('b', 2)])
    >>> omd.pop('a')
    1
    >>> omd
    OrderedMultiDict([('b', 2)])

    As the example below shows, calling :func:`dict` on an OMD gives
    the most recently inserted value for each key:

    >>> from pprint import pprint as pp  # ensuring proper key ordering
    >>> omd = OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)])
    >>> pp(dict(omd))
    {'a': 3, 'b': 2}

    If you want a plain dictionary that is safe to modify, flat or
    with all values, use :meth:`OrderedMultiDict.todict()`.

    >>> pp(omd.todict())
    {'a': 3, 'b': 2}
    >>> pp(omd.todict(multi=True))
    {'a': [1, 3], 'b': [2]}

    With ``multi=False``, items appear with the keys in to original
    insertion order, alongside the most-recently inserted value for
    that key.

    >>> OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]).items(multi=False)
    [('a', 3), ('b', 2)]

    Internally the :class:`dict` base stores ``key -> [values]``, while
    insertion order lives in a circular doubly-linked list of cells
    rooted at ``self.root``; ``self._map`` maps each key to its cells.
    """
    def __init__(self, *args, **kwargs):
        if len(args) > 1:
            raise TypeError('%s expected at most 1 argument, got %s'
                            % (self.__class__.__name__, len(args)))
        super(OrderedMultiDict, self).__init__()

        self._clear_ll()
        if args:
            self.update_extend(args[0])
        if kwargs:
            self.update(kwargs)

    def _clear_ll(self):
        "Reset the linked list and key-to-cells map to the empty state."
        try:
            _map = self._map
        except AttributeError:
            # First call (from __init__): create the bookkeeping objects.
            _map = self._map = {}
            self.root = []
        _map.clear()
        # An empty list is a root cell whose PREV and NEXT are itself.
        self.root[:] = [self.root, self.root, None]

    def _insert(self, k, v):
        "Append a ``[prev, next, k, v]`` cell at the tail of the list."
        root = self.root
        cells = self._map.setdefault(k, [])
        last = root[PREV]
        cell = [last, root, k, v]
        last[NEXT] = root[PREV] = cell
        cells.append(cell)

    def add(self, k, v):
        """Add a single value *v* under a key *k*. Existing values under *k*
        are preserved.
        """
        values = super(OrderedMultiDict, self).setdefault(k, [])
        self._insert(k, v)
        values.append(v)

    def addlist(self, k, v):
        """Add an iterable of values underneath a specific key, preserving
        any values already under that key.

        >>> omd = OrderedMultiDict([('a', -1)])
        >>> omd.addlist('a', range(3))
        >>> omd
        OrderedMultiDict([('a', -1), ('a', 0), ('a', 1), ('a', 2)])

        Called ``addlist`` for consistency with :meth:`getlist`, but
        tuples and other sequences and iterables work, including
        one-shot iterators such as generators. An empty iterable is a
        no-op and does not create the key.
        """
        # Materialize once: *v* is walked twice below, which would
        # silently desynchronize the linked list and the value list
        # for a one-shot iterator.
        v = list(v)
        if not v:
            # Avoid leaving an empty value list behind under *k*.
            return
        self_insert = self._insert
        values = super(OrderedMultiDict, self).setdefault(k, [])
        for subv in v:
            self_insert(k, subv)
        values.extend(v)

    def get(self, k, default=None):
        """Return the value for key *k* if present in the dictionary, else
        *default*. If *default* is not given, ``None`` is returned.
        This method never raises a :exc:`KeyError`.

        To get all values under a key, use :meth:`OrderedMultiDict.getlist`.
        """
        return super(OrderedMultiDict, self).get(k, [default])[-1]

    def getlist(self, k, default=_MISSING):
        """Get all values for key *k* as a list, if *k* is in the
        dictionary, else *default*. The list returned is a copy and
        can be safely mutated. If *default* is not given, an empty
        :class:`list` is returned.
        """
        try:
            return super(OrderedMultiDict, self).__getitem__(k)[:]
        except KeyError:
            if default is _MISSING:
                return []
            return default

    def clear(self):
        "Empty the dictionary."
        super(OrderedMultiDict, self).clear()
        self._clear_ll()

    def setdefault(self, k, default=_MISSING):
        """If key *k* is in the dictionary, return its value. If not, insert
        *k* with a value of *default* and return *default*. *default*
        defaults to ``None``. See :meth:`dict.setdefault` for more
        information.
        """
        if not super(OrderedMultiDict, self).__contains__(k):
            self[k] = None if default is _MISSING else default
        return self[k]

    def copy(self):
        "Return a shallow copy of the dictionary."
        return self.__class__(self.iteritems(multi=True))

    @classmethod
    def fromkeys(cls, keys, default=None):
        """Create a dictionary from a list of keys, with all the values
        set to *default*, or ``None`` if *default* is not set.
        """
        return cls([(k, default) for k in keys])

    def update(self, E, **F):
        """Add items from a dictionary or iterable (and/or keyword arguments),
        overwriting values under an existing key. See
        :meth:`dict.update` for more details.

        Values already stored under a key that appears in *E* are
        dropped once, then every pair from *E* is added, so a key
        repeated inside *E* keeps all of its values.
        """
        # E and F are throwback names to the dict() __doc__
        if E is self:
            return
        self_add = self.add
        if isinstance(E, OrderedMultiDict):
            for k in E:
                if k in self:
                    del self[k]
            for k, v in E.iteritems(multi=True):
                self_add(k, v)
        elif hasattr(E, 'keys'):
            for k in E.keys():
                self[k] = E[k]
        else:
            seen = set()
            seen_add = seen.add
            for k, v in E:
                if k not in seen:
                    # First sighting of k in E: clear what was stored
                    # before this update, but nothing added by it.
                    seen_add(k)
                    if k in self:
                        del self[k]
                self_add(k, v)
        for k in F:
            self[k] = F[k]
        return

    def update_extend(self, E, **F):
        """Add items from a dictionary, iterable, and/or keyword
        arguments without overwriting existing items present in the
        dictionary. Like :meth:`update`, but adds to existing keys
        instead of overwriting them.
        """
        if E is self:
            # items() returns a list, so adding while looping is safe.
            iterator = iter(E.items())
        elif isinstance(E, OrderedMultiDict):
            iterator = E.iteritems(multi=True)
        elif hasattr(E, 'keys'):
            iterator = ((k, E[k]) for k in E.keys())
        else:
            iterator = E

        self_add = self.add
        for k, v in iterator:
            self_add(k, v)
        # Keyword arguments are part of the documented contract.
        for k in F:
            self_add(k, F[k])

    def __setitem__(self, k, v):
        # Assignment replaces every value previously stored under k.
        if super(OrderedMultiDict, self).__contains__(k):
            self._remove_all(k)
        self._insert(k, v)
        super(OrderedMultiDict, self).__setitem__(k, [v])

    def __getitem__(self, k):
        # Most recently inserted value wins.
        return super(OrderedMultiDict, self).__getitem__(k)[-1]

    def __delitem__(self, k):
        super(OrderedMultiDict, self).__delitem__(k)
        self._remove_all(k)

    def __eq__(self, other):
        """Compare with another OMD (every item, in insertion order) or
        with any object that has ``keys`` (same keys, each equal to
        this OMD's most recent value). Anything else is unequal.
        """
        if self is other:
            return True
        try:
            if len(other) != len(self):
                return False
        except TypeError:
            return False
        if isinstance(other, OrderedMultiDict):
            selfi = self.iteritems(multi=True)
            otheri = other.iteritems(multi=True)
            # Pad with the private sentinel so that a shorter side can
            # never compare equal to a real (None, None) item.
            zipped_items = izip_longest(selfi, otheri,
                                        fillvalue=(_MISSING, _MISSING))
            for (selfk, selfv), (otherk, otherv) in zipped_items:
                if selfk != otherk or selfv != otherv:
                    return False
            return True
        elif hasattr(other, 'keys'):
            for selfk in self:
                try:
                    if other[selfk] != self[selfk]:
                        return False
                except KeyError:
                    return False
            return True
        return False

    def __ne__(self, other):
        return not (self == other)

    def pop(self, k, default=_MISSING):
        """Remove all values under key *k*, returning the most-recently
        inserted value. Raises :exc:`KeyError` if the key is not
        present and no *default* is provided.
        """
        try:
            return self.popall(k)[-1]
        except KeyError:
            if default is _MISSING:
                raise KeyError(k)
        return default

    def popall(self, k, default=_MISSING):
        """Remove all values under key *k*, returning them in the form of
        a list. Raises :exc:`KeyError` if the key is not present and no
        *default* is provided.
        """
        super_self = super(OrderedMultiDict, self)
        if super_self.__contains__(k):
            self._remove_all(k)
        if default is _MISSING:
            return super_self.pop(k)
        return super_self.pop(k, default)

    def poplast(self, k=_MISSING, default=_MISSING):
        """Remove and return the most-recently inserted value under the key
        *k*, or the most-recently inserted key if *k* is not
        provided. If no values remain under *k*, it will be removed
        from the OMD. Raises :exc:`KeyError` if *k* is not present in
        the dictionary, or the dictionary is empty.
        """
        if k is _MISSING:
            if self:
                k = self.root[PREV][KEY]
            else:
                raise KeyError('empty %r' % type(self))
        try:
            self._remove(k)
        except KeyError:
            if default is _MISSING:
                raise KeyError(k)
            return default
        values = super(OrderedMultiDict, self).__getitem__(k)
        v = values.pop()
        if not values:
            super(OrderedMultiDict, self).__delitem__(k)
        return v

    def _remove(self, k):
        "Unlink the newest cell stored under *k* from the linked list."
        values = self._map[k]
        cell = values.pop()
        cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV]
        if not values:
            del self._map[k]

    def _remove_all(self, k):
        "Unlink every cell stored under *k* and forget the key's cells."
        values = self._map[k]
        while values:
            cell = values.pop()
            cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV]
        del self._map[k]

    def iteritems(self, multi=False):
        """Iterate over the OMD's items in insertion order. By default,
        yields only the most-recently inserted value for each key. Set
        *multi* to ``True`` to get all inserted items.
        """
        root = self.root
        curr = root[NEXT]
        if multi:
            while curr is not root:
                yield curr[KEY], curr[VALUE]
                curr = curr[NEXT]
        else:
            for key in self.iterkeys():
                yield key, self[key]

    def iterkeys(self, multi=False):
        """Iterate over the OMD's keys in insertion order. By default, yields
        each key once, at the position of its first insertion. Set
        *multi* to ``True`` to get all keys, including duplicates, in
        insertion order.
        """
        root = self.root
        curr = root[NEXT]
        if multi:
            while curr is not root:
                yield curr[KEY]
                curr = curr[NEXT]
        else:
            yielded = set()
            yielded_add = yielded.add
            while curr is not root:
                k = curr[KEY]
                if k not in yielded:
                    yielded_add(k)
                    yield k
                curr = curr[NEXT]

    def itervalues(self, multi=False):
        """Iterate over the OMD's values in insertion order. By default,
        yields the most-recently inserted value per unique key.  Set
        *multi* to ``True`` to get all values according to insertion
        order.
        """
        for k, v in self.iteritems(multi=multi):
            yield v

    def todict(self, multi=False):
        """Gets a basic :class:`dict` of the items in this dictionary. Keys
        are the same as the OMD, values are the most recently inserted
        values for each key.

        Setting the *multi* arg to ``True`` maps every key to a list
        of all of its values; the lists are copies that can be safely
        mutated.
        """
        if multi:
            return dict([(k, self.getlist(k)) for k in self])
        return dict([(k, self[k]) for k in self])

    def sorted(self, key=None, reverse=False):
        """Similar to the built-in :func:`sorted`, except this method returns
        a new :class:`OrderedMultiDict` sorted by the provided key
        function, optionally reversed. Every item is kept, including
        multiple values stored under one key.

        Args:
            key (callable): A callable to determine the sort key of
              each element. The callable should expect an **item**
              (key-value pair tuple).
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict(zip(range(3), range(3)))
        >>> omd.sorted(reverse=True)
        OrderedMultiDict([(2, 2), (1, 1), (0, 0)])

        Note that the key function receives an **item** (key-value
        tuple), so the recommended signature looks like:

        >>> omd = OrderedMultiDict(zip('hello', 'world'))
        >>> omd.sorted(key=lambda i: i[1])  # i[0] is the key, i[1] is the val
        OrderedMultiDict([('o', 'd'), ('l', 'l'), ('e', 'o'), ('l', 'r'), ('h', 'w')])
        """
        cls = self.__class__
        # multi=True: otherwise all but the newest value per key are lost.
        return cls(sorted(self.iteritems(multi=True), key=key,
                          reverse=reverse))

    def sortedvalues(self, key=None, reverse=False):
        """Returns a copy of the :class:`OrderedMultiDict` with the same keys
        in the same order as the original OMD, but the values within
        each keyspace have been sorted according to *key* and
        *reverse*.

        Args:
            key (callable): A single-argument callable to determine
              the sort key of each element. It is applied to each
              **value** stored under a key.
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict()
        >>> omd.addlist('even', [6, 2])
        >>> omd.addlist('odd', [1, 5])
        >>> omd.add('even', 4)
        >>> omd.add('odd', 3)
        >>> somd = omd.sortedvalues()
        >>> somd.getlist('even')
        [2, 4, 6]
        >>> somd.keys(multi=True) == omd.keys(multi=True)
        True
        >>> omd == somd
        False
        >>> somd
        OrderedMultiDict([('even', 2), ('even', 4), ('odd', 1), ('odd', 3), ('even', 6), ('odd', 5)])

        As demonstrated above, contents and key order are
        retained. Only value order changes.
        """
        try:
            superself_iteritems = super(OrderedMultiDict, self).iteritems()
        except AttributeError:
            superself_iteritems = super(OrderedMultiDict, self).items()
        # (not reverse) because they pop off in reverse order for reinsertion
        sorted_val_map = dict([(k, sorted(v, key=key, reverse=(not reverse)))
                               for k, v in superself_iteritems])
        ret = self.__class__()
        for k in self.iterkeys(multi=True):
            ret.add(k, sorted_val_map[k].pop())
        return ret

    def inverted(self):
        """Returns a new :class:`OrderedMultiDict` with values and keys
        swapped, like creating dictionary transposition or reverse
        index. Insertion order is retained and all keys and values
        are represented in the output.

        >>> omd = OMD([(0, 2), (1, 2)])
        >>> omd.inverted().getlist(2)
        [0, 1]

        Inverting twice yields a copy of the original:

        >>> omd.inverted().inverted()
        OrderedMultiDict([(0, 2), (1, 2)])
        """
        return self.__class__((v, k) for k, v in self.iteritems(multi=True))

    def counts(self):
        """Returns a mapping from key to number of values inserted under that
        key. Like :py:class:`collections.Counter`, but returns a new
        :class:`OrderedMultiDict`.
        """
        # Returns an OMD because Counter/OrderedDict may not be
        # available, and neither Counter nor dict maintain order.
        super_getitem = super(OrderedMultiDict, self).__getitem__
        return self.__class__((k, len(super_getitem(k))) for k in self)

    def keys(self, multi=False):
        """Returns a list containing the output of :meth:`iterkeys`.  See
        that method's docs for more details.
        """
        return list(self.iterkeys(multi=multi))

    def values(self, multi=False):
        """Returns a list containing the output of :meth:`itervalues`.  See
        that method's docs for more details.
        """
        return list(self.itervalues(multi=multi))

    def items(self, multi=False):
        """Returns a list containing the output of :meth:`iteritems`.  See
        that method's docs for more details.
        """
        return list(self.iteritems(multi=multi))

    def __iter__(self):
        return self.iterkeys()

    def __reversed__(self):
        # Walk the list backwards and yield a key only when the cell
        # reached is the oldest one for that key, i.e. the position
        # where forward iteration yields it.
        root = self.root
        curr = root[PREV]
        lengths = {}
        lengths_sd = lengths.setdefault
        get_values = super(OrderedMultiDict, self).__getitem__
        while curr is not root:
            k = curr[KEY]
            vals = get_values(k)
            if lengths_sd(k, 1) == len(vals):
                yield k
            lengths[k] += 1
            curr = curr[PREV]

    def __repr__(self):
        cn = self.__class__.__name__
        kvs = ', '.join([repr((k, v)) for k, v in self.iteritems(multi=True)])
        return '%s([%s])' % (cn, kvs)

    def viewkeys(self):
        "OMD.viewkeys() -> a set-like object providing a view on OMD's keys"
        return KeysView(self)

    def viewvalues(self):
        "OMD.viewvalues() -> an object providing a view on OMD's values"
        return ValuesView(self)

    def viewitems(self):
        "OMD.viewitems() -> a set-like object providing a view on OMD's items"
        return ItemsView(self)
| 1505 | |
| 1506 | |
| 1507 try: | |
| 1508 # try to import the built-in one anyways | |
| 1509 from boltons.dictutils import OrderedMultiDict | |
| 1510 except ImportError: | |
| 1511 pass | |
| 1512 | |
| 1513 OMD = OrderedMultiDict | |
| 1514 | |
| 1515 | |
class QueryParamDict(OrderedMultiDict):
    """An :class:`~dictutils.OrderedMultiDict` subclass specialized for
    query string values. Text is parsed with :func:`parse_qsl`, so
    keys and values are unquoted on load, and a key given without a
    value is stored with ``None``.

    As the name suggests, multiple values are supported and insertion
    order is preserved.

    >>> qp = QueryParamDict.from_text(u'key=val1&key=val2&utm_source=rtd')
    >>> qp.getlist('key')
    [u'val1', u'val2']
    >>> qp['key']
    u'val2'
    >>> qp.add('key', 'val3')
    >>> qp.to_text()
    'key=val1&key=val2&utm_source=rtd&key=val3'

    See :class:`~dictutils.OrderedMultiDict` for more API features.
    """

    @classmethod
    def from_text(cls, query_string):
        """
        Build a new :class:`QueryParamDict` from *query_string*,
        keeping parameters that have no value.
        """
        return cls(parse_qsl(query_string, keep_blank_values=True))

    def to_text(self, full_quote=False):
        """
        Render and return a query string.

        Every stored item is emitted in insertion order and joined
        with ``&``. An item whose value is ``None`` is rendered as the
        bare key, without ``=``.

        Args:
            full_quote (bool): Whether or not to percent-quote special
                characters or leave them decoded for readability.
        """
        def _quoted(part):
            return quote_query_part(to_unicode(part), full_quote=full_quote)

        rendered = []
        for name, value in self.iteritems(multi=True):
            text = _quoted(name)
            if value is not None:
                text = u'='.join((text, _quoted(value)))
            rendered.append(text)
        return u'&'.join(rendered)
| 1561 | |
| 1562 # TODO: cleanup OMD/cachedproperty etc.? | |
| 1563 | |
| 1564 # end urlutils.py | 
