comparison env/lib/python3.9/site-packages/boltons/urlutils.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """:mod:`urlutils` is a module dedicated to one of software's most
3 versatile, well-aged, and beloved data structures: the URL, also known
4 as the `Uniform Resource Locator`_.
5
6 Among other things, this module is a full reimplementation of URLs,
7 without any reliance on the :mod:`urlparse` or :mod:`urllib` standard
8 library modules. The centerpiece and top-level interface of urlutils
9 is the :class:`URL` type. Also featured is the :func:`find_all_links`
10 convenience function. Some low-level functions and constants are also
11 below.
12
The implementations in this module are based heavily on `RFC 3986`_ and
`RFC 3987`_, and incorporate details from several other RFCs and `W3C
documents`_.
16
17 .. _Uniform Resource Locator: https://en.wikipedia.org/wiki/Uniform_Resource_Locator
18 .. _RFC 3986: https://tools.ietf.org/html/rfc3986
19 .. _RFC 3987: https://tools.ietf.org/html/rfc3987
20 .. _W3C documents: https://www.w3.org/TR/uri-clarification/
21
22 """
23
24 import re
25 import socket
26 import string
27 from unicodedata import normalize
28
# Python 2/3 compatibility: ``unicode`` is the native text type and
# ``unichr`` turns a code point into a one-character text string.
unicode = type(u'')
try:
    unichr
except NameError:
    # No ``unichr`` builtin available; ``chr`` is used in its place.
    unichr = chr

# The unreserved URI characters (per RFC 3986 Section 2.3)
_UNRESERVED_CHARS = frozenset(
    '~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
)

# URL parsing regex (based on RFC 3986 Appendix B, with modifications).
# Named groups: scheme, _netloc_sep, authority, path, query, fragment.
_URL_RE = re.compile(
    r'^((?P<scheme>[^:/?#]+):)?'
    r'((?P<_netloc_sep>//)(?P<authority>[^/?#]*))?'
    r'(?P<path>[^?#]*)'
    r'(\?(?P<query>[^#]*))?'
    r'(#(?P<fragment>.*))?'
)


# Maps every two-hex-digit bytestring (any letter case) to the single
# byte it denotes; used when percent-decoding.
_HEX_CHAR_MAP = {
    (hi + lo).encode('ascii'): unichr(int(hi + lo, 16)).encode('charmap')
    for hi in string.hexdigits
    for lo in string.hexdigits
}
# Splits text into alternating non-ASCII / ASCII runs (the ASCII runs
# are captured, so they are kept in the split result).
_ASCII_RE = re.compile('([\x00-\x7f]+)')
51
52
# Registry of schemes that use a network location, mapped to their
# default port (``None`` when the scheme has no default port).
#
# This port list painstakingly curated by hand searching through
# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
# and
# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
SCHEME_PORT_MAP = {
    'acap': 674, 'afp': 548,
    'dict': 2628, 'dns': 53,
    'file': None, 'ftp': 21,
    'git': 9418, 'gopher': 70,
    'http': 80, 'https': 443,
    'imap': 143, 'ipp': 631, 'ipps': 631, 'irc': 194, 'ircs': 6697,
    'ldap': 389, 'ldaps': 636,
    'mms': 1755, 'msrp': 2855, 'msrps': None, 'mtqp': 1038,
    'nfs': 111, 'nntp': 119, 'nntps': 563,
    'pop': 110, 'prospero': 1525,
    'redis': 6379, 'rsync': 873, 'rtsp': 554, 'rtsps': 322, 'rtspu': 5005,
    'sftp': 22, 'smb': 445, 'snmp': 161, 'ssh': 22, 'steam': None,
    'svn': 3690,
    'telnet': 23,
    'ventrilo': 3784, 'vnc': 5900,
    'wais': 210, 'ws': 80, 'wss': 443,
    'xmpp': None,
}

# This list of schemes that don't use authorities is also from the link above.
NO_NETLOC_SCHEMES = {
    'urn', 'about', 'bitcoin', 'blob', 'data', 'geo', 'magnet',
    'mailto', 'news', 'pkcs11', 'sip', 'sips', 'tel',
}
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
74
# RFC 3986 section 2.2, Reserved Characters
_GEN_DELIMS = frozenset(u':/?#[]@')
_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
_ALL_DELIMS = _GEN_DELIMS.union(_SUB_DELIMS)

# For each URL component: the *_SAFE set holds characters that never
# need escaping there, and the *_DELIMS set holds the reserved
# characters that are not in that component's safe set.
_USERINFO_SAFE = _UNRESERVED_CHARS.union(_SUB_DELIMS)
_USERINFO_DELIMS = _ALL_DELIMS.difference(_USERINFO_SAFE)

_PATH_SAFE = _UNRESERVED_CHARS.union(_SUB_DELIMS, u':@')
_PATH_DELIMS = _ALL_DELIMS.difference(_PATH_SAFE)

_FRAGMENT_SAFE = _UNRESERVED_CHARS.union(_PATH_SAFE, u'/?')
_FRAGMENT_DELIMS = _ALL_DELIMS.difference(_FRAGMENT_SAFE)

# '&', '=' and '+' are removed from the fragment-safe set before the
# union with the unreserved characters (set difference binds tighter
# than union in the original expression).
_QUERY_SAFE = _UNRESERVED_CHARS.union(_FRAGMENT_SAFE.difference(u'&=+'))
_QUERY_DELIMS = _ALL_DELIMS.difference(_QUERY_SAFE)
88
89
class URLParseError(ValueError):
    """Raised when text cannot be parsed into a URL.

    Subclasses :exc:`ValueError`, so existing ``except ValueError``
    handlers keep working. Mostly raised on invalid ports and IPv6
    addresses.
    """
95
96
DEFAULT_ENCODING = 'utf8'


def to_unicode(obj):
    """Coerce *obj* to text.

    Bytestrings are decoded using :data:`DEFAULT_ENCODING`; any other
    object is converted with the text type's constructor.

    Args:
        obj: The object to convert.

    Returns:
        The text form of *obj*.

    Raises:
        UnicodeDecodeError: If *obj* is a bytestring that is not valid
            in :data:`DEFAULT_ENCODING`.
    """
    if isinstance(obj, bytes):
        # On Python 3, calling the text constructor on bytes returns the
        # repr ("b'...'") instead of decoding and never raises
        # UnicodeDecodeError, so bytes must be decoded explicitly.
        return obj.decode(DEFAULT_ENCODING)
    try:
        return unicode(obj)
    except UnicodeDecodeError:
        return unicode(obj, encoding=DEFAULT_ENCODING)
105
106
# regex from gruber via tornado
# doesn't support ipv6
# doesn't support mailto (netloc-less schemes)
_FIND_ALL_URL_RE = re.compile(to_unicode(r"""\b((?:([\w-]+):(/{1,3})|www[.])(?:(?:(?:[^\s&()<>]|&amp;|&quot;)*(?:[^!"#$%'()*+,.:;<=>?@\[\]^`{|}~\s]))|(?:\((?:[^\s&()]|&amp;|&quot;)*\)))+)"""))


def find_all_links(text, with_text=False, default_scheme='https', schemes=()):
    """This function uses heuristics to search plain text for strings
    that look like URLs, returning a :class:`list` of :class:`URL`
    objects. It supports limiting the accepted schemes, and returning
    interleaved text as well.

    >>> find_all_links('Visit https://boltons.rtfd.org!')
    [URL(u'https://boltons.rtfd.org')]
    >>> find_all_links('Visit https://boltons.rtfd.org!', with_text=True)
    [u'Visit ', URL(u'https://boltons.rtfd.org'), u'!']

    Args:
        text (str): The text to search.

        with_text (bool): Whether or not to interleave plaintext blocks
            with the returned URL objects. Having all tokens can be
            useful for transforming the text, e.g., replacing links with
            HTML equivalents. Defaults to ``False``.

        default_scheme (str): Many URLs are written without the scheme
            component. This function can match a reasonable subset of
            those, provided *default_scheme* is set to a string. Set to
            ``False`` to disable matching scheme-less URLs. Defaults to
            ``'https'``.

        schemes (list): A list of strings that a URL's scheme must
            match in order to be included in the results. Defaults to
            empty, which matches all schemes.

    Returns:
        list: :class:`URL` objects in order of appearance. When
        *with_text* is true, the surrounding text (including any
        candidate that was rejected or failed to parse) is interleaved
        as text strings, with adjacent text merged into one string.
        When *with_text* is false, only :class:`URL` objects are
        returned.

    .. note:: Currently this function does not support finding IPv6
       addresses or URLs with netloc-less schemes, like mailto.

    """
    text = to_unicode(text)
    prev_end = 0
    ret = []
    _add = ret.append

    def _add_text(t):
        # Plaintext is emitted only on request; consecutive text runs
        # are merged so callers never see two strings in a row.
        if not with_text or not t:
            return
        if ret and isinstance(ret[-1], unicode):
            ret[-1] += t
        else:
            _add(t)

    for match in _FIND_ALL_URL_RE.finditer(text):
        start, end = match.start(1), match.end(1)
        _add_text(text[prev_end:start])
        prev_end = end
        cur_url_text = match.group(0)
        try:
            cur_url = URL(cur_url_text)
            if not cur_url.scheme and default_scheme:
                cur_url = URL(default_scheme + '://' + cur_url_text)
        except URLParseError:
            # currently this should only be hit with broken port
            # strings. the regex above doesn't support ipv6 addresses
            _add_text(text[start:end])
            continue

        if not cur_url.scheme:
            # scheme-less match while scheme-less matching is disabled
            _add_text(text[start:end])
        elif schemes and cur_url.scheme not in schemes:
            _add_text(text[start:end])
        else:
            _add(cur_url)

    _add_text(text[prev_end:])
    return ret
187
188
189 def _make_quote_map(safe_chars):
190 ret = {}
191 # v is included in the dict for py3 mostly, because bytestrings
192 # are iterables of ints, of course!
193 for i, v in zip(range(256), range(256)):
194 c = chr(v)
195 if c in safe_chars:
196 ret[c] = ret[v] = c
197 else:
198 ret[c] = ret[v] = '%{0:02X}'.format(i)
199 return ret
200
201
# One precomputed quoting table per URL component, each built from that
# component's set of safe characters.
_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
_QUERY_PART_QUOTE_MAP = _make_quote_map(_QUERY_SAFE)
_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
206
207
def quote_path_part(text, full_quote=True):
    """Percent-encode a single segment of a URL path.

    With *full_quote* set, *text* is NFC-normalized, UTF-8 encoded, and
    every byte outside the path-safe set is escaped. Otherwise only the
    reserved delimiters that are not path-safe are escaped and all
    other characters pass through untouched.
    """
    if not full_quote:
        return u''.join([_PATH_PART_QUOTE_MAP[ch] if ch in _PATH_DELIMS
                         else ch
                         for ch in text])
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join([_PATH_PART_QUOTE_MAP[byte] for byte in encoded])
217
218
def quote_query_part(text, full_quote=True):
    """Percent-encode a single query string key or value.

    With *full_quote* set, *text* is NFC-normalized, UTF-8 encoded, and
    every byte outside the query-safe set is escaped. Otherwise only the
    reserved delimiters that are not query-safe are escaped.
    """
    if not full_quote:
        return u''.join([_QUERY_PART_QUOTE_MAP[ch] if ch in _QUERY_DELIMS
                         else ch
                         for ch in text])
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join([_QUERY_PART_QUOTE_MAP[byte] for byte in encoded])
228
229
def quote_fragment_part(text, full_quote=True):
    """Quote the fragment part of the URL. Fragments don't have
    subdelimiters, so the whole URL fragment can be passed.

    With *full_quote* set, *text* is NFC-normalized, UTF-8 encoded, and
    every byte outside the fragment-safe set is escaped. Otherwise only
    the reserved delimiters that are not fragment-safe are escaped.
    """
    if not full_quote:
        return u''.join([_FRAGMENT_QUOTE_MAP[ch] if ch in _FRAGMENT_DELIMS
                         else ch
                         for ch in text])
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join([_FRAGMENT_QUOTE_MAP[byte] for byte in encoded])
239
240
def quote_userinfo_part(text, full_quote=True):
    """Quote special characters in either the username or password
    section of the URL. Note that userinfo in URLs is considered
    deprecated in many circles (especially browsers), and support for
    percent-encoded userinfo can be spotty.

    With *full_quote* set, *text* is NFC-normalized, UTF-8 encoded, and
    every byte outside the userinfo-safe set is escaped. Otherwise only
    the reserved delimiters that are not userinfo-safe are escaped.
    """
    if not full_quote:
        return u''.join([_USERINFO_PART_QUOTE_MAP[ch]
                         if ch in _USERINFO_DELIMS else ch
                         for ch in text])
    encoded = normalize('NFC', to_unicode(text)).encode('utf8')
    return u''.join([_USERINFO_PART_QUOTE_MAP[byte] for byte in encoded])
252
253
def unquote(string, encoding='utf-8', errors='replace'):
    """Percent-decode a string, by replacing %xx escapes with their
    single-character equivalent. The optional *encoding* and *errors*
    parameters specify how to decode percent-encoded sequences into
    Unicode characters, as accepted by the :meth:`bytes.decode()` method. By
    default, percent-encoded sequences are decoded with UTF-8, and
    invalid sequences are replaced by a placeholder character.

    >>> unquote(u'abc%20def')
    u'abc def'
    """
    if '%' not in string:
        # Attribute access only: raises AttributeError when *string*
        # has no ``split`` attribute, i.e. is not string-like.
        string.split
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'replace' if errors is None else errors

    # The split alternates non-ASCII text (even indexes) with captured
    # ASCII runs (odd indexes); only the ASCII runs can hold escapes.
    bits = _ASCII_RE.split(string)
    pieces = [bits[0]]
    for ascii_run, following in zip(bits[1::2], bits[2::2]):
        pieces.append(unquote_to_bytes(ascii_run).decode(encoding, errors))
        pieces.append(following)
    return ''.join(pieces)
279
280
def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    Text input is first encoded as UTF-8. A ``%`` that is not followed
    by two hex digits is kept in the output unchanged.
    """
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, unicode):
        string = string.encode('utf-8')

    chunks = string.split(b'%')
    if len(chunks) == 1:
        # no '%' at all, so there is nothing to decode
        return string

    out = [chunks[0]]
    for chunk in chunks[1:]:
        hex_pair = chunk[:2]
        if hex_pair in _HEX_CHAR_MAP:
            out.append(_HEX_CHAR_MAP[hex_pair])
            out.append(chunk[2:])
        else:
            # not a valid escape: restore the '%' the split removed
            out.append(b'%')
            out.append(chunk)
    return b''.join(out)
306
307
def register_scheme(text, uses_netloc=None, default_port=None):
    """Registers new scheme information, resulting in correct port and
    slash behavior from the URL object. There are dozens of standard
    schemes preregistered, so this function is mostly meant for
    proprietary internal customizations or stopgaps on missing
    standards information. If a scheme seems to be missing, please
    `file an issue`_!

    Args:
        text (str): Text representing the scheme.
           (the 'http' in 'http://hatnote.com')
        uses_netloc (bool): Does the scheme support specifying a
           network host? For instance, "http" does, "mailto" does not.
           ``None`` (the default) registers nothing.
        default_port (int): The default port, if any, for netloc-using
           schemes.

    Raises:
        ValueError: If *default_port* cannot be converted to an
            integer, if a *default_port* is given for a non-netloc
            scheme, or if *uses_netloc* is not ``True``, ``False``, or
            ``None``.

    .. _file an issue: https://github.com/mahmoud/boltons/issues
    """
    text = text.lower()
    if default_port is not None:
        try:
            default_port = int(default_port)
        except (TypeError, ValueError):
            # int() raises TypeError for non-numeric types such as
            # lists; report both failure modes the same way.
            raise ValueError('default_port expected integer or None, not %r'
                             % (default_port,))

    if uses_netloc is True:
        # A scheme belongs to exactly one registry, so drop any stale
        # entry on the other side when it is re-registered.
        NO_NETLOC_SCHEMES.discard(text)
        SCHEME_PORT_MAP[text] = default_port
    elif uses_netloc is False:
        if default_port is not None:
            raise ValueError('unexpected default port while specifying'
                             ' non-netloc scheme: %r' % default_port)
        # URL.uses_netloc consults SCHEME_PORT_MAP first, so a leftover
        # entry there would make this registration ineffective.
        SCHEME_PORT_MAP.pop(text, None)
        NO_NETLOC_SCHEMES.add(text)
    elif uses_netloc is not None:
        raise ValueError('uses_netloc expected True, False, or None')

    return
345
346
def resolve_path_parts(path_parts):
    """Normalize the URL path by resolving segments of '.' and '..',
    resulting in a dot-free path. See RFC 3986 section 5.2.4, Remove
    Dot Segments.

    Returns a new :class:`list` of segments. A path ending in '.' or
    '..' gets a trailing empty segment appended.
    """
    # TODO: what to do with multiple slashes
    resolved = []

    for segment in path_parts:
        if segment == u'.':
            continue
        if segment != u'..':
            resolved.append(segment)
        elif len(resolved) > 1 or (resolved and resolved[0]):
            # prevent unrooting: a lone leading empty segment is kept
            resolved.pop()

    if list(path_parts[-1:]) in ([u'.'], [u'..']):
        resolved.append(u'')

    return resolved
368
369
class cachedproperty(object):
    """The ``cachedproperty`` is used similar to :class:`property`, except
    that the wrapped method is only called once. This is commonly used
    to implement lazy attributes.

    After the property has been accessed, the value is stored on the
    instance itself, using the same name as the cachedproperty. This
    allows the cache to be cleared with :func:`delattr`, or through
    manipulating the object's ``__dict__``.
    """
    def __init__(self, func):
        self.__doc__ = getattr(func, '__doc__')
        self.func = func

    def __get__(self, obj, objtype=None):
        # Accessed on the class rather than an instance: hand back the
        # descriptor itself.
        if obj is None:
            return self
        # Compute once and store under the wrapped function's name, so
        # the instance attribute is found first on later lookups.
        result = self.func(obj)
        obj.__dict__[self.func.__name__] = result
        return result

    def __repr__(self):
        return '<%s func=%s>' % (self.__class__.__name__, self.func)
393
394
class URL(object):
    r"""The URL is one of the most ubiquitous data structures in the
    virtual and physical landscape. From blogs to billboards, URLs are
    so common, that it's easy to overlook their complexity and
    power.

    There are 8 parts of a URL, each with its own semantics and
    special characters:

      * :attr:`~URL.scheme`
      * :attr:`~URL.username`
      * :attr:`~URL.password`
      * :attr:`~URL.host`
      * :attr:`~URL.port`
      * :attr:`~URL.path`
      * :attr:`~URL.query_params` (query string parameters)
      * :attr:`~URL.fragment`

    Each is exposed as an attribute on the URL object. RFC 3986 offers
    this brief structural summary of the main URL components::

        foo://user:pass@example.com:8042/over/there?name=ferret#nose
        \_/   \_______/ \_________/ \__/\_________/ \_________/ \__/
         |        |          |        |      |           |        |
       scheme  userinfo     host     port   path       query   fragment

    And here's how that example can be manipulated with the URL type:

    >>> url = URL('foo://example.com:8042/over/there?name=ferret#nose')
    >>> print(url.host)
    example.com
    >>> print(url.get_authority())
    example.com:8042
    >>> print(url.qp['name']) # qp is a synonym for query_params
    ferret

    URL's approach to encoding is that inputs are decoded as much as
    possible, and data remains in this decoded state until re-encoded
    using the :meth:`~URL.to_text()` method. In this way, it's similar
    to Python's current approach of encouraging immediate decoding of
    bytes to text.

    Note that URL instances are mutable objects. If an immutable
    representation of the URL is desired, the string from
    :meth:`~URL.to_text()` may be used. For an immutable, but
    almost-as-featureful, URL object, check out the `hyperlink
    package`_.

    .. _hyperlink package: https://github.com/mahmoud/hyperlink

    """

    # public attributes (for comparison, see __eq__):
    _cmp_attrs = ('scheme', 'uses_netloc', 'username', 'password',
                  'family', 'host', 'port', 'path', 'query_params', 'fragment')

    def __init__(self, url=''):
        """Parse *url* (text, UTF-8 bytes, or another :class:`URL`).

        Raises:
            URLParseError: If *url* is bytes that cannot be decoded
                with :data:`DEFAULT_ENCODING`, or if parsing fails.
        """
        # TODO: encoding param. The encoding that underlies the
        # percent-encoding is always utf8 for IRIs, but can be Latin-1
        # for other usage schemes.
        ud = DEFAULT_PARSED_URL
        if url:
            if isinstance(url, URL):
                url = url.to_text()  # better way to copy URLs?
            elif isinstance(url, bytes):
                try:
                    url = url.decode(DEFAULT_ENCODING)
                except UnicodeDecodeError as ude:
                    raise URLParseError('expected text or %s-encoded bytes.'
                                        ' try decoding the url bytes and'
                                        ' passing the result. (got: %s)'
                                        % (DEFAULT_ENCODING, ude))
            ud = parse_url(url)

        _e = u''

        def _decoded(value):
            # Missing components become empty text; percent-escapes are
            # decoded only when an escape is actually present.
            value = value or _e
            return unquote(value) if '%' in value else value

        self.scheme = ud['scheme'] or _e
        self._netloc_sep = ud['_netloc_sep'] or _e
        self.username = _decoded(ud['username'])
        self.password = _decoded(ud['password'])
        self.family = ud['family']

        if not ud['host']:
            self.host = _e
        else:
            try:
                self.host = ud['host'].encode("ascii")
            except UnicodeEncodeError:
                self.host = ud['host']  # already non-ascii text
            else:
                # ASCII hosts go through the IDNA codec on the way in
                self.host = self.host.decode("idna")

        self.port = ud['port']
        self.path_parts = tuple([_decoded(p) for p
                                 in (ud['path'] or _e).split(u'/')])
        self._query = ud['query'] or _e
        self.fragment = _decoded(ud['fragment'])
        # TODO: possibly use None as marker for empty vs missing

    @classmethod
    def from_parts(cls, scheme=None, host=None, path_parts=(), query_params=(),
                   fragment=u'', port=None, username=None, password=None):
        """Build a new URL from parts. Note that the respective arguments are
        not in the order they would appear in a URL:

        Args:
            scheme (str): The scheme of a URL, e.g., 'http'
            host (str): The host string, e.g., 'hatnote.com'
            path_parts (tuple): The individual text segments of the
              path, e.g., ('post', '123')
            query_params (dict): An OMD, dict, or list of (key, value)
              pairs representing the keys and values of the URL's query
              parameters.
            fragment (str): The fragment of the URL, e.g., 'anchor1'
            port (int): The integer port of URL, automatic defaults are
              available for registered schemes.
            username (str): The username for the userinfo part of the URL.
            password (str): The password for the userinfo part of the URL.

        Omitted text components are stored as empty text, matching what
        the constructor produces for a URL lacking them.

        Note that this method does relatively little
        validation. :meth:`URL.to_text()` should be used to check if
        any errors are produced while composing the final textual URL.
        """
        ret = cls()

        # Text components are normalized to u'' rather than left as
        # None: uses_netloc, default_port, normalize() and __eq__ all
        # call string methods on them.
        ret.scheme = scheme or u''
        ret.host = host or u''
        ret.path_parts = tuple(path_parts) or (u'',)
        ret.query_params.update(query_params)
        ret.fragment = fragment or u''
        ret.port = port
        ret.username = username or u''
        ret.password = password or u''

        return ret

    @cachedproperty
    def query_params(self):
        """The parsed form of the query string of the URL, represented as a
        :class:`~dictutils.OrderedMultiDict`. Also available as the
        handy alias ``qp``.

        >>> url = URL('http://boltons.readthedocs.io/?utm_source=doctest&python=great')
        >>> url.qp.keys()
        [u'utm_source', u'python']
        """
        return QueryParamDict.from_text(self._query)

    # ``qp`` must delegate instead of aliasing the descriptor: the
    # cachedproperty stores its value under the name 'query_params', so
    # an aliased ``qp`` would never find a cached value and would
    # re-parse the original query string (discarding edits) on every
    # access.
    @property
    def qp(self):
        "Short alias for :attr:`query_params`."
        return self.query_params

    @qp.setter
    def qp(self, value):
        self.query_params = value

    @property
    def path(self):
        "The URL's path, in text form."
        return u'/'.join([quote_path_part(p, full_quote=False)
                          for p in self.path_parts])

    @path.setter
    def path(self, path_text):
        self.path_parts = tuple([unquote(p) if '%' in p else p
                                 for p in to_unicode(path_text).split(u'/')])

    @property
    def uses_netloc(self):
        """Whether or not a URL uses :code:`:` or :code:`://` to separate the
        scheme from the rest of the URL depends on the scheme's own
        standard definition. There is no way to infer this behavior
        from other parts of the URL. A scheme either supports network
        locations or it does not.

        The URL type's approach to this is to check for explicitly
        registered schemes, with common schemes like HTTP
        preregistered. This is the same approach taken by
        :mod:`urlparse`.

        URL adds two additional heuristics if the scheme as a whole is
        not registered. First, it attempts to check the subpart of the
        scheme after the last ``+`` character. This adds intuitive
        behavior for schemes like ``git+ssh``. Second, if a URL with
        an unrecognized scheme is loaded, it will maintain the
        separator it sees.

        For an unrecognized scheme the returned value is the separator
        text seen at parse time (``'//'`` or empty), which is meant to
        be used for its truthiness.

        >>> print(URL('fakescheme://test.com').to_text())
        fakescheme://test.com
        >>> print(URL('mockscheme:hello:world').to_text())
        mockscheme:hello:world

        """
        default = self._netloc_sep
        if self.scheme in SCHEME_PORT_MAP:
            return True
        if self.scheme in NO_NETLOC_SCHEMES:
            return False
        if self.scheme.split('+')[-1] in SCHEME_PORT_MAP:
            return True
        return default

    @property
    def default_port(self):
        """Return the default port for the currently-set scheme. Returns
        ``None`` if the scheme is unrecognized. See
        :func:`register_scheme` above. If :attr:`~URL.port` matches
        this value, no port is emitted in the output of
        :meth:`~URL.to_text()`.

        Applies the same '+' heuristic detailed in :meth:`URL.uses_netloc`.
        """
        try:
            return SCHEME_PORT_MAP[self.scheme]
        except KeyError:
            return SCHEME_PORT_MAP.get(self.scheme.split('+')[-1])

    def normalize(self, with_case=True):
        """Resolve any "." and ".." references in the path, as well as
        normalize scheme and host casing. To turn off case
        normalization, pass ``with_case=False``.

        The URL is modified in place; nothing is returned.

        More information can be found in `Section 6.2.2 of RFC 3986`_.

        .. _Section 6.2.2 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-6.2.2
        """
        # Keep path_parts a tuple, as every other assignment in this
        # class does, so later tuple operations keep working.
        self.path_parts = tuple(resolve_path_parts(self.path_parts))

        if with_case:
            self.scheme = self.scheme.lower()
            self.host = self.host.lower()

    def navigate(self, dest):
        """Factory method that returns a _new_ :class:`URL` based on a given
        destination, *dest*. Useful for navigating those relative
        links with ease.

        The newly created :class:`URL` is normalized before being returned.

        >>> url = URL('http://boltons.readthedocs.io')
        >>> url.navigate('en/latest/')
        URL(u'http://boltons.readthedocs.io/en/latest/')

        Args:
           dest (str): A string or URL object representing the destination

        More information can be found in `Section 5 of RFC 3986`_.

        .. _Section 5 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-5
        """
        orig_dest = None
        if not isinstance(dest, URL):
            dest, orig_dest = URL(dest), dest
        if dest.scheme and dest.host:
            # absolute URLs replace everything, but don't make an
            # extra copy if we don't have to
            return URL(dest) if orig_dest is None else dest
        query_params = dest.query_params

        if dest.path:
            if dest.path.startswith(u'/'):   # absolute path
                new_path_parts = list(dest.path_parts)
            else:  # relative path
                # Coerce both sides to lists so the concatenation works
                # whether path_parts holds a tuple or a list.
                new_path_parts = (list(self.path_parts[:-1])
                                  + list(dest.path_parts))
        else:
            new_path_parts = list(self.path_parts)
            if not query_params:
                query_params = self.query_params

        ret = self.from_parts(scheme=dest.scheme or self.scheme,
                              host=dest.host or self.host,
                              port=dest.port or self.port,
                              path_parts=new_path_parts,
                              query_params=query_params,
                              fragment=dest.fragment,
                              username=dest.username or self.username,
                              password=dest.password or self.password)
        # from_parts() does not set the address family; carry it over
        # with the host so IPv6 hosts keep their brackets in to_text().
        ret.family = dest.family if dest.host else self.family
        ret.normalize()
        return ret

    def get_authority(self, full_quote=False, with_userinfo=False):
        """Used by URL schemes that have a network location,
        :meth:`~URL.get_authority` combines :attr:`username`,
        :attr:`password`, :attr:`host`, and :attr:`port` into one
        string, the *authority*, that is used for
        connecting to a network-accessible resource.

        Used internally by :meth:`~URL.to_text()` and can be useful
        for labeling connections.

        >>> url = URL('ftp://user@ftp.debian.org:2121/debian/README')
        >>> print(url.get_authority())
        ftp.debian.org:2121
        >>> print(url.get_authority(with_userinfo=True))
        user@ftp.debian.org:2121

        Args:
           full_quote (bool): Whether or not to apply IDNA encoding.
              Defaults to ``False``.
           with_userinfo (bool): Whether or not to include username
              and password, technically part of the
              authority. Defaults to ``False``.

        """
        parts = []
        _add = parts.append
        # userinfo is emitted only when a username is present
        if self.username and with_userinfo:
            _add(quote_userinfo_part(self.username))
            if self.password:
                _add(':')
                _add(quote_userinfo_part(self.password))
            _add('@')
        if self.host:
            if self.family == socket.AF_INET6:
                _add('[')
                _add(self.host)
                _add(']')
            elif full_quote:
                _add(self.host.encode('idna').decode('ascii'))
            else:
                _add(self.host)
            # TODO: 0 port?
            if self.port and self.port != self.default_port:
                _add(':')
                _add(unicode(self.port))
        return u''.join(parts)

    def to_text(self, full_quote=False):
        """Render a string representing the current state of the URL
        object.

        >>> url = URL('http://listen.hatnote.com')
        >>> url.fragment = 'en'
        >>> print(url.to_text())
        http://listen.hatnote.com#en

        By setting the *full_quote* flag, the URL can either be fully
        quoted or minimally quoted. The most common characteristic of
        an encoded-URL is the presence of percent-encoded text (e.g.,
        %60).  Unquoted URLs are more readable and suitable
        for display, whereas fully-quoted URLs are more conservative
        and generally necessary for sending over the network.
        """
        scheme = self.scheme
        path = u'/'.join([quote_path_part(p, full_quote=full_quote)
                          for p in self.path_parts])
        authority = self.get_authority(full_quote=full_quote,
                                       with_userinfo=True)
        query_string = self.query_params.to_text(full_quote=full_quote)
        fragment = quote_fragment_part(self.fragment, full_quote=full_quote)

        parts = []
        _add = parts.append
        if scheme:
            _add(scheme)
            _add(':')
        if authority:
            _add('//')
            _add(authority)
        elif (scheme and path[:2] != '//' and self.uses_netloc):
            _add('//')
        if path:
            if scheme and authority and path[:1] != '/':
                _add('/')
                # TODO: i think this is here because relative paths
                # with absolute authorities = undefined
            _add(path)
        if query_string:
            _add('?')
            _add(query_string)
        if fragment:
            _add('#')
            _add(fragment)
        return u''.join(parts)

    def __repr__(self):
        cn = self.__class__.__name__
        return u'%s(%r)' % (cn, self.to_text())

    def __str__(self):
        return self.to_text()

    def __unicode__(self):
        return self.to_text()

    def __eq__(self, other):
        # Equal when every attribute named in _cmp_attrs matches;
        # attributes missing on *other* compare as None.
        for attr in self._cmp_attrs:
            if not getattr(self, attr) == getattr(other, attr, None):
                return False
        return True

    def __ne__(self, other):
        return not self == other
787
788
# Prefer the socket module's own inet_pton; when the import fails, fall
# back to a ctypes wrapper around the ws2_32 (Winsock) library.
try:
    from socket import inet_pton
except ImportError:
    # from https://gist.github.com/nnemkin/4966028
    import ctypes

    class _sockaddr(ctypes.Structure):
        # Buffer handed to WSAStringToAddressA; the parsed address is
        # read back from ipv4_addr or ipv6_addr depending on family.
        _fields_ = [("sa_family", ctypes.c_short),
                    ("__pad1", ctypes.c_ushort),
                    ("ipv4_addr", ctypes.c_byte * 4),
                    ("ipv6_addr", ctypes.c_byte * 16),
                    ("__pad2", ctypes.c_ulong)]

    WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA
    WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA

    def inet_pton(address_family, ip_string):
        """Convert *ip_string* to its packed binary form for
        *address_family*, raising :exc:`socket.error` on failure or on
        an unknown address family.
        """
        sockaddr = _sockaddr()
        ip_string = ip_string.encode('ascii')
        sockaddr.sa_family = address_family
        sockaddr_size = ctypes.c_int(ctypes.sizeof(sockaddr))

        status = WSAStringToAddressA(ip_string, address_family, None,
                                     ctypes.byref(sockaddr),
                                     ctypes.byref(sockaddr_size))
        if status != 0:
            raise socket.error(ctypes.FormatError())

        if address_family == socket.AF_INET:
            return ctypes.string_at(sockaddr.ipv4_addr, 4)
        elif address_family == socket.AF_INET6:
            return ctypes.string_at(sockaddr.ipv6_addr, 16)
        raise socket.error('unknown address family')
819
820
def parse_host(host):
    """\
    Low-level function used to parse the host portion of a URL.

    Returns a tuple of (family, host) where *family* is a
    :mod:`socket` module constant or ``None``, and host is a string.
    A bracketed IPv6 host is returned without its brackets.

    >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
    True
    >>> parse_host('[::1]') == (socket.AF_INET6, '::1')
    True
    >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
    True

    Odd doctest formatting above due to py3's switch from int to enums
    for :mod:`socket` constants.

    Raises :class:`URLParseError` when a bracketed host is not a valid
    IPv6 address.
    """
    if not host:
        return None, u''

    bracketed = u':' in host and host[0] == u'[' and host[-1] == u']'
    if bracketed:
        host = host[1:-1]
        try:
            inet_pton(socket.AF_INET6, host)
        except socket.error as se:
            raise URLParseError('invalid IPv6 host: %r (%r)' % (host, se))
        except UnicodeEncodeError:
            # TODO: this can't be a real host right?
            # Falls through to the IPv4 check with the brackets removed.
            pass
        else:
            return socket.AF_INET6, host

    try:
        inet_pton(socket.AF_INET, host)
    except (socket.error, UnicodeEncodeError):
        return None, host  # not an IP
    return socket.AF_INET, host
859
860
def parse_url(url_text):
    """\
    Used to parse the text for a single URL into a dictionary, used
    internally by the :class:`URL` type.

    Note that "URL" has a very narrow, standards-based
    definition. While :func:`parse_url` may raise
    :class:`URLParseError` under a very limited number of conditions,
    such as non-integer port, a surprising number of strings are
    technically valid URLs. For instance, the text ``"url"`` is a
    valid URL, because it is a relative path.

    In short, do not expect this function to validate form inputs or
    other more colloquial usages of URLs.

    >>> res = parse_url('http://127.0.0.1:3000/?a=1')
    >>> sorted(res.keys())  # res is a basic dictionary
    ['_netloc_sep', 'authority', 'family', 'fragment', 'host', 'password', 'path', 'port', 'query', 'scheme', 'username']
    """
    url_text = unicode(url_text)
    matched = _URL_RE.match(url_text)
    if matched is None:
        raise URLParseError('could not parse url: %r' % url_text)
    parsed = matched.groupdict()

    # authority = [userinfo "@"] hostinfo; userinfo = user [":" password]
    authority = parsed['authority']
    user = pw = None
    hostinfo = authority
    if authority:
        userinfo, at_sign, hostinfo = authority.rpartition('@')
        if at_sign:
            # TODO: empty userinfo error?
            user, _, pw = userinfo.partition(':')

    host = port = None
    if hostinfo:
        host, colon, port_str = hostinfo.partition(u':')
        if colon:
            if host.startswith(u'[') and u']' in port_str:
                # A bracketed IPv6 literal was split at its first
                # colon; rebuild the host through the closing bracket.
                host_rest, _, port_str = port_str.partition(u']')
                host = u''.join([host, u':', host_rest, u']'])
                if port_str.startswith(u':'):
                    port_str = port_str[1:]

            # empty ports ok according to RFC 3986 6.2.3
            if port_str:
                try:
                    port = int(port_str)
                except ValueError:
                    raise URLParseError('expected integer for port, not %r'
                                        % port_str)

    family, host = parse_host(host)

    parsed['username'] = user
    parsed['password'] = pw
    parsed['family'] = family
    parsed['host'] = host
    parsed['port'] = port
    return parsed


# Parse result for the empty URL; the default when URL() gets no text.
DEFAULT_PARSED_URL = parse_url('')
926
927
def parse_qsl(qs, keep_blank_values=True, encoding=DEFAULT_ENCODING):
    """
    Converts a query string into a list of (key, value) pairs.

    Pairs are separated by ``&`` or ``;``. In both keys and values,
    ``+`` is treated as a space and percent-escapes are decoded using
    *encoding*.

    Args:
        qs (str): The query string, without the leading ``?``.
        keep_blank_values (bool): When true (the default), a key with
            no value or an empty value is kept with a value of
            ``None``; when false, such pairs are dropped.
        encoding (str): The encoding used to decode percent-escaped
            bytes. Defaults to :data:`DEFAULT_ENCODING`.

    Returns:
        list: ``(key, value)`` tuples in order of appearance.
    """
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    ret = []
    for pair in pairs:
        if not pair:
            continue
        key, _, value = pair.partition('=')
        if not value:
            if keep_blank_values:
                value = None
            else:
                continue
        # Pass *encoding* through; it used to be accepted but ignored.
        key = unquote(key.replace('+', ' '), encoding=encoding)
        if value:
            value = unquote(value.replace('+', ' '), encoding=encoding)
        ret.append((key, value))
    return ret
948
949
950 """
951 # What follows is the OrderedMultiDict from dictutils.py, circa
952 # 20161021, used for the QueryParamDict, toward the bottom.
953 """
954
try:
    from collections.abc import KeysView, ValuesView, ItemsView
except ImportError:
    # interpreters without collections.abc
    from collections import KeysView, ValuesView, ItemsView

try:
    from itertools import izip_longest
except ImportError:
    from itertools import zip_longest as izip_longest

try:
    from typeutils import make_sentinel
    _MISSING = make_sentinel(var_name='_MISSING')
except ImportError:
    _MISSING = object()


# Index positions within a linked-list cell: [PREV, NEXT, KEY, VALUE].
PREV, NEXT, KEY, VALUE, SPREV, SNEXT = range(6)


class OrderedMultiDict(dict):
    """A MultiDict is a dictionary that can have multiple values per key
    and the OrderedMultiDict (OMD) is a MultiDict that retains
    original insertion order. Common use cases include:

      * handling query strings parsed from URLs
      * inverting a dictionary to create a reverse index (values to keys)
      * stacking data from multiple dictionaries in a non-destructive way

    The OrderedMultiDict constructor is identical to the built-in
    :class:`dict`, and overall the API constitutes an intuitive
    superset of the built-in type:

    >>> omd = OrderedMultiDict()
    >>> omd['a'] = 1
    >>> omd['b'] = 2
    >>> omd.add('a', 3)
    >>> omd.get('a')
    3
    >>> omd.getlist('a')
    [1, 3]

    Some non-:class:`dict`-like behaviors also make an appearance,
    such as support for :func:`reversed`:

    >>> list(reversed(omd))
    ['b', 'a']

    Note that unlike some other MultiDicts, this OMD gives precedence
    to the most recent value added. ``omd['a']`` refers to ``3``, not
    ``1``.

    >>> omd
    OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)])
    >>> omd.poplast('a')
    3
    >>> omd
    OrderedMultiDict([('a', 1), ('b', 2)])
    >>> omd.pop('a')
    1
    >>> omd
    OrderedMultiDict([('b', 2)])

    Calling :func:`dict` on an OMD gives the result shown below:

    >>> from pprint import pprint as pp  # ensuring proper key ordering
    >>> omd = OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)])
    >>> pp(dict(omd))
    {'a': 3, 'b': 2}

    If you want a safe-to-modify flat dictionary, or a dictionary of
    keys to lists of values, use :meth:`OrderedMultiDict.todict()`.

    >>> pp(omd.todict())
    {'a': 3, 'b': 2}
    >>> pp(omd.todict(multi=True))
    {'a': [1, 3], 'b': [2]}

    With ``multi=False``, items appear with the keys in their original
    insertion order, alongside the most-recently inserted value for
    that key.

    >>> OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]).items(multi=False)
    [('a', 3), ('b', 2)]

    Implementation notes: the underlying :class:`dict` maps each key to
    the list of its values, ``self._map`` maps each key to the list of
    its linked-list cells, and ``self.root`` is the sentinel cell of a
    circular doubly-linked list that records global insertion order.
    """
    def __init__(self, *args, **kwargs):
        if len(args) > 1:
            raise TypeError('%s expected at most 1 argument, got %s'
                            % (self.__class__.__name__, len(args)))
        super(OrderedMultiDict, self).__init__()

        self._clear_ll()
        if args:
            self.update_extend(args[0])
        if kwargs:
            self.update(kwargs)

    def _clear_ll(self):
        # (Re)initialize the linked list and the key -> cells map.
        try:
            _map = self._map
        except AttributeError:
            _map = self._map = {}
            self.root = []
        _map.clear()
        # the root cell points at itself in both directions when empty
        self.root[:] = [self.root, self.root, None]

    def _insert(self, k, v):
        # Append a new cell for (k, v) at the tail of the linked list.
        root = self.root
        cells = self._map.setdefault(k, [])
        last = root[PREV]
        cell = [last, root, k, v]
        last[NEXT] = root[PREV] = cell
        cells.append(cell)

    def add(self, k, v):
        """Add a single value *v* under a key *k*. Existing values under *k*
        are preserved.
        """
        values = super(OrderedMultiDict, self).setdefault(k, [])
        self._insert(k, v)
        values.append(v)

    def addlist(self, k, v):
        """Add an iterable of values underneath a specific key, preserving
        any values already under that key.

        >>> omd = OrderedMultiDict([('a', -1)])
        >>> omd.addlist('a', range(3))
        >>> omd
        OrderedMultiDict([('a', -1), ('a', 0), ('a', 1), ('a', 2)])

        Called ``addlist`` for consistency with :meth:`getlist`, but
        tuples and other sequences and iterables work. An empty
        iterable is a no-op and does not create the key.
        """
        # Materialize once: *v* is consumed twice below, which would
        # silently desynchronize the two stores for one-shot iterators.
        new_values = list(v)
        if not new_values:
            # avoid leaving a key that maps to an empty value list
            return
        self_insert = self._insert
        values = super(OrderedMultiDict, self).setdefault(k, [])
        for subv in new_values:
            self_insert(k, subv)
        values.extend(new_values)

    def get(self, k, default=None):
        """Return the value for key *k* if present in the dictionary, else
        *default*. If *default* is not given, ``None`` is returned.
        This method never raises a :exc:`KeyError`.

        To get all values under a key, use :meth:`OrderedMultiDict.getlist`.
        """
        return super(OrderedMultiDict, self).get(k, [default])[-1]

    def getlist(self, k, default=_MISSING):
        """Get all values for key *k* as a list, if *k* is in the
        dictionary, else *default*. The list returned is a copy and
        can be safely mutated. If *default* is not given, an empty
        :class:`list` is returned.
        """
        try:
            return super(OrderedMultiDict, self).__getitem__(k)[:]
        except KeyError:
            if default is _MISSING:
                return []
            return default

    def clear(self):
        "Empty the dictionary."
        super(OrderedMultiDict, self).clear()
        self._clear_ll()

    def setdefault(self, k, default=_MISSING):
        """If key *k* is in the dictionary, return its value. If not, insert
        *k* with a value of *default* and return *default*. *default*
        defaults to ``None``. See :meth:`dict.setdefault` for more
        information.
        """
        if not super(OrderedMultiDict, self).__contains__(k):
            self[k] = None if default is _MISSING else default
        return self[k]

    def copy(self):
        "Return a shallow copy of the dictionary."
        return self.__class__(self.iteritems(multi=True))

    @classmethod
    def fromkeys(cls, keys, default=None):
        """Create a dictionary from a list of keys, with all the values
        set to *default*, or ``None`` if *default* is not set.
        """
        return cls([(k, default) for k in keys])

    def update(self, E, **F):
        """Add items from a dictionary or iterable (and/or keyword arguments),
        overwriting values under an existing key. See
        :meth:`dict.update` for more details.

        Values that were under a key before the call are discarded the
        first time that key is encountered in *E*; all values that *E*
        itself supplies for the key are kept.
        """
        # E and F are throwback names to the dict() __doc__
        if E is not self:
            self_add = self.add
            if isinstance(E, OrderedMultiDict):
                for k in E:
                    if k in self:
                        del self[k]
                for k, v in E.iteritems(multi=True):
                    self_add(k, v)
            elif hasattr(E, 'keys'):
                for k in E.keys():
                    self[k] = E[k]
            else:
                seen = set()
                seen_add = seen.add
                for k, v in E:
                    if k not in seen:
                        # Always mark the key, so that a later pair
                        # from E never wipes out an earlier one.
                        seen_add(k)
                        if k in self:
                            del self[k]
                    self_add(k, v)
        for k in F:
            self[k] = F[k]
        return

    def update_extend(self, E, **F):
        """Add items from a dictionary, iterable, and/or keyword
        arguments without overwriting existing items present in the
        dictionary. Like :meth:`update`, but adds to existing keys
        instead of overwriting them.
        """
        if E is self:
            # items() returns a list, i.e. a snapshot safe to iterate
            # while adding to self
            iterator = iter(E.items())
        elif isinstance(E, OrderedMultiDict):
            iterator = E.iteritems(multi=True)
        elif hasattr(E, 'keys'):
            iterator = ((k, E[k]) for k in E.keys())
        else:
            iterator = E

        self_add = self.add
        for k, v in iterator:
            self_add(k, v)
        for k in F:
            self_add(k, F[k])

    def __setitem__(self, k, v):
        if super(OrderedMultiDict, self).__contains__(k):
            self._remove_all(k)
        self._insert(k, v)
        super(OrderedMultiDict, self).__setitem__(k, [v])

    def __getitem__(self, k):
        return super(OrderedMultiDict, self).__getitem__(k)[-1]

    def __delitem__(self, k):
        super(OrderedMultiDict, self).__delitem__(k)
        self._remove_all(k)

    def __eq__(self, other):
        if self is other:
            return True
        try:
            if len(other) != len(self):
                return False
        except TypeError:
            return False
        if isinstance(other, OrderedMultiDict):
            selfi = self.iteritems(multi=True)
            otheri = other.iteritems(multi=True)
            # Pad with the private sentinel so that a genuine
            # (None, None) item can never be mistaken for padding.
            zipped_items = izip_longest(selfi, otheri, fillvalue=_MISSING)
            for self_item, other_item in zipped_items:
                if self_item is _MISSING or other_item is _MISSING:
                    # one side has more items than the other
                    return False
                (selfk, selfv), (otherk, otherv) = self_item, other_item
                if selfk != otherk or selfv != otherv:
                    return False
            return True
        elif hasattr(other, 'keys'):
            # Compare against a plain mapping by most-recent value.
            for selfk in self:
                try:
                    if other[selfk] != self[selfk]:
                        return False
                except KeyError:
                    return False
            return True
        return False

    def __ne__(self, other):
        return not (self == other)

    def pop(self, k, default=_MISSING):
        """Remove all values under key *k*, returning the most-recently
        inserted value. Raises :exc:`KeyError` if the key is not
        present and no *default* is provided.
        """
        try:
            return self.popall(k)[-1]
        except KeyError:
            if default is _MISSING:
                raise KeyError(k)
        return default

    def popall(self, k, default=_MISSING):
        """Remove all values under key *k*, returning them in the form of
        a list. Raises :exc:`KeyError` if the key is not present and no
        *default* is provided.
        """
        super_self = super(OrderedMultiDict, self)
        if super_self.__contains__(k):
            self._remove_all(k)
        if default is _MISSING:
            return super_self.pop(k)
        return super_self.pop(k, default)

    def poplast(self, k=_MISSING, default=_MISSING):
        """Remove and return the most-recently inserted value under the key
        *k*, or the most-recently inserted key if *k* is not
        provided. If no values remain under *k*, it will be removed
        from the OMD. Raises :exc:`KeyError` if *k* is not present in
        the dictionary, or the dictionary is empty.
        """
        if k is _MISSING:
            if self:
                # tail of the linked list is the newest insertion
                k = self.root[PREV][KEY]
            else:
                raise KeyError('empty %r' % type(self))
        try:
            self._remove(k)
        except KeyError:
            if default is _MISSING:
                raise KeyError(k)
            return default
        values = super(OrderedMultiDict, self).__getitem__(k)
        v = values.pop()
        if not values:
            super(OrderedMultiDict, self).__delitem__(k)
        return v

    def _remove(self, k):
        # Unlink the newest cell stored under k.
        values = self._map[k]
        cell = values.pop()
        cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV]
        if not values:
            del self._map[k]

    def _remove_all(self, k):
        # Unlink every cell stored under k.
        values = self._map[k]
        while values:
            cell = values.pop()
            cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV]
        del self._map[k]

    def iteritems(self, multi=False):
        """Iterate over the OMD's items in insertion order. By default,
        yields only the most-recently inserted value for each key. Set
        *multi* to ``True`` to get all inserted items.
        """
        root = self.root
        curr = root[NEXT]
        if multi:
            while curr is not root:
                yield curr[KEY], curr[VALUE]
                curr = curr[NEXT]
        else:
            for key in self.iterkeys():
                yield key, self[key]

    def iterkeys(self, multi=False):
        """Iterate over the OMD's keys in insertion order. By default, yields
        each key once, at the position of its first insertion. Set
        *multi* to ``True`` to get all keys, including duplicates, in
        insertion order.
        """
        root = self.root
        curr = root[NEXT]
        if multi:
            while curr is not root:
                yield curr[KEY]
                curr = curr[NEXT]
        else:
            yielded = set()
            yielded_add = yielded.add
            while curr is not root:
                k = curr[KEY]
                if k not in yielded:
                    yielded_add(k)
                    yield k
                curr = curr[NEXT]

    def itervalues(self, multi=False):
        """Iterate over the OMD's values in insertion order. By default,
        yields the most-recently inserted value per unique key. Set
        *multi* to ``True`` to get all values according to insertion
        order.
        """
        for k, v in self.iteritems(multi=multi):
            yield v

    def todict(self, multi=False):
        """Gets a basic :class:`dict` of the items in this dictionary. Keys
        are the same as the OMD, values are the most recently inserted
        values for each key.

        Setting the *multi* arg to ``True`` yields a dictionary of
        keys to lists of all values under each key. The lists are
        copies that can be safely mutated.
        """
        if multi:
            return dict([(k, self.getlist(k)) for k in self])
        return dict([(k, self[k]) for k in self])

    def sorted(self, key=None, reverse=False):
        """Similar to the built-in :func:`sorted`, except this method returns
        a new :class:`OrderedMultiDict` sorted by the provided key
        function, optionally reversed. All items are kept, including
        multiple values under the same key.

        Args:
            key (callable): A callable to determine the sort key of
              each element. The callable should expect an **item**
              (key-value pair tuple).
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict(zip(range(3), range(3)))
        >>> omd.sorted(reverse=True)
        OrderedMultiDict([(2, 2), (1, 1), (0, 0)])

        Note that the key function receives an **item** (key-value
        tuple), so the recommended signature looks like:

        >>> omd = OrderedMultiDict(zip('hello', 'world'))
        >>> omd.sorted(key=lambda i: i[1])  # i[0] is the key, i[1] is the val
        OrderedMultiDict([('o', 'd'), ('l', 'l'), ('e', 'o'), ('l', 'r'), ('h', 'w')])
        """
        cls = self.__class__
        # multi=True: sorting must not drop older values of repeated keys
        return cls(sorted(self.iteritems(multi=True), key=key,
                          reverse=reverse))

    def sortedvalues(self, key=None, reverse=False):
        """Returns a copy of the :class:`OrderedMultiDict` with the same keys
        in the same order as the original OMD, but the values within
        each keyspace have been sorted according to *key* and
        *reverse*.

        Args:
            key (callable): A single-argument callable to determine
              the sort key of each element. It is applied to the
              individual values stored under each key.
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict()
        >>> omd.addlist('even', [6, 2])
        >>> omd.addlist('odd', [1, 5])
        >>> omd.add('even', 4)
        >>> omd.add('odd', 3)
        >>> somd = omd.sortedvalues()
        >>> somd.getlist('even')
        [2, 4, 6]
        >>> somd.keys(multi=True) == omd.keys(multi=True)
        True
        >>> omd == somd
        False
        >>> somd
        OrderedMultiDict([('even', 2), ('even', 4), ('odd', 1), ('odd', 3), ('even', 6), ('odd', 5)])

        As demonstrated above, contents and key order are
        retained. Only value order changes.
        """
        try:
            superself_iteritems = super(OrderedMultiDict, self).iteritems()
        except AttributeError:
            superself_iteritems = super(OrderedMultiDict, self).items()
        # (not reverse) because they pop off in reverse order for reinsertion
        sorted_val_map = dict([(k, sorted(v, key=key, reverse=(not reverse)))
                               for k, v in superself_iteritems])
        ret = self.__class__()
        for k in self.iterkeys(multi=True):
            ret.add(k, sorted_val_map[k].pop())
        return ret

    def inverted(self):
        """Returns a new :class:`OrderedMultiDict` with values and keys
        swapped, like creating dictionary transposition or reverse
        index.  Insertion order is retained and all keys and values
        are represented in the output.

        >>> omd = OMD([(0, 2), (1, 2)])
        >>> omd.inverted().getlist(2)
        [0, 1]

        Inverting twice yields a copy of the original:

        >>> omd.inverted().inverted()
        OrderedMultiDict([(0, 2), (1, 2)])
        """
        return self.__class__((v, k) for k, v in self.iteritems(multi=True))

    def counts(self):
        """Returns a mapping from key to number of values inserted under that
        key. Like :py:class:`collections.Counter`, but returns a new
        :class:`OrderedMultiDict`.
        """
        # Returns an OMD because Counter/OrderedDict may not be
        # available, and neither Counter nor dict maintain order.
        super_getitem = super(OrderedMultiDict, self).__getitem__
        return self.__class__((k, len(super_getitem(k))) for k in self)

    def keys(self, multi=False):
        """Returns a list containing the output of :meth:`iterkeys`.  See
        that method's docs for more details.
        """
        return list(self.iterkeys(multi=multi))

    def values(self, multi=False):
        """Returns a list containing the output of :meth:`itervalues`.  See
        that method's docs for more details.
        """
        return list(self.itervalues(multi=multi))

    def items(self, multi=False):
        """Returns a list containing the output of :meth:`iteritems`.  See
        that method's docs for more details.
        """
        return list(self.iteritems(multi=multi))

    def __iter__(self):
        return self.iterkeys()

    def __reversed__(self):
        # Walk the list tail-to-head and yield each key when its oldest
        # cell is reached, i.e. the reverse of the iterkeys() order.
        root = self.root
        curr = root[PREV]
        lengths = {}
        lengths_sd = lengths.setdefault
        get_values = super(OrderedMultiDict, self).__getitem__
        while curr is not root:
            k = curr[KEY]
            vals = get_values(k)
            if lengths_sd(k, 1) == len(vals):
                yield k
            lengths[k] += 1
            curr = curr[PREV]

    def __repr__(self):
        cn = self.__class__.__name__
        kvs = ', '.join([repr((k, v)) for k, v in self.iteritems(multi=True)])
        return '%s([%s])' % (cn, kvs)

    def viewkeys(self):
        "OMD.viewkeys() -> a set-like object providing a view on OMD's keys"
        return KeysView(self)

    def viewvalues(self):
        "OMD.viewvalues() -> an object providing a view on OMD's values"
        return ValuesView(self)

    def viewitems(self):
        "OMD.viewitems() -> a set-like object providing a view on OMD's items"
        return ItemsView(self)
1505
1506
# If boltons.dictutils is importable, its OrderedMultiDict rebinds the
# name and supersedes the class defined above in this module; otherwise
# the local definition stays in effect.
try:
    # try to import the built-in one anyways
    from boltons.dictutils import OrderedMultiDict
except ImportError:
    pass

# Short alias for whichever OrderedMultiDict ended up bound above.
OMD = OrderedMultiDict
1514
1515
class QueryParamDict(OrderedMultiDict):
    """An :class:`~dictutils.OrderedMultiDict` subclass tailored to
    query strings. Keys and values are fully unquoted when loaded via
    :meth:`from_text`, and parsed keys and values are strings by
    default.

    Repeated keys keep all of their values, and insertion order is
    retained.

    >>> qp = QueryParamDict.from_text(u'key=val1&key=val2&utm_source=rtd')
    >>> qp.getlist('key')
    [u'val1', u'val2']
    >>> qp['key']
    u'val2'
    >>> qp.add('key', 'val3')
    >>> qp.to_text()
    'key=val1&key=val2&utm_source=rtd&key=val3'

    See :class:`~dictutils.OrderedMultiDict` for more API features.
    """

    @classmethod
    def from_text(cls, query_string):
        """
        Build a new :class:`QueryParamDict` from *query_string*, keeping
        fields that have blank values.
        """
        return cls(parse_qsl(query_string, keep_blank_values=True))

    def to_text(self, full_quote=False):
        """
        Serialize the parameters back into a query string.

        Args:
            full_quote (bool): Whether or not to percent-quote special
                characters or leave them decoded for readability.
        """
        def _render(name, value):
            # A value of None renders as a bare key, with no '='.
            quoted_name = quote_query_part(to_unicode(name),
                                           full_quote=full_quote)
            if value is None:
                return quoted_name
            quoted_value = quote_query_part(to_unicode(value),
                                            full_quote=full_quote)
            return u'='.join((quoted_name, quoted_value))

        return u'&'.join([_render(k, v)
                          for k, v in self.iteritems(multi=True)])
1561
1562 # TODO: cleanup OMD/cachedproperty etc.?
1563
1564 # end urlutils.py