Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/boltons/urlutils.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """:mod:`urlutils` is a module dedicated to one of software's most | |
3 versatile, well-aged, and beloved data structures: the URL, also known | |
4 as the `Uniform Resource Locator`_. | |
5 | |
6 Among other things, this module is a full reimplementation of URLs, | |
7 without any reliance on the :mod:`urlparse` or :mod:`urllib` standard | |
8 library modules. The centerpiece and top-level interface of urlutils | |
9 is the :class:`URL` type. Also featured is the :func:`find_all_links` | |
10 convenience function. Some low-level functions and constants are also | |
11 below. | |
12 | |
13 The implementations in this module are based heavily on `RFC 3986`_ and | |
14 `RFC 3987`_, and incorporate details from several other RFCs and `W3C | |
15 documents`_. | |
16 | |
17 .. _Uniform Resource Locator: https://en.wikipedia.org/wiki/Uniform_Resource_Locator | |
18 .. _RFC 3986: https://tools.ietf.org/html/rfc3986 | |
19 .. _RFC 3987: https://tools.ietf.org/html/rfc3987 | |
20 .. _W3C documents: https://www.w3.org/TR/uri-clarification/ | |
21 | |
22 """ | |
23 | |
24 import re | |
25 import socket | |
26 import string | |
27 from unicodedata import normalize | |
28 | |
# Python 2/3 compatibility aliases: ``unicode`` is the text type, and
# ``unichr`` turns a code point into a one-character text string.
unicode = type(u'')
try:
    unichr
except NameError:
    unichr = chr

# The unreserved URI characters (per RFC 3986 Section 2.3)
_UNRESERVED_CHARS = frozenset('~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                              'abcdefghijklmnopqrstuvwxyz')

# URL parsing regex (based on RFC 3986 Appendix B, with modifications)
_URL_RE = re.compile(r'^((?P<scheme>[^:/?#]+):)?'
                     r'((?P<_netloc_sep>//)(?P<authority>[^/?#]*))?'
                     r'(?P<path>[^?#]*)'
                     r'(\?(?P<query>[^#]*))?'
                     r'(#(?P<fragment>.*))?')


# Maps each two-character hex pair (as ASCII bytes, upper/lower case in
# any combination) to the single byte it denotes, e.g. b'20' -> b' '.
_HEX_CHAR_MAP = {(a + b).encode('ascii'):
                 unichr(int(a + b, 16)).encode('charmap')
                 for a in string.hexdigits for b in string.hexdigits}
# Captures runs of ASCII characters; used to split text into ASCII and
# non-ASCII stretches.
_ASCII_RE = re.compile('([\x00-\x7f]+)')
51 | |
52 | |
# This port list painstakingly curated by hand searching through
# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
# and
# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
# Keys are schemes that use a network location; the value is the default
# port, or None when the scheme has no default port.
SCHEME_PORT_MAP = {'acap': 674, 'afp': 548, 'dict': 2628, 'dns': 53,
                   'file': None, 'ftp': 21, 'git': 9418, 'gopher': 70,
                   'http': 80, 'https': 443, 'imap': 143, 'ipp': 631,
                   'ipps': 631, 'irc': 194, 'ircs': 6697, 'ldap': 389,
                   'ldaps': 636, 'mms': 1755, 'msrp': 2855, 'msrps': None,
                   'mtqp': 1038, 'nfs': 111, 'nntp': 119, 'nntps': 563,
                   'pop': 110, 'prospero': 1525, 'redis': 6379, 'rsync': 873,
                   'rtsp': 554, 'rtsps': 322, 'rtspu': 5005, 'sftp': 22,
                   'smb': 445, 'snmp': 161, 'ssh': 22, 'steam': None,
                   'svn': 3690, 'telnet': 23, 'ventrilo': 3784, 'vnc': 5900,
                   'wais': 210, 'ws': 80, 'wss': 443, 'xmpp': None}

# This list of schemes that don't use authorities is also from the link above.
NO_NETLOC_SCHEMES = {'urn', 'about', 'bitcoin', 'blob', 'data', 'geo',
                     'magnet', 'mailto', 'news', 'pkcs11',
                     'sip', 'sips', 'tel'}
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
74 | |
# RFC 3986 section 2.2, Reserved Characters
_GEN_DELIMS = frozenset(u':/?#[]@')
_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
_ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS

# For each URL component: *_SAFE is the set of characters that may appear
# unescaped, *_DELIMS is the set of reserved characters that must be
# percent-encoded inside that component.
_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS
_USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE
_PATH_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u':@')
_PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE
_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u'/?')
_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
# Parenthesised to spell out the grouping the original expression got
# from operator precedence ('-' binds tighter than '|').
_QUERY_SAFE = _UNRESERVED_CHARS | (_FRAGMENT_SAFE - set(u'&=+'))
_QUERY_DELIMS = _ALL_DELIMS - _QUERY_SAFE
88 | |
89 | |
class URLParseError(ValueError):
    """Exception inheriting from :exc:`ValueError`, raised when failing to
    parse a URL. Mostly raised on invalid ports and IPv6 addresses.
    """
95 | |
96 | |
DEFAULT_ENCODING = 'utf8'


def to_unicode(obj):
    """Return *obj* as text.

    Byte strings are decoded with :data:`DEFAULT_ENCODING`; any other
    object is converted with the text type's constructor.

    Raises:
        UnicodeDecodeError: If *obj* is a byte string that is not valid
            in :data:`DEFAULT_ENCODING`.
    """
    # On Python 3, ``str(b'abc')`` yields the repr "b'abc'" and never
    # raises UnicodeDecodeError, so bytes have to be decoded explicitly.
    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING)
    return unicode(obj)
105 | |
106 | |
# regex from gruber via tornado
# doesn't support ipv6
# doesn't support mailto (netloc-less schemes)
# NOTE(review): the bare ``&`` and ``"`` alternatives below look like
# HTML-unescaped ``&amp;`` / ``&quot;`` from the tornado original --
# confirm against the upstream source before relying on them.
_FIND_ALL_URL_RE = re.compile(to_unicode(r"""\b((?:([\w-]+):(/{1,3})|www[.])(?:(?:(?:[^\s&()<>]|&|")*(?:[^!"#$%'()*+,.:;<=>?@\[\]^`{|}~\s]))|(?:\((?:[^\s&()]|&|")*\)))+)"""))


def find_all_links(text, with_text=False, default_scheme='https', schemes=()):
    """This function uses heuristics to search plain text for strings
    that look like URLs, returning a :class:`list` of :class:`URL`
    objects. It supports limiting the accepted schemes, and returning
    interleaved text as well.

    >>> find_all_links('Visit https://boltons.rtfd.org!')
    [URL(u'https://boltons.rtfd.org')]
    >>> find_all_links('Visit https://boltons.rtfd.org!', with_text=True)
    [u'Visit ', URL(u'https://boltons.rtfd.org'), u'!']

    Args:
        text (str): The text to search.

        with_text (bool): Whether or not to interleave plaintext blocks
            with the returned URL objects. Having all tokens can be
            useful for transforming the text, e.g., replacing links with
            HTML equivalents. Defaults to ``False``.

        default_scheme (str): Many URLs are written without the scheme
            component. This function can match a reasonable subset of
            those, provided *default_scheme* is set to a string. Set to
            ``False`` to disable matching scheme-less URLs. Defaults to
            ``'https'``.

        schemes (list): A list of strings that a URL's scheme must
            match in order to be included in the results. Defaults to
            empty, which matches all schemes.

    Returns:
        list: :class:`URL` objects, interleaved with text strings only
        when *with_text* is true. Adjacent text is merged, so the list
        never holds two strings in a row.

    .. note:: Currently this function does not support finding IPv6
       addresses or URLs with netloc-less schemes, like mailto.

    """
    text = to_unicode(text)
    prev_end = 0
    ret = []
    _add = ret.append

    def _add_text(t):
        # Extend the previous text token rather than starting a new one.
        if ret and isinstance(ret[-1], unicode):
            ret[-1] += t
        else:
            _add(t)

    for match in _FIND_ALL_URL_RE.finditer(text):
        start, end = match.start(1), match.end(1)
        if with_text and prev_end < start:
            _add_text(text[prev_end:start])
        prev_end = end

        cur_url_text = match.group(0)
        try:
            cur_url = URL(cur_url_text)
            if not cur_url.scheme and default_scheme:
                cur_url = URL(default_scheme + '://' + cur_url_text)
        except URLParseError:
            # currently this should only be hit with broken port
            # strings. the regex above doesn't support ipv6 addresses
            cur_url = None

        rejected = (cur_url is None
                    or not cur_url.scheme
                    or (schemes and cur_url.scheme not in schemes))
        if rejected:
            # A rejected match is plain text: emit it only when the
            # caller asked for text, never in the URL-only result.
            if with_text:
                _add_text(text[start:end])
            continue
        _add(cur_url)

    if with_text:
        tail = text[prev_end:]
        if tail:
            _add_text(tail)

    return ret
187 | |
188 | |
def _make_quote_map(safe_chars):
    """Build a percent-encoding lookup table for one URL component.

    Args:
        safe_chars: Container of characters that may appear unescaped.

    Returns:
        dict: For every value 0-255, maps both the one-character string
        and the integer to either the character itself (when it is in
        *safe_chars*) or its ``%XX`` escape.
    """
    ret = {}
    # The integer is included as a key for py3 mostly, because
    # bytestrings are iterables of ints, of course!
    for v in range(256):
        c = chr(v)
        quoted = c if c in safe_chars else '%{0:02X}'.format(v)
        ret[c] = ret[v] = quoted
    return ret
200 | |
201 | |
_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
_QUERY_PART_QUOTE_MAP = _make_quote_map(_QUERY_SAFE)
_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)


def _quote_part(text, quote_map, delims, full_quote):
    """Percent-encode *text* using one component's quote map and delimiters.

    With *full_quote*, the text is NFC-normalized, UTF-8 encoded, and
    every byte is looked up in *quote_map*. Otherwise only characters in
    *delims* are escaped and everything else passes through untouched.
    """
    if full_quote:
        bytestr = normalize('NFC', to_unicode(text)).encode('utf8')
        return u''.join([quote_map[b] for b in bytestr])
    return u''.join([quote_map[t] if t in delims else t for t in text])


def quote_path_part(text, full_quote=True):
    """
    Percent-encode a single segment of a URL path.
    """
    return _quote_part(text, _PATH_PART_QUOTE_MAP, _PATH_DELIMS, full_quote)


def quote_query_part(text, full_quote=True):
    """
    Percent-encode a single query string key or value.
    """
    return _quote_part(text, _QUERY_PART_QUOTE_MAP, _QUERY_DELIMS, full_quote)


def quote_fragment_part(text, full_quote=True):
    """Quote the fragment part of the URL. Fragments don't have
    subdelimiters, so the whole URL fragment can be passed.
    """
    return _quote_part(text, _FRAGMENT_QUOTE_MAP, _FRAGMENT_DELIMS,
                       full_quote)


def quote_userinfo_part(text, full_quote=True):
    """Quote special characters in either the username or password
    section of the URL. Note that userinfo in URLs is considered
    deprecated in many circles (especially browsers), and support for
    percent-encoded userinfo can be spotty.
    """
    return _quote_part(text, _USERINFO_PART_QUOTE_MAP, _USERINFO_DELIMS,
                       full_quote)
252 | |
253 | |
def unquote(string, encoding='utf-8', errors='replace'):
    """Percent-decode a string, by replacing %xx escapes with their
    single-character equivalent. The optional *encoding* and *errors*
    parameters specify how to decode percent-encoded sequences into
    Unicode characters, as accepted by the :meth:`bytes.decode()` method. By
    default, percent-encoded sequences are decoded with UTF-8, and
    invalid sequences are replaced by a placeholder character.

    >>> unquote(u'abc%20def')
    u'abc def'
    """
    if '%' not in string:
        # Bare attribute access: raises AttributeError for containers
        # that are not string-like, otherwise a no-op.
        string.split
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    # The capturing split alternates non-ASCII and ASCII stretches, so
    # the ASCII runs (the only place escapes can occur) sit at odd
    # indexes.
    bits = _ASCII_RE.split(string)
    res = [bits[0]]
    append = res.append
    for i in range(1, len(bits), 2):
        append(unquote_to_bytes(bits[i]).decode(encoding, errors))
        append(bits[i + 1])
    return ''.join(res)
279 | |
280 | |
def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    Text input is encoded as UTF-8 first. A ``%`` that is not followed
    by two hex digits is kept literally.
    """
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, unicode):
        string = string.encode('utf-8')
    bits = string.split(b'%')
    if len(bits) == 1:
        return string
    res = [bits[0]]
    append = res.append

    for item in bits[1:]:
        try:
            append(_HEX_CHAR_MAP[item[:2]])
            append(item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back and keep the text.
            append(b'%')
            append(item)
    return b''.join(res)
306 | |
307 | |
def register_scheme(text, uses_netloc=None, default_port=None):
    """Registers new scheme information, resulting in correct port and
    slash behavior from the URL object. There are dozens of standard
    schemes preregistered, so this function is mostly meant for
    proprietary internal customizations or stopgaps on missing
    standards information. If a scheme seems to be missing, please
    `file an issue`_!

    Args:
        text (str): Text representing the scheme.
           (the 'http' in 'http://hatnote.com')
        uses_netloc (bool): Does the scheme support specifying a
           network host? For instance, "http" does, "mailto" does not.
           When left as ``None``, nothing is registered.
        default_port (int): The default port, if any, for netloc-using
           schemes.

    Raises:
        ValueError: If *default_port* is not integer-like, if a port is
            given for a non-netloc scheme, or if *uses_netloc* is not
            ``True``, ``False``, or ``None``.

    .. _file an issue: https://github.com/mahmoud/boltons/issues
    """
    text = text.lower()
    if default_port is not None:
        try:
            default_port = int(default_port)
        except (ValueError, TypeError):
            # int() raises TypeError for e.g. a list; report both
            # failures as the documented ValueError.
            raise ValueError('default_port expected integer or None, not %r'
                             % (default_port,))

    if uses_netloc is True:
        SCHEME_PORT_MAP[text] = default_port
        # Keep the two registries mutually exclusive so that
        # re-registering a scheme actually takes effect.
        NO_NETLOC_SCHEMES.discard(text)
    elif uses_netloc is False:
        if default_port is not None:
            raise ValueError('unexpected default port while specifying'
                             ' non-netloc scheme: %r' % default_port)
        NO_NETLOC_SCHEMES.add(text)
        SCHEME_PORT_MAP.pop(text, None)
    elif uses_netloc is not None:
        raise ValueError('uses_netloc expected True, False, or None')

    return
345 | |
346 | |
def resolve_path_parts(path_parts):
    """Normalize the URL path by resolving segments of '.' and '..',
    resulting in a dot-free path. See RFC 3986 section 5.2.4, Remove
    Dot Segments.

    Args:
        path_parts: Sequence of path segment strings.

    Returns:
        list: The segments with '.' dropped and each '..' removing the
        segment before it. A path that ended in a dot segment gets a
        trailing empty segment, i.e., a trailing slash.
    """
    # TODO: what to do with multiple slashes
    ret = []

    for part in path_parts:
        if part == u'.':
            pass
        elif part == u'..':
            # A lone leading '' marks the root; never pop it.
            if ret and (len(ret) > 1 or ret[0]):  # prevent unrooting
                ret.pop()
        else:
            ret.append(part)

    if list(path_parts[-1:]) in ([u'.'], [u'..']):
        ret.append(u'')

    return ret
368 | |
369 | |
class cachedproperty(object):
    """The ``cachedproperty`` is used similar to :class:`property`, except
    that the wrapped method is only called once. This is commonly used
    to implement lazy attributes.

    After the property has been accessed, the value is stored on the
    instance itself, using the same name as the cachedproperty. This
    allows the cache to be cleared with :func:`delattr`, or through
    manipulating the object's ``__dict__``.
    """
    def __init__(self, func):
        self.__doc__ = getattr(func, '__doc__')
        self.func = func

    def __get__(self, obj, objtype=None):
        if obj is None:
            # Accessed on the class: hand back the descriptor itself.
            return self
        # Storing under the function's name in the instance __dict__
        # means later lookups find the value before this descriptor.
        value = obj.__dict__[self.func.__name__] = self.func(obj)
        return value

    def __repr__(self):
        cn = self.__class__.__name__
        return '<%s func=%s>' % (cn, self.func)
393 | |
394 | |
class URL(object):
    r"""The URL is one of the most ubiquitous data structures in the
    virtual and physical landscape. From blogs to billboards, URLs are
    so common, that it's easy to overlook their complexity and
    power.

    There are 8 parts of a URL, each with its own semantics and
    special characters:

      * :attr:`~URL.scheme`
      * :attr:`~URL.username`
      * :attr:`~URL.password`
      * :attr:`~URL.host`
      * :attr:`~URL.port`
      * :attr:`~URL.path`
      * :attr:`~URL.query_params` (query string parameters)
      * :attr:`~URL.fragment`

    Each is exposed as an attribute on the URL object. RFC 3986 offers
    this brief structural summary of the main URL components::

        foo://user:pass@example.com:8042/over/there?name=ferret#nose
        \_/   \_______/ \_________/ \__/\_________/ \_________/ \__/
         |        |          |        |      |           |        |
       scheme  userinfo     host     port   path       query   fragment

    And here's how that example can be manipulated with the URL type:

    >>> url = URL('foo://example.com:8042/over/there?name=ferret#nose')
    >>> print(url.host)
    example.com
    >>> print(url.get_authority())
    example.com:8042
    >>> print(url.qp['name'])  # qp is a synonym for query_params
    ferret

    URL's approach to encoding is that inputs are decoded as much as
    possible, and data remains in this decoded state until re-encoded
    using the :meth:`~URL.to_text()` method. In this way, it's similar
    to Python's current approach of encouraging immediate decoding of
    bytes to text.

    Note that URL instances are mutable objects. If an immutable
    representation of the URL is desired, the string from
    :meth:`~URL.to_text()` may be used. For an immutable, but
    almost-as-featureful, URL object, check out the `hyperlink
    package`_.

    .. _hyperlink package: https://github.com/mahmoud/hyperlink

    """

    # public attributes (for comparison, see __eq__):
    _cmp_attrs = ('scheme', 'uses_netloc', 'username', 'password',
                  'family', 'host', 'port', 'path', 'query_params', 'fragment')

    def __init__(self, url=''):
        """Parse *url* (text, UTF-8 bytes, or another :class:`URL`).

        Raises:
            URLParseError: If *url* is bytes that cannot be decoded with
                ``DEFAULT_ENCODING``.
        """
        # TODO: encoding param. The encoding that underlies the
        # percent-encoding is always utf8 for IRIs, but can be Latin-1
        # for other usage schemes.
        ud = DEFAULT_PARSED_URL
        if url:
            if isinstance(url, URL):
                url = url.to_text()  # better way to copy URLs?
            elif isinstance(url, bytes):
                try:
                    url = url.decode(DEFAULT_ENCODING)
                except UnicodeDecodeError as ude:
                    raise URLParseError('expected text or %s-encoded bytes.'
                                        ' try decoding the url bytes and'
                                        ' passing the result. (got: %s)'
                                        % (DEFAULT_ENCODING, ude))
            ud = parse_url(url)

        _e = u''
        self.scheme = ud['scheme'] or _e
        self._netloc_sep = ud['_netloc_sep'] or _e
        self.username = (unquote(ud['username'])
                         if '%' in (ud['username'] or _e) else ud['username'] or _e)
        self.password = (unquote(ud['password'])
                         if '%' in (ud['password'] or _e) else ud['password'] or _e)
        self.family = ud['family']

        if not ud['host']:
            self.host = _e
        else:
            try:
                self.host = ud['host'].encode("ascii")
            except UnicodeEncodeError:
                self.host = ud['host']  # already non-ascii text
            else:
                # ASCII hosts may be IDNA-encoded; decode them to text.
                self.host = self.host.decode("idna")

        self.port = ud['port']
        self.path_parts = tuple([unquote(p) if '%' in p else p for p
                                 in (ud['path'] or _e).split(u'/')])
        self._query = ud['query'] or _e
        self.fragment = (unquote(ud['fragment'])
                         if '%' in (ud['fragment'] or _e) else ud['fragment'] or _e)
        # TODO: possibly use None as marker for empty vs missing
        return

    @classmethod
    def from_parts(cls, scheme=None, host=None, path_parts=(), query_params=(),
                   fragment=u'', port=None, username=None, password=None):
        """Build a new URL from parts. Note that the respective arguments are
        not in the order they would appear in a URL:

        Args:
            scheme (str): The scheme of a URL, e.g., 'http'
            host (str): The host string, e.g., 'hatnote.com'
            path_parts (tuple): The individual text segments of the
              path, e.g., ('post', '123')
            query_params (dict): An OMD, dict, or list of (key, value)
              pairs representing the keys and values of the URL's query
              parameters.
            fragment (str): The fragment of the URL, e.g., 'anchor1'
            port (int): The integer port of URL, automatic defaults are
              available for registered schemes.
            username (str): The username for the userinfo part of the URL.
            password (str): The password for the userinfo part of the URL.

        Note that this method does relatively little
        validation. :meth:`URL.to_text()` should be used to check if
        any errors are produced while composing the final textual URL.
        """
        ret = cls()

        # Missing text parts are stored as the empty string, as
        # __init__ does; storing None would break the string methods
        # used by normalize(), default_port, uses_netloc and __eq__.
        _e = u''
        ret.scheme = scheme or _e
        ret.host = host or _e
        ret.path_parts = tuple(path_parts) or (u'',)
        ret.query_params.update(query_params)
        ret.fragment = fragment or _e
        ret.port = port
        ret.username = username or _e
        ret.password = password or _e

        return ret

    @cachedproperty
    def query_params(self):
        """The parsed form of the query string of the URL, represented as a
        :class:`~dictutils.OrderedMultiDict`. Also available as the
        handy alias ``qp``.

        >>> url = URL('http://boltons.readthedocs.io/?utm_source=doctest&python=great')
        >>> url.qp.keys()
        [u'utm_source', u'python']
        """
        return QueryParamDict.from_text(self._query)

    qp = query_params

    @property
    def path(self):
        "The URL's path, in text form."
        return u'/'.join([quote_path_part(p, full_quote=False)
                          for p in self.path_parts])

    @path.setter
    def path(self, path_text):
        self.path_parts = tuple([unquote(p) if '%' in p else p
                                 for p in to_unicode(path_text).split(u'/')])
        return

    @property
    def uses_netloc(self):
        """Whether or not a URL uses :code:`:` or :code:`://` to separate the
        scheme from the rest of the URL depends on the scheme's own
        standard definition. There is no way to infer this behavior
        from other parts of the URL. A scheme either supports network
        locations or it does not.

        The URL type's approach to this is to check for explicitly
        registered schemes, with common schemes like HTTP
        preregistered. This is the same approach taken by
        :mod:`urlparse`.

        URL adds two additional heuristics if the scheme as a whole is
        not registered. First, it attempts to check the subpart of the
        scheme after the last ``+`` character. This adds intuitive
        behavior for schemes like ``git+ssh``. Second, if a URL with
        an unrecognized scheme is loaded, it will maintain the
        separator it sees.

        >>> print(URL('fakescheme://test.com').to_text())
        fakescheme://test.com
        >>> print(URL('mockscheme:hello:world').to_text())
        mockscheme:hello:world

        """
        # For unregistered schemes, fall back to the separator text seen
        # at parse time ('//' or ''), used here for its truthiness.
        default = self._netloc_sep
        if self.scheme in SCHEME_PORT_MAP:
            return True
        if self.scheme in NO_NETLOC_SCHEMES:
            return False
        if self.scheme.split('+')[-1] in SCHEME_PORT_MAP:
            return True
        return default

    @property
    def default_port(self):
        """Return the default port for the currently-set scheme. Returns
        ``None`` if the scheme is unrecognized. See
        :func:`register_scheme` above. If :attr:`~URL.port` matches
        this value, no port is emitted in the output of
        :meth:`~URL.to_text()`.

        Applies the same '+' heuristic detailed in :meth:`URL.uses_netloc`.
        """
        try:
            return SCHEME_PORT_MAP[self.scheme]
        except KeyError:
            return SCHEME_PORT_MAP.get(self.scheme.split('+')[-1])

    def normalize(self, with_case=True):
        """Resolve any "." and ".." references in the path, as well as
        normalize scheme and host casing. To turn off case
        normalization, pass ``with_case=False``.

        More information can be found in `Section 6.2.2 of RFC 3986`_.

        .. _Section 6.2.2 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-6.2.2
        """
        self.path_parts = resolve_path_parts(self.path_parts)

        if with_case:
            self.scheme = self.scheme.lower()
            self.host = self.host.lower()
        return

    def navigate(self, dest):
        """Factory method that returns a _new_ :class:`URL` based on a given
        destination, *dest*. Useful for navigating those relative
        links with ease.

        The newly created :class:`URL` is normalized before being returned.

        >>> url = URL('http://boltons.readthedocs.io')
        >>> url.navigate('en/latest/')
        URL(u'http://boltons.readthedocs.io/en/latest/')

        Args:
           dest (str): A string or URL object representing the destination

        More information can be found in `Section 5 of RFC 3986`_.

        .. _Section 5 of RFC 3986: https://tools.ietf.org/html/rfc3986#section-5
        """
        orig_dest = None
        if not isinstance(dest, URL):
            dest, orig_dest = URL(dest), dest
        if dest.scheme and dest.host:
            # absolute URLs replace everything, but don't make an
            # extra copy if we don't have to
            return URL(dest) if orig_dest is None else dest
        query_params = dest.query_params

        if dest.path:
            if dest.path.startswith(u'/'):  # absolute path
                new_path_parts = list(dest.path_parts)
            else:  # relative path
                # path_parts is a list after normalize(); coerce both
                # sides so the concatenation cannot mix list and tuple.
                new_path_parts = (tuple(self.path_parts)[:-1]
                                  + tuple(dest.path_parts))
        else:
            new_path_parts = list(self.path_parts)
            if not query_params:
                query_params = self.query_params

        ret = self.from_parts(scheme=dest.scheme or self.scheme,
                              host=dest.host or self.host,
                              port=dest.port or self.port,
                              path_parts=new_path_parts,
                              query_params=query_params,
                              fragment=dest.fragment,
                              username=dest.username or self.username,
                              password=dest.password or self.password)
        ret.normalize()
        return ret

    def get_authority(self, full_quote=False, with_userinfo=False):
        """Used by URL schemes that have a network location,
        :meth:`~URL.get_authority` combines :attr:`username`,
        :attr:`password`, :attr:`host`, and :attr:`port` into one
        string, the *authority*, that is used for
        connecting to a network-accessible resource.

        Used internally by :meth:`~URL.to_text()` and can be useful
        for labeling connections.

        >>> url = URL('ftp://user@ftp.debian.org:2121/debian/README')
        >>> print(url.get_authority())
        ftp.debian.org:2121
        >>> print(url.get_authority(with_userinfo=True))
        user@ftp.debian.org:2121

        Args:
           full_quote (bool): Whether or not to apply IDNA encoding.
              Defaults to ``False``.
           with_userinfo (bool): Whether or not to include username
              and password, technically part of the
              authority. Defaults to ``False``.

        """
        parts = []
        _add = parts.append
        if self.username and with_userinfo:
            _add(quote_userinfo_part(self.username))
            if self.password:
                _add(':')
                _add(quote_userinfo_part(self.password))
            _add('@')
        if self.host:
            if self.family == socket.AF_INET6:
                _add('[')
                _add(self.host)
                _add(']')
            elif full_quote:
                _add(self.host.encode('idna').decode('ascii'))
            else:
                _add(self.host)
            # TODO: 0 port?
            if self.port and self.port != self.default_port:
                _add(':')
                _add(unicode(self.port))
        return u''.join(parts)

    def to_text(self, full_quote=False):
        """Render a string representing the current state of the URL
        object.

        >>> url = URL('http://listen.hatnote.com')
        >>> url.fragment = 'en'
        >>> print(url.to_text())
        http://listen.hatnote.com#en

        By setting the *full_quote* flag, the URL can either be fully
        quoted or minimally quoted. The most common characteristic of
        an encoded-URL is the presence of percent-encoded text (e.g.,
        %60).  Unquoted URLs are more readable and suitable
        for display, whereas fully-quoted URLs are more conservative
        and generally necessary for sending over the network.
        """
        scheme = self.scheme
        path = u'/'.join([quote_path_part(p, full_quote=full_quote)
                          for p in self.path_parts])
        authority = self.get_authority(full_quote=full_quote,
                                       with_userinfo=True)
        query_string = self.query_params.to_text(full_quote=full_quote)
        fragment = quote_fragment_part(self.fragment, full_quote=full_quote)

        parts = []
        _add = parts.append
        if scheme:
            _add(scheme)
            _add(':')
        if authority:
            _add('//')
            _add(authority)
        elif (scheme and path[:2] != '//' and self.uses_netloc):
            _add('//')
        if path:
            if scheme and authority and path[:1] != '/':
                _add('/')
                # TODO: i think this is here because relative paths
                # with absolute authorities = undefined
            _add(path)
        if query_string:
            _add('?')
            _add(query_string)
        if fragment:
            _add('#')
            _add(fragment)
        return u''.join(parts)

    def __repr__(self):
        cn = self.__class__.__name__
        return u'%s(%r)' % (cn, self.to_text())

    def __str__(self):
        return self.to_text()

    def __unicode__(self):
        return self.to_text()

    def __eq__(self, other):
        # Attribute-wise comparison; an attribute missing on *other*
        # compares as None.
        for attr in self._cmp_attrs:
            if not getattr(self, attr) == getattr(other, attr, None):
                return False
        return True

    def __ne__(self, other):
        return not self == other
787 | |
788 | |
# Use the socket module's inet_pton when the platform provides it;
# otherwise fall back to a ctypes shim over Winsock.
try:
    from socket import inet_pton
except ImportError:
    # from https://gist.github.com/nnemkin/4966028
    import ctypes

    class _sockaddr(ctypes.Structure):
        _fields_ = [("sa_family", ctypes.c_short),
                    ("__pad1", ctypes.c_ushort),
                    ("ipv4_addr", ctypes.c_byte * 4),
                    ("ipv6_addr", ctypes.c_byte * 16),
                    ("__pad2", ctypes.c_ulong)]

    WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA
    WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA

    def inet_pton(address_family, ip_string):
        """Convert *ip_string* to packed binary form for *address_family*.

        Raises:
            socket.error: If the address cannot be parsed or the
                address family is neither AF_INET nor AF_INET6.
        """
        addr = _sockaddr()
        ip_string = ip_string.encode('ascii')
        addr.sa_family = address_family
        addr_size = ctypes.c_int(ctypes.sizeof(addr))

        status = WSAStringToAddressA(ip_string, address_family, None,
                                     ctypes.byref(addr),
                                     ctypes.byref(addr_size))
        if status != 0:
            raise socket.error(ctypes.FormatError())

        if address_family == socket.AF_INET:
            return ctypes.string_at(addr.ipv4_addr, 4)
        if address_family == socket.AF_INET6:
            return ctypes.string_at(addr.ipv6_addr, 16)
        raise socket.error('unknown address family')
819 | |
820 | |
def parse_host(host):
    """\
    Low-level function used to parse the host portion of a URL.

    Returns a tuple of (family, host) where *family* is a
    :mod:`socket` module constant or ``None``, and host is a string.

    >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
    True
    >>> parse_host('[::1]') == (socket.AF_INET6, '::1')
    True
    >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
    True

    Odd doctest formatting above due to py3's switch from int to enums
    for :mod:`socket` constants.

    Raises:
        URLParseError: If *host* is a bracketed literal that is not a
            valid IPv6 address.
    """
    if not host:
        return None, u''
    if u':' in host and u'[' == host[0] and u']' == host[-1]:
        # Bracketed literal: strip the brackets and require IPv6.
        host = host[1:-1]
        try:
            inet_pton(socket.AF_INET6, host)
        except socket.error as se:
            raise URLParseError('invalid IPv6 host: %r (%r)' % (host, se))
        except UnicodeEncodeError:
            # Falls through to the IPv4 / plain-hostname handling below.
            pass  # TODO: this can't be a real host right?
        else:
            family = socket.AF_INET6
            return family, host
    try:
        inet_pton(socket.AF_INET, host)
    except (socket.error, UnicodeEncodeError):
        family = None  # not an IP
    else:
        family = socket.AF_INET
    return family, host
859 | |
860 | |
def parse_url(url_text):
    """\
    Used to parse the text for a single URL into a dictionary, used
    internally by the :class:`URL` type.

    Note that "URL" has a very narrow, standards-based
    definition. While :func:`parse_url` may raise
    :class:`URLParseError` under a very limited number of conditions,
    such as non-integer port, a surprising number of strings are
    technically valid URLs. For instance, the text ``"url"`` is a
    valid URL, because it is a relative path.

    In short, do not expect this function to validate form inputs or
    other more colloquial usages of URLs.

    >>> res = parse_url('http://127.0.0.1:3000/?a=1')
    >>> sorted(res.keys())  # res is a basic dictionary
    ['_netloc_sep', 'authority', 'family', 'fragment', 'host', 'password', 'path', 'port', 'query', 'scheme', 'username']
    """
    url_text = unicode(url_text)
    # raise TypeError('parse_url expected text, not %r' % url_str)
    match = _URL_RE.match(url_text)
    if match is None:
        raise URLParseError('could not parse url: %r' % url_text)
    parts = match.groupdict()

    # Split "user:pw@host:port" into userinfo and hostinfo; the last
    # '@' is the separator.
    authority = parts['authority']
    username, password, hostinfo = None, None, authority
    if authority:
        userinfo, at_sign, hostinfo = authority.rpartition('@')
        if at_sign:
            # TODO: empty userinfo error?
            username, _, password = userinfo.partition(':')

    host, port = None, None
    if hostinfo:
        host, colon, port_text = hostinfo.partition(u':')
        if colon:
            if host and host[0] == u'[' and u']' in port_text:
                # The first ':' was inside a bracketed IPv6 literal:
                # stitch the literal back together up to the ']'.
                host_tail, _, port_text = port_text.partition(u']')
                host = host + u':' + host_tail + u']'
            if port_text and port_text[0] == u':':
                port_text = port_text[1:]

            try:
                port = int(port_text)
            except ValueError:
                if port_text:  # empty ports ok according to RFC 3986 6.2.3
                    raise URLParseError('expected integer for port, not %r'
                                        % port_text)
                port = None

    family, host = parse_host(host)

    parts['username'] = username
    parts['password'] = password
    parts['family'] = family
    parts['host'] = host
    parts['port'] = port
    return parts
923 | |
924 | |
# Result of parsing the empty string, computed once at import time.
DEFAULT_PARSED_URL = parse_url('')
926 | |
927 | |
def parse_qsl(qs, keep_blank_values=True, encoding=DEFAULT_ENCODING):
    """
    Converts a query string into a list of (key, value) pairs.

    Pairs are separated by ``&`` or ``;``. A pair with no value (either
    ``key`` or ``key=``) is reported with a value of ``None`` when
    *keep_blank_values* is true and is skipped otherwise. ``+`` is
    treated as a space before percent-decoding. *encoding* is accepted
    but not used by this implementation.
    """
    ret = []
    for amp_chunk in qs.split('&'):
        for pair in amp_chunk.split(';'):
            if not pair:
                continue
            raw_key, _, raw_value = pair.partition('=')
            has_value = bool(raw_value)
            if not has_value and not keep_blank_values:
                continue
            key = unquote(raw_key.replace('+', ' '))
            if has_value:
                value = unquote(raw_value.replace('+', ' '))
            else:
                value = None
            ret.append((key, value))
    return ret
948 | |
949 | |
950 """ | |
951 # What follows is the OrderedMultiDict from dictutils.py, circa | |
952 # 20161021, used for the QueryParamDict, toward the bottom. | |
953 """ | |
954 | |
955 try: | |
956 from collections.abc import KeysView, ValuesView, ItemsView | |
957 except ImportError: | |
958 from collections import KeysView, ValuesView, ItemsView | |
959 | |
960 try: | |
961 from itertools import izip_longest | |
962 except ImportError: | |
963 from itertools import zip_longest as izip_longest | |
964 | |
try:
    from typeutils import make_sentinel
    _MISSING = make_sentinel(var_name='_MISSING')
except ImportError:
    # Standalone fallback: any unique object works as a "no argument
    # given" marker.
    _MISSING = object()


# Index positions within a linked-list cell. Cells built by
# OrderedMultiDict._insert are [PREV, NEXT, KEY, VALUE].
PREV, NEXT, KEY, VALUE, SPREV, SNEXT = range(6)


class OrderedMultiDict(dict):
    """A MultiDict is a dictionary that can have multiple values per key
    and the OrderedMultiDict (OMD) is a MultiDict that retains
    original insertion order. Common use cases include:

      * handling query strings parsed from URLs
      * inverting a dictionary to create a reverse index (values to keys)
      * stacking data from multiple dictionaries in a non-destructive way

    The OrderedMultiDict constructor is identical to the built-in
    :class:`dict`, and overall the API constitutes an intuitive
    superset of the built-in type:

    >>> omd = OrderedMultiDict()
    >>> omd['a'] = 1
    >>> omd['b'] = 2
    >>> omd.add('a', 3)
    >>> omd.get('a')
    3
    >>> omd.getlist('a')
    [1, 3]

    Some non-:class:`dict`-like behaviors also make an appearance,
    such as support for :func:`reversed`:

    >>> list(reversed(omd))
    ['b', 'a']

    Note that unlike some other MultiDicts, this OMD gives precedence
    to the most recent value added. ``omd['a']`` refers to ``3``, not
    ``1``.

    >>> omd
    OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)])
    >>> omd.poplast('a')
    3
    >>> omd
    OrderedMultiDict([('a', 1), ('b', 2)])
    >>> omd.pop('a')
    1
    >>> omd
    OrderedMultiDict([('b', 2)])

    Note that calling :func:`dict` on an OMD is not a way to get at
    every value; as shown here it yields only the most recent value
    per key:

    >>> from pprint import pprint as pp  # ensuring proper key ordering
    >>> omd = OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)])
    >>> pp(dict(omd))
    {'a': 3, 'b': 2}

    If you want a safe-to-modify dictionary, flat or with every value
    listed, use :meth:`OrderedMultiDict.todict()`.

    >>> pp(omd.todict())
    {'a': 3, 'b': 2}
    >>> pp(omd.todict(multi=True))
    {'a': [1, 3], 'b': [2]}

    With ``multi=False``, items appear with the keys in their original
    insertion order, alongside the most-recently inserted value for
    that key.

    >>> OrderedMultiDict([('a', 1), ('b', 2), ('a', 3)]).items(multi=False)
    [('a', 3), ('b', 2)]

    Implementation note: the underlying :class:`dict` maps each key to
    the list of its values, while ``self.root`` / ``self._map`` hold a
    circular doubly-linked list of ``[PREV, NEXT, KEY, VALUE]`` cells
    recording global insertion order. The two must be updated together.
    """
    def __init__(self, *args, **kwargs):
        if len(args) > 1:
            raise TypeError('%s expected at most 1 argument, got %s'
                            % (self.__class__.__name__, len(args)))
        super(OrderedMultiDict, self).__init__()

        self._clear_ll()
        if args:
            self.update_extend(args[0])
        if kwargs:
            self.update(kwargs)

    def _clear_ll(self):
        # (Re)initialize the linked list: an empty key->cells map and a
        # root cell that points at itself in both directions.
        try:
            _map = self._map
        except AttributeError:
            _map = self._map = {}
            self.root = []
        _map.clear()
        self.root[:] = [self.root, self.root, None]

    def _insert(self, k, v):
        # Append a new cell for (k, v) at the tail of the linked list
        # and remember it under k.
        root = self.root
        cells = self._map.setdefault(k, [])
        last = root[PREV]
        cell = [last, root, k, v]
        last[NEXT] = root[PREV] = cell
        cells.append(cell)

    def add(self, k, v):
        """Add a single value *v* under a key *k*. Existing values under *k*
        are preserved.
        """
        values = super(OrderedMultiDict, self).setdefault(k, [])
        self._insert(k, v)
        values.append(v)

    def addlist(self, k, v):
        """Add an iterable of values underneath a specific key, preserving
        any values already under that key.

        >>> omd = OrderedMultiDict([('a', -1)])
        >>> omd.addlist('a', range(3))
        >>> omd
        OrderedMultiDict([('a', -1), ('a', 0), ('a', 1), ('a', 2)])

        Called ``addlist`` for consistency with :meth:`getlist`, but
        tuples and other sequences and iterables work, including
        one-shot iterators. Adding an empty iterable is a no-op.
        """
        # Materialize once: *v* is walked twice below, which would
        # desynchronize the linked list and the dict for a generator.
        new_values = list(v)
        if not new_values:
            # Do not leave an empty value list behind under a new key.
            return
        self_insert = self._insert
        values = super(OrderedMultiDict, self).setdefault(k, [])
        for subv in new_values:
            self_insert(k, subv)
        values.extend(new_values)

    def get(self, k, default=None):
        """Return the value for key *k* if present in the dictionary, else
        *default*. If *default* is not given, ``None`` is returned.
        This method never raises a :exc:`KeyError`.

        To get all values under a key, use :meth:`OrderedMultiDict.getlist`.
        """
        return super(OrderedMultiDict, self).get(k, [default])[-1]

    def getlist(self, k, default=_MISSING):
        """Get all values for key *k* as a list, if *k* is in the
        dictionary, else *default*. The list returned is a copy and
        can be safely mutated. If *default* is not given, an empty
        :class:`list` is returned.
        """
        try:
            return super(OrderedMultiDict, self).__getitem__(k)[:]
        except KeyError:
            if default is _MISSING:
                return []
            return default

    def clear(self):
        "Empty the dictionary."
        super(OrderedMultiDict, self).clear()
        self._clear_ll()

    def setdefault(self, k, default=_MISSING):
        """If key *k* is in the dictionary, return its value. If not, insert
        *k* with a value of *default* and return *default*. *default*
        defaults to ``None``. See :meth:`dict.setdefault` for more
        information.
        """
        if not super(OrderedMultiDict, self).__contains__(k):
            self[k] = None if default is _MISSING else default
        return self[k]

    def copy(self):
        "Return a shallow copy of the dictionary."
        return self.__class__(self.iteritems(multi=True))

    @classmethod
    def fromkeys(cls, keys, default=None):
        """Create a dictionary from a list of keys, with all the values
        set to *default*, or ``None`` if *default* is not set.
        """
        return cls([(k, default) for k in keys])

    def update(self, E, **F):
        """Add items from a dictionary or iterable (and/or keyword arguments),
        overwriting values under an existing key. See
        :meth:`dict.update` for more details.

        Keys that were present before the call have all of their old
        values replaced; every value supplied for a key by *E* is kept.
        """
        # E and F are throwback names to the dict() __doc__
        if E is not self:
            self_add = self.add
            if isinstance(E, OrderedMultiDict):
                for k in E:
                    if k in self:
                        del self[k]
                for k, v in E.iteritems(multi=True):
                    self_add(k, v)
            elif hasattr(E, 'keys'):
                for k in E.keys():
                    self[k] = E[k]
            else:
                seen = set()
                seen_add = seen.add
                for k, v in E:
                    if k not in seen:
                        # First sighting of k in E: drop only the values
                        # that predate this update, never ones added by it.
                        seen_add(k)
                        if k in self:
                            del self[k]
                    self_add(k, v)
        # Keyword arguments are applied even when E is self.
        for k in F:
            self[k] = F[k]
        return

    def update_extend(self, E, **F):
        """Add items from a dictionary, iterable, and/or keyword
        arguments without overwriting existing items present in the
        dictionary. Like :meth:`update`, but adds to existing keys
        instead of overwriting them.
        """
        if E is self:
            # items() returns a list, so it is safe to add while looping.
            iterator = iter(E.items())
        elif isinstance(E, OrderedMultiDict):
            iterator = E.iteritems(multi=True)
        elif hasattr(E, 'keys'):
            iterator = ((k, E[k]) for k in E.keys())
        else:
            iterator = E

        self_add = self.add
        for k, v in iterator:
            self_add(k, v)
        for k in F:
            self_add(k, F[k])

    def __setitem__(self, k, v):
        if super(OrderedMultiDict, self).__contains__(k):
            self._remove_all(k)
        self._insert(k, v)
        super(OrderedMultiDict, self).__setitem__(k, [v])

    def __getitem__(self, k):
        return super(OrderedMultiDict, self).__getitem__(k)[-1]

    def __delitem__(self, k):
        super(OrderedMultiDict, self).__delitem__(k)
        self._remove_all(k)

    def __eq__(self, other):
        if self is other:
            return True
        try:
            if len(other) != len(self):
                return False
        except TypeError:
            return False
        if isinstance(other, OrderedMultiDict):
            # Against another OMD, every (key, value) pair must match in
            # insertion order.
            selfi = self.iteritems(multi=True)
            otheri = other.iteritems(multi=True)
            zipped_items = izip_longest(selfi, otheri, fillvalue=(None, None))
            for (selfk, selfv), (otherk, otherv) in zipped_items:
                if selfk != otherk or selfv != otherv:
                    return False
            if not(next(selfi, _MISSING) is _MISSING
                   and next(otheri, _MISSING) is _MISSING):
                # leftovers  (TODO: watch for StopIteration?)
                return False
            return True
        elif hasattr(other, 'keys'):
            # Against a plain mapping, compare the most recent value of
            # each key.
            for selfk in self:
                try:
                    if other[selfk] != self[selfk]:
                        return False
                except KeyError:
                    return False
            return True
        return False

    def __ne__(self, other):
        return not (self == other)

    def pop(self, k, default=_MISSING):
        """Remove all values under key *k*, returning the most-recently
        inserted value. Raises :exc:`KeyError` if the key is not
        present and no *default* is provided.
        """
        try:
            return self.popall(k)[-1]
        except KeyError:
            if default is _MISSING:
                raise KeyError(k)
        return default

    def popall(self, k, default=_MISSING):
        """Remove all values under key *k*, returning them in the form of
        a list. Raises :exc:`KeyError` if the key is not present and no
        *default* is provided.
        """
        super_self = super(OrderedMultiDict, self)
        if super_self.__contains__(k):
            self._remove_all(k)
        if default is _MISSING:
            return super_self.pop(k)
        return super_self.pop(k, default)

    def poplast(self, k=_MISSING, default=_MISSING):
        """Remove and return the most-recently inserted value under the key
        *k*, or the most-recently inserted key if *k* is not
        provided. If no values remain under *k*, it will be removed
        from the OMD.  Raises :exc:`KeyError` if *k* is not present in
        the dictionary, or the dictionary is empty.
        """
        if k is _MISSING:
            if self:
                k = self.root[PREV][KEY]
            else:
                raise KeyError('empty %r' % type(self))
        try:
            self._remove(k)
        except KeyError:
            if default is _MISSING:
                raise KeyError(k)
            return default
        values = super(OrderedMultiDict, self).__getitem__(k)
        v = values.pop()
        if not values:
            super(OrderedMultiDict, self).__delitem__(k)
        return v

    def _remove(self, k):
        # Unlink the most recent cell for k from the linked list.
        values = self._map[k]
        cell = values.pop()
        cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV]
        if not values:
            del self._map[k]

    def _remove_all(self, k):
        # Unlink every cell for k from the linked list.
        values = self._map[k]
        while values:
            cell = values.pop()
            cell[PREV][NEXT], cell[NEXT][PREV] = cell[NEXT], cell[PREV]
        del self._map[k]

    def iteritems(self, multi=False):
        """Iterate over the OMD's items in insertion order. By default,
        yields only the most-recently inserted value for each key. Set
        *multi* to ``True`` to get all inserted items.
        """
        root = self.root
        curr = root[NEXT]
        if multi:
            while curr is not root:
                yield curr[KEY], curr[VALUE]
                curr = curr[NEXT]
        else:
            for key in self.iterkeys():
                yield key, self[key]

    def iterkeys(self, multi=False):
        """Iterate over the OMD's keys in insertion order. By default, yields
        each key once, at the position of its first insertion. Set
        *multi* to ``True`` to get all keys, including duplicates, in
        insertion order.
        """
        root = self.root
        curr = root[NEXT]
        if multi:
            while curr is not root:
                yield curr[KEY]
                curr = curr[NEXT]
        else:
            yielded = set()
            yielded_add = yielded.add
            while curr is not root:
                k = curr[KEY]
                if k not in yielded:
                    yielded_add(k)
                    yield k
                curr = curr[NEXT]

    def itervalues(self, multi=False):
        """Iterate over the OMD's values in insertion order. By default,
        yields the most-recently inserted value per unique key.  Set
        *multi* to ``True`` to get all values according to insertion
        order.
        """
        for k, v in self.iteritems(multi=multi):
            yield v

    def todict(self, multi=False):
        """Gets a basic :class:`dict` of the items in this dictionary. Keys
        are the same as the OMD, values are the most recently inserted
        values for each key.

        Setting the *multi* arg to ``True`` instead maps every key to a
        list of all of its values. The lists are copies that can be
        safely mutated.
        """
        if multi:
            return dict([(k, self.getlist(k)) for k in self])
        return dict([(k, self[k]) for k in self])

    def sorted(self, key=None, reverse=False):
        """Similar to the built-in :func:`sorted`, except this method returns
        a new :class:`OrderedMultiDict` sorted by the provided key
        function, optionally reversed. Every inserted item is kept,
        including multiple values under the same key.

        Args:
            key (callable): A callable to determine the sort key of
              each element. The callable should expect an **item**
              (key-value pair tuple).
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict(zip(range(3), range(3)))
        >>> omd.sorted(reverse=True)
        OrderedMultiDict([(2, 2), (1, 1), (0, 0)])

        Note that the key function receives an **item** (key-value
        tuple), so the recommended signature looks like:

        >>> omd = OrderedMultiDict(zip('hello', 'world'))
        >>> omd.sorted(key=lambda i: i[1])  # i[0] is the key, i[1] is the val
        OrderedMultiDict([('o', 'd'), ('l', 'l'), ('e', 'o'), ('l', 'r'), ('h', 'w')])
        """
        cls = self.__class__
        # multi=True so that no value is dropped for repeated keys.
        return cls(sorted(self.iteritems(multi=True),
                          key=key, reverse=reverse))

    def sortedvalues(self, key=None, reverse=False):
        """Returns a copy of the :class:`OrderedMultiDict` with the same keys
        in the same order as the original OMD, but the values within
        each keyspace have been sorted according to *key* and
        *reverse*.

        Args:
            key (callable): A single-argument callable to determine
              the sort key of each element. The callable is applied to
              each **value** stored under a key.
            reverse (bool): Set to ``True`` to reverse the ordering.

        >>> omd = OrderedMultiDict()
        >>> omd.addlist('even', [6, 2])
        >>> omd.addlist('odd', [1, 5])
        >>> omd.add('even', 4)
        >>> omd.add('odd', 3)
        >>> somd = omd.sortedvalues()
        >>> somd.getlist('even')
        [2, 4, 6]
        >>> somd.keys(multi=True) == omd.keys(multi=True)
        True
        >>> omd == somd
        False
        >>> somd
        OrderedMultiDict([('even', 2), ('even', 4), ('odd', 1), ('odd', 3), ('even', 6), ('odd', 5)])

        As demonstrated above, contents and key order are
        retained. Only value order changes.
        """
        try:
            superself_iteritems = super(OrderedMultiDict, self).iteritems()
        except AttributeError:
            superself_iteritems = super(OrderedMultiDict, self).items()
        # (not reverse) because they pop off in reverse order for reinsertion
        sorted_val_map = dict([(k, sorted(v, key=key, reverse=(not reverse)))
                               for k, v in superself_iteritems])
        ret = self.__class__()
        for k in self.iterkeys(multi=True):
            ret.add(k, sorted_val_map[k].pop())
        return ret

    def inverted(self):
        """Returns a new :class:`OrderedMultiDict` with values and keys
        swapped, like creating dictionary transposition or reverse
        index.  Insertion order is retained and all keys and values
        are represented in the output.

        >>> omd = OMD([(0, 2), (1, 2)])
        >>> omd.inverted().getlist(2)
        [0, 1]

        Inverting twice yields a copy of the original:

        >>> omd.inverted().inverted()
        OrderedMultiDict([(0, 2), (1, 2)])
        """
        return self.__class__((v, k) for k, v in self.iteritems(multi=True))

    def counts(self):
        """Returns a mapping from key to number of values inserted under that
        key. Like :py:class:`collections.Counter`, but returns a new
        :class:`OrderedMultiDict`.
        """
        # Returns an OMD because Counter/OrderedDict may not be
        # available, and neither Counter nor dict maintain order.
        super_getitem = super(OrderedMultiDict, self).__getitem__
        return self.__class__((k, len(super_getitem(k))) for k in self)

    def keys(self, multi=False):
        """Returns a list containing the output of :meth:`iterkeys`.  See
        that method's docs for more details.
        """
        return list(self.iterkeys(multi=multi))

    def values(self, multi=False):
        """Returns a list containing the output of :meth:`itervalues`.  See
        that method's docs for more details.
        """
        return list(self.itervalues(multi=multi))

    def items(self, multi=False):
        """Returns a list containing the output of :meth:`iteritems`.  See
        that method's docs for more details.
        """
        return list(self.iteritems(multi=multi))

    def __iter__(self):
        return self.iterkeys()

    def __reversed__(self):
        # Walk the linked list backwards and yield each key when its
        # earliest cell is reached, i.e. once as many cells have been
        # seen for it as it has values.
        root = self.root
        curr = root[PREV]
        lengths = {}
        lengths_sd = lengths.setdefault
        get_values = super(OrderedMultiDict, self).__getitem__
        while curr is not root:
            k = curr[KEY]
            vals = get_values(k)
            if lengths_sd(k, 1) == len(vals):
                yield k
            lengths[k] += 1
            curr = curr[PREV]

    def __repr__(self):
        cn = self.__class__.__name__
        kvs = ', '.join([repr((k, v)) for k, v in self.iteritems(multi=True)])
        return '%s([%s])' % (cn, kvs)

    def viewkeys(self):
        "OMD.viewkeys() -> a set-like object providing a view on OMD's keys"
        return KeysView(self)

    def viewvalues(self):
        "OMD.viewvalues() -> an object providing a view on OMD's values"
        return ValuesView(self)

    def viewitems(self):
        "OMD.viewitems() -> a set-like object providing a view on OMD's items"
        return ItemsView(self)
1505 | |
1506 | |
try:
    # try to import the built-in one anyways; when it is importable it
    # replaces the copy defined in this module.
    from boltons.dictutils import OrderedMultiDict
except ImportError:
    pass

# Short alias.
OMD = OrderedMultiDict
1514 | |
1515 | |
class QueryParamDict(OrderedMultiDict):
    """A subclass of :class:`~dictutils.OrderedMultiDict` specialized for
    representing query string values. Everything is fully unquoted on
    load and all parsed keys and values are strings by default.

    As the name suggests, multiple values are supported and insertion
    order is preserved.

    >>> qp = QueryParamDict.from_text(u'key=val1&key=val2&utm_source=rtd')
    >>> qp.getlist('key')
    [u'val1', u'val2']
    >>> qp['key']
    u'val2'
    >>> qp.add('key', 'val3')
    >>> qp.to_text()
    'key=val1&key=val2&utm_source=rtd&key=val3'

    See :class:`~dictutils.OrderedMultiDict` for more API features.
    """

    @classmethod
    def from_text(cls, query_string):
        """
        Parse *query_string* and return a new :class:`QueryParamDict`.

        Parameters without a value are kept, not dropped.
        """
        return cls(parse_qsl(query_string, keep_blank_values=True))

    def to_text(self, full_quote=False):
        """
        Render and return a query string.

        Args:
            full_quote (bool): Whether or not to percent-quote special
                characters or leave them decoded for readability.
        """
        rendered = []
        for raw_key, raw_val in self.iteritems(multi=True):
            key_text = quote_query_part(to_unicode(raw_key),
                                        full_quote=full_quote)
            if raw_val is None:
                # A value of None renders as a bare key with no '='.
                rendered.append(key_text)
                continue
            val_text = quote_query_part(to_unicode(raw_val),
                                        full_quote=full_quote)
            rendered.append(u'='.join((key_text, val_text)))
        return u'&'.join(rendered)
1561 | |
1562 # TODO: cleanup OMD/cachedproperty etc.? | |
1563 | |
1564 # end urlutils.py |