Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/future/backports/urllib/request.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d30785e31577 |
---|---|
1 """ | |
2 Ported using Python-Future from the Python 3.3 standard library. | |
3 | |
4 An extensible library for opening URLs using a variety of protocols | |
5 | |
6 The simplest way to use this module is to call the urlopen function, | |
7 which accepts a string containing a URL or a Request object (described | |
8 below). It opens the URL and returns the results as file-like | |
9 object; the returned object has some extra methods described below. | |
10 | |
11 The OpenerDirector manages a collection of Handler objects that do | |
12 all the actual work. Each Handler implements a particular protocol or | |
13 option. The OpenerDirector is a composite object that invokes the | |
14 Handlers needed to open the requested URL. For example, the | |
15 HTTPHandler performs HTTP GET and POST requests and deals with | |
16 non-error returns. The HTTPRedirectHandler automatically deals with | |
17 HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler | |
18 deals with digest authentication. | |
19 | |
20 urlopen(url, data=None) -- Basic usage is the same as original | |
21 urllib. pass the url and optionally data to post to an HTTP URL, and | |
22 get a file-like object back. One difference is that you can also pass | |
23 a Request instance instead of URL. Raises a URLError (subclass of | |
24 IOError); for HTTP errors, raises an HTTPError, which can also be | |
25 treated as a valid response. | |
26 | |
27 build_opener -- Function that creates a new OpenerDirector instance. | |
28 Will install the default handlers. Accepts one or more Handlers as | |
29 arguments, either instances or Handler classes that it will | |
30 instantiate. If one of the argument is a subclass of the default | |
31 handler, the argument will be installed instead of the default. | |
32 | |
33 install_opener -- Installs a new opener as the default opener. | |
34 | |
35 objects of interest: | |
36 | |
37 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages | |
38 the Handler classes, while dealing with requests and responses. | |
39 | |
40 Request -- An object that encapsulates the state of a request. The | |
41 state can be as simple as the URL. It can also include extra HTTP | |
42 headers, e.g. a User-Agent. | |
43 | |
44 BaseHandler -- | |
45 | |
46 internals: | |
47 BaseHandler and parent | |
48 _call_chain conventions | |
49 | |
50 Example usage: | |
51 | |
52 import urllib.request | |
53 | |
54 # set up authentication info | |
55 authinfo = urllib.request.HTTPBasicAuthHandler() | |
56 authinfo.add_password(realm='PDQ Application', | |
57 uri='https://mahler:8092/site-updates.py', | |
58 user='klem', | |
59 passwd='geheim$parole') | |
60 | |
61 proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) | |
62 | |
63 # build a new opener that adds authentication and caching FTP handlers | |
64 opener = urllib.request.build_opener(proxy_support, authinfo, | |
65 urllib.request.CacheFTPHandler) | |
66 | |
67 # install it | |
68 urllib.request.install_opener(opener) | |
69 | |
70 f = urllib.request.urlopen('http://www.python.org/') | |
71 """ | |
72 | |
73 # XXX issues: | |
74 # If an authentication error handler that tries to perform | |
75 # authentication for some reason but fails, how should the error be | |
76 # signalled? The client needs to know the HTTP error code. But if | |
77 # the handler knows that the problem was, e.g., that it didn't know | |
78 # that hash algo that requested in the challenge, it would be good to | |
79 # pass that information along to the client, too. | |
80 # ftp errors aren't handled cleanly | |
81 # check digest against correct (i.e. non-apache) implementation | |
82 | |
83 # Possible extensions: | |
84 # complex proxies XXX not sure what exactly was meant by this | |
85 # abstract factory for opener | |
86 | |
87 from __future__ import absolute_import, division, print_function, unicode_literals | |
88 from future.builtins import bytes, dict, filter, input, int, map, open, str | |
89 from future.utils import PY2, PY3, raise_with_traceback | |
90 | |
91 import base64 | |
92 import bisect | |
93 import hashlib | |
94 import array | |
95 | |
96 from future.backports import email | |
97 from future.backports.http import client as http_client | |
98 from .error import URLError, HTTPError, ContentTooShortError | |
99 from .parse import ( | |
100 urlparse, urlsplit, urljoin, unwrap, quote, unquote, | |
101 splittype, splithost, splitport, splituser, splitpasswd, | |
102 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) | |
103 from .response import addinfourl, addclosehook | |
104 | |
105 import io | |
106 import os | |
107 import posixpath | |
108 import re | |
109 import socket | |
110 import sys | |
111 import time | |
112 import tempfile | |
113 import contextlib | |
114 import warnings | |
115 | |
116 from future.utils import PY2 | |
117 | |
118 if PY2: | |
119 from collections import Iterable | |
120 else: | |
121 from collections.abc import Iterable | |
122 | |
123 # check for SSL | |
124 try: | |
125 import ssl | |
126 # Not available in the SSL module in Py2: | |
127 from ssl import SSLContext | |
128 except ImportError: | |
129 _have_ssl = False | |
130 else: | |
131 _have_ssl = True | |
132 | |
133 __all__ = [ | |
134 # Classes | |
135 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', | |
136 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', | |
137 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', | |
138 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', | |
139 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', | |
140 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', | |
141 'UnknownHandler', 'HTTPErrorProcessor', | |
142 # Functions | |
143 'urlopen', 'install_opener', 'build_opener', | |
144 'pathname2url', 'url2pathname', 'getproxies', | |
145 # Legacy interface | |
146 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', | |
147 ] | |
148 | |
149 # used in User-Agent header sent | |
150 __version__ = sys.version[:3] | |
151 | |
152 _opener = None | |
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs):
    """Open *url* (a string or a Request) and return a file-like response.

    ``cafile``, ``capath`` and ``cadefault`` are keyword-only arguments
    (emulated through ``**_3to2kwargs`` for Python 2 compatibility).
    Supplying any of them builds a one-off opener whose HTTPS handler
    verifies server certificates; otherwise the globally installed
    opener (see install_opener()) is used, created on first call.
    """
    # Emulated keyword-only arguments: pop with defaults.
    cadefault = _3to2kwargs.pop('cadefault', False)
    capath = _3to2kwargs.pop('capath', None)
    cafile = _3to2kwargs.pop('cafile', None)
    global _opener
    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        # Dedicated certificate-verifying context for this call only.
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        # Lazily create and cache the default opener.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
178 | |
def install_opener(opener):
    """Install *opener* as the process-wide default used by urlopen()."""
    global _opener
    _opener = opener
182 | |
# Paths of anonymous temporary files created by urlretrieve(); deleted
# again by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = splittype(url)

    # closing() guarantees the response is closed even on error.
    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup: caller-supplied name, or an
        # anonymous NamedTemporaryFile remembered for urlcleanup().
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8       # copy block size
            size = -1         # -1 means "unknown total size"
            read = 0
            blocknum = 0
            # headers is an email-message-like object, so the membership
            # test and the item lookup are both case-insensitive.
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial call with block 0 announces the transfer size.
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    # A short read against a known Content-Length is an error.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
246 | |
def urlcleanup():
    """Delete the temporary files created by urlretrieve() and drop the
    globally installed opener, if any."""
    for path in _url_tempfiles:
        try:
            os.unlink(path)
        except EnvironmentError:
            # Best effort: the file may already have been removed.
            pass
    del _url_tempfiles[:]

    global _opener
    if _opener:
        _opener = None
258 | |
# Matches a trailing ":port" on a host name.  re.ASCII exists only on
# Python 3; Python 2 patterns are ASCII-only by default, so no flag is
# needed there (0 means "no flags").
_cut_port_re = re.compile(r":\d+$", re.ASCII if PY3 else 0)
263 | |
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse(request.full_url)[1]
    if not host:
        # No netloc in the URL; fall back to the Host header.
        host = request.get_header("Host", "")

    # Strip a trailing ":port", if present, then normalise case.
    host = _cut_port_re.sub("", host, 1)
    return host.lower()
280 | |
class Request(object):
    """Encapsulate a single URL request: URL, optional payload, headers,
    origin information and HTTP method.

    Any '#fragment' is split off the URL and kept in ``self.fragment``
    so it is never sent to the server.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        # Strip and remember the '#fragment' component, if any.
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        # Host to tunnel to when an HTTPS request is proxied; set by
        # set_proxy() on the first call for an https request.
        self._tunnel_host = None
        # NOTE(review): `headers` uses a mutable default; it is only
        # iterated here, never mutated, so the shared default is harmless.
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            # request_host() only needs self.full_url, which is set above.
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        # Must run last: derives self.type/self.host/self.selector.
        self._parse()

    def _parse(self):
        # Split full_url into scheme (self.type), host and selector;
        # a URL without a scheme is rejected here.
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            # An explicitly supplied method always wins.
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the original URL, re-attaching the fragment if any."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    # (thin wrappers kept for backward compatibility; use the
    #  corresponding attributes directly instead)

    def add_data(self, data):
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        self.data = data

    def has_data(self):
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.data is not None

    def get_data(self):
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.data

    def get_type(self):
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.type

    def get_host(self):
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.host

    def get_selector(self):
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.selector

    def is_unverifiable(self):
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.unverifiable

    def get_origin_req_host(self):
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Route this request through a proxy at *host* using scheme *type*.

        For an https request the original host is remembered for CONNECT
        tunnelling instead of rewriting type/selector.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type= type
            # A proxied request line must carry the absolute URL.
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # True once set_proxy() has rewritten the selector.
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        # NOTE(review): exact-key lookup; callers are expected to pass
        # capitalized names as stored by add_header().
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        # Redirectable headers take precedence over unredirected ones.
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs; ordinary
        headers override unredirected ones of the same name."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
401 | |
class OpenerDirector(object):
    """Invoke a chain of registered handlers to open URLs.

    Handler capabilities are discovered by method name in add_handler():
    ``<protocol>_open``, ``<protocol>_request``, ``<protocol>_response``
    and ``<protocol>_error_<code>`` methods are indexed into the
    dispatch tables created in __init__.
    """
    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}        # {protocol_or_'default'/'unknown': [handlers]}
        self.handle_error = {}       # {protocol: {code_or_name: [handlers]}}
        self.process_response = {}   # {protocol: [processors]}
        self.process_request = {}    # {protocol: [processors]}

    def add_handler(self, handler):
        """Register *handler*, indexing each of its protocol methods into
        the dispatch tables; per-slot lists stay sorted by handler_order
        (via BaseHandler.__lt__)."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split "<protocol>_<condition>" on the first underscore.
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind is an int status code
                # when possible, otherwise a name like 'default'.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep the slot ordered by handler_order.
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could. Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """
        Accept a URL or a Request object

        Python-Future: if the URL is passed as a byte-string, decode it first.
        """
        if isinstance(fullurl, bytes):
            fullurl = fullurl.decode()
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # Dispatch order: default_open, then <protocol>_open, then
        # unknown_open; the first non-None result wins.
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error handlers; for HTTP
        protocols, fall back to http_error_default when no code-specific
        handler resolves it."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            # NOTE(review): 'dict' shadows the builtin; kept as-is for
            # historical parity with the stdlib implementation.
            dict = self.handle_error['http']  # https is not different than http
            proto = args[2]  # YUCK! (the numeric status code)
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
545 | |
546 # XXX probably also want an abstract factory that knows when it makes | |
547 # sense to skip a superclass in favor of a subclass and when it might | |
548 # make sense to include both | |
549 | |
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def isclass(obj):
        # True for both new-style and (Py2) old-style classes.
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http_client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # A default is skipped when the caller supplies a subclass of it
    # (either as a class or as an instance).
    skip = set()
    for default in default_classes:
        for supplied in handlers:
            if isclass(supplied):
                if issubclass(supplied, default):
                    skip.add(default)
            elif isinstance(supplied, default):
                skip.add(default)
    default_classes = [cls for cls in default_classes if cls not in skip]

    for cls in default_classes:
        opener.add_handler(cls())

    # Caller-supplied handlers go in last; classes are instantiated.
    for supplied in handlers:
        if isclass(supplied):
            supplied = supplied()
        opener.add_handler(supplied)
    return opener
587 | |
class BaseHandler(object):
    """Common base class for protocol handlers.

    Handlers are ordered by ``handler_order`` (smaller runs first);
    OpenerDirector keeps its handler lists sorted via __lt__ below.
    """
    # Default priority; specialised handlers override this.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector that owns this handler."""
        self.parent = parent

    def close(self):
        """Kept only for backwards compatibility; does nothing."""
        pass

    def __lt__(self, other):
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # Objects without handler_order (custom user classes written
        # before ordering existed) sort after the default handlers.
        return True
605 | |
606 | |
class HTTPErrorProcessor(BaseHandler):
    """Turn non-2xx HTTP responses into errors via the opener's
    error-handler chain."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Pass 2xx responses straight through; route anything else to
        self.parent.error() so the error handlers can deal with it."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # Per RFC 2616 only the 2xx class indicates that the request was
        # successfully received, understood, and accepted.
        if code < 200 or 300 <= code:
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    # HTTPS responses are processed identically.
    https_response = http_response
623 | |
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort error handler: convert any unhandled HTTP error
    response into an HTTPError exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # Raising (rather than returning) stops the handler chain.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
627 | |
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301/302/303/307 redirects, with loop protection."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Redirect GET/HEAD for all four codes; redirect POST only for
        # 301/302/303 (a 307 must preserve method and body).
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The redirected request carries no body, so drop the headers
        # that describe one.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen. Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Build the follow-up request via redirect_request() and
        re-open it, enforcing the per-URL and total redirect limits."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            # No redirect target at all: let other handlers try.
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlunparse(urlparts)

        # Resolve a relative redirect target against the original URL.
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # All four redirect codes share one implementation.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
730 | |
731 | |
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
        # Bare authority such as 'proxy.example.com:3128' -- no scheme.
        scheme, authority = None, proxy
    else:
        # Full URL: RFC 3986 requires '//' before the authority.
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # For RFC 3986-compliant URLs (ss 3. and 3.3.) the path is empty
        # or starts with '/', so the authority ends at the next '/'.
        end = r_scheme.find("/", 2)
        authority = r_scheme[2:] if end == -1 else r_scheme[2:end]
    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
803 | |
class ProxyHandler(BaseHandler):
    """Rewrite requests to go through the configured proxies.

    *proxies* maps a URL scheme to a proxy URL or authority, e.g.
    ``{'http': 'http://proxy.example.com:3128'}``.
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Create a '<scheme>_open' method per configured scheme.  The
        # loop variables are captured as lambda default arguments so
        # each lambda keeps its own proxy/type (avoids the late-binding
        # closure pitfall).
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*, adding Proxy-Authorization when the
        proxy URL carries userinfo.  Returns None to let the normal
        protocol handlers proceed, or re-opens the request when the
        proxy speaks a different scheme."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        # proxy_bypass (defined elsewhere in this module) decides which
        # hosts skip the proxy -- presumably honouring no_proxy settings.
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            # Basic credentials from the proxy URL's userinfo part.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
845 | |
class HTTPPasswordMgr(object):
    """Map (realm, URI-prefix) pairs to (user, password) credentials."""

    def __init__(self):
        # {realm: {tuple_of_reduced_uris: (user, password)}}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at *uri* (a URI or a
        sequence of URIs)."""
        if isinstance(uri, str):
            uri = [uri]
        realm_map = self.passwd.setdefault(realm, {})
        # Store under both the default-port and literal forms so later
        # lookups match whether or not the port is spelled out.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uri)
            realm_map[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) matching *authuri* under *realm*,
        or (None, None) when nothing matches."""
        domains = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                if any(self.is_suburi(u, reduced) for u in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        parts = urlsplit(uri)
        if parts[1]:
            # Full URI (note HTTP URLs do not have a userinfo component).
            scheme, authority = parts[0], parts[1]
            path = parts[2] or '/'
        else:
            # Bare host or host:port.
            scheme, authority, path = None, uri, '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # Normalise to an explicit well-known port when possible.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
908 | |
909 | |
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the wildcard (None) realm."""

    def find_user_password(self, realm, authuri):
        # Try the requested realm first; on a miss, consult credentials
        # registered under the default realm (None).
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
918 | |
919 | |
class AbstractBasicAuthHandler(object):
    # Shared machinery for RFC 2617 Basic authentication; subclasses set
    # `auth_header` and route the relevant HTTP error (401/407) into
    # http_error_auth_reqed().

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        # password_mgr: an HTTPPasswordMgr-compatible credential store.
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        # Forget accumulated failures (called once a response is settled).
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # authreq here is the *name* of the challenge header to look up.
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                # Extract the (possibly quoted) realm from the challenge.
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"',"'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        # A non-401 reply means the credentials worked;
                        # reset the failure counter.
                        if response and response.code != 401:
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        # Re-issue *req* with an Authorization header for (realm, host),
        # or return None when no credentials are known or the same
        # credentials were already rejected.
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                # identical credentials already failed once; give up
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
987 | |
988 | |
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 401 responses by retrying with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The full URL (not just the host) is used for the credential
        # lookup so path-scoped passwords can match.
        response = self.http_error_auth_reqed('www-authenticate',
                                              req.full_url, req, headers)
        self.reset_retry_count()
        return response
999 | |
1000 | |
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 407 responses by retrying with Proxy-Authorization."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo
        # component in authority.  Assume there isn't one, since
        # urllib.request does not (and should not, RFC 3986 s. 3.2.1)
        # support requests for URLs containing userinfo.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1015 | |
1016 | |
# Return n cryptographically strong random bytes (used below when
# building digest-auth client nonces).
_randombytes = os.urandom
1019 | |
1020 | |
class AbstractDigestAuthHandler(object):
    """Shared machinery for RFC 2617 HTTP digest authentication.

    Subclasses provide ``auth_header`` and route the relevant HTTP error
    (401 or 407) into :meth:`http_error_auth_reqed`.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" support is shaky

    def __init__(self, passwd=None):
        # passwd: an HTTPPasswordMgr-compatible credential store.
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        # Bookkeeping for the RFC 2617 "nc" (nonce count) parameter: the
        # count increments for each request that reuses a server nonce.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget accumulated failures (called once a response is settled)."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """React to a digest challenge carried in headers[auth_header].

        Raises HTTPError after more than 5 retries; returns the retried
        response, or None when no usable challenge is present.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # A Basic challenge is silently ignored here so the Basic
                # handler (ordered after this one) can act on it.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with an authorization built from challenge *auth*."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # identical credentials were already rejected; give up
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce derived from *nonce*."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization parameter string for *req*.

        Returns None when the challenge is malformed, the algorithm is
        unsupported, or no credentials are known for the realm.
        Raises URLError for an unsupported qop value.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # Track per-nonce request counts as required by RFC 2617 3.2.2.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) digest helpers for *algorithm*.

        H is None when the algorithm is unsupported; callers must check
        for that before using either helper.
        """
        # lambdas assume digest modules are imported at the top level
        H = None
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        # Bug fix: previously an unrecognized algorithm left H unbound and
        # the KD lambda below raised UnboundLocalError.  Initializing H to
        # None lets get_authorization's existing "if H is None" guard
        # decline the challenge gracefully instead.
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1160 | |
1161 | |
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # Only the authority component of the URL identifies the
        # protection space for the credential lookup.
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        self.reset_retry_count()
        return response
1178 | |
1179 | |
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP 407)."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # For proxies the host alone identifies the protection space.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1191 | |
class AbstractHTTPHandler(BaseHandler):
    # Shared implementation for HTTPHandler/HTTPSHandler: do_request_
    # normalizes outgoing requests, do_open performs them over an
    # http.client-style connection class.

    def __init__(self, debuglevel=0):
        # NOTE(review): _debuglevel is stored but not applied anywhere in
        # the code shown here -- presumably consumed elsewhere; verify.
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        # Fill in the headers every HTTP(S) request needs (Content-type,
        # Content-length for POSTs, Host, opener-wide addheaders) and
        # reject str POST bodies.  Returns the request for chaining.
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                size = None
                try:
                    ### For Python-Future:
                    if PY2 and isinstance(data, array.array):
                        # memoryviews of arrays aren't supported
                        # in Py2.7. (e.g. memoryview(array.array('I',
                        # [1, 2, 3, 4])) raises a TypeError.)
                        # So we calculate the size manually instead:
                        size = len(data) * data.itemsize
                    ###
                    else:
                        mv = memoryview(data)
                        size = len(mv) * mv.itemsize
                except TypeError:
                    # data has no buffer interface; an iterable body must
                    # come with an explicit Content-Length
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % size)

        sel_host = host
        if request.has_proxy():
            # when proxied, the Host header names the origin server taken
            # from the selector, not the proxy
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)

        # unredirected headers take precedence over ordinary ones
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            # CONNECT tunnelling (https through a proxy)
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            h.request(req.get_method(), req.selector, req.data, headers)
        except socket.error as err: # timeout error
            h.close()
            raise URLError(err)
        else:
            r = h.getresponse()
            # If the server does not send us a 'Connection: close' header,
            # HTTPConnection assumes the socket should be left open. Manually
            # mark the socket to be closed when this response object goes away.
            if h.sock:
                h.sock.close()
                h.sock = None


        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
1312 | |
1313 | |
class HTTPHandler(AbstractHTTPHandler):
    # Opens plain http:// URLs via http.client's HTTPConnection.

    def http_open(self, req):
        return self.do_open(http_client.HTTPConnection, req)

    # Request preprocessing (headers, proxy handling) is shared with HTTPS.
    http_request = AbstractHTTPHandler.do_request_
1320 | |
# HTTPSHandler exists only when the http.client backport was built with
# SSL support.
if hasattr(http_client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        # Opens https:// URLs; `context` and `check_hostname` are passed
        # straight through to HTTPSConnection.

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http_client.HTTPSConnection, req,
                context=self._context, check_hostname=self._check_hostname)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
1337 | |
class HTTPCookieProcessor(BaseHandler):
    # Attaches cookies from *cookiejar* to outgoing requests and harvests
    # Set-Cookie headers from responses back into the jar.
    def __init__(self, cookiejar=None):
        import future.backports.http.cookiejar as http_cookiejar
        if cookiejar is None:
            cookiejar = http_cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        # add any stored cookies that match this request
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # record cookies the server set in this response
        self.cookiejar.extract_cookies(response, request)
        return response

    # identical processing applies to HTTPS traffic
    https_request = http_request
    https_response = http_response
1355 | |
class UnknownHandler(BaseHandler):
    """Fallback handler: any scheme nobody else claims is an error."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)
1360 | |
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated."""
    parsed = {}
    for item in l:
        key, value = item.split('=', 1)
        # strip one level of surrounding double quotes
        if value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        parsed[key] = value
    return parsed
1370 | |
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    elements = []
    current = []
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            # previous char was a backslash inside quotes: take ch literally
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            # an unquoted comma terminates the current element
            elements.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last element
    if current:
        elements.append(''.join(current))

    return [element.strip() for element in elements]
1413 | |
class FileHandler(BaseHandler):
    """Serve file:// URLs for the local machine only."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Bug fix: this previously read "not req.host is
            # self.get_names()" -- an identity comparison between a
            # string and a tuple that is always False, so *every*
            # non-'localhost' host was rejected, even one naming this
            # machine.  Membership is what was meant (this matches
            # CPython's urllib.request.FileHandler).
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        # Lazily compute and cache (class-wide) every IP address that
        # refers to this machine; fall back to just localhost's address
        # when the hostname cannot be resolved.
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl wrapping the local file named by *req*.

        Raises URLError when the file is missing, unreadable, or not on
        this host.
        """
        import future.backports.email.utils as email_utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
1465 | |
def _safe_gethostbyname(host):
    # Resolve *host* to an IP address, returning None instead of raising
    # socket.gaierror when resolution fails (used by FileHandler's
    # local-host check above).
    try:
        return socket.gethostbyname(host)
    except socket.gaierror:
        return None
1471 | |
class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        """Open an ftp:// request and return an addinfourl over the data.

        Raises URLError for a missing host, a resolution failure, or any
        ftplib error.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)
        # split ";type=..." attributes off the path, then unquote each
        # path component; the last component is the file to retrieve
        path, attrs = splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # 'I' (binary) for a file, 'D' (directory listing) otherwise,
            # unless a type=... attribute overrides it below
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise_with_traceback(exc)

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # one-shot (non-persistent) connection; CacheFTPHandler overrides
        # this to reuse connections
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
1529 | |
class CacheFTPHandler(FTPHandler):
    # FTPHandler variant that keeps connections alive for reuse.
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> live ftpwrapper
        self.timeout = {}    # key -> absolute expiry time
        self.soonest = 0     # earliest expiry among cached connections
        self.delay = 60      # seconds a connection stays cached
        self.max_conns = 16  # cap on simultaneously cached connections

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # Reuse a cached connection for the same destination/path if one
        # exists; either way refresh its expiry time.
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # NOTE(review): min() would raise ValueError on an empty
            # dict; presumably safe because connect_ftp always inserts a
            # fresh (unexpired) entry before calling us -- confirm.
            self.soonest = min(list(self.timeout.values()))

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(list(self.timeout.values()))

    def clear_cache(self):
        # close every cached connection and forget all bookkeeping
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1582 | |
1583 | |
# Code move from the old urllib module

MAXFTPCACHE = 10 # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    # On POSIX the mapping is plain percent-(un)quoting.
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
1601 | |
1602 # This really consists of two pieces: | |
1603 # (1) a class which handles opening of all sorts of URLs | |
1604 # (plus assorted utilities etc.) | |
1605 # (2) a set of functions for parsing URLs | |
1606 # XXX Should these be separated out into different modules? | |
1607 | |
1608 | |
# Shared FTP connection cache; URLopener instances use this by default.
ftpcache = {}
1610 class URLopener(object): | |
1611 """Class to open URLs. | |
1612 This is a class rather than just a subroutine because we may need | |
1613 more than one set of global protocol-specific options. | |
1614 Note -- this is a base class for those who don't want the | |
1615 automatic handling of errors type 302 (relocated) and 401 | |
1616 (authorization needed).""" | |
1617 | |
1618 __tempfiles = None | |
1619 | |
1620 version = "Python-urllib/%s" % __version__ | |
1621 | |
    # Constructor
    def __init__(self, proxies=None, **x509):
        # This legacy interface is deprecated: constructing one warns and
        # points callers at the urlopen functions/methods instead.
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # x509 may carry 'key_file'/'cert_file' for client TLS certs
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.
1648 | |
    def __del__(self):
        # best-effort removal of temp files when the opener is collected
        self.close()
1651 | |
    def close(self):
        # public alias for cleanup()
        self.cleanup()
1654 | |
    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # best effort: the temp file may already be gone
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()
1668 | |
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # args is stored as a (name, value) tuple
        self.addheaders.append(args)
1673 | |
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            # serve a previously retrieve()d copy from the cache
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # dispatch to the scheme-specific open_<type>() method
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except HTTPError:
            raise
        except socket.error as msg:
            raise_with_traceback(IOError('socket error', msg))
1710 | |
    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)
1715 | |
    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)
1720 | |
    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as reporthook(blocknum,
        blocksize, totalsize) once up front and then after each block.
        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header promised.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # local files need no copy: hand back the path directly
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError as msg:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # no target given: download into a temp file whose suffix
                # mirrors the URL's path extension
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                # copy in 8 KiB blocks, reporting progress as we go
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result
1785 | |
1786 # Each method named open_<type> knows how to open that type of URL | |
1787 | |
    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.

        A str url is a direct request; a (host, selector) tuple is the
        proxy form produced by URLopener.open().  Returns an addinfourl
        on 2xx, otherwise delegates to self.http_error().
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: host may carry user:password@ credentials.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy request: 'host' is the proxy, 'selector' the full URL.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the credentials.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Target host is exempt: talk to it directly.
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        # Credentials are sent pre-encoded as HTTP Basic auth.
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # User-supplied headers (self.addheaders) override the defaults.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http_client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)
1879 | |
1880 def open_http(self, url, data=None): | |
1881 """Use HTTP protocol.""" | |
1882 return self._open_generic_http(http_client.HTTPConnection, url, data) | |
1883 | |
1884 def http_error(self, url, fp, errcode, errmsg, headers, data=None): | |
1885 """Handle http errors. | |
1886 | |
1887 Derived class can override this, or provide specific handlers | |
1888 named http_error_DDD where DDD is the 3-digit error code.""" | |
1889 # First check if there's a specific handler for this error | |
1890 name = 'http_error_%d' % errcode | |
1891 if hasattr(self, name): | |
1892 method = getattr(self, name) | |
1893 if data is None: | |
1894 result = method(url, fp, errcode, errmsg, headers) | |
1895 else: | |
1896 result = method(url, fp, errcode, errmsg, headers, data) | |
1897 if result: return result | |
1898 return self.http_error_default(url, fp, errcode, errmsg, headers) | |
1899 | |
1900 def http_error_default(self, url, fp, errcode, errmsg, headers): | |
1901 """Default error handler: close the connection and raise IOError.""" | |
1902 fp.close() | |
1903 raise HTTPError(url, errcode, errmsg, headers, None) | |
1904 | |
    # HTTPS support is only defined when the ssl module was importable at
    # module load time (_have_ssl is set earlier in this file).
    if _have_ssl:
        def _https_connection(self, host):
            # Connection factory for _open_generic_http(); forwards the
            # client-certificate settings stored on this opener.
            return http_client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)
1914 | |
1915 def open_file(self, url): | |
1916 """Use local file or FTP depending on form of URL.""" | |
1917 if not isinstance(url, str): | |
1918 raise URLError('file error: proxy support for file protocol currently not implemented') | |
1919 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': | |
1920 raise ValueError("file:// scheme is supported only on localhost") | |
1921 else: | |
1922 return self.open_local_file(url) | |
1923 | |
    def open_local_file(self, url):
        """Use local file.

        Builds synthetic Content-Type/Content-Length/Last-modified
        headers from os.stat() and returns an addinfourl wrapping the
        open file.  A non-empty host is only accepted when it resolves
        to this machine; otherwise URLError is raised.
        """
        import future.backports.email.utils as email_utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            # No host part: plain local path.
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # A host is acceptable only without an explicit port and only if
        # it resolves to localhost or one of this machine's addresses.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')
1955 | |
1956 def open_ftp(self, url): | |
1957 """Use FTP protocol.""" | |
1958 if not isinstance(url, str): | |
1959 raise URLError('ftp error: proxy support for ftp protocol currently not implemented') | |
1960 import mimetypes | |
1961 host, path = splithost(url) | |
1962 if not host: raise URLError('ftp error: no host given') | |
1963 host, port = splitport(host) | |
1964 user, host = splituser(host) | |
1965 if user: user, passwd = splitpasswd(user) | |
1966 else: passwd = None | |
1967 host = unquote(host) | |
1968 user = unquote(user or '') | |
1969 passwd = unquote(passwd or '') | |
1970 host = socket.gethostbyname(host) | |
1971 if not port: | |
1972 import ftplib | |
1973 port = ftplib.FTP_PORT | |
1974 else: | |
1975 port = int(port) | |
1976 path, attrs = splitattr(path) | |
1977 path = unquote(path) | |
1978 dirs = path.split('/') | |
1979 dirs, file = dirs[:-1], dirs[-1] | |
1980 if dirs and not dirs[0]: dirs = dirs[1:] | |
1981 if dirs and not dirs[0]: dirs[0] = '/' | |
1982 key = user, host, port, '/'.join(dirs) | |
1983 # XXX thread unsafe! | |
1984 if len(self.ftpcache) > MAXFTPCACHE: | |
1985 # Prune the cache, rather arbitrarily | |
1986 for k in self.ftpcache.keys(): | |
1987 if k != key: | |
1988 v = self.ftpcache[k] | |
1989 del self.ftpcache[k] | |
1990 v.close() | |
1991 try: | |
1992 if key not in self.ftpcache: | |
1993 self.ftpcache[key] = \ | |
1994 ftpwrapper(user, passwd, host, port, dirs) | |
1995 if not file: type = 'D' | |
1996 else: type = 'I' | |
1997 for attr in attrs: | |
1998 attr, value = splitvalue(attr) | |
1999 if attr.lower() == 'type' and \ | |
2000 value in ('a', 'A', 'i', 'I', 'd', 'D'): | |
2001 type = value.upper() | |
2002 (fp, retrlen) = self.ftpcache[key].retrfile(file, type) | |
2003 mtype = mimetypes.guess_type("ftp:" + url)[0] | |
2004 headers = "" | |
2005 if mtype: | |
2006 headers += "Content-Type: %s\n" % mtype | |
2007 if retrlen is not None and retrlen >= 0: | |
2008 headers += "Content-Length: %d\n" % retrlen | |
2009 headers = email.message_from_string(headers) | |
2010 return addinfourl(fp, headers, "ftp:" + url) | |
2011 except ftperrors() as exp: | |
2012 raise_with_traceback(URLError('ftp error %r' % exp)) | |
2013 | |
    def open_data(self, url, data=None):
        """Use "data" URL.

        *url* is the part after the "data:" scheme.  Builds a synthetic
        message (Date, Content-type, Content-Length) around the decoded
        payload and returns an addinfourl over a StringIO of it.
        """
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # The final ";word" (no '=') is the transfer encoding, e.g.
        # ";base64"; anything with '=' is a media-type parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None # needed for addinfourl
        return addinfourl(f, headers, url)
2054 | |
2055 | |
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds automatic redirect handling (301/302/303/307, with a recursion
    limit), HTTP Basic authentication retries for 401/407, and a
    non-raising default error handler.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # (realm@host) -> (user, passwd) remembered by get_user_passwd().
        self.auth_cache = {}
        # Redirect-recursion guard used by http_error_302().
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: report a synthetic 500.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow a redirect to the Location/URI header target.

        Returns None when the response carries neither header.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE: URLopener.http_error_default() raises HTTPError, so the
        # bail-out calls below never fall through to the retry logic.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Same bail-out pattern as http_error_401 above.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Re-issue the request after embedding credentials in the
        http proxy URL."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already present in the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Re-issue the request after embedding credentials in the
        https proxy URL."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Re-issue the request with user:password@ in the http URL."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Re-issue the request with user:password@ in the https URL."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return cached (user, passwd) for realm@host, prompting (and
        caching) when absent; a true clear_cache drops the cached entry
        first."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2264 | |
2265 | |
2266 # Utility functions | |
2267 | |
_localhost = None
def localhost():
    """Return (and cache) the IP address of the hostname 'localhost'."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2275 | |
_thishost = None
def thishost():
    """Return a tuple of the IP addresses of the current host (cached)."""
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        # The machine's own name may not resolve; fall back to whatever
        # 'localhost' maps to.
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2286 | |
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class (lazily cached)."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2295 | |
_noheaders = None
def noheaders():
    """Return a shared, empty email Message object (lazily created)."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2303 | |
2304 | |
2305 # Utility classes | |
2306 | |
class ftpwrapper(object):
    """Class used by open_ftp() for cache of open FTP connections.

    Tracks outstanding transfers with a refcount; the underlying
    connection is only really closed when the refcount drops to zero
    and keepalive has been turned off via close().
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of transfer file objects still open (see file_close()).
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """(Re)connect, log in and change to the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a directory listing when falsy).

        *type* 'd'/'D' requests a listing (ASCII mode); anything else is
        used verbatim in a TYPE command.  Returns (fileobj, length).
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection may have gone stale; reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file": fall through to a
                # directory listing; anything else is a real error.
                if str(reason)[:3] != '550':
                    raise_with_traceback(URLError('ftp error: %r' % reason))
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        ### Was:
                        # raise URLError('ftp error: %r' % reason) from reason
                        exc = URLError('ftp error: %r' % reason)
                        exc.__cause__ = reason
                        raise exc
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # The returned file object pings file_close() when closed so the
        # refcount stays accurate.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        # Mark the data channel as idle.
        self.busy = 0

    def close(self):
        """Disable keepalive; really close once no transfers remain."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called from the addclosehook wrapper when a transfer file
        # object is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Unconditionally close the underlying FTP connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2399 | |
2400 # Proxy handling | |
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention. If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110 ("httpoxy"): when running as a CGI script,
    # HTTP_PROXY is attacker-controlled -- the web server sets it from
    # the client's "Proxy:" request header -- so it must not be honoured
    # for the http scheme.  REQUEST_METHOD is set by CGI servers, which
    # makes it a reliable marker for that context.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    return proxies
2416 | |
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    if no_proxy == '*':
        # Wildcard: every host bypasses the proxy.
        return 1
    hostonly, _port = splitport(host)
    # Bypass when the host -- with or without its port -- ends with one
    # of the configured DNS suffixes.
    suffixes = (entry.strip() for entry in no_proxy.split(','))
    if any(s and (hostonly.endswith(s) or host.endswith(s))
           for s in suffixes):
        return 1
    return 0
2436 | |
2437 | |
2438 # This code tests an OSX specific data structure but is testable on all | |
2439 # platforms | |
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack a (possibly truncated, e.g. '10.1') dotted quad into a
        # 32-bit integer, zero-padding missing octets.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    hostIP = None   # resolved lazily, only if a numeric exception appears

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value: continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            # Numeric entry: compare network prefixes of host and entry.
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except socket.error:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit prefix length: assume 8 bits per dotted
                # component present (e.g. '10.1' -> /16).
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])
            # Convert prefix length to the number of host bits to drop.
            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            # Non-numeric entry: shell-style pattern match on the name.
            return True

    return False
2498 | |
2499 | |
# Platform dispatch: define getproxies()/proxy_bypass() from the system
# configuration (macOS), the registry (Windows), or environment variables
# (everything else).  Environment settings always win when present.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        # Thin wrapper feeding live system settings to the testable
        # helper above.
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Test if the host should bypass the proxy: environment
        variables take precedence over the system configuration."""
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        # Consult the Windows ProxyOverride registry value (a ';'
        # separated list of glob patterns, plus the special '<local>').
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' matches any dot-less (intranet) host name.
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Test if the host should bypass the proxy.

        Checks the environment, if no_proxy is specified there,
        or the registry otherwise.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment