Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/future/backports/urllib/request.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d30785e31577 |
|---|---|
| 1 """ | |
| 2 Ported using Python-Future from the Python 3.3 standard library. | |
| 3 | |
| 4 An extensible library for opening URLs using a variety of protocols | |
| 5 | |
| 6 The simplest way to use this module is to call the urlopen function, | |
| 7 which accepts a string containing a URL or a Request object (described | |
| 8 below). It opens the URL and returns the results as file-like | |
| 9 object; the returned object has some extra methods described below. | |
| 10 | |
| 11 The OpenerDirector manages a collection of Handler objects that do | |
| 12 all the actual work. Each Handler implements a particular protocol or | |
| 13 option. The OpenerDirector is a composite object that invokes the | |
| 14 Handlers needed to open the requested URL. For example, the | |
| 15 HTTPHandler performs HTTP GET and POST requests and deals with | |
| 16 non-error returns. The HTTPRedirectHandler automatically deals with | |
| 17 HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler | |
| 18 deals with digest authentication. | |
| 19 | |
| 20 urlopen(url, data=None) -- Basic usage is the same as original | |
| 21 urllib. pass the url and optionally data to post to an HTTP URL, and | |
| 22 get a file-like object back. One difference is that you can also pass | |
| 23 a Request instance instead of URL. Raises a URLError (subclass of | |
| 24 IOError); for HTTP errors, raises an HTTPError, which can also be | |
| 25 treated as a valid response. | |
| 26 | |
| 27 build_opener -- Function that creates a new OpenerDirector instance. | |
| 28 Will install the default handlers. Accepts one or more Handlers as | |
| 29 arguments, either instances or Handler classes that it will | |
| 30 instantiate. If one of the argument is a subclass of the default | |
| 31 handler, the argument will be installed instead of the default. | |
| 32 | |
| 33 install_opener -- Installs a new opener as the default opener. | |
| 34 | |
| 35 objects of interest: | |
| 36 | |
| 37 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages | |
| 38 the Handler classes, while dealing with requests and responses. | |
| 39 | |
| 40 Request -- An object that encapsulates the state of a request. The | |
| 41 state can be as simple as the URL. It can also include extra HTTP | |
| 42 headers, e.g. a User-Agent. | |
| 43 | |
| 44 BaseHandler -- | |
| 45 | |
| 46 internals: | |
| 47 BaseHandler and parent | |
| 48 _call_chain conventions | |
| 49 | |
| 50 Example usage: | |
| 51 | |
| 52 import urllib.request | |
| 53 | |
| 54 # set up authentication info | |
| 55 authinfo = urllib.request.HTTPBasicAuthHandler() | |
| 56 authinfo.add_password(realm='PDQ Application', | |
| 57 uri='https://mahler:8092/site-updates.py', | |
| 58 user='klem', | |
| 59 passwd='geheim$parole') | |
| 60 | |
| 61 proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) | |
| 62 | |
| 63 # build a new opener that adds authentication and caching FTP handlers | |
| 64 opener = urllib.request.build_opener(proxy_support, authinfo, | |
| 65 urllib.request.CacheFTPHandler) | |
| 66 | |
| 67 # install it | |
| 68 urllib.request.install_opener(opener) | |
| 69 | |
| 70 f = urllib.request.urlopen('http://www.python.org/') | |
| 71 """ | |
| 72 | |
| 73 # XXX issues: | |
| 74 # If an authentication error handler that tries to perform | |
| 75 # authentication for some reason but fails, how should the error be | |
| 76 # signalled? The client needs to know the HTTP error code. But if | |
| 77 # the handler knows what the problem was, e.g., that it didn't know | |
| 78 # the hash algorithm that was requested in the challenge, it would be good to | |
| 79 # pass that information along to the client, too. | |
| 80 # ftp errors aren't handled cleanly | |
| 81 # check digest against correct (i.e. non-apache) implementation | |
| 82 | |
| 83 # Possible extensions: | |
| 84 # complex proxies XXX not sure what exactly was meant by this | |
| 85 # abstract factory for opener | |
| 86 | |
| 87 from __future__ import absolute_import, division, print_function, unicode_literals | |
| 88 from future.builtins import bytes, dict, filter, input, int, map, open, str | |
| 89 from future.utils import PY2, PY3, raise_with_traceback | |
| 90 | |
| 91 import base64 | |
| 92 import bisect | |
| 93 import hashlib | |
| 94 import array | |
| 95 | |
| 96 from future.backports import email | |
| 97 from future.backports.http import client as http_client | |
| 98 from .error import URLError, HTTPError, ContentTooShortError | |
| 99 from .parse import ( | |
| 100 urlparse, urlsplit, urljoin, unwrap, quote, unquote, | |
| 101 splittype, splithost, splitport, splituser, splitpasswd, | |
| 102 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) | |
| 103 from .response import addinfourl, addclosehook | |
| 104 | |
| 105 import io | |
| 106 import os | |
| 107 import posixpath | |
| 108 import re | |
| 109 import socket | |
| 110 import sys | |
| 111 import time | |
| 112 import tempfile | |
| 113 import contextlib | |
| 114 import warnings | |
| 115 | |
| 116 from future.utils import PY2 | |
| 117 | |
| 118 if PY2: | |
| 119 from collections import Iterable | |
| 120 else: | |
| 121 from collections.abc import Iterable | |
| 122 | |
# check for SSL
# _have_ssl records whether HTTPS support can be offered; urlopen() refuses
# the cafile/capath/cadefault arguments when it is False.
try:
    import ssl
    # Not available in the SSL module in Py2:
    from ssl import SSLContext
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
| 132 | |
# Names exported by "from urllib.request import *".
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
    'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
| 148 | |
# used in User-Agent header sent ("Python-urllib/<major>.<minor>").
# Bug fix: sys.version[:3] truncates two-digit minor versions (on
# Python 3.10 it yields "3.1"); build the string from version_info
# instead, which is identical on older versions and correct on new ones.
__version__ = '%d.%d' % sys.version_info[:2]
_opener = None  # lazily built / install_opener()-provided default opener

def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs):
    """Open *url* (a string or a Request object) and return a file-like
    response object.

    ``cafile``, ``capath`` and ``cadefault`` are keyword-only arguments
    configuring HTTPS certificate verification; they are emulated through
    ``**_3to2kwargs`` because Python 2 has no keyword-only syntax.

    Raises URLError on protocol errors (per the module docstring).
    """
    # Emulated keyword-only arguments.
    cafile = _3to2kwargs.pop('cafile', None)
    capath = _3to2kwargs.pop('capath', None)
    cadefault = _3to2kwargs.pop('cadefault', False)
    if _3to2kwargs:
        # Bug fix: unknown keyword arguments used to be silently ignored;
        # mirror Python 3's TypeError instead so typos are caught.
        raise TypeError('urlopen() got unexpected keyword arguments %r'
                        % sorted(_3to2kwargs))
    global _opener
    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        # Build a one-off opener whose HTTPS handler verifies certificates.
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        # First plain call: build and cache the default opener.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
| 178 | |
def install_opener(opener):
    """Install *opener* as the default OpenerDirector used by urlopen()."""
    global _opener
    _opener = opener
| 182 | |
_url_tempfiles = []  # paths created below; removed again by urlcleanup()

def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # No name given: create a temp file and remember it so that
            # urlcleanup() can delete it later.
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8     # copy in 8 KiB blocks
            size = -1       # -1 means "total size unknown"
            read = 0
            blocknum = 0
            # headers is case-insensitive, so this matches "Content-Length".
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial callback before any data is read (block 0).
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    # Fewer bytes than Content-Length promised means a truncated download.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
| 246 | |
def urlcleanup():
    """Delete the temporary files created by urlretrieve() and discard
    any globally installed opener."""
    global _opener
    for path in _url_tempfiles:
        try:
            os.unlink(path)
        except EnvironmentError:
            # Best effort: a file may already be gone or be locked.
            pass
    _url_tempfiles[:] = []
    if _opener:
        _opener = None
| 258 | |
# Matches a trailing ":port" on a host string; used by request_host() below.
# re.ASCII exists only on Python 3 (on Python 2, \d is ASCII-only anyway).
if PY3:
    _cut_port_re = re.compile(r":\d+$", re.ASCII)
else:
    _cut_port_re = re.compile(r":\d+$")
| 263 | |
def request_host(request):
    """Return the request-host of *request*, as defined by RFC 2965.

    Variation from the RFC: the returned value is lowercased, for
    convenient comparison.
    """
    netloc_host = urlparse(request.full_url)[1]
    if netloc_host == "":
        # URL carried no host part; fall back to the Host header.
        netloc_host = request.get_header("Host", "")
    # Strip a single trailing ":port", then normalise case.
    return _cut_port_re.sub("", netloc_host, 1).lower()
| 280 | |
class Request(object):
    """Encapsulates the state of a single URL request: the URL itself, an
    optional request body (*data*), headers, and the redirect/cookie
    bookkeeping used by the handlers (origin_req_host, unverifiable,
    proxy/tunnel state).
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        # Split off the fragment; get_full_url() re-attaches it.
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        # Host to tunnel to when proxying https; set by set_proxy().
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        # Headers that must NOT be copied onto a redirected request.
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        # Derive .type (the URL scheme), .host and .selector from full_url.
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        # An explicit method wins; otherwise POST iff a body is present.
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        # Re-attach the fragment stripped off in __init__().
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    # (each simply warns and forwards to the corresponding attribute)

    def add_data(self, data):
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        self.data = data

    def has_data(self):
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.data is not None

    def get_data(self):
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.data

    def get_type(self):
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.type

    def get_host(self):
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.host

    def get_selector(self):
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.selector

    def is_unverifiable(self):
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.unverifiable

    def get_origin_req_host(self):
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Route this request through the proxy at *host*.

        For an https request the first call records the real host for
        tunnelling instead of rewriting the request.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # True once set_proxy() has rewritten the selector to the full URL.
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        # Regular headers take precedence over unredirected ones.
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        # Merge both header dicts; regular headers override unredirected.
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
| 401 | |
class OpenerDirector(object):
    """Manages a chain of handlers and uses them to open URLs.

    Handlers register via add_handler(); their specially named methods
    (e.g. ``http_open``, ``http_error_404``, ``http_request``,
    ``http_response``) are discovered by name and indexed into the
    handle_open / handle_error / process_request / process_response maps.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers:
        self.handle_open = {}        # scheme -> [handlers with <scheme>_open]
        self.handle_error = {}       # scheme -> {code -> [handlers]}
        self.process_response = {}   # scheme -> [handlers with <scheme>_response]
        self.process_request = {}    # scheme -> [handlers with <scheme>_request]

    def add_handler(self, handler):
        """Register *handler* (a BaseHandler instance) with this director."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        # Scan the handler's attribute names for the patterns
        # <protocol>_open, <protocol>_request, <protocol>_response and
        # <protocol>_error_<code>, registering each method found.
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # e.g. "http_error_404" -> kind 404 (int);
                # a bare "<proto>_error" yields kind "" (empty string).
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep each chain sorted by handler_order
                # (see BaseHandler.__lt__).
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """
        Accept a URL or a Request object

        Python-Future: if the URL is passed as a byte-string, decode it first.
        """
        if isinstance(fullurl, bytes):
            fullurl = fullurl.decode()
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request: give every <protocol>_request processor a
        # chance to rewrite the request before it is opened.
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response via the <protocol>_response processors.
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # Try default_open handlers first, then the protocol-specific
        # <protocol>_open chain, finally the unknown_open fallback.
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error handlers,
        falling back to http_error_default for HTTP protocols."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http']  # https is not different than http
            # args is (request, response, code, msg, hdrs); args[2] is the
            # HTTP status code, which selects http_error_<code>.
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # No handler dealt with the specific code: fall back to the
            # default HTTP error handler (normally raises HTTPError).
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
| 545 | |
| 546 # XXX probably also want an abstract factory that knows when it makes | |
| 547 # sense to skip a superclass in favor of a subclass and when it might | |
| 548 # make sense to include both | |
| 549 | |
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def _looks_like_class(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http_client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # A default is suppressed when the caller supplied a subclass of it,
    # or an instance of it (or of a subclass).
    suppressed = set()
    for default in default_classes:
        for supplied in handlers:
            if _looks_like_class(supplied):
                if issubclass(supplied, default):
                    suppressed.add(default)
            elif isinstance(supplied, default):
                suppressed.add(default)

    for default in default_classes:
        if default not in suppressed:
            opener.add_handler(default())

    # Caller-supplied handlers go in last; classes are instantiated here.
    for supplied in handlers:
        if _looks_like_class(supplied):
            supplied = supplied()
        opener.add_handler(supplied)
    return opener
| 587 | |
class BaseHandler(object):
    """Base class for protocol and error handlers.

    Handlers are sorted by ``handler_order`` when OpenerDirector chains
    them together; lower values run earlier.
    """
    handler_order = 500

    def add_parent(self, parent):
        # Called by OpenerDirector.add_handler() so the handler can reach
        # back into its director (e.g. to restart a request).
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        # Objects without a handler_order (old-style custom user classes)
        # always compare greater, so they sort after the default handlers.
        _missing = object()
        other_order = getattr(other, "handler_order", _missing)
        if other_order is _missing:
            return True
        return self.handler_order < other_order
| 605 | |
| 606 | |
class HTTPErrorProcessor(BaseHandler):
    """Route non-2xx HTTP responses into the opener's error machinery."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # Per RFC 2616 only a "2xx" code means the request was successfully
        # received, understood, and accepted; everything else goes through
        # parent.error() (redirects, auth challenges, hard failures).
        if 200 <= code < 300:
            return response
        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
| 623 | |
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: turn any unhandled HTTP error response into
    an HTTPError exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
| 627 | |
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301/302/303/307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only redirect GET/HEAD for all four codes, and POST for 301-303.
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # Drop body-describing headers: the redirected request has no body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlunparse(urlparts)

        # Resolve relative redirect targets against the original URL.
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
| 731 | |
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
        # Bare authority (e.g. "joe:pw@proxy:3128"): no scheme at all.
        # Note splittype may have mistaken "joe:" for a scheme, so reparse
        # the whole string as an authority.
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = splituser(authority)
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport
| 803 | |
class ProxyHandler(BaseHandler):
    """Route requests through the proxies given in a {scheme: proxy_url}
    mapping (defaults to the platform/environment proxy configuration
    returned by getproxies())."""
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Create one <scheme>_open method per configured proxy so that
        # OpenerDirector.add_handler() registers this handler for that
        # scheme.  The lambda's default arguments deliberately bind the
        # current loop values (avoiding the late-binding closure pitfall).
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # Proxy given as a bare authority: assume the request's scheme.
            proxy_type = orig_type

        # Honour the platform's proxy-bypass configuration.
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            # Pre-emptively attach Basic credentials for the proxy.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
| 845 | |
class HTTPPasswordMgr(object):
    """Keep a realm -> URI-prefix -> (user, password) credential store."""

    def __init__(self):
        # Maps realm -> {tuple of reduced URIs: (user, password)}.
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Record credentials for *realm* at *uri* (a URI or sequence of URIs)."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        # Store the reduced form both with and without the scheme's default
        # port so lookups match either spelling of the authority.
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) matching *realm*/*authuri*, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # Normalize by appending the scheme's well-known port so
            # "example.com" and "example.com:80" compare equal.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Same authority: test is a sub-URI when base's path is a
        # prefix of test's path.
        common = posixpath.commonprefix((base[1], test[1]))
        if len(common) == len(base[1]):
            return True
        return False
| 908 | |
| 909 | |
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to a wildcard (None) realm."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first; fall back to the default (None) realm."""
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            # Nothing registered for this specific realm: try the
            # catch-all entry.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
| 918 | |
| 919 | |
class AbstractBasicAuthHandler(object):
    """Shared machinery for Basic auth against servers (401) and proxies (407)."""

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        # Count of credential re-sends for the current exchange; the
        # concrete handler resets it after each response.
        self.retried = 0

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """React to an auth challenge: retry with credentials or give up.

        *authreq* names the challenge header to inspect
        ('www-authenticate' or 'proxy-authenticate').
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"',"'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        if response and response.code != 401:
                            # Success: restart the counter for the next
                            # challenge.
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with an Authorization header, or return None."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                # Exactly these credentials were already rejected:
                # stop instead of looping.
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
| 987 | |
| 988 | |
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Retry 401 responses using Basic credentials for the request URL."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The complete URL (not just the host) keys the password lookup.
        resp = self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
        self.reset_retry_count()
        return resp
| 999 | |
| 1000 | |
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Retry 407 responses using Basic credentials for the proxy."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component
        # in the authority.  urllib.request never puts one there (nor should
        # it, per RFC 3986 s. 3.2.1), so req.host can be used directly.
        resp = self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
        self.reset_retry_count()
        return resp
| 1015 | |
| 1016 | |
# Return n random bytes.
# Used for the digest-auth cnonce; os.urandom is a CSPRNG source.
_randombytes = os.urandom
| 1019 | |
| 1020 | |
class AbstractDigestAuthHandler(object):
    """Shared implementation of HTTP Digest authentication.

    Concrete subclasses provide ``auth_header`` and hook the appropriate
    ``http_error_40x`` method.
    """
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        # Nonce bookkeeping for the qop="auth" nonce-count (nc) value.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Respond to a Digest challenge found in *auth_header*, if any.

        Raises HTTPError after too many retries, ValueError for schemes
        that are neither 'digest' nor 'basic'.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # 'basic' is deliberately ignored so a Basic handler
                # elsewhere in the chain can deal with it.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with an Authorization header built from challenge *auth*."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # Identical credentials were already rejected; stop.
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value from challenge *chal*.

        Returns None when the challenge is incomplete, the algorithm is
        unsupported, or no credentials are known for the realm; raises
        URLError for an unsupported qop.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            # Unsupported digest algorithm: decline the challenge.
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # Track the nonce-count per RFC 2617 s. 3.2.2: it increments
            # for every request reusing the same server nonce.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) hash callables for *algorithm*, or (None, KD).

        Bug fix: the original left ``H`` unbound for any algorithm other
        than MD5/SHA (e.g. "MD5-sess"), so an unsupported challenge raised
        UnboundLocalError instead of being declined.  get_authorization
        already checks ``H is None``, so signal unsupported with None.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            H = None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
| 1160 | |
| 1161 | |
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # Only the authority component of the URL keys the password lookup.
        authority = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              authority, req, headers)
        self.reset_retry_count()
        return response
| 1178 | |
| 1179 | |
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP 407)."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
| 1191 | |
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and opening shared by HTTP and HTTPS."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in default headers (Content-type/length, Host, addheaders).

        Returns the (mutated) request.  Raises URLError when no host is
        set and TypeError for str POST bodies.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                size = None
                try:
                    ### For Python-Future:
                    if PY2 and isinstance(data, array.array):
                        # memoryviews of arrays aren't supported
                        # in Py2.7. (e.g. memoryview(array.array('I',
                        # [1, 2, 3, 4])) raises a TypeError.)
                        # So we calculate the size manually instead:
                        size = len(data) * data.itemsize
                    ###
                    else:
                        mv = memoryview(data)
                        size = len(mv) * mv.itemsize
                except TypeError:
                    # Not buffer-like: iterables must carry an explicit
                    # Content-Length because their size is unknowable here.
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % size)

        sel_host = host
        if request.has_proxy():
            # When proxied, the selector holds the full URL; the Host
            # header must name the origin server, not the proxy.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)

        # Unredirected headers take precedence over normal ones.
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            # e.g. HTTPS through an HTTP proxy: establish a CONNECT
            # tunnel first, forwarding only the proxy auth header.
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            h.request(req.get_method(), req.selector, req.data, headers)
        except socket.error as err:  # timeout error
            h.close()
            raise URLError(err)
        else:
            r = h.getresponse()
            # If the server does not send us a 'Connection: close' header,
            # HTTPConnection assumes the socket should be left open. Manually
            # mark the socket to be closed when this response object goes away.
            if h.sock:
                h.sock.close()
                h.sock = None


        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg. It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
| 1312 | |
| 1313 | |
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs using http.client's HTTPConnection."""

    def http_open(self, req):
        return self.do_open(http_client.HTTPConnection, req)

    # Header/Content-length preparation is shared with HTTPSHandler.
    http_request = AbstractHTTPHandler.do_request_
| 1320 | |
if hasattr(http_client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs; only defined when SSL support is available."""

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            # Passed through unchanged to HTTPSConnection.
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http_client.HTTPSConnection, req,
                context=self._context, check_hostname=self._check_hostname)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
| 1337 | |
class HTTPCookieProcessor(BaseHandler):
    """Attach Cookie headers to requests and harvest Set-Cookie responses."""

    def __init__(self, cookiejar=None):
        import future.backports.http.cookiejar as http_cookiejar
        # An empty CookieJar is created when none is supplied.
        self.cookiejar = (http_cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        # Add any cookies that match this request before it goes out.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Record cookies the server set, for use on later requests.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
| 1355 | |
class UnknownHandler(BaseHandler):
    """Last-resort handler: reject URL schemes nobody else claimed."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)
| 1360 | |
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Values surrounded by double quotes have the quotes stripped.
    Returns a dict mapping each key to its value.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Strip surrounding double quotes.  The length guard fixes an
        # IndexError on an empty value ("key=") and avoids mis-stripping
        # a value that is a single '"' character.
        if len(v) > 1 and v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
| 1370 | |
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    pieces = []
    current = ''
    in_escape = False
    in_quote = False

    for ch in s:
        if in_escape:
            # Previous char was a backslash inside quotes: take this one
            # literally (the backslash itself is dropped).
            current += ch
            in_escape = False
        elif in_quote:
            if ch == '\\':
                in_escape = True
            else:
                if ch == '"':
                    in_quote = False
                current += ch
        elif ch == ',':
            # Element separator outside quotes.
            pieces.append(current)
            current = ''
        else:
            if ch == '"':
                in_quote = True
            current += ch

    # Flush the trailing element, if any.
    if current:
        pieces.append(current)

    return [piece.strip() for piece in pieces]
| 1413 | |
class FileHandler(BaseHandler):
    """Open file:// URLs referring to the local host."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a file:// request, refusing non-local hosts."""
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Bug fix: the original tested ``not req.host is
            # self.get_names()`` -- an identity comparison between a
            # string and a tuple that is always False -- so every request
            # naming a non-localhost host was rejected even when the host
            # is one of this machine's own addresses.  Membership is the
            # intended test (as in CPython's urllib.request).
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None

    def get_names(self):
        """Return (and cache on the class) the IP addresses counted as local."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                # Name resolution unavailable: fall back to localhost only.
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for a local file with synthesized headers.

        Raises URLError when the file is missing/unreadable or the URL's
        host does not resolve to this machine.
        """
        import future.backports.email.utils as email_utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # NOTE: 'port' is only defined when 'host' is truthy; the
            # short-circuit on 'not host' keeps that safe.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
| 1465 | |
def _safe_gethostbyname(host):
    """Like socket.gethostbyname, but return None on resolution failure."""
    try:
        result = socket.gethostbyname(host)
    except socket.gaierror:
        result = None
    return result
| 1471 | |
class FTPHandler(BaseHandler):
    """Open ftp:// URLs, with optional user:password in the authority."""

    def ftp_open(self, req):
        """Fetch a file or directory listing over FTP.

        Returns an addinfourl; all ftplib errors are wrapped in URLError.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)
        # Split off ;attr=value URL attributes, then the path into
        # directory components plus the final file name.
        path, attrs = splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # 'I'mage (binary) transfer for files, 'D'irectory listing when
            # the path ends in '/'; a ;type= attribute overrides this.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise_with_traceback(exc)

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # One-shot (non-persistent) connection; CacheFTPHandler overrides
        # this to reuse connections.
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
| 1529 | |
class CacheFTPHandler(FTPHandler):
    """FTPHandler variant that keeps a small cache of live connections."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}     # key -> ftpwrapper connection
        self.timeout = {}   # key -> absolute expiry time
        self.soonest = 0    # earliest expiry among cached connections
        self.delay = 60     # idle lifetime of a connection, in seconds
        self.max_conns = 16

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for this endpoint, creating one if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            # Reuse: just push the expiry time out.
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Evict expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # NOTE(review): min() raises ValueError if every entry just
            # expired; this matches the upstream behavior.
            self.soonest = min(list(self.timeout.values()))

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(list(self.timeout.values()))

    def clear_cache(self):
        # Close every cached connection and forget all bookkeeping.
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
| 1582 | |
| 1583 | |
# Code move from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shared FTP connection cache used by URLopener instances by default.
ftpcache = {}
class URLopener(object):
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Filled in by __init__(); declared at class level so cleanup()
    # is safe to call even if __init__() never ran.
    __tempfiles = None

    # Default User-Agent header value.
    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        # This class is deprecated in favour of the urlopen()/OpenerDirector
        # machinery; warn callers who instantiate it directly.
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Optional SSL client-certificate data; passed through to
        # HTTPSConnection in _https_connection().
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Close the opener, removing any temporary files it created."""
        self.cleanup()

    def cleanup(self):
        """Delete temporary files created by retrieve() and clear tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch on the scheme to the matching open_<scheme>() method.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except HTTPError:
            raise
        except socket.error as msg:
            raise_with_traceback(IOError('socket error', msg))

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # A local file needs no copy: return its own path and headers.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError as msg:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target filename given: stream into a temporary file
                # whose suffix matches the URL path's extension.
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                # Copy in fixed-size blocks, reporting progress if asked.
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Tuple form (host, fullurl) signals a proxied request;
            # see the "Signal special case" note in open().
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http_client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http_client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            # Build an HTTPSConnection using the client certificate data
            # (if any) supplied to the constructor.
            return http_client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import future.backports.email.utils as email_utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        # Synthesize response headers from the file's stat() data.
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # A host part is only acceptable if it names this very machine.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # Connections are cached per (user, host, port, directory).
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            # A ";type=x" attribute in the URL overrides the default
            # transfer type.
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise_with_traceback(URLError('ftp error %r' % exp))

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        # Build an RFC 822-style message so the payload carries headers.
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
| 2054 | |
| 2055 | |
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" -> (user, passwd) gathered by get_user_passwd().
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10  # give up after this many consecutive redirects

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Redirect loop detected: report it as a server error.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow a redirect to the URL named in the response headers."""
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # No target given: nothing to follow.
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Each guard below delegates to the (raising) base-class default
        # handler when the challenge cannot be answered.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Same structure as http_error_401, but against the proxy's
        # Proxy-Authenticate challenge.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Re-try the request with user:password embedded in the HTTP proxy URL."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already present in the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Re-try the request with user:password embedded in the HTTPS proxy URL."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already present in the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Re-try the request with user:password embedded in the http URL."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Re-try the request with user:password embedded in the https URL."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm@host, prompting and caching as needed."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                # Previous credentials failed: drop them and re-prompt.
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
| 2264 | |
| 2265 | |
| 2266 # Utility functions | |
| 2267 | |
_localhost = None

def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The first lookup is memoized in the module-level ``_localhost``
    cache; subsequent calls return the cached address.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
| 2275 | |
_thishost = None

def thishost():
    """Return the IP addresses of the current host.

    The resolved tuple is memoized in the module-level ``_thishost``
    cache.  If the machine's own hostname cannot be resolved, the
    addresses of 'localhost' are used instead.
    """
    global _thishost
    if _thishost is None:
        try:
            addresses = socket.gethostbyname_ex(socket.gethostname())[2]
        except socket.gaierror:
            addresses = socket.gethostbyname_ex('localhost')[2]
        _thishost = tuple(addresses)
    return _thishost
| 2286 | |
_ftperrors = None

def ftperrors():
    """Return the set of errors raised by the FTP class.

    ``ftplib`` is imported lazily on first use and the result cached in
    the module-level ``_ftperrors``.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
| 2295 | |
_noheaders = None

def noheaders():
    """Return an empty email Message object.

    The single instance is created lazily and cached in the
    module-level ``_noheaders``.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
| 2303 | |
| 2304 | |
| 2305 # Utility classes | |
| 2306 | |
class ftpwrapper(object):
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() and not yet
        # closed; the connection is only really closed once this is 0.
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """Connect, log in, and change to the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Return (file-like object, length-or-None) for *file*.

        *type* selects the transfer mode ('D'/'d' requests a directory
        listing); an empty *file* lists the current directory.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection may have gone stale: reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file": fall through to the
                # directory-listing branch below.
                if str(reason)[:3] != '550':
                    raise_with_traceback(URLError('ftp error: %r' % reason))
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        ### Was:
                        # raise URLError('ftp error: %r' % reason) from reason
                        exc = URLError('ftp error: %r' % reason)
                        exc.__cause__ = reason
                        raise exc
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() is invoked when the caller closes the returned
        # object, which keeps the refcount bookkeeping accurate.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        self.busy = 0

    def close(self):
        # Disable keepalive; actually close only once no handed-out
        # file objects remain open.
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called (via addclosehook) when a retrfile() result is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
| 2399 | |
| 2400 # Proxy handling | |
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        lowered = name.lower()
        # Only non-empty values count; an empty setting disables the entry.
        if value and lowered.endswith('_proxy'):
            scheme = lowered[:-len('_proxy')]
            proxies[scheme] = value
    return proxies
| 2416 | |
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # bypass when the host (with or without port) ends with a listed suffix
    for entry in no_proxy.split(','):
        suffix = entry.strip()
        if suffix and (hostonly.endswith(suffix) or host.endswith(suffix)):
            return 1
    # otherwise, don't bypass
    return 0
| 2436 | |
| 2437 | |
| 2438 # This code tests an OSX specific data structure but is testable on all | |
| 2439 # platforms | |
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack a (possibly truncated) dotted-quad like "10.1" into a
        # 32-bit integer; missing trailing components default to 0.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    hostIP = None  # resolved lazily, only if a numeric exception is seen

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value: continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            # Numeric entry (optionally with /prefix): compare the
            # host's address against the entry's network prefix.
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except socket.error:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit prefix length: infer it from the number of
                # dotted components (e.g. "10.1" -> /16).
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])
            # Convert prefix length to a right-shift count.
            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            return True

    return False
| 2498 | |
| 2499 | |
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True iff *host* should be reached without a proxy,
        according to the OS X SystemConfiguration framework."""
        return _proxy_bypass_macosx_sysconf(host, _get_proxy_settings())

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Decide whether to bypass the proxy for *host*.

        Environment-variable proxy settings, when present, take
        precedence over the system configuration.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return proxies from the environment or, failing that, from
        the OS X system configuration."""
        env_proxies = getproxies_environment()
        return env_proxies if env_proxies else getproxies_macosx_sysconf()
| 2525 | |
| 2526 | |
elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable is a DWORD: non-zero means IE proxying is on.
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings, e.g.
                    # 'http=host:port;ftp=host2:port'
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            # No scheme given: reuse the protocol name.
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        # Bare 'host:port' value: apply to common schemes.
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        # Return 1 if the registry's ProxyOverride list says *host*
        # should be reached directly, else 0.
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            # ProxyOverride is a ';'-separated list of glob-style
            # patterns, optionally including the special '<local>' token.
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means: bypass for plain (dot-less) host names.
                if '.' not in rawHost:
                    return 1
            # Translate the glob-style override entry into a regex.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return 1 if *host* should bypass the proxy, else 0.

        Consults the environment (no_proxy) when any environment proxy
        settings exist, otherwise the Windows registry override list.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
| 2643 | |
else:
    # By default use environment variables: platforms other than
    # darwin and Windows have no system-level proxy store to consult.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
