diff lib/python3.8/site-packages/pip/_internal/network/download.py @ 0:9e54283cc701 draft
"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author | guerler
date | Mon, 27 Jul 2020 03:47:31 -0400
parents |
children |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python3.8/site-packages/pip/_internal/network/download.py	Mon Jul 27 03:47:31 2020 -0400
@@ -0,0 +1,200 @@
+"""Download files with progress indicators.
+"""
+import cgi
+import logging
+import mimetypes
+import os
+
+from pip._vendor import requests
+from pip._vendor.requests.models import CONTENT_CHUNK_SIZE
+
+from pip._internal.models.index import PyPI
+from pip._internal.network.cache import is_from_cache
+from pip._internal.network.utils import response_chunks
+from pip._internal.utils.misc import (
+    format_size,
+    redact_auth_from_url,
+    splitext,
+)
+from pip._internal.utils.typing import MYPY_CHECK_RUNNING
+from pip._internal.utils.ui import DownloadProgressProvider
+
+if MYPY_CHECK_RUNNING:
+    from typing import Iterable, Optional
+
+    from pip._vendor.requests.models import Response
+
+    from pip._internal.models.link import Link
+    from pip._internal.network.session import PipSession
+
+logger = logging.getLogger(__name__)
+
+
+def _get_http_response_size(resp):
+    # type: (Response) -> Optional[int]
+    try:
+        return int(resp.headers['content-length'])
+    except (ValueError, KeyError, TypeError):
+        return None
+
+
+def _prepare_download(
+    resp,  # type: Response
+    link,  # type: Link
+    progress_bar  # type: str
+):
+    # type: (...) -> Iterable[bytes]
+    total_length = _get_http_response_size(resp)
+
+    if link.netloc == PyPI.file_storage_domain:
+        url = link.show_url
+    else:
+        url = link.url_without_fragment
+
+    logged_url = redact_auth_from_url(url)
+
+    if total_length:
+        logged_url = '{} ({})'.format(logged_url, format_size(total_length))
+
+    if is_from_cache(resp):
+        logger.info("Using cached %s", logged_url)
+    else:
+        logger.info("Downloading %s", logged_url)
+
+    if logger.getEffectiveLevel() > logging.INFO:
+        show_progress = False
+    elif is_from_cache(resp):
+        show_progress = False
+    elif not total_length:
+        show_progress = True
+    elif total_length > (40 * 1000):
+        show_progress = True
+    else:
+        show_progress = False
+
+    chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
+
+    if not show_progress:
+        return chunks
+
+    return DownloadProgressProvider(
+        progress_bar, max=total_length
+    )(chunks)
+
+
+def sanitize_content_filename(filename):
+    # type: (str) -> str
+    """
+    Sanitize the "filename" value from a Content-Disposition header.
+    """
+    return os.path.basename(filename)
+
+
+def parse_content_disposition(content_disposition, default_filename):
+    # type: (str, str) -> str
+    """
+    Parse the "filename" value from a Content-Disposition header, and
+    return the default filename if the result is empty.
+    """
+    _type, params = cgi.parse_header(content_disposition)
+    filename = params.get('filename')
+    if filename:
+        # We need to sanitize the filename to prevent directory traversal
+        # in case the filename contains ".." path parts.
+        filename = sanitize_content_filename(filename)
+    return filename or default_filename
+
+
+def _get_http_response_filename(resp, link):
+    # type: (Response, Link) -> str
+    """Get an ideal filename from the given HTTP response, falling back to
+    the link filename if not provided.
+    """
+    filename = link.filename  # fallback
+    # Have a look at the Content-Disposition header for a better guess
+    content_disposition = resp.headers.get('content-disposition')
+    if content_disposition:
+        filename = parse_content_disposition(content_disposition, filename)
+    ext = splitext(filename)[1]  # type: Optional[str]
+    if not ext:
+        ext = mimetypes.guess_extension(
+            resp.headers.get('content-type', '')
+        )
+        if ext:
+            filename += ext
+    if not ext and link.url != resp.url:
+        ext = os.path.splitext(resp.url)[1]
+        if ext:
+            filename += ext
+    return filename
+
+
+def _http_get_download(session, link):
+    # type: (PipSession, Link) -> Response
+    target_url = link.url.split('#', 1)[0]
+    resp = session.get(
+        target_url,
+        # We use Accept-Encoding: identity here because requests
+        # defaults to accepting compressed responses. This breaks in
+        # a variety of ways depending on how the server is configured.
+        # - Some servers will notice that the file isn't a compressible
+        #   file and will leave the file alone and with an empty
+        #   Content-Encoding
+        # - Some servers will notice that the file is already
+        #   compressed and will leave the file alone and will add a
+        #   Content-Encoding: gzip header
+        # - Some servers won't notice anything at all and will take
+        #   a file that's already been compressed and compress it again
+        #   and set the Content-Encoding: gzip header
+        # By setting this to request only the identity encoding We're
+        # hoping to eliminate the third case. Hopefully there does not
+        # exist a server which when given a file will notice it is
+        # already compressed and that you're not asking for a
+        # compressed file and will then decompress it before sending
+        # because if that's the case I don't think it'll ever be
+        # possible to make this work.
+        headers={"Accept-Encoding": "identity"},
+        stream=True,
+    )
+    resp.raise_for_status()
+    return resp
+
+
+class Download(object):
+    def __init__(
+        self,
+        response,  # type: Response
+        filename,  # type: str
+        chunks,  # type: Iterable[bytes]
+    ):
+        # type: (...) -> None
+        self.response = response
+        self.filename = filename
+        self.chunks = chunks
+
+
+class Downloader(object):
+    def __init__(
+        self,
+        session,  # type: PipSession
+        progress_bar,  # type: str
+    ):
+        # type: (...) -> None
+        self._session = session
+        self._progress_bar = progress_bar
+
+    def __call__(self, link):
+        # type: (Link) -> Download
+        try:
+            resp = _http_get_download(self._session, link)
+        except requests.HTTPError as e:
+            logger.critical(
+                "HTTP error %s while getting %s", e.response.status_code, link
+            )
+            raise
+
+        return Download(
+            resp,
+            _get_http_response_filename(resp, link),
+            _prepare_download(resp, link, self._progress_bar),
        )
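For reference, a minimal sketch of how the Content-Disposition handling committed above behaves. The header values and filenames here are made-up illustrations, not part of the commit:

    from pip._internal.network.download import parse_content_disposition

    # A well-formed header yields the advertised filename.
    parse_content_disposition('attachment; filename="example-1.0.tar.gz"',
                              'fallback.tar.gz')
    # -> 'example-1.0.tar.gz'

    # Path components are stripped (via os.path.basename) to block traversal.
    parse_content_disposition('attachment; filename="../../evil.sh"',
                              'fallback.tar.gz')
    # -> 'evil.sh'

    # An empty filename parameter falls back to the default.
    parse_content_disposition('attachment; filename=""', 'fallback.tar.gz')
    # -> 'fallback.tar.gz'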
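And a hedged end-to-end sketch of how the Downloader class in this file is driven. The session construction, the URL, and the output handling are assumptions for illustration only; within pip these internals are called by the prepare machinery, not by user code, and their signatures may differ between pip releases:

    from pip._internal.models.link import Link
    from pip._internal.network.session import PipSession
    from pip._internal.network.download import Downloader

    session = PipSession()                      # assumed default construction
    downloader = Downloader(session, progress_bar="on")

    # Hypothetical URL; any direct link to an sdist or wheel would do.
    link = Link("https://example.com/packages/example-1.0.tar.gz")
    download = downloader(link)                 # returns a Download object

    # Stream the response chunks to disk under the derived filename.
    with open(download.filename, "wb") as fh:
        for chunk in download.chunks:
            fh.write(chunk)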