comparison lib/python3.8/site-packages/pip/_internal/network/download.py @ 0:9e54283cc701 draft

"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author guerler
date Mon, 27 Jul 2020 03:47:31 -0400 (2020-07-27)
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9e54283cc701
1 """Download files with progress indicators.
2 """
3 import cgi
4 import logging
5 import mimetypes
6 import os
7
8 from pip._vendor import requests
9 from pip._vendor.requests.models import CONTENT_CHUNK_SIZE
10
11 from pip._internal.models.index import PyPI
12 from pip._internal.network.cache import is_from_cache
13 from pip._internal.network.utils import response_chunks
14 from pip._internal.utils.misc import (
15 format_size,
16 redact_auth_from_url,
17 splitext,
18 )
19 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
20 from pip._internal.utils.ui import DownloadProgressProvider
21
22 if MYPY_CHECK_RUNNING:
23 from typing import Iterable, Optional
24
25 from pip._vendor.requests.models import Response
26
27 from pip._internal.models.link import Link
28 from pip._internal.network.session import PipSession
29
30 logger = logging.getLogger(__name__)
31
32
33 def _get_http_response_size(resp):
34 # type: (Response) -> Optional[int]
35 try:
36 return int(resp.headers['content-length'])
37 except (ValueError, KeyError, TypeError):
38 return None
39
40
def _prepare_download(
    resp,  # type: Response
    link,  # type: Link
    progress_bar  # type: str
):
    # type: (...) -> Iterable[bytes]
    """Log the download and return an iterable over the response chunks.

    The chunks come from ``response_chunks``; when progress display is
    appropriate they are wrapped by ``DownloadProgressProvider`` first.
    """
    expected_size = _get_http_response_size(resp)

    # Links hosted on the PyPI file storage domain are logged with their
    # ``show_url``; everything else is logged without the URL fragment.
    on_pypi_storage = link.netloc == PyPI.file_storage_domain
    display_url = link.show_url if on_pypi_storage else link.url_without_fragment

    log_target = redact_auth_from_url(display_url)
    if expected_size:
        log_target = '{} ({})'.format(log_target, format_size(expected_size))

    log_template = (
        "Using cached %s" if is_from_cache(resp) else "Downloading %s"
    )
    logger.info(log_template, log_target)

    # Progress is shown only when INFO output is enabled, the response did
    # not come from the cache, and the size is either unknown/zero or
    # larger than 40 * 1000 (as reported by content-length).
    show_progress = (
        logger.getEffectiveLevel() <= logging.INFO
        and not is_from_cache(resp)
        and (not expected_size or expected_size > (40 * 1000))
    )

    chunk_iter = response_chunks(resp, CONTENT_CHUNK_SIZE)

    if show_progress:
        with_progress = DownloadProgressProvider(
            progress_bar, max=expected_size
        )
        return with_progress(chunk_iter)

    return chunk_iter
83
84
def sanitize_content_filename(filename):
    # type: (str) -> str
    """
    Reduce the "filename" value of a Content-Disposition header to its
    final path component, dropping any directory part.
    """
    _directory, leaf = os.path.split(filename)
    return leaf
91
92
def parse_content_disposition(content_disposition, default_filename):
    # type: (str, str) -> str
    """
    Extract the "filename" parameter from a Content-Disposition header
    value. ``default_filename`` is returned when the parameter is missing
    or ends up empty.
    """
    header_params = cgi.parse_header(content_disposition)[1]
    raw_name = header_params.get('filename')
    if not raw_name:
        return default_filename
    # Sanitizing guards against directory traversal when the supplied
    # name contains ".." path parts; the result may be empty, in which
    # case the default is used.
    return sanitize_content_filename(raw_name) or default_filename
106
107
def _get_http_response_filename(resp, link):
    # type: (Response, Link) -> str
    """Pick a filename for the given HTTP response.

    Starts from ``link.filename`` and prefers the name from a
    Content-Disposition header when one is present. If the chosen name has
    no extension, one is appended from the Content-Type header or, failing
    that, from the response URL when it differs from the link URL.
    """
    name = link.filename  # fallback
    disposition = resp.headers.get('content-disposition')
    if disposition:
        # The header may carry a better guess than the link itself.
        name = parse_content_disposition(disposition, name)

    if splitext(name)[1]:
        # Already has an extension; nothing to add.
        return name

    guessed_ext = mimetypes.guess_extension(
        resp.headers.get('content-type', '')
    )
    if guessed_ext:
        return name + guessed_ext

    if link.url != resp.url:
        # The response URL differs from the link's; use its extension.
        url_ext = os.path.splitext(resp.url)[1]
        if url_ext:
            name += url_ext
    return name
130
131
132 def _http_get_download(session, link):
133 # type: (PipSession, Link) -> Response
134 target_url = link.url.split('#', 1)[0]
135 resp = session.get(
136 target_url,
137 # We use Accept-Encoding: identity here because requests
138 # defaults to accepting compressed responses. This breaks in
139 # a variety of ways depending on how the server is configured.
140 # - Some servers will notice that the file isn't a compressible
141 # file and will leave the file alone and with an empty
142 # Content-Encoding
143 # - Some servers will notice that the file is already
144 # compressed and will leave the file alone and will add a
145 # Content-Encoding: gzip header
146 # - Some servers won't notice anything at all and will take
147 # a file that's already been compressed and compress it again
148 # and set the Content-Encoding: gzip header
149 # By setting this to request only the identity encoding We're
150 # hoping to eliminate the third case. Hopefully there does not
151 # exist a server which when given a file will notice it is
152 # already compressed and that you're not asking for a
153 # compressed file and will then decompress it before sending
154 # because if that's the case I don't think it'll ever be
155 # possible to make this work.
156 headers={"Accept-Encoding": "identity"},
157 stream=True,
158 )
159 resp.raise_for_status()
160 return resp
161
162
class Download(object):
    """Bundle an HTTP response with its chosen filename and chunk iterable."""

    def __init__(
        self,
        response,  # type: Response
        filename,  # type: str
        chunks,  # type: Iterable[bytes]
    ):
        # type: (...) -> None
        """Store the three constructor arguments as same-named attributes."""
        # Iterable of byte chunks as handed in by the caller.
        self.chunks = chunks
        # Filename associated with this download.
        self.filename = filename
        # The response object the chunks belong to.
        self.response = response
174
175
class Downloader(object):
    """Callable that turns a link into a ``Download`` using a session."""

    def __init__(
        self,
        session,  # type: PipSession
        progress_bar,  # type: str
    ):
        # type: (...) -> None
        """Remember the session and the progress bar setting."""
        self._progress_bar = progress_bar
        self._session = session

    def __call__(self, link):
        # type: (Link) -> Download
        """Fetch ``link`` and return a ``Download`` for it.

        An HTTP error is logged at critical level and then re-raised.
        """
        try:
            response = _http_get_download(self._session, link)
        except requests.HTTPError as err:
            status_code = err.response.status_code
            logger.critical(
                "HTTP error %s while getting %s", status_code, link
            )
            raise

        # Filename is resolved before the chunk iterable is prepared.
        filename = _get_http_response_filename(response, link)
        chunks = _prepare_download(response, link, self._progress_bar)
        return Download(response, filename, chunks)
200 )