Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/future/backports/email/quoprimime.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d30785e31577 |
---|---|
1 # Copyright (C) 2001-2006 Python Software Foundation | |
2 # Author: Ben Gertzfield | |
3 # Contact: email-sig@python.org | |
4 | |
5 """Quoted-printable content transfer encoding per RFCs 2045-2047. | |
6 | |
7 This module handles the content transfer encoding method defined in RFC 2045 | |
8 to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to | |
9 safely encode text that is in a character set similar to the 7-bit US ASCII | |
10 character set, but that includes some 8-bit characters that are normally not | |
11 allowed in email bodies or headers. | |
12 | |
13 Quoted-printable is very space-inefficient for encoding binary files; use the | |
14 email.base64mime module for that instead. | |
15 | |
16 This module provides an interface to encode and decode both headers and bodies | |
17 with quoted-printable encoding. | |
18 | |
19 RFC 2047 defines a method for including character set information in an | |
20 `encoded-word' in a header. This method is commonly used for 8-bit real names | |
21 in To:/From:/Cc: etc. fields, as well as Subject: lines. | |
22 | |
23 This module does not do the line wrapping or end-of-line character | |
24 conversion necessary for proper internationalized headers; it only | |
25 does dumb encoding and decoding. To deal with the various line | |
26 wrapping issues, use the email.header module. | |
27 """ | |
28 from __future__ import unicode_literals | |
29 from __future__ import division | |
30 from __future__ import absolute_import | |
31 from future.builtins import bytes, chr, dict, int, range, super | |
32 | |
33 __all__ = [ | |
34 'body_decode', | |
35 'body_encode', | |
36 'body_length', | |
37 'decode', | |
38 'decodestring', | |
39 'header_decode', | |
40 'header_encode', | |
41 'header_length', | |
42 'quote', | |
43 'unquote', | |
44 ] | |
45 | |
46 import re | |
47 import io | |
48 | |
49 from string import ascii_letters, digits, hexdigits | |
50 | |
# Line terminators and the separator used when gluing encoded chunks together.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''
54 | |
# Build a mapping of octets to the expansion of that octet.  Since we're only
# going to have 256 of these things, this isn't terribly inefficient
# space-wise.  Remember that headers and bodies have different sets of safe
# characters.  Initialize both maps with the full "=XX" expansion, and then
# override the safe bytes with the more compact (literal) form.
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()

# Safe header bytes which need no encoding.
for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')):
    _QUOPRI_HEADER_MAP[c] = chr(c)
# Headers have one other special encoding; spaces become underscores.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'

# Safe body bytes which need no encoding.  Note that '=' is deliberately
# absent from this list, so it keeps its "=3D" expansion.
for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'
               b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
               b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
    _QUOPRI_BODY_MAP[c] = chr(c)
74 | |
75 | |
76 | |
# Helpers
def header_check(octet):
    """Return True if the octet should be escaped with header quopri.

    An octet counts as "escaped" whenever its entry in the header table is
    not the literal character itself.  This includes the space character,
    which the header table maps to an underscore.

    :param octet: An integer byte value used as a key into the header table.
    :return: True if the header encoding of the octet differs from the
        literal character, False otherwise.
    """
    return chr(octet) != _QUOPRI_HEADER_MAP[octet]
81 | |
82 | |
def body_check(octet):
    """Return True if the octet should be escaped with body quopri.

    :param octet: An integer byte value used as a key into the body table.
    :return: True if the body encoding of the octet differs from the
        literal character, False otherwise.
    """
    return chr(octet) != _QUOPRI_BODY_MAP[octet]
86 | |
87 | |
def header_length(bytearray):
    """Return a header quoted-printable encoding length.

    Note that this does not include any RFC 2047 chrome added by
    `header_encode()`.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for headers.
    """
    # Each octet contributes either 1 (literal / underscore) or 3 ("=XX").
    return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
99 | |
100 | |
def body_length(bytearray):
    """Return a body quoted-printable encoding length.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for bodies.
    """
    # Each octet contributes either 1 (literal) or 3 ("=XX").
    return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
109 | |
110 | |
def _max_append(L, s, maxlen, extra=''):
    """Append s to the last chunk of L, or start a new chunk if it won't fit.

    :param L: A list of string chunks; it is modified in place.
    :param s: The string to add.  A non-string value is converted with
        chr() first.
    :param maxlen: The maximum combined length of the last chunk plus s for
        s to be merged into that chunk.  The length of `extra` is not
        counted against this limit.
    :param extra: A separator inserted between the last chunk and s when
        they are merged.
    """
    if not isinstance(s, str):
        s = chr(s)
    if L and len(L[-1]) + len(s) <= maxlen:
        L[-1] += extra + s
    else:
        # Either L is empty or s does not fit: start a new chunk, dropping
        # any leading whitespace from it.
        L.append(s.lstrip())
120 | |
121 | |
def unquote(s):
    """Turn a string in the form =AB into the character with value 0xAB.

    Only s[1:3] is examined; the leading character and anything after the
    two hex digits are ignored.

    :param s: A string whose second and third characters are hex digits.
    :return: The single character whose code point is that hex value.
    :raises ValueError: If s[1:3] is not a valid hexadecimal number.
    """
    return chr(int(s[1:3], 16))
125 | |
126 | |
def quote(c):
    """Return the quoted-printable escape ("=XX", uppercase hex) for c.

    :param c: A single character.
    :return: '=' followed by the code point of c as at least two uppercase
        hexadecimal digits.
    """
    return '=%02X' % ord(c)
129 | |
130 | |
131 | |
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with quoted-printable (like) encoding.

    Defined in RFC 2047, this `Q' encoding is similar to quoted-printable, but
    used specifically for email header fields to allow charsets with mostly 7
    bit characters (and some 8 bit) to remain more or less readable in non-RFC
    2047 aware mail clients.

    :param header_bytes: The octets to encode; iterating over it must yield
        integer byte values.
    :param charset: Names the character set to use in the RFC 2047
        encoded-word.  It defaults to iso-8859-1.
    :return: The encoded-word string '=?charset?q?...?=', or '' when
        header_bytes is empty.
    """
    # Return empty headers as an empty string.
    if not header_bytes:
        return ''
    # Translate every octet through the header table and glue the pieces
    # together.
    encoded = EMPTYSTRING.join(
        _QUOPRI_HEADER_MAP[octet] for octet in header_bytes)
    # Now add the RFC chrome around the encoded text.
    return '=?%s?q?%s?=' % (charset, encoded)
153 | |
154 | |
class _body_accumulator(io.StringIO):
    """A StringIO that tracks how much room is left on the current line.

    `room` starts at `maxlinelen` and is decremented by write_str(); it is
    reset to `maxlinelen` whenever newline() starts a new output line.
    """

    def __init__(self, maxlinelen, eol, *args, **kw):
        """Create an accumulator.

        :param maxlinelen: Maximum number of characters per output line.
        :param eol: The end-of-line string written by newline().
        """
        super().__init__(*args, **kw)
        self.eol = eol
        self.maxlinelen = self.room = maxlinelen

    def write_str(self, s):
        """Add string s to the accumulated body."""
        self.write(s)
        self.room -= len(s)

    def newline(self):
        """Write eol, then start new line."""
        self.write_str(self.eol)
        self.room = self.maxlinelen

    def write_soft_break(self):
        """Write a soft break, then start a new line."""
        self.write_str('=')
        self.newline()

    def write_wrapped(self, s, extra_room=0):
        """Add a soft line break if needed, then write s.

        :param s: The (possibly already quoted) text to write.
        :param extra_room: Additional room that must remain after s for the
            write to stay on the current line.
        """
        if self.room < len(s) + extra_room:
            self.write_soft_break()
        self.write_str(s)

    def write_char(self, c, is_last_char):
        """Write one (possibly quoted) character of an input line.

        :param c: The text to write: a literal character or its "=XX" form.
        :param is_last_char: True if c is the final character of the input
            line.
        """
        if not is_last_char:
            # Another character follows on this line, so we must leave
            # extra room, either for it or a soft break, and whitespace
            # need not be quoted.
            self.write_wrapped(c, extra_room=1)
        elif c not in ' \t':
            # For this and remaining cases, no more characters follow,
            # so there is no need to reserve extra room (since a hard
            # break will immediately follow).
            self.write_wrapped(c)
        elif self.room >= 3:
            # It's a whitespace character at end-of-line, and we have room
            # for the three-character quoted encoding.
            self.write(quote(c))
        elif self.room == 2:
            # There's room for the whitespace character and a soft break.
            self.write(c)
            self.write_soft_break()
        else:
            # There's room only for a soft break.  The quoted whitespace
            # will be the only content on the subsequent line.
            self.write_soft_break()
            self.write(quote(c))
207 | |
208 | |
def body_encode(body, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters before the
    eol string (maxlinelen defaults to 76 characters, the maximum value
    permitted by RFC 2045).  Long lines will have the 'soft line break'
    quoted-printable character "=" appended to them, so the decoded text will
    be identical to the original text.

    The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
    followed by a soft line break.  Smaller values will generate a
    ValueError.

    :param body: The text to encode.
    :param maxlinelen: Maximum encoded line length, not counting eol.
    :param eol: The line terminator written after each encoded line.
    :return: The encoded text; an empty body is returned unchanged.
    :raises ValueError: If maxlinelen is less than 4.
    """

    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # The last line may or may not end in eol, but all other lines do.
    last_has_eol = (body[-1] in '\r\n')

    # This accumulator will make it easier to build the encoded body.
    encoded_body = _body_accumulator(maxlinelen, eol)

    lines = body.splitlines()
    last_line_no = len(lines) - 1
    for line_no, line in enumerate(lines):
        last_char_index = len(line) - 1
        for i, c in enumerate(line):
            if body_check(ord(c)):
                c = quote(c)
            encoded_body.write_char(c, i == last_char_index)
        # Add an eol if input line had eol.  All input lines have eol except
        # possibly the last one.
        if line_no < last_line_no or last_has_eol:
            encoded_body.newline()

    return encoded_body.getvalue()
253 | |
254 | |
255 | |
# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Trailing whitespace on each encoded line is discarded.  A "=" at the very
    end of a line is a soft line break and joins that line with the next one;
    "=XX" with two hex digits decodes to the corresponding character; any
    other "=" is passed through literally.

    :param encoded: The quoted-printable text to decode.
    :param eol: The line terminator to emit after each decoded line.
    :return: The decoded text; an empty input is returned unchanged.
    """
    if not encoded:
        return encoded

    # Collect the decoded pieces and join them once at the end rather than
    # growing a string by repeated concatenation.
    pieces = []

    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            pieces.append(eol)
            continue

        i = 0
        n = len(line)
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # it is a soft line break: skip it and (via `continue`) skip the
            # eol that would otherwise be emitted below.
            elif i + 1 == n:
                i += 1
                continue
            # Decode if in form =AB
            elif (i + 2 < n and line[i + 1] in hexdigits
                    and line[i + 2] in hexdigits):
                pieces.append(unquote(line[i:i + 3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                pieces.append(c)
                i += 1

            # The line was consumed by something other than a soft break,
            # so it ends with a hard line break.
            if i == n:
                pieces.append(eol)

    decoded = EMPTYSTRING.join(pieces)
    # Special case if original string did not end with eol: drop the eol that
    # was appended for the final line.  Strip the whole eol (it may be more
    # than one character, e.g. "\r\n"), and do nothing when eol is empty so
    # that no real decoded character is removed.
    if eol and encoded[-1] not in '\r\n' and decoded.endswith(eol):
        decoded = decoded[:-len(eol)]
    return decoded


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
308 | |
309 | |
310 | |
def _unquote_match(match):
    """Turn a match in the form =AB to the character with value 0xAB."""
    s = match.group(0)
    # Same conversion as unquote(): the two hex digits follow the "=".
    return chr(int(s[1:3], 16))


# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.

    Underscores are turned into spaces first, then every "=XX" escape (two
    hex digits) is replaced by the corresponding character.  Anything else,
    including a "=" not followed by two hex digits, is left untouched.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.

    :param s: The `Q'-encoded text (without the =?charset?q?...?= chrome).
    :return: The decoded string.
    """
    s = s.replace('_', ' ')
    # NOTE: flags must be passed by keyword.  The fourth positional argument
    # of re.sub() is `count`, so passing re.ASCII positionally would limit
    # the number of substitutions instead of setting the flag.
    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)