comparison planemo/lib/python3.7/site-packages/future/backports/email/quoprimime.py @ 0:d30785e31577 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:18:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d30785e31577
1 # Copyright (C) 2001-2006 Python Software Foundation
2 # Author: Ben Gertzfield
3 # Contact: email-sig@python.org
4
5 """Quoted-printable content transfer encoding per RFCs 2045-2047.
6
7 This module handles the content transfer encoding method defined in RFC 2045
8 to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
9 safely encode text that is in a character set similar to the 7-bit US ASCII
10 character set, but that includes some 8-bit characters that are normally not
11 allowed in email bodies or headers.
12
13 Quoted-printable is very space-inefficient for encoding binary files; use the
14 email.base64mime module for that instead.
15
16 This module provides an interface to encode and decode both headers and bodies
17 with quoted-printable encoding.
18
19 RFC 2047 defines a method for including character set information in an
20 `encoded-word' in a header. This method is commonly used for 8-bit real names
21 in To:/From:/Cc: etc. fields, as well as Subject: lines.
22
23 This module does not do the line wrapping or end-of-line character
24 conversion necessary for proper internationalized headers; it only
25 does dumb encoding and decoding. To deal with the various line
26 wrapping issues, use the email.header module.
27 """
28 from __future__ import unicode_literals
29 from __future__ import division
30 from __future__ import absolute_import
31 from future.builtins import bytes, chr, dict, int, range, super
32
# Names exported by a star-import of this module.  The helpers
# header_check() and body_check() defined further down are not listed.
__all__ = [
    'body_decode',
    'body_encode',
    'body_length',
    'decode',
    'decodestring',
    'header_decode',
    'header_encode',
    'header_length',
    'quote',
    'unquote',
    ]
45
46 import re
47 import io
48
49 from string import ascii_letters, digits, hexdigits
50
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Two lookup tables, keyed by octet value (0..255), give the quoted-printable
# text for that octet.  Headers and bodies differ in which octets are "safe",
# so each gets its own table.  Both start out with every octet expanded to
# its three-character '=XX' form; the safe octets are then overwritten with
# the bare character.
_QUOPRI_HEADER_MAP = dict((octet, '=%02X' % octet) for octet in range(256))
_QUOPRI_BODY_MAP = dict(_QUOPRI_HEADER_MAP)

# Octets that may appear unencoded in a header.
for c in bytes(b'-!*+/' + (ascii_letters + digits).encode('ascii')):
    _QUOPRI_HEADER_MAP[c] = chr(c)
# The one header-only special case: a space is written as an underscore.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'

# Octets that may appear unencoded in a body.
for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'
               b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
               b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
    _QUOPRI_BODY_MAP[c] = chr(c)
74
75
76
77 # Helpers
def header_check(octet):
    """Tell whether an octet must be escaped in a quoted-printable header.

    :param octet: Integer octet value, used as the key into the header map.
    :return: True when the header map's entry for the octet is anything
        other than the bare character itself, False otherwise.
    """
    literal = chr(octet)
    return _QUOPRI_HEADER_MAP[octet] != literal
81
82
def body_check(octet):
    """Tell whether an octet must be escaped in a quoted-printable body.

    :param octet: Integer octet value, used as the key into the body map.
    :return: True when the body map's entry for the octet is anything other
        than the bare character itself, False otherwise.
    """
    literal = chr(octet)
    return _QUOPRI_BODY_MAP[octet] != literal
86
87
def header_length(bytearray):
    """Compute the length of a byte array once header-encoded.

    The figure covers only the encoded payload; it leaves out the RFC 2047
    chrome that `header_encode()` wraps around it.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The number of characters the byte array occupies when encoded
        with quoted-printable for headers.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_HEADER_MAP[octet])
    return total
99
100
def body_length(bytearray):
    """Compute the length of a byte array once body-encoded.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The number of characters the byte array occupies when encoded
        with quoted-printable for bodies.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_BODY_MAP[octet])
    return total
109
110
def _max_append(L, s, maxlen, extra=''):
    """Add s to the list of chunks L, extending the last chunk if it fits.

    :param L: List of string chunks; modified in place.
    :param s: String to add.  A non-string is first converted with chr().
    :param maxlen: Largest combined length of the last chunk plus s for which
        s is still appended to that chunk.
    :param extra: Separator inserted between the last chunk and s when the
        two are joined.  It is not counted against maxlen.
    """
    if not isinstance(s, str):
        s = chr(s)
    fits_in_last = bool(L) and len(L[-1]) + len(s) <= maxlen
    if fits_in_last:
        L[-1] += extra + s
    else:
        # Either there is no chunk yet or the last one is full: start a new
        # chunk, dropping any leading whitespace.
        L.append(s.lstrip())
120
121
def unquote(s):
    """Convert a string of the form =AB into the character with value 0xAB.

    Only the second and third characters of s are examined; they are parsed
    as a hexadecimal number.
    """
    hex_pair = s[1:3]
    code_point = int(hex_pair, 16)
    return chr(code_point)
125
126
def quote(c):
    """Return the =XX form of character c (upper-case hex, at least 2 digits)."""
    code_point = ord(c)
    return '=%02X' % code_point
129
130
131
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with quoted-printable (like) encoding.

    This is the `Q' encoding used in RFC 2047 encoded-words.  It resembles
    quoted-printable, but is meant for email header fields so that charsets
    made up mostly of 7 bit characters (plus a few 8 bit ones) stay more or
    less readable in mail clients that do not understand MIME.

    :param header_bytes: The octets to encode; each one is looked up in the
        header map.
    :param charset: Character set name placed in the encoded-word.  It is
        inserted as given and defaults to iso-8859-1.
    :return: '' for empty input, otherwise a string of the form
        =?charset?q?payload?=
    """
    # An empty header stays an empty string, with no RFC chrome around it.
    if not header_bytes:
        return ''
    # Translate every octet through the header map and glue the pieces
    # together, then wrap the payload in the encoded-word chrome.
    payload = EMPTYSTRING.join(
        _QUOPRI_HEADER_MAP[octet] for octet in header_bytes)
    return '=?%s?q?%s?=' % (charset, payload)
153
154
class _body_accumulator(io.StringIO):
    """A StringIO that tracks how much room is left on the current line.

    `room` starts at `maxlinelen`, shrinks as text is written through
    write_str(), and is reset to `maxlinelen` whenever newline() is called.
    """

    def __init__(self, maxlinelen, eol, *args, **kw):
        super().__init__(*args, **kw)
        self.eol = eol
        self.maxlinelen = maxlinelen
        self.room = maxlinelen

    def write_str(self, s):
        """Append s to the accumulated body and charge it against room."""
        self.write(s)
        self.room = self.room - len(s)

    def newline(self):
        """Emit eol and begin a fresh line with full room."""
        self.write_str(self.eol)
        self.room = self.maxlinelen

    def write_soft_break(self):
        """Emit a soft break ('=' followed by eol) and begin a fresh line."""
        self.write_str('=')
        self.newline()

    def write_wrapped(self, s, extra_room=0):
        """Write s, preceded by a soft break if the line lacks room.

        The line is considered too short when fewer than
        len(s) + extra_room characters remain.
        """
        needed = len(s) + extra_room
        if needed > self.room:
            self.write_soft_break()
        self.write_str(s)

    def write_char(self, c, is_last_char):
        """Write one (possibly already quoted) character of an input line.

        :param c: The text to write for this character.
        :param is_last_char: True when no further character follows on the
            input line.
        """
        if not is_last_char:
            # More input follows on this line, so keep one spare column for
            # either the next character or a soft break.  Whitespace in the
            # middle of a line needs no quoting.
            self.write_wrapped(c, extra_room=1)
            return

        if c not in ' \t':
            # Last character and not whitespace: a hard break comes next,
            # so no spare column is required.
            self.write_wrapped(c)
            return

        # From here on c is whitespace at the end of the input line.
        if self.room >= 3:
            # The three-character quoted form fits on this line.
            self.write(quote(c))
        elif self.room == 2:
            # Exactly enough room for the raw whitespace plus a soft break.
            self.write(c)
            self.write_soft_break()
        else:
            # Only a soft break fits; the quoted whitespace ends up alone
            # on the following line.
            self.write_soft_break()
            self.write(quote(c))
207
208
def body_encode(body, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters before the
    eol string (maxlinelen defaults to 76 characters, the maximum value
    permitted by RFC 2045).  Long lines will have the 'soft line break'
    quoted-printable character "=" appended to them, so the decoded text will
    be identical to the original text.

    Only CR, LF and CRLF in body are treated as line breaks.  Every other
    character, including control characters such as form feed, is passed
    through the body map and quoted when it is not a safe body character.

    The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
    followed by a soft line break.  Smaller values will generate a
    ValueError.

    :param body: The text to encode.
    :param maxlinelen: Maximum encoded line length, not counting eol.
    :param eol: String written at the end of every encoded line.
    :return: The encoded text; an empty body is returned unchanged.
    :raises ValueError: If maxlinelen is smaller than 4.
    """

    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # The last line may or may not end in eol, but all other lines do.
    last_has_eol = (body[-1] in '\r\n')

    # Split on CR, LF and CRLF only.  str.splitlines() would also break on
    # VT, FF, FS, GS, RS, NEL and the Unicode line/paragraph separators, which
    # silently replaced those characters with eol instead of quoting them.
    lines = body.replace('\r\n', '\n').replace('\r', '\n').split('\n')
    if last_has_eol:
        # A trailing line break leaves one empty string after the final
        # separator; drop it so that `lines` matches the real input lines.
        lines.pop()

    # This accumulator will make it easier to build the encoded body.
    encoded_body = _body_accumulator(maxlinelen, eol)

    last_line_no = len(lines) - 1
    for line_no, line in enumerate(lines):
        last_char_index = len(line) - 1
        for i, c in enumerate(line):
            if body_check(ord(c)):
                c = quote(c)
            encoded_body.write_char(c, i == last_char_index)
        # Add an eol if input line had eol.  All input lines have eol except
        # possibly the last one.
        if line_no < last_line_no or last_has_eol:
            encoded_body.newline()

    return encoded_body.getvalue()
253
254
255
256 # BAW: I'm not sure if the intent was for the signature of this function to be
257 # the same as base64MIME.decode() or not...
def decode(encoded, eol='\n'):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Trailing whitespace on each encoded line is discarded.  A line ending in
    "=" is a soft line break and is joined to the following line.  An "="
    that is not followed by two hexadecimal digits is passed through
    literally.

    :param encoded: The quoted-printable text to decode.
    :param eol: String emitted for every hard line break in the result.
    :return: The decoded text; an empty input is returned unchanged.  When
        the input does not end with a line break, the result does not end
        with a trailing eol either.
    """
    if not encoded:
        return encoded

    # Collect the decoded fragments and join them once at the end instead of
    # growing a string piece by piece.
    pieces = []

    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            pieces.append(eol)
            continue

        i = 0
        n = len(line)
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # this is a soft line break: emit nothing, not even eol.
            elif i + 1 == n:
                i += 1
                continue
            # Decode if in form =AB
            elif (i + 2 < n and line[i + 1] in hexdigits
                    and line[i + 2] in hexdigits):
                pieces.append(chr(int(line[i + 1:i + 3], 16)))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                pieces.append(c)
                i += 1

            # The line ended on real content, so it carries a hard break.
            if i == n:
                pieces.append(eol)

    decoded = ''.join(pieces)
    # Special case if original string did not end with eol: drop the eol that
    # was appended after the last line.  Remove the whole eol (it may be
    # longer than one character) and do nothing when eol is empty.
    if eol and encoded[-1] not in '\r\n' and decoded.endswith(eol):
        decoded = decoded[:-len(eol)]
    return decoded


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
308
309
310
def _unquote_match(match):
    """Turn a match in the form =AB to the ASCII character with value 0xab"""
    return chr(int(match.group(0)[1:3], 16))


# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.

    Underscores are turned into spaces and every =XX escape (two hexadecimal
    digits) is replaced by the character with that value.  All escapes in
    the string are decoded, however many there are.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.

    :param s: The encoded payload of an encoded-word.
    :return: The decoded string.
    """
    s = s.replace('_', ' ')
    # re.ASCII must be passed by keyword: the fourth positional parameter of
    # re.sub() is `count`, so passing the flag positionally capped the number
    # of replacements at the flag's integer value.
    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)