Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/docutils/io.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 (2020-05-02) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 # $Id: io.py 8394 2019-09-18 10:13:17Z milde $ | |
2 # Author: David Goodger <goodger@python.org> | |
3 # Copyright: This module has been placed in the public domain. | |
4 | |
5 """ | |
6 I/O classes provide a uniform API for low-level input and output. Subclasses | |
7 exist for a variety of input/output mechanisms. | |
8 """ | |
9 from __future__ import print_function | |
10 | |
11 __docformat__ = 'reStructuredText' | |
12 | |
13 import sys | |
14 import os | |
15 import re | |
16 import codecs | |
17 from docutils import TransformSpec | |
18 from docutils.utils.error_reporting import locale_encoding, ErrorString, ErrorOutput | |
19 | |
if sys.version_info[0] >= 3:
    # Python 3 has no separate ``unicode`` type: alias the name to ``str``
    # so the ``isinstance(..., unicode)`` checks below work on both major
    # versions.
    unicode = str  # noqa
22 | |
23 | |
class InputError(IOError):
    """I/O error raised when an input source cannot be opened."""
class OutputError(IOError):
    """I/O error raised when an output destination cannot be opened."""
26 | |
def check_encoding(stream, encoding):
    """Test whether the encoding of `stream` matches `encoding`.

    Both names are resolved with ``codecs.lookup`` before comparison, so
    aliases of the same codec (e.g. ``utf8`` and ``UTF-8``) compare equal.

    Returns

    :None:  if `encoding` or `stream.encoding` is not a valid encoding
            argument (e.g. ``None``) or `stream.encoding` is missing.
    :True:  if both names resolve to the same codec,
    :False: if the encodings differ.
    """
    try:
        stream_codec = codecs.lookup(stream.encoding)
        requested_codec = codecs.lookup(encoding)
    except (LookupError, AttributeError, TypeError):
        # unknown codec name, no ``encoding`` attribute, or a non-string name
        return None
    return stream_codec == requested_codec
41 | |
42 | |
class Input(TransformSpec):

    """
    Abstract base class for input wrappers.

    Subclasses implement `read()`; `decode()` is available to turn raw
    (byte string) input into a Unicode string.
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None,
                 error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        # Fall back to the class-level default when no path is given.
        self.source_path = source_path or self.default_source_path
        """A text reference to the source."""

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self):
        details = (self.__class__, self.source, self.source_path)
        return '%s: source=%r, source_path=%r' % details

    def read(self):
        """Return the input data; must be implemented by subclasses."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode a string, `data`, heuristically.
        Raise UnicodeError if unsuccessful.

        Unicode input is returned unchanged.  Otherwise the candidate
        encodings are, in order of precedence: the explicitly configured
        `self.encoding`, an encoding declared inside the data (BOM or
        coding slug), and finally the guesses UTF-8, the locale encoding
        (if known) and Latin-1.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        configured = self.encoding
        if configured and configured.lower() == 'unicode':
            assert isinstance(data, unicode), (
                'input encoding is "unicode" '
                'but input is not a unicode object')
        if isinstance(data, unicode):
            # Accept unicode even if self.encoding != 'unicode'.
            return data

        if configured:
            # An explicitly given encoding is trusted.
            candidates = [configured]
        else:
            declared = self.determine_encoding_from_data(data)
            if declared:
                # The data names its own encoding (explicitly or via a
                # BOM); trust that as well.
                candidates = [declared]
            else:
                # Guess.  UTF-8 goes first because it only matches data
                # that *IS* UTF-8.
                candidates = ['utf-8', 'latin-1']
                if locale_encoding:
                    candidates.insert(1, locale_encoding)

        for candidate in candidates:
            try:
                text = unicode(data, candidate, self.error_handler)
            except (UnicodeError, LookupError) as err:
                # Keep a reference: in Python 3 the exception name is
                # local to the except clause.
                error = err
                continue
            self.successful_encoding = candidate
            # Return decoded, removing BOMs.
            return text.replace(u'\ufeff', u'')

        tried = ', '.join(repr(candidate) for candidate in candidates)
        raise UnicodeError(
            'Unable to decode input data. Tried the following encodings: '
            '%s.\n(%s)' % (tried, ErrorString(error)))

    coding_slug = re.compile(br"coding[:=]\s*([-\w.]+)")
    """Encoding declaration pattern."""

    byte_order_marks = ((codecs.BOM_UTF8, 'utf-8'),
                        (codecs.BOM_UTF16_BE, 'utf-16-be'),
                        (codecs.BOM_UTF16_LE, 'utf-16-le'),)
    """Sequence of (start_bytes, encoding) tuples for encoding detection.
    The first bytes of input data are checked against the start_bytes strings.
    A match indicates the given encoding."""

    def determine_encoding_from_data(self, data):
        """
        Try to determine the encoding of `data` by looking *in* `data`.
        Check for a byte order mark (BOM) or an encoding declaration.

        Return the encoding name, or None if nothing was found.
        """
        # A byte order mark at the very start wins.
        for bom, bom_encoding in self.byte_order_marks:
            if data.startswith(bom):
                return bom_encoding
        # Otherwise look for a coding slug in the first two lines.
        for head_line in data.splitlines()[:2]:
            declaration = self.coding_slug.search(head_line)
            if declaration:
                return declaration.group(1).decode('ascii')
        return None
153 | |
154 | |
class Output(TransformSpec):

    """
    Abstract base class for output wrappers.

    Subclasses implement `write()`; `encode()` is available to turn a
    Unicode string into the configured output encoding.
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the output destination."""

        self.error_handler = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination = destination
        """The destination for output data."""

        # Fall back to the class-level default when no path is given.
        self.destination_path = (destination_path
                                 or self.default_destination_path)
        """A text reference to the destination."""

    def __repr__(self):
        details = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % details

    def write(self, data):
        """`data` is a Unicode string, to be encoded by `self.encode`."""
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data` encoded with `self.encoding` and `self.error_handler`.

        With the pseudo-encoding "unicode", `data` must be a Unicode string
        and is returned unchanged.  Non-Unicode `data` (e.g. bytes) is
        passed through untouched.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, unicode), (
                'the encoding given is "unicode" but the output is not '
                'a Unicode string')
            return data
        if isinstance(data, unicode):
            return data.encode(self.encoding, self.error_handler)
        # Non-unicode (e.g. bytes) output.
        return data
201 | |
202 | |
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(self, source=None, source_path=None,
                 encoding=None, error_handler='strict',
                 autoclose=True,
                 mode='r' if sys.version_info >= (3, 0) else 'rU'):
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened and then read.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              `sys.stdin` is the source).
            - `mode`: how the file is to be opened (see standard function
              `open`). The default 'rU' provides universal newline support
              for text files with Python 2.x.

        Raise `InputError` if the file at `source_path` cannot be opened and
        `UnicodeError` if a given `source` (Python 3) was opened with an
        encoding that differs from `encoding`.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # Specify encoding in Python 3 -- but only for text mode:
                # `open()` rejects `encoding`/`errors` in binary mode
                # (same guard as in `FileOutput.open`).
                if sys.version_info >= (3, 0) and 'b' not in mode:
                    kwargs = {'encoding': self.encoding,
                              'errors': self.error_handler}
                else:
                    kwargs = {}
                try:
                    self.source = open(source_path, mode, **kwargs)
                except IOError as error:
                    raise InputError(error.errno, error.strerror, source_path)
            else:
                self.source = sys.stdin
        elif (sys.version_info >= (3, 0) and
              check_encoding(self.source, self.encoding) is False):
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # Use the name of the file object as path, if it has one.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self):
        """
        Read and decode a single file and return the data (Unicode string).

        If reading fails with a decoding error, no encoding was specified
        and a source path is known, the file is re-read in binary mode and
        decoded heuristically by `decode()`.  The source is closed
        afterwards if `autoclose` is set.
        """
        try:
            if self.source is sys.stdin and sys.version_info >= (3, 0):
                # read as binary data to circumvent auto-decoding
                data = self.source.buffer.read()
                # normalize newlines
                data = b'\n'.join(data.splitlines()) + b'\n'
            else:
                data = self.source.read()
        except (UnicodeError, LookupError):  # (in Py3k read() decodes)
            if not self.encoding and self.source_path:
                # re-read in binary mode and decode with heuristics;
                # the context manager closes the file even if read() fails
                with open(self.source_path, 'rb') as b_source:
                    data = b_source.read()
                # normalize newlines
                data = b'\n'.join(data.splitlines()) + b'\n'
            else:
                raise
        finally:
            if self.autoclose:
                self.close()
        return self.decode(data)

    def readlines(self):
        """
        Return lines of a single file as list of Unicode strings.
        """
        return self.read().splitlines(True)

    def close(self):
        """Close the source, unless it is `sys.stdin`."""
        if self.source is not sys.stdin:
            self.source.close()
291 | |
292 | |
class FileOutput(Output):

    """
    Output for single, simple file-like objects.
    """

    mode = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`).
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=True,
                 handle_io_errors=None, mode=None):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # Opening is deferred until the first `write()`.
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (  # destination is file-type object -> check mode:
              mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            print('Warning: Destination mode "%s" differs from specified '
                  'mode "%s"' % (self.destination.mode, mode),
                  file=self._stderr)
        if not destination_path:
            # Use the name of the file object as path, if it has one.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """
        Open the file at `self.destination_path` with `self.mode`.

        Raise `OutputError` if the file cannot be opened.
        """
        # Specify encoding in Python 3.
        if sys.version_info >= (3, 0) and 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        else:
            kwargs = {}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except IOError as error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path)
        self.opened = True

    def write(self, data):
        """Encode `data`, write it to a single file, and return it.

        With Python 3 or binary output mode, `data` is returned unchanged,
        except when specified encoding and output encoding differ.

        Raise `TypeError` if the destination does not accept the type of
        `data`, `ValueError` if bytes cannot be written because the
        destination's encoding differs from the specified one, and
        `UnicodeError` if the data cannot be encoded.  The destination is
        closed afterwards if `autoclose` is set.
        """
        if not self.opened:
            self.open()
        if (('b' not in self.mode and sys.version_info < (3, 0))
                or check_encoding(self.destination, self.encoding) is False):
            data = self.encode(data)
            if sys.version_info >= (3, 0) and os.linesep != '\n':
                # fix endings
                data = data.replace(b'\n', bytes(os.linesep, 'ascii'))

        try:
            self.destination.write(data)
        except TypeError as e:
            if not (sys.version_info >= (3, 0) and isinstance(data, bytes)):
                # No fallback exists for this case (e.g. a text string
                # written to a binary stream).  Re-raise instead of
                # silently dropping the data.
                raise
            # Python 3 text streams reject bytes: try the underlying
            # binary buffer instead.
            try:
                self.destination.buffer.write(data)
            except AttributeError:
                if check_encoding(self.destination,
                                  self.encoding) is False:
                    raise ValueError('Encoding of %s (%s) differs \n'
                                     ' from specified encoding (%s)' %
                                     (self.destination_path or 'destination',
                                      self.destination.encoding,
                                      self.encoding))
                else:
                    # Re-raise the original TypeError (a bare ``raise``
                    # would re-raise the AttributeError here).
                    raise e
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                '%s.\n(%s)' % (self.encoding, ErrorString(err)))
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self):
        """Close the destination, unless it is `sys.stdout`/`sys.stderr`."""
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False
405 | |
406 | |
class BinaryFileOutput(FileOutput):
    """
    A version of docutils.io.FileOutput which writes to a binary file.

    Only the default `open()` mode differs from the parent class.
    """
    # Used by core.publish_cmdline_to_binary() which in turn is used by
    # rst2odt (OpenOffice writer)
    mode = 'wb'
414 | |
415 | |
class StringInput(Input):

    """
    Direct string input.
    """

    default_source_path = '<string>'

    def read(self):
        """Return the source string, decoded by `self.decode`."""
        raw = self.source
        return self.decode(raw)
427 | |
428 | |
class StringOutput(Output):

    """
    Direct string output.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, store it in `self.destination`, and return it."""
        encoded = self.encode(data)
        self.destination = encoded
        return encoded
441 | |
442 | |
class NullInput(Input):

    """
    Degenerate input: read nothing.
    """

    default_source_path = 'null input'

    def read(self):
        """Return an empty Unicode string; no source is consulted."""
        nothing = u''
        return nothing
454 | |
455 | |
class NullOutput(Output):

    """
    Degenerate output: write nothing.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Do nothing ([don't even] send data to the bit bucket)."""
        return None
467 | |
468 | |
class DocTreeInput(Input):

    """
    Adapter for document tree input.

    The document tree must be passed in the ``source`` parameter.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree unchanged (no decoding takes place)."""
        doctree = self.source
        return doctree