Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/boltons/ioutils.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 | |
| 3 # Coding decl above needed for rendering the emdash properly in the | |
| 4 # documentation. | |
| 5 | |
| 6 """ | |
| 7 Module ``ioutils`` implements a number of helper classes and functions which | |
| 8 are useful when dealing with input, output, and bytestreams in a variety of | |
| 9 ways. | |
| 10 """ | |
| 11 import os | |
| 12 from io import BytesIO | |
| 13 from abc import ( | |
| 14 ABCMeta, | |
| 15 abstractmethod, | |
| 16 abstractproperty, | |
| 17 ) | |
| 18 from errno import EINVAL | |
| 19 from codecs import EncodedFile | |
| 20 from tempfile import TemporaryFile | |
| 21 | |
# Resolve the text/bytes type aliases once, at import time.  Probing for the
# Python 2-only ``unicode`` builtin tells the two major versions apart.
try:
    unicode
except NameError:
    # Python 3: text is ``str`` and raw data is ``bytes``.
    text_type, binary_type = str, bytes
else:
    # Python 2: text is ``unicode`` and raw data is ``str``.
    text_type, binary_type = unicode, str
| 28 | |
READ_CHUNK_SIZE = 64000 // 3  # == 21333
"""
Chunk size used when reading data piece by piece. The value is roughly one
third of 64k, so a full chunk stays within about 64k bytes even if every
codepoint in it is three bytes long. It was chosen as a fast, cache-friendly
default.
"""
| 36 | |
| 37 | |
class SpooledIOBase(object):
    """
    Shared base for the spooled file-like objects in this module.

    The SpooledTemporaryFile class doesn't support a number of attributes and
    methods that a StringIO instance does. This brings the api as close to
    compatible as possible with StringIO so that it may be used as a near
    drop-in replacement to save memory.

    Another issue with SpooledTemporaryFile is that the spooled file is always
    a cStringIO rather than a StringIO which causes issues with some of our
    tools.

    Subclasses provide the storage (``buffer``), the primitive operations
    (``read``, ``write``, ``seek``, ``tell``, ``readline``, ``readlines``,
    ``rollover``) and the ``_rolled`` / ``len`` properties; everything else
    here is built on top of those.
    """
    # NOTE: ``__metaclass__`` is only honoured by Python 2.  On Python 3 it is
    # an ordinary class attribute, so the abstract members below are not
    # enforced when the class is instantiated.
    __metaclass__ = ABCMeta

    def __init__(self, max_size=5000000, dir=None):
        """
        Store the spooling configuration.

        ``max_size`` is the threshold subclasses compare against when
        deciding to roll over; ``dir`` is handed to the temporary-file
        factory by the subclasses' ``rollover`` implementations.
        """
        self._max_size = max_size
        self._dir = dir

    @abstractmethod
    def read(self, n=-1):
        """Read n characters from the buffer"""

    @abstractmethod
    def write(self, s):
        """Write into the buffer"""

    @abstractmethod
    def seek(self, pos, mode=0):
        """Seek to a specific point in a file"""

    @abstractmethod
    def readline(self, length=None):
        """Returns the next available line"""

    @abstractmethod
    def readlines(self, sizehint=0):
        """Returns a list of all lines from the current position forward"""

    @abstractmethod
    def rollover(self):
        """Roll file-like-object over into a real temporary file"""

    @abstractmethod
    def tell(self):
        """Return the current position"""

    @abstractproperty
    def buffer(self):
        """Should return a flo instance"""

    @abstractproperty
    def _rolled(self):
        """Returns whether the file has been rolled to a real file or not"""

    @abstractproperty
    def len(self):
        """Returns the length of the data"""

    def _get_softspace(self):
        return self.buffer.softspace

    def _set_softspace(self, val):
        self.buffer.softspace = val

    # Proxied straight to the underlying buffer object.
    softspace = property(_get_softspace, _set_softspace)

    @property
    def _file(self):
        """Alias for ``buffer``."""
        return self.buffer

    def close(self):
        """Close the underlying buffer."""
        return self.buffer.close()

    def flush(self):
        """Flush the underlying buffer."""
        return self.buffer.flush()

    def isatty(self):
        """Delegate to the underlying buffer's ``isatty()``."""
        return self.buffer.isatty()

    def next(self):
        """
        Return the next line, raising StopIteration at the end of the data.

        An empty result from ``readline()`` only ends iteration when the
        buffer's position really is at the end; otherwise the position is
        put back and the empty value is returned.
        """
        line = self.readline()
        if not line:
            pos = self.buffer.tell()
            self.buffer.seek(0, os.SEEK_END)
            if pos == self.buffer.tell():
                raise StopIteration
            else:
                self.buffer.seek(pos)
        return line

    @property
    def closed(self):
        """Whether the underlying buffer reports itself as closed."""
        return self.buffer.closed

    @property
    def pos(self):
        """Current position, same as ``tell()``."""
        return self.tell()

    @property
    def buf(self):
        """Entire contents, same as ``getvalue()``."""
        return self.getvalue()

    def fileno(self):
        """Roll over to a real file first, then return its descriptor."""
        self.rollover()
        return self.buffer.fileno()

    def truncate(self, size=None):
        """
        Custom version of truncate that takes either no arguments (like the
        real SpooledTemporaryFile) or a single argument that truncates the
        value to a certain index location.

        Returns whatever the underlying buffer's ``truncate()`` returns, on
        both code paths.  Raises IOError (EINVAL) for a negative ``size``.
        """
        if size is None:
            return self.buffer.truncate()

        if size < 0:
            raise IOError(EINVAL, "Negative size not allowed")

        # Emulate truncation to a particular location: move there, cut at
        # the current position, then go back if the old position survived.
        pos = self.tell()
        self.seek(size)
        result = self.buffer.truncate()
        if pos < size:
            self.seek(pos)
        return result

    def getvalue(self):
        """Return the entire files contents without moving the position"""
        pos = self.tell()
        self.seek(0)
        try:
            return self.read()
        finally:
            # Restore the caller's position even when the read fails.
            self.seek(pos)

    def seekable(self):
        return True

    def readable(self):
        return True

    def writable(self):
        return True

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __len__(self):
        return self.len

    def __iter__(self):
        return self

    def __enter__(self):
        return self

    def __exit__(self, *args):
        """Close the underlying buffer when the ``with`` block ends."""
        self._file.close()

    def __eq__(self, other):
        """Equal when ``other`` is an instance of this class with the same contents."""
        if isinstance(other, self.__class__):
            return self.getvalue() == other.getvalue()
        return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __bool__(self):
        # Always truthy, so an empty file is not mistaken for "no file"
        # (``__len__`` alone would make an empty instance falsy).
        return True

    # Python 2 spelling of ``__bool__``.
    __nonzero__ = __bool__
| 205 | |
| 206 | |
class SpooledBytesIO(SpooledIOBase):
    """
    SpooledBytesIO is a spooled file-like-object that only accepts bytes. On
    Python 2.x this means the 'str' type; on Python 3.x this means the 'bytes'
    type. Bytes are written in and retrieved exactly as given, but it will
    raise TypeErrors if something other than bytes are written.

    Example::

        >>> from boltons import ioutils
        >>> with ioutils.SpooledBytesIO() as f:
        ...     f.write(b"Happy IO")
        ...     _ = f.seek(0)
        ...     isinstance(f.getvalue(), ioutils.binary_type)
        True
    """

    def read(self, n=-1):
        """Read up to ``n`` bytes from the buffer (everything by default)."""
        return self.buffer.read(n)

    def write(self, s):
        """
        Append ``s`` at the current position.

        Raises TypeError unless ``s`` is a ``binary_type`` instance.  When
        the write would reach ``max_size`` the data is first rolled over to
        a temporary file.
        """
        if not isinstance(s, binary_type):
            raise TypeError("{0} expected, got {1}".format(
                binary_type.__name__,
                type(s).__name__
            ))

        if self.tell() + len(s) >= self._max_size:
            self.rollover()
        self.buffer.write(s)

    def seek(self, pos, mode=0):
        """Move the buffer position; arguments are passed through as-is."""
        return self.buffer.seek(pos, mode)

    def readline(self, length=None):
        """
        Return the next line, limited to ``length`` bytes when given.

        Only ``None`` means "no limit", so ``readline(0)`` returns an empty
        value like other file objects do.
        """
        if length is None:
            return self.buffer.readline()
        return self.buffer.readline(length)

    def readlines(self, sizehint=0):
        """Return the remaining lines as a list."""
        return self.buffer.readlines(sizehint)

    def rollover(self):
        """Roll the StringIO over to a TempFile"""
        if not self._rolled:
            tmp = TemporaryFile(dir=self._dir)
            try:
                pos = self.buffer.tell()
                tmp.write(self.buffer.getvalue())
                tmp.seek(pos)
            except Exception:
                # Do not leak the temporary file if the copy fails.
                tmp.close()
                raise
            self.buffer.close()
            self._buffer = tmp

    @property
    def _rolled(self):
        """True once the buffer is no longer the in-memory BytesIO."""
        return not isinstance(self.buffer, BytesIO)

    @property
    def buffer(self):
        """The current storage object, created lazily as a BytesIO."""
        try:
            return self._buffer
        except AttributeError:
            self._buffer = BytesIO()
        return self._buffer

    @property
    def len(self):
        """Determine the length of the file"""
        pos = self.tell()
        if self._rolled:
            # Rolled over: ask the filesystem for the size of the real file.
            self.seek(0)
            val = os.fstat(self.fileno()).st_size
        else:
            # In memory: the end position is the length.
            self.seek(0, os.SEEK_END)
            val = self.tell()
        self.seek(pos)
        return val

    def tell(self):
        """Return the buffer's current byte position."""
        return self.buffer.tell()
| 287 | |
| 288 | |
class SpooledStringIO(SpooledIOBase):
    """
    SpooledStringIO is a spooled file-like-object that only accepts unicode
    values. On Python 2.x this means the 'unicode' type and on Python 3.x this
    means the 'str' type. Values are accepted as unicode and then coerced into
    utf-8 encoded bytes for storage. On retrieval, the values are returned as
    unicode.

    Positions reported by ``tell()`` and accepted by ``seek()`` are counted
    in codepoints and tracked in ``self._tell``, separately from the byte
    position of the underlying buffer.

    Example::

        >>> from boltons import ioutils
        >>> with ioutils.SpooledStringIO() as f:
        ...     f.write(u"\u2014 Hey, an emdash!")
        ...     _ = f.seek(0)
        ...     isinstance(f.read(), ioutils.text_type)
        True

    """
    def __init__(self, *args, **kwargs):
        # Codepoint position; must exist before anything calls tell().
        self._tell = 0
        super(SpooledStringIO, self).__init__(*args, **kwargs)

    def read(self, n=-1):
        """Read up to ``n`` codepoints and advance the codepoint position."""
        ret = self.buffer.reader.read(n, n)
        self._tell = self.tell() + len(ret)
        return ret

    def write(self, s):
        """
        Write the text ``s``, stored as utf-8 bytes.

        Raises TypeError unless ``s`` is a ``text_type`` instance.  Rolls
        over to a temporary file first when the encoded data would reach
        ``max_size`` bytes.
        """
        if not isinstance(s, text_type):
            raise TypeError("{0} expected, got {1}".format(
                text_type.__name__,
                type(s).__name__
            ))
        current_pos = self.tell()
        # Encode once; the byte length drives the rollover decision.
        encoded = s.encode('utf-8')
        if self.buffer.tell() + len(encoded) >= self._max_size:
            self.rollover()
        self.buffer.write(encoded)
        self._tell = current_pos + len(s)

    def _traverse_codepoints(self, current_position, n):
        """Traverse from current position to the right n codepoints"""
        dest = current_position + n
        while True:
            if current_position == dest:
                # By chance we've landed on the right position, break
                break

            # If the read would take us past the intended position then
            # seek only enough to cover the offset
            if current_position + READ_CHUNK_SIZE > dest:
                self.read(dest - current_position)
                break
            else:
                ret = self.read(READ_CHUNK_SIZE)

            # Increment our current position
            current_position += READ_CHUNK_SIZE

            # If we kept reading but there was nothing here, break
            # as we are at the end of the file
            if not ret:
                break

        return dest

    def seek(self, pos, mode=0):
        """
        Traverse from offset to the specified codepoint.

        ``mode`` is ``os.SEEK_SET``, ``os.SEEK_CUR`` or ``os.SEEK_END``.
        For ``SEEK_END`` the target is ``len - pos``, i.e. ``pos`` counts
        backwards from the end.  Returns the new codepoint position.

        Raises ValueError for an unknown ``mode`` or when the resulting
        position would be negative.
        """
        if mode == os.SEEK_SET:
            dest = pos
        elif mode == os.SEEK_CUR:
            dest = self.tell() + pos
        elif mode == os.SEEK_END:
            dest = self.len - pos
        else:
            raise ValueError(
                "Invalid whence ({0}, should be 0, 1, or 2)".format(mode)
            )

        if dest < 0:
            raise ValueError("Negative seek position {0}".format(dest))

        if mode == os.SEEK_CUR and pos >= 0:
            # Forward relative seek: keep reading from where we are.
            self._traverse_codepoints(self.tell(), pos)
        else:
            # Codepoints can only be counted by reading forwards, so every
            # other case (including backward relative seeks) restarts from
            # the beginning of the data.
            self.buffer.seek(0)
            self._traverse_codepoints(0, dest)
        self._tell = dest
        return self.tell()

    def readline(self, length=None):
        """Return the next line as text and advance the codepoint position."""
        ret = self.buffer.readline(length).decode('utf-8')
        self._tell = self.tell() + len(ret)
        return ret

    def readlines(self, sizehint=0):
        """Return the remaining lines as a list of text values."""
        ret = [x.decode('utf-8') for x in self.buffer.readlines(sizehint)]
        self._tell = self.tell() + sum((len(x) for x in ret))
        return ret

    @property
    def buffer(self):
        """The current storage object, created lazily around a BytesIO."""
        try:
            return self._buffer
        except AttributeError:
            self._buffer = EncodedFile(BytesIO(), data_encoding='utf-8')
        return self._buffer

    @property
    def _rolled(self):
        """True once the wrapped stream is no longer the in-memory BytesIO."""
        return not isinstance(self.buffer.stream, BytesIO)

    def rollover(self):
        """Roll the StringIO over to a TempFile"""
        if not self._rolled:
            tmp = EncodedFile(TemporaryFile(dir=self._dir),
                              data_encoding='utf-8')
            try:
                pos = self.buffer.tell()
                tmp.write(self.buffer.getvalue())
                tmp.seek(pos)
            except Exception:
                # Do not leak the temporary file if the copy fails.
                tmp.close()
                raise
            self.buffer.close()
            self._buffer = tmp

    def tell(self):
        """Return the codepoint position"""
        return self._tell

    @property
    def len(self):
        """Determine the number of codepoints in the file"""
        byte_pos = self.buffer.tell()
        # self.read() advances the codepoint counter as a side effect, so
        # remember it and put it back once counting is done.
        codepoint_pos = self._tell
        self.buffer.seek(0)
        total = 0
        try:
            while True:
                ret = self.read(READ_CHUNK_SIZE)
                if not ret:
                    break
                total += len(ret)
        finally:
            self.buffer.seek(byte_pos)
            self._tell = codepoint_pos
        return total
| 427 | |
| 428 | |
def is_text_fileobj(fileobj):
    """Best-effort check for whether *fileobj* handles text rather than bytes.

    Returns True when the object has a truthy ``encoding`` attribute, or when
    it has a ``getvalue()`` whose result is a text string.  Any error raised
    while probing ``getvalue()`` is treated as "not text".
    """
    # codecs.open and io.TextIOBase
    if getattr(fileobj, 'encoding', False):
        return True

    # StringIO.StringIO / cStringIO.StringIO / io.StringIO
    if not getattr(fileobj, 'getvalue', False):
        return False
    try:
        contents = fileobj.getvalue()
    except Exception:
        return False
    return isinstance(contents, type(u''))
| 441 | |
| 442 | |
class MultiFileReader(object):
    """Takes a list of open files or file-like objects and provides an
    interface to read from them all contiguously. Like
    :func:`itertools.chain()`, but for reading files.

    >>> mfr = MultiFileReader(BytesIO(b'ab'), BytesIO(b'cd'), BytesIO(b'e'))
    >>> mfr.read(3).decode('ascii')
    u'abc'
    >>> mfr.read(3).decode('ascii')
    u'de'

    The constructor takes as many fileobjs as you hand it, and will
    raise a TypeError on non-file-like objects. A ValueError is raised
    when file-like objects are a mix of bytes- and text-handling
    objects (for instance, BytesIO and StringIO).
    """

    def __init__(self, *fileobjs):
        if not all(callable(getattr(f, 'read', None)) and
                   callable(getattr(f, 'seek', None)) for f in fileobjs):
            raise TypeError('MultiFileReader expected file-like objects'
                            ' with .read() and .seek()')
        # Classify every file once; the result picks the join separator.
        text_flags = [is_text_fileobj(f) for f in fileobjs]
        if all(text_flags):
            # codecs.open and io.TextIOBase
            self._joiner = u''
        elif any(text_flags):
            raise ValueError('All arguments to MultiFileReader must handle'
                             ' bytes OR text, not a mix')
        else:
            # open/file and io.BytesIO
            self._joiner = b''
        self._fileobjs = fileobjs
        # Index of the file the next sized read() starts from.
        self._index = 0

    def read(self, amt=None):
        """Read up to the specified *amt*, seamlessly bridging across
        files. Returns the appropriate type of string (bytes or text)
        for the input, and returns an empty string when the files are
        exhausted.

        When *amt* is ``None``, ``0`` or negative, everything remaining
        in all files is returned.
        """
        if not amt or amt < 0:
            return self._joiner.join(f.read() for f in self._fileobjs)
        parts = []
        while amt > 0 and self._index < len(self._fileobjs):
            parts.append(self._fileobjs[self._index].read(amt))
            got = len(parts[-1])
            if got < amt:
                # Short read: this file is exhausted, move to the next one.
                self._index += 1
            amt -= got
        return self._joiner.join(parts)

    def seek(self, offset, whence=os.SEEK_SET):
        """Enables setting position of the file cursor to a given
        *offset*. Currently only supports ``offset=0``.

        Raises NotImplementedError for any other *offset* or *whence*.
        """
        if whence != os.SEEK_SET:
            raise NotImplementedError(
                'MultiFileReader.seek() only supports os.SEEK_SET')
        if offset != 0:
            raise NotImplementedError(
                'MultiFileReader only supports seeking to start at this time')
        for f in self._fileobjs:
            f.seek(0)
        # Start reading from the first file again; without this a reader
        # that had been exhausted would keep returning empty results.
        self._index = 0
