"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import errno
import stat
import sys
# Import _thread instead of threading to reduce startup cost
from _thread import allocate_lock as Lock
if sys.platform in {'win32', 'cygwin'}:
    from msvcrt import setmode as _setmode
else:
    _setmode = None

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)

valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.add(os.SEEK_HOLE)
    valid_seek_flags.add(os.SEEK_DATA)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError

# Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in release build.
_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode)
# Does open() check its 'errors' argument?
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE


def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream. Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python. It has no effect in Python 3. Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)
    raw = FileIO(file,
                 (creating and "x" or "") +
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd, opener=opener)
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except:
        result.close()
        raise

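
# --- Illustrative sketch (not part of the original module) -------------------
# A minimal example of how the layers produced by open() stack up for a few
# common modes.  The file name is hypothetical and the helper is never called
# at import time; it only documents the behaviour described in the docstring
# above.
def _open_layering_sketch(path="example.dat"):
    # buffering=0 is only allowed in binary mode and returns the raw FileIO.
    with open(path, "wb", buffering=0) as raw:
        raw.write(b"spam")
    # Default binary read mode wraps FileIO in a BufferedReader.
    with open(path, "rb") as buffered:
        data = buffered.read()
    # Text mode adds a TextIOWrapper on top of the buffered layer.
    with open(path, "r", encoding="utf-8", errors="strict") as text:
        first_line = text.readline()
    return data, first_line
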
# Define a default pure-Python implementation for open_code()
# that does not allow hooks. Warn on first use. Defined for tests.
def _open_code_with_warning(path):
    """Opens the provided file with mode ``'rb'``. This function
    should be used when the intent is to treat the contents as
    executable code.

    ``path`` should be an absolute path.

    When supported by the runtime, this function can be hooked
    in order to allow embedders more control over code files.
    This functionality is not supported on the current runtime.
    """
    import warnings
    warnings.warn("_pyio.open_code() may not be using hooks",
                  RuntimeWarning, 2)
    return open(path, "rb")

try:
    open_code = io.open_code
except AttributeError:
    open_code = _open_code_with_warning


class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ=None):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        return open(*args, **kwargs)


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(OSError, ValueError):
        pass


class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an OSError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail. Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
io.IOBase.register(IOBase)

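
# --- Illustrative sketch (not part of the original module) -------------------
# IOBase subclasses support iteration and the with statement.  The helper
# below is never called at import time; it uses BytesIO (defined later in
# this module) purely as a convenient in-memory stream.
def _iobase_protocol_sketch():
    stream = BytesIO(b"spam\neggs\n")   # any IOBase subclass would do
    lines = []
    with stream:                        # __enter__ checks the stream is open
        for line in stream:             # iteration calls readline() until EOF
            lines.append(line)
    return lines                        # __exit__ closed it; [b"spam\n", b"eggs\n"]
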
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        b = bytearray(size.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)


class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes with at most one read() system call,
        where size is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """

        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """

        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        if not isinstance(b, memoryview):
            b = memoryview(b)
        b = b.cast('B')

        if read1:
            data = self.read1(len(b))
        else:
            data = self.read(len(b))
        n = len(data)

        b[:n] = data

        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)


class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream. It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise OSError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise OSError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        self._checkClosed()
        self._checkWritable()

        # Flush the stream. We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            try:
                # may raise BlockingIOError or BrokenPipeError etc
                self.flush()
            finally:
                self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        modname = self.__class__.__module__
        clsname = self.__class__.__qualname__
        try:
            name = self.name
        except AttributeError:
            return "<{}.{}>".format(modname, clsname)
        else:
            return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Initialize _buffer as soon as possible since it's used by __del__()
    # which calls close()
    _buffer = None

    def __init__(self, initial_bytes=None):
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        else:
            pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            else:
                pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

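
# --- Illustrative sketch (not part of the original module) -------------------
# BytesIO keeps the whole "file" in a bytearray, so seeking past the end and
# then writing pads with null bytes, and getbuffer() exposes the bytearray
# directly.  The helper is never called at import time.
def _bytesio_sketch():
    buf = BytesIO(b"header")
    buf.seek(8)                  # two bytes past the current end
    buf.write(b"body")           # buffer is now b"header\x00\x00body"
    view = buf.getbuffer()       # writable memoryview of the internal buffer
    view[0:1] = b"H"
    del view                     # release the view before truncate()/close()
    buf.truncate(8)
    return buf.getvalue()        # b"Header\x00\x00"
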
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* with at most one system call."""

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer
                avail = min(len(self._read_buf) - self._read_pos, len(buf))
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in caller's buffer is larger than
                # internal buffer, read directly into caller's buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer. (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if n is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n > len(self._write_buf) or n < 0:
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:n]

    def tell(self):
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # We have to release the lock and call self.flush() (which will
        # probably just re-take the lock) in case flush has been overridden in
        # a subclass or the user set self.flush to something. This is the same
        # behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()

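
# --- Illustrative sketch (not part of the original module) -------------------
# BufferedWriter only hands data to the raw stream once the internal buffer
# grows past buffer_size, or on flush()/close().  A BytesIO stands in for the
# raw stream purely for demonstration; the helper is never called at import
# time.
def _bufferedwriter_sketch():
    raw = BytesIO()
    writer = BufferedWriter(raw, buffer_size=8)
    writer.write(b"abc")              # stays in the write buffer
    before_flush = raw.getvalue()     # b"" - nothing reached the raw stream yet
    writer.write(b"defghijkl")        # crosses buffer_size, triggers a flush
    writer.flush()                    # push anything still buffered
    return before_flush, raw.getvalue()   # (b"", b"abcdefghijkl")
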
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, size=-1):
        if size is None:
            size = -1
        return self.reader.read(size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise OSError("seek() returned invalid position")
        return pos

    def tell(self):
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)

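
# --- Illustrative sketch (not part of the original module) -------------------
# BufferedRandom coordinates the read and write buffers it inherits: a seek()
# flushes pending writes and discards the read-ahead so both views stay
# consistent.  A BytesIO stands in for a seekable raw stream; the helper is
# never called at import time.
def _bufferedrandom_sketch():
    raw = BytesIO(b"0123456789")
    stream = BufferedRandom(raw)
    head = stream.read(4)        # b"0123"; may read ahead into the buffer
    stream.seek(0)               # drops the read-ahead, keeps position exact
    stream.write(b"AB")          # buffered until flush()/seek()/close()
    stream.flush()
    stream.seek(0)
    return head, stream.read()   # (b"0123", b"AB23456789")
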
class FileIO(RawIOBase):
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending. The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing. A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available. Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto()."""
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned. In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count. Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int. Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The current file position is changed to the value of size.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations. close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'

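
# --- Illustrative sketch (not part of the original module) -------------------
# FileIO can be handed an existing file descriptor instead of a name; with
# closefd=False the descriptor outlives the FileIO object.  The path and the
# descriptor handling below are purely illustrative and the helper is never
# called at import time.
def _fileio_fd_sketch(path="example.dat"):
    fd = os.open(path, os.O_RDWR | os.O_CREAT, 0o666)
    try:
        writer = FileIO(fd, "w", closefd=False)   # borrow the fd, do not close it
        writer.write(b"raw bytes")
        writer.close()                            # fd is still open here
        os.lseek(fd, 0, SEEK_SET)
        reader = FileIO(fd, "r", closefd=False)
        data = reader.read()
        reader.close()
    finally:
        os.close(fd)                              # the caller owns the descriptor
    return data
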
1819class TextIOBase(IOBase):
1820
1821 """Base class for text I/O.
1822
1823 This class provides a character and line based interface to stream
1824 I/O. There is no public constructor.
1825 """
1826
1827 def read(self, size=-1):
1828 """Read at most size characters from stream, where size is an int.
1829
1830 Read from underlying buffer until we have size characters or we hit EOF.
1831 If size is negative or omitted, read until EOF.
1832
1833 Returns a string.
1834 """
1835 self._unsupported("read")
1836
1837 def write(self, s):
1838 """Write string s to the stream and return the number of characters written."""
1839 self._unsupported("write")
1840
1841 def truncate(self, pos=None):
1842 """Truncate the stream to at most pos, where pos is an int."""
1843 self._unsupported("truncate")
1844
1845 def readline(self):
1846 """Read until newline or EOF.
1847
1848 Returns an empty string if EOF is hit immediately.
1849 """
1850 self._unsupported("readline")
1851
1852 def detach(self):
1853 """
1854 Separate the underlying buffer from the TextIOBase and return it.
1855
1856 After the underlying buffer has been detached, the TextIO is in an
1857 unusable state.
1858 """
1859 self._unsupported("detach")
1860
1861 @property
1862 def encoding(self):
1863 """Subclasses should override."""
1864 return None
1865
1866 @property
1867 def newlines(self):
1868 """Line endings translated so far.
1869
1870 Only line endings translated during reading are considered.
1871
1872 Subclasses should override.
1873 """
1874 return None
1875
1876 @property
1877 def errors(self):
1878 """Error setting of the decoder or encoder.
1879
1880 Subclasses should override."""
1881 return None
1882
1883io.TextIOBase.register(TextIOBase)
1884
1885
1886class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1887 r"""Codec used when reading a file in universal newlines mode. It wraps
1888 another incremental decoder, translating \r\n and \r into \n. It also
1889 records the types of newlines encountered. When used with
1890 translate=False, it ensures that the newline sequence is returned in
1891 one piece.
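
 A rough doctest-style illustration (wrapping no inner decoder here is an
 assumption made purely for brevity; real callers pass an incremental
 decoder object):

     >>> d = IncrementalNewlineDecoder(None, translate=True)
     >>> d.decode("a\r\nb\rc\n", final=True)
     'a\nb\nc\n'
     >>> d.newlines
     ('\r', '\n', '\r\n')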
1892 """
1893 def __init__(self, decoder, translate, errors='strict'):
1894 codecs.IncrementalDecoder.__init__(self, errors=errors)
1895 self.translate = translate
1896 self.decoder = decoder
1897 self.seennl = 0
1898 self.pendingcr = False
1899
1900 def decode(self, input, final=False):
1901 # decode input (prepending any \r retained from the previous pass)
1902 if self.decoder is None:
1903 output = input
1904 else:
1905 output = self.decoder.decode(input, final=final)
1906 if self.pendingcr and (output or final):
1907 output = "\r" + output
1908 self.pendingcr = False
1909
1910 # retain last \r even when not translating data:
1911 # then readline() is sure to get \r\n in one pass
1912 if output.endswith("\r") and not final:
1913 output = output[:-1]
1914 self.pendingcr = True
1915
1916 # Record which newlines are read
1917 crlf = output.count('\r\n')
1918 cr = output.count('\r') - crlf
1919 lf = output.count('\n') - crlf
1920 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1921 | (crlf and self._CRLF)
1922
1923 if self.translate:
1924 if crlf:
1925 output = output.replace("\r\n", "\n")
1926 if cr:
1927 output = output.replace("\r", "\n")
1928
1929 return output
1930
1931 def getstate(self):
1932 if self.decoder is None:
1933 buf = b""
1934 flag = 0
1935 else:
1936 buf, flag = self.decoder.getstate()
1937 flag <<= 1
1938 if self.pendingcr:
1939 flag |= 1
1940 return buf, flag
1941
1942 def setstate(self, state):
1943 buf, flag = state
1944 self.pendingcr = bool(flag & 1)
1945 if self.decoder is not None:
1946 self.decoder.setstate((buf, flag >> 1))
1947
1948 def reset(self):
1949 self.seennl = 0
1950 self.pendingcr = False
1951 if self.decoder is not None:
1952 self.decoder.reset()
1953
1954 _LF = 1
1955 _CR = 2
1956 _CRLF = 4
1957
1958 @property
1959 def newlines(self):
1960 return (None,
1961 "\n",
1962 "\r",
1963 ("\r", "\n"),
1964 "\r\n",
1965 ("\n", "\r\n"),
1966 ("\r", "\r\n"),
1967 ("\r", "\n", "\r\n")
1968 )[self.seennl]
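
 # For example, a stream in which only '\n' and '\r\n' endings were seen
 # has seennl == (_LF | _CRLF) == 5, which selects the ('\n', '\r\n')
 # entry of the tuple above.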
1969
1970
1971class TextIOWrapper(TextIOBase):
1972
1973 r"""Character and line based layer over a BufferedIOBase object, buffer.
1974
1975 encoding gives the name of the encoding that the stream will be
1976 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1977
1978 errors determines the strictness of encoding and decoding (see the
1979 documentation for codecs.register) and defaults to "strict".
1980
1981 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1982 handling of line endings. If it is None, universal newlines is
1983 enabled. With this enabled, on input, the line endings '\n', '\r',
1984 or '\r\n' are translated to '\n' before being returned to the
1985 caller. Conversely, on output, '\n' is translated to the system
1986 default line separator, os.linesep. If newline is any other of its
1987 legal values, input lines are terminated only by that string and it is
1988 returned to the caller untranslated. On output, '\n' is converted to
1989 that newline.
1990
1991 If line_buffering is True, a call to flush is implied when a call to
1992 write contains a newline character.
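
 A minimal usage sketch (illustrative only; BytesIO is the in-memory
 buffer class defined earlier in this module):

     >>> t = TextIOWrapper(BytesIO(b"one\r\ntwo\r\n"),
     ...                   encoding="ascii", newline=None)
     >>> t.readline()
     'one\n'
     >>> t.read()
     'two\n'
     >>> t.newlines
     '\r\n'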
1993 """
1994
1995 _CHUNK_SIZE = 2048
1996
1997 # Initialize _buffer as soon as possible since it's used by __del__()
1998 # which calls close()
1999 _buffer = None
2000
2001 # The write_through argument has no effect here since this
2002 # implementation always writes through. The argument is present only
2003 # so that the signature can match the signature of the C version.
2004 def __init__(self, buffer, encoding=None, errors=None, newline=None,
2005 line_buffering=False, write_through=False):
2006 self._check_newline(newline)
2007 if encoding is None:
2008 try:
2009 encoding = os.device_encoding(buffer.fileno())
2010 except (AttributeError, UnsupportedOperation):
2011 pass
2012 if encoding is None:
2013 try:
2014 import locale
2015 except ImportError:
2016 # Importing locale may fail if Python is being built
2017 encoding = "ascii"
2018 else:
2019 encoding = locale.getpreferredencoding(False)
2020
2021 if not isinstance(encoding, str):
2022 raise ValueError("invalid encoding: %r" % encoding)
2023
2024 if not codecs.lookup(encoding)._is_text_encoding:
2025 msg = ("%r is not a text encoding; "
2026 "use codecs.open() to handle arbitrary codecs")
2027 raise LookupError(msg % encoding)
2028
2029 if errors is None:
2030 errors = "strict"
2031 else:
2032 if not isinstance(errors, str):
2033 raise ValueError("invalid errors: %r" % errors)
2034 if _CHECK_ERRORS:
2035 codecs.lookup_error(errors)
2036
2037 self._buffer = buffer
2038 self._decoded_chars = '' # buffer for text returned from decoder
2039 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
2040 self._snapshot = None # info for reconstructing decoder state
2041 self._seekable = self._telling = self.buffer.seekable()
2042 self._has_read1 = hasattr(self.buffer, 'read1')
2043 self._configure(encoding, errors, newline,
2044 line_buffering, write_through)
2045
2046 def _check_newline(self, newline):
2047 if newline is not None and not isinstance(newline, str):
2048 raise TypeError("illegal newline type: %r" % (type(newline),))
2049 if newline not in (None, "", "\n", "\r", "\r\n"):
2050 raise ValueError("illegal newline value: %r" % (newline,))
2051
2052 def _configure(self, encoding=None, errors=None, newline=None,
2053 line_buffering=False, write_through=False):
2054 self._encoding = encoding
2055 self._errors = errors
2056 self._encoder = None
2057 self._decoder = None
2058 self._b2cratio = 0.0
2059
2060 self._readuniversal = not newline
2061 self._readtranslate = newline is None
2062 self._readnl = newline
2063 self._writetranslate = newline != ''
2064 self._writenl = newline or os.linesep
2065
2066 self._line_buffering = line_buffering
2067 self._write_through = write_through
2068
2069 # don't write a BOM in the middle of a file
2070 if self._seekable and self.writable():
2071 position = self.buffer.tell()
2072 if position != 0:
2073 try:
2074 self._get_encoder().setstate(0)
2075 except LookupError:
2076 # Sometimes the encoder doesn't exist
2077 pass
2078
2079 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2080 # where dec_flags is the second (integer) item of the decoder state
2081 # and next_input is the chunk of input bytes that comes next after the
2082 # snapshot point. We use this to reconstruct decoder states in tell().
2083
2084 # Naming convention:
2085 # - "bytes_..." for integer variables that count input bytes
2086 # - "chars_..." for integer variables that count decoded characters
2087
2088 def __repr__(self):
2089 result = "<{}.{}".format(self.__class__.__module__,
2090 self.__class__.__qualname__)
2091 try:
2092 name = self.name
2093 except AttributeError:
2094 pass
2095 else:
2096 result += " name={0!r}".format(name)
2097 try:
2098 mode = self.mode
2099 except AttributeError:
2100 pass
2101 else:
2102 result += " mode={0!r}".format(mode)
2103 return result + " encoding={0!r}>".format(self.encoding)
2104
2105 @property
2106 def encoding(self):
2107 return self._encoding
2108
2109 @property
2110 def errors(self):
2111 return self._errors
2112
2113 @property
2114 def line_buffering(self):
2115 return self._line_buffering
2116
2117 @property
2118 def write_through(self):
2119 return self._write_through
2120
2121 @property
2122 def buffer(self):
2123 return self._buffer
2124
2125 def reconfigure(self, *,
2126 encoding=None, errors=None, newline=Ellipsis,
2127 line_buffering=None, write_through=None):
2128 """Reconfigure the text stream with new parameters.
2129
2130 This also flushes the stream.
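
 For example, assuming f is an existing text stream, line buffering
 can be enabled after the fact with:

     f.reconfigure(line_buffering=True)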
2131 """
2132 if (self._decoder is not None
2133 and (encoding is not None or errors is not None
2134 or newline is not Ellipsis)):
2135 raise UnsupportedOperation(
2136 "It is not possible to set the encoding or newline of stream "
2137 "after the first read")
2138
2139 if errors is None:
2140 if encoding is None:
2141 errors = self._errors
2142 else:
2143 errors = 'strict'
2144 elif not isinstance(errors, str):
2145 raise TypeError("invalid errors: %r" % errors)
2146
2147 if encoding is None:
2148 encoding = self._encoding
2149 else:
2150 if not isinstance(encoding, str):
2151 raise TypeError("invalid encoding: %r" % encoding)
2152
2153 if newline is Ellipsis:
2154 newline = self._readnl
2155 self._check_newline(newline)
2156
2157 if line_buffering is None:
2158 line_buffering = self.line_buffering
2159 if write_through is None:
2160 write_through = self.write_through
2161
2162 self.flush()
2163 self._configure(encoding, errors, newline,
2164 line_buffering, write_through)
2165
2166 def seekable(self):
2167 if self.closed:
2168 raise ValueError("I/O operation on closed file.")
2169 return self._seekable
2170
2171 def readable(self):
2172 return self.buffer.readable()
2173
2174 def writable(self):
2175 return self.buffer.writable()
2176
2177 def flush(self):
2178 self.buffer.flush()
2179 self._telling = self._seekable
2180
2181 def close(self):
2182 if self.buffer is not None and not self.closed:
2183 try:
2184 self.flush()
2185 finally:
2186 self.buffer.close()
2187
2188 @property
2189 def closed(self):
2190 return self.buffer.closed
2191
2192 @property
2193 def name(self):
2194 return self.buffer.name
2195
2196 def fileno(self):
2197 return self.buffer.fileno()
2198
2199 def isatty(self):
2200 return self.buffer.isatty()
2201
2202 def write(self, s):
2203 """Write the string s to the stream and return the number of characters written."""
2204 if self.closed:
2205 raise ValueError("write to closed file")
2206 if not isinstance(s, str):
2207 raise TypeError("can't write %s to text stream" %
2208 s.__class__.__name__)
2209 length = len(s)
2210 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
2211 if haslf and self._writetranslate and self._writenl != "\n":
2212 s = s.replace("\n", self._writenl)
2213 encoder = self._encoder or self._get_encoder()
2214 # XXX What if we were just reading?
2215 b = encoder.encode(s)
2216 self.buffer.write(b)
2217 if self._line_buffering and (haslf or "\r" in s):
2218 self.flush()
2219 self._set_decoded_chars('')
2220 self._snapshot = None
2221 if self._decoder:
2222 self._decoder.reset()
2223 return length
2224
2225 def _get_encoder(self):
2226 make_encoder = codecs.getincrementalencoder(self._encoding)
2227 self._encoder = make_encoder(self._errors)
2228 return self._encoder
2229
2230 def _get_decoder(self):
2231 make_decoder = codecs.getincrementaldecoder(self._encoding)
2232 decoder = make_decoder(self._errors)
2233 if self._readuniversal:
2234 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2235 self._decoder = decoder
2236 return decoder
2237
2238 # The following three methods implement an ADT for _decoded_chars.
2239 # Text returned from the decoder is buffered here until the client
2240 # requests it by calling our read() or readline() method.
2241 def _set_decoded_chars(self, chars):
2242 """Set the _decoded_chars buffer."""
2243 self._decoded_chars = chars
2244 self._decoded_chars_used = 0
2245
2246 def _get_decoded_chars(self, n=None):
2247 """Advance into the _decoded_chars buffer."""
2248 offset = self._decoded_chars_used
2249 if n is None:
2250 chars = self._decoded_chars[offset:]
2251 else:
2252 chars = self._decoded_chars[offset:offset + n]
2253 self._decoded_chars_used += len(chars)
2254 return chars
2255
2256 def _rewind_decoded_chars(self, n):
2257 """Rewind the _decoded_chars buffer."""
2258 if self._decoded_chars_used < n:
2259 raise AssertionError("rewind decoded_chars out of bounds")
2260 self._decoded_chars_used -= n
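
 # A rough illustration of the ADT above: after _set_decoded_chars('abcdef'),
 # _get_decoded_chars(4) returns 'abcd' and leaves _decoded_chars_used == 4;
 # _rewind_decoded_chars(2) then moves the offset back to 2, so the next
 # _get_decoded_chars() call returns 'cdef'.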
2261
2262 def _read_chunk(self):
2263 """
2264 Read and decode the next chunk of data from the BufferedReader.
2265 """
2266
2267 # The return value is True unless EOF was reached. The decoded
2268 # string is placed in self._decoded_chars (replacing its previous
2269 # value). The entire input chunk is sent to the decoder, though
2270 # some of it may remain buffered in the decoder, yet to be
2271 # converted.
2272
2273 if self._decoder is None:
2274 raise ValueError("no decoder")
2275
2276 if self._telling:
2277 # To prepare for tell(), we need to snapshot a point in the
2278 # file where the decoder's input buffer is empty.
2279
2280 dec_buffer, dec_flags = self._decoder.getstate()
2281 # Given this, we know there was a valid snapshot point
2282 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2283
2284 # Read a chunk, decode it, and put the result in self._decoded_chars.
2285 if self._has_read1:
2286 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2287 else:
2288 input_chunk = self.buffer.read(self._CHUNK_SIZE)
2289 eof = not input_chunk
2290 decoded_chars = self._decoder.decode(input_chunk, eof)
2291 self._set_decoded_chars(decoded_chars)
2292 if decoded_chars:
2293 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2294 else:
2295 self._b2cratio = 0.0
2296
2297 if self._telling:
2298 # At the snapshot point, len(dec_buffer) bytes before the read,
2299 # the next input to be decoded is dec_buffer + input_chunk.
2300 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2301
2302 return not eof
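
 # A hedged illustration of the snapshot above: if the decoder was holding
 # b'\xc3' (half of a two-byte UTF-8 sequence) before a chunk b'\xa9xyz'
 # was read, the snapshot becomes (dec_flags, b'\xc3\xa9xyz'); tell() can
 # then rewind to the file offset where that buffered byte was originally
 # read and replay these bytes to rebuild any intermediate decoder state.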
2303
2304 def _pack_cookie(self, position, dec_flags=0,
2305 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
2306 # The meaning of a tell() cookie is: seek to position, set the
2307 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2308 # into the decoder with need_eof as the EOF flag, then skip
2309 # chars_to_skip characters of the decoded result. For most simple
2310 # decoders, tell() will often just give a byte offset in the file.
2311 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2312 (chars_to_skip<<192) | bool(need_eof)<<256)
2313
2314 def _unpack_cookie(self, bigint):
2315 rest, position = divmod(bigint, 1<<64)
2316 rest, dec_flags = divmod(rest, 1<<64)
2317 rest, bytes_to_feed = divmod(rest, 1<<64)
2318 need_eof, chars_to_skip = divmod(rest, 1<<64)
2319 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
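
 # A worked example of the packing above (the values are arbitrary):
 # _pack_cookie(100, dec_flags=1, bytes_to_feed=2, chars_to_skip=3) returns
 # 100 | (1 << 64) | (2 << 128) | (3 << 192), and _unpack_cookie() recovers
 # (100, 1, 2, False, 3) by peeling off 64-bit fields with divmod(..., 1 << 64).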
2320
2321 def tell(self):
2322 if not self._seekable:
2323 raise UnsupportedOperation("underlying stream is not seekable")
2324 if not self._telling:
2325 raise OSError("telling position disabled by next() call")
2326 self.flush()
2327 position = self.buffer.tell()
2328 decoder = self._decoder
2329 if decoder is None or self._snapshot is None:
2330 if self._decoded_chars:
2331 # This should never happen.
2332 raise AssertionError("pending decoded text")
2333 return position
2334
2335 # Skip backward to the snapshot point (see _read_chunk).
2336 dec_flags, next_input = self._snapshot
2337 position -= len(next_input)
2338
2339 # How many decoded characters have been used up since the snapshot?
2340 chars_to_skip = self._decoded_chars_used
2341 if chars_to_skip == 0:
2342 # We haven't moved from the snapshot point.
2343 return self._pack_cookie(position, dec_flags)
2344
2345 # Starting from the snapshot position, we will walk the decoder
2346 # forward until it gives us enough decoded characters.
2347 saved_state = decoder.getstate()
2348 try:
2349 # Fast search for an acceptable start point, close to our
2350 # current pos.
2351 # Rationale: calling decoder.decode() has a large overhead
2352 # regardless of chunk size; we want the number of such calls to
2353 # be O(1) in most situations (common decoders, sensible input).
2354 # Actually, it will be exactly 1 for fixed-size codecs (all
2355 # 8-bit codecs, also UTF-16 and UTF-32).
2356 skip_bytes = int(self._b2cratio * chars_to_skip)
2357 skip_back = 1
2358 assert skip_bytes <= len(next_input)
2359 while skip_bytes > 0:
2360 decoder.setstate((b'', dec_flags))
2361 # Decode up to the tentative start point
2362 n = len(decoder.decode(next_input[:skip_bytes]))
2363 if n <= chars_to_skip:
2364 b, d = decoder.getstate()
2365 if not b:
2366 # Before pos and no bytes buffered in decoder => OK
2367 dec_flags = d
2368 chars_to_skip -= n
2369 break
2370 # Skip back by buffered amount and reset heuristic
2371 skip_bytes -= len(b)
2372 skip_back = 1
2373 else:
2374 # We're too far ahead, skip back a bit
2375 skip_bytes -= skip_back
2376 skip_back = skip_back * 2
2377 else:
2378 skip_bytes = 0
2379 decoder.setstate((b'', dec_flags))
2380
2381 # Note our initial start point.
2382 start_pos = position + skip_bytes
2383 start_flags = dec_flags
2384 if chars_to_skip == 0:
2385 # We haven't moved from the start point.
2386 return self._pack_cookie(start_pos, start_flags)
2387
2388 # Feed the decoder one byte at a time. As we go, note the
2389 # nearest "safe start point" before the current location
2390 # (a point where the decoder has nothing buffered, so seek()
2391 # can safely start from there and advance to this location).
2392 bytes_fed = 0
2393 need_eof = False
2394 # Chars decoded since `start_pos`
2395 chars_decoded = 0
2396 for i in range(skip_bytes, len(next_input)):
2397 bytes_fed += 1
2398 chars_decoded += len(decoder.decode(next_input[i:i+1]))
2399 dec_buffer, dec_flags = decoder.getstate()
2400 if not dec_buffer and chars_decoded <= chars_to_skip:
2401 # Decoder buffer is empty, so this is a safe start point.
2402 start_pos += bytes_fed
2403 chars_to_skip -= chars_decoded
2404 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
2405 if chars_decoded >= chars_to_skip:
2406 break
2407 else:
2408 # We didn't get enough decoded data; signal EOF to get more.
2409 chars_decoded += len(decoder.decode(b'', final=True))
2410 need_eof = True
2411 if chars_decoded < chars_to_skip:
2412 raise OSError("can't reconstruct logical file position")
2413
2414 # The returned cookie corresponds to the last safe start point.
2415 return self._pack_cookie(
2416 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
2417 finally:
2418 decoder.setstate(saved_state)
2419
2420 def truncate(self, pos=None):
2421 self.flush()
2422 if pos is None:
2423 pos = self.tell()
2424 return self.buffer.truncate(pos)
2425
2426 def detach(self):
2427 if self.buffer is None:
2428 raise ValueError("buffer is already detached")
2429 self.flush()
2430 buffer = self._buffer
2431 self._buffer = None
2432 return buffer
2433
2434 def seek(self, cookie, whence=0):
2435 def _reset_encoder(position):
2436 """Reset the encoder (only needed for proper BOM handling)."""
2437 try:
2438 encoder = self._encoder or self._get_encoder()
2439 except LookupError:
2440 # Sometimes the encoder doesn't exist
2441 pass
2442 else:
2443 if position != 0:
2444 encoder.setstate(0)
2445 else:
2446 encoder.reset()
2447
2448 if self.closed:
2449 raise ValueError("seek on closed file")
2450 if not self._seekable:
2451 raise UnsupportedOperation("underlying stream is not seekable")
2452 if whence == SEEK_CUR:
2453 if cookie != 0:
2454 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
2455 # Seeking to the current position should attempt to
2456 # sync the underlying buffer with the current position.
2457 whence = 0
2458 cookie = self.tell()
2459 elif whence == SEEK_END:
2460 if cookie != 0:
2461 raise UnsupportedOperation("can't do nonzero end-relative seeks")
2462 self.flush()
2463 position = self.buffer.seek(0, whence)
2464 self._set_decoded_chars('')
2465 self._snapshot = None
2466 if self._decoder:
2467 self._decoder.reset()
2468 _reset_encoder(position)
2469 return position
2470 if whence != 0:
2471 raise ValueError("unsupported whence (%r)" % (whence,))
2472 if cookie < 0:
2473 raise ValueError("negative seek position %r" % (cookie,))
2474 self.flush()
2475
2476 # The strategy of seek() is to go back to the safe start point
2477 # and replay the effect of read(chars_to_skip) from there.
2478 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
2479 self._unpack_cookie(cookie)
2480
2481 # Seek back to the safe start point.
2482 self.buffer.seek(start_pos)
2483 self._set_decoded_chars('')
2484 self._snapshot = None
2485
2486 # Restore the decoder to its state from the safe start point.
2487 if cookie == 0 and self._decoder:
2488 self._decoder.reset()
2489 elif self._decoder or dec_flags or chars_to_skip:
2490 self._decoder = self._decoder or self._get_decoder()
2491 self._decoder.setstate((b'', dec_flags))
2492 self._snapshot = (dec_flags, b'')
2493
2494 if chars_to_skip:
2495 # Just like _read_chunk, feed the decoder and save a snapshot.
2496 input_chunk = self.buffer.read(bytes_to_feed)
2497 self._set_decoded_chars(
2498 self._decoder.decode(input_chunk, need_eof))
2499 self._snapshot = (dec_flags, input_chunk)
2500
2501 # Skip chars_to_skip of the decoded characters.
2502 if len(self._decoded_chars) < chars_to_skip:
2503 raise OSError("can't restore logical file position")
2504 self._decoded_chars_used = chars_to_skip
2505
2506 _reset_encoder(cookie)
2507 return cookie
2508
2509 def read(self, size=None):
2510 self._checkReadable()
2511 if size is None:
2512 size = -1
2513 else:
2514 try:
2515 size_index = size.__index__
2516 except AttributeError:
2517 raise TypeError(f"{size!r} is not an integer")
2518 else:
2519 size = size_index()
2520 decoder = self._decoder or self._get_decoder()
2521 if size < 0:
2522 # Read everything.
2523 result = (self._get_decoded_chars() +
2524 decoder.decode(self.buffer.read(), final=True))
2525 self._set_decoded_chars('')
2526 self._snapshot = None
2527 return result
2528 else:
2529 # Keep reading chunks until we have size characters to return.
2530 eof = False
2531 result = self._get_decoded_chars(size)
2532 while len(result) < size and not eof:
2533 eof = not self._read_chunk()
2534 result += self._get_decoded_chars(size - len(result))
2535 return result
2536
2537 def __next__(self):
2538 self._telling = False
2539 line = self.readline()
2540 if not line:
2541 self._snapshot = None
2542 self._telling = self._seekable
2543 raise StopIteration
2544 return line
2545
2546 def readline(self, size=None):
2547 if self.closed:
2548 raise ValueError("read from closed file")
2549 if size is None:
2550 size = -1
2551 else:
2552 try:
2553 size_index = size.__index__
2554 except AttributeError:
2555 raise TypeError(f"{size!r} is not an integer")
2556 else:
2557 size = size_index()
2558
2559 # Grab all the decoded text (we will rewind any extra bits later).
2560 line = self._get_decoded_chars()
2561
2562 start = 0
2563 # Make the decoder if it doesn't already exist.
2564 if not self._decoder:
2565 self._get_decoder()
2566
2567 pos = endpos = None
2568 while True:
2569 if self._readtranslate:
2570 # Newlines are already translated, only search for \n
2571 pos = line.find('\n', start)
2572 if pos >= 0:
2573 endpos = pos + 1
2574 break
2575 else:
2576 start = len(line)
2577
2578 elif self._readuniversal:
2579 # Universal newline search. Find any of \r, \r\n, \n
2580 # The decoder ensures that \r\n are not split in two pieces
2581
2582 # In C we'd look for these in parallel of course.
2583 nlpos = line.find("\n", start)
2584 crpos = line.find("\r", start)
2585 if crpos == -1:
2586 if nlpos == -1:
2587 # Nothing found
2588 start = len(line)
2589 else:
2590 # Found \n
2591 endpos = nlpos + 1
2592 break
2593 elif nlpos == -1:
2594 # Found lone \r
2595 endpos = crpos + 1
2596 break
2597 elif nlpos < crpos:
2598 # Found \n
2599 endpos = nlpos + 1
2600 break
2601 elif nlpos == crpos + 1:
2602 # Found \r\n
2603 endpos = crpos + 2
2604 break
2605 else:
2606 # Found \r
2607 endpos = crpos + 1
2608 break
2609 else:
2610 # non-universal
2611 pos = line.find(self._readnl)
2612 if pos >= 0:
2613 endpos = pos + len(self._readnl)
2614 break
2615
2616 if size >= 0 and len(line) >= size:
2617 endpos = size # reached length size
2618 break
2619
2620 # No line ending seen yet - get more data
2621 while self._read_chunk():
2622 if self._decoded_chars:
2623 break
2624 if self._decoded_chars:
2625 line += self._get_decoded_chars()
2626 else:
2627 # end of file
2628 self._set_decoded_chars('')
2629 self._snapshot = None
2630 return line
2631
2632 if size >= 0 and endpos > size:
2633 endpos = size # don't exceed size
2634
2635 # Rewind _decoded_chars to just after the line ending we found.
2636 self._rewind_decoded_chars(len(line) - endpos)
2637 return line[:endpos]
2638
2639 @property
2640 def newlines(self):
2641 return self._decoder.newlines if self._decoder else None
2642
2643
2644class StringIO(TextIOWrapper):
2645 """Text I/O implementation using an in-memory buffer.
2646
2647 The initial_value argument sets the initial value of the object. The
2648 newline argument is the same as in TextIOWrapper's constructor.
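
 A short illustrative example (the sample text is arbitrary):

     >>> s = StringIO("hello")
     >>> s.read()
     'hello'
     >>> s.write(" world")
     6
     >>> s.getvalue()
     'hello world'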
2649 """
2650
2651 def __init__(self, initial_value="", newline="\n"):
2652 super(StringIO, self).__init__(BytesIO(),
2653 encoding="utf-8",
2654 errors="surrogatepass",
2655 newline=newline)
2656 # Issue #5645: make universal newlines semantics the same as in the
2657 # C version, even under Windows.
2658 if newline is None:
2659 self._writetranslate = False
2660 if initial_value is not None:
2661 if not isinstance(initial_value, str):
2662 raise TypeError("initial_value must be str or None, not {0}"
2663 .format(type(initial_value).__name__))
2664 self.write(initial_value)
2665 self.seek(0)
2666
2667 def getvalue(self):
2668 self.flush()
2669 decoder = self._decoder or self._get_decoder()
2670 old_state = decoder.getstate()
2671 decoder.reset()
2672 try:
2673 return decoder.decode(self.buffer.getvalue(), final=True)
2674 finally:
2675 decoder.setstate(old_state)
2676
2677 def __repr__(self):
2678 # TextIOWrapper includes the encoding in its repr. In StringIO,
2679 # that's an implementation detail.
2680 return object.__repr__(self)
2681
2682 @property
2683 def errors(self):
2684 return None
2685
2686 @property
2687 def encoding(self):
2688 return None
2689
2690 def detach(self):
2691 # This doesn't make sense on StringIO.
2692 self._unsupported("detach")