"""Helper class to quickly write a loop over all standard input files.
2
Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty. If a filename is '-' it
11is also replaced by sys.stdin and the optional arguments mode and
12openhook are ignored. To specify an alternative list of filenames,
13pass it as the argument to input(). A single file name is also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin. Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read. Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect. After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
All files are opened in text mode by default; you can override this by
setting the mode parameter of input() or FileInput.__init__().
If an I/O error occurs while opening or reading a file, an OSError
exception is raised.
35
36If sys.stdin is used more than once, the second and further use will
37return no lines, except perhaps for interactive use, or if it has been
38explicitly reset (e.g. using sys.stdin.seek(0)).
39
40Empty files are opened and immediately closed; the only time their
41presence in the list of filenames is noticeable at all is when the
42last file opened is empty.
43
44It is possible that the last line of a file doesn't end in a newline
45character; otherwise lines are returned including the trailing
46newline.
47
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
55
56Optional in-place filtering: if the keyword argument inplace=1 is
57passed to input() or to the FileInput constructor, the file is moved
58to a backup file and standard output is directed to the input file.
59This makes it possible to write a filter that rewrites its input file
60in place. If the keyword argument backup=".<some extension>" is also
61given, it specifies the extension for the backup file, and the backup
62file remains around; by default, the extension is ".bak" and it is
63deleted when the output file is closed. In-place filtering is
64disabled when standard input is read. XXX The current implementation
65does not work for MS-DOS 8+3 filesystems.
66
67XXX Possible additions:
68
69- optional getopt argument processing
70- isatty()
71- read(), read(size), even readlines()
72
73"""
74
import sys, os
from types import GenericAlias

# Names exported by "from fileinput import *".
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
           "hook_encoded"]

# The FileInput instance created by the most recent input() call; the
# module-level helper functions delegate to it.  Reset to None by close().
_state = None
83
def input(files=None, inplace=False, backup="", *, mode="r", openhook=None):
    """Create the module-wide FileInput instance and return it.

    All arguments are forwarded unchanged to the FileInput constructor.
    The new instance is an iterator over input lines and also becomes the
    global state consulted by the other functions of this module.

    Raises RuntimeError if a previous input() is still active, i.e. its
    FileInput instance currently has a file open.
    """
    global _state
    current = _state
    if current and current._file:
        raise RuntimeError("input() already active")
    fresh = FileInput(files, inplace, backup, mode=mode, openhook=openhook)
    _state = fresh
    return fresh
96
def close():
    """Close the sequence.

    The global state is cleared before the underlying FileInput is closed,
    so the module is left without an active input() even if closing raises.
    Does nothing when no input() is active.
    """
    global _state
    state, _state = _state, None
    if state:
        state.close()
104
def nextfile():
    """
    Close the current file so that the next iteration reads the first
    line of the next file (if any).  Lines left unread in the closed file
    do not count towards the cumulative line count, and the filename is
    not changed until the first line of the next file has been read.
    Before the first line has been read this function has no effect, so
    it cannot be used to skip the first file; after the last line of the
    last file has been read it has no effect either.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.nextfile()
    raise RuntimeError("no active input()")
118
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.filename()
    raise RuntimeError("no active input()")
127
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0.  After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.lineno()
    raise RuntimeError("no active input()")
137
def filelineno():
    """
    Return the line number within the current file.  Before the first
    line has been read, returns 0.  After the last line of the last file
    has been read, returns the line number of that line within its file.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.filelineno()
    raise RuntimeError("no active input()")
147
def fileno():
    """
    Return the file number of the current file.  When no file is
    currently opened, returns -1.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.fileno()
    raise RuntimeError("no active input()")
156
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.isfirstline()
    raise RuntimeError("no active input()")
165
def isstdin():
    """
    Return true if the last line was read from sys.stdin,
    otherwise return false.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.isstdin()
    raise RuntimeError("no active input()")
174
class FileInput:
    """FileInput([files[, inplace[, backup]]], *, mode="r", openhook=None)

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    (deprecated) sequence behavior.  The sequence must be accessed in
    strictly sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", *,
                 mode="r", openhook=None):
        """Set up iteration over *files*.

        files    -- a single name (str or os.PathLike), an iterable of
                    names, or None to use sys.argv[1:]; an empty list
                    means standard input ('-').
        inplace  -- when true, each file is moved to a backup and
                    sys.stdout is redirected to a new file of that name.
        backup   -- extension of the backup file; when empty, ".bak" is
                    used and the backup is removed when the file is closed.
        mode     -- one of 'r', 'rU', 'U' and 'rb'.
        openhook -- optional callable(filename, mode) used to open files;
                    cannot be combined with inplace.

        Raises ValueError for an invalid mode or openhook.
        """
        # Establish every piece of state that close()/nextfile() touches
        # *before* anything below can raise, so that __del__ on a
        # half-constructed instance does not fail with AttributeError.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None

        if isinstance(files, str):
            files = (files,)
        elif isinstance(files, os.PathLike):
            files = (os.fspath(files), )
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        self._mode = mode
        # Mode used for the in-place output file: 'w' or 'wb'.
        self._write_mode = mode.replace('r', 'w') if 'U' not in mode else 'w'
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, advancing through the files as needed."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                # No file is open and none could be opened: input exhausted.
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Deprecated sequential access; *i* must equal lineno().

        Raises RuntimeError for out-of-order access and IndexError at the
        end of the input.
        """
        import warnings
        warnings.warn(
            "Support for indexing FileInput objects is deprecated. "
            "Use iterator protocol instead.",
            DeprecationWarning,
            stacklevel=2
        )
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Finish with the current file so the next read opens the next one.

        Restores sys.stdout if it was redirected, closes the in-place
        output and the input file (sys.stdin itself is never closed), and
        removes the backup file unless a backup extension was requested.
        The nested try/finally blocks make each cleanup step run even if
        an earlier one raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False

    def readline(self):
        """Return the next line; an empty str/bytes signals end of input."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file and return its first line.

        This method only runs while no file is open: once a file has been
        opened, an instance attribute of the same name bound to the file's
        own readline() hides it until nextfile() deletes that attribute.
        Returns '' (or b'' in binary mode) when no files remain.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    os.fspath(self._filename) + (self._backup or ".bak"))
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, self._write_mode)
                else:
                    # Recreate the file with the permission bits of the
                    # original, which is now the backup.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, self._write_mode)
                    try:
                        os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file last opened, or None before any."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the line number within the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if unavailable."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return True if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return True if the current file is sys.stdin."""
        return self._isstdin

    __class_getitem__ = classmethod(GenericAlias)
396
397
def hook_compressed(filename, mode):
    """Open hook that transparently reads gzip and bzip2 files.

    Files whose extension is '.gz' or '.bz2' are opened with the gzip or
    bz2 module respectively; any other file is opened with open().

    gzip.open() and bz2.BZ2File() produce binary streams for mode 'r',
    whereas open() produces a text stream.  So that every file yields the
    same kind of line, a compressed stream is wrapped in a text layer
    (default encoding) unless a binary mode was requested.

    Usage: fileinput.FileInput(openhook=fileinput.hook_compressed)
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        stream = gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        stream = bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode)

    # gzip and bz2 are binary by default; honour a text-mode request.
    if 'b' not in mode:
        import io
        stream = io.TextIOWrapper(stream)
    return stream
408
409
def hook_encoded(encoding, errors=None):
    """Return an open hook that opens each file with the given text options.

    The returned callable takes (filename, mode) and calls open() with the
    *encoding* and *errors* captured here.

    Usage: fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8"))
    """
    def openhook(filename, mode):
        text_options = {"encoding": encoding, "errors": errors}
        return open(filename, mode, **text_options)

    return openhook
414
415
def _test():
    """Command-line self-test: number and echo every input line.

    Options taken from sys.argv: -i enables in-place mode, -b EXT sets the
    backup extension.  Each line is printed with its cumulative line
    number, file name and per-file line number ("*" marks the first line
    of a file), followed by a final summary of the last position.
    """
    import getopt

    inplace = False
    backup = False
    options, names = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in options:
        if flag == '-i':
            inplace = True
        if flag == '-b':
            backup = value

    for line in input(names, inplace=inplace, backup=backup):
        # Drop one trailing newline, then one trailing carriage return.
        for terminator in ('\n', '\r'):
            if line[-1:] == terminator:
                line = line[:-1]
        marker = "*" if isfirstline() else ""
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
430
# Run the command-line self-test when this file is executed as a script.
if __name__ == '__main__':
    _test()