blob: 1d15612616f1d8ca7ec3d21c028b8495bbe87302 [file] [log] [blame]
#!/usr/bin/env python3
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32version = "0.9.0"
33__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36#---------
37# Imports
38#---------
39from builtins import open as bltn_open
40import sys
41import os
42import io
43import shutil
44import stat
45import time
46import struct
47import copy
48import re
49
50try:
51 import pwd
52except ImportError:
53 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
58
# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
    # OSError (winerror=1314) will be raised if the caller does not hold the
    # SeCreateSymbolicLinkPrivilege privilege
    symlink_exception += (OSError,)
except NameError:
    pass

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]

#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# On Windows the filesystem encoding (mbcs/cp*) cannot represent all
# filenames, so UTF-8 is forced for header text instead.
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()
153
154#---------------------------------------------------------
155# Some useful functions
156#---------------------------------------------------------
157
def stn(s, length, encoding, errors):
    """Encode string *s* and fit it into a field of exactly *length* bytes.

    Overlong values are truncated; short values are NUL-padded on the right.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, NUL)
163
def nts(s, encoding, errors):
    """Decode a NUL-terminated bytes field to a string.

    Everything from the first NUL byte onward is discarded before decoding.
    """
    end = s.find(b"\0")
    if end >= 0:
        s = s[:end]
    return s.decode(encoding, errors)
171
def nti(s):
    """Convert a tar number field to a Python int.

    Two encodings exist (see itn() below): plain POSIX octal digits, or
    GNU base-256 flagged by a 0o200 (positive) / 0o377 (negative,
    two's complement) marker byte.
    """
    if s[0] in (0o200, 0o377):
        # Big-endian base-256 over the remaining len(s)-1 bytes.
        n = int.from_bytes(s[1:], byteorder="big")
        if s[0] == 0o377:
            # Undo two's complement for negative values.
            n -= 256 ** (len(s) - 1)
    else:
        try:
            s = nts(s, "ascii", "strict")
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a Python number to a tar number field.

    POSIX 1003.1-1988 requires numbers to be encoded as octal digits
    followed by a NUL, which caps values at (8**(digits-1))-1.  GNU tar
    extends the range with a base-256 encoding: a leading 0o200 byte
    (positive) or 0o377 byte (negative) followed by digits-1 big-endian
    bytes of the two's complement value, allowing +/- 256**(digits-1).
    """
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        return ("%0*o" % (digits - 1, n)).encode("ascii") + NUL
    if format == GNU_FORMAT and -(256 ** (digits - 1)) <= n < 256 ** (digits - 1):
        s = bytearray([0o200 if n >= 0 else 0o377])
        # n & mask yields the low digits-1 bytes of the two's complement
        # representation (Python's & on negative ints is already modular).
        s += (n & (256 ** (digits - 1) - 1)).to_bytes(digits - 1, "big")
        return s
    raise ValueError("overflow in number field")
220
def calc_chksums(buf):
    """Return (unsigned, signed) checksums for a 512-byte header block.

    The chksum field (bytes 148-155) is treated as if filled with eight
    spaces, which is where the constant 256 (8 * 0x20) comes from.
    According to the GNU tar sources, some historic tars (Sun and NeXT)
    summed signed chars, so both variants are produced for comparison.
    """
    unsigned = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned, signed
233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy *length* bytes from file object *src* to file object *dst*.

    If *length* is None, the entire remaining content is copied.
    Raises *exception* if *src* is exhausted before *length* bytes
    have been read.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    full_blocks, leftover = divmod(length, bufsize)
    chunk_sizes = [bufsize] * full_blocks
    if leftover:
        chunk_sizes.append(leftover)
    for want in chunk_sizes:
        chunk = src.read(want)
        if len(chunk) < want:
            raise exception("unexpected end of data")
        dst.write(chunk)
258
259def _safe_print(s):
260 encoding = getattr(sys.stdout, 'encoding', None)
261 if encoding is not None:
262 s = s.encode(encoding, 'backslashreplace').decode(encoding)
263 print(s, end=' ')
264
265
#---------------------------------------------------------
# exceptions
#---------------------------------------------------------
# All public errors derive from TarError; HeaderError and its subclasses
# let internal callers distinguish recoverable header conditions (empty,
# EOF) from genuinely corrupt data.
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass
299
300#---------------------------
301# internal stream interface
302#---------------------------
303class _LowLevelFile:
304 """Low-level file object. Supports reading and writing.
305 It is used instead of a regular file object for streaming
306 access.
307 """
308
309 def __init__(self, name, mode):
310 mode = {
311 "r": os.O_RDONLY,
312 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
313 }[mode]
314 if hasattr(os, "O_BINARY"):
315 mode |= os.O_BINARY
316 self.fd = os.open(name, mode, 0o666)
317
318 def close(self):
319 os.close(self.fd)
320
321 def read(self, size):
322 return os.read(self.fd, size)
323
324 def write(self, s):
325 os.write(self.fd, s)
326
class _Stream:
    """Class that serves as an adapter between TarFile and
    a stream-like object. The stream-like object only
    needs to have a read() or write() method and is accessed
    blockwise. Use of gzip or bzip2 compression is possible.
    A stream-like object could be for example: sys.stdin,
    sys.stdout, a socket, a tape device etc.

    _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.
        """
        # _extfileobj records whether the caller owns fileobj; only
        # self-opened files are closed by this object.
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""          # raw (compressed) buffer
        self.pos = 0            # logical position in uncompressed data
        self.closed = False

        try:
            # Lazily import the codec for the requested compression and
            # set up a (de)compressor; self.exception holds the error
            # type the codec raises on corrupt input.
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Deliberately bare: close a self-opened file on ANY setup
            # failure, then re-raise the original exception unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Raw deflate stream (negative wbits); the gzip header and
        # trailer are written by hand here and in close().
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
        is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
        done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and uncompressed size mod 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        # Skip the optional extra field, original file name, comment and
        # header CRC, as flagged in the FLG byte (RFC1952).
        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
        is forbidden.
        """
        if pos - self.pos >= 0:
            # Forward seek is emulated by reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
        read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
568
569class _StreamProxy(object):
570 """Small proxy class that enables transparent compression
571 detection for the Stream interface (mode 'r|*').
572 """
573
574 def __init__(self, fileobj):
575 self.fileobj = fileobj
576 self.buf = self.fileobj.read(BLOCKSIZE)
577
578 def read(self, size):
579 self.read = self.fileobj.read
580 return self.buf
581
582 def getcomptype(self):
583 if self.buf.startswith(b"\x1f\x8b\x08"):
584 return "gz"
585 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
586 return "bz2"
587 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
588 return "xz"
589 else:
590 return "tar"
591
592 def close(self):
593 self.fileobj.close()
594# class StreamProxy
595
596#------------------------
597# Extraction file object
598#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
    provides a part of its data as an individual file
    object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset        # member data start within the archive
        self.size = size            # logical (uncompressed/expanded) size
        self.position = 0           # current logical read position
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            # Non-sparse member: one contiguous data block.
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Each entry is (is_data, logical_start, logical_stop, real_offset);
        # holes between data blocks are represented as zero-filled spans.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        # Positions are always clamped into [0, self.size].
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position; the index
            # wraps around so backward seeks are handled too.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # Sparse hole: synthesize zeros instead of reading.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile
702
class ExFileObject(io.BufferedReader):
    """Buffered, read-only file object exposing a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # _FileInFile restricts the archive's file object to the member's
        # data region (honoring sparse maps); BufferedReader adds
        # buffering, readline(), iteration, etc. on top.
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                              tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject
710
711#------------------
712# Exported Classes
713#------------------
class TarInfo(object):
    """Informational class which holds the details about an
    archive member given by a tar header block.
    TarInfo objects are returned by TarFile.getmember(),
    TarFile.getmembers() and TarFile.gettarinfo() and are
    usually created internally.
    """

    # A dict-valued __slots__ both suppresses per-instance __dict__
    # creation (many TarInfo objects exist per archive) and documents
    # each attribute.
    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )
748
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member. All other attributes are initialized to the
        defaults of a zero-length regular file owned by root.
        """
        self.name = name         # member name
        self.mode = 0o644        # file permissions
        self.uid = 0             # user id
        self.gid = 0             # group id
        self.size = 0            # file size
        self.mtime = 0           # modification time
        self.chksum = 0          # header checksum
        self.type = REGTYPE      # member type
        self.linkname = ""       # link name
        self.uname = ""          # user name
        self.gname = ""          # group name
        self.devmajor = 0        # device major number
        self.devminor = 0        # device minor number

        self.offset = 0          # the tar header starts here
        self.offset_data = 0     # the file's data starts here

        self.sparse = None       # sparse member information
        self.pax_headers = {}    # pax header information
772
    # The pax spec names these fields differently than the ustar header;
    # the properties below let pax code use the pax names transparently.
    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname
790
791 def __repr__(self):
792 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
793
794 def get_info(self):
795 """Return the TarInfo's attributes as a dictionary.
796 """
797 info = {
798 "name": self.name,
799 "mode": self.mode & 0o7777,
800 "uid": self.uid,
801 "gid": self.gid,
802 "size": self.size,
803 "mtime": self.mtime,
804 "chksum": self.chksum,
805 "type": self.type,
806 "linkname": self.linkname,
807 "uname": self.uname,
808 "gname": self.gname,
809 "devmajor": self.devmajor,
810 "devminor": self.devminor
811 }
812
813 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
814 info["name"] += "/"
815
816 return info
817
818 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
819 """Return a tar header as a string of 512 byte blocks.
820 """
821 info = self.get_info()
822
823 if format == USTAR_FORMAT:
824 return self.create_ustar_header(info, encoding, errors)
825 elif format == GNU_FORMAT:
826 return self.create_gnu_header(info, encoding, errors)
827 elif format == PAX_FORMAT:
828 return self.create_pax_header(info, encoding)
829 else:
830 raise ValueError("invalid format")
831
832 def create_ustar_header(self, info, encoding, errors):
833 """Return the object as a ustar header block.
834 """
835 info["magic"] = POSIX_MAGIC
836
837 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
838 raise ValueError("linkname is too long")
839
840 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
841 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
842
843 return self._create_header(info, USTAR_FORMAT, encoding, errors)
844
845 def create_gnu_header(self, info, encoding, errors):
846 """Return the object as a GNU header block sequence.
847 """
848 info["magic"] = GNU_MAGIC
849
850 buf = b""
851 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
852 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
853
854 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
855 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
856
857 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
858
    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                # Non-ASCII: must go into a pax record instead.
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                # Too long for the fixed-size ustar field.
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                # Store the exact value in a pax record; zero the ustar
                # field so the fallback header still encodes cleanly.
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        # The ustar fallback header is ASCII-only; anything that did not
        # fit has already been moved into the pax records above.
        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
907
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.

        Global (XGLTYPE) records apply to every following member until
        overridden; keys/values are always encoded as UTF-8.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
913
    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
        and a name part.

        Returns (prefix, name) where prefix fits LENGTH_PREFIX (155)
        and name fits LENGTH_NAME (100) bytes; raises ValueError if no
        split point on a "/" boundary satisfies both limits.
        """
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            # for/else: no split point worked (or name had no "/").
            raise ValueError("name is too long")

        return prefix, name
929
    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.
        """
        # Device numbers are only meaningful for char/block devices; for
        # other types the fields are written as empty (NUL-padded) text.
        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
        if has_device_fields:
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            devmajor = stn("", 8, encoding, errors)
            devminor = stn("", 8, encoding, errors)

        # Field layout of a 512-byte ustar header block, in order.
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Patch the unsigned checksum (6 octal digits + NUL) into bytes
        # 148-155 of the block just built (512-364=148, 512-357=155).
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf
965
966 @staticmethod
967 def _create_payload(payload):
968 """Return the string payload filled with zero bytes
969 up to the next 512 byte border.
970 """
971 blocks, remainder = divmod(len(payload), BLOCKSIZE)
972 if remainder > 0:
973 payload += (BLOCKSIZE - remainder) * NUL
974 return payload
975
    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
        for name.

        The overlong string is stored as the NUL-terminated payload of a
        pseudo-member whose header carries the conventional name
        "././@LongLink".
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)
992
    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # Each record is "<length> <keyword>=<value>\n" where <length>
            # counts the whole record INCLUDING its own digits, so iterate
            # until the length estimate becomes self-consistent.
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)
1043
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raises HeaderError subclasses for empty, truncated, all-NUL
        (end-of-archive) or checksum-invalid blocks.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        # Accept either the unsigned or the (historic) signed checksum.
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Decode the fixed-offset ustar fields.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
1106
1107 @classmethod
1108 def fromtarfile(cls, tarfile):
1109 """Return the next TarInfo object from TarFile object
1110 tarfile.
1111 """
1112 buf = tarfile.fileobj.read(BLOCKSIZE)
1113 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1114 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1115 return obj._proc_member(tarfile)
1116
1117 #--------------------------------------------------------------------------
1118 # The following are methods that are called depending on the type of a
1119 # member. The entry point is _proc_member() which can be overridden in a
1120 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
1123 # 1. Set self.offset_data to the position where the data blocks begin,
1124 # if there is data that follows.
1125 # 2. Set tarfile.offset to the position where the next member's header will
1126 # begin.
1127 # 3. Return self or another valid TarInfo object.
1128 def _proc_member(self, tarfile):
1129 """Choose the right processing method depending on
1130 the type and call it.
1131 """
1132 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1133 return self._proc_gnulong(tarfile)
1134 elif self.type == GNUTYPE_SPARSE:
1135 return self._proc_sparse(tarfile)
1136 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1137 return self._proc_pax(tarfile)
1138 else:
1139 return self._proc_builtin(tarfile)
1140
1141 def _proc_builtin(self, tarfile):
1142 """Process a builtin type or an unknown type which
1143 will be treated as a regular file.
1144 """
1145 self.offset_data = tarfile.fileobj.tell()
1146 offset = self.offset_data
1147 if self.isreg() or self.type not in SUPPORTED_TYPES:
1148 # Skip the following data blocks.
1149 offset += self._block(self.size)
1150 tarfile.offset = offset
1151
1152 # Patch the TarInfo object with saved global
1153 # header information.
1154 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1155
1156 return self
1157
1158 def _proc_gnulong(self, tarfile):
1159 """Process the blocks that hold a GNU longname
1160 or longlink member.
1161 """
1162 buf = tarfile.fileobj.read(self._block(self.size))
1163
1164 # Fetch the next header and process it.
1165 try:
1166 next = self.fromtarfile(tarfile)
1167 except HeaderError:
1168 raise SubsequentHeaderError("missing or bad subsequent header")
1169
1170 # Patch the TarInfo object from the next header with
1171 # the longname information.
1172 next.offset = self.offset
1173 if self.type == GNUTYPE_LONGNAME:
1174 next.name = nts(buf, tarfile.encoding, tarfile.errors)
1175 elif self.type == GNUTYPE_LONGLINK:
1176 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1177
1178 return next
1179
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Each extended block holds up to 21 sparse structures of
            # 24 bytes each (2 x 12 digit fields).
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            # Byte 504 flags whether yet another extended block follows.
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        # The header's size field held the compact on-disk size; expose
        # the original (expanded) file size instead.
        self.size = origsize
        return self
1207
    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2008.

        Returns the TarInfo object of the member that follows the pax
        header, patched with the collected keyword/value information.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            # A zero length would keep `pos' from ever advancing; treat
            # it as a corrupt header rather than looping forever.
            if length == 0:
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            # Records are length-prefixed, so jump straight to the next one.
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next
1311
1312 def _proc_gnusparse_00(self, next, pax_headers, buf):
1313 """Process a GNU tar extended sparse header, version 0.0.
1314 """
1315 offsets = []
1316 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1317 offsets.append(int(match.group(1)))
1318 numbytes = []
1319 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1320 numbytes.append(int(match.group(1)))
1321 next.sparse = list(zip(offsets, numbytes))
1322
1323 def _proc_gnusparse_01(self, next, pax_headers):
1324 """Process a GNU tar extended sparse header, version 0.1.
1325 """
1326 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1327 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1328
1329 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1330 """Process a GNU tar extended sparse header, version 1.0.
1331 """
1332 fields = None
1333 sparse = []
1334 buf = tarfile.fileobj.read(BLOCKSIZE)
1335 fields, buf = buf.split(b"\n", 1)
1336 fields = int(fields)
1337 while len(sparse) < fields * 2:
1338 if b"\n" not in buf:
1339 buf += tarfile.fileobj.read(BLOCKSIZE)
1340 number, buf = buf.split(b"\n", 1)
1341 sparse.append(int(number))
1342 next.offset_data = tarfile.fileobj.tell()
1343 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1344
1345 def _apply_pax_info(self, pax_headers, encoding, errors):
1346 """Replace fields with supplemental information from a previous
1347 pax extended or global header.
1348 """
1349 for keyword, value in pax_headers.items():
1350 if keyword == "GNU.sparse.name":
1351 setattr(self, "path", value)
1352 elif keyword == "GNU.sparse.size":
1353 setattr(self, "size", int(value))
1354 elif keyword == "GNU.sparse.realsize":
1355 setattr(self, "size", int(value))
1356 elif keyword in PAX_FIELDS:
1357 if keyword in PAX_NUMBER_FIELDS:
1358 try:
1359 value = PAX_NUMBER_FIELDS[keyword](value)
1360 except ValueError:
1361 value = 0
1362 if keyword == "path":
1363 value = value.rstrip("/")
1364 setattr(self, keyword, value)
1365
1366 self.pax_headers = pax_headers.copy()
1367
1368 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1369 """Decode a single field from a pax record.
1370 """
1371 try:
1372 return value.decode(encoding, "strict")
1373 except UnicodeDecodeError:
1374 return value.decode(fallback_encoding, fallback_errors)
1375
1376 def _block(self, count):
1377 """Round up a byte count by BLOCKSIZE and return it,
1378 e.g. _block(834) => 1024.
1379 """
1380 blocks, remainder = divmod(count, BLOCKSIZE)
1381 if remainder:
1382 blocks += 1
1383 return blocks * BLOCKSIZE
1384
1385 def isreg(self):
1386 'Return True if the Tarinfo object is a regular file.'
1387 return self.type in REGULAR_TYPES
1388
1389 def isfile(self):
1390 'Return True if the Tarinfo object is a regular file.'
1391 return self.isreg()
1392
1393 def isdir(self):
1394 'Return True if it is a directory.'
1395 return self.type == DIRTYPE
1396
1397 def issym(self):
1398 'Return True if it is a symbolic link.'
1399 return self.type == SYMTYPE
1400
1401 def islnk(self):
1402 'Return True if it is a hard link.'
1403 return self.type == LNKTYPE
1404
1405 def ischr(self):
1406 'Return True if it is a character device.'
1407 return self.type == CHRTYPE
1408
1409 def isblk(self):
1410 'Return True if it is a block device.'
1411 return self.type == BLKTYPE
1412
1413 def isfifo(self):
1414 'Return True if it is a FIFO.'
1415 return self.type == FIFOTYPE
1416
1417 def issparse(self):
1418 return self.sparse is not None
1419
1420 def isdev(self):
1421 'Return True if it is one of character device, block device or FIFO.'
1422 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1423# class TarInfo
1424
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # The class attributes below are defaults; __init__() overrides each
    # one per instance when the corresponding argument is not None.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().
1450
    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
        read from an existing archive, 'a' to append data to an existing
        file or 'w' to create a new file overwriting an existing one. `mode'
        defaults to 'r'.
        If `fileobj' is given, it is used for reading or writing data. If it
        can be determined, `mode' is overridden by `fileobj's mode.
        `fileobj' is not closed, when TarFile is closed.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        # Low-level binary mode corresponding to the archive mode.
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            # The caller owns `fileobj'; close() must leave it open.
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # Caller-supplied pax headers only take effect for PAX_FORMAT.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e))

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any setup failure, close a self-opened file object so the
            # descriptor does not leak, then re-raise.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
1550
1551 #--------------------------------------------------------------------------
1552 # Below are the classmethods which act as alternate constructors to the
1553 # TarFile class. The open() method is the only one that is needed for
1554 # public use; it is the "super"-constructor and is able to select an
1555 # adequate "sub"-constructor for a particular compression using the mapping
1556 # from OPEN_METH.
1557 #
1558 # This concept allows one to subclass TarFile without losing the comfort of
1559 # the super-constructor. A sub-constructor is registered and made available
1560 # by adding it to the mapping in OPEN_METH.
1561
    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
        an appropriate TarFile class.

        mode:
        'r' or 'r:*' open for reading with transparent compression
        'r:'         open for reading exclusively uncompressed
        'r:gz'       open for reading with gzip compression
        'r:bz2'      open for reading with bzip2 compression
        'r:xz'       open for reading with lzma compression
        'a' or 'a:'  open for appending, creating the file if necessary
        'w' or 'w:'  open for writing without compression
        'w:gz'       open for writing with gzip compression
        'w:bz2'      open for writing with bzip2 compression
        'w:xz'       open for writing with lzma compression

        'x' or 'x:'  create a tarfile exclusively without compression, raise
                     an exception if the file is already created
        'x:gz'       create a gzip compressed tarfile, raise an exception
                     if the file is already created
        'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                     if the file is already created
        'x:xz'       create an lzma compressed tarfile, raise an exception
                     if the file is already created

        'r|*'        open a stream of tar blocks with transparent compression
        'r|'         open an uncompressed stream of tar blocks for reading
        'r|gz'       open a gzip compressed stream of tar blocks
        'r|bz2'      open a bzip2 compressed stream of tar blocks
        'r|xz'       open an lzma compressed stream of tar blocks
        'w|'         open an uncompressed stream for writing
        'w|gz'       open a gzip compressed stream for writing
        'w|bz2'      open a bzip2 compressed stream for writing
        'w|xz'       open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            # Sorting by this key tries the compressed openers first and
            # plain taropen last.
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    # Remember the position so a failed probe can rewind.
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError):
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            raise ReadError("file could not be opened successfully")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            # The stream was created here, so close() must close it too.
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")
1652
1653 @classmethod
1654 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1655 """Open uncompressed tar archive name for reading or writing.
1656 """
1657 if mode not in ("r", "a", "w", "x"):
1658 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1659 return cls(name, mode, fileobj, **kwargs)
1660
1661 @classmethod
1662 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1663 """Open gzip compressed tar archive name for reading or writing.
1664 Appending is not allowed.
1665 """
1666 if mode not in ("r", "w", "x"):
1667 raise ValueError("mode must be 'r', 'w' or 'x'")
1668
1669 try:
1670 from gzip import GzipFile
1671 except ImportError:
1672 raise CompressionError("gzip module is not available")
1673
1674 try:
1675 fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1676 except OSError:
1677 if fileobj is not None and mode == 'r':
1678 raise ReadError("not a gzip file")
1679 raise
1680
1681 try:
1682 t = cls.taropen(name, mode, fileobj, **kwargs)
1683 except OSError:
1684 fileobj.close()
1685 if mode == 'r':
1686 raise ReadError("not a gzip file")
1687 raise
1688 except:
1689 fileobj.close()
1690 raise
1691 t._extfileobj = False
1692 return t
1693
1694 @classmethod
1695 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1696 """Open bzip2 compressed tar archive name for reading or writing.
1697 Appending is not allowed.
1698 """
1699 if mode not in ("r", "w", "x"):
1700 raise ValueError("mode must be 'r', 'w' or 'x'")
1701
1702 try:
1703 from bz2 import BZ2File
1704 except ImportError:
1705 raise CompressionError("bz2 module is not available")
1706
1707 fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1708
1709 try:
1710 t = cls.taropen(name, mode, fileobj, **kwargs)
1711 except (OSError, EOFError):
1712 fileobj.close()
1713 if mode == 'r':
1714 raise ReadError("not a bzip2 file")
1715 raise
1716 except:
1717 fileobj.close()
1718 raise
1719 t._extfileobj = False
1720 return t
1721
1722 @classmethod
1723 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1724 """Open lzma compressed tar archive name for reading or writing.
1725 Appending is not allowed.
1726 """
1727 if mode not in ("r", "w", "x"):
1728 raise ValueError("mode must be 'r', 'w' or 'x'")
1729
1730 try:
1731 from lzma import LZMAFile, LZMAError
1732 except ImportError:
1733 raise CompressionError("lzma module is not available")
1734
1735 fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1736
1737 try:
1738 t = cls.taropen(name, mode, fileobj, **kwargs)
1739 except (LZMAError, EOFError):
1740 fileobj.close()
1741 if mode == 'r':
1742 raise ReadError("not an lzma file")
1743 raise
1744 except:
1745 fileobj.close()
1746 raise
1747 t._extfileobj = False
1748 return t
1749
1750 # All *open() methods are registered here.
1751 OPEN_METH = {
1752 "tar": "taropen", # uncompressed tar
1753 "gz": "gzopen", # gzip compressed tar
1754 "bz2": "bz2open", # bzip2 compressed tar
1755 "xz": "xzopen" # lzma compressed tar
1756 }
1757
1758 #--------------------------------------------------------------------------
1759 # The public methods which TarFile provides:
1760
1761 def close(self):
1762 """Close the TarFile. In write-mode, two finishing zero blocks are
1763 appended to the archive.
1764 """
1765 if self.closed:
1766 return
1767
1768 self.closed = True
1769 try:
1770 if self.mode in ("a", "w", "x"):
1771 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1772 self.offset += (BLOCKSIZE * 2)
1773 # fill up the end with zero-blocks
1774 # (like option -b20 for tar does)
1775 blocks, remainder = divmod(self.offset, RECORDSIZE)
1776 if remainder > 0:
1777 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1778 finally:
1779 if not self._extfileobj:
1780 self.fileobj.close()
1781
1782 def getmember(self, name):
1783 """Return a TarInfo object for member `name'. If `name' can not be
1784 found in the archive, KeyError is raised. If a member occurs more
1785 than once in the archive, its last occurrence is assumed to be the
1786 most up-to-date version.
1787 """
1788 tarinfo = self._getmember(name)
1789 if tarinfo is None:
1790 raise KeyError("filename %r not found" % name)
1791 return tarinfo
1792
1793 def getmembers(self):
1794 """Return the members of the archive as a list of TarInfo objects. The
1795 list has the same order as the members in the archive.
1796 """
1797 self._check()
1798 if not self._loaded: # if we want to obtain a list of
1799 self._load() # all members, we first have to
1800 # scan the whole archive.
1801 return self.members
1802
1803 def getnames(self):
1804 """Return the members of the archive as a list of their names. It has
1805 the same order as the list returned by getmembers().
1806 """
1807 return [tarinfo.name for tarinfo in self.getmembers()]
1808
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
        on an existing file. The file is either named by `name', or
        specified as a file object `fileobj' with a file descriptor. If
        given, `arcname' specifies an alternative name for the file in the
        archive, otherwise, the name is taken from the 'name' attribute of
        'fileobj', or the 'name' argument. The name should be a text
        string.

        Returns None for file types that cannot be represented in a tar
        archive (e.g. sockets).
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
        if fileobj is None:
            if not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Sockets and other exotic file types cannot be archived.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            # Only regular files carry payload data.
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Resolve numeric ids to names where the platform provides the
        # pwd/grp modules (they may be unavailable, e.g. on Windows).
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
1907
1908 def list(self, verbose=True, *, members=None):
1909 """Print a table of contents to sys.stdout. If `verbose' is False, only
1910 the names of the members are printed. If it is True, an `ls -l'-like
1911 output is produced. `members' is optional and must be a subset of the
1912 list returned by getmembers().
1913 """
1914 self._check()
1915
1916 if members is None:
1917 members = self
1918 for tarinfo in members:
1919 if verbose:
1920 _safe_print(stat.filemode(tarinfo.mode))
1921 _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1922 tarinfo.gname or tarinfo.gid))
1923 if tarinfo.ischr() or tarinfo.isblk():
1924 _safe_print("%10s" %
1925 ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
1926 else:
1927 _safe_print("%10d" % tarinfo.size)
1928 _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
1929 % time.localtime(tarinfo.mtime)[:6])
1930
1931 _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
1932
1933 if verbose:
1934 if tarinfo.issym():
1935 _safe_print("-> " + tarinfo.linkname)
1936 if tarinfo.islnk():
1937 _safe_print("link to " + tarinfo.linkname)
1938 print()
1939
1940 def add(self, name, arcname=None, recursive=True, *, filter=None):
1941 """Add the file `name' to the archive. `name' may be any type of file
1942 (directory, fifo, symbolic link, etc.). If given, `arcname'
1943 specifies an alternative name for the file in the archive.
1944 Directories are added recursively by default. This can be avoided by
1945 setting `recursive' to False. `filter' is a function
1946 that expects a TarInfo object argument and returns the changed
1947 TarInfo object, if it returns None the TarInfo object will be
1948 excluded from the archive.
1949 """
1950 self._check("awx")
1951
1952 if arcname is None:
1953 arcname = name
1954
1955 # Skip if somebody tries to archive the archive...
1956 if self.name is not None and os.path.abspath(name) == self.name:
1957 self._dbg(2, "tarfile: Skipped %r" % name)
1958 return
1959
1960 self._dbg(1, name)
1961
1962 # Create a TarInfo object from the file.
1963 tarinfo = self.gettarinfo(name, arcname)
1964
1965 if tarinfo is None:
1966 self._dbg(1, "tarfile: Unsupported type %r" % name)
1967 return
1968
1969 # Change or exclude the TarInfo object.
1970 if filter is not None:
1971 tarinfo = filter(tarinfo)
1972 if tarinfo is None:
1973 self._dbg(2, "tarfile: Excluded %r" % name)
1974 return
1975
1976 # Append the tar header and data to the archive.
1977 if tarinfo.isreg():
1978 with bltn_open(name, "rb") as f:
1979 self.addfile(tarinfo, f)
1980
1981 elif tarinfo.isdir():
1982 self.addfile(tarinfo)
1983 if recursive:
1984 for f in sorted(os.listdir(name)):
1985 self.add(os.path.join(name, f), os.path.join(arcname, f),
1986 recursive, filter=filter)
1987
1988 else:
1989 self.addfile(tarinfo)
1990
1991 def addfile(self, tarinfo, fileobj=None):
1992 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1993 given, it should be a binary file, and tarinfo.size bytes are read
1994 from it and added to the archive. You can create TarInfo objects
1995 directly, or by using gettarinfo().
1996 """
1997 self._check("awx")
1998
1999 tarinfo = copy.copy(tarinfo)
2000
2001 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2002 self.fileobj.write(buf)
2003 self.offset += len(buf)
2004 bufsize=self.copybufsize
2005 # If there's data to follow, append it.
2006 if fileobj is not None:
2007 copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2008 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2009 if remainder > 0:
2010 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2011 blocks += 1
2012 self.offset += blocks * BLOCKSIZE
2013
2014 self.members.append(tarinfo)
2015
    def extractall(self, path=".", members=None, *, numeric_owner=False):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers(). If `numeric_owner` is True, only
           the numbers for user/group names are used and not the names.

           NOTE(review): member names are not sanitized here, so a
           crafted archive using absolute or "../" names can write
           outside `path' (CVE-2007-4559).  Only extract archives from
           trusted sources.
        """
        directories = []

        # Iterating over self reads members lazily via next().
        if members is None:
            members = self

        for tarinfo in members:
            if tarinfo.isdir():
                # Extract directories with a safe mode.
                directories.append(tarinfo)
                tarinfo = copy.copy(tarinfo)
                tarinfo.mode = 0o700
            # Do not set_attrs directories, as we will do that further down
            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                         numeric_owner=numeric_owner)

        # Reverse sort directories (deepest paths first).
        directories.sort(key=lambda a: a.name)
        directories.reverse()

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                # errorlevel <= 1: attribute failures are only logged.
                if self.errorlevel > 1:
                    raise
                else:
                    self._dbg(1, "tarfile: %s" % e)
2055
    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a TarInfo object. You can
           specify a different directory using `path'. File attributes (owner,
           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
           is True, only the numbers for user/group names are used and not
           the names.

           NOTE(review): the member name is used unsanitized; absolute or
           "../" names can escape `path'.  Extract trusted archives only.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        # Prepare the link target for makelink().
        if tarinfo.islnk():
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs,
                                 numeric_owner=numeric_owner)
        except OSError as e:
            # errorlevel 0: OS errors are demoted to debug messages.
            if self.errorlevel > 0:
                raise
            else:
                if e.filename is None:
                    self._dbg(1, "tarfile: %s" % e.strerror)
                else:
                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        except ExtractError as e:
            # errorlevel <= 1: non-fatal extraction problems are only logged.
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2093
2094 def extractfile(self, member):
2095 """Extract a member from the archive as a file object. `member' may be
2096 a filename or a TarInfo object. If `member' is a regular file or
2097 a link, an io.BufferedReader object is returned. For all other
2098 existing members, None is returned. If `member' does not appear
2099 in the archive, KeyError is raised.
2100 """
2101 self._check("r")
2102
2103 if isinstance(member, str):
2104 tarinfo = self.getmember(member)
2105 else:
2106 tarinfo = member
2107
2108 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2109 # Members with unknown types are treated as regular files.
2110 return self.fileobject(self, tarinfo)
2111
2112 elif tarinfo.islnk() or tarinfo.issym():
2113 if isinstance(self.fileobj, _Stream):
2114 # A small but ugly workaround for the case that someone tries
2115 # to extract a (sym)link as a file-object from a non-seekable
2116 # stream of tar blocks.
2117 raise StreamError("cannot extract (sym)link as file object")
2118 else:
2119 # A (sym)link's file object is its target's file object.
2120 return self.extractfile(self._find_link_target(tarinfo))
2121 else:
2122 # If there's no data associated with the member (directory, chrdev,
2123 # blkdev, etc.), return None instead of a file object.
2124 return None
2125
2126 def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2127 numeric_owner=False):
2128 """Extract the TarInfo object tarinfo to a physical
2129 file called targetpath.
2130 """
2131 # Fetch the TarInfo object for the given name
2132 # and build the destination pathname, replacing
2133 # forward slashes to platform specific separators.
2134 targetpath = targetpath.rstrip("/")
2135 targetpath = targetpath.replace("/", os.sep)
2136
2137 # Create all upper directories.
2138 upperdirs = os.path.dirname(targetpath)
2139 if upperdirs and not os.path.exists(upperdirs):
2140 # Create directories that are not part of the archive with
2141 # default permissions.
2142 os.makedirs(upperdirs)
2143
2144 if tarinfo.islnk() or tarinfo.issym():
2145 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2146 else:
2147 self._dbg(1, tarinfo.name)
2148
2149 if tarinfo.isreg():
2150 self.makefile(tarinfo, targetpath)
2151 elif tarinfo.isdir():
2152 self.makedir(tarinfo, targetpath)
2153 elif tarinfo.isfifo():
2154 self.makefifo(tarinfo, targetpath)
2155 elif tarinfo.ischr() or tarinfo.isblk():
2156 self.makedev(tarinfo, targetpath)
2157 elif tarinfo.islnk() or tarinfo.issym():
2158 self.makelink(tarinfo, targetpath)
2159 elif tarinfo.type not in SUPPORTED_TYPES:
2160 self.makeunknown(tarinfo, targetpath)
2161 else:
2162 self.makefile(tarinfo, targetpath)
2163
2164 if set_attrs:
2165 self.chown(tarinfo, targetpath, numeric_owner)
2166 if not tarinfo.issym():
2167 self.chmod(tarinfo, targetpath)
2168 self.utime(tarinfo, targetpath)
2169
2170 #--------------------------------------------------------------------------
2171 # Below are the different file methods. They are called via
2172 # _extract_member() when extract() is called. They can be replaced in a
2173 # subclass to implement other functionality.
2174
2175 def makedir(self, tarinfo, targetpath):
2176 """Make a directory called targetpath.
2177 """
2178 try:
2179 # Use a safe mode for the directory, the real mode is set
2180 # later in _extract_member().
2181 os.mkdir(targetpath, 0o700)
2182 except FileExistsError:
2183 pass
2184
    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.

           Copies tarinfo.size bytes from the archive's underlying file
           object, starting at tarinfo.offset_data.
        """
        source = self.fileobj
        source.seek(tarinfo.offset_data)
        bufsize = self.copybufsize
        with bltn_open(targetpath, "wb") as target:
            if tarinfo.sparse is not None:
                # Sparse member: write each data segment at its recorded
                # offset, then extend the file to its full logical size.
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError, bufsize)
                target.seek(tarinfo.size)
                target.truncate()
            else:
                # Dense member: one straight copy of tarinfo.size bytes.
                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2200
2201 def makeunknown(self, tarinfo, targetpath):
2202 """Make a file from a TarInfo object with an unknown type
2203 at targetpath.
2204 """
2205 self.makefile(tarinfo, targetpath)
2206 self._dbg(1, "tarfile: Unknown file type %r, " \
2207 "extracted as regular file." % tarinfo.type)
2208
2209 def makefifo(self, tarinfo, targetpath):
2210 """Make a fifo called targetpath.
2211 """
2212 if hasattr(os, "mkfifo"):
2213 os.mkfifo(targetpath)
2214 else:
2215 raise ExtractError("fifo not supported by system")
2216
2217 def makedev(self, tarinfo, targetpath):
2218 """Make a character or block device called targetpath.
2219 """
2220 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2221 raise ExtractError("special devices not supported by system")
2222
2223 mode = tarinfo.mode
2224 if tarinfo.isblk():
2225 mode |= stat.S_IFBLK
2226 else:
2227 mode |= stat.S_IFCHR
2228
2229 os.mknod(targetpath, mode,
2230 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2231
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
        """
        try:
            # For systems that support symbolic and hard links.
            if tarinfo.issym():
                if os.path.lexists(targetpath):
                    # Avoid FileExistsError on following os.symlink.
                    os.unlink(targetpath)
                os.symlink(tarinfo.linkname, targetpath)
            else:
                # See extract(); _link_target was prepared there.
                if os.path.exists(tarinfo._link_target):
                    os.link(tarinfo._link_target, targetpath)
                else:
                    # Hard-link target missing on disk: extract the
                    # referenced archive member in its place instead.
                    self._extract_member(self._find_link_target(tarinfo),
                                         targetpath)
        except symlink_exception:
            # Links unsupported on this platform: fall back to extracting
            # a copy of the member the link points to.
            try:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive")
2257
    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set owner of targetpath according to tarinfo. If numeric_owner
           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
           is False, fall back to .gid/.uid when the search based on name
           fails.  Raises ExtractError if the ownership change fails.
        """
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            g = tarinfo.gid
            u = tarinfo.uid
            if not numeric_owner:
                # Prefer symbolic names from the archive; the numeric ids
                # above remain as the fallback when lookup fails (or the
                # grp/pwd modules are unavailable on this platform).
                try:
                    if grp:
                        g = grp.getgrnam(tarinfo.gname)[2]
                except KeyError:
                    pass
                try:
                    if pwd:
                        u = pwd.getpwnam(tarinfo.uname)[2]
                except KeyError:
                    pass
            try:
                # lchown() changes the symlink itself, not its target.
                if tarinfo.issym() and hasattr(os, "lchown"):
                    os.lchown(targetpath, u, g)
                else:
                    os.chown(targetpath, u, g)
            except OSError:
                raise ExtractError("could not change owner")
2286
2287 def chmod(self, tarinfo, targetpath):
2288 """Set file permissions of targetpath according to tarinfo.
2289 """
2290 try:
2291 os.chmod(targetpath, tarinfo.mode)
2292 except OSError:
2293 raise ExtractError("could not change mode")
2294
2295 def utime(self, tarinfo, targetpath):
2296 """Set modification time of targetpath according to tarinfo.
2297 """
2298 if not hasattr(os, 'utime'):
2299 return
2300 try:
2301 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2302 except OSError:
2303 raise ExtractError("could not change modification time")
2304
2305 #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.  Raises ReadError for a damaged or truncated archive
           (unless ignore_zeros allows skipping over bad blocks).
        """
        self._check("ra")
        # A member may have been read ahead (e.g. during format
        # detection); hand that one out first.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            # Seek to offset-1 and read one byte so that truncation is
            # detected here rather than deep inside the header parser.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                # All-zero block: normally end-of-archive, but with
                # ignore_zeros we skip it and keep scanning.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # A bad header at offset 0 means this is not a tar
                    # file at all.
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError as e:
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # Nothing more to read: the archive is fully scanned.
            self._loaded = True

        return tarinfo
2356
2357 #--------------------------------------------------------------------------
2358 # Little helper methods:
2359
2360 def _getmember(self, name, tarinfo=None, normalize=False):
2361 """Find an archive member by name from bottom to top.
2362 If tarinfo is given, it is used as the starting point.
2363 """
2364 # Ensure that all members have been loaded.
2365 members = self.getmembers()
2366
2367 # Limit the member search list up to tarinfo.
2368 if tarinfo is not None:
2369 members = members[:members.index(tarinfo)]
2370
2371 if normalize:
2372 name = os.path.normpath(name)
2373
2374 for member in reversed(members):
2375 if normalize:
2376 member_name = os.path.normpath(member.name)
2377 else:
2378 member_name = member.name
2379
2380 if name == member_name:
2381 return member
2382
2383 def _load(self):
2384 """Read through the entire archive file and look for readable
2385 members.
2386 """
2387 while True:
2388 tarinfo = self.next()
2389 if tarinfo is None:
2390 break
2391 self._loaded = True
2392
2393 def _check(self, mode=None):
2394 """Check if TarFile is still open, and if the operation's mode
2395 corresponds to TarFile's mode.
2396 """
2397 if self.closed:
2398 raise OSError("%s is closed" % self.__class__.__name__)
2399 if mode is not None and self.mode not in mode:
2400 raise OSError("bad operation for mode %r" % self.mode)
2401
2402 def _find_link_target(self, tarinfo):
2403 """Find the target member of a symlink or hardlink member in the
2404 archive.
2405 """
2406 if tarinfo.issym():
2407 # Always search the entire archive.
2408 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2409 limit = None
2410 else:
2411 # Search the archive before the link, because a hard link is
2412 # just a reference to an already archived file.
2413 linkname = tarinfo.linkname
2414 limit = tarinfo
2415
2416 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2417 if member is None:
2418 raise KeyError("linkname %r not found" % linkname)
2419 return member
2420
    def __iter__(self):
        """Provide an iterator object over the archive's members.
        """
        # Fast path: everything is already cached in self.members.
        if self._loaded:
            yield from self.members
            return

        # Yield items using TarFile's next() method.
        # When all members have been read, set TarFile as _loaded.
        index = 0
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will have already exhausted the next() method.
        if self.firstmember is not None:
            tarinfo = self.next()
            index += 1
            yield tarinfo

        while True:
            if index < len(self.members):
                # The cache may have grown (e.g. via getmembers() during
                # iteration); serve from it before reading further.
                tarinfo = self.members[index]
            elif not self._loaded:
                tarinfo = self.next()
                if not tarinfo:
                    self._loaded = True
                    return
            else:
                return
            index += 1
            yield tarinfo
2451
2452 def _dbg(self, level, msg):
2453 """Write debugging output to sys.stderr.
2454 """
2455 if level <= self.debug:
2456 print(msg, file=sys.stderr)
2457
2458 def __enter__(self):
2459 self._check()
2460 return self
2461
2462 def __exit__(self, type, value, traceback):
2463 if type is None:
2464 self.close()
2465 else:
2466 # An exception occurred. We must not call close() because
2467 # it would try to write end-of-archive blocks and padding.
2468 if not self._extfileobj:
2469 self.fileobj.close()
2470 self.closed = True
2471
2472#--------------------
2473# exported functions
2474#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.

    'name' should be a string, file, or file-like object.
    """
    try:
        # Opening the archive performs the actual format detection.
        if hasattr(name, "read"):
            candidate = open(fileobj=name)
        else:
            candidate = open(name)
        candidate.close()
    except TarError:
        return False
    return True
2490
# Module-level convenience alias: tarfile.open() is TarFile.open().
# This intentionally shadows the builtin open(), which was saved as
# bltn_open at the top of the module.
open = TarFile.open
2492
2493
def main():
    """Command-line entry point: list, extract, create or test tar
    archives (run with --help for usage).
    """
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    # Exactly one action must be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                # getmembers() scans the whole archive and raises on
                # corruption; the duplicate call that preceded this
                # print was redundant and has been removed.
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        # One positional argument: extract into the current directory;
        # two: extract into the named directory.
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        # Map well-known extensions to a compression mode; anything else
        # produces an uncompressed archive.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2580
if __name__ == '__main__':
    # Allow the module to be run as a script: python -m tarfile ...
    main()