Blame - linux-x64/clang/python3/lib/python3.9/base64.py - hafnium/prebuilts

blob: a28109f8a7f9c3667d5b5ee0c3e8e52bb374a8f9 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	#! /usr/bin/env python3
				2
				3	"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
				4
				5	# Modified 04-Oct-1995 by Jack Jansen to use binascii module
				6	# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
				7	# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
				8
				9	import re
				10	import struct
				11	import binascii
				12
				13
# Names exported by "from base64 import *", grouped by interface family.
__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
				30
				31
				32	bytes_types = (bytes, bytearray) # Types acceptable as binary data
				33
				34	def _bytes_from_decode_data(s):
				35	if isinstance(s, str):
				36	try:
				37	return s.encode('ascii')
				38	except UnicodeEncodeError:
				39	raise ValueError('string argument should contain only ASCII characters')
				40	if isinstance(s, bytes_types):
				41	return s
				42	try:
				43	return memoryview(s).tobytes()
				44	except TypeError:
				45	raise TypeError("argument should be a bytes-like object or ASCII "
				46	"string, not %r" % s.__class__.__name__) from None
				47
				48
				49	# Base64 encoding/decoding uses binascii
				50
				51	def b64encode(s, altchars=None):
				52	"""Encode the bytes-like object s using Base64 and return a bytes object.
				53
				54	Optional altchars should be a byte string of length 2 which specifies an
				55	alternative alphabet for the '+' and '/' characters. This allows an
				56	application to e.g. generate url or filesystem safe Base64 strings.
				57	"""
				58	encoded = binascii.b2a_base64(s, newline=False)
				59	if altchars is not None:
				60	assert len(altchars) == 2, repr(altchars)
				61	return encoded.translate(bytes.maketrans(b'+/', altchars))
				62	return encoded
				63
				64
				65	def b64decode(s, altchars=None, validate=False):
				66	"""Decode the Base64 encoded bytes-like object or ASCII string s.
				67
				68	Optional altchars must be a bytes-like object or ASCII string of length 2
				69	which specifies the alternative alphabet used instead of the '+' and '/'
				70	characters.
				71
				72	The result is returned as a bytes object. A binascii.Error is raised if
				73	s is incorrectly padded.
				74
				75	If validate is False (the default), characters that are neither in the
				76	normal base-64 alphabet nor the alternative alphabet are discarded prior
				77	to the padding check. If validate is True, these non-alphabet characters
				78	in the input result in a binascii.Error.
				79	"""
				80	s = _bytes_from_decode_data(s)
				81	if altchars is not None:
				82	altchars = _bytes_from_decode_data(altchars)
				83	assert len(altchars) == 2, repr(altchars)
				84	s = s.translate(bytes.maketrans(altchars, b'+/'))
				85	if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
				86	raise binascii.Error('Non-base64 digit found')
				87	return binascii.a2b_base64(s)
				88
				89
				90	def standard_b64encode(s):
				91	"""Encode bytes-like object s using the standard Base64 alphabet.
				92
				93	The result is returned as a bytes object.
				94	"""
				95	return b64encode(s)
				96
				97	def standard_b64decode(s):
				98	"""Decode bytes encoded with the standard Base64 alphabet.
				99
				100	Argument s is a bytes-like object or ASCII string to decode. The result
				101	is returned as a bytes object. A binascii.Error is raised if the input
				102	is incorrectly padded. Characters that are not in the standard alphabet
				103	are discarded prior to the padding check.
				104	"""
				105	return b64decode(s)
				106
				107
# Byte translation tables between the standard Base64 symbols '+' and '/'
# and the URL- and filesystem-safe symbols '-' and '_'.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')  # '+/' -> '-_'
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')  # '-_' -> '+/'
				110
				111	def urlsafe_b64encode(s):
				112	"""Encode bytes using the URL- and filesystem-safe Base64 alphabet.
				113
				114	Argument s is a bytes-like object to encode. The result is returned as a
				115	bytes object. The alphabet uses '-' instead of '+' and '_' instead of
				116	'/'.
				117	"""
				118	return b64encode(s).translate(_urlsafe_encode_translation)
				119
				120	def urlsafe_b64decode(s):
				121	"""Decode bytes using the URL- and filesystem-safe Base64 alphabet.
				122
				123	Argument s is a bytes-like object or ASCII string to decode. The result
				124	is returned as a bytes object. A binascii.Error is raised if the input
				125	is incorrectly padded. Characters that are not in the URL-safe base-64
				126	alphabet, and are not a plus '+' or slash '/', are discarded prior to the
				127	padding check.
				128
				129	The alphabet uses '-' instead of '+' and '_' instead of '/'.
				130	"""
				131	s = _bytes_from_decode_data(s)
				132	s = s.translate(_urlsafe_decode_translation)
				133	return b64decode(s)
				134
				135
				136
				137	# Base32 encoding/decoding must be done in Python
# The 32 Base32 symbols; a symbol's index in this string is its 5-bit value.
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
# Encoding table of all two-symbol pairs; built on the first b32encode() call.
_b32tab2 = None
# Decoding table mapping symbol byte -> value; built on the first
# b32decode() call.
_b32rev = None
				141
				142	def b32encode(s):
				143	"""Encode the bytes-like object s using Base32 and return a bytes object.
				144	"""
				145	global _b32tab2
				146	# Delay the initialization of the table to not waste memory
				147	# if the function is never called
				148	if _b32tab2 is None:
				149	b32tab = [bytes((i,)) for i in _b32alphabet]
				150	_b32tab2 = [a + b for a in b32tab for b in b32tab]
				151	b32tab = None
				152
				153	if not isinstance(s, bytes_types):
				154	s = memoryview(s).tobytes()
				155	leftover = len(s) % 5
				156	# Pad the last quantum with zero bits if necessary
				157	if leftover:
				158	s = s + b'\0' * (5 - leftover) # Don't use += !
				159	encoded = bytearray()
				160	from_bytes = int.from_bytes
				161	b32tab2 = _b32tab2
				162	for i in range(0, len(s), 5):
				163	c = from_bytes(s[i: i + 5], 'big')
				164	encoded += (b32tab2[c >> 30] + # bits 1 - 10
				165	b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
				166	b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
				167	b32tab2[c & 0x3ff] # bits 31 - 40
				168	)
				169	# Adjust for any leftover partial quanta
				170	if leftover == 1:
				171	encoded[-6:] = b'======'
				172	elif leftover == 2:
				173	encoded[-4:] = b'===='
				174	elif leftover == 3:
				175	encoded[-3:] = b'==='
				176	elif leftover == 4:
				177	encoded[-1:] = b'='
				178	return bytes(encoded)
				179
				180	def b32decode(s, casefold=False, map01=None):
				181	"""Decode the Base32 encoded bytes-like object or ASCII string s.
				182
				183	Optional casefold is a flag specifying whether a lowercase alphabet is
				184	acceptable as input. For security purposes, the default is False.
				185
				186	RFC 3548 allows for optional mapping of the digit 0 (zero) to the
				187	letter O (oh), and for optional mapping of the digit 1 (one) to
				188	either the letter I (eye) or letter L (el). The optional argument
				189	map01 when not None, specifies which letter the digit 1 should be
				190	mapped to (when map01 is not None, the digit 0 is always mapped to
				191	the letter O). For security purposes the default is None, so that
				192	0 and 1 are not allowed in the input.
				193
				194	The result is returned as a bytes object. A binascii.Error is raised if
				195	the input is incorrectly padded or if there are non-alphabet
				196	characters present in the input.
				197	"""
				198	global _b32rev
				199	# Delay the initialization of the table to not waste memory
				200	# if the function is never called
				201	if _b32rev is None:
				202	_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
				203	s = _bytes_from_decode_data(s)
				204	if len(s) % 8:
				205	raise binascii.Error('Incorrect padding')
				206	# Handle section 2.4 zero and one mapping. The flag map01 will be either
				207	# False, or the character to map the digit 1 (one) to. It should be
				208	# either L (el) or I (eye).
				209	if map01 is not None:
				210	map01 = _bytes_from_decode_data(map01)
				211	assert len(map01) == 1, repr(map01)
				212	s = s.translate(bytes.maketrans(b'01', b'O' + map01))
				213	if casefold:
				214	s = s.upper()
				215	# Strip off pad characters from the right. We need to count the pad
				216	# characters because this will tell us how many null bytes to remove from
				217	# the end of the decoded string.
				218	l = len(s)
				219	s = s.rstrip(b'=')
				220	padchars = l - len(s)
				221	# Now decode the full quanta
				222	decoded = bytearray()
				223	b32rev = _b32rev
				224	for i in range(0, len(s), 8):
				225	quanta = s[i: i + 8]
				226	acc = 0
				227	try:
				228	for c in quanta:
				229	acc = (acc << 5) + b32rev[c]
				230	except KeyError:
				231	raise binascii.Error('Non-base32 digit found') from None
				232	decoded += acc.to_bytes(5, 'big')
				233	# Process the last, partial quanta
				234	if l % 8 or padchars not in {0, 1, 3, 4, 6}:
				235	raise binascii.Error('Incorrect padding')
				236	if padchars and decoded:
				237	acc <<= 5 * padchars
				238	last = acc.to_bytes(5, 'big')
				239	leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1
				240	decoded[-5:] = last[:leftover]
				241	return bytes(decoded)
				242
				243
				244	# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
				245	# lowercase. The RFC also recommends against accepting input case
				246	# insensitively.
				247	def b16encode(s):
				248	"""Encode the bytes-like object s using Base16 and return a bytes object.
				249	"""
				250	return binascii.hexlify(s).upper()
				251
				252
				253	def b16decode(s, casefold=False):
				254	"""Decode the Base16 encoded bytes-like object or ASCII string s.
				255
				256	Optional casefold is a flag specifying whether a lowercase alphabet is
				257	acceptable as input. For security purposes, the default is False.
				258
				259	The result is returned as a bytes object. A binascii.Error is raised if
				260	s is incorrectly padded or if there are non-alphabet characters present
				261	in the input.
				262	"""
				263	s = _bytes_from_decode_data(s)
				264	if casefold:
				265	s = s.upper()
				266	if re.search(b'[^0-9A-F]', s):
				267	raise binascii.Error('Non-base16 digit found')
				268	return binascii.unhexlify(s)
				269
				270	#
				271	# Ascii85 encoding/decoding
				272	#
				273
# Ascii85 encoding tables, built on the first a85encode() call: the
# single-symbol table and the table of all two-symbol pairs.
_a85chars = None
_a85chars2 = None
# Markers that frame the data in the Adobe variant of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
				278
				279	def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
				280	# Helper function for a85encode and b85encode
				281	if not isinstance(b, bytes_types):
				282	b = memoryview(b).tobytes()
				283
				284	padding = (-len(b)) % 4
				285	if padding:
				286	b = b + b'\0' * padding
				287	words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
				288
				289	chunks = [b'z' if foldnuls and not word else
				290	b'y' if foldspaces and word == 0x20202020 else
				291	(chars2[word // 614125] +
				292	chars2[word // 85 % 7225] +
				293	chars[word % 85])
				294	for word in words]
				295
				296	if padding and not pad:
				297	if chunks[-1] == b'z':
				298	chunks[-1] = chars[0] * 5
				299	chunks[-1] = chunks[-1][:-padding]
				300
				301	return b''.join(chunks)
				302
				303	def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
				304	"""Encode bytes-like object b using Ascii85 and return a bytes object.
				305
				306	foldspaces is an optional flag that uses the special short sequence 'y'
				307	instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
				308	feature is not supported by the "standard" Adobe encoding.
				309
				310	wrapcol controls whether the output should have newline (b'\\n') characters
				311	added to it. If this is non-zero, each output line will be at most this
				312	many characters long.
				313
				314	pad controls whether the input is padded to a multiple of 4 before
				315	encoding. Note that the btoa implementation always pads.
				316
				317	adobe controls whether the encoded byte sequence is framed with <~ and ~>,
				318	which is used by the Adobe implementation.
				319	"""
				320	global _a85chars, _a85chars2
				321	# Delay the initialization of tables to not waste memory
				322	# if the function is never called
				323	if _a85chars is None:
				324	_a85chars = [bytes((i,)) for i in range(33, 118)]
				325	_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
				326
				327	result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
				328
				329	if adobe:
				330	result = _A85START + result
				331	if wrapcol:
				332	wrapcol = max(2 if adobe else 1, wrapcol)
				333	chunks = [result[i: i + wrapcol]
				334	for i in range(0, len(result), wrapcol)]
				335	if adobe:
				336	if len(chunks[-1]) + 2 > wrapcol:
				337	chunks.append(b'')
				338	result = b'\n'.join(chunks)
				339	if adobe:
				340	result += _A85END
				341
				342	return result
				343
				344	def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
				345	"""Decode the Ascii85 encoded bytes-like object or ASCII string b.
				346
				347	foldspaces is a flag that specifies whether the 'y' short sequence should be
				348	accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
				349	not supported by the "standard" Adobe encoding.
				350
				351	adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
				352	is framed with <~ and ~>).
				353
				354	ignorechars should be a byte string containing characters to ignore from the
				355	input. This should only contain whitespace characters, and by default
				356	contains all whitespace characters in ASCII.
				357
				358	The result is returned as a bytes object.
				359	"""
				360	b = _bytes_from_decode_data(b)
				361	if adobe:
				362	if not b.endswith(_A85END):
				363	raise ValueError(
				364	"Ascii85 encoded byte sequences must end "
				365	"with {!r}".format(_A85END)
				366	)
				367	if b.startswith(_A85START):
				368	b = b[2:-2] # Strip off start/end markers
				369	else:
				370	b = b[:-2]
				371	#
				372	# We have to go through this stepwise, so as to ignore spaces and handle
				373	# special short sequences
				374	#
				375	packI = struct.Struct('!I').pack
				376	decoded = []
				377	decoded_append = decoded.append
				378	curr = []
				379	curr_append = curr.append
				380	curr_clear = curr.clear
				381	for x in b + b'u' * 4:
				382	if b'!'[0] <= x <= b'u'[0]:
				383	curr_append(x)
				384	if len(curr) == 5:
				385	acc = 0
				386	for x in curr:
				387	acc = 85 * acc + (x - 33)
				388	try:
				389	decoded_append(packI(acc))
				390	except struct.error:
				391	raise ValueError('Ascii85 overflow') from None
				392	curr_clear()
				393	elif x == b'z'[0]:
				394	if curr:
				395	raise ValueError('z inside Ascii85 5-tuple')
				396	decoded_append(b'\0\0\0\0')
				397	elif foldspaces and x == b'y'[0]:
				398	if curr:
				399	raise ValueError('y inside Ascii85 5-tuple')
				400	decoded_append(b'\x20\x20\x20\x20')
				401	elif x in ignorechars:
				402	# Skip whitespace
				403	continue
				404	else:
				405	raise ValueError('Non-Ascii85 digit found: %c' % x)
				406
				407	result = b''.join(decoded)
				408	padding = 4 - len(curr)
				409	if padding:
				410	# Throw away the extra padding
				411	result = result[:-padding]
				412	return result
				413
				414	# The following code is originally taken (with permission) from Mercurial
				415
				416	_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				417	b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{\|}~")
				418	_b85chars = None
				419	_b85chars2 = None
				420	_b85dec = None
				421
				422	def b85encode(b, pad=False):
				423	"""Encode bytes-like object b in base85 format and return a bytes object.
				424
				425	If pad is true, the input is padded with b'\\0' so its length is a multiple of
				426	4 bytes before encoding.
				427	"""
				428	global _b85chars, _b85chars2
				429	# Delay the initialization of tables to not waste memory
				430	# if the function is never called
				431	if _b85chars is None:
				432	_b85chars = [bytes((i,)) for i in _b85alphabet]
				433	_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
				434	return _85encode(b, _b85chars, _b85chars2, pad)
				435
				436	def b85decode(b):
				437	"""Decode the base85-encoded bytes-like object or ASCII string b
				438
				439	The result is returned as a bytes object.
				440	"""
				441	global _b85dec
				442	# Delay the initialization of tables to not waste memory
				443	# if the function is never called
				444	if _b85dec is None:
				445	_b85dec = [None] * 256
				446	for i, c in enumerate(_b85alphabet):
				447	_b85dec[c] = i
				448
				449	b = _bytes_from_decode_data(b)
				450	padding = (-len(b)) % 5
				451	b = b + b'~' * padding
				452	out = []
				453	packI = struct.Struct('!I').pack
				454	for i in range(0, len(b), 5):
				455	chunk = b[i:i + 5]
				456	acc = 0
				457	try:
				458	for c in chunk:
				459	acc = acc * 85 + _b85dec[c]
				460	except TypeError:
				461	for j, c in enumerate(chunk):
				462	if _b85dec[c] is None:
				463	raise ValueError('bad base85 character at position %d'
				464	% (i + j)) from None
				465	raise
				466	try:
				467	out.append(packI(acc))
				468	except struct.error:
				469	raise ValueError('base85 overflow in hunk starting at byte %d'
				470	% i) from None
				471
				472	result = b''.join(out)
				473	if padding:
				474	result = result[:-padding]
				475	return result
				476
				477	# Legacy interface. This code could be cleaned up since I don't believe
				478	# binascii has any line length limitations. It just doesn't seem worth it
				479	# though. The files should be opened in binary mode.
				480
				481	MAXLINESIZE = 76 # Excluding the CRLF
				482	MAXBINSIZE = (MAXLINESIZE//4)*3
				483
				484	def encode(input, output):
				485	"""Encode a file; input and output are binary files."""
				486	while True:
				487	s = input.read(MAXBINSIZE)
				488	if not s:
				489	break
				490	while len(s) < MAXBINSIZE:
				491	ns = input.read(MAXBINSIZE-len(s))
				492	if not ns:
				493	break
				494	s += ns
				495	line = binascii.b2a_base64(s)
				496	output.write(line)
				497
				498
				499	def decode(input, output):
				500	"""Decode a file; input and output are binary files."""
				501	while True:
				502	line = input.readline()
				503	if not line:
				504	break
				505	s = binascii.a2b_base64(line)
				506	output.write(s)
				507
				508	def _input_type_check(s):
				509	try:
				510	m = memoryview(s)
				511	except TypeError as err:
				512	msg = "expected bytes-like object, not %s" % s.__class__.__name__
				513	raise TypeError(msg) from err
				514	if m.format not in ('c', 'b', 'B'):
				515	msg = ("expected single byte elements, not %r from %s" %
				516	(m.format, s.__class__.__name__))
				517	raise TypeError(msg)
				518	if m.ndim != 1:
				519	msg = ("expected 1-D data, not %d-D data from %s" %
				520	(m.ndim, s.__class__.__name__))
				521	raise TypeError(msg)
				522
				523
				524	def encodebytes(s):
				525	"""Encode a bytestring into a bytes object containing multiple lines
				526	of base-64 data."""
				527	_input_type_check(s)
				528	pieces = []
				529	for i in range(0, len(s), MAXBINSIZE):
				530	chunk = s[i : i + MAXBINSIZE]
				531	pieces.append(binascii.b2a_base64(chunk))
				532	return b"".join(pieces)
				533
				534
				535	def decodebytes(s):
				536	"""Decode a bytestring of base-64 data into a bytes object."""
				537	_input_type_check(s)
				538	return binascii.a2b_base64(s)
				539
				540
				541	# Usable as a script...
				542	def main():
				543	"""Small main program"""
				544	import sys, getopt
				545	try:
				546	opts, args = getopt.getopt(sys.argv[1:], 'deut')
				547	except getopt.error as msg:
				548	sys.stdout = sys.stderr
				549	print(msg)
				550	print("""usage: %s [-d\|-e\|-u\|-t] [file\|-]
				551	-d, -u: decode
				552	-e: encode (default)
				553	-t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0])
				554	sys.exit(2)
				555	func = encode
				556	for o, a in opts:
				557	if o == '-e': func = encode
				558	if o == '-d': func = decode
				559	if o == '-u': func = decode
				560	if o == '-t': test(); return
				561	if args and args[0] != '-':
				562	with open(args[0], 'rb') as f:
				563	func(f, sys.stdout.buffer)
				564	else:
				565	func(sys.stdin.buffer, sys.stdout.buffer)
				566
				567
				568	def test():
				569	s0 = b"Aladdin:open sesame"
				570	print(repr(s0))
				571	s1 = encodebytes(s0)
				572	print(repr(s1))
				573	s2 = decodebytes(s1)
				574	print(repr(s2))
				575	assert s0 == s2
				576
				577
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()