Blame - linux-x64/clang/python3/lib/python3.9/shlex.py - hafnium/prebuilts

blob: 4801a6c1d47bd9e0a8ada16089221c8237b777d5 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	"""A lexical analyzer class for simple shell-like syntaxes."""
				2
				3	# Module and documentation by Eric S. Raymond, 21 Dec 1998
				4	# Input stacking and error message cleanup added by ESR, March 2000
				5	# push_source() and pop_source() made explicit by ESR, January 2001.
				6	# Posix compliance, split(), string arguments, and
				7	# iterator interface by Gustavo Niemeyer, April 2003.
				8	# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
				9
				10	import os
				11	import re
				12	import sys
				13	from collections import deque
				14
				15	from io import StringIO
				16
# Public names exported by a star-import of this module.
__all__ = ["shlex", "split", "quote", "join"]
				18
				19	class shlex:
				20	"A lexical analyzer class for simple shell-like syntaxes."
				21	def __init__(self, instream=None, infile=None, posix=False,
				22	punctuation_chars=False):
				23	if isinstance(instream, str):
				24	instream = StringIO(instream)
				25	if instream is not None:
				26	self.instream = instream
				27	self.infile = infile
				28	else:
				29	self.instream = sys.stdin
				30	self.infile = None
				31	self.posix = posix
				32	if posix:
				33	self.eof = None
				34	else:
				35	self.eof = ''
				36	self.commenters = '#'
				37	self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
				38	'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
				39	if self.posix:
				40	self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
				41	'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
				42	self.whitespace = ' \t\r\n'
				43	self.whitespace_split = False
				44	self.quotes = '\'"'
				45	self.escape = '\\'
				46	self.escapedquotes = '"'
				47	self.state = ' '
				48	self.pushback = deque()
				49	self.lineno = 1
				50	self.debug = 0
				51	self.token = ''
				52	self.filestack = deque()
				53	self.source = None
				54	if not punctuation_chars:
				55	punctuation_chars = ''
				56	elif punctuation_chars is True:
				57	punctuation_chars = '();<>\|&'
				58	self._punctuation_chars = punctuation_chars
				59	if punctuation_chars:
				60	# _pushback_chars is a push back queue used by lookahead logic
				61	self._pushback_chars = deque()
				62	# these chars added because allowed in file names, args, wildcards
				63	self.wordchars += '~-./*?='
				64	#remove any punctuation chars from wordchars
				65	t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
				66	self.wordchars = self.wordchars.translate(t)
				67
				68	@property
				69	def punctuation_chars(self):
				70	return self._punctuation_chars
				71
				72	def push_token(self, tok):
				73	"Push a token onto the stack popped by the get_token method"
				74	if self.debug >= 1:
				75	print("shlex: pushing token " + repr(tok))
				76	self.pushback.appendleft(tok)
				77
				78	def push_source(self, newstream, newfile=None):
				79	"Push an input source onto the lexer's input source stack."
				80	if isinstance(newstream, str):
				81	newstream = StringIO(newstream)
				82	self.filestack.appendleft((self.infile, self.instream, self.lineno))
				83	self.infile = newfile
				84	self.instream = newstream
				85	self.lineno = 1
				86	if self.debug:
				87	if newfile is not None:
				88	print('shlex: pushing to file %s' % (self.infile,))
				89	else:
				90	print('shlex: pushing to stream %s' % (self.instream,))
				91
				92	def pop_source(self):
				93	"Pop the input source stack."
				94	self.instream.close()
				95	(self.infile, self.instream, self.lineno) = self.filestack.popleft()
				96	if self.debug:
				97	print('shlex: popping to %s, line %d' \
				98	% (self.instream, self.lineno))
				99	self.state = ' '
				100
				101	def get_token(self):
				102	"Get a token from the input stream (or from stack if it's nonempty)"
				103	if self.pushback:
				104	tok = self.pushback.popleft()
				105	if self.debug >= 1:
				106	print("shlex: popping token " + repr(tok))
				107	return tok
				108	# No pushback. Get a token.
				109	raw = self.read_token()
				110	# Handle inclusions
				111	if self.source is not None:
				112	while raw == self.source:
				113	spec = self.sourcehook(self.read_token())
				114	if spec:
				115	(newfile, newstream) = spec
				116	self.push_source(newstream, newfile)
				117	raw = self.get_token()
				118	# Maybe we got EOF instead?
				119	while raw == self.eof:
				120	if not self.filestack:
				121	return self.eof
				122	else:
				123	self.pop_source()
				124	raw = self.get_token()
				125	# Neither inclusion nor EOF
				126	if self.debug >= 1:
				127	if raw != self.eof:
				128	print("shlex: token=" + repr(raw))
				129	else:
				130	print("shlex: token=EOF")
				131	return raw
				132
				133	def read_token(self):
				134	quoted = False
				135	escapedstate = ' '
				136	while True:
				137	if self.punctuation_chars and self._pushback_chars:
				138	nextchar = self._pushback_chars.pop()
				139	else:
				140	nextchar = self.instream.read(1)
				141	if nextchar == '\n':
				142	self.lineno += 1
				143	if self.debug >= 3:
				144	print("shlex: in state %r I see character: %r" % (self.state,
				145	nextchar))
				146	if self.state is None:
				147	self.token = '' # past end of file
				148	break
				149	elif self.state == ' ':
				150	if not nextchar:
				151	self.state = None # end of file
				152	break
				153	elif nextchar in self.whitespace:
				154	if self.debug >= 2:
				155	print("shlex: I see whitespace in whitespace state")
				156	if self.token or (self.posix and quoted):
				157	break # emit current token
				158	else:
				159	continue
				160	elif nextchar in self.commenters:
				161	self.instream.readline()
				162	self.lineno += 1
				163	elif self.posix and nextchar in self.escape:
				164	escapedstate = 'a'
				165	self.state = nextchar
				166	elif nextchar in self.wordchars:
				167	self.token = nextchar
				168	self.state = 'a'
				169	elif nextchar in self.punctuation_chars:
				170	self.token = nextchar
				171	self.state = 'c'
				172	elif nextchar in self.quotes:
				173	if not self.posix:
				174	self.token = nextchar
				175	self.state = nextchar
				176	elif self.whitespace_split:
				177	self.token = nextchar
				178	self.state = 'a'
				179	else:
				180	self.token = nextchar
				181	if self.token or (self.posix and quoted):
				182	break # emit current token
				183	else:
				184	continue
				185	elif self.state in self.quotes:
				186	quoted = True
				187	if not nextchar: # end of file
				188	if self.debug >= 2:
				189	print("shlex: I see EOF in quotes state")
				190	# XXX what error should be raised here?
				191	raise ValueError("No closing quotation")
				192	if nextchar == self.state:
				193	if not self.posix:
				194	self.token += nextchar
				195	self.state = ' '
				196	break
				197	else:
				198	self.state = 'a'
				199	elif (self.posix and nextchar in self.escape and self.state
				200	in self.escapedquotes):
				201	escapedstate = self.state
				202	self.state = nextchar
				203	else:
				204	self.token += nextchar
				205	elif self.state in self.escape:
				206	if not nextchar: # end of file
				207	if self.debug >= 2:
				208	print("shlex: I see EOF in escape state")
				209	# XXX what error should be raised here?
				210	raise ValueError("No escaped character")
				211	# In posix shells, only the quote itself or the escape
				212	# character may be escaped within quotes.
				213	if (escapedstate in self.quotes and
				214	nextchar != self.state and nextchar != escapedstate):
				215	self.token += self.state
				216	self.token += nextchar
				217	self.state = escapedstate
				218	elif self.state in ('a', 'c'):
				219	if not nextchar:
				220	self.state = None # end of file
				221	break
				222	elif nextchar in self.whitespace:
				223	if self.debug >= 2:
				224	print("shlex: I see whitespace in word state")
				225	self.state = ' '
				226	if self.token or (self.posix and quoted):
				227	break # emit current token
				228	else:
				229	continue
				230	elif nextchar in self.commenters:
				231	self.instream.readline()
				232	self.lineno += 1
				233	if self.posix:
				234	self.state = ' '
				235	if self.token or (self.posix and quoted):
				236	break # emit current token
				237	else:
				238	continue
				239	elif self.state == 'c':
				240	if nextchar in self.punctuation_chars:
				241	self.token += nextchar
				242	else:
				243	if nextchar not in self.whitespace:
				244	self._pushback_chars.append(nextchar)
				245	self.state = ' '
				246	break
				247	elif self.posix and nextchar in self.quotes:
				248	self.state = nextchar
				249	elif self.posix and nextchar in self.escape:
				250	escapedstate = 'a'
				251	self.state = nextchar
				252	elif (nextchar in self.wordchars or nextchar in self.quotes
				253	or (self.whitespace_split and
				254	nextchar not in self.punctuation_chars)):
				255	self.token += nextchar
				256	else:
				257	if self.punctuation_chars:
				258	self._pushback_chars.append(nextchar)
				259	else:
				260	self.pushback.appendleft(nextchar)
				261	if self.debug >= 2:
				262	print("shlex: I see punctuation in word state")
				263	self.state = ' '
				264	if self.token or (self.posix and quoted):
				265	break # emit current token
				266	else:
				267	continue
				268	result = self.token
				269	self.token = ''
				270	if self.posix and not quoted and result == '':
				271	result = None
				272	if self.debug > 1:
				273	if result:
				274	print("shlex: raw token=" + repr(result))
				275	else:
				276	print("shlex: raw token=EOF")
				277	return result
				278
				279	def sourcehook(self, newfile):
				280	"Hook called on a filename to be sourced."
				281	if newfile[0] == '"':
				282	newfile = newfile[1:-1]
				283	# This implements cpp-like semantics for relative-path inclusion.
				284	if isinstance(self.infile, str) and not os.path.isabs(newfile):
				285	newfile = os.path.join(os.path.dirname(self.infile), newfile)
				286	return (newfile, open(newfile, "r"))
				287
				288	def error_leader(self, infile=None, lineno=None):
				289	"Emit a C-compiler-like, Emacs-friendly error-message leader."
				290	if infile is None:
				291	infile = self.infile
				292	if lineno is None:
				293	lineno = self.lineno
				294	return "\"%s\", line %d: " % (infile, lineno)
				295
				296	def __iter__(self):
				297	return self
				298
				299	def __next__(self):
				300	token = self.get_token()
				301	if token == self.eof:
				302	raise StopIteration
				303	return token
				304
				305	def split(s, comments=False, posix=True):
				306	"""Split the string s using shell-like syntax."""
				307	if s is None:
				308	import warnings
				309	warnings.warn("Passing None for 's' to shlex.split() is deprecated.",
				310	DeprecationWarning, stacklevel=2)
				311	lex = shlex(s, posix=posix)
				312	lex.whitespace_split = True
				313	if not comments:
				314	lex.commenters = ''
				315	return list(lex)
				316
				317
				318	def join(split_command):
				319	"""Return a shell-escaped string from split_command."""
				320	return ' '.join(quote(arg) for arg in split_command)
				321
				322
				323	_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search
				324
				325	def quote(s):
				326	"""Return a shell-escaped version of the string s."""
				327	if not s:
				328	return "''"
				329	if _find_unsafe(s) is None:
				330	return s
				331
				332	# use single quotes, and put single quotes into double quotes
				333	# the string $'b is then quoted as '$'"'"'b'
				334	return "'" + s.replace("'", "'\"'\"'") + "'"
				335
				336
				337	def _print_tokens(lexer):
				338	while 1:
				339	tt = lexer.get_token()
				340	if not tt:
				341	break
				342	print("Token: " + repr(tt))
				343
				344	if __name__ == '__main__':
				345	if len(sys.argv) == 1:
				346	_print_tokens(shlex())
				347	else:
				348	fn = sys.argv[1]
				349	with open(fn) as f:
				350	_print_tokens(shlex(f, fn))