"""Tokenization help for Python programs.

tokenize(readline) is a generator that breaks a stream of bytes into
Python tokens. It decodes the bytes according to PEP-0263 for
determining source file encoding.

It accepts a readline-like method which is called repeatedly to get the
next line of input (or b"" for EOF). It generates 5-tuples with these
members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators. Additionally, all token lists start with an ENCODING token
which tells you which encoding was used to decode the bytes stream.
"""

__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
               'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
               'Michael Foord')
from builtins import open as _builtin_open
from codecs import lookup, BOM_UTF8
import collections
from io import TextIOWrapper
import itertools as _itertools
import re
import sys
from token import *
from token import EXACT_TOKEN_TYPES

cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)

import token
__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
                           "untokenize", "TokenInfo"]
del token

class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
    def __repr__(self):
        annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
        return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
                self._replace(type=annotated_type))

    @property
    def exact_type(self):
        if self.type == OP and self.string in EXACT_TOKEN_TYPES:
            return EXACT_TOKEN_TYPES[self.string]
        else:
            return self.type
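
# Illustrative sketch of exact_type (hypothetical helper, not part of the
# module API): an operator token carries the generic OP type, while
# exact_type resolves it through EXACT_TOKEN_TYPES.
def _example_exact_type():
    tok = TokenInfo(OP, '(', (1, 0), (1, 1), '(1,)\n')
    assert tok.type == OP
    assert tok.exact_type == LPAR   # '(' maps to LPAR in EXACT_TOKEN_TYPES
    return tok.exact_type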
57
58def group(*choices): return '(' + '|'.join(choices) + ')'
59def any(*choices): return group(*choices) + '*'
60def maybe(*choices): return group(*choices) + '?'
61
62# Note: we use unicode matching for names ("\w") but ascii matching for
63# number literals.
64Whitespace = r'[ \f\t]*'
65Comment = r'#[^\r\n]*'
66Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
67Name = r'\w+'
68
69Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
70Binnumber = r'0[bB](?:_?[01])+'
71Octnumber = r'0[oO](?:_?[0-7])+'
72Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
73Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
74Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
75Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
76 r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
77Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
78Floatnumber = group(Pointfloat, Expfloat)
79Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
80Number = group(Imagnumber, Floatnumber, Intnumber)

# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
    # The valid string prefixes. Only contain the lower case versions,
    # and don't contain any permutations (include 'fr', but not
    # 'rf'). The various permutations will be generated.
    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
    # if we add binary f-strings, add: ['fb', 'fbr']
    result = {''}
    for prefix in _valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            # character
            for u in _itertools.product(*[(c, c.upper()) for c in t]):
                result.add(''.join(u))
    return result

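# Illustrative check (hypothetical helper, not part of the module API): the
# single lower-case entry 'br' above expands into every case/order
# combination of its characters, and the empty prefix is always included.
def _example_prefix_expansion():
    prefixes = _all_string_prefixes()
    assert '' in prefixes
    assert {'br', 'bR', 'Br', 'BR', 'rb', 'rB', 'Rb', 'RB'} <= prefixes
    return sorted(prefixes)
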
def _compile(expr):
    return re.compile(expr, re.UNICODE)

# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Single-line ' or " string.
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Sorting in reverse order puts the long operators before their prefixes.
# Otherwise if = came before ==, == would get recognized as two instances
# of =.
Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
Funny = group(r'\r?\n', Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

# For a given string prefix plus quotes, endpats maps it to a regex
# to match the remainder of that string. _prefix can be empty, for
# a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
    endpats[_prefix + "'"] = Single
    endpats[_prefix + '"'] = Double
    endpats[_prefix + "'''"] = Single3
    endpats[_prefix + '"""'] = Double3

# A set of all of the single and triple quoted string prefixes,
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
    for u in (t + '"', t + "'"):
        single_quoted.add(u)
    for u in (t + '"""', t + "'''"):
        triple_quoted.add(u)
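
# Illustrative sketch (hypothetical helper, not part of the module API) of
# how the tables built above are keyed: endpats maps "prefix + opening
# quote" to the regex for the rest of that string, while single_quoted and
# triple_quoted record the valid string openers.
def _example_string_tables():
    assert endpats["'"] is Single and endpats['"""'] is Double3
    assert "rb'" in single_quoted and 'f"""' in triple_quoted
    return endpats["rb'"]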

tabsize = 8

class TokenError(Exception): pass

class StopTokenizing(Exception): pass


class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0
        self.encoding = None

    def add_whitespace(self, start):
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError("start ({},{}) precedes previous end ({},{})"
                             .format(row, col, self.prev_row, self.prev_col))
        row_offset = row - self.prev_row
        if row_offset:
            self.tokens.append("\\\n" * row_offset)
            self.prev_col = 0
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        it = iter(iterable)
        indents = []
        startline = False
        for t in it:
            if len(t) == 2:
                self.compat(t, it)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENCODING:
                self.encoding = token
                continue
            if tok_type == ENDMARKER:
                break
            if tok_type == INDENT:
                indents.append(token)
                continue
            elif tok_type == DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = end
                continue
            elif tok_type in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                indent = indents[-1]
                if start[1] >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        indents = []
        toks_append = self.tokens.append
        startline = token[0] in (NEWLINE, NL)
        prevstring = False

        for tok in _itertools.chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)


def untokenize(iterable):
    """Transform tokens back into Python source code.
    It returns a bytes object, encoded using the ENCODING
    token, which is the first token sequence output by tokenize.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value. If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output bytes will tokenize back to the input
        t1 = [tok[:2] for tok in tokenize(f.readline)]
        newcode = untokenize(t1)
        readline = BytesIO(newcode).readline
        t2 = [tok[:2] for tok in tokenize(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    out = ut.untokenize(iterable)
    if ut.encoding is not None:
        out = out.encode(ut.encoding)
    return out
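
# A round-trip sketch of the "limited input" invariant documented above
# (hypothetical helper, not part of the module API): tokenize, keep only
# (type, string) pairs, untokenize, and tokenize the result again.
def _example_roundtrip(source_bytes=b"if x:\n    y = 1\n"):
    from io import BytesIO
    t1 = [tok[:2] for tok in tokenize(BytesIO(source_bytes).readline)]
    newcode = untokenize(t1)        # bytes, encoded per the ENCODING token
    t2 = [tok[:2] for tok in tokenize(BytesIO(newcode).readline)]
    assert t1 == t2
    return newcode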


def _get_normal_name(orig_enc):
    """Imitates get_normal_name in tokenizer.c."""
    # Only care about the first 12 characters.
    enc = orig_enc[:12].lower().replace("_", "-")
    if enc == "utf-8" or enc.startswith("utf-8-"):
        return "utf-8"
    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
        return "iso-8859-1"
    return orig_enc

def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a UTF-8 BOM or an encoding
    cookie as specified in PEP-0263. If both a BOM and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
    invalid charset, raise a SyntaxError. Note that if a UTF-8 BOM is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    try:
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode('utf-8')
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if filename is not None:
                msg = '{} for {!r}'.format(msg, filename)
            raise SyntaxError(msg)

        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if filename is None:
                msg = "unknown encoding: " + encoding
            else:
                msg = "unknown encoding for {!r}: {}".format(filename,
                                                             encoding)
            raise SyntaxError(msg)

        if bom_found:
            if encoding != 'utf-8':
                # This behaviour mimics the Python interpreter
                if filename is None:
                    msg = 'encoding problem: utf-8'
                else:
                    msg = 'encoding problem for {!r}: utf-8'.format(filename)
                raise SyntaxError(msg)
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    if not blank_re.match(first):
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]
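
# Illustrative sketch (hypothetical helper, not part of the module API):
# detecting an encoding cookie on the first line of an in-memory buffer.
def _example_detect_encoding():
    from io import BytesIO
    buf = BytesIO(b'# -*- coding: latin-1 -*-\nprint("hi")\n')
    encoding, lines = detect_encoding(buf.readline)
    assert encoding == 'iso-8859-1'     # 'latin-1' is normalized
    assert lines == [b'# -*- coding: latin-1 -*-\n']
    return encoding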


def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    buffer = _builtin_open(filename, 'rb')
    try:
        encoding, lines = detect_encoding(buffer.readline)
        buffer.seek(0)
        text = TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
        return text
    except:
        buffer.close()
        raise


def tokenize(readline):
    """
    The tokenize() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as bytes. Alternatively, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile, 'rb').__next__  # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    physical line.

    The first token sequence will always be an ENCODING token
    which tells you which encoding was used to decode the bytes stream.
    """
    encoding, consumed = detect_encoding(readline)
    empty = _itertools.repeat(b"")
    rl_gen = _itertools.chain(consumed, iter(readline, b""), empty)
    return _tokenize(rl_gen.__next__, encoding)


def _tokenize(readline, encoding):
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
    last_line = b''
    line = b''
    while True:                                # loop over lines in stream
        try:
            # We capture the value of the line variable here because
            # readline uses the empty string '' to signal end of input,
            # hence `line` itself will always be overwritten at the end
            # of this loop.
            last_line = line
            line = readline()
        except StopIteration:
            line = b''

        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield TokenInfo(ERRORTOKEN, contstr + line,
                           strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    yield TokenInfo(COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    pos += len(comment_token)

                yield TokenInfo(NL, line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]

                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = _compile(PseudoToken).match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                if start == end:
                    continue
                token, initial = line[start:end], line[start]

                if (initial in numchars or                 # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    if parenlev > 0:
                        yield TokenInfo(NL, token, spos, epos, line)
                    else:
                        yield TokenInfo(NEWLINE, token, spos, epos, line)

                elif initial == '#':
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)

                elif token in triple_quoted:
                    endprog = _compile(endpats[token])
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break

                # Check up to the first 3 chars of the token to see if
                # they're in the single_quoted set. If so, they start
                # a string.
                # We're using the first 3, because we're looking for
                # "rb'" (for example) at the start of the token. If
                # we switch to longer prefixes, this needs to be
                # adjusted.
                # Note that initial == token[:1].
                # Also note that single quote checking must come after
                # triple quote checking (above).
                elif (initial in single_quoted or
                      token[:2] in single_quoted or
                      token[:3] in single_quoted):
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Again, using the first 3 chars of the
                        # token. This is looking for the matching end
                        # regex for the correct type of quote
                        # character. So it's really looking for
                        # endpats["'"] or endpats['"'], by trying to
                        # skip string prefix characters, if any.
                        endprog = _compile(endpats.get(initial) or
                                           endpats.get(token[1]) or
                                           endpats.get(token[2]))
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)

                elif initial.isidentifier():               # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                yield TokenInfo(ERRORTOKEN, line[pos],
                           (lnum, pos), (lnum, pos+1), line)
                pos += 1

    # Add an implicit NEWLINE if the input doesn't end in one
    if last_line and last_line[-1] not in '\r\n':
        yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')


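# Illustrative sketch (hypothetical helper, not part of the module API): when
# the input does not end in a newline, _tokenize appends an empty NEWLINE,
# any pending DEDENTs, and an ENDMARKER, so every stream ends consistently.
def _example_trailing_tokens():
    from io import BytesIO
    toks = list(tokenize(BytesIO(b"x = 1").readline))
    assert [t.type for t in toks[-2:]] == [NEWLINE, ENDMARKER]
    assert toks[-2].string == ''        # the synthesized NEWLINE is empty
    return toks

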
def generate_tokens(readline):
    """Tokenize a source reading Python code as unicode strings.

    This has the same API as tokenize(), except that it expects the *readline*
    callable to return str objects instead of bytes.
    """
    return _tokenize(readline, None)

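# Illustrative sketch (hypothetical helper, not part of the module API):
# generate_tokens() consumes str lines, so a StringIO readline works and no
# leading ENCODING token is produced.
def _example_generate_tokens():
    from io import StringIO
    toks = list(generate_tokens(StringIO("a + b\n").readline))
    assert toks[0].type == NAME         # no ENCODING token here
    return [(tok_name[t.type], t.string) for t in toks]
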
def main():
    import argparse

    # Helper error handling routines
    def perror(message):
        sys.stderr.write(message)
        sys.stderr.write('\n')

    def error(message, filename=None, location=None):
        if location:
            args = (filename,) + location + (message,)
            perror("%s:%d:%d: error: %s" % args)
        elif filename:
            perror("%s: error: %s" % (filename, message))
        else:
            perror("error: %s" % message)
        sys.exit(1)

    # Parse the arguments and options
    parser = argparse.ArgumentParser(prog='python -m tokenize')
    parser.add_argument(dest='filename', nargs='?',
                        metavar='filename.py',
                        help='the file to tokenize; defaults to stdin')
    parser.add_argument('-e', '--exact', dest='exact', action='store_true',
                        help='display token names using the exact type')
    args = parser.parse_args()

    try:
        # Tokenize the input
        if args.filename:
            filename = args.filename
            with _builtin_open(filename, 'rb') as f:
                tokens = list(tokenize(f.readline))
        else:
            filename = "<stdin>"
            tokens = _tokenize(sys.stdin.readline, None)

        # Output the tokenization
        for token in tokens:
            token_type = token.type
            if args.exact:
                token_type = token.exact_type
            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
            print("%-20s%-15s%-15r" %
                  (token_range, tok_name[token_type], token.string))
    except IndentationError as err:
        line, column = err.args[1][1:3]
        error(err.args[0], filename, (line, column))
    except TokenError as err:
        line, column = err.args[1]
        error(err.args[0], filename, (line, column))
    except SyntaxError as err:
        error(err, filename)
    except OSError as err:
        error(err)
    except KeyboardInterrupt:
        print("interrupted\n")
    except Exception as err:
        perror("unexpected error: %s" % err)
        raise

if __name__ == "__main__":
    main()