Blame - linux-x64/clang/python3/lib/python3.9/tabnanny.py - hafnium/prebuilts

blob: 7973f26f98b8b245de3569580ca57e11736be7e9 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	#! /usr/bin/env python3
				2
				3	"""The Tab Nanny despises ambiguous indentation. She knows no mercy.
				4
				5	tabnanny -- Detection of ambiguous indentation
				6
				7	For the time being this module is intended to be called as a script.
				8	However it is possible to import it into an IDE and use the function
				9	check() described below.
				10
				11	Warning: The API provided by this module is likely to change in future
				12	releases; such changes may not be backward compatible.
				13	"""
				14
				15	# Released to the public domain, by Tim Peters, 15 April 1998.
				16
				17	# XXX Note: this is now a standard library module.
				18	# XXX The API needs to undergo changes however; the current code is too
				19	# XXX script-like. This will be addressed later.
				20
				21	__version__ = "6"
				22
				23	import os
				24	import sys
				25	import tokenize
# process_tokens() classifies tokenize.NL tokens as junk, so refuse to load
# against a tokenize module that does not define that token type.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

# Names exported by "from tabnanny import *".
__all__ = ["check", "NannyNag", "process_tokens"]

# Module-level option counters.  main() increments `verbose` once per -v flag
# and `filename_only` once per -q flag; check() reads both to decide what to
# print.
verbose = 0
filename_only = 0
				33
				34	def errprint(*args):
				35	sep = ""
				36	for arg in args:
				37	sys.stderr.write(sep + str(arg))
				38	sep = " "
				39	sys.stderr.write("\n")
				40
				41	def main():
				42	import getopt
				43
				44	global verbose, filename_only
				45	try:
				46	opts, args = getopt.getopt(sys.argv[1:], "qv")
				47	except getopt.error as msg:
				48	errprint(msg)
				49	return
				50	for o, a in opts:
				51	if o == '-q':
				52	filename_only = filename_only + 1
				53	if o == '-v':
				54	verbose = verbose + 1
				55	if not args:
				56	errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
				57	return
				58	for arg in args:
				59	check(arg)
				60
				61	class NannyNag(Exception):
				62	"""
				63	Raised by process_tokens() if detecting an ambiguous indent.
				64	Captured and handled in check().
				65	"""
				66	def __init__(self, lineno, msg, line):
				67	self.lineno, self.msg, self.line = lineno, msg, line
				68	def get_lineno(self):
				69	return self.lineno
				70	def get_msg(self):
				71	return self.msg
				72	def get_line(self):
				73	return self.line
				74
				75	def check(file):
				76	"""check(file_or_dir)
				77
				78	If file_or_dir is a directory and not a symbolic link, then recursively
				79	descend the directory tree named by file_or_dir, checking all .py files
				80	along the way. If file_or_dir is an ordinary Python source file, it is
				81	checked for whitespace related problems. The diagnostic messages are
				82	written to standard output using the print statement.
				83	"""
				84
				85	if os.path.isdir(file) and not os.path.islink(file):
				86	if verbose:
				87	print("%r: listing directory" % (file,))
				88	names = os.listdir(file)
				89	for name in names:
				90	fullname = os.path.join(file, name)
				91	if (os.path.isdir(fullname) and
				92	not os.path.islink(fullname) or
				93	os.path.normcase(name[-3:]) == ".py"):
				94	check(fullname)
				95	return
				96
				97	try:
				98	f = tokenize.open(file)
				99	except OSError as msg:
				100	errprint("%r: I/O Error: %s" % (file, msg))
				101	return
				102
				103	if verbose > 1:
				104	print("checking %r ..." % file)
				105
				106	try:
				107	process_tokens(tokenize.generate_tokens(f.readline))
				108
				109	except tokenize.TokenError as msg:
				110	errprint("%r: Token Error: %s" % (file, msg))
				111	return
				112
				113	except IndentationError as msg:
				114	errprint("%r: Indentation Error: %s" % (file, msg))
				115	return
				116
				117	except NannyNag as nag:
				118	badline = nag.get_lineno()
				119	line = nag.get_line()
				120	if verbose:
				121	print("%r: * Line %d: trouble in tab city! *" % (file, badline))
				122	print("offending line: %r" % (line,))
				123	print(nag.get_msg())
				124	else:
				125	if ' ' in file: file = '"' + file + '"'
				126	if filename_only: print(file)
				127	else: print(file, badline, repr(line))
				128	return
				129
				130	finally:
				131	f.close()
				132
				133	if verbose:
				134	print("%r: Clean bill of health." % (file,))
				135
				136	class Whitespace:
				137	# the characters used for space and tab
				138	S, T = ' \t'
				139
				140	# members:
				141	# raw
				142	# the original string
				143	# n
				144	# the number of leading whitespace characters in raw
				145	# nt
				146	# the number of tabs in raw[:n]
				147	# norm
				148	# the normal form as a pair (count, trailing), where:
				149	# count
				150	# a tuple such that raw[:n] contains count[i]
				151	# instances of S * i + T
				152	# trailing
				153	# the number of trailing spaces in raw[:n]
				154	# It's A Theorem that m.indent_level(t) ==
				155	# n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
				156	# is_simple
				157	# true iff raw[:n] is of the form (T)(S)
				158
				159	def __init__(self, ws):
				160	self.raw = ws
				161	S, T = Whitespace.S, Whitespace.T
				162	count = []
				163	b = n = nt = 0
				164	for ch in self.raw:
				165	if ch == S:
				166	n = n + 1
				167	b = b + 1
				168	elif ch == T:
				169	n = n + 1
				170	nt = nt + 1
				171	if b >= len(count):
				172	count = count + [0] * (b - len(count) + 1)
				173	count[b] = count[b] + 1
				174	b = 0
				175	else:
				176	break
				177	self.n = n
				178	self.nt = nt
				179	self.norm = tuple(count), b
				180	self.is_simple = len(count) <= 1
				181
				182	# return length of longest contiguous run of spaces (whether or not
				183	# preceding a tab)
				184	def longest_run_of_spaces(self):
				185	count, trailing = self.norm
				186	return max(len(count)-1, trailing)
				187
				188	def indent_level(self, tabsize):
				189	# count, il = self.norm
				190	# for i in range(len(count)):
				191	# if count[i]:
				192	# il = il + (i//tabsize + 1)tabsize count[i]
				193	# return il
				194
				195	# quicker:
				196	# il = trailing + sum (i//ts + 1)tscount[i] =
				197	# trailing + ts * sum (i//ts + 1)*count[i] =
				198	# trailing + ts * sum i//ts*count[i] + count[i] =
				199	# trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
				200	# trailing + ts * [(sum i//ts*count[i]) + num_tabs]
				201	# and note that i//ts*count[i] is 0 when i < ts
				202
				203	count, trailing = self.norm
				204	il = 0
				205	for i in range(tabsize, len(count)):
				206	il = il + i//tabsize * count[i]
				207	return trailing + tabsize * (il + self.nt)
				208
				209	# return true iff self.indent_level(t) == other.indent_level(t)
				210	# for all t >= 1
				211	def equal(self, other):
				212	return self.norm == other.norm
				213
				214	# return a list of tuples (ts, i1, i2) such that
				215	# i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
				216	# Intended to be used after not self.equal(other) is known, in which
				217	# case it will return at least one witnessing tab size.
				218	def not_equal_witness(self, other):
				219	n = max(self.longest_run_of_spaces(),
				220	other.longest_run_of_spaces()) + 1
				221	a = []
				222	for ts in range(1, n+1):
				223	if self.indent_level(ts) != other.indent_level(ts):
				224	a.append( (ts,
				225	self.indent_level(ts),
				226	other.indent_level(ts)) )
				227	return a
				228
				229	# Return True iff self.indent_level(t) < other.indent_level(t)
				230	# for all t >= 1.
				231	# The algorithm is due to Vincent Broman.
				232	# Easy to prove it's correct.
				233	# XXXpost that.
				234	# Trivial to prove n is sharp (consider T vs ST).
				235	# Unknown whether there's a faster general way. I suspected so at
				236	# first, but no longer.
				237	# For the special (but common!) case where M and N are both of the
				238	# form (T)(S), M.less(N) iff M.len() < N.len() and
				239	# M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
				240	# XXXwrite that up.
				241	# Note that M is of the form (T)(S) iff len(M.norm[0]) <= 1.
				242	def less(self, other):
				243	if self.n >= other.n:
				244	return False
				245	if self.is_simple and other.is_simple:
				246	return self.nt <= other.nt
				247	n = max(self.longest_run_of_spaces(),
				248	other.longest_run_of_spaces()) + 1
				249	# the self.n >= other.n test already did it for ts=1
				250	for ts in range(2, n+1):
				251	if self.indent_level(ts) >= other.indent_level(ts):
				252	return False
				253	return True
				254
				255	# return a list of tuples (ts, i1, i2) such that
				256	# i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
				257	# Intended to be used after not self.less(other) is known, in which
				258	# case it will return at least one witnessing tab size.
				259	def not_less_witness(self, other):
				260	n = max(self.longest_run_of_spaces(),
				261	other.longest_run_of_spaces()) + 1
				262	a = []
				263	for ts in range(1, n+1):
				264	if self.indent_level(ts) >= other.indent_level(ts):
				265	a.append( (ts,
				266	self.indent_level(ts),
				267	other.indent_level(ts)) )
				268	return a
				269
				270	def format_witnesses(w):
				271	firsts = (str(tup[0]) for tup in w)
				272	prefix = "at tab size"
				273	if len(w) > 1:
				274	prefix = prefix + "s"
				275	return prefix + " " + ', '.join(firsts)
				276
				277	def process_tokens(tokens):
				278	INDENT = tokenize.INDENT
				279	DEDENT = tokenize.DEDENT
				280	NEWLINE = tokenize.NEWLINE
				281	JUNK = tokenize.COMMENT, tokenize.NL
				282	indents = [Whitespace("")]
				283	check_equal = 0
				284
				285	for (type, token, start, end, line) in tokens:
				286	if type == NEWLINE:
				287	# a program statement, or ENDMARKER, will eventually follow,
				288	# after some (possibly empty) run of tokens of the form
				289	# (NL \| COMMENT)* (INDENT \| DEDENT+)?
				290	# If an INDENT appears, setting check_equal is wrong, and will
				291	# be undone when we see the INDENT.
				292	check_equal = 1
				293
				294	elif type == INDENT:
				295	check_equal = 0
				296	thisguy = Whitespace(token)
				297	if not indents[-1].less(thisguy):
				298	witness = indents[-1].not_less_witness(thisguy)
				299	msg = "indent not greater e.g. " + format_witnesses(witness)
				300	raise NannyNag(start[0], msg, line)
				301	indents.append(thisguy)
				302
				303	elif type == DEDENT:
				304	# there's nothing we need to check here! what's important is
				305	# that when the run of DEDENTs ends, the indentation of the
				306	# program statement (or ENDMARKER) that triggered the run is
				307	# equal to what's left at the top of the indents stack
				308
				309	# Ouch! This assert triggers if the last line of the source
				310	# is indented and lacks a newline -- then DEDENTs pop out
				311	# of thin air.
				312	# assert check_equal # else no earlier NEWLINE, or an earlier INDENT
				313	check_equal = 1
				314
				315	del indents[-1]
				316
				317	elif check_equal and type not in JUNK:
				318	# this is the first "real token" following a NEWLINE, so it
				319	# must be the first token of the next program statement, or an
				320	# ENDMARKER; the "line" argument exposes the leading whitespace
				321	# for this statement; in the case of ENDMARKER, line is an empty
				322	# string, so will properly match the empty string with which the
				323	# "indents" stack was seeded
				324	check_equal = 0
				325	thisguy = Whitespace(line)
				326	if not indents[-1].equal(thisguy):
				327	witness = indents[-1].not_equal_witness(thisguy)
				328	msg = "indent not equal e.g. " + format_witnesses(witness)
				329	raise NannyNag(start[0], msg, line)
				330
				331
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()