blob: 7973f26f98b8b245de3569580ca57e11736be7e9 [file] [log] [blame]
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001#! /usr/bin/env python3
2
3"""The Tab Nanny despises ambiguous indentation. She knows no mercy.
4
5tabnanny -- Detection of ambiguous indentation
6
7For the time being this module is intended to be called as a script.
8However it is possible to import it into an IDE and use the function
9check() described below.
10
11Warning: The API provided by this module is likely to change in future
12releases; such changes may not be backward compatible.
13"""
14
15# Released to the public domain, by Tim Peters, 15 April 1998.
16
17# XXX Note: this is now a standard library module.
18# XXX The API needs to undergo changes however; the current code is too
19# XXX script-like. This will be addressed later.
20
21__version__ = "6"
22
23import os
24import sys
25import tokenize
# process_tokens() relies on tokenize.NL to skip non-logical line breaks,
# so refuse to load against a tokenize module that does not provide it.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

# Option counters set by main(): each -v increments verbose, each -q
# increments filename_only.  check() reads both to decide what to print.
verbose = 0
filename_only = 0
33
def errprint(*args):
    """Write the arguments to standard error as one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and the line is terminated with a newline.  Calling
    errprint() with no arguments writes just the newline.
    """
    # Build the complete line first so it reaches stderr in a single
    # write() instead of one write per argument plus one for the newline.
    sys.stderr.write(" ".join(map(str, args)) + "\n")
40
def main():
    """Command-line entry point: parse the options, then check each argument.

    Options (taken from sys.argv[1:]):
        -q    increment the module-level filename_only counter
        -v    increment the module-level verbose counter

    A getopt failure or an empty argument list is reported through
    errprint() and main() returns without checking anything.
    """
    import getopt

    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return

    for flag, _value in opts:
        if flag == '-q':
            filename_only += 1
        elif flag == '-v':
            verbose += 1

    if args:
        for target in args:
            check(target)
    else:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
60
class NannyNag(Exception):
    """
    Signals that an ambiguous indent was detected.

    Raised by process_tokens(); captured and handled in check().
    Carries the line number, a diagnostic message and the text of the
    offending source line.
    """

    def __init__(self, lineno, msg, line):
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number passed to the constructor."""
        return self.lineno

    def get_msg(self):
        """Return the diagnostic message passed to the constructor."""
        return self.msg

    def get_line(self):
        """Return the source line text passed to the constructor."""
        return self.line
74
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using print().

    Directories that cannot be listed and files that cannot be opened,
    decoded or tokenized are reported on standard error through errprint()
    and skipped, so one bad entry does not abort a recursive scan.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        try:
            names = os.listdir(file)
        except OSError as msg:
            # e.g. permission denied: report it and carry on with siblings
            errprint("%r: I/O Error: %s" % (file, msg))
            return
        for name in names:
            fullname = os.path.join(file, name)
            # recurse into real sub-directories, and check anything whose
            # name ends in ".py" (case-normalised for the platform)
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return
    except (SyntaxError, UnicodeDecodeError) as msg:
        # tokenize.open() detects the source encoding before returning and
        # rejects an invalid or unknown encoding declaration.
        errprint("%r: Encoding Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % (file,))

    # the with-block closes f on every exit path, including the returns
    with f:
        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        # IndentationError is a subclass of SyntaxError, so it must be
        # listed first to keep its more specific message.
        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except SyntaxError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except UnicodeDecodeError as msg:
            # undecodable bytes met while reading lines for the tokenizer
            errprint("%r: Encoding Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # quote names containing a space so the terse output stays
                # unambiguous
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, badline, repr(line))
            return

    if verbose:
        print("%r: Clean bill of health." % (file,))
135
class Whitespace:
    """Normal-form summary of the leading whitespace of a string."""

    # the characters used for space and tab
    S, T = ' \t'

    # Instance attributes:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        self.raw = ws
        space, tab = Whitespace.S, Whitespace.T
        runs = []       # runs[i]: tabs that were preceded by exactly i spaces
        pending = 0     # spaces seen since the last tab (or since the start)
        width = 0       # leading whitespace characters consumed so far
        tabs = 0
        for ch in ws:
            if ch == space:
                width += 1
                pending += 1
            elif ch == tab:
                width += 1
                tabs += 1
                # grow the table so that index `pending` exists
                shortfall = pending - len(runs) + 1
                if shortfall > 0:
                    runs.extend([0] * shortfall)
                runs[pending] += 1
                pending = 0
            else:
                # first non-blank character ends the leading whitespace
                break
        self.n = width
        self.nt = tabs
        self.norm = tuple(runs), pending
        self.is_simple = len(runs) <= 1

    # largest tab size worth trying when comparing self with other: one
    # more than the longest run of spaces found in either of them
    def _tabsize_bound(self, other):
        longest = max(self.longest_run_of_spaces(),
                      other.longest_run_of_spaces())
        return longest + 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        runs, trailing = self.norm
        return max(len(runs) - 1, trailing)

    def indent_level(self, tabsize):
        # The straightforward form is
        #     il = trailing + sum (i//ts + 1)*ts*count[i]
        # which rearranges to
        #     il = trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and i//ts*count[i] is 0 when i < ts, so the sum can start at ts.
        runs, trailing = self.norm
        extra = sum(i // tabsize * runs[i]
                    for i in range(tabsize, len(runs)))
        return trailing + tabsize * (extra + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        witnesses = []
        for ts in range(1, self._tabsize_bound(other) + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # The bound on the tab sizes to try is sharp (consider T vs ST).
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs().
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        # the self.n >= other.n test already did it for ts=1
        return all(self.indent_level(ts) < other.indent_level(ts)
                   for ts in range(2, self._tabsize_bound(other) + 1))

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        witnesses = []
        for ts in range(1, self._tabsize_bound(other) + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses
269
def format_witnesses(w):
    """Describe the tab sizes found in a witness list.

    w is a sequence of tuples whose first item is a tab size, such as the
    lists returned by Whitespace.not_equal_witness() and
    Whitespace.not_less_witness().  The result reads "at tab size N" for
    at most one entry and "at tab sizes N, M, ..." for several.
    """
    label = "at tab sizes" if len(w) > 1 else "at tab size"
    sizes = [str(entry[0]) for entry in w]
    return label + " " + ', '.join(sizes)
276
def process_tokens(tokens):
    """Scan a token stream for ambiguous indentation.

    tokens is an iterable of 5-tuples (type, string, start, end, line) as
    produced by tokenize.generate_tokens().  Raises NannyNag as soon as an
    INDENT is not strictly deeper than the enclosing indent for every tab
    size, or a statement's leading whitespace does not match the indent
    currently on top of the stack for every tab size.
    """
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indents = [Whitespace("")]
    check_equal = False

    for tok_type, tok_text, start, _end, line in tokens:
        if tok_type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = True

        elif tok_type == INDENT:
            check_equal = False
            enclosing = indents[-1]
            candidate = Whitespace(tok_text)
            if not enclosing.less(candidate):
                witness = enclosing.not_less_witness(candidate)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(candidate)

        elif tok_type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # No assertion on check_equal here: if the last line of the
            # source is indented *and* lacks a newline, DEDENTs arrive
            # without a preceding NEWLINE.
            check_equal = True

            indents.pop()

        elif check_equal and tok_type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = False
            expected = indents[-1]
            actual = Whitespace(line)
            if not expected.equal(actual):
                witness = expected.not_equal_witness(actual)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
330
331
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()