blob: 99a17343fb61fdaa0f13422fa0325b95aeb88a4b [file] [log] [blame]
"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
function definitions, and to find out the superclasses of a class.

The interface consists of a single function:
    readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return value
is a dictionary.  The keys of the dictionary are the names of the
classes and functions defined in the module (including classes that are
defined via the from XXX import YYY construct).  The values are
instances of classes Class and Function.  One special key/value pair is
present for packages: the key '__path__' has a list as its value which
contains the package search path.

Classes and Functions have a common superclass: _Object.  Every instance
has the following attributes:
    module  -- name of the module;
    name    -- name of the object;
    file    -- file in which the object is defined;
    lineno  -- line in the file where the object's definition starts;
    parent  -- parent of this object, if any;
    children -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects.

Instances of Function describe functions with the attributes from _Object.

Instances of Class describe classes with the attributes from _Object,
plus the following:
    super   -- list of super classes (Class instances if possible);
    methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
"""
40
41import io
42import sys
43import importlib.util
44import tokenize
45from token import NAME, DEDENT, OP
46
# Names exported by "from <this module> import *".
__all__ = [
    "readmodule",
    "readmodule_ex",
    "Class",
    "Function",
]

# Cache of the modules already read: full module name -> tree dictionary.
_modules = {}
50
51
class _Object:
    """Information about Python class or function.

    Attributes set by the constructor:
        module   -- value passed as module
        name     -- value passed as name
        file     -- value passed as file
        lineno   -- value passed as lineno
        parent   -- value passed as parent
        children -- dictionary, initially empty, filled by _addchild()
    """

    def __init__(self, module, name, file, lineno, parent):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
        self.parent = parent
        self.children = {}

    def _addchild(self, name, obj):
        """Store obj in the children mapping under name."""
        self.children[name] = obj
64
65
class Function(_Object):
    """Information about a Python function, including methods.

    Adds nothing to _Object except that parent may be omitted, in which
    case it is None.
    """

    def __init__(self, module, name, file, lineno, parent=None):
        super().__init__(module, name, file, lineno, parent)
70
71
class Class(_Object):
    """Information about a Python class.

    In addition to the _Object attributes:
        super   -- the list passed as super, or a new empty list if None
        methods -- dictionary, initially empty, filled by _addmethod()
    """

    def __init__(self, module, name, super, file, lineno, parent=None):
        # The parameter named 'super' hides the builtin inside this method,
        # so the base initializer has to be called explicitly.
        _Object.__init__(self, module, name, file, lineno, parent)
        if super is None:
            super = []
        self.super = super
        self.methods = {}

    def _addmethod(self, name, lineno):
        """Record lineno in the methods mapping under name."""
        self.methods[name] = lineno
81
82
def _nest_function(ob, func_name, lineno):
    """Return a Function after nesting within ob.

    The new Function takes its module and file from ob and has ob as its
    parent.  It is added to ob's children; when ob is a Class it is also
    recorded in ob's methods.
    """
    nested = Function(ob.module, func_name, ob.file, lineno, ob)
    ob._addchild(func_name, nested)
    owner_is_class = isinstance(ob, Class)
    if owner_is_class:
        ob._addmethod(func_name, lineno)
    return nested
90
def _nest_class(ob, class_name, lineno, super=None):
    """Return a Class after nesting within ob.

    The new Class takes its module and file from ob, has ob as its parent
    and is added to ob's children.  super is passed through to Class.
    """
    nested = Class(ob.module, class_name, super, ob.file, lineno, ob)
    ob._addchild(class_name, nested)
    return nested
96
def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added.  The
    result is a new dictionary holding only those entries of the module's
    tree whose value is a Class instance.
    """
    everything = _readmodule(module, path or [])
    return {
        name: entry
        for name, entry in everything.items()
        if isinstance(entry, Class)
    }
108
def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.
    """
    # A missing (or otherwise false) path means no extra directories.
    extra_dirs = path if path else []
    return _readmodule(module, extra_dirs)
117
def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the tree dictionary for the module.  Results are cached in
    the global _modules under the full module name, so a repeated request
    returns the same dictionary object.

    Raise ImportError if a dotted name goes through something that has
    no '__path__' entry, and ModuleNotFoundError if no spec is found.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the parent package first and
    # then look for the last component on the parent's search path.
    package, dot, submodule = module.rpartition('.')
    if dot:
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)
    # Cache the (still empty) tree before reading the source, so that a
    # recursive request for this module made while it is being parsed
    # gets the cached tree instead of starting over.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    if source is None:
        return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
180
181
def _close_scopes(stack, indent):
    "Pop the (object, indent) pairs whose indent is not less than indent."
    while stack and stack[-1][1] >= indent:
        stack.pop()


def _lookup_base(name, tree):
    "Return the object known for super class text name, else name itself."
    if name in tree:
        # We know this super class.
        return tree[name]
    modname, dot, attr = name.rpartition('.')
    if dot:
        # Super class form is module.class: look in module for class.
        # The cache is keyed by full module name, so try the whole dotted
        # prefix first and then only its last component.
        for candidate in (modname, modname.rpartition('.')[2]):
            known = _modules.get(candidate)
            if known is not None and attr in known:
                return known[attr]
    return name


def _read_bases(g, tree):
    "Consume tokens through the ')' ending a base list; return the bases."
    bases = []
    level = 1
    parts = []  # Tokens making up current superclass.
    while True:
        tokentype, token = next(g)[0:2]
        if token in (')', ',') and level == 1:
            text = "".join(parts)
            # 'class A():' and a trailing comma produce no tokens for an
            # entry; an empty string is not a super class, so skip it.
            if text:
                bases.append(_lookup_base(text, tree))
            parts = []
        if token == '(':
            level += 1
        elif token == ')':
            level -= 1
            if level == 0:
                break
        elif token == ',' and level == 1:
            pass
        # Only use NAME and OP (== dot) tokens for type name.
        elif tokentype in (NAME, OP) and level == 1:
            parts.append(token)
        # Expressions in the base list are not supported.
    return bases


def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Return the tree for a particular module.

    fullmodule (full module name), inpackage+module, becomes o.module.
    path is passed to recursive calls of _readmodule.
    fname becomes o.file.
    source is tokenized.  Imports cause recursive calls to _readmodule.
    tree is {} or {'__path__': <submodule search locations>}.
    inpackage, None or string, is passed to recursive calls of _readmodule.

    The effect of recursive calls is mutation of global _modules.

    tree is filled in place and is also the return value.  Running out of
    tokens in the middle of a construct just ends the scan.  Exceptions
    raised while reading an imported module are ignored, apart from those
    that do not derive from Exception (such as KeyboardInterrupt).
    """
    stack = []  # Initialize stack of (class, indent) pairs.

    with io.StringIO(source) as f:
        g = tokenize.generate_tokens(f.readline)
        try:
            for tokentype, token, start, _end, _line in g:
                if tokentype == DEDENT:
                    # Close previous nested classes and defs.
                    _close_scopes(stack, start[1])
                elif token == 'def':
                    lineno, thisindent = start
                    # Close previous nested classes and defs.
                    _close_scopes(stack, thisindent)
                    tokentype, func_name = next(g)[0:2]
                    if tokentype != NAME:
                        continue  # Skip def with syntax error.
                    if stack:
                        cur_func = _nest_function(
                            stack[-1][0], func_name, lineno)
                    else:
                        # It is just a function.
                        cur_func = Function(
                            fullmodule, func_name, fname, lineno)
                        tree[func_name] = cur_func
                    stack.append((cur_func, thisindent))
                elif token == 'class':
                    lineno, thisindent = start
                    # Close previous nested classes and defs.
                    _close_scopes(stack, thisindent)
                    tokentype, class_name = next(g)[0:2]
                    if tokentype != NAME:
                        continue  # Skip class with syntax error.
                    # Parse what follows the class name.
                    token = next(g)[1]
                    inherit = _read_bases(g, tree) if token == '(' else None
                    if stack:
                        cur_class = _nest_class(
                            stack[-1][0], class_name, lineno, inherit)
                    else:
                        cur_class = Class(fullmodule, class_name, inherit,
                                          fname, lineno)
                        tree[class_name] = cur_class
                    stack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    for mod, _mod2 in _getnamelist(g):
                        try:
                            # Recursively read the imported module.
                            if inpackage is None:
                                _readmodule(mod, path)
                            else:
                                try:
                                    _readmodule(mod, path, inpackage)
                                except ImportError:
                                    _readmodule(mod, [])
                        except Exception:
                            # If we can't find or parse the imported module,
                            # too bad -- don't die here.
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module.
                        d = _readmodule(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        continue
                    # Add any classes that were defined in the imported
                    # module to our name space if they were mentioned in
                    # the list.
                    for n, n2 in names:
                        if n in d:
                            tree[n2 or n] = d[n]
                        elif n == '*':
                            # Don't add names that start with _.
                            for key in d:
                                if not key.startswith('_'):
                                    tree[key] = d[key]
        except StopIteration:
            pass

    return tree
323
324
def _getnamelist(g):
    """Return list of (dotted-name, as-name or None) tuples for token source g.

    An as-name is the name that follows 'as' in an as clause.  Reading
    stops at the first entry that does not start with a name, or when an
    entry is followed by the end of the line rather than by a comma.
    """
    collected = []
    dotted, token = _getname(g)
    while dotted:
        alias = None
        if token == 'as':
            alias, token = _getname(g)
        collected.append((dotted, alias))
        # Skip ahead to the comma or to the token holding the line end.
        while not (token == "," or "\n" in token):
            token = next(g)[1]
        if token != ",":
            break
        dotted, token = _getname(g)
    return collected
345
346
def _getname(g):
    """Return (dotted-name or None, next-token) tuple for token source g.

    The first token must be a NAME or '*'; otherwise the result is None
    together with that token.  Further '.NAME' pairs extend the dotted
    name.  The token returned is the first one that is not part of it.
    """
    kind, text = next(g)[0:2]
    if not (kind == NAME or text == '*'):
        return (None, text)
    pieces = [text]
    kind, text = next(g)[0:2]
    while text == '.':
        kind, text = next(g)[0:2]
        if kind != NAME:
            break
        pieces.append(text)
        kind, text = next(g)[0:2]
    return (".".join(pieces), text)
363
364
def _main():
    """Print module output (default this file) for quick visual check.

    The module is named by the first command-line argument.  If that
    names an existing path, its directory is used as the search path and
    its base name, without a trailing '.py', as the module name.  One
    line is printed per class and function, ordered by line number, with
    nested objects indented two more columns than their parent.
    """
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)

    def lineno_key(a):
        # Values without a lineno (the '__path__' entry) sort as line 0.
        return getattr(a, 'lineno', 0)

    indent_level = 2
    # Work list of (object, indent) pairs.  It is sorted in descending
    # line order because pop() takes from the end, so objects come off in
    # ascending line order.  Keeping the indent in the pair avoids writing
    # an attribute onto the cached objects.
    pending = [(obj, 0)
               for obj in sorted(tree.values(), key=lineno_key, reverse=True)]
    while pending:
        obj, indent = pending.pop()
        if not isinstance(obj, _Object):
            # Value is a __path__ key.
            continue
        children = sorted(obj.children.values(),
                          key=lineno_key, reverse=True)
        pending.extend((child, indent + indent_level) for child in children)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * indent, obj.name, obj.lineno))


if __name__ == "__main__":
    _main()