"""Find modules used by a script, using introspection."""
2
3import dis
4import importlib._bootstrap_external
5import importlib.machinery
6import marshal
7import os
8import io
9import sys
10
11
# Opcodes the bytecode scanner (ModuleFinder.scan_opcodes) looks for.
LOAD_CONST = dis.opmap['LOAD_CONST']
IMPORT_NAME = dis.opmap['IMPORT_NAME']
STORE_NAME = dis.opmap['STORE_NAME']
STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
STORE_OPS = STORE_NAME, STORE_GLOBAL
EXTENDED_ARG = dis.EXTENDED_ARG

# Old imp constants:

_SEARCH_ERROR = 0
_PY_SOURCE = 1
_PY_COMPILED = 2
_C_EXTENSION = 3
_PKG_DIRECTORY = 5
_C_BUILTIN = 6
_PY_FROZEN = 7
28
# Modulefinder does a good job at simulating Python's import machinery, but
# it cannot handle __path__ modifications packages make at runtime. Therefore
# there is a mechanism whereby you can register extra paths in this map for a
# package, and they will be honored.

# Note: this is a mapping from package names to lists of paths.
packagePathMap = {}


# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search path for package *packagename*.

    The path is appended to the list stored under *packagename* in
    packagePathMap; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
40
replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name
# of another package into sys.modules at runtime by calling
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.


def ReplacePackage(oldname, newname):
    """Record in replacePackageMap that *oldname* maps to *newname*.

    A later call with the same *oldname* overwrites the earlier entry.
    """
    replacePackageMap[oldname] = newname
51
52
def _find_module(name, path=None):
    """An importlib reimplementation of imp.find_module (for our purposes).

    Returns a ``(file, pathname, (suffix, mode, kind))`` triple where *kind*
    is one of the old imp constants:

    * built-in / frozen modules: ``(None, None, ("", "", kind))``
    * packages: ``(None, <package directory>, ("", "", _PKG_DIRECTORY))``
    * source, bytecode and extension files: an open binary file object, the
      file's path and ``(suffix, "rb", kind)``; the caller must close the file
    * any other loader: ``(None, None, ("", "", _SEARCH_ERROR))``

    Raises ImportError when no spec is found, or when the spec has no loader
    (a namespace package, which has no file or ``__init__`` to scan).
    """

    # It's necessary to clear the caches for our Finder first, in case any
    # modules are being added/deleted/modified at runtime. In particular,
    # test_modulefinder.py changes file tree contents in a cache-breaking way:

    importlib.machinery.PathFinder.invalidate_caches()

    spec = importlib.machinery.PathFinder.find_spec(name, path)

    if spec is None:
        raise ImportError("No module named {name!r}".format(name=name), name=name)

    # Some special cases:

    if spec.loader is importlib.machinery.BuiltinImporter:
        return None, None, ("", "", _C_BUILTIN)

    if spec.loader is importlib.machinery.FrozenImporter:
        return None, None, ("", "", _PY_FROZEN)

    if spec.loader is None:
        # PathFinder returns loader-less specs for namespace packages.
        # Calling spec.loader.is_package() on them would die with an
        # AttributeError; report them as not importable instead so callers
        # that already handle ImportError keep working.
        raise ImportError(
            "{name!r} is a namespace package, which is not supported".format(
                name=name),
            name=name)

    file_path = spec.origin

    if spec.loader.is_package(name):
        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)

    if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
        kind = _PY_SOURCE

    elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
        kind = _C_EXTENSION

    elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
        kind = _PY_COMPILED

    else:  # Should never happen.
        return None, None, ("", "", _SEARCH_ERROR)

    file = io.open_code(file_path)
    suffix = os.path.splitext(file_path)[-1]

    return file, file_path, (suffix, "rb", kind)
96
97
class Module:
    """Record describing one module found by ModuleFinder."""

    def __init__(self, name, file=None, path=None):
        """Store the module's name plus optional file and package path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # The set of global names that are assigned to in the module.
        # This includes those names imported through starimports of
        # Python modules.  (Stored as a dict used as a set.)
        self.globalnames = {}
        # The set of starimports this module did that could not be
        # resolved, ie. a starimport from a non-Python module.
        self.starimports = {}

    def __repr__(self):
        """Return ``Module(name[, file[, path]])``, omitting None fields."""
        parts = [repr(self.__name__)]
        if self.__file__ is not None:
            parts.append(repr(self.__file__))
        if self.__path__ is not None:
            parts.append(repr(self.__path__))
        return "Module(%s)" % ", ".join(parts)
121
class ModuleFinder:
    """Determine the modules a script imports by scanning its bytecode.

    Found modules are collected in ``self.modules`` (name -> module record);
    imports that could not be resolved are collected in ``self.badmodules``
    (name -> {importing module name: 1}).
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        """Set up an empty finder.

        path          -- list of directories to search (default: sys.path)
        debug         -- verbosity level used by msg()/msgin()/msgout()
        excludes      -- module names find_module() refuses to look up
        replace_paths -- (old_prefix, new_prefix) pairs applied to
                         co_filename by replace_paths_in_code()
        """
        if path is None:
            path = sys.path
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        self.excludes = excludes if excludes is not None else []
        self.replace_paths = replace_paths if replace_paths is not None else []
        self.processed_paths = []   # Used in debugging only

    # NOTE: the parameter name 'str' shadows the builtin; it is kept because
    # it is part of the method's signature.
    def msg(self, level, str, *args):
        """Print an indented debug line if *level* <= self.debug."""
        if level <= self.debug:
            for i in range(self.indent):
                print(" ", end=' ')
            print(str, end=' ')
            for arg in args:
                print(repr(arg), end=' ')
            print()

    def msgin(self, *args):
        """Like msg(), but increase the indentation first."""
        level = args[0]
        if level <= self.debug:
            self.indent += 1
            self.msg(*args)

    def msgout(self, *args):
        """Like msg(), but decrease the indentation first."""
        level = args[0]
        if level <= self.debug:
            self.indent -= 1
            self.msg(*args)

    def run_script(self, pathname):
        """Load the file *pathname* as the module '__main__'."""
        self.msg(2, "run_script", pathname)
        with io.open_code(pathname) as fp:
            stuff = ("", "rb", _PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)

    def load_file(self, pathname):
        """Load the source file *pathname* under its base name."""
        name, ext = os.path.splitext(os.path.basename(pathname))
        with io.open_code(pathname) as fp:
            stuff = (ext, "rb", _PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Simulate ``import name`` / ``from name import fromlist``.

        Returns the head package when *fromlist* is empty, otherwise None.
        Raises ImportError when a component cannot be found.
        """
        self.msg(3, "import_hook", name, caller, fromlist, level)
        parent = self.determine_parent(caller, level=level)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller, level=-1):
        """Return the package an import inside *caller* is relative to.

        Returns None for absolute imports (level == 0), when there is no
        caller, or when the caller is a top-level non-package module.
        Raises ImportError when *level* reaches above the top-level package.
        """
        self.msgin(4, "determine_parent", caller, level)
        if not caller or level == 0:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if level >= 1:  # relative import
            if caller.__path__:
                # A package counts as its own first level.
                level -= 1
            if level == 0:
                parent = self.modules[pname]
                assert parent is caller
                self.msgout(4, "determine_parent ->", parent)
                return parent
            if pname.count(".") < level:
                raise ImportError("relative importpath too deep")
            pname = ".".join(pname.split(".")[:-level])
            parent = self.modules[pname]
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            i = pname.rfind('.')
            pname = pname[:i]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first dotted component of *name*.

        The component is tried relative to *parent* first and then as a
        top-level module.  Returns ``(module, remaining_dotted_tail)``;
        raises ImportError if neither attempt succeeds.
        """
        self.msgin(4, "find_head_package", parent, name)
        head, _, tail = name.partition('.')
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            # Fall back to an absolute import of the head.
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining component of *tail* below module *q*.

        Returns the innermost module; raises ImportError on the first
        component that cannot be imported.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            head, _, tail = tail.partition('.')
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every name in *fromlist* that is a submodule of *m*.

        A "*" entry expands (once, guarded by *recursive*) to all submodules
        found on disk.  Raises ImportError for a name that is neither an
        attribute of *m* nor an importable submodule.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    submodules = self.find_all_submodules(m)
                    if submodules:
                        self.ensure_fromlist(m, submodules, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """Return the names of all module files in the directories of *m*.

        Returns None when *m* has no ``__path__``; otherwise a keys view of
        the distinct module names (``__init__`` excluded).
        """
        if not m.__path__:
            return
        modules = {}
        # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
        # But we must also collect Python extension modules - although
        # we cannot separate normal dlls from Python extensions.
        suffixes = (importlib.machinery.EXTENSION_SUFFIXES
                    + importlib.machinery.SOURCE_SUFFIXES
                    + importlib.machinery.BYTECODE_SUFFIXES)
        for dir in m.__path__:
            try:
                names = os.listdir(dir)
            except OSError:
                self.msg(2, "can't list directory", dir)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    if name.endswith(suff):
                        mod = name[:-len(suff)]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return modules.keys()

    def import_module(self, partname, fqname, parent):
        """Return the module *fqname*, loading it if necessary.

        Returns None (instead of raising) when the module is already known
        to be bad, when *parent* is not a package, or when it cannot be found.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            return None
        if parent and parent.__path__ is None:
            self.msgout(3, "import_module -> None")
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__,
                                                   parent)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None

        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp:
                fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Create the module record for *fqname* and scan its code.

        *file_info* is the ``(suffix, mode, kind)`` triple produced by
        find_module().  Source is compiled, bytecode is validated and
        unmarshalled, anything else is registered without code.
        """
        suffix, mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == _PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if kind == _PY_SOURCE:
            co = compile(fp.read(), pathname, 'exec')
        elif kind == _PY_COMPILED:
            try:
                data = fp.read()
                importlib._bootstrap_external._classify_pyc(data, fqname, {})
            except ImportError as exc:
                self.msgout(2, "raise ImportError: " + str(exc), pathname)
                raise
            # Skip the 16-byte .pyc header; the rest is the marshalled code.
            co = marshal.loads(memoryview(data)[16:])
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def _add_badmodule(self, name, caller):
        """Record that *caller* (or "-" if none) failed to import *name*."""
        importers = self.badmodules.setdefault(name, {})
        if caller:
            importers[caller.__name__] = 1
        else:
            importers["-"] = 1

    def _safe_import_hook(self, name, caller, fromlist, level=-1):
        """Call import_hook(), recording failures instead of raising."""
        # wrapper for self.import_hook() that won't raise ImportError
        if name in self.badmodules:
            self._add_badmodule(name, caller)
            return
        try:
            self.import_hook(name, caller, level=level)
        except ImportError as exc:
            self.msg(2, "ImportError:", str(exc))
            self._add_badmodule(name, caller)
        except SyntaxError as exc:
            self.msg(2, "SyntaxError:", str(exc))
            self._add_badmodule(name, caller)
        else:
            if fromlist:
                for sub in fromlist:
                    fullname = name + "." + sub
                    if fullname in self.badmodules:
                        self._add_badmodule(fullname, caller)
                        continue
                    try:
                        self.import_hook(name, caller, [sub], level=level)
                    except ImportError as exc:
                        self.msg(2, "ImportError:", str(exc))
                        self._add_badmodule(fullname, caller)

    def scan_opcodes(self, co):
        """Yield ``(what, args)`` for the stores and imports in *co*.

        *what* is "store", "absolute_import" or "relative_import".
        """
        # Scan the code, and yield 'interesting' opcode combinations
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        # EXTENDED_ARG entries are dropped so that the two instructions
        # before an IMPORT_NAME can be inspected by position.
        opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
                  if op != EXTENDED_ARG]
        for i, (op, oparg) in enumerate(opargs):
            if op in STORE_OPS:
                yield "store", (names[oparg],)
                continue
            if (op == IMPORT_NAME and i >= 2
                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
                level = consts[opargs[i-2][1]]
                fromlist = consts[opargs[i-1][1]]
                if level == 0:  # absolute import
                    yield "absolute_import", (fromlist, names[oparg])
                else:  # relative import
                    yield "relative_import", (level, fromlist, names[oparg])

    def scan_code(self, co, m):
        """Process every import and global store found in *co* for module *m*.

        Nested code objects in ``co.co_consts`` are scanned recursively.
        """
        for what, args in self.scan_opcodes(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    try:
                        parent = self.determine_parent(m, level=level)
                    except ImportError as exc:
                        # e.g. "from . import x" in a module that has no
                        # parent package: record the names as missing
                        # rather than aborting the whole scan.
                        self.msg(2, "ImportError:", str(exc))
                        for sub in fromlist or ():
                            self._add_badmodule("." * level + sub, m)
                    else:
                        self._safe_import_hook(parent.__name__, None,
                                               fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)

    def load_package(self, fqname, pathname):
        """Register the package *fqname* and load its ``__init__`` module."""
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, init_path, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, init_path, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()

    def add_module(self, fqname):
        """Return the record for *fqname*, creating it if needed."""
        if fqname in self.modules:
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m

    def find_module(self, name, path, parent=None):
        """Locate *name* on *path* (default: self.path).

        Returns the ``(file, pathname, file_info)`` triple of _find_module().
        Raises ImportError for excluded or unknown modules.
        """
        if parent is not None:
            # assert path is not None
            fullname = parent.__name__+'.'+name
        else:
            fullname = name
        if fullname in self.excludes:
            self.msgout(3, "find_module -> Excluded", fullname)
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", _C_BUILTIN))

            path = self.path

        return _find_module(name, path)

    def report(self):
        """Print a report to stdout, listing the found modules with their
        paths, as well as modules that are missing, or seem to be missing.
        """
        print()
        # Two leading spaces so the headings line up with the names that
        # follow the one-character "P"/"m" marker and its separator.
        print("  %-25s %s" % ("Name", "File"))
        print("  %-25s %s" % ("----", "----"))
        # Print modules found
        keys = sorted(self.modules.keys())
        for key in keys:
            m = self.modules[key]
            if m.__path__:
                print("P", end=' ')
            else:
                print("m", end=' ')
            print("%-25s" % key, m.__file__ or "")

        # Print missing modules
        missing, maybe = self.any_missing_maybe()
        if missing:
            print()
            print("Missing modules:")
            for name in missing:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))
        # Print modules that may be missing, but then again, maybe not...
        if maybe:
            print()
            print("Submodules that appear to be missing, but could also be", end=' ')
            print("global names in the parent package:")
            for name in maybe:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))

    def any_missing(self):
        """Return a list of modules that appear to be missing. Use
        any_missing_maybe() if you want to know which modules are
        certain to be missing, and which *may* be missing.
        """
        missing, maybe = self.any_missing_maybe()
        return missing + maybe

    def any_missing_maybe(self):
        """Return two lists, one with modules that are certainly missing
        and one with modules that *may* be missing. The latter names could
        either be submodules *or* just global names in the package.

        The reason it can't always be determined is that it's impossible to
        tell which names are imported when "from module import *" is done
        with an extension module, short of actually importing it.
        """
        missing = []
        maybe = []
        for name in self.badmodules:
            if name in self.excludes:
                continue
            i = name.rfind(".")
            if i < 0:
                missing.append(name)
                continue
            subname = name[i+1:]
            pkgname = name[:i]
            pkg = self.modules.get(pkgname)
            if pkg is not None:
                if pkgname in self.badmodules[name]:
                    # The package tried to import this module itself and
                    # failed. It's definitely missing.
                    missing.append(name)
                elif subname in pkg.globalnames:
                    # It's a global in the package: definitely not missing.
                    pass
                elif pkg.starimports:
                    # It could be missing, but the package did an "import *"
                    # from a non-Python module, so we simply can't be sure.
                    maybe.append(name)
                else:
                    # It's not a global in the package, the package didn't
                    # do funny star imports, it's very likely to be missing.
                    # The symbol could be inserted into the package from the
                    # outside, but since that's not good style we simply list
                    # it missing.
                    missing.append(name)
            else:
                missing.append(name)
        missing.sort()
        maybe.sort()
        return missing, maybe

    def replace_paths_in_code(self, co):
        """Return a copy of *co* with co_filename rewritten.

        The first ``(old, new)`` pair in self.replace_paths whose *old* is a
        prefix of the normalized filename is applied; nested code objects in
        co_consts are rewritten recursively.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r + original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            if new_filename != original_filename:
                self.msgout(2, "co_filename %r changed to %r"
                               % (original_filename, new_filename,))
            else:
                self.msgout(2, "co_filename %r remains unchanged"
                               % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = tuple(
            self.replace_paths_in_code(c) if isinstance(c, type(co)) else c
            for c in co.co_consts
        )

        return co.replace(co_consts=consts, co_filename=new_filename)
620
621
def test():
    """Command-line driver: analyse a script and print the module report.

    Options (parsed from sys.argv):
      -d        increase the debug level (repeatable)
      -m        treat the following extra arguments as module names
      -p PATH   prepend the os.pathsep-separated PATH entries to the search path
      -q        quiet (debug level 0)
      -x NAME   exclude module NAME (repeatable)

    The first positional argument is the script to analyse (default
    "hello.py"); further arguments are loaded as files, or imported as
    modules after -m.  Returns the ModuleFinder instance, or None when the
    options cannot be parsed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for o, a in opts:
        if o == '-d':
            debug = debug + 1
        elif o == '-m':
            domods = 1
        elif o == '-p':
            addpath.extend(a.split(os.pathsep))
        elif o == '-q':
            debug = 0
        elif o == '-x':
            exclude.append(a)

    # Provide default arguments
    if not args:
        script = "hello.py"
    else:
        script = args[0]

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for item in path:
            print(" ", repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
            continue
        if domods:
            if arg[-2:] == '.*':
                mf.import_hook(arg[:-2], None, ["*"])
            else:
                mf.import_hook(arg)
        else:
            mf.load_file(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
679
680
if __name__ == '__main__':
    # Run the command-line driver; a Ctrl-C prints a short notice instead
    # of a traceback.
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")
685 print("\n[interrupted]")