Blame - linux-x64/clang/python3/lib/python3.9/profile.py - hafnium/prebuilts

blob: 5cb017ed8300993ad9767db967e105926e2f0159 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	#! /usr/bin/env python3
				2	#
				3	# Class for profiling python code. rev 1.0 6/2/94
				4	#
				5	# Written by James Roskind
				6	# Based on prior profile module by Sjoerd Mullender...
				7	# which was hacked somewhat by: Guido van Rossum
				8
				9	"""Class for profiling Python code."""
				10
				11	# Copyright Disney Enterprises, Inc. All Rights Reserved.
				12	# Licensed to PSF under a Contributor Agreement
				13	#
				14	# Licensed under the Apache License, Version 2.0 (the "License");
				15	# you may not use this file except in compliance with the License.
				16	# You may obtain a copy of the License at
				17	#
				18	# http://www.apache.org/licenses/LICENSE-2.0
				19	#
				20	# Unless required by applicable law or agreed to in writing, software
				21	# distributed under the License is distributed on an "AS IS" BASIS,
				22	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
				23	# either express or implied. See the License for the specific language
				24	# governing permissions and limitations under the License.
				25
				26
				27	import sys
				28	import time
				29	import marshal
				30
				31	__all__ = ["run", "runctx", "Profile"]
				32
				33	# Sample timer for use with
				34	#i_count = 0
				35	#def integer_timer():
				36	# global i_count
				37	# i_count = i_count + 1
				38	# return i_count
				39	#itimes = integer_timer # replace with C coded timer returning integers
				40
				41	class _Utils:
				42	"""Support class for utility functions which are shared by
				43	profile.py and cProfile.py modules.
				44	Not supposed to be used directly.
				45	"""
				46
				47	def __init__(self, profiler):
				48	self.profiler = profiler
				49
				50	def run(self, statement, filename, sort):
				51	prof = self.profiler()
				52	try:
				53	prof.run(statement)
				54	except SystemExit:
				55	pass
				56	finally:
				57	self._show(prof, filename, sort)
				58
				59	def runctx(self, statement, globals, locals, filename, sort):
				60	prof = self.profiler()
				61	try:
				62	prof.runctx(statement, globals, locals)
				63	except SystemExit:
				64	pass
				65	finally:
				66	self._show(prof, filename, sort)
				67
				68	def _show(self, prof, filename, sort):
				69	if filename is not None:
				70	prof.dump_stats(filename)
				71	else:
				72	prof.print_stats(sort)
				73
				74
				75	#**************************************************************************
				76	# The following are the static member functions for the profiler class
				77	# Note that an instance of Profile() is not needed to call them.
				78	#**************************************************************************
				79
				80	def run(statement, filename=None, sort=-1):
				81	"""Run statement under profiler optionally saving results in filename
				82
				83	This function takes a single argument that can be passed to the
				84	"exec" statement, and an optional file name. In all cases this
				85	routine attempts to "exec" its first argument and gather profiling
				86	statistics from the execution. If no file name is present, then this
				87	function automatically prints a simple profiling report, sorted by the
				88	standard name string (file/line/function-name) that is presented in
				89	each line.
				90	"""
				91	return _Utils(Profile).run(statement, filename, sort)
				92
				93	def runctx(statement, globals, locals, filename=None, sort=-1):
				94	"""Run statement under profiler, supplying your own globals and locals,
				95	optionally saving results in filename.
				96
				97	statement and filename have the same semantics as profile.run
				98	"""
				99	return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
				100
				101
				102	class Profile:
				103	"""Profiler class.
				104
				105	self.cur is always a tuple. Each such tuple corresponds to a stack
				106	frame that is currently active (self.cur[-2]). The following are the
				107	definitions of its members. We use this external "parallel stack" to
				108	avoid contaminating the program that we are profiling. (old profiler
				109	used to write into the frames local dictionary!!) Derived classes
				110	can change the definition of some entries, as long as they leave
				111	[-2:] intact (frame and previous tuple). In case an internal error is
				112	detected, the -3 element is used as the function name.
				113
				114	[ 0] = Time that needs to be charged to the parent frame's function.
				115	It is used so that a function call will not have to access the
				116	timing data for the parent frame.
				117	[ 1] = Total time spent in this frame's function, excluding time in
				118	subfunctions (this latter is tallied in cur[2]).
				119	[ 2] = Total time spent in subfunctions, excluding time executing the
				120	frame's function (this latter is tallied in cur[1]).
				121	[-3] = Name of the function that corresponds to this frame.
				122	[-2] = Actual frame that we correspond to (used to sync exception handling).
				123	[-1] = Our parent 6-tuple (corresponds to frame.f_back).
				124
				125	Timing data for each function is stored as a 5-tuple in the dictionary
				126	self.timings[]. The index is always the name stored in self.cur[-3].
				127	The following are the definitions of the members:
				128
				129	[0] = The number of times this function was called, not counting direct
				130	or indirect recursion,
				131	[1] = Number of times this function appears on the stack, minus one
				132	[2] = Total time spent internal to this function
				133	[3] = Cumulative time that this function was present on the stack. In
				134	non-recursive functions, this is the total execution time from start
				135	to finish of each invocation of a function, including time spent in
				136	all subfunctions.
				137	[4] = A dictionary indicating for each function name, the number of times
				138	it was called by us.
				139	"""
				140
				141	bias = 0 # calibration constant
				142
				143	def __init__(self, timer=None, bias=None):
				144	self.timings = {}
				145	self.cur = None
				146	self.cmd = ""
				147	self.c_func_name = ""
				148
				149	if bias is None:
				150	bias = self.bias
				151	self.bias = bias # Materialize in local dict for lookup speed.
				152
				153	if not timer:
				154	self.timer = self.get_time = time.process_time
				155	self.dispatcher = self.trace_dispatch_i
				156	else:
				157	self.timer = timer
				158	t = self.timer() # test out timer function
				159	try:
				160	length = len(t)
				161	except TypeError:
				162	self.get_time = timer
				163	self.dispatcher = self.trace_dispatch_i
				164	else:
				165	if length == 2:
				166	self.dispatcher = self.trace_dispatch
				167	else:
				168	self.dispatcher = self.trace_dispatch_l
				169	# This get_time() implementation needs to be defined
				170	# here to capture the passed-in timer in the parameter
				171	# list (for performance). Note that we can't assume
				172	# the timer() result contains two values in all
				173	# cases.
				174	def get_time_timer(timer=timer, sum=sum):
				175	return sum(timer())
				176	self.get_time = get_time_timer
				177	self.t = self.get_time()
				178	self.simulate_call('profiler')
				179
				180	# Heavily optimized dispatch routine for time.process_time() timer
				181
				182	def trace_dispatch(self, frame, event, arg):
				183	timer = self.timer
				184	t = timer()
				185	t = t[0] + t[1] - self.t - self.bias
				186
				187	if event == "c_call":
				188	self.c_func_name = arg.__name__
				189
				190	if self.dispatch[event](self, frame,t):
				191	t = timer()
				192	self.t = t[0] + t[1]
				193	else:
				194	r = timer()
				195	self.t = r[0] + r[1] - t # put back unrecorded delta
				196
				197	# Dispatch routine for best timer program (return = scalar, fastest if
				198	# an integer but float works too -- and time.process_time() relies on that).
				199
				200	def trace_dispatch_i(self, frame, event, arg):
				201	timer = self.timer
				202	t = timer() - self.t - self.bias
				203
				204	if event == "c_call":
				205	self.c_func_name = arg.__name__
				206
				207	if self.dispatch[event](self, frame, t):
				208	self.t = timer()
				209	else:
				210	self.t = timer() - t # put back unrecorded delta
				211
				212	# Dispatch routine for macintosh (timer returns time in ticks of
				213	# 1/60th second)
				214
				215	def trace_dispatch_mac(self, frame, event, arg):
				216	timer = self.timer
				217	t = timer()/60.0 - self.t - self.bias
				218
				219	if event == "c_call":
				220	self.c_func_name = arg.__name__
				221
				222	if self.dispatch[event](self, frame, t):
				223	self.t = timer()/60.0
				224	else:
				225	self.t = timer()/60.0 - t # put back unrecorded delta
				226
				227	# SLOW generic dispatch routine for timer returning lists of numbers
				228
				229	def trace_dispatch_l(self, frame, event, arg):
				230	get_time = self.get_time
				231	t = get_time() - self.t - self.bias
				232
				233	if event == "c_call":
				234	self.c_func_name = arg.__name__
				235
				236	if self.dispatch[event](self, frame, t):
				237	self.t = get_time()
				238	else:
				239	self.t = get_time() - t # put back unrecorded delta
				240
				241	# In the event handlers, the first 3 elements of self.cur are unpacked
				242	# into vrbls w/ 3-letter names. The last two characters are meant to be
				243	# mnemonic:
				244	# _pt self.cur[0] "parent time" time to be charged to parent frame
				245	# _it self.cur[1] "internal time" time spent directly in the function
				246	# _et self.cur[2] "external time" time spent in subfunctions
				247
				248	def trace_dispatch_exception(self, frame, t):
				249	rpt, rit, ret, rfn, rframe, rcur = self.cur
				250	if (rframe is not frame) and rcur:
				251	return self.trace_dispatch_return(rframe, t)
				252	self.cur = rpt, rit+t, ret, rfn, rframe, rcur
				253	return 1
				254
				255
				256	def trace_dispatch_call(self, frame, t):
				257	if self.cur and frame.f_back is not self.cur[-2]:
				258	rpt, rit, ret, rfn, rframe, rcur = self.cur
				259	if not isinstance(rframe, Profile.fake_frame):
				260	assert rframe.f_back is frame.f_back, ("Bad call", rfn,
				261	rframe, rframe.f_back,
				262	frame, frame.f_back)
				263	self.trace_dispatch_return(rframe, 0)
				264	assert (self.cur is None or \
				265	frame.f_back is self.cur[-2]), ("Bad call",
				266	self.cur[-3])
				267	fcode = frame.f_code
				268	fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
				269	self.cur = (t, 0, 0, fn, frame, self.cur)
				270	timings = self.timings
				271	if fn in timings:
				272	cc, ns, tt, ct, callers = timings[fn]
				273	timings[fn] = cc, ns + 1, tt, ct, callers
				274	else:
				275	timings[fn] = 0, 0, 0, 0, {}
				276	return 1
				277
				278	def trace_dispatch_c_call (self, frame, t):
				279	fn = ("", 0, self.c_func_name)
				280	self.cur = (t, 0, 0, fn, frame, self.cur)
				281	timings = self.timings
				282	if fn in timings:
				283	cc, ns, tt, ct, callers = timings[fn]
				284	timings[fn] = cc, ns+1, tt, ct, callers
				285	else:
				286	timings[fn] = 0, 0, 0, 0, {}
				287	return 1
				288
				289	def trace_dispatch_return(self, frame, t):
				290	if frame is not self.cur[-2]:
				291	assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
				292	self.trace_dispatch_return(self.cur[-2], 0)
				293
				294	# Prefix "r" means part of the Returning or exiting frame.
				295	# Prefix "p" means part of the Previous or Parent or older frame.
				296
				297	rpt, rit, ret, rfn, frame, rcur = self.cur
				298	rit = rit + t
				299	frame_total = rit + ret
				300
				301	ppt, pit, pet, pfn, pframe, pcur = rcur
				302	self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
				303
				304	timings = self.timings
				305	cc, ns, tt, ct, callers = timings[rfn]
				306	if not ns:
				307	# This is the only occurrence of the function on the stack.
				308	# Else this is a (directly or indirectly) recursive call, and
				309	# its cumulative time will get updated when the topmost call to
				310	# it returns.
				311	ct = ct + frame_total
				312	cc = cc + 1
				313
				314	if pfn in callers:
				315	callers[pfn] = callers[pfn] + 1 # hack: gather more
				316	# stats such as the amount of time added to ct courtesy
				317	# of this specific call, and the contribution to cc
				318	# courtesy of this call.
				319	else:
				320	callers[pfn] = 1
				321
				322	timings[rfn] = cc, ns - 1, tt + rit, ct, callers
				323
				324	return 1
				325
				326
				327	dispatch = {
				328	"call": trace_dispatch_call,
				329	"exception": trace_dispatch_exception,
				330	"return": trace_dispatch_return,
				331	"c_call": trace_dispatch_c_call,
				332	"c_exception": trace_dispatch_return, # the C function returned
				333	"c_return": trace_dispatch_return,
				334	}
				335
				336
				337	# The next few functions play with self.cmd. By carefully preloading
				338	# our parallel stack, we can force the profiled result to include
				339	# an arbitrary string as the name of the calling function.
				340	# We use self.cmd as that string, and the resulting stats look
				341	# very nice :-).
				342
				343	def set_cmd(self, cmd):
				344	if self.cur[-1]: return # already set
				345	self.cmd = cmd
				346	self.simulate_call(cmd)
				347
				348	class fake_code:
				349	def __init__(self, filename, line, name):
				350	self.co_filename = filename
				351	self.co_line = line
				352	self.co_name = name
				353	self.co_firstlineno = 0
				354
				355	def __repr__(self):
				356	return repr((self.co_filename, self.co_line, self.co_name))
				357
				358	class fake_frame:
				359	def __init__(self, code, prior):
				360	self.f_code = code
				361	self.f_back = prior
				362
				363	def simulate_call(self, name):
				364	code = self.fake_code('profile', 0, name)
				365	if self.cur:
				366	pframe = self.cur[-2]
				367	else:
				368	pframe = None
				369	frame = self.fake_frame(code, pframe)
				370	self.dispatch['call'](self, frame, 0)
				371
				372	# collect stats from pending stack, including getting final
				373	# timings for self.cmd frame.
				374
				375	def simulate_cmd_complete(self):
				376	get_time = self.get_time
				377	t = get_time() - self.t
				378	while self.cur[-1]:
				379	# We can cause assertion errors here if
				380	# dispatch_trace_return checks for a frame match!
				381	self.dispatch['return'](self, self.cur[-2], t)
				382	t = 0
				383	self.t = get_time() - t
				384
				385
				386	def print_stats(self, sort=-1):
				387	import pstats
				388	pstats.Stats(self).strip_dirs().sort_stats(sort). \
				389	print_stats()
				390
				391	def dump_stats(self, file):
				392	with open(file, 'wb') as f:
				393	self.create_stats()
				394	marshal.dump(self.stats, f)
				395
				396	def create_stats(self):
				397	self.simulate_cmd_complete()
				398	self.snapshot_stats()
				399
				400	def snapshot_stats(self):
				401	self.stats = {}
				402	for func, (cc, ns, tt, ct, callers) in self.timings.items():
				403	callers = callers.copy()
				404	nc = 0
				405	for callcnt in callers.values():
				406	nc += callcnt
				407	self.stats[func] = cc, nc, tt, ct, callers
				408
				409
				410	# The following two methods can be called by clients to use
				411	# a profiler to profile a statement, given as a string.
				412
				413	def run(self, cmd):
				414	import __main__
				415	dict = __main__.__dict__
				416	return self.runctx(cmd, dict, dict)
				417
				418	def runctx(self, cmd, globals, locals):
				419	self.set_cmd(cmd)
				420	sys.setprofile(self.dispatcher)
				421	try:
				422	exec(cmd, globals, locals)
				423	finally:
				424	sys.setprofile(None)
				425	return self
				426
				427	# This method is more useful to profile a single function call.
				428	def runcall(self, func, /, args, *kw):
				429	self.set_cmd(repr(func))
				430	sys.setprofile(self.dispatcher)
				431	try:
				432	return func(args, *kw)
				433	finally:
				434	sys.setprofile(None)
				435
				436
				437	#******************************************************************
				438	# The following calculates the overhead for using a profiler. The
				439	# problem is that it takes a fair amount of time for the profiler
				440	# to stop the stopwatch (from the time it receives an event).
				441	# Similarly, there is a delay from the time that the profiler
				442	# re-starts the stopwatch before the user's code really gets to
				443	# continue. The following code tries to measure the difference on
				444	# a per-event basis.
				445	#
				446	# Note that this difference is only significant if there are a lot of
				447	# events, and relatively little user code per event. For example,
				448	# code with small functions will typically benefit from having the
				449	# profiler calibrated for the current platform. This could be
				450	# done on the fly during init() time, but it is not worth the
				451	# effort. Also note that if too large a value specified, then
				452	# execution time on some functions will actually appear as a
				453	# negative number. It is normal for some functions (with very
				454	# low call counts) to have such negative stats, even if the
				455	# calibration figure is "correct."
				456	#
				457	# One alternative to profile-time calibration adjustments (i.e.,
				458	# adding in the magic little delta during each event) is to track
				459	# more carefully the number of events (and cumulatively, the number
				460	# of events during sub functions) that are seen. If this were
				461	# done, then the arithmetic could be done after the fact (i.e., at
				462	# display time). Currently, we track only call/return events.
				463	# These values can be deduced by examining the callees and callers
				464	# vectors for each functions. Hence we can almost correct the
				465	# internal time figure at print time (note that we currently don't
				466	# track exception event processing counts). Unfortunately, there
				467	# is currently no similar information for cumulative sub-function
				468	# time. It would not be hard to "get all this info" at profiler
				469	# time. Specifically, we would have to extend the tuples to keep
				470	# counts of this in each frame, and then extend the defs of timing
				471	# tuples to include the significant two figures. I'm a bit fearful
				472	# that this additional feature will slow the heavily optimized
				473	# event/time ratio (i.e., the profiler would run slower, fur a very
				474	# low "value added" feature.)
				475	#**************************************************************
				476
				477	def calibrate(self, m, verbose=0):
				478	if self.__class__ is not Profile:
				479	raise TypeError("Subclasses must override .calibrate().")
				480
				481	saved_bias = self.bias
				482	self.bias = 0
				483	try:
				484	return self._calibrate_inner(m, verbose)
				485	finally:
				486	self.bias = saved_bias
				487
				488	def _calibrate_inner(self, m, verbose):
				489	get_time = self.get_time
				490
				491	# Set up a test case to be run with and without profiling. Include
				492	# lots of calls, because we're trying to quantify stopwatch overhead.
				493	# Do not raise any exceptions, though, because we want to know
				494	# exactly how many profile events are generated (one call event, +
				495	# one return event, per Python-level call).
				496
				497	def f1(n):
				498	for i in range(n):
				499	x = 1
				500
				501	def f(m, f1=f1):
				502	for i in range(m):
				503	f1(100)
				504
				505	f(m) # warm up the cache
				506
				507	# elapsed_noprofile <- time f(m) takes without profiling.
				508	t0 = get_time()
				509	f(m)
				510	t1 = get_time()
				511	elapsed_noprofile = t1 - t0
				512	if verbose:
				513	print("elapsed time without profiling =", elapsed_noprofile)
				514
				515	# elapsed_profile <- time f(m) takes with profiling. The difference
				516	# is profiling overhead, only some of which the profiler subtracts
				517	# out on its own.
				518	p = Profile()
				519	t0 = get_time()
				520	p.runctx('f(m)', globals(), locals())
				521	t1 = get_time()
				522	elapsed_profile = t1 - t0
				523	if verbose:
				524	print("elapsed time with profiling =", elapsed_profile)
				525
				526	# reported_time <- "CPU seconds" the profiler charged to f and f1.
				527	total_calls = 0.0
				528	reported_time = 0.0
				529	for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
				530	p.timings.items():
				531	if funcname in ("f", "f1"):
				532	total_calls += cc
				533	reported_time += tt
				534
				535	if verbose:
				536	print("'CPU seconds' profiler reported =", reported_time)
				537	print("total # calls =", total_calls)
				538	if total_calls != m + 1:
				539	raise ValueError("internal error: total calls = %d" % total_calls)
				540
				541	# reported_time - elapsed_noprofile = overhead the profiler wasn't
				542	# able to measure. Divide by twice the number of calls (since there
				543	# are two profiler events per call in this test) to get the hidden
				544	# overhead per event.
				545	mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
				546	if verbose:
				547	print("mean stopwatch overhead per profile event =", mean)
				548	return mean
				549
				550	#****************************************************************************
				551
				552	def main():
				553	import os
				554	from optparse import OptionParser
				555
				556	usage = "profile.py [-o output_file_path] [-s sort] [-m module \| scriptfile] [arg] ..."
				557	parser = OptionParser(usage=usage)
				558	parser.allow_interspersed_args = False
				559	parser.add_option('-o', '--outfile', dest="outfile",
				560	help="Save stats to <outfile>", default=None)
				561	parser.add_option('-m', dest="module", action="store_true",
				562	help="Profile a library module.", default=False)
				563	parser.add_option('-s', '--sort', dest="sort",
				564	help="Sort order when printing to stdout, based on pstats.Stats class",
				565	default=-1)
				566
				567	if not sys.argv[1:]:
				568	parser.print_usage()
				569	sys.exit(2)
				570
				571	(options, args) = parser.parse_args()
				572	sys.argv[:] = args
				573
				574	# The script that we're profiling may chdir, so capture the absolute path
				575	# to the output file at startup.
				576	if options.outfile is not None:
				577	options.outfile = os.path.abspath(options.outfile)
				578
				579	if len(args) > 0:
				580	if options.module:
				581	import runpy
				582	code = "run_module(modname, run_name='__main__')"
				583	globs = {
				584	'run_module': runpy.run_module,
				585	'modname': args[0]
				586	}
				587	else:
				588	progname = args[0]
				589	sys.path.insert(0, os.path.dirname(progname))
				590	with open(progname, 'rb') as fp:
				591	code = compile(fp.read(), progname, 'exec')
				592	globs = {
				593	'__file__': progname,
				594	'__name__': '__main__',
				595	'__package__': None,
				596	'__cached__': None,
				597	}
				598	runctx(code, globs, None, options.outfile, options.sort)
				599	else:
				600	parser.print_usage()
				601	return parser
				602
				603	# When invoked as main program, invoke the profiler on a script
				604	if __name__ == '__main__':
				605	main()