blob: 5cb017ed8300993ad9767db967e105926e2f0159 [file] [log] [blame]
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001#! /usr/bin/env python3
2#
3# Class for profiling python code. rev 1.0 6/2/94
4#
5# Written by James Roskind
6# Based on prior profile module by Sjoerd Mullender...
7# which was hacked somewhat by: Guido van Rossum
8
9"""Class for profiling Python code."""
10
11# Copyright Disney Enterprises, Inc. All Rights Reserved.
12# Licensed to PSF under a Contributor Agreement
13#
14# Licensed under the Apache License, Version 2.0 (the "License");
15# you may not use this file except in compliance with the License.
16# You may obtain a copy of the License at
17#
18# http://www.apache.org/licenses/LICENSE-2.0
19#
20# Unless required by applicable law or agreed to in writing, software
21# distributed under the License is distributed on an "AS IS" BASIS,
22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23# either express or implied. See the License for the specific language
24# governing permissions and limitations under the License.
25
26
27import sys
28import time
29import marshal
30
31__all__ = ["run", "runctx", "Profile"]
32
33# Sample timer for use with
34#i_count = 0
35#def integer_timer():
36# global i_count
37# i_count = i_count + 1
38# return i_count
39#itimes = integer_timer # replace with C coded timer returning integers
40
41class _Utils:
42 """Support class for utility functions which are shared by
43 profile.py and cProfile.py modules.
44 Not supposed to be used directly.
45 """
46
47 def __init__(self, profiler):
48 self.profiler = profiler
49
50 def run(self, statement, filename, sort):
51 prof = self.profiler()
52 try:
53 prof.run(statement)
54 except SystemExit:
55 pass
56 finally:
57 self._show(prof, filename, sort)
58
59 def runctx(self, statement, globals, locals, filename, sort):
60 prof = self.profiler()
61 try:
62 prof.runctx(statement, globals, locals)
63 except SystemExit:
64 pass
65 finally:
66 self._show(prof, filename, sort)
67
68 def _show(self, prof, filename, sort):
69 if filename is not None:
70 prof.dump_stats(filename)
71 else:
72 prof.print_stats(sort)
73
74
75#**************************************************************************
76# The following are the static member functions for the profiler class
77# Note that an instance of Profile() is *not* needed to call them.
78#**************************************************************************
79
80def run(statement, filename=None, sort=-1):
81 """Run statement under profiler optionally saving results in filename
82
83 This function takes a single argument that can be passed to the
84 "exec" statement, and an optional file name. In all cases this
85 routine attempts to "exec" its first argument and gather profiling
86 statistics from the execution. If no file name is present, then this
87 function automatically prints a simple profiling report, sorted by the
88 standard name string (file/line/function-name) that is presented in
89 each line.
90 """
91 return _Utils(Profile).run(statement, filename, sort)
92
93def runctx(statement, globals, locals, filename=None, sort=-1):
94 """Run statement under profiler, supplying your own globals and locals,
95 optionally saving results in filename.
96
97 statement and filename have the same semantics as profile.run
98 """
99 return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
100
101
102class Profile:
103 """Profiler class.
104
105 self.cur is always a tuple. Each such tuple corresponds to a stack
106 frame that is currently active (self.cur[-2]). The following are the
107 definitions of its members. We use this external "parallel stack" to
108 avoid contaminating the program that we are profiling. (old profiler
109 used to write into the frames local dictionary!!) Derived classes
110 can change the definition of some entries, as long as they leave
111 [-2:] intact (frame and previous tuple). In case an internal error is
112 detected, the -3 element is used as the function name.
113
114 [ 0] = Time that needs to be charged to the parent frame's function.
115 It is used so that a function call will not have to access the
116 timing data for the parent frame.
117 [ 1] = Total time spent in this frame's function, excluding time in
118 subfunctions (this latter is tallied in cur[2]).
119 [ 2] = Total time spent in subfunctions, excluding time executing the
120 frame's function (this latter is tallied in cur[1]).
121 [-3] = Name of the function that corresponds to this frame.
122 [-2] = Actual frame that we correspond to (used to sync exception handling).
123 [-1] = Our parent 6-tuple (corresponds to frame.f_back).
124
125 Timing data for each function is stored as a 5-tuple in the dictionary
126 self.timings[]. The index is always the name stored in self.cur[-3].
127 The following are the definitions of the members:
128
129 [0] = The number of times this function was called, not counting direct
130 or indirect recursion,
131 [1] = Number of times this function appears on the stack, minus one
132 [2] = Total time spent internal to this function
133 [3] = Cumulative time that this function was present on the stack. In
134 non-recursive functions, this is the total execution time from start
135 to finish of each invocation of a function, including time spent in
136 all subfunctions.
137 [4] = A dictionary indicating for each function name, the number of times
138 it was called by us.
139 """
140
141 bias = 0 # calibration constant
142
143 def __init__(self, timer=None, bias=None):
144 self.timings = {}
145 self.cur = None
146 self.cmd = ""
147 self.c_func_name = ""
148
149 if bias is None:
150 bias = self.bias
151 self.bias = bias # Materialize in local dict for lookup speed.
152
153 if not timer:
154 self.timer = self.get_time = time.process_time
155 self.dispatcher = self.trace_dispatch_i
156 else:
157 self.timer = timer
158 t = self.timer() # test out timer function
159 try:
160 length = len(t)
161 except TypeError:
162 self.get_time = timer
163 self.dispatcher = self.trace_dispatch_i
164 else:
165 if length == 2:
166 self.dispatcher = self.trace_dispatch
167 else:
168 self.dispatcher = self.trace_dispatch_l
169 # This get_time() implementation needs to be defined
170 # here to capture the passed-in timer in the parameter
171 # list (for performance). Note that we can't assume
172 # the timer() result contains two values in all
173 # cases.
174 def get_time_timer(timer=timer, sum=sum):
175 return sum(timer())
176 self.get_time = get_time_timer
177 self.t = self.get_time()
178 self.simulate_call('profiler')
179
180 # Heavily optimized dispatch routine for time.process_time() timer
181
182 def trace_dispatch(self, frame, event, arg):
183 timer = self.timer
184 t = timer()
185 t = t[0] + t[1] - self.t - self.bias
186
187 if event == "c_call":
188 self.c_func_name = arg.__name__
189
190 if self.dispatch[event](self, frame,t):
191 t = timer()
192 self.t = t[0] + t[1]
193 else:
194 r = timer()
195 self.t = r[0] + r[1] - t # put back unrecorded delta
196
197 # Dispatch routine for best timer program (return = scalar, fastest if
198 # an integer but float works too -- and time.process_time() relies on that).
199
200 def trace_dispatch_i(self, frame, event, arg):
201 timer = self.timer
202 t = timer() - self.t - self.bias
203
204 if event == "c_call":
205 self.c_func_name = arg.__name__
206
207 if self.dispatch[event](self, frame, t):
208 self.t = timer()
209 else:
210 self.t = timer() - t # put back unrecorded delta
211
212 # Dispatch routine for macintosh (timer returns time in ticks of
213 # 1/60th second)
214
215 def trace_dispatch_mac(self, frame, event, arg):
216 timer = self.timer
217 t = timer()/60.0 - self.t - self.bias
218
219 if event == "c_call":
220 self.c_func_name = arg.__name__
221
222 if self.dispatch[event](self, frame, t):
223 self.t = timer()/60.0
224 else:
225 self.t = timer()/60.0 - t # put back unrecorded delta
226
227 # SLOW generic dispatch routine for timer returning lists of numbers
228
229 def trace_dispatch_l(self, frame, event, arg):
230 get_time = self.get_time
231 t = get_time() - self.t - self.bias
232
233 if event == "c_call":
234 self.c_func_name = arg.__name__
235
236 if self.dispatch[event](self, frame, t):
237 self.t = get_time()
238 else:
239 self.t = get_time() - t # put back unrecorded delta
240
241 # In the event handlers, the first 3 elements of self.cur are unpacked
242 # into vrbls w/ 3-letter names. The last two characters are meant to be
243 # mnemonic:
244 # _pt self.cur[0] "parent time" time to be charged to parent frame
245 # _it self.cur[1] "internal time" time spent directly in the function
246 # _et self.cur[2] "external time" time spent in subfunctions
247
248 def trace_dispatch_exception(self, frame, t):
249 rpt, rit, ret, rfn, rframe, rcur = self.cur
250 if (rframe is not frame) and rcur:
251 return self.trace_dispatch_return(rframe, t)
252 self.cur = rpt, rit+t, ret, rfn, rframe, rcur
253 return 1
254
255
256 def trace_dispatch_call(self, frame, t):
257 if self.cur and frame.f_back is not self.cur[-2]:
258 rpt, rit, ret, rfn, rframe, rcur = self.cur
259 if not isinstance(rframe, Profile.fake_frame):
260 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
261 rframe, rframe.f_back,
262 frame, frame.f_back)
263 self.trace_dispatch_return(rframe, 0)
264 assert (self.cur is None or \
265 frame.f_back is self.cur[-2]), ("Bad call",
266 self.cur[-3])
267 fcode = frame.f_code
268 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
269 self.cur = (t, 0, 0, fn, frame, self.cur)
270 timings = self.timings
271 if fn in timings:
272 cc, ns, tt, ct, callers = timings[fn]
273 timings[fn] = cc, ns + 1, tt, ct, callers
274 else:
275 timings[fn] = 0, 0, 0, 0, {}
276 return 1
277
278 def trace_dispatch_c_call (self, frame, t):
279 fn = ("", 0, self.c_func_name)
280 self.cur = (t, 0, 0, fn, frame, self.cur)
281 timings = self.timings
282 if fn in timings:
283 cc, ns, tt, ct, callers = timings[fn]
284 timings[fn] = cc, ns+1, tt, ct, callers
285 else:
286 timings[fn] = 0, 0, 0, 0, {}
287 return 1
288
289 def trace_dispatch_return(self, frame, t):
290 if frame is not self.cur[-2]:
291 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
292 self.trace_dispatch_return(self.cur[-2], 0)
293
294 # Prefix "r" means part of the Returning or exiting frame.
295 # Prefix "p" means part of the Previous or Parent or older frame.
296
297 rpt, rit, ret, rfn, frame, rcur = self.cur
298 rit = rit + t
299 frame_total = rit + ret
300
301 ppt, pit, pet, pfn, pframe, pcur = rcur
302 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
303
304 timings = self.timings
305 cc, ns, tt, ct, callers = timings[rfn]
306 if not ns:
307 # This is the only occurrence of the function on the stack.
308 # Else this is a (directly or indirectly) recursive call, and
309 # its cumulative time will get updated when the topmost call to
310 # it returns.
311 ct = ct + frame_total
312 cc = cc + 1
313
314 if pfn in callers:
315 callers[pfn] = callers[pfn] + 1 # hack: gather more
316 # stats such as the amount of time added to ct courtesy
317 # of this specific call, and the contribution to cc
318 # courtesy of this call.
319 else:
320 callers[pfn] = 1
321
322 timings[rfn] = cc, ns - 1, tt + rit, ct, callers
323
324 return 1
325
326
327 dispatch = {
328 "call": trace_dispatch_call,
329 "exception": trace_dispatch_exception,
330 "return": trace_dispatch_return,
331 "c_call": trace_dispatch_c_call,
332 "c_exception": trace_dispatch_return, # the C function returned
333 "c_return": trace_dispatch_return,
334 }
335
336
337 # The next few functions play with self.cmd. By carefully preloading
338 # our parallel stack, we can force the profiled result to include
339 # an arbitrary string as the name of the calling function.
340 # We use self.cmd as that string, and the resulting stats look
341 # very nice :-).
342
343 def set_cmd(self, cmd):
344 if self.cur[-1]: return # already set
345 self.cmd = cmd
346 self.simulate_call(cmd)
347
348 class fake_code:
349 def __init__(self, filename, line, name):
350 self.co_filename = filename
351 self.co_line = line
352 self.co_name = name
353 self.co_firstlineno = 0
354
355 def __repr__(self):
356 return repr((self.co_filename, self.co_line, self.co_name))
357
358 class fake_frame:
359 def __init__(self, code, prior):
360 self.f_code = code
361 self.f_back = prior
362
363 def simulate_call(self, name):
364 code = self.fake_code('profile', 0, name)
365 if self.cur:
366 pframe = self.cur[-2]
367 else:
368 pframe = None
369 frame = self.fake_frame(code, pframe)
370 self.dispatch['call'](self, frame, 0)
371
372 # collect stats from pending stack, including getting final
373 # timings for self.cmd frame.
374
375 def simulate_cmd_complete(self):
376 get_time = self.get_time
377 t = get_time() - self.t
378 while self.cur[-1]:
379 # We *can* cause assertion errors here if
380 # dispatch_trace_return checks for a frame match!
381 self.dispatch['return'](self, self.cur[-2], t)
382 t = 0
383 self.t = get_time() - t
384
385
386 def print_stats(self, sort=-1):
387 import pstats
388 pstats.Stats(self).strip_dirs().sort_stats(sort). \
389 print_stats()
390
391 def dump_stats(self, file):
392 with open(file, 'wb') as f:
393 self.create_stats()
394 marshal.dump(self.stats, f)
395
396 def create_stats(self):
397 self.simulate_cmd_complete()
398 self.snapshot_stats()
399
400 def snapshot_stats(self):
401 self.stats = {}
402 for func, (cc, ns, tt, ct, callers) in self.timings.items():
403 callers = callers.copy()
404 nc = 0
405 for callcnt in callers.values():
406 nc += callcnt
407 self.stats[func] = cc, nc, tt, ct, callers
408
409
410 # The following two methods can be called by clients to use
411 # a profiler to profile a statement, given as a string.
412
413 def run(self, cmd):
414 import __main__
415 dict = __main__.__dict__
416 return self.runctx(cmd, dict, dict)
417
418 def runctx(self, cmd, globals, locals):
419 self.set_cmd(cmd)
420 sys.setprofile(self.dispatcher)
421 try:
422 exec(cmd, globals, locals)
423 finally:
424 sys.setprofile(None)
425 return self
426
427 # This method is more useful to profile a single function call.
428 def runcall(self, func, /, *args, **kw):
429 self.set_cmd(repr(func))
430 sys.setprofile(self.dispatcher)
431 try:
432 return func(*args, **kw)
433 finally:
434 sys.setprofile(None)
435
436
437 #******************************************************************
438 # The following calculates the overhead for using a profiler. The
439 # problem is that it takes a fair amount of time for the profiler
440 # to stop the stopwatch (from the time it receives an event).
441 # Similarly, there is a delay from the time that the profiler
442 # re-starts the stopwatch before the user's code really gets to
443 # continue. The following code tries to measure the difference on
444 # a per-event basis.
445 #
446 # Note that this difference is only significant if there are a lot of
447 # events, and relatively little user code per event. For example,
448 # code with small functions will typically benefit from having the
449 # profiler calibrated for the current platform. This *could* be
450 # done on the fly during init() time, but it is not worth the
451 # effort. Also note that if too large a value specified, then
452 # execution time on some functions will actually appear as a
453 # negative number. It is *normal* for some functions (with very
454 # low call counts) to have such negative stats, even if the
455 # calibration figure is "correct."
456 #
457 # One alternative to profile-time calibration adjustments (i.e.,
458 # adding in the magic little delta during each event) is to track
459 # more carefully the number of events (and cumulatively, the number
460 # of events during sub functions) that are seen. If this were
461 # done, then the arithmetic could be done after the fact (i.e., at
462 # display time). Currently, we track only call/return events.
463 # These values can be deduced by examining the callees and callers
464 # vectors for each functions. Hence we *can* almost correct the
465 # internal time figure at print time (note that we currently don't
466 # track exception event processing counts). Unfortunately, there
467 # is currently no similar information for cumulative sub-function
468 # time. It would not be hard to "get all this info" at profiler
469 # time. Specifically, we would have to extend the tuples to keep
470 # counts of this in each frame, and then extend the defs of timing
471 # tuples to include the significant two figures. I'm a bit fearful
472 # that this additional feature will slow the heavily optimized
473 # event/time ratio (i.e., the profiler would run slower, fur a very
474 # low "value added" feature.)
475 #**************************************************************
476
477 def calibrate(self, m, verbose=0):
478 if self.__class__ is not Profile:
479 raise TypeError("Subclasses must override .calibrate().")
480
481 saved_bias = self.bias
482 self.bias = 0
483 try:
484 return self._calibrate_inner(m, verbose)
485 finally:
486 self.bias = saved_bias
487
488 def _calibrate_inner(self, m, verbose):
489 get_time = self.get_time
490
491 # Set up a test case to be run with and without profiling. Include
492 # lots of calls, because we're trying to quantify stopwatch overhead.
493 # Do not raise any exceptions, though, because we want to know
494 # exactly how many profile events are generated (one call event, +
495 # one return event, per Python-level call).
496
497 def f1(n):
498 for i in range(n):
499 x = 1
500
501 def f(m, f1=f1):
502 for i in range(m):
503 f1(100)
504
505 f(m) # warm up the cache
506
507 # elapsed_noprofile <- time f(m) takes without profiling.
508 t0 = get_time()
509 f(m)
510 t1 = get_time()
511 elapsed_noprofile = t1 - t0
512 if verbose:
513 print("elapsed time without profiling =", elapsed_noprofile)
514
515 # elapsed_profile <- time f(m) takes with profiling. The difference
516 # is profiling overhead, only some of which the profiler subtracts
517 # out on its own.
518 p = Profile()
519 t0 = get_time()
520 p.runctx('f(m)', globals(), locals())
521 t1 = get_time()
522 elapsed_profile = t1 - t0
523 if verbose:
524 print("elapsed time with profiling =", elapsed_profile)
525
526 # reported_time <- "CPU seconds" the profiler charged to f and f1.
527 total_calls = 0.0
528 reported_time = 0.0
529 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
530 p.timings.items():
531 if funcname in ("f", "f1"):
532 total_calls += cc
533 reported_time += tt
534
535 if verbose:
536 print("'CPU seconds' profiler reported =", reported_time)
537 print("total # calls =", total_calls)
538 if total_calls != m + 1:
539 raise ValueError("internal error: total calls = %d" % total_calls)
540
541 # reported_time - elapsed_noprofile = overhead the profiler wasn't
542 # able to measure. Divide by twice the number of calls (since there
543 # are two profiler events per call in this test) to get the hidden
544 # overhead per event.
545 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
546 if verbose:
547 print("mean stopwatch overhead per profile event =", mean)
548 return mean
549
550#****************************************************************************
551
552def main():
553 import os
554 from optparse import OptionParser
555
556 usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
557 parser = OptionParser(usage=usage)
558 parser.allow_interspersed_args = False
559 parser.add_option('-o', '--outfile', dest="outfile",
560 help="Save stats to <outfile>", default=None)
561 parser.add_option('-m', dest="module", action="store_true",
562 help="Profile a library module.", default=False)
563 parser.add_option('-s', '--sort', dest="sort",
564 help="Sort order when printing to stdout, based on pstats.Stats class",
565 default=-1)
566
567 if not sys.argv[1:]:
568 parser.print_usage()
569 sys.exit(2)
570
571 (options, args) = parser.parse_args()
572 sys.argv[:] = args
573
574 # The script that we're profiling may chdir, so capture the absolute path
575 # to the output file at startup.
576 if options.outfile is not None:
577 options.outfile = os.path.abspath(options.outfile)
578
579 if len(args) > 0:
580 if options.module:
581 import runpy
582 code = "run_module(modname, run_name='__main__')"
583 globs = {
584 'run_module': runpy.run_module,
585 'modname': args[0]
586 }
587 else:
588 progname = args[0]
589 sys.path.insert(0, os.path.dirname(progname))
590 with open(progname, 'rb') as fp:
591 code = compile(fp.read(), progname, 'exec')
592 globs = {
593 '__file__': progname,
594 '__name__': '__main__',
595 '__package__': None,
596 '__cached__': None,
597 }
598 runctx(code, globs, None, options.outfile, options.sort)
599 else:
600 parser.print_usage()
601 return parser
602
603# When invoked as main program, invoke the profiler on a script
604if __name__ == '__main__':
605 main()