Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e4e321b..e43f469 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,16 +1,19 @@
perf-y += sched-messaging.o
perf-y += sched-pipe.o
+perf-y += syscall.o
perf-y += mem-functions.o
perf-y += futex-hash.o
perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
-
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
+perf-y += synthesize.o
+perf-y += kallsyms-parse.o
+perf-y += find-bit-bench.o
+perf-y += inject-buildid.o
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 4aa6de1..eac36af 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,17 +33,21 @@
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
+int bench_syscall_basic(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
+int bench_mem_find_bit(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
-
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
+int bench_synthesize(int argc, const char **argv);
+int bench_kallsyms_parse(int argc, const char **argv);
+int bench_inject_build_id(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index a7526c0..ca2d591 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -5,7 +5,7 @@
* Benchmark the various operations allowed for epoll_ctl(2).
* The idea is to concurrently stress a single epoll instance
*/
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>
@@ -312,6 +312,7 @@
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -411,4 +412,4 @@
errmem:
err(EXIT_FAILURE, "calloc");
}
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index d1c5cb5..75dca97 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
/*
* Copyright (C) 2018 Davidlohr Bueso.
*
@@ -426,6 +426,7 @@
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -518,7 +519,8 @@
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
@@ -538,4 +540,4 @@
errmem:
err(EXIT_FAILURE, "calloc");
}
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
new file mode 100644
index 0000000..73b5bcc
--- /dev/null
+++ b/tools/perf/bench/find-bit-bench.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark find_next_bit and related bit operations.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int outer_iterations = 5;
+static unsigned int inner_iterations = 100000;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
+ "Number of outer iterations used"),
+ OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
+ "Number of inner iterations used"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench mem find_bit <options>",
+ NULL
+};
+
+static unsigned int accumulator;
+static unsigned int use_of_val;
+
+static noinline void workload(int val)
+{
+ use_of_val += val;
+ accumulator++;
+}
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+static bool asm_test_bit(long nr, const unsigned long *addr)
+{
+ bool oldbit;
+
+ asm volatile("bt %2,%1"
+ : "=@ccc" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+#else
+#define asm_test_bit test_bit
+#endif
+
+static int do_for_each_set_bit(unsigned int num_bits)
+{
+ unsigned long *to_test = bitmap_alloc(num_bits);
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ struct stats fb_time_stats, tb_time_stats;
+ double time_average, time_stddev;
+ unsigned int bit, i, j;
+ unsigned int set_bits, skip;
+ unsigned int old;
+
+ init_stats(&fb_time_stats);
+ init_stats(&tb_time_stats);
+
+ for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
+ bitmap_zero(to_test, num_bits);
+ skip = num_bits / set_bits;
+ for (i = 0; i < num_bits; i += skip)
+ set_bit(i, to_test);
+
+ for (i = 0; i < outer_iterations; i++) {
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for_each_set_bit(bit, to_test, num_bits)
+ workload(bit);
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&fb_time_stats, runtime_us);
+
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for (bit = 0; bit < num_bits; bit++) {
+ if (asm_test_bit(bit, to_test))
+ workload(bit);
+ }
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&tb_time_stats, runtime_us);
+ }
+
+ printf("%d operations %d bits set of %d bits\n",
+ inner_iterations, set_bits, num_bits);
+ time_average = avg_stats(&fb_time_stats);
+ time_stddev = stddev_stats(&fb_time_stats);
+ printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+ time_average = avg_stats(&tb_time_stats);
+ time_stddev = stddev_stats(&tb_time_stats);
+ printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ if (use_of_val == accumulator) /* Try to avoid compiler tricks. */
+ printf("\n");
+ }
+ bitmap_free(to_test);
+ return 0;
+}
+
+int bench_mem_find_bit(int argc, const char **argv)
+{
+ int err = 0, i;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 1; i <= 2048; i <<= 1)
+ do_for_each_set_bit(i);
+
+ return err;
+}
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 2177686..915bf3d 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -137,6 +137,7 @@
if (!cpu)
goto errmem;
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -204,7 +205,8 @@
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 30d9712..159bc89 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -160,6 +160,7 @@
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -210,7 +211,8 @@
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent)
@@ -224,6 +226,7 @@
print_summary();
free(worker);
+ perf_cpu_map__put(cpu);
return ret;
err:
usage_with_options(bench_futex_lock_pi_usage, options);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a00a689..105b36c 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -128,6 +128,7 @@
if (!cpu)
err(EXIT_FAILURE, "cpu_map__new");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -215,6 +216,7 @@
print_summary();
free(worker);
+ perf_cpu_map__put(cpu);
return ret;
err:
usage_with_options(bench_futex_requeue_usage, options);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index a053cf2..a129c94 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -234,6 +234,7 @@
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -319,6 +320,7 @@
print_summary();
free(blocked_worker);
+ perf_cpu_map__put(cpu);
return ret;
}
#endif /* HAVE_PTHREAD_BARRIER */
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 58906e9..507ff53 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -136,6 +136,7 @@
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -209,5 +210,6 @@
print_summary();
free(worker);
+ perf_cpu_map__put(cpu);
return ret;
}
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
new file mode 100644
index 0000000..f4ec01d
--- /dev/null
+++ b/tools/perf/bench/inject-buildid.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <stddef.h>
+#include <ftw.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <internal/lib.h>
+#include <subcmd/parse-options.h>
+
+#include "bench.h"
+#include "util/data.h"
+#include "util/stat.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/symbol.h"
+#include "util/session.h"
+#include "util/build-id.h"
+#include "util/synthetic-events.h"
+
+#define MMAP_DEV_MAJOR 8
+#define DSO_MMAP_RATIO 4
+
+static unsigned int iterations = 100;
+static unsigned int nr_mmaps = 100;
+static unsigned int nr_samples = 100; /* samples per mmap */
+
+static u64 bench_sample_type;
+static u16 bench_id_hdr_size;
+
+struct bench_data {
+ int pid;
+ int input_pipe[2];
+ int output_pipe[2];
+ pthread_t th;
+};
+
+struct bench_dso {
+ struct list_head list;
+ char *name;
+ int ino;
+};
+
+static int nr_dsos;
+static struct bench_dso *dsos;
+
+extern int cmd_inject(int argc, const char *argv[]);
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average (default: 100)"),
+ OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps,
+ "Number of mmap events for each iteration (default: 100)"),
+ OPT_UINTEGER('n', "nr-samples", &nr_samples,
+ "Number of sample events per mmap event (default: 100)"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show iteration count, DSO name, etc)"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals inject-build-id <options>",
+ NULL
+};
+
+/*
+ * Helper for collect_dso that adds the given file as a dso to dso_list
+ * if it contains a build-id. Stops after collecting 4 times more than
+ * we need (for MMAP2 events).
+ */
+static int add_dso(const char *fpath, const struct stat *sb __maybe_unused,
+ int typeflag, struct FTW *ftwbuf __maybe_unused)
+{
+ struct bench_dso *dso = &dsos[nr_dsos];
+ struct build_id bid;
+
+ if (typeflag == FTW_D || typeflag == FTW_SL)
+ return 0;
+
+ if (filename__read_build_id(fpath, &bid) < 0)
+ return 0;
+
+ dso->name = realpath(fpath, NULL);
+ if (dso->name == NULL)
+ return -1;
+
+ dso->ino = nr_dsos++;
+ pr_debug2(" Adding DSO: %s\n", fpath);
+
+ /* stop if we collected enough DSOs */
+ if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps)
+ return 1;
+
+ return 0;
+}
+
+static void collect_dso(void)
+{
+ dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos));
+ if (dsos == NULL) {
+ printf(" Memory allocation failed\n");
+ exit(1);
+ }
+
+ if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0)
+ return;
+
+ pr_debug(" Collected %d DSOs\n", nr_dsos);
+}
+
+static void release_dso(void)
+{
+ int i;
+
+ for (i = 0; i < nr_dsos; i++) {
+ struct bench_dso *dso = &dsos[i];
+
+ free(dso->name);
+ }
+ free(dsos);
+}
+
+/* Fake address used by mmap and sample events */
+static u64 dso_map_addr(struct bench_dso *dso)
+{
+ return 0x400000ULL + dso->ino * 8192ULL;
+}
+
+static ssize_t synthesize_attr(struct bench_data *data)
+{
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event.attr) + sizeof(u64));
+
+ event.header.type = PERF_RECORD_HEADER_ATTR;
+ event.header.size = sizeof(event.attr) + sizeof(u64);
+
+ event.attr.attr.type = PERF_TYPE_SOFTWARE;
+ event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK;
+ event.attr.attr.exclude_kernel = 1;
+ event.attr.attr.sample_id_all = 1;
+ event.attr.attr.sample_type = bench_sample_type;
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static ssize_t synthesize_fork(struct bench_data *data)
+{
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size);
+
+ event.header.type = PERF_RECORD_FORK;
+ event.header.misc = PERF_RECORD_MISC_FORK_EXEC;
+ event.header.size = sizeof(event.fork) + bench_id_hdr_size;
+
+ event.fork.ppid = 1;
+ event.fork.ptid = 1;
+ event.fork.pid = data->pid;
+ event.fork.tid = data->pid;
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static ssize_t synthesize_mmap(struct bench_data *data, struct bench_dso *dso, u64 timestamp)
+{
+ union perf_event event;
+ size_t len = offsetof(struct perf_record_mmap2, filename);
+ u64 *id_hdr_ptr = (void *)&event;
+ int ts_idx;
+
+ len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size;
+
+ memset(&event, 0, min(len, sizeof(event.mmap2)));
+
+ event.header.type = PERF_RECORD_MMAP2;
+ event.header.misc = PERF_RECORD_MISC_USER;
+ event.header.size = len;
+
+ event.mmap2.pid = data->pid;
+ event.mmap2.tid = data->pid;
+ event.mmap2.maj = MMAP_DEV_MAJOR;
+ event.mmap2.ino = dso->ino;
+
+ strcpy(event.mmap2.filename, dso->name);
+
+ event.mmap2.start = dso_map_addr(dso);
+ event.mmap2.len = 4096;
+ event.mmap2.prot = PROT_EXEC;
+
+ if (len > sizeof(event.mmap2)) {
+ /* write mmap2 event first */
+ if (writen(data->input_pipe[1], &event, len - bench_id_hdr_size) < 0)
+ return -1;
+ /* zero-fill sample id header */
+ memset(id_hdr_ptr, 0, bench_id_hdr_size);
+ /* put timestamp in the right position */
+ ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2;
+ id_hdr_ptr[ts_idx] = timestamp;
+ if (writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size) < 0)
+ return -1;
+
+ return len;
+ }
+
+ ts_idx = (len / sizeof(u64)) - 2;
+ id_hdr_ptr[ts_idx] = timestamp;
+ return writen(data->input_pipe[1], &event, len);
+}
+
+static ssize_t synthesize_sample(struct bench_data *data, struct bench_dso *dso, u64 timestamp)
+{
+ union perf_event event;
+ struct perf_sample sample = {
+ .tid = data->pid,
+ .pid = data->pid,
+ .ip = dso_map_addr(dso),
+ .time = timestamp,
+ };
+
+ event.header.type = PERF_RECORD_SAMPLE;
+ event.header.misc = PERF_RECORD_MISC_USER;
+ event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0);
+
+ perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample);
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static ssize_t synthesize_flush(struct bench_data *data)
+{
+ struct perf_event_header header = {
+ .size = sizeof(header),
+ .type = PERF_RECORD_FINISHED_ROUND,
+ };
+
+ return writen(data->input_pipe[1], &header, header.size);
+}
+
+static void *data_reader(void *arg)
+{
+ struct bench_data *data = arg;
+ char buf[8192];
+ int flag;
+ int n;
+
+ flag = fcntl(data->output_pipe[0], F_GETFL);
+ fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK);
+
+ /* read out data from child */
+ while (true) {
+ n = read(data->output_pipe[0], buf, sizeof(buf));
+ if (n > 0)
+ continue;
+ if (n == 0)
+ break;
+
+ if (errno != EINTR && errno != EAGAIN)
+ break;
+
+ usleep(100);
+ }
+
+ close(data->output_pipe[0]);
+ return NULL;
+}
+
+static int setup_injection(struct bench_data *data, bool build_id_all)
+{
+ int ready_pipe[2];
+ int dev_null_fd;
+ char buf;
+
+ if (pipe(ready_pipe) < 0)
+ return -1;
+
+ if (pipe(data->input_pipe) < 0)
+ return -1;
+
+ if (pipe(data->output_pipe) < 0)
+ return -1;
+
+ data->pid = fork();
+ if (data->pid < 0)
+ return -1;
+
+ if (data->pid == 0) {
+ const char **inject_argv;
+ int inject_argc = 2;
+
+ close(data->input_pipe[1]);
+ close(data->output_pipe[0]);
+ close(ready_pipe[0]);
+
+ dup2(data->input_pipe[0], STDIN_FILENO);
+ close(data->input_pipe[0]);
+ dup2(data->output_pipe[1], STDOUT_FILENO);
+ close(data->output_pipe[1]);
+
+ dev_null_fd = open("/dev/null", O_WRONLY);
+ if (dev_null_fd < 0)
+ exit(1);
+
+ dup2(dev_null_fd, STDERR_FILENO);
+
+ if (build_id_all)
+ inject_argc++;
+
+ inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv));
+ if (inject_argv == NULL)
+ exit(1);
+
+ inject_argv[0] = strdup("inject");
+ inject_argv[1] = strdup("-b");
+ if (build_id_all)
+ inject_argv[2] = strdup("--buildid-all");
+
+ /* signal that we're ready to go */
+ close(ready_pipe[1]);
+
+ cmd_inject(inject_argc, inject_argv);
+
+ exit(0);
+ }
+
+ pthread_create(&data->th, NULL, data_reader, data);
+
+ close(ready_pipe[1]);
+ close(data->input_pipe[0]);
+ close(data->output_pipe[1]);
+
+ /* wait for child ready */
+ if (read(ready_pipe[0], &buf, 1) < 0)
+ return -1;
+ close(ready_pipe[0]);
+
+ return 0;
+}
+
+static int inject_build_id(struct bench_data *data, u64 *max_rss)
+{
+ int status;
+ unsigned int i, k;
+ struct rusage rusage;
+
+ /* this makes the child to run */
+ if (perf_header__write_pipe(data->input_pipe[1]) < 0)
+ return -1;
+
+ if (synthesize_attr(data) < 0)
+ return -1;
+
+ if (synthesize_fork(data) < 0)
+ return -1;
+
+ for (i = 0; i < nr_mmaps; i++) {
+ int idx = rand() % (nr_dsos - 1);
+ struct bench_dso *dso = &dsos[idx];
+ u64 timestamp = rand() % 1000000;
+
+ pr_debug2(" [%d] injecting: %s\n", i+1, dso->name);
+ if (synthesize_mmap(data, dso, timestamp) < 0)
+ return -1;
+
+ for (k = 0; k < nr_samples; k++) {
+ if (synthesize_sample(data, dso, timestamp + k * 1000) < 0)
+ return -1;
+ }
+
+ if ((i + 1) % 10 == 0) {
+ if (synthesize_flush(data) < 0)
+ return -1;
+ }
+ }
+
+ /* tihs makes the child to finish */
+ close(data->input_pipe[1]);
+
+ wait4(data->pid, &status, 0, &rusage);
+ *max_rss = rusage.ru_maxrss;
+
+ pr_debug(" Child %d exited with %d\n", data->pid, status);
+
+ return 0;
+}
+
+static void do_inject_loop(struct bench_data *data, bool build_id_all)
+{
+ unsigned int i;
+ struct stats time_stats, mem_stats;
+ double time_average, time_stddev;
+ double mem_average, mem_stddev;
+
+ init_stats(&time_stats);
+ init_stats(&mem_stats);
+
+ pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : "");
+
+ for (i = 0; i < iterations; i++) {
+ struct timeval start, end, diff;
+ u64 runtime_us, max_rss;
+
+ pr_debug(" Iteration #%d\n", i+1);
+
+ if (setup_injection(data, build_id_all) < 0) {
+ printf(" Build-id injection setup failed\n");
+ break;
+ }
+
+ gettimeofday(&start, NULL);
+ if (inject_build_id(data, &max_rss) < 0) {
+ printf(" Build-id injection failed\n");
+ break;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&mem_stats, max_rss);
+
+ pthread_join(data->th, NULL);
+ }
+
+ time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+ time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+ printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n",
+ build_id_all ? "-all" : "", time_average, time_stddev);
+
+ /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */
+ time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
+ time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
+ printf(" Average time per event: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ mem_average = avg_stats(&mem_stats);
+ mem_stddev = stddev_stats(&mem_stats);
+ printf(" Average memory usage: %.0f KB (+- %.0f KB)\n",
+ mem_average, mem_stddev);
+}
+
+static int do_inject_loops(struct bench_data *data)
+{
+
+ srand(time(NULL));
+ symbol__init(NULL);
+
+ bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP;
+ bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
+ bench_id_hdr_size = 32;
+
+ collect_dso();
+ if (nr_dsos == 0) {
+ printf(" Cannot collect DSOs for injection\n");
+ return -1;
+ }
+
+ do_inject_loop(data, false);
+ do_inject_loop(data, true);
+
+ release_dso();
+ return 0;
+}
+
+int bench_inject_build_id(int argc, const char **argv)
+{
+ struct bench_data data;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ return do_inject_loops(&data);
+}
+
diff --git a/tools/perf/bench/kallsyms-parse.c b/tools/perf/bench/kallsyms-parse.c
new file mode 100644
index 0000000..2b0d0f9
--- /dev/null
+++ b/tools/perf/bench/kallsyms-parse.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark of /proc/kallsyms parsing.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+#include <symbol/kallsyms.h>
+
+static unsigned int iterations = 100;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals kallsyms-parse <options>",
+ NULL
+};
+
+static int bench_process_symbol(void *arg __maybe_unused,
+ const char *name __maybe_unused,
+ char type __maybe_unused,
+ u64 start __maybe_unused)
+{
+ return 0;
+}
+
+static int do_kallsyms_parse(void)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev;
+ int err;
+ struct stats time_stats;
+
+ init_stats(&time_stats);
+
+ for (i = 0; i < iterations; i++) {
+ gettimeofday(&start, NULL);
+ err = kallsyms__parse("/proc/kallsyms", NULL,
+ bench_process_symbol);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ }
+
+ time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+ time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+ printf(" Average kallsyms__parse took: %.3f ms (+- %.3f ms)\n",
+ time_average, time_stddev);
+ return 0;
+}
+
+int bench_kallsyms_parse(int argc, const char **argv)
+{
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ return do_kallsyms_parse();
+}
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 9ad015a..6eb45a2 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -2,6 +2,9 @@
/* Various wrappers to make the kernel .S file build in user-space: */
+// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#define memcpy MEMCPY /* don't hide glibc's memcpy() */
#define altinstr_replacement text
#define globl p2align 4; .globl
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734..0000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index d550bd5..6f093c4 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#define memset MEMSET /* don't hide glibc's memset() */
#define altinstr_replacement text
#define globl p2align 4; .globl
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5797253..11726ec 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -137,12 +137,13 @@
u8 *data;
pthread_mutex_t startup_mutex;
+ pthread_cond_t startup_cond;
int nr_tasks_started;
- pthread_mutex_t startup_done_mutex;
-
pthread_mutex_t start_work_mutex;
+ pthread_cond_t start_work_cond;
int nr_tasks_working;
+ bool start_work;
pthread_mutex_t stop_work_mutex;
u64 bytes_done;
@@ -247,17 +248,22 @@
*/
static bool node_has_cpus(int node)
{
- struct bitmask *cpu = numa_allocate_cpumask();
- unsigned int i;
+ struct bitmask *cpumask = numa_allocate_cpumask();
+ bool ret = false; /* fall back to nocpus */
+ int cpu;
- if (cpu && !numa_node_to_cpus(node, cpu)) {
- for (i = 0; i < cpu->size; i++) {
- if (numa_bitmask_isbitset(cpu, i))
- return true;
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu)) {
+ ret = true;
+ break;
+ }
}
}
+ numa_free_cpumask(cpumask);
- return false; /* lets fall back to nocpus safely */
+ return ret;
}
static cpu_set_t bind_to_cpu(int target_cpu)
@@ -288,14 +294,10 @@
static cpu_set_t bind_to_node(int target_node)
{
- int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
cpu_set_t orig_mask, mask;
int cpu;
int ret;
- BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
- BUG_ON(!cpus_per_node);
-
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
@@ -305,13 +307,16 @@
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
- int cpu_start = (target_node + 0) * cpus_per_node;
- int cpu_stop = (target_node + 1) * cpus_per_node;
+ struct bitmask *cpumask = numa_allocate_cpumask();
- BUG_ON(cpu_stop > g->p.nr_cpus);
-
- for (cpu = cpu_start; cpu < cpu_stop; cpu++)
- CPU_SET(cpu, &mask);
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(target_node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu))
+ CPU_SET(cpu, &mask);
+ }
+ }
+ numa_free_cpumask(cpumask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -479,6 +484,18 @@
pthread_mutex_init(mutex, &attr);
}
+/*
+ * Return a process-shared (global) condition variable:
+ */
+static void init_global_cond(pthread_cond_t *cond)
+{
+ pthread_condattr_t attr;
+
+ pthread_condattr_init(&attr);
+ pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ pthread_cond_init(cond, &attr);
+}
+
static int parse_cpu_list(const char *arg)
{
p0.cpu_list_str = strdup(arg);
@@ -729,8 +746,6 @@
return -1;
return parse_node_list(arg);
-
- return 0;
}
#define BIT(x) (1ul << x)
@@ -813,12 +828,12 @@
}
}
} else if (!g->p.data_backwards || (nr + loop) & 1) {
+ /* Process data forwards: */
d0 = data + off;
d = data + off + 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d >= d1))
d = data;
@@ -836,7 +851,6 @@
d = data + off - 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d < data))
d = data + words-1;
@@ -1135,15 +1149,18 @@
if (g->p.serialize_startup) {
pthread_mutex_lock(&g->startup_mutex);
g->nr_tasks_started++;
+ /* The last thread wakes the main process. */
+ if (g->nr_tasks_started == g->p.nr_tasks)
+ pthread_cond_signal(&g->startup_cond);
+
pthread_mutex_unlock(&g->startup_mutex);
/* Here we will wait for the main process to start us all at once: */
pthread_mutex_lock(&g->start_work_mutex);
+ g->start_work = false;
g->nr_tasks_working++;
-
- /* Last one wake the main process: */
- if (g->nr_tasks_working == g->p.nr_tasks)
- pthread_mutex_unlock(&g->startup_done_mutex);
+ while (!g->start_work)
+ pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
pthread_mutex_unlock(&g->start_work_mutex);
}
@@ -1440,8 +1457,9 @@
/* Startup serialization: */
init_global_mutex(&g->start_work_mutex);
+ init_global_cond(&g->start_work_cond);
init_global_mutex(&g->startup_mutex);
- init_global_mutex(&g->startup_done_mutex);
+ init_global_cond(&g->startup_cond);
init_global_mutex(&g->stop_work_mutex);
init_thread_data();
@@ -1501,9 +1519,6 @@
pids = zalloc(g->p.nr_proc * sizeof(*pids));
pid = -1;
- /* All threads try to acquire it, this way we can wait for them to start up: */
- pthread_mutex_lock(&g->start_work_mutex);
-
if (g->p.serialize_startup) {
tprintf(" #\n");
tprintf(" # Startup synchronization: ..."); fflush(stdout);
@@ -1525,22 +1540,29 @@
pids[i] = pid;
}
- /* Wait for all the threads to start up: */
- while (g->nr_tasks_started != g->p.nr_tasks)
- usleep(USEC_PER_MSEC);
-
- BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
if (g->p.serialize_startup) {
+ bool threads_ready = false;
double startup_sec;
- pthread_mutex_lock(&g->startup_done_mutex);
+ /*
+ * Wait for all the threads to start up. The last thread will
+ * signal this process.
+ */
+ pthread_mutex_lock(&g->startup_mutex);
+ while (g->nr_tasks_started != g->p.nr_tasks)
+ pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
- /* This will start all threads: */
- pthread_mutex_unlock(&g->start_work_mutex);
+ pthread_mutex_unlock(&g->startup_mutex);
- /* This mutex is locked - the last started thread will wake us: */
- pthread_mutex_lock(&g->startup_done_mutex);
+ /* Wait for all threads to be at the start_work_cond. */
+ while (!threads_ready) {
+ pthread_mutex_lock(&g->start_work_mutex);
+ threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
+ pthread_mutex_unlock(&g->start_work_mutex);
+ if (!threads_ready)
+ usleep(1);
+ }
gettimeofday(&stop, NULL);
@@ -1554,7 +1576,11 @@
tprintf(" #\n");
start = stop;
- pthread_mutex_unlock(&g->startup_done_mutex);
+ /* Start all threads running. */
+ pthread_mutex_lock(&g->start_work_mutex);
+ g->start_work = true;
+ pthread_mutex_unlock(&g->start_work_mutex);
+ pthread_cond_broadcast(&g->start_work_cond);
} else {
gettimeofday(&start, NULL);
}
@@ -1733,12 +1759,12 @@
*/
static const char *tests[][MAX_ARGS] = {
/* Basic single-stream NUMA bandwidth measurements: */
- { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM },
{ "RAM-bw-local-NOTHP,",
"mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
- { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "1", OPT_BW_RAM },
/* 2-stream NUMA bandwidth measurements: */
@@ -1755,7 +1781,7 @@
{ " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
{ " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
{ " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
- { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
{ " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
{ " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
{ " 4x4-convergence-NOTHP,",
@@ -1780,24 +1806,24 @@
"mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
{ "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
- { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
- { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
- { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
- { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+ { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
- { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
- { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
- { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
- { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
- { " 4x8-bw-thread-NOTHP,",
+ { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-process-NOTHP,",
"mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
- { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
- { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+ { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
- { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
- { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+ { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
- { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
{ "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
{ "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
{ "numa01-bw-thread-NOTHP,",
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index b142d87..cecce93 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -40,7 +40,7 @@
unsigned int num_fds;
int ready_out;
int wakefd;
- int out_fds[0];
+ int out_fds[];
};
struct receiver_context {
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
new file mode 100644
index 0000000..b2924e3
--- /dev/null
+++ b/tools/perf/bench/synthesize.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark synthesis of perf events such as at the start of a 'perf
+ * record'. Synthesis is done on the current process and the 'dummy' event
+ * handlers are invoked that support dump_trace but otherwise do nothing.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#include <stdio.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/session.h"
+#include "../util/stat.h"
+#include "../util/synthetic-events.h"
+#include "../util/target.h"
+#include "../util/thread_map.h"
+#include "../util/tool.h"
+#include "../util/util.h"
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int min_threads = 1;
+static unsigned int max_threads = UINT_MAX;
+static unsigned int single_iterations = 10000;
+static unsigned int multi_iterations = 10;
+static bool run_st;
+static bool run_mt;
+
+static const struct option options[] = {
+ OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
+ OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
+ OPT_UINTEGER('m', "min-threads", &min_threads,
+ "Minimum number of threads in multithreaded bench"),
+ OPT_UINTEGER('M', "max-threads", &max_threads,
+ "Maximum number of threads in multithreaded bench"),
+ OPT_UINTEGER('i', "single-iterations", &single_iterations,
+ "Number of iterations used to compute single-threaded average"),
+ OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
+ "Number of iterations used to compute multi-threaded average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals synthesize <options>",
+ NULL
+};
+
+static atomic_t event_count;
+
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ atomic_inc(&event_count);
+ return 0;
+}
+
+static int do_run_single_threaded(struct perf_session *session,
+ struct perf_thread_map *threads,
+ struct target *target, bool data_mmap)
+{
+ const unsigned int nr_threads_synthesize = 1;
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+
+ for (i = 0; i < single_iterations; i++) {
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, threads,
+ process_synthesized_event,
+ data_mmap,
+ nr_threads_synthesize);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
+ data_mmap ? "data " : "", time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_single_threaded(void)
+{
+ struct perf_session *session;
+ struct target target = {
+ .pid = "self",
+ };
+ struct perf_thread_map *threads;
+ int err;
+
+ perf_set_singlethreaded();
+ session = perf_session__new(NULL, false, NULL);
+ if (IS_ERR(session)) {
+ pr_err("Session creation failed.\n");
+ return PTR_ERR(session);
+ }
+ threads = thread_map__new_by_pid(getpid());
+ if (!threads) {
+ pr_err("Thread map creation failed.\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ puts(
+"Computing performance of single threaded perf event synthesis by\n"
+"synthesizing events on the perf process itself:");
+
+ err = do_run_single_threaded(session, threads, &target, false);
+ if (err)
+ goto err_out;
+
+ err = do_run_single_threaded(session, threads, &target, true);
+
+err_out:
+ if (threads)
+ perf_thread_map__put(threads);
+
+ perf_session__delete(session);
+ return err;
+}
+
+static int do_run_multi_threaded(struct target *target,
+ unsigned int nr_threads_synthesize)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+ struct perf_session *session;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+ for (i = 0; i < multi_iterations; i++) {
+ session = perf_session__new(NULL, false, NULL);
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, NULL,
+ process_synthesized_event,
+ false,
+ nr_threads_synthesize);
+ if (err) {
+ perf_session__delete(session);
+ return err;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ perf_session__delete(session);
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_multi_threaded(void)
+{
+ struct target target = {
+ .cpu_list = "0"
+ };
+ unsigned int nr_threads_synthesize;
+ int err;
+
+ if (max_threads == UINT_MAX)
+ max_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ puts(
+"Computing performance of multi threaded perf event synthesis by\n"
+"synthesizing events on CPU 0:");
+
+ for (nr_threads_synthesize = min_threads;
+ nr_threads_synthesize <= max_threads;
+ nr_threads_synthesize++) {
+ if (nr_threads_synthesize == 1)
+ perf_set_singlethreaded();
+ else
+ perf_set_multithreaded();
+
+ printf(" Number of synthesis threads: %u\n",
+ nr_threads_synthesize);
+
+ err = do_run_multi_threaded(&target, nr_threads_synthesize);
+ if (err)
+ return err;
+ }
+ perf_set_singlethreaded();
+ return 0;
+}
+
+int bench_synthesize(int argc, const char **argv)
+{
+ int err = 0;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * If neither single threaded or multi-threaded are specified, default
+ * to running just single threaded.
+ */
+ if (!run_st && !run_mt)
+ run_st = true;
+
+ if (run_st)
+ err = run_single_threaded();
+
+ if (!err && run_mt)
+ err = run_multi_threaded();
+
+ return err;
+}
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
new file mode 100644
index 0000000..5fe621c
--- /dev/null
+++ b/tools/perf/bench/syscall.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * syscall.c
+ *
+ * syscall: Benchmark for system call performance
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#define LOOPS_DEFAULT 10000000
+static int loops = LOOPS_DEFAULT;
+
+static const struct option options[] = {
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_END()
+};
+
+static const char * const bench_syscall_usage[] = {
+ "perf bench syscall <options>",
+ NULL
+};
+
+int bench_syscall_basic(int argc, const char **argv)
+{
+ struct timeval start, stop, diff;
+ unsigned long long result_usec = 0;
+ int i;
+
+ argc = parse_options(argc, argv, options, bench_syscall_usage, 0);
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < loops; i++)
+ getppid();
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Executed %'d getppid() calls\n", loops);
+
+ result_usec = diff.tv_sec * 1000000;
+ result_usec += diff.tv_usec;
+
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec/1000));
+
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / (double)loops);
+ printf(" %'14d ops/sec\n",
+ (int)((double)loops /
+ ((double)result_usec / (double)1000000)));
+ break;
+
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec / 1000));
+ break;
+
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}