Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 1fafc32..cb50a9e 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* In-Memory Collection (IMC) Performance Monitor counter support.
*
* Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
* (C) 2017 Anju T Sudhakar, IBM Corporation.
* (C) 2017 Hemant K Shaw, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or later version.
*/
#include <linux/perf_event.h>
#include <linux/slab.h>
@@ -28,13 +24,13 @@
static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
static struct imc_pmu **per_nest_pmu_arr;
static cpumask_t nest_imc_cpumask;
-struct imc_pmu_ref *nest_imc_refc;
+static struct imc_pmu_ref *nest_imc_refc;
static int nest_pmus;
/* Core IMC data structures and variables */
static cpumask_t core_imc_cpumask;
-struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu_ref *core_imc_refc;
static struct imc_pmu *core_imc_pmu;
/* Thread IMC data structures and variables */
@@ -43,12 +39,17 @@
static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
-struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static struct imc_pmu_ref *trace_imc_refc;
+static int trace_imc_mem_size;
+
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
}
-PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(event, "config:0-61");
PMU_FORMAT_ATTR(offset, "config:0-31");
PMU_FORMAT_ATTR(rvalue, "config:32");
PMU_FORMAT_ATTR(mode, "config:33-40");
@@ -65,6 +66,25 @@
.attrs = imc_format_attrs,
};
+/* Format attribute for imc trace-mode */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+static struct attribute *trace_imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_cpmc_reserved.attr,
+ &format_attr_cpmc_event.attr,
+ &format_attr_cpmc_samplesel.attr,
+ &format_attr_cpmc_load.attr,
+ NULL,
+};
+
+static struct attribute_group trace_imc_format_group = {
+.name = "format",
+.attrs = trace_imc_format_attrs,
+};
+
/* Get the cpumask printed to a buffer "buf" */
static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
struct device_attribute *attr,
@@ -342,7 +362,14 @@
*/
nid = cpu_to_node(cpu);
l_cpumask = cpumask_of_node(nid);
- target = cpumask_any_but(l_cpumask, cpu);
+ target = cpumask_last(l_cpumask);
+
+ /*
+ * If this(target) is the last cpu in the cpumask for this chip,
+ * check for any possible online cpu in the chip.
+ */
+ if (unlikely(target == cpu))
+ target = cpumask_any_but(l_cpumask, cpu);
/*
* Update the cpumask with the target cpu and
@@ -473,15 +500,6 @@
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -496,6 +514,11 @@
* Get the base memory addresss for this cpu.
*/
chip_id = cpu_to_chip_id(event->cpu);
+
+ /* Return, if chip_id is not valid */
+ if (chip_id < 0)
+ return -ENODEV;
+
pcni = pmu->mem_info;
do {
if (pcni->id == chip_id) {
@@ -503,7 +526,7 @@
break;
}
pcni++;
- } while (pcni);
+ } while (pcni->vbase != 0);
if (!flag)
return -ENODEV;
@@ -554,6 +577,7 @@
{
int nid, rc = 0, core_id = (cpu / threads_per_core);
struct imc_mem_info *mem_info;
+ struct page *page;
/*
* alloc_pages_node() will allocate memory for core in the
@@ -564,11 +588,12 @@
mem_info->id = core_id;
/* We need only vbase for core counters */
- mem_info->vbase = page_address(alloc_pages_node(nid,
- GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!mem_info->vbase)
+ page = alloc_pages_node(nid,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ mem_info->vbase = page_address(page);
/* Init the mutex */
core_imc_refc[core_id].id = core_id;
@@ -651,7 +676,10 @@
return 0;
/* Find any online cpu in that core except the current "cpu" */
- ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+ ncpu = cpumask_last(cpu_sibling_mask(cpu));
+
+ if (unlikely(ncpu == cpu))
+ ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
if (ncpu >= 0 && ncpu < nr_cpu_ids) {
cpumask_set_cpu(ncpu, &core_imc_cpumask);
@@ -748,15 +776,6 @@
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -806,8 +825,11 @@
}
/*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
*
* LDBAR Register Layout:
*
@@ -825,26 +847,26 @@
*/
static int thread_imc_mem_alloc(int cpu_id, int size)
{
- u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+ u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
int nid = cpu_to_node(cpu_id);
if (!local_mem) {
+ struct page *page;
/*
* This case could happen only once at start, since we dont
* free the memory in cpu offline path.
*/
- local_mem = page_address(alloc_pages_node(nid,
+ page = alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!local_mem)
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ local_mem = page_address(page);
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
- ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
-
- mtspr(SPRN_LDBAR, ldbar_value);
+ mtspr(SPRN_LDBAR, 0);
return 0;
}
@@ -876,6 +898,9 @@
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
/* Sampling not supported */
if (event->hw.sample_period)
return -EINVAL;
@@ -995,6 +1020,7 @@
{
int core_id;
struct imc_pmu_ref *ref;
+ u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
if (flags & PERF_EF_START)
imc_event_start(event, flags);
@@ -1003,6 +1029,9 @@
return -EINVAL;
core_id = smp_processor_id() / threads_per_core;
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+ mtspr(SPRN_LDBAR, ldbar_value);
+
/*
* imc pmus are enabled only when it is used.
* See if this is triggered for the first time.
@@ -1034,11 +1063,7 @@
int core_id;
struct imc_pmu_ref *ref;
- /*
- * Take a snapshot and calculate the delta and update
- * the event counter values.
- */
- imc_event_update(event);
+ mtspr(SPRN_LDBAR, 0);
core_id = smp_processor_id() / threads_per_core;
ref = &core_imc_refc[core_id];
@@ -1057,6 +1082,243 @@
ref->refc = 0;
}
mutex_unlock(&ref->lock);
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_event_update(event);
+}
+
+/*
+ * Allocate a page of memory for each cpu, and load LDBAR with 0.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+ u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+ int phys_id = cpu_to_node(cpu_id), rc = 0;
+ int core_id = (cpu_id / threads_per_core);
+
+ if (!local_mem) {
+ struct page *page;
+
+ page = alloc_pages_node(phys_id,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
+ return -ENOMEM;
+ local_mem = page_address(page);
+ per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+ /* Initialise the counters for trace mode */
+ rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
+ get_hard_smp_processor_id(cpu_id));
+ if (rc) {
+ pr_info("IMC:opal init failed for trace imc\n");
+ return rc;
+ }
+ }
+
+ /* Init the mutex, if not already */
+ trace_imc_refc[core_id].id = core_id;
+ mutex_init(&trace_imc_refc[core_id].lock);
+
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+ return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+}
+
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+static int trace_imc_cpu_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+ "perf/powerpc/imc_trace:online",
+ ppc_trace_imc_cpu_online,
+ ppc_trace_imc_cpu_offline);
+}
+
+static u64 get_trace_imc_event_base_addr(void)
+{
+ return (u64)per_cpu(trace_imc_mem, smp_processor_id());
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+ struct perf_sample_data *data,
+ u64 *prev_tb,
+ struct perf_event_header *header,
+ struct perf_event *event)
+{
+ /* Sanity checks for a valid record */
+ if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+ *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+ else
+ return -EINVAL;
+
+ if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+ be64_to_cpu(READ_ONCE(mem->tb2)))
+ return -EINVAL;
+
+ /* Prepare perf sample */
+ data->ip = be64_to_cpu(READ_ONCE(mem->ip));
+ data->period = event->hw.last_period;
+
+ header->type = PERF_RECORD_SAMPLE;
+ header->size = sizeof(*header) + event->header_size;
+ header->misc = 0;
+
+ if (is_kernel_addr(data->ip))
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ else
+ header->misc |= PERF_RECORD_MISC_USER;
+
+ perf_event_header__init_id(header, data, event);
+
+ return 0;
+}
+
+static void dump_trace_imc_data(struct perf_event *event)
+{
+ struct trace_imc_data *mem;
+ int i, ret;
+ u64 prev_tb = 0;
+
+ mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+ for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+ i++, mem++) {
+ struct perf_sample_data data;
+ struct perf_event_header header;
+
+ ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
+ if (ret) /* Exit, if not a valid record */
+ break;
+ else {
+ /* If this is a valid record, create the sample */
+ struct perf_output_handle handle;
+
+ if (perf_output_begin(&handle, event, header.size))
+ return;
+
+ perf_output_sample(&handle, &header, &data, event);
+ perf_output_end(&handle);
+ }
+ }
+}
+
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+ u64 local_mem, ldbar_value;
+
+ /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
+ local_mem = get_trace_imc_event_base_addr();
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
+
+ if (core_imc_refc)
+ ref = &core_imc_refc[core_id];
+ if (!ref) {
+ /* If core-imc is not enabled, use trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+ }
+ mtspr(SPRN_LDBAR, ldbar_value);
+ mutex_lock(&ref->lock);
+ if (ref->refc == 0) {
+ if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ mutex_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
+ mtspr(SPRN_LDBAR, 0);
+ return -EINVAL;
+ }
+ }
+ ++ref->refc;
+ mutex_unlock(&ref->lock);
+
+ return 0;
+}
+
+static void trace_imc_event_read(struct perf_event *event)
+{
+ return;
+}
+
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+ u64 local_mem = get_trace_imc_event_base_addr();
+ dump_trace_imc_data(event);
+ memset((void *)local_mem, 0, sizeof(u64));
+}
+
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+ return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+
+ if (core_imc_refc)
+ ref = &core_imc_refc[core_id];
+ if (!ref) {
+ /* If core-imc is not enabled, use trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref)
+ return;
+ }
+ mtspr(SPRN_LDBAR, 0);
+ mutex_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ mutex_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ ref->refc = 0;
+ }
+ mutex_unlock(&ref->lock);
+ trace_imc_event_stop(event, flags);
+}
+
+static int trace_imc_event_init(struct perf_event *event)
+{
+ struct task_struct *target;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ /* Return if this is a couting event */
+ if (event->attr.sample_period == 0)
+ return -ENOENT;
+
+ event->hw.idx = -1;
+ target = event->hw.target;
+
+ event->pmu->task_ctx_nr = perf_hw_context;
+ return 0;
}
/* update_pmu_ops : Populate the appropriate operations for "pmu" */
@@ -1069,6 +1331,7 @@
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_event_update;
pmu->pmu.attr_groups = pmu->attr_groups;
+ pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
switch (pmu->domain) {
@@ -1088,6 +1351,14 @@
pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
break;
+ case IMC_DOMAIN_TRACE:
+ pmu->pmu.event_init = trace_imc_event_init;
+ pmu->pmu.add = trace_imc_event_add;
+ pmu->pmu.del = trace_imc_event_del;
+ pmu->pmu.start = trace_imc_event_start;
+ pmu->pmu.stop = trace_imc_event_stop;
+ pmu->pmu.read = trace_imc_event_read;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
default:
break;
}
@@ -1180,6 +1451,18 @@
}
}
+static void cleanup_all_trace_imc_memory(void)
+{
+ int i, order = get_order(trace_imc_mem_size);
+
+ for_each_online_cpu(i) {
+ if (per_cpu(trace_imc_mem, i))
+ free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+ }
+ kfree(trace_imc_refc);
+}
+
/* Function to free the attr_groups which are dynamically allocated */
static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
{
@@ -1221,6 +1504,11 @@
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
+
+ if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+ cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+ cleanup_all_trace_imc_memory();
+ }
}
/*
@@ -1303,6 +1591,27 @@
thread_imc_pmu = pmu_ptr;
break;
+ case IMC_DOMAIN_TRACE:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+ if (!pmu_ptr->pmu.name)
+ return -ENOMEM;
+
+ nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+ trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+ GFP_KERNEL);
+ if (!trace_imc_refc)
+ return -ENOMEM;
+
+ trace_imc_mem_size = pmu_ptr->counter_mem_size;
+ for_each_online_cpu(cpu) {
+ res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+ if (res) {
+ cleanup_all_trace_imc_memory();
+ goto err;
+ }
+ }
+ break;
default:
return -EINVAL;
}
@@ -1376,6 +1685,14 @@
}
break;
+ case IMC_DOMAIN_TRACE:
+ ret = trace_imc_cpu_init();
+ if (ret) {
+ cleanup_all_trace_imc_memory();
+ goto err_free_mem;
+ }
+
+ break;
default:
return -EINVAL; /* Unknown domain */
}
@@ -1392,7 +1709,7 @@
if (ret)
goto err_free_cpuhp_mem;
- pr_info("%s performance monitor hardware support registered\n",
+ pr_debug("%s performance monitor hardware support registered\n",
pmu_ptr->pmu.name);
return 0;