Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 68ccc5b..67a50c7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -56,11 +56,12 @@
#include <linux/perf_regs.h>
#include <linux/cgroup.h>
#include <linux/refcount.h>
+#include <linux/security.h>
#include <asm/local.h>
struct perf_callchain_entry {
__u64 nr;
- __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
+ __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */
};
struct perf_callchain_entry_ctx {
@@ -92,15 +93,27 @@
/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
+ * hw_idx: The low level index of raw branch records
+ * for the most recent branch.
+ * -1ULL means invalid/unknown.
*
* Note that nr can vary from sample to sample
* branches (to, from) are stored from most recent
* to least recent, i.e., entries[0] contains the most
* recent branch.
+ * The entries[] is an abstraction of raw branch records,
+ * which may not be stored in age order in HW, e.g. Intel LBR.
+ * The hw_idx is to expose the low level index of raw
+ * branch record for the most recent branch aka entries[0].
+ * The hw_idx index is between -1 (unknown) and max depth,
+ * which can be retrieved in /sys/devices/cpu/caps/branches.
+ * For the architectures whose raw branch records are
+ * already stored in age order, the hw_idx should be 0.
*/
struct perf_branch_stack {
__u64 nr;
- struct perf_branch_entry entries[0];
+ __u64 hw_idx;
+ struct perf_branch_entry entries[];
};
struct task_struct;
@@ -199,17 +212,26 @@
*/
u64 sample_period;
- /*
- * The period we started this sample with.
- */
- u64 last_period;
+ union {
+ struct { /* Sampling */
+ /*
+ * The period we started this sample with.
+ */
+ u64 last_period;
- /*
- * However much is left of the current period; note that this is
- * a full 64bit value and allows for generation of periods longer
- * than hardware might allow.
- */
- local64_t period_left;
+ /*
+ * However much is left of the current period;
+ * note that this is a full 64bit value and
+ * allows for generation of periods longer
+ * than hardware might allow.
+ */
+ local64_t period_left;
+ };
+ struct { /* Topdown events counting for context switch */
+ u64 saved_metric;
+ u64 saved_slots;
+ };
+ };
/*
* State for throttling the event, see __perf_event_overflow() and
@@ -248,6 +270,8 @@
#define PERF_PMU_CAP_NO_EXCLUDE 0x80
#define PERF_PMU_CAP_AUX_OUTPUT 0x100
+struct perf_output_handle;
+
/**
* struct pmu - generic performance monitoring unit
*/
@@ -351,7 +375,7 @@
* ->stop() with PERF_EF_UPDATE will read the counter and update
* period/count values like ->read() would.
*
- * ->start() with PERF_EF_RELOAD will reprogram the the counter
+ * ->start() with PERF_EF_RELOAD will reprogram the counter
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
*/
void (*start) (struct perf_event *event, int flags);
@@ -404,11 +428,21 @@
*/
void (*sched_task) (struct perf_event_context *ctx,
bool sched_in);
- /*
- * PMU specific data size
- */
- size_t task_ctx_size;
+ /*
+ * Kmem cache of PMU specific data
+ */
+ struct kmem_cache *task_ctx_cache;
+
+ /*
+ * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
+ * can be synchronized using this function. See Intel LBR callstack support
+ * implementation and Perf core context switch handling callbacks for usage
+ * examples.
+ */
+ void (*swap_task_ctx) (struct perf_event_context *prev,
+ struct perf_event_context *next);
+ /* optional */
/*
* Set up pmu-private data structures for an AUX area
@@ -423,6 +457,19 @@
void (*free_aux) (void *aux); /* optional */
/*
+ * Take a snapshot of the AUX buffer without touching the event
+ * state, so that preempting ->start()/->stop() callbacks does
+ * not interfere with their logic. Called in PMI context.
+ *
+ * Returns the size of AUX data copied to the output handle.
+ *
+ * Optional.
+ */
+ long (*snapshot_aux) (struct perf_event *event,
+ struct perf_output_handle *handle,
+ unsigned long size);
+
+ /*
* Validate address range filters: make sure the HW supports the
* requested configuration and number of filters; return 0 if the
* supplied filters are valid, -errno otherwise.
@@ -538,9 +585,13 @@
* PERF_EV_CAP_SOFTWARE: Is a software event.
* PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
* from any CPU in the package where it is active.
+ * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
+ * cannot be a group leader. If an event with this flag is detached from the
+ * group it is scheduled out and moved into an unrecoverable ERROR state.
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
+#define PERF_EV_CAP_SIBLING BIT(2)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
@@ -555,9 +606,10 @@
#define PERF_ATTACH_TASK 0x04
#define PERF_ATTACH_TASK_DATA 0x08
#define PERF_ATTACH_ITRACE 0x10
+#define PERF_ATTACH_SCHED_CB 0x20
struct perf_cgroup;
-struct ring_buffer;
+struct perf_buffer;
struct pmu_event_list {
raw_spinlock_t lock;
@@ -626,16 +678,6 @@
u64 total_time_running;
u64 tstamp;
- /*
- * timestamp shadows the actual context timing but it can
- * be safely used in NMI interrupt context. It reflects the
- * context time as it was when the event was last scheduled in.
- *
- * ctx_time already accounts for ctx->timestamp. Therefore to
- * compute ctx_time for a sample, simply add perf_clock().
- */
- u64 shadow_ctx_time;
-
struct perf_event_attr attr;
u16 header_size;
u16 id_header_size;
@@ -669,7 +711,7 @@
struct mutex mmap_mutex;
atomic_t mmap_count;
- struct ring_buffer *rb;
+ struct perf_buffer *rb;
struct list_head rb_entry;
unsigned long rcu_batches;
int rcu_pending;
@@ -721,6 +763,9 @@
struct perf_cgroup *cgrp; /* cgroup event is attach to */
#endif
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
struct list_head sb_list;
#endif /* CONFIG_PERF_EVENTS */
};
@@ -777,6 +822,7 @@
*/
u64 time;
u64 timestamp;
+ u64 timeoffset;
/*
* These fields let us detect when two contexts have both
@@ -822,11 +868,18 @@
int sched_cb_usage;
int online;
+ /*
+ * Per-CPU storage for iterators used in visit_groups_merge. The default
+ * storage is of size 2 to hold the CPU and any CPU event iterators.
+ */
+ int heap_size;
+ struct perf_event **heap;
+ struct perf_event *heap_default[2];
};
struct perf_output_handle {
struct perf_event *event;
- struct ring_buffer *rb;
+ struct perf_buffer *rb;
unsigned long wakeup;
unsigned long size;
u64 aux_flags;
@@ -852,6 +905,8 @@
struct perf_cgroup_info {
u64 time;
u64 timestamp;
+ u64 timeoffset;
+ int active;
};
struct perf_cgroup {
@@ -960,18 +1015,14 @@
u32 reserved;
} cpu_entry;
struct perf_callchain_entry *callchain;
+ u64 aux_size;
- /*
- * regs_user may point to task_pt_regs or to regs_user_copy, depending
- * on arch details.
- */
struct perf_regs regs_user;
- struct pt_regs regs_user_copy;
-
struct perf_regs regs_intr;
u64 stack_user_size;
u64 phys_addr;
+ u64 cgroup;
} ____cacheline_aligned;
/* default value for data source */
@@ -1175,7 +1226,18 @@
enum perf_bpf_event_type type,
u16 flags);
-extern struct perf_guest_info_callbacks *perf_guest_cbs;
+extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
+static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
+{
+ /*
+ * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading
+ * the callbacks between a !NULL check and dereferences, to ensure
+ * pending stores/changes to the callback pointers are visible before a
+ * non-NULL perf_guest_cbs is visible to readers, and to prevent a
+ * module from unloading callbacks while readers are active.
+ */
+ return rcu_dereference(perf_guest_cbs);
+}
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1183,6 +1245,9 @@
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
+extern void perf_event_text_poke(const void *addr,
+ const void *old_bytes, size_t old_len,
+ const void *new_bytes, size_t new_len);
/* Callchains */
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
@@ -1195,6 +1260,8 @@
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);
+extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
+extern void put_callchain_entry(int rctx);
extern int sysctl_perf_event_max_stack;
extern int sysctl_perf_event_max_contexts_per_stack;
@@ -1231,29 +1298,48 @@
extern void perf_sample_event_took(u64 sample_len_ns);
-extern int perf_proc_update_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos);
-extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos);
-
+int perf_proc_update_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
+int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
int perf_event_max_stack_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
-static inline bool perf_paranoid_tracepoint_raw(void)
+/* Access to perf_event_open(2) syscall. */
+#define PERF_SECURITY_OPEN 0
+
+/* Finer grained perf_event_open(2) access control. */
+#define PERF_SECURITY_CPU 1
+#define PERF_SECURITY_KERNEL 2
+#define PERF_SECURITY_TRACEPOINT 3
+
+static inline int perf_is_paranoid(void)
{
return sysctl_perf_event_paranoid > -1;
}
-static inline bool perf_paranoid_cpu(void)
+static inline int perf_allow_kernel(struct perf_event_attr *attr)
{
- return sysctl_perf_event_paranoid > 0;
+ if (sysctl_perf_event_paranoid > 1 && !perfmon_capable())
+ return -EACCES;
+
+ return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
}
-static inline bool perf_paranoid_kernel(void)
+static inline int perf_allow_cpu(struct perf_event_attr *attr)
{
- return sysctl_perf_event_paranoid > 1;
+ if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
+ return -EACCES;
+
+ return security_perf_event_open(attr, PERF_SECURITY_CPU);
+}
+
+static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
+{
+ if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
+ return -EPERM;
+
+ return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
}
extern void perf_event_init(void);
@@ -1314,11 +1400,14 @@
extern void perf_event_addr_filters_sync(struct perf_event *event);
extern int perf_output_begin(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event, unsigned int size);
extern int perf_output_begin_forward(struct perf_output_handle *handle,
- struct perf_event *event,
- unsigned int size);
+ struct perf_sample_data *data,
+ struct perf_event *event,
+ unsigned int size);
extern int perf_output_begin_backward(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event,
unsigned int size);
@@ -1327,6 +1416,9 @@
const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
unsigned int len);
+extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
+ struct perf_output_handle *handle,
+ unsigned long from, unsigned long to);
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern u64 perf_swevent_set_period(struct perf_event *event);
@@ -1336,6 +1428,8 @@
extern void perf_event_disable_inatomic(struct perf_event *event);
extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
+extern int perf_event_period(struct perf_event *event, u64 value);
+extern u64 perf_event_pause(struct perf_event *event, bool reset);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1406,6 +1500,11 @@
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
+static inline void perf_event_text_poke(const void *addr,
+ const void *old_bytes,
+ size_t old_len,
+ const void *new_bytes,
+ size_t new_len) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
static inline void perf_swevent_put_recursion_context(int rctx) { }
@@ -1415,6 +1514,14 @@
static inline int __perf_event_disable(void *info) { return -1; }
static inline void perf_event_task_tick(void) { }
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
+static inline int perf_event_period(struct perf_event *event, u64 value)
+{
+ return -EINVAL;
+}
+static inline u64 perf_event_pause(struct perf_event *event, bool reset)
+{
+ return 0;
+}
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
@@ -1480,4 +1587,8 @@
#define perf_event_exit_cpu NULL
#endif
+extern void __weak arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg,
+ u64 now);
+
#endif /* _LINUX_PERF_EVENT_H */