#ifndef _TOOLS_LINUX_RING_BUFFER_H_
#define _TOOLS_LINUX_RING_BUFFER_H_

#include <asm/barrier.h>
#include <linux/perf_event.h>

/*
 * The contract with the kernel for walking the perf ring buffer from
 * user space requires the following barrier pairing (quoted from
 * kernel/events/ring_buffer.c):
 *
 *   Since the mmap() consumer (userspace) can run on a
 *   different CPU:
 *
 *   kernel                             user
 *
 *   if (LOAD ->data_tail) {            LOAD ->data_head
 *                      (A)             smp_rmb()       (C)
 *      STORE $data                     LOAD $data
 *      smp_wmb()       (B)             smp_mb()        (D)
 *      STORE ->data_head               STORE ->data_tail
 *   }
 *
 *   Where A pairs with D, and B pairs with C.
 *
 *   In our case A is a control dependency that separates the load of
 *   ->data_tail from the stores of $data: if ->data_tail indicates
 *   there is no room in the buffer, we do not store $data at all.
 *
 *   D needs to be a full barrier since it separates the data READ
 *   from the tail WRITE.
 *
 *   For B a WMB is sufficient since it separates two WRITEs, and
 *   for C an RMB is sufficient since it separates two READs.
 *
 * Note that instead of the barriers in B, C and D we could also use
 * smp_store_release() in B and D, and smp_load_acquire() in C.
 *
 * However, this optimization does not make sense on all kernel-supported
 * architectures, since on a fair number of them smp_load_acquire() would
 * resolve into a READ_ONCE() + smp_mb() pair, and smp_store_release()
 * into an smp_mb() + WRITE_ONCE() pair.
 *
 * Thus, for those architectures smp_wmb() in B and smp_rmb() in C would
 * still be less expensive. For D the acquire/release form has either the
 * same or a lower cost; thanks to TSO, x86, for example, can avoid the
 * CPU barrier entirely.
 */

static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
{
	/*
	 * Architectures where smp_load_acquire() does not fall back to
	 * a READ_ONCE() + smp_mb() pair.
	 */
#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
    defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
	return smp_load_acquire(&base->data_head);
#else
	/* Fallback: barrier C is provided explicitly by smp_rmb(). */
	u64 head = READ_ONCE(base->data_head);

	smp_rmb();
	return head;
#endif
}

static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
					  u64 tail)
{
	/* Barrier D: order the data reads before the ->data_tail store. */
	smp_store_release(&base->data_tail, tail);
}
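
/*
 * Illustrative sketch (an editor's addition, not part of the kernel API):
 * one way a user-space consumer might drain all currently published
 * records, pairing the two helpers above as barriers C and D from the
 * contract comment. It assumes `data` points at the start of the data
 * area (base + base->data_offset) and that the data area size is a
 * power of two; records that wrap at the end of the data area are not
 * handled here.
 */
static inline void ring_buffer_consume_sketch(struct perf_event_mmap_page *base,
					      unsigned char *data,
					      void (*handle)(struct perf_event_header *ev))
{
	u64 head = ring_buffer_read_head(base);		/* barrier C */
	u64 tail = base->data_tail;			/* only user space writes this */

	while (tail < head) {
		struct perf_event_header *ev;

		ev = (struct perf_event_header *)(data + (tail & (base->data_size - 1)));
		handle(ev);
		tail += ev->size;
	}

	ring_buffer_write_tail(base, tail);		/* barrier D */
}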

#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */