/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed. That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier & don't care about the ordering of loads then the 'wmb'
 *      ordering barrier can be used. Limiting the barrier's effects to stores
 *      allows loads to continue unaffected & potentially allows the CPU to
 *      make progress faster than if younger loads had to wait for older stores
 *      to complete.
 */
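
/*
 * As a hedged illustration (register names & the weakly ordered CPU assumed
 * here are not taken from real callers), the difference between the two
 * barrier types looks roughly like this, using the stype values defined
 * further down in this file:
 *
 *	sw	$t0, 0($a0)
 *	sync	0x0		# full completion barrier: all older accesses
 *				# must complete before anything younger
 *	sw	$t1, 0($a1)
 *	lw	$t2, 0($a2)
 *
 *	sw	$t0, 0($a0)
 *	sync	0x4		# 'wmb' ordering barrier: older stores are
 *				# ordered before younger stores only
 *	sw	$t1, 0($a1)
 *	lw	$t2, 0($a2)	# unaffected; may be performed without waiting
 *				# for the older stores to complete
 */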

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates, all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14
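
/*
 * A hedged sketch of the intended usage pattern (the operands are
 * illustrative, not taken from any real caller): issue the global
 * invalidate, then a GINV sync to wait until every coherent CPU has
 * observed it.
 *
 *	ginvt	$a0, 0		# global TLB invalidate (MIPSr6)
 *	sync	0x14		# __SYNC_ginv: wait for global observation
 */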

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The cases described above manifest as an error in the cache coherence
 * protocol: the Invalidate from a competing LL-SC goes 'missing', so the SC
 * erroneously observes that its core still holds the line in the Exclusive
 * state and lets the SC proceed.
 *
 * Therefore the error only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif
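
/*
 * A hedged sketch (not copied from any real caller) of where the extra sync
 * instructions described above are placed around a generic LL/SC loop;
 * register names & label layout are illustrative only:
 *
 *	sync			# barrier before the LL (reordering case)
 * 1:	ll	$t0, 0($a0)
 *	bne	$t0, $a1, 2f	# mispredictable exit from the loop
 *	move	$t1, $a2
 *	sc	$t1, 0($a0)
 *	beqz	$t1, 1b
 * 2:	sync			# barrier at the affected branch target
 *				# (speculation case)
 *
 * (Branch delay slots are omitted for clarity.)
 */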

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
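
/*
 * For example, on Octeon the assembler evaluates the repeat count as follows
 * (a sketch of the arithmetic, using only the values defined above):
 *
 *	__SYNC_rpt(__SYNC_wmb)  -> 1 - (-1) = 2	  # wmb barriers emitted twice
 *	__SYNC_rpt(__SYNC_full) -> 1 - 0    = 1	  # everything else emitted once
 */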

/*
 * The main event. Here we actually emit a sync instruction of a given type,
 * but only if the type is not __SYNC_none and the given reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif
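
/*
 * An illustrative sketch of how the condition above behaves, using values
 * defined earlier in this file (the 'nop' is just an example _else argument):
 *
 *	____SYNC(__SYNC_full, __SYNC_always, )	  emits 'sync 0x00' (within
 *						  .set push/pop)
 *	____SYNC(__SYNC_none, __SYNC_always, )	  emits nothing; type is -1, so
 *						  the empty _else is used
 *	____SYNC(__SYNC_full, 0, nop)		  emits the _else code, 'nop'
 */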

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)
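
/*
 * A hedged usage sketch (the function name below is hypothetical, not part of
 * this header): in C the macro stringifies into an inline asm statement,
 * while .S files can use __SYNC() directly. Here a full barrier is emitted
 * only when the kernel is configured for weakly ordered memory:
 *
 *	static inline void example_mb(void)
 *	{
 *		asm volatile(__SYNC(full, weak_ordering) ::: "memory");
 *	}
 */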

#endif /* __MIPS_ASM_SYNC_H__ */