Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _ASM_X86_PERCPU_H |
| 3 | #define _ASM_X86_PERCPU_H |
| 4 | |
/*
 * Segment register and mov mnemonic used for per-cpu accesses:
 * gs/movq when CONFIG_X86_64 is set, fs/movl otherwise.
 */
#ifdef CONFIG_X86_64
#define __percpu_seg		gs
#define __percpu_mov_op		movq
#else
#define __percpu_seg		fs
#define __percpu_mov_op		movl
#endif
| 12 | |
| 13 | #ifdef __ASSEMBLY__ |
| 14 | |
/*
 * PER_CPU finds an address of a per-cpu variable.
 *
 * Args:
 *    var - variable name
 *    reg - register wide enough for __percpu_mov_op: a 32bit register
 *          on 32-bit builds, a 64bit register when CONFIG_X86_64 is set
 *          (the load is then a movq)
 *
 * The resulting address is stored in the "reg" argument.
 *
 * Example (32-bit):
 *    PER_CPU(cpu_gdt_descr, %ebx)
 *
 * PER_CPU_VAR(var) expands to the operand used to reference "var":
 * segment-prefixed on SMP, the plain symbol otherwise.
 */
#ifdef CONFIG_SMP
#define PER_CPU(var, reg)						\
	__percpu_mov_op %__percpu_seg:this_cpu_off, reg;		\
	lea var(reg), reg
#define PER_CPU_VAR(var)	%__percpu_seg:var
#else /* ! SMP */
#define PER_CPU(var, reg)	__percpu_mov_op $var, reg
#define PER_CPU_VAR(var)	var
#endif	/* SMP */

/*
 * INIT_PER_CPU_VAR(var) names the init_per_cpu__<var> symbol under
 * CONFIG_X86_64_SMP and the plain variable otherwise.
 */
#ifdef CONFIG_X86_64_SMP
#define INIT_PER_CPU_VAR(var)	init_per_cpu__##var
#else
#define INIT_PER_CPU_VAR(var)	var
#endif
| 42 | |
| 43 | #else /* ...!ASSEMBLY */ |
| 44 | |
| 45 | #include <linux/kernel.h> |
| 46 | #include <linux/stringify.h> |
| 47 | |
#ifdef CONFIG_SMP
/* Segment override ("%%gs:" or "%%fs:") pasted in front of asm operands. */
#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"
#define __my_cpu_offset		this_cpu_read(this_cpu_off)

/*
 * Compared to the generic __my_cpu_offset version, the following
 * saves one instruction and avoids clobbering a temp register.
 *
 * The asm adds the segment-prefixed this_cpu_off memory operand to
 * "ptr" (tied to the output register through the "0" constraint) and
 * the result is cast back to a __kernel pointer of ptr's pointee type.
 */
#define arch_raw_cpu_ptr(ptr)				\
({							\
	unsigned long tcp_ptr__;			\
	asm volatile("add " __percpu_arg(1) ", %0"	\
		     : "=r" (tcp_ptr__)			\
		     : "m" (this_cpu_off), "0" (ptr));	\
	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
})
#else
/* No segment override when CONFIG_SMP is not set. */
#define __percpu_prefix		""
#endif

/* Asm template fragment for operand x: "<prefix>%x". */
#define __percpu_arg(x)		__percpu_prefix "%" #x
| 69 | |
/*
 * Initialized pointers to per-cpu variables needed for the boot
 * processor need to use these macros to get the proper address
 * offset from __per_cpu_load on SMP.
 *
 * There also must be an entry in vmlinux_64.lds.S
 *
 * DECLARE_INIT_PER_CPU(var) emits an extern declaration, with var's
 * type, of whatever symbol init_per_cpu_var(var) names.
 */
#define DECLARE_INIT_PER_CPU(var) \
       extern typeof(var) init_per_cpu_var(var)

/* init_per_cpu__<var> under CONFIG_X86_64_SMP, the plain variable otherwise. */
#ifdef CONFIG_X86_64_SMP
#define init_per_cpu_var(var)  init_per_cpu__##var
#else
#define init_per_cpu_var(var)  var
#endif
| 85 | |
/* For arch-specific code, we can use direct single-insn ops (they
 * don't give an lvalue though). */
/* Declared only; called from the default: case of the size switches. */
extern void __bad_percpu_size(void);

/*
 * percpu_to_op - emit "<op>{b,w,l,q} val, var" on a per-cpu variable.
 *
 *    qual - asm qualifier (empty or volatile)
 *    op   - instruction mnemonic without size suffix, as a string
 *    var  - per-cpu lvalue; sizeof(var) selects the size suffix
 *    val  - source operand, cast to var's type
 *
 * The never-executed "if (0)" block only assigns val to a temporary of
 * var's type so the compiler type-checks the assignment.
 */
#define percpu_to_op(qual, op, var, val)		\
do {							\
	typedef typeof(var) pto_T__;			\
	if (0) {					\
		pto_T__ pto_tmp__;			\
		pto_tmp__ = (val);			\
		(void)pto_tmp__;			\
	}						\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual (op "b %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "qi" ((pto_T__)(val)));		\
		break;					\
	case 2:						\
		asm qual (op "w %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "ri" ((pto_T__)(val)));		\
		break;					\
	case 4:						\
		asm qual (op "l %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "ri" ((pto_T__)(val)));		\
		break;					\
	case 8:						\
		asm qual (op "q %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "re" ((pto_T__)(val)));		\
		break;					\
	default: __bad_percpu_size();			\
	}						\
} while (0)
| 122 | |
/*
 * Generate a percpu add to memory instruction and optimize code
 * if one is added or subtracted.
 *
 * pao_ID__ is 1 or -1 only when val is a compile-time constant equal to
 * 1 or -1 (selecting inc/dec); otherwise it is 0 and a plain add is
 * emitted. The "if (0)" block exists purely to type-check val against
 * var's type.
 */
#define percpu_add_op(qual, var, val)					\
do {									\
	typedef typeof(var) pao_T__;					\
	const int pao_ID__ = (__builtin_constant_p(val) &&		\
			      ((val) == 1 || (val) == -1)) ?		\
				(int)(val) : 0;				\
	if (0) {							\
		pao_T__ pao_tmp__;					\
		pao_tmp__ = (val);					\
		(void)pao_tmp__;					\
	}								\
	switch (sizeof(var)) {						\
	case 1:								\
		if (pao_ID__ == 1)					\
			asm qual ("incb "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decb "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addb %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "qi" ((pao_T__)(val)));			\
		break;							\
	case 2:								\
		if (pao_ID__ == 1)					\
			asm qual ("incw "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decw "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addw %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "ri" ((pao_T__)(val)));			\
		break;							\
	case 4:								\
		if (pao_ID__ == 1)					\
			asm qual ("incl "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decl "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addl %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "ri" ((pao_T__)(val)));			\
		break;							\
	case 8:								\
		if (pao_ID__ == 1)					\
			asm qual ("incq "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decq "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addq %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "re" ((pao_T__)(val)));			\
		break;							\
	default: __bad_percpu_size();					\
	}								\
} while (0)
| 182 | |
/*
 * percpu_from_op - emit "<op>{b,w,l,q} var, reg" and evaluate to the
 * register result, typed like var.
 *
 *    qual - asm qualifier (empty or volatile)
 *    op   - instruction mnemonic without size suffix, as a string
 *    var  - per-cpu variable, passed as an "m" input operand
 */
#define percpu_from_op(qual, op, var)			\
({							\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual (op "b "__percpu_arg(1)",%0"	\
		    : "=q" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	case 2:						\
		asm qual (op "w "__percpu_arg(1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	case 4:						\
		asm qual (op "l "__percpu_arg(1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	case 8:						\
		asm qual (op "q "__percpu_arg(1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	pfo_ret__;					\
})
| 211 | |
/*
 * percpu_stable_op - like percpu_from_op, but the asm is not given a
 * qualifier and its input is the address of var ("p" constraint,
 * printed with the P modifier) rather than var as a memory operand.
 * See the this_cpu_read_stable() comment for when this is appropriate.
 */
#define percpu_stable_op(op, var)			\
({							\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b "__percpu_arg(P1)",%0"	\
		    : "=q" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 2:						\
		asm(op "w "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 4:						\
		asm(op "l "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 8:						\
		asm(op "q "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	pfo_ret__;					\
})
| 240 | |
/*
 * percpu_unary_op - emit "<op>{b,w,l,q} var" with var as the single
 * read-write memory operand. The statement expression has no value.
 */
#define percpu_unary_op(qual, op, var)			\
({							\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual (op "b "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 2:						\
		asm qual (op "w "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 4:						\
		asm qual (op "l "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 8:						\
		asm qual (op "q "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
})
| 263 | |
/*
 * Add return operation
 *
 * Adds val to the per-cpu variable var with xadd and evaluates to the
 * new value (old value left in the register by xadd, plus val).
 *
 * val is evaluated exactly once, into paro_val__, so an argument with
 * side effects is not executed twice. The 8-byte case uses a plain "+r"
 * register constraint: xadd's register operand cannot be an immediate.
 */
#define percpu_add_return_op(qual, var, val)				\
({									\
	typeof(var) paro_val__ = (val);					\
	typeof(var) paro_ret__ = paro_val__;				\
	switch (sizeof(var)) {						\
	case 1:								\
		asm qual ("xaddb %0, "__percpu_arg(1)			\
			    : "+q" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 2:								\
		asm qual ("xaddw %0, "__percpu_arg(1)			\
			    : "+r" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 4:								\
		asm qual ("xaddl %0, "__percpu_arg(1)			\
			    : "+r" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 8:								\
		asm qual ("xaddq %0, "__percpu_arg(1)			\
			    : "+r" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	paro_ret__ += paro_val__;					\
	paro_ret__;							\
})
| 296 | |
/*
 * xchg is implemented using cmpxchg without a lock prefix. xchg is
 * expensive due to the implied lock prefix.  The processor cannot prefetch
 * cachelines if xchg is used.
 *
 * The asm loads the current value of var into the accumulator, then
 * retries cmpxchg (jnz 1b) until the store of nval succeeds. The macro
 * evaluates to the accumulator contents, i.e. the previous value.
 */
#define percpu_xchg_op(qual, var, nval)					\
({									\
	typeof(var) pxo_ret__;						\
	typeof(var) pxo_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%al"		\
		    "\n1:\tcmpxchgb %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "q" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 2:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%ax"		\
		    "\n1:\tcmpxchgw %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 4:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%eax"		\
		    "\n1:\tcmpxchgl %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 8:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%rax"		\
		    "\n1:\tcmpxchgq %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pxo_ret__;							\
})
| 343 | |
/*
 * cmpxchg has no such implied lock semantics; as a result it is much
 * more efficient for cpu local operations.
 *
 * oval is placed in the accumulator (tied to output 0), nval in a
 * register; the macro evaluates to the accumulator contents after the
 * instruction.
 */
#define percpu_cmpxchg_op(qual, var, oval, nval)			\
({									\
	typeof(var) pco_ret__;						\
	typeof(var) pco_old__ = (oval);					\
	typeof(var) pco_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm qual ("cmpxchgb %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "q" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 2:								\
		asm qual ("cmpxchgw %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 4:								\
		asm qual ("cmpxchgl %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 8:								\
		asm qual ("cmpxchgq %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pco_ret__;							\
})
| 382 | |
/*
 * this_cpu_read() makes gcc load the percpu variable every time it is
 * accessed while this_cpu_read_stable() allows the value to be cached.
 * this_cpu_read_stable() is more efficient and can be used if its value
 * is guaranteed to be valid across cpus.  The current users include
 * get_current() and get_thread_info() both of which are actually
 * per-thread variables implemented as per-cpu variables and thus
 * stable for the duration of the respective task.
 */
#define this_cpu_read_stable(var)	percpu_stable_op("mov", var)
| 393 | |
/*
 * raw_cpu_*() for 1, 2 and 4 byte operands: the generic op macros with
 * an empty asm qualifier (i.e. not volatile).
 */
#define raw_cpu_read_1(pcp)		percpu_from_op(, "mov", pcp)
#define raw_cpu_read_2(pcp)		percpu_from_op(, "mov", pcp)
#define raw_cpu_read_4(pcp)		percpu_from_op(, "mov", pcp)

#define raw_cpu_write_1(pcp, val)	percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_write_2(pcp, val)	percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_write_4(pcp, val)	percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_add_1(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_add_2(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_add_4(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_and_1(pcp, val)		percpu_to_op(, "and", (pcp), val)
#define raw_cpu_and_2(pcp, val)		percpu_to_op(, "and", (pcp), val)
#define raw_cpu_and_4(pcp, val)		percpu_to_op(, "and", (pcp), val)
#define raw_cpu_or_1(pcp, val)		percpu_to_op(, "or", (pcp), val)
#define raw_cpu_or_2(pcp, val)		percpu_to_op(, "or", (pcp), val)
#define raw_cpu_or_4(pcp, val)		percpu_to_op(, "or", (pcp), val)
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 410 | |
/*
 * raw_cpu_xchg() can use a load-store since it is not required to be
 * IRQ-safe.
 *
 * Reads the old value with raw_cpu_read(), stores nval with
 * raw_cpu_write(), and evaluates to the old value.
 */
#define raw_percpu_xchg_op(var, nval)					\
({									\
	typeof(var) pxo_ret__ = raw_cpu_read(var);			\
	raw_cpu_write(var, (nval));					\
	pxo_ret__;							\
})

#define raw_cpu_xchg_1(pcp, val)	raw_percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_2(pcp, val)	raw_percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val)	raw_percpu_xchg_op(pcp, val)
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 425 | |
/*
 * this_cpu_*() for 1, 2 and 4 byte operands: the same op macros as the
 * raw_cpu_*() variants, but with the asm qualified as volatile.
 */
#define this_cpu_read_1(pcp)		percpu_from_op(volatile, "mov", pcp)
#define this_cpu_read_2(pcp)		percpu_from_op(volatile, "mov", pcp)
#define this_cpu_read_4(pcp)		percpu_from_op(volatile, "mov", pcp)
#define this_cpu_write_1(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_write_2(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_write_4(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_add_1(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_add_2(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_add_4(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_and_1(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_and_2(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_and_4(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_or_1(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_or_2(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_or_4(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)

/* add_return and cmpxchg, non-volatile (raw) flavour. */
#define raw_cpu_add_return_1(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_add_return_2(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_add_return_4(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
#define raw_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
#define raw_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)

/* add_return and cmpxchg, volatile (this_cpu) flavour. */
#define this_cpu_add_return_1(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_add_return_2(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_add_return_4(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 458 | |
#ifdef CONFIG_X86_CMPXCHG64
/*
 * percpu_cmpxchg8b_double - cmpxchg8b on the per-cpu pair (pcp1, pcp2).
 *
 * o1/o2 are passed in the "a"/"d" registers and n1/n2 in "b"/"c". The
 * macro evaluates to a bool taken from the Z condition through
 * CC_SET(z)/CC_OUT(z).
 */
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)		\
({									\
	bool __ret;							\
	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
	asm volatile("cmpxchg8b "__percpu_arg(1)			\
		     CC_SET(z)						\
		     : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
		     : "b" (__n1), "c" (__n2));				\
	__ret;								\
})

#define raw_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
#define this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
#endif /* CONFIG_X86_CMPXCHG64 */
| 475 | |
/*
 * Per cpu atomic 64 bit operations are only available under 64 bit.
 * 32 bit must fall back to generic operations.
 */
#ifdef CONFIG_X86_64
#define raw_cpu_read_8(pcp)			percpu_from_op(, "mov", pcp)
#define raw_cpu_write_8(pcp, val)		percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_add_8(pcp, val)			percpu_add_op(, (pcp), val)
#define raw_cpu_and_8(pcp, val)			percpu_to_op(, "and", (pcp), val)
#define raw_cpu_or_8(pcp, val)			percpu_to_op(, "or", (pcp), val)
#define raw_cpu_add_return_8(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_xchg_8(pcp, nval)		raw_percpu_xchg_op(pcp, nval)
#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)

#define this_cpu_read_8(pcp)			percpu_from_op(volatile, "mov", pcp)
#define this_cpu_write_8(pcp, val)		percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_add_8(pcp, val)		percpu_add_op(volatile, (pcp), val)
#define this_cpu_and_8(pcp, val)		percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_or_8(pcp, val)			percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)

/*
 * Pretty complex macro to generate the cmpxchg16b instruction.  The
 * instruction is not supported on early AMD64 processors so we must be
 * able to emulate it in software.  The address used in the cmpxchg16b
 * instruction must be aligned to a 16 byte boundary.
 *
 * alternative_io() is handed two sequences keyed on X86_FEATURE_CX16:
 * a call to this_cpu_cmpxchg16b_emu with the operand address loaded
 * into %rsi, and a native cmpxchg16b followed by setz into the result.
 */
#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2)		\
({									\
	bool __ret;							\
	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
	alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
		       "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t",	\
		       X86_FEATURE_CX16,				\
		       ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),		\
				   "+m" (pcp2), "+d" (__o2)),		\
		       "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");	\
	__ret;								\
})

#define raw_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double
#define this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double

#endif
| 523 | |
| 524 | static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, |
| 525 | const unsigned long __percpu *addr) |
| 526 | { |
| 527 | unsigned long __percpu *a = |
| 528 | (unsigned long __percpu *)addr + nr / BITS_PER_LONG; |
| 529 | |
| 530 | #ifdef CONFIG_X86_64 |
| 531 | return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; |
| 532 | #else |
| 533 | return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; |
| 534 | #endif |
| 535 | } |
| 536 | |
| 537 | static inline bool x86_this_cpu_variable_test_bit(int nr, |
| 538 | const unsigned long __percpu *addr) |
| 539 | { |
| 540 | bool oldbit; |
| 541 | |
| 542 | asm volatile("btl "__percpu_arg(2)",%1" |
| 543 | CC_SET(c) |
| 544 | : CC_OUT(c) (oldbit) |
| 545 | : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); |
| 546 | |
| 547 | return oldbit; |
| 548 | } |
| 549 | |
/*
 * Dispatch on whether nr is a compile-time constant: constants go to
 * the mask-and-read variant, everything else to the btl variant.
 */
#define x86_this_cpu_test_bit(nr, addr)			\
	(__builtin_constant_p((nr))			\
	 ? x86_this_cpu_constant_test_bit((nr), (addr))	\
	 : x86_this_cpu_variable_test_bit((nr), (addr)))
| 554 | |
| 555 | |
| 556 | #include <asm-generic/percpu.h> |
| 557 | |
| 558 | /* We can use this directly for local CPU (faster). */ |
| 559 | DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); |
| 560 | |
| 561 | #endif /* !__ASSEMBLY__ */ |
| 562 | |
| 563 | #ifdef CONFIG_SMP |
| 564 | |
/*
 * Define the "EARLY_PER_CPU" macros.  These are used for some per_cpu
 * variables that are initialized and accessed before there are per_cpu
 * areas allocated.
 *
 * Each DEFINE_ variant emits three objects: the per-cpu variable
 * itself, an __initdata array <name>_early_map[NR_CPUS] with every
 * element set to _initvalue, and a __refdata pointer <name>_early_ptr
 * initially pointing at that array. The DECLARE_ variants emit the
 * matching extern declarations.
 */

#define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)			\
	DEFINE_PER_CPU(_type, _name) = _initvalue;			\
	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
				{ [0 ... NR_CPUS-1] = _initvalue };	\
	__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map

#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue)	\
	DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue;		\
	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
				{ [0 ... NR_CPUS-1] = _initvalue };	\
	__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map

#define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
	EXPORT_PER_CPU_SYMBOL(_name)

#define DECLARE_EARLY_PER_CPU(_type, _name)			\
	DECLARE_PER_CPU(_type, _name);				\
	extern __typeof__(_type) *_name##_early_ptr;		\
	extern __typeof__(_type)  _name##_early_map[]

#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name)		\
	DECLARE_PER_CPU_READ_MOSTLY(_type, _name);		\
	extern __typeof__(_type) *_name##_early_ptr;		\
	extern __typeof__(_type)  _name##_early_map[]
| 595 | |
/*
 * Accessors for early per-cpu data.
 *
 * early_per_cpu_ptr(_name)       - the <name>_early_ptr pointer
 * early_per_cpu_map(_name, _idx) - element _idx of <name>_early_map
 * early_per_cpu(_name, _cpu)     - lvalue for _cpu's copy: the early
 *                                  array slot while <name>_early_ptr is
 *                                  non-NULL, per_cpu(_name, _cpu) otherwise
 *
 * The whole early_per_cpu() expansion is parenthesized so that postfix
 * operators applied to it (e.g. ".field") bind to the dereferenced
 * object rather than to the pointer expression.
 */
#define	early_per_cpu_ptr(_name) (_name##_early_ptr)
#define	early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
#define	early_per_cpu(_name, _cpu) 				\
	(*(early_per_cpu_ptr(_name) ?				\
		&early_per_cpu_ptr(_name)[_cpu] :		\
		&per_cpu(_name, _cpu)))
| 602 | |
| 603 | #else /* !CONFIG_SMP */ |
/*
 * Without CONFIG_SMP there is no early map: the EARLY_ macros reduce to
 * their plain per-cpu counterparts and early_per_cpu_ptr() is NULL.
 */
#define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)		\
	DEFINE_PER_CPU(_type, _name) = _initvalue

#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue)	\
	DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue

#define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
	EXPORT_PER_CPU_SYMBOL(_name)

#define DECLARE_EARLY_PER_CPU(_type, _name)			\
	DECLARE_PER_CPU(_type, _name)

#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name)		\
	DECLARE_PER_CPU_READ_MOSTLY(_type, _name)

#define	early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
#define	early_per_cpu_ptr(_name) NULL
/* no early_per_cpu_map() */
| 622 | |
| 623 | #endif /* !CONFIG_SMP */ |
| 624 | |
| 625 | #endif /* _ASM_X86_PERCPU_H */ |