/*
 * Copyright (c) 2023, Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch_features.h>
#include <arch_helpers.h>
#include <assert.h>
#include <debug.h>
#include <stdlib.h>
#include <string.h>
#include <lib/extensions/sve.h>

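/* Return the ZCR_ELx value of the current Exception level (EL2 or EL1) */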
static inline uint64_t sve_read_zcr_elx(void)
{
	return IS_IN_EL2() ? read_zcr_el2() : read_zcr_el1();
}

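/* Write 'reg_val' to ZCR_ELx of the current Exception level (EL2 or EL1) */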
static inline void sve_write_zcr_elx(uint64_t reg_val)
{
	if (IS_IN_EL2()) {
		write_zcr_el2(reg_val);
	} else {
		write_zcr_el1(reg_val);
	}

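	/* Synchronize the context so the new vector length takes effect */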
	isb();
}

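/* Program the vector length field (SVE_VL) of the current EL's ZCR_ELx with 'sve_vq' */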
static void _sve_config_vq(uint8_t sve_vq)
{
	u_register_t zcr_elx;

	zcr_elx = sve_read_zcr_elx();
	if (IS_IN_EL2()) {
		zcr_elx &= ~(MASK(ZCR_EL2_SVE_VL));
		zcr_elx |= INPLACE(ZCR_EL2_SVE_VL, sve_vq);
	} else {
		zcr_elx &= ~(MASK(ZCR_EL1_SVE_VL));
		zcr_elx |= INPLACE(ZCR_EL1_SVE_VL, sve_vq);
	}
	sve_write_zcr_elx(zcr_elx);
}

/* Set the SVE vector length in the current EL's ZCR_ELx register */
void sve_config_vq(uint8_t sve_vq)
{
	assert(is_armv8_2_sve_present());

	/* Cap 'sve_vq' to the architecturally supported maximum value */
	if (sve_vq > SVE_VQ_ARCH_MAX) {
		sve_vq = SVE_VQ_ARCH_MAX;
	}

	_sve_config_vq(sve_vq);
}

/*
 * Probe all valid vector lengths up to 'sve_max_vq'. Configure ZCR_ELx with
 * each VQ from 0 to 'sve_max_vq' and, for each step, call sve_rdvl to read
 * back the resulting vector length. Convert the vector length to VQ and set
 * the bit corresponding to that VQ.
 * Returns:
 *	bitmap with one bit set for each supported VL
 */
uint32_t sve_probe_vl(uint8_t sve_max_vq)
{
	uint32_t vl_bitmap = 0;
	uint8_t vq, rdvl_vq;

	assert(is_armv8_2_sve_present());

	/* Cap 'sve_max_vq' to the architecturally supported maximum value */
	if (sve_max_vq > SVE_VQ_ARCH_MAX) {
		sve_max_vq = SVE_VQ_ARCH_MAX;
	}

	for (vq = 0; vq <= sve_max_vq; vq++) {
		_sve_config_vq(vq);
		rdvl_vq = SVE_VL_TO_VQ(sve_rdvl_1());
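		/* An unsupported VQ reports an already seen vector length; record each VL once */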
		if (vl_bitmap & BIT_32(rdvl_vq)) {
			continue;
		}
		vl_bitmap |= BIT_32(rdvl_vq);
	}

	return vl_bitmap;
}
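
/*
 * Illustrative usage sketch (not called by this file): probe the supported
 * lengths once, then test the returned bitmap. In the VQ convention used
 * here, bit 'n' corresponds to a vector length of (n + 1) * 128 bits.
 *
 *	uint32_t vl_bitmap = sve_probe_vl(SVE_VQ_ARCH_MAX);
 *
 *	if ((vl_bitmap & BIT_32(1)) != 0U) {
 *		... 256-bit vectors are supported ...
 *	}
 */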

/* Write the values in 'z_regs' to the SVE Z[0-31] registers */
void sve_z_regs_write(const sve_z_regs_t *z_regs)
{
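	/* Each fill_sve_helper(n) loads Z register 'n' from its slot in 'z_regs' */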
	__asm__ volatile(
		".arch_extension sve\n"
		fill_sve_helper(0)
		fill_sve_helper(1)
		fill_sve_helper(2)
		fill_sve_helper(3)
		fill_sve_helper(4)
		fill_sve_helper(5)
		fill_sve_helper(6)
		fill_sve_helper(7)
		fill_sve_helper(8)
		fill_sve_helper(9)
		fill_sve_helper(10)
		fill_sve_helper(11)
		fill_sve_helper(12)
		fill_sve_helper(13)
		fill_sve_helper(14)
		fill_sve_helper(15)
		fill_sve_helper(16)
		fill_sve_helper(17)
		fill_sve_helper(18)
		fill_sve_helper(19)
		fill_sve_helper(20)
		fill_sve_helper(21)
		fill_sve_helper(22)
		fill_sve_helper(23)
		fill_sve_helper(24)
		fill_sve_helper(25)
		fill_sve_helper(26)
		fill_sve_helper(27)
		fill_sve_helper(28)
		fill_sve_helper(29)
		fill_sve_helper(30)
		fill_sve_helper(31)
		".arch_extension nosve\n"
		: : "r" (z_regs));
}

/* Read the SVE Z[0-31] registers and store them in 'z_regs' */
void sve_z_regs_read(sve_z_regs_t *z_regs)
{
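	/* Each read_sve_helper(n) stores Z register 'n' to its slot in 'z_regs' */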
	__asm__ volatile(
		".arch_extension sve\n"
		read_sve_helper(0)
		read_sve_helper(1)
		read_sve_helper(2)
		read_sve_helper(3)
		read_sve_helper(4)
		read_sve_helper(5)
		read_sve_helper(6)
		read_sve_helper(7)
		read_sve_helper(8)
		read_sve_helper(9)
		read_sve_helper(10)
		read_sve_helper(11)
		read_sve_helper(12)
		read_sve_helper(13)
		read_sve_helper(14)
		read_sve_helper(15)
		read_sve_helper(16)
		read_sve_helper(17)
		read_sve_helper(18)
		read_sve_helper(19)
		read_sve_helper(20)
		read_sve_helper(21)
		read_sve_helper(22)
		read_sve_helper(23)
		read_sve_helper(24)
		read_sve_helper(25)
		read_sve_helper(26)
		read_sve_helper(27)
		read_sve_helper(28)
		read_sve_helper(29)
		read_sve_helper(30)
		read_sve_helper(31)
		".arch_extension nosve\n"
		: : "r" (z_regs));
}

/* Write the values in 'p_regs' to the SVE P[0-15] registers */
void sve_p_regs_write(const sve_p_regs_t *p_regs)
{
	__asm__ volatile(
		".arch_extension sve\n"
		fill_sve_p_helper(0)
		fill_sve_p_helper(1)
		fill_sve_p_helper(2)
		fill_sve_p_helper(3)
		fill_sve_p_helper(4)
		fill_sve_p_helper(5)
		fill_sve_p_helper(6)
		fill_sve_p_helper(7)
		fill_sve_p_helper(8)
		fill_sve_p_helper(9)
		fill_sve_p_helper(10)
		fill_sve_p_helper(11)
		fill_sve_p_helper(12)
		fill_sve_p_helper(13)
		fill_sve_p_helper(14)
		fill_sve_p_helper(15)
		".arch_extension nosve\n"
		: : "r" (p_regs));
}

/* Read the SVE P[0-15] registers and store them in 'p_regs' */
void sve_p_regs_read(sve_p_regs_t *p_regs)
{
	__asm__ volatile(
		".arch_extension sve\n"
		read_sve_p_helper(0)
		read_sve_p_helper(1)
		read_sve_p_helper(2)
		read_sve_p_helper(3)
		read_sve_p_helper(4)
		read_sve_p_helper(5)
		read_sve_p_helper(6)
		read_sve_p_helper(7)
		read_sve_p_helper(8)
		read_sve_p_helper(9)
		read_sve_p_helper(10)
		read_sve_p_helper(11)
		read_sve_p_helper(12)
		read_sve_p_helper(13)
		read_sve_p_helper(14)
		read_sve_p_helper(15)
		".arch_extension nosve\n"
		: : "r" (p_regs));
}

/* Write the SVE FFR register with the value passed in 'ffr_regs' */
void sve_ffr_regs_write(const sve_ffr_regs_t *ffr_regs)
{
	uint8_t sve_p_reg[SVE_P_REG_LEN_BYTES];

	/* Save p0. Load 'ffr_regs' to p0 and write FFR. Restore p0 */
	__asm__ volatile(
		".arch_extension sve\n"
		"	str p0, [%1]\n"
		"	ldr p0, [%0]\n"
		"	wrffr p0.B\n"
		"	ldr p0, [%1]\n"
		".arch_extension nosve\n"
		:
		: "r" (ffr_regs), "r" (sve_p_reg)
		: "memory");
}

/* Read the SVE FFR register and store it in 'ffr_regs' */
void sve_ffr_regs_read(sve_ffr_regs_t *ffr_regs)
{
	uint8_t sve_p_reg[SVE_P_REG_LEN_BYTES];

	/* Save p0. Read FFR to p0 and store p0 (FFR) in 'ffr_regs'. Restore p0 */
	__asm__ volatile(
		".arch_extension sve\n"
		"	str p0, [%1]\n"
		"	rdffr p0.B\n"
		"	str p0, [%0]\n"
		"	ldr p0, [%1]\n"
		".arch_extension nosve\n"
		:
		: "r" (ffr_regs), "r" (sve_p_reg)
		: "memory");
}

/*
 * Fill 'z_regs' with random values, then write them to the SVE Z[0-31]
 * registers.
 */
void sve_z_regs_write_rand(sve_z_regs_t *z_regs)
{
	uint32_t rval;
	uint32_t z_size;
	uint8_t *z_reg;

	z_size = (uint32_t)sve_rdvl_1();

	/* Write Z regs */
	rval = rand();
	memset((void *)z_regs, 0, sizeof(sve_z_regs_t));
	for (uint32_t i = 0U; i < SVE_NUM_VECTORS; i++) {
		z_reg = (uint8_t *)z_regs + (i * z_size);

		memset((void *)z_reg, rval * (i + 1), z_size);
	}
	sve_z_regs_write(z_regs);
}

/*
 * Fill 'p_regs' with random values, then write them to the SVE P[0-15]
 * registers.
 */
void sve_p_regs_write_rand(sve_p_regs_t *p_regs)
{
	uint32_t p_size;
	uint8_t *p_reg;
	uint32_t rval;

	p_size = (uint32_t)sve_rdvl_1() / 8;

	/* Write P regs */
	rval = rand();
	memset((void *)p_regs, 0, sizeof(sve_p_regs_t));
	for (uint32_t i = 0U; i < SVE_NUM_P_REGS; i++) {
		p_reg = (uint8_t *)p_regs + (i * p_size);

		memset((void *)p_reg, rval * (i + 1), p_size);
	}
	sve_p_regs_write(p_regs);
}

/*
 * Fill 'ffr_regs' with random values, then write them to the SVE FFR
 * register.
 */
void sve_ffr_regs_write_rand(sve_ffr_regs_t *ffr_regs)
{
	uint32_t ffr_size;
	uint8_t *ffr_reg;
	uint32_t rval;

	ffr_size = (uint32_t)sve_rdvl_1() / 8;

	rval = rand();
	memset((void *)ffr_regs, 0, sizeof(sve_ffr_regs_t));
	for (uint32_t i = 0U; i < SVE_NUM_FFR_REGS; i++) {
		ffr_reg = (uint8_t *)ffr_regs + (i * ffr_size);

		memset((void *)ffr_reg, rval * (i + 1), ffr_size);
	}
	sve_ffr_regs_write(ffr_regs);
}

/*
 * Compare the Z[0-31] register values in 's1' (old values) with those in
 * 's2' (new values).
 *
 * Returns:
 *	0	: all Z[0-31] registers in 's1' and 's2' are equal
 *	nonzero	: bitmap with bit N set for each Z register N that differs
 */
uint64_t sve_z_regs_compare(const sve_z_regs_t *s1, const sve_z_regs_t *s2)
{
	uint32_t z_size;
	uint64_t cmp_bitmap = 0UL;

	z_size = (uint32_t)sve_rdvl_1();

	for (uint32_t i = 0U; i < SVE_NUM_VECTORS; i++) {
		uint8_t *s1_z = (uint8_t *)s1 + (i * z_size);
		uint8_t *s2_z = (uint8_t *)s2 + (i * z_size);

		if ((memcmp(s1_z, s2_z, z_size) == 0)) {
			continue;
		}

		cmp_bitmap |= BIT_64(i);
		VERBOSE("SVE Z_%u mismatch\n", i);
	}

	return cmp_bitmap;
}
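
/*
 * Typical usage sketch (illustrative only, buffers are caller allocated):
 *
 *	sve_z_regs_t before, after;
 *
 *	sve_z_regs_write_rand(&before);
 *	... run code that may modify SVE state ...
 *	sve_z_regs_read(&after);
 *	if (sve_z_regs_compare(&before, &after) != 0UL) {
 *		... at least one Z register changed ...
 *	}
 */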

/*
 * Compare the P[0-15] register values in 's1' (old values) with those in
 * 's2' (new values).
 *
 * Returns:
 *	0	: all P[0-15] registers in 's1' and 's2' are equal
 *	nonzero	: bitmap with bit N set for each P register N that differs
 */
uint64_t sve_p_regs_compare(const sve_p_regs_t *s1, const sve_p_regs_t *s2)
{
	uint32_t p_size;
	uint64_t cmp_bitmap = 0UL;

	/* Size of one predicate register is 1/8th of a Z register */
	p_size = (uint32_t)sve_rdvl_1() / 8U;

	for (uint32_t i = 0U; i < SVE_NUM_P_REGS; i++) {
		uint8_t *s1_p = (uint8_t *)s1 + (i * p_size);
		uint8_t *s2_p = (uint8_t *)s2 + (i * p_size);

		if ((memcmp(s1_p, s2_p, p_size) == 0)) {
			continue;
		}

		cmp_bitmap |= BIT_64(i);
		VERBOSE("SVE P_%u mismatch\n", i);
	}

	return cmp_bitmap;
}

/*
 * Compare the FFR register value in 's1' (old value) with that in 's2'
 * (new value).
 *
 * Returns:
 *	0	: the FFR register values in 's1' and 's2' are equal
 *	nonzero	: the FFR register values differ
 */
uint64_t sve_ffr_regs_compare(const sve_ffr_regs_t *s1, const sve_ffr_regs_t *s2)
{
	uint32_t ffr_size;
	uint64_t cmp_bitmap = 0UL;

	/* Size of the FFR register is 1/8th of a Z register */
	ffr_size = (uint32_t)sve_rdvl_1() / 8U;

	for (uint32_t i = 0U; i < SVE_NUM_FFR_REGS; i++) {
		uint8_t *s1_ffr = (uint8_t *)s1 + (i * ffr_size);
		uint8_t *s2_ffr = (uint8_t *)s2 + (i * ffr_size);

		if ((memcmp(s1_ffr, s2_ffr, ffr_size) == 0)) {
			continue;
		}

		cmp_bitmap |= BIT_64(i);
		VERBOSE("SVE FFR_%u mismatch\n", i);
	}

	return cmp_bitmap;
}