// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

#define CSKY_PMU_MAX_EVENTS 32
#define DEFAULT_COUNT_WIDTH 48

#define HPCR		"<0, 0x0>"	/* PMU Control reg */
#define HPSPR		"<0, 0x1>"	/* Start PC reg */
#define HPEPR		"<0, 0x2>"	/* End PC reg */
#define HPSIR		"<0, 0x3>"	/* Soft Counter reg */
#define HPCNTENR	"<0, 0x4>"	/* Count Enable reg */
#define HPINTENR	"<0, 0x5>"	/* Interrupt Enable reg */
#define HPOFSR		"<0, 0x6>"	/* Interrupt Status reg */

/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[CSKY_PMU_MAX_EVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(CSKY_PMU_MAX_EVENTS)];
};

static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void);
static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val);

static struct csky_pmu_t {
	struct pmu			pmu;
	struct pmu_hw_events __percpu	*hw_events;
	struct platform_device		*plat_device;
	uint32_t			count_width;
	uint32_t			hpcr;
	u64				max_period;
} csky_pmu;
static int csky_pmu_irq;
#define to_csky_pmu(p)	(container_of(p, struct csky_pmu_t, pmu))

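/*
 * Coprocessor register accessors: cprgr/cpwgr read and write the counter
 * ("general") registers, cprcr/cpwcr the control registers. The "<0, 0xN>"
 * operand strings are pasted into the instruction text and select register
 * N of coprocessor 0.
 */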
#define cprgr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprgr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwgr(reg, val)		\
({				\
	asm volatile(		\
	"cpwgr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})

#define cprcr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprcr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwcr(reg, val)		\
({				\
	asm volatile(		\
	"cpwcr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})

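/*
 * Each 64-bit counter is exposed as a lo/hi pair of 32-bit registers.
 * The read helpers below re-read the high word until it is stable, so
 * that a carry from lo to hi between the two accesses cannot yield a
 * torn value; the write helpers simply store both halves.
 */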
/* cycle counter */
static uint64_t csky_pmu_read_cc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x3>");
		lo = cprgr("<0, 0x2>");
		hi = cprgr("<0, 0x3>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_cc(uint64_t val)
{
	cpwgr("<0, 0x2>", (uint32_t) val);
	cpwgr("<0, 0x3>", (uint32_t) (val >> 32));
}

/* instruction counter */
static uint64_t csky_pmu_read_ic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x5>");
		lo = cprgr("<0, 0x4>");
		hi = cprgr("<0, 0x5>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_ic(uint64_t val)
{
	cpwgr("<0, 0x4>", (uint32_t) val);
	cpwgr("<0, 0x5>", (uint32_t) (val >> 32));
}

/* l1 icache access counter */
static uint64_t csky_pmu_read_icac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x7>");
		lo = cprgr("<0, 0x6>");
		hi = cprgr("<0, 0x7>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_icac(uint64_t val)
{
	cpwgr("<0, 0x6>", (uint32_t) val);
	cpwgr("<0, 0x7>", (uint32_t) (val >> 32));
}

/* l1 icache miss counter */
static uint64_t csky_pmu_read_icmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x9>");
		lo = cprgr("<0, 0x8>");
		hi = cprgr("<0, 0x9>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_icmc(uint64_t val)
{
	cpwgr("<0, 0x8>", (uint32_t) val);
	cpwgr("<0, 0x9>", (uint32_t) (val >> 32));
}

/* l1 dcache access counter */
static uint64_t csky_pmu_read_dcac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0xb>");
		lo = cprgr("<0, 0xa>");
		hi = cprgr("<0, 0xb>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcac(uint64_t val)
{
	cpwgr("<0, 0xa>", (uint32_t) val);
	cpwgr("<0, 0xb>", (uint32_t) (val >> 32));
}

/* l1 dcache miss counter */
static uint64_t csky_pmu_read_dcmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0xd>");
		lo = cprgr("<0, 0xc>");
		hi = cprgr("<0, 0xd>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcmc(uint64_t val)
{
	cpwgr("<0, 0xc>", (uint32_t) val);
	cpwgr("<0, 0xd>", (uint32_t) (val >> 32));
}

/* l2 cache access counter */
static uint64_t csky_pmu_read_l2ac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0xf>");
		lo = cprgr("<0, 0xe>");
		hi = cprgr("<0, 0xf>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2ac(uint64_t val)
{
	cpwgr("<0, 0xe>", (uint32_t) val);
	cpwgr("<0, 0xf>", (uint32_t) (val >> 32));
}

/* l2 cache miss counter */
static uint64_t csky_pmu_read_l2mc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x11>");
		lo = cprgr("<0, 0x10>");
		hi = cprgr("<0, 0x11>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2mc(uint64_t val)
{
	cpwgr("<0, 0x10>", (uint32_t) val);
	cpwgr("<0, 0x11>", (uint32_t) (val >> 32));
}

/* I-UTLB miss counter */
static uint64_t csky_pmu_read_iutlbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x15>");
		lo = cprgr("<0, 0x14>");
		hi = cprgr("<0, 0x15>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_iutlbmc(uint64_t val)
{
	cpwgr("<0, 0x14>", (uint32_t) val);
	cpwgr("<0, 0x15>", (uint32_t) (val >> 32));
}

/* D-UTLB miss counter */
static uint64_t csky_pmu_read_dutlbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x17>");
		lo = cprgr("<0, 0x16>");
		hi = cprgr("<0, 0x17>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dutlbmc(uint64_t val)
{
	cpwgr("<0, 0x16>", (uint32_t) val);
	cpwgr("<0, 0x17>", (uint32_t) (val >> 32));
}

/* JTLB miss counter */
static uint64_t csky_pmu_read_jtlbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x19>");
		lo = cprgr("<0, 0x18>");
		hi = cprgr("<0, 0x19>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_jtlbmc(uint64_t val)
{
	cpwgr("<0, 0x18>", (uint32_t) val);
	cpwgr("<0, 0x19>", (uint32_t) (val >> 32));
}

/* software counter */
static uint64_t csky_pmu_read_softc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x1b>");
		lo = cprgr("<0, 0x1a>");
		hi = cprgr("<0, 0x1b>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_softc(uint64_t val)
{
	cpwgr("<0, 0x1a>", (uint32_t) val);
	cpwgr("<0, 0x1b>", (uint32_t) (val >> 32));
}

/* conditional branch mispredict counter */
static uint64_t csky_pmu_read_cbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x1d>");
		lo = cprgr("<0, 0x1c>");
		hi = cprgr("<0, 0x1d>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_cbmc(uint64_t val)
{
	cpwgr("<0, 0x1c>", (uint32_t) val);
	cpwgr("<0, 0x1d>", (uint32_t) (val >> 32));
}

/* conditional branch instruction counter */
static uint64_t csky_pmu_read_cbic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x1f>");
		lo = cprgr("<0, 0x1e>");
		hi = cprgr("<0, 0x1f>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_cbic(uint64_t val)
{
	cpwgr("<0, 0x1e>", (uint32_t) val);
	cpwgr("<0, 0x1f>", (uint32_t) (val >> 32));
}

/* indirect branch mispredict counter */
static uint64_t csky_pmu_read_ibmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x21>");
		lo = cprgr("<0, 0x20>");
		hi = cprgr("<0, 0x21>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_ibmc(uint64_t val)
{
	cpwgr("<0, 0x20>", (uint32_t) val);
	cpwgr("<0, 0x21>", (uint32_t) (val >> 32));
}

/* indirect branch instruction counter */
static uint64_t csky_pmu_read_ibic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x23>");
		lo = cprgr("<0, 0x22>");
		hi = cprgr("<0, 0x23>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_ibic(uint64_t val)
{
	cpwgr("<0, 0x22>", (uint32_t) val);
	cpwgr("<0, 0x23>", (uint32_t) (val >> 32));
}

/* LSU spec fail counter */
static uint64_t csky_pmu_read_lsfc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x25>");
		lo = cprgr("<0, 0x24>");
		hi = cprgr("<0, 0x25>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_lsfc(uint64_t val)
{
	cpwgr("<0, 0x24>", (uint32_t) val);
	cpwgr("<0, 0x25>", (uint32_t) (val >> 32));
}

/* store instruction counter */
static uint64_t csky_pmu_read_sic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x27>");
		lo = cprgr("<0, 0x26>");
		hi = cprgr("<0, 0x27>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_sic(uint64_t val)
{
	cpwgr("<0, 0x26>", (uint32_t) val);
	cpwgr("<0, 0x27>", (uint32_t) (val >> 32));
}

/* dcache read access counter */
static uint64_t csky_pmu_read_dcrac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x29>");
		lo = cprgr("<0, 0x28>");
		hi = cprgr("<0, 0x29>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcrac(uint64_t val)
{
	cpwgr("<0, 0x28>", (uint32_t) val);
	cpwgr("<0, 0x29>", (uint32_t) (val >> 32));
}

/* dcache read miss counter */
static uint64_t csky_pmu_read_dcrmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x2b>");
		lo = cprgr("<0, 0x2a>");
		hi = cprgr("<0, 0x2b>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcrmc(uint64_t val)
{
	cpwgr("<0, 0x2a>", (uint32_t) val);
	cpwgr("<0, 0x2b>", (uint32_t) (val >> 32));
}

/* dcache write access counter */
static uint64_t csky_pmu_read_dcwac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x2d>");
		lo = cprgr("<0, 0x2c>");
		hi = cprgr("<0, 0x2d>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcwac(uint64_t val)
{
	cpwgr("<0, 0x2c>", (uint32_t) val);
	cpwgr("<0, 0x2d>", (uint32_t) (val >> 32));
}

/* dcache write miss counter */
static uint64_t csky_pmu_read_dcwmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x2f>");
		lo = cprgr("<0, 0x2e>");
		hi = cprgr("<0, 0x2f>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcwmc(uint64_t val)
{
	cpwgr("<0, 0x2e>", (uint32_t) val);
	cpwgr("<0, 0x2f>", (uint32_t) (val >> 32));
}

/* l2cache read access counter */
static uint64_t csky_pmu_read_l2rac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x31>");
		lo = cprgr("<0, 0x30>");
		hi = cprgr("<0, 0x31>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2rac(uint64_t val)
{
	cpwgr("<0, 0x30>", (uint32_t) val);
	cpwgr("<0, 0x31>", (uint32_t) (val >> 32));
}

/* l2cache read miss counter */
static uint64_t csky_pmu_read_l2rmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x33>");
		lo = cprgr("<0, 0x32>");
		hi = cprgr("<0, 0x33>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2rmc(uint64_t val)
{
	cpwgr("<0, 0x32>", (uint32_t) val);
	cpwgr("<0, 0x33>", (uint32_t) (val >> 32));
}

/* l2cache write access counter */
static uint64_t csky_pmu_read_l2wac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x35>");
		lo = cprgr("<0, 0x34>");
		hi = cprgr("<0, 0x35>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2wac(uint64_t val)
{
	cpwgr("<0, 0x34>", (uint32_t) val);
	cpwgr("<0, 0x35>", (uint32_t) (val >> 32));
}

/* l2cache write miss counter */
static uint64_t csky_pmu_read_l2wmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x37>");
		lo = cprgr("<0, 0x36>");
		hi = cprgr("<0, 0x37>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2wmc(uint64_t val)
{
	cpwgr("<0, 0x36>", (uint32_t) val);
	cpwgr("<0, 0x37>", (uint32_t) (val >> 32));
}

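/*
 * Map the generic perf hardware and cache event IDs onto the fixed
 * counter indices above; the index doubles as the slot into the
 * hw_raw_read/write_mapping tables.
 */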
#define HW_OP_UNSUPPORTED	0xffff
static const int csky_pmu_hw_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x2,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0xf,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0xe,
	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= HW_OP_UNSUPPORTED,
};

#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff
static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x5,
			[C(RESULT_MISS)] = 0x6,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x14,
			[C(RESULT_MISS)] = 0x15,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x16,
			[C(RESULT_MISS)] = 0x17,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
#endif
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x3,
			[C(RESULT_MISS)] = 0x4,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x7,
			[C(RESULT_MISS)] = 0x8,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x18,
			[C(RESULT_MISS)] = 0x19,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x1a,
			[C(RESULT_MISS)] = 0x1b,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
#endif
	},
	[C(DTLB)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x14,
			[C(RESULT_MISS)] = 0xb,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x16,
			[C(RESULT_MISS)] = 0xb,
		},
#endif
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x3,
			[C(RESULT_MISS)] = 0xa,
		},
#endif
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
};

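/*
 * Arm the counter so that it overflows after "left" events: program it
 * with -left (masked to the implemented counter width) and let it count
 * up through zero, at which point the overflow interrupt fires.
 */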
int csky_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)csky_pmu.max_period)
		left = csky_pmu.max_period;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future "deltas":
	 */
	local64_set(&hwc->prev_count, (u64)(-left));

	if (hw_raw_write_mapping[hwc->idx] != NULL)
		hw_raw_write_mapping[hwc->idx]((u64)(-left) &
					       csky_pmu.max_period);

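	/* Clear this counter's stale overflow status before restarting it. */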
	cpwcr(HPOFSR, ~BIT(hwc->idx) & cprcr(HPOFSR));

	perf_event_update_userpage(event);

	return ret;
}

static void csky_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc)
{
	uint64_t prev_raw_count = local64_read(&hwc->prev_count);
	/*
	 * Sign extend count value to 64bit, otherwise delta calculation
	 * would be incorrect when overflow occurs.
	 */
	uint64_t new_raw_count = sign_extend64(
		hw_raw_read_mapping[hwc->idx](), csky_pmu.count_width - 1);
	int64_t delta = new_raw_count - prev_raw_count;

	/*
	 * We aren't afraid of hwc->prev_count changing beneath our feet
	 * because there's no way for us to re-enter this function anytime.
	 */
	local64_set(&hwc->prev_count, new_raw_count);
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}

static void csky_pmu_reset(void *info)
{
	cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1));
}

static void csky_pmu_read(struct perf_event *event)
{
	csky_perf_event_update(event, &event->hw);
}

static int csky_pmu_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;

	cache_type = (config >> 0) & 0xff;
	cache_op = (config >> 8) & 0xff;
	cache_result = (config >> 16) & 0xff;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	return csky_pmu_cache_map[cache_type][cache_op][cache_result];
}

static int csky_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		if (event->attr.config >= PERF_COUNT_HW_MAX)
			return -ENOENT;
		ret = csky_pmu_hw_map[event->attr.config];
		if (ret == HW_OP_UNSUPPORTED)
			return -ENOENT;
		hwc->idx = ret;
		break;
	case PERF_TYPE_HW_CACHE:
		ret = csky_pmu_cache_event(event->attr.config);
		if (ret < 0 || ret == CACHE_OP_UNSUPPORTED)
			return -ENOENT;
		hwc->idx = ret;
		break;
	case PERF_TYPE_RAW:
		if (event->attr.config >= CSKY_PMU_MAX_EVENTS ||
		    hw_raw_read_mapping[event->attr.config] == NULL)
			return -ENOENT;
		hwc->idx = event->attr.config;
		break;
	default:
		return -ENOENT;
	}

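	/*
	 * HPCR privilege filter, inferred from the exclude_* handling
	 * below: BIT(2) enables counting in kernel mode, BIT(3) in user
	 * mode.
	 */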
	if (event->attr.exclude_user)
		csky_pmu.hpcr = BIT(2);
	else if (event->attr.exclude_kernel)
		csky_pmu.hpcr = BIT(3);
	else
		csky_pmu.hpcr = BIT(2) | BIT(3);

	csky_pmu.hpcr |= BIT(1) | BIT(0);

	return 0;
}

/* starts all counters */
static void csky_pmu_enable(struct pmu *pmu)
{
	cpwcr(HPCR, csky_pmu.hpcr);
}

/* stops all counters */
static void csky_pmu_disable(struct pmu *pmu)
{
	cpwcr(HPCR, BIT(1));
}

static void csky_pmu_start(struct perf_event *event, int flags)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	csky_pmu_event_set_period(event);

	local_irq_save(flg);

	cpwcr(HPINTENR, BIT(idx) | cprcr(HPINTENR));
	cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR));

	local_irq_restore(flg);
}

static void csky_pmu_stop_event(struct perf_event *event)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	local_irq_save(flg);

	cpwcr(HPINTENR, ~BIT(idx) & cprcr(HPINTENR));
	cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR));

	local_irq_restore(flg);
}

static void csky_pmu_stop(struct perf_event *event, int flags)
{
	if (!(event->hw.state & PERF_HES_STOPPED)) {
		csky_pmu_stop_event(event);
		event->hw.state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) &&
	    !(event->hw.state & PERF_HES_UPTODATE)) {
		csky_perf_event_update(event, &event->hw);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static void csky_pmu_del(struct perf_event *event, int flags)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
	struct hw_perf_event *hwc = &event->hw;

	csky_pmu_stop(event, PERF_EF_UPDATE);

	hw_events->events[hwc->idx] = NULL;

	perf_event_update_userpage(event);
}

/* install the event on its fixed counter, optionally start counting */
static int csky_pmu_add(struct perf_event *event, int flags)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
	struct hw_perf_event *hwc = &event->hw;

	hw_events->events[hwc->idx] = event;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		csky_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);

	return 0;
}

static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev)
{
	struct perf_sample_data data;
	struct pmu_hw_events *cpuc = this_cpu_ptr(csky_pmu.hw_events);
	struct pt_regs *regs;
	int idx;

	/*
	 * Did an overflow occur?
	 */
	if (!cprcr(HPOFSR))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	csky_pmu_disable(&csky_pmu.pmu);

	for (idx = 0; idx < CSKY_PMU_MAX_EVENTS; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;
		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!(cprcr(HPOFSR) & BIT(idx)))
			continue;

		hwc = &event->hw;
		csky_perf_event_update(event, &event->hw);
		perf_sample_data_init(&data, 0, hwc->last_period);
		csky_pmu_event_set_period(event);

		if (perf_event_overflow(event, &data, regs))
			csky_pmu_stop_event(event);
	}

	csky_pmu_enable(&csky_pmu.pmu);

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}

static int csky_pmu_request_irq(irq_handler_t handler)
{
	int err, irqs;
	struct platform_device *pmu_device = csky_pmu.plat_device;

	if (!pmu_device)
		return -ENODEV;

	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (irqs < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	csky_pmu_irq = platform_get_irq(pmu_device, 0);
	if (csky_pmu_irq < 0)
		return -ENODEV;
	err = request_percpu_irq(csky_pmu_irq, handler, "csky-pmu",
				 this_cpu_ptr(csky_pmu.hw_events));
	if (err) {
		pr_err("unable to request IRQ%d for CSKY PMU counters\n",
		       csky_pmu_irq);
		return err;
	}

	return 0;
}

static void csky_pmu_free_irq(void)
{
	int irq;
	struct platform_device *pmu_device = csky_pmu.plat_device;

	irq = platform_get_irq(pmu_device, 0);
	if (irq >= 0)
		free_percpu_irq(irq, this_cpu_ptr(csky_pmu.hw_events));
}

int init_hw_perf_events(void)
{
	csky_pmu.hw_events = alloc_percpu_gfp(struct pmu_hw_events,
					      GFP_KERNEL);
	if (!csky_pmu.hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		return -ENOMEM;
	}

	csky_pmu.pmu = (struct pmu) {
		.pmu_enable	= csky_pmu_enable,
		.pmu_disable	= csky_pmu_disable,
		.event_init	= csky_pmu_event_init,
		.add		= csky_pmu_add,
		.del		= csky_pmu_del,
		.start		= csky_pmu_start,
		.stop		= csky_pmu_stop,
		.read		= csky_pmu_read,
	};

	memset((void *)hw_raw_read_mapping, 0,
	       sizeof(hw_raw_read_mapping));

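	/*
	 * Note: raw event index 0x9 is left unmapped here and in the
	 * write table below; no counter register pair is assigned to it.
	 */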
	hw_raw_read_mapping[0x1] = csky_pmu_read_cc;
	hw_raw_read_mapping[0x2] = csky_pmu_read_ic;
	hw_raw_read_mapping[0x3] = csky_pmu_read_icac;
	hw_raw_read_mapping[0x4] = csky_pmu_read_icmc;
	hw_raw_read_mapping[0x5] = csky_pmu_read_dcac;
	hw_raw_read_mapping[0x6] = csky_pmu_read_dcmc;
	hw_raw_read_mapping[0x7] = csky_pmu_read_l2ac;
	hw_raw_read_mapping[0x8] = csky_pmu_read_l2mc;
	hw_raw_read_mapping[0xa] = csky_pmu_read_iutlbmc;
	hw_raw_read_mapping[0xb] = csky_pmu_read_dutlbmc;
	hw_raw_read_mapping[0xc] = csky_pmu_read_jtlbmc;
	hw_raw_read_mapping[0xd] = csky_pmu_read_softc;
	hw_raw_read_mapping[0xe] = csky_pmu_read_cbmc;
	hw_raw_read_mapping[0xf] = csky_pmu_read_cbic;
	hw_raw_read_mapping[0x10] = csky_pmu_read_ibmc;
	hw_raw_read_mapping[0x11] = csky_pmu_read_ibic;
	hw_raw_read_mapping[0x12] = csky_pmu_read_lsfc;
	hw_raw_read_mapping[0x13] = csky_pmu_read_sic;
	hw_raw_read_mapping[0x14] = csky_pmu_read_dcrac;
	hw_raw_read_mapping[0x15] = csky_pmu_read_dcrmc;
	hw_raw_read_mapping[0x16] = csky_pmu_read_dcwac;
	hw_raw_read_mapping[0x17] = csky_pmu_read_dcwmc;
	hw_raw_read_mapping[0x18] = csky_pmu_read_l2rac;
	hw_raw_read_mapping[0x19] = csky_pmu_read_l2rmc;
	hw_raw_read_mapping[0x1a] = csky_pmu_read_l2wac;
	hw_raw_read_mapping[0x1b] = csky_pmu_read_l2wmc;

	memset((void *)hw_raw_write_mapping, 0,
	       sizeof(hw_raw_write_mapping));

	hw_raw_write_mapping[0x1] = csky_pmu_write_cc;
	hw_raw_write_mapping[0x2] = csky_pmu_write_ic;
	hw_raw_write_mapping[0x3] = csky_pmu_write_icac;
	hw_raw_write_mapping[0x4] = csky_pmu_write_icmc;
	hw_raw_write_mapping[0x5] = csky_pmu_write_dcac;
	hw_raw_write_mapping[0x6] = csky_pmu_write_dcmc;
	hw_raw_write_mapping[0x7] = csky_pmu_write_l2ac;
	hw_raw_write_mapping[0x8] = csky_pmu_write_l2mc;
	hw_raw_write_mapping[0xa] = csky_pmu_write_iutlbmc;
	hw_raw_write_mapping[0xb] = csky_pmu_write_dutlbmc;
	hw_raw_write_mapping[0xc] = csky_pmu_write_jtlbmc;
	hw_raw_write_mapping[0xd] = csky_pmu_write_softc;
	hw_raw_write_mapping[0xe] = csky_pmu_write_cbmc;
	hw_raw_write_mapping[0xf] = csky_pmu_write_cbic;
	hw_raw_write_mapping[0x10] = csky_pmu_write_ibmc;
	hw_raw_write_mapping[0x11] = csky_pmu_write_ibic;
	hw_raw_write_mapping[0x12] = csky_pmu_write_lsfc;
	hw_raw_write_mapping[0x13] = csky_pmu_write_sic;
	hw_raw_write_mapping[0x14] = csky_pmu_write_dcrac;
	hw_raw_write_mapping[0x15] = csky_pmu_write_dcrmc;
	hw_raw_write_mapping[0x16] = csky_pmu_write_dcwac;
	hw_raw_write_mapping[0x17] = csky_pmu_write_dcwmc;
	hw_raw_write_mapping[0x18] = csky_pmu_write_l2rac;
	hw_raw_write_mapping[0x19] = csky_pmu_write_l2rmc;
	hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac;
	hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc;

	return 0;
}

static int csky_pmu_starting_cpu(unsigned int cpu)
{
	enable_percpu_irq(csky_pmu_irq, 0);
	return 0;
}

static int csky_pmu_dying_cpu(unsigned int cpu)
{
	disable_percpu_irq(csky_pmu_irq);
	return 0;
}

int csky_pmu_device_probe(struct platform_device *pdev,
			  const struct of_device_id *of_table)
{
	struct device_node *node = pdev->dev.of_node;
	int ret;

	ret = init_hw_perf_events();
	if (ret) {
		pr_notice("[perf] failed to probe PMU!\n");
		return ret;
	}

	if (of_property_read_u32(node, "count-width",
				 &csky_pmu.count_width)) {
		csky_pmu.count_width = DEFAULT_COUNT_WIDTH;
	}
	csky_pmu.max_period = BIT_ULL(csky_pmu.count_width) - 1;

	csky_pmu.plat_device = pdev;

	/* Ensure the PMU has sane values out of reset. */
	on_each_cpu(csky_pmu_reset, &csky_pmu, 1);

	ret = csky_pmu_request_irq(csky_pmu_handle_irq);
	if (ret) {
		csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
		pr_notice("[perf] PMU request irq fail!\n");
	}

	ret = cpuhp_setup_state(CPUHP_AP_PERF_ONLINE, "AP_PERF_ONLINE",
				csky_pmu_starting_cpu,
				csky_pmu_dying_cpu);
	if (ret) {
		csky_pmu_free_irq();
		free_percpu(csky_pmu.hw_events);
		return ret;
	}

	ret = perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW);
	if (ret) {
		csky_pmu_free_irq();
		free_percpu(csky_pmu.hw_events);
	}

	return ret;
}

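/*
 * Illustrative device-tree node for this driver (property values are
 * examples only; the interrupt specifier depends on the SoC's interrupt
 * controller):
 *
 *	pmu: pmu {
 *		compatible = "csky,csky-pmu";
 *		interrupts = <23 IRQ_TYPE_LEVEL_HIGH>;
 *		count-width = <48>;
 *	};
 */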
static const struct of_device_id csky_pmu_of_device_ids[] = {
	{.compatible = "csky,csky-pmu"},
	{},
};

static int csky_pmu_dev_probe(struct platform_device *pdev)
{
	return csky_pmu_device_probe(pdev, csky_pmu_of_device_ids);
}

static struct platform_driver csky_pmu_driver = {
	.driver = {
		.name = "csky-pmu",
		.of_match_table = csky_pmu_of_device_ids,
	},
	.probe = csky_pmu_dev_probe,
};

static int __init csky_pmu_probe(void)
{
	int ret;

	ret = platform_driver_register(&csky_pmu_driver);
	if (ret)
		pr_notice("[perf] PMU initialization failed\n");
	else
		pr_notice("[perf] PMU initialization done\n");

	return ret;
}

device_initcall(csky_pmu_probe);