Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/compiler.h> |
| 3 | #include <string.h> |
| 4 | #include <perf/cpumap.h> |
| 5 | #include <perf/evlist.h> |
| 6 | #include "metricgroup.h" |
| 7 | #include "tests.h" |
| 8 | #include "pmu-events/pmu-events.h" |
| 9 | #include "evlist.h" |
| 10 | #include "rblist.h" |
| 11 | #include "debug.h" |
| 12 | #include "expr.h" |
| 13 | #include "stat.h" |
| 14 | |
| 15 | static struct pmu_event pme_test[] = { |
| 16 | { |
| 17 | .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread", |
| 18 | .metric_name = "IPC", |
| 19 | .metric_group = "group1", |
| 20 | }, |
| 21 | { |
| 22 | .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * " |
| 23 | "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))", |
| 24 | .metric_name = "Frontend_Bound_SMT", |
| 25 | }, |
| 26 | { |
| 27 | .metric_expr = "l1d\\-loads\\-misses / inst_retired.any", |
| 28 | .metric_name = "dcache_miss_cpi", |
| 29 | }, |
| 30 | { |
| 31 | .metric_expr = "l1i\\-loads\\-misses / inst_retired.any", |
| 32 | .metric_name = "icache_miss_cycles", |
| 33 | }, |
| 34 | { |
| 35 | .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)", |
| 36 | .metric_name = "cache_miss_cycles", |
| 37 | .metric_group = "group1", |
| 38 | }, |
| 39 | { |
| 40 | .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit", |
| 41 | .metric_name = "DCache_L2_All_Hits", |
| 42 | }, |
| 43 | { |
| 44 | .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + " |
| 45 | "l2_rqsts.pf_miss + l2_rqsts.rfo_miss", |
| 46 | .metric_name = "DCache_L2_All_Miss", |
| 47 | }, |
| 48 | { |
| 49 | .metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss", |
| 50 | .metric_name = "DCache_L2_All", |
| 51 | }, |
| 52 | { |
| 53 | .metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)", |
| 54 | .metric_name = "DCache_L2_Hits", |
| 55 | }, |
| 56 | { |
| 57 | .metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)", |
| 58 | .metric_name = "DCache_L2_Misses", |
| 59 | }, |
| 60 | { |
| 61 | .metric_expr = "ipc + m2", |
| 62 | .metric_name = "M1", |
| 63 | }, |
| 64 | { |
| 65 | .metric_expr = "ipc + m1", |
| 66 | .metric_name = "M2", |
| 67 | }, |
| 68 | { |
| 69 | .metric_expr = "1/m3", |
| 70 | .metric_name = "M3", |
| 71 | }, |
| 72 | { |
| 73 | .name = NULL, |
| 74 | } |
| 75 | }; |
| 76 | |
| 77 | static struct pmu_events_map map = { |
| 78 | .cpuid = "test", |
| 79 | .version = "1", |
| 80 | .type = "core", |
| 81 | .table = pme_test, |
| 82 | }; |
| 83 | |
| 84 | struct value { |
| 85 | const char *event; |
| 86 | u64 val; |
| 87 | }; |
| 88 | |
| 89 | static u64 find_value(const char *name, struct value *values) |
| 90 | { |
| 91 | struct value *v = values; |
| 92 | |
| 93 | while (v->event) { |
| 94 | if (!strcmp(name, v->event)) |
| 95 | return v->val; |
| 96 | v++; |
| 97 | }; |
| 98 | return 0; |
| 99 | } |
| 100 | |
| 101 | static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, |
| 102 | struct value *vals) |
| 103 | { |
| 104 | struct evsel *evsel; |
| 105 | u64 count; |
| 106 | |
| 107 | evlist__for_each_entry(evlist, evsel) { |
| 108 | count = find_value(evsel->name, vals); |
| 109 | perf_stat__update_shadow_stats(evsel, count, 0, st); |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | static double compute_single(struct rblist *metric_events, struct evlist *evlist, |
| 114 | struct runtime_stat *st, const char *name) |
| 115 | { |
| 116 | struct metric_expr *mexp; |
| 117 | struct metric_event *me; |
| 118 | struct evsel *evsel; |
| 119 | |
| 120 | evlist__for_each_entry(evlist, evsel) { |
| 121 | me = metricgroup__lookup(metric_events, evsel, false); |
| 122 | if (me != NULL) { |
| 123 | list_for_each_entry (mexp, &me->head, nd) { |
| 124 | if (strcmp(mexp->metric_name, name)) |
| 125 | continue; |
| 126 | return test_generic_metric(mexp, 0, st); |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | return 0.; |
| 131 | } |
| 132 | |
| 133 | static int __compute_metric(const char *name, struct value *vals, |
| 134 | const char *name1, double *ratio1, |
| 135 | const char *name2, double *ratio2) |
| 136 | { |
| 137 | struct rblist metric_events = { |
| 138 | .nr_entries = 0, |
| 139 | }; |
| 140 | struct perf_cpu_map *cpus; |
| 141 | struct runtime_stat st; |
| 142 | struct evlist *evlist; |
| 143 | int err; |
| 144 | |
| 145 | /* |
| 146 | * We need to prepare evlist for stat mode running on CPU 0 |
| 147 | * because that's where all the stats are going to be created. |
| 148 | */ |
| 149 | evlist = evlist__new(); |
| 150 | if (!evlist) |
| 151 | return -ENOMEM; |
| 152 | |
| 153 | cpus = perf_cpu_map__new("0"); |
| 154 | if (!cpus) { |
| 155 | evlist__delete(evlist); |
| 156 | return -ENOMEM; |
| 157 | } |
| 158 | |
| 159 | perf_evlist__set_maps(&evlist->core, cpus, NULL); |
| 160 | runtime_stat__init(&st); |
| 161 | |
| 162 | /* Parse the metric into metric_events list. */ |
| 163 | err = metricgroup__parse_groups_test(evlist, &map, name, |
| 164 | false, false, |
| 165 | &metric_events); |
| 166 | if (err) |
| 167 | goto out; |
| 168 | |
| 169 | err = perf_evlist__alloc_stats(evlist, false); |
| 170 | if (err) |
| 171 | goto out; |
| 172 | |
| 173 | /* Load the runtime stats with given numbers for events. */ |
| 174 | load_runtime_stat(&st, evlist, vals); |
| 175 | |
| 176 | /* And execute the metric */ |
| 177 | if (name1 && ratio1) |
| 178 | *ratio1 = compute_single(&metric_events, evlist, &st, name1); |
| 179 | if (name2 && ratio2) |
| 180 | *ratio2 = compute_single(&metric_events, evlist, &st, name2); |
| 181 | |
| 182 | out: |
| 183 | /* ... clenup. */ |
| 184 | metricgroup__rblist_exit(&metric_events); |
| 185 | runtime_stat__exit(&st); |
| 186 | perf_evlist__free_stats(evlist); |
| 187 | perf_cpu_map__put(cpus); |
| 188 | evlist__delete(evlist); |
| 189 | return err; |
| 190 | } |
| 191 | |
/*
 * compute_metric - parse and evaluate a single metric
 * @name:  metric to parse; the same name is the one evaluated
 * @vals:  NULL-terminated table of event counts
 * @ratio: where to store the result
 *
 * Returns whatever __compute_metric() returns.
 */
static int compute_metric(const char *name, struct value *vals, double *ratio)
{
	/* The second name/ratio slot is not used for a single metric. */
	const char *no_name = NULL;
	double *no_ratio = NULL;

	return __compute_metric(name, vals, name, ratio, no_name, no_ratio);
}
| 196 | |
/*
 * compute_metric_group - parse a metric group and evaluate two of its metrics
 * @name:   group name handed to the parser
 * @vals:   NULL-terminated table of event counts
 * @name1:  first metric to evaluate
 * @ratio1: where to store the first result
 * @name2:  second metric to evaluate
 * @ratio2: where to store the second result
 *
 * Returns whatever __compute_metric() returns.
 */
static int compute_metric_group(const char *name, struct value *vals,
				const char *name1, double *ratio1,
				const char *name2, double *ratio2)
{
	int ret;

	ret = __compute_metric(name, vals, name1, ratio1, name2, ratio2);
	return ret;
}
| 203 | |
| 204 | static int test_ipc(void) |
| 205 | { |
| 206 | double ratio; |
| 207 | struct value vals[] = { |
| 208 | { .event = "inst_retired.any", .val = 300 }, |
| 209 | { .event = "cpu_clk_unhalted.thread", .val = 200 }, |
| 210 | { .event = NULL, }, |
| 211 | }; |
| 212 | |
| 213 | TEST_ASSERT_VAL("failed to compute metric", |
| 214 | compute_metric("IPC", vals, &ratio) == 0); |
| 215 | |
| 216 | TEST_ASSERT_VAL("IPC failed, wrong ratio", |
| 217 | ratio == 1.5); |
| 218 | return 0; |
| 219 | } |
| 220 | |
| 221 | static int test_frontend(void) |
| 222 | { |
| 223 | double ratio; |
| 224 | struct value vals[] = { |
| 225 | { .event = "idq_uops_not_delivered.core", .val = 300 }, |
| 226 | { .event = "cpu_clk_unhalted.thread", .val = 200 }, |
| 227 | { .event = "cpu_clk_unhalted.one_thread_active", .val = 400 }, |
| 228 | { .event = "cpu_clk_unhalted.ref_xclk", .val = 600 }, |
| 229 | { .event = NULL, }, |
| 230 | }; |
| 231 | |
| 232 | TEST_ASSERT_VAL("failed to compute metric", |
| 233 | compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0); |
| 234 | |
| 235 | TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio", |
| 236 | ratio == 0.45); |
| 237 | return 0; |
| 238 | } |
| 239 | |
| 240 | static int test_cache_miss_cycles(void) |
| 241 | { |
| 242 | double ratio; |
| 243 | struct value vals[] = { |
| 244 | { .event = "l1d-loads-misses", .val = 300 }, |
| 245 | { .event = "l1i-loads-misses", .val = 200 }, |
| 246 | { .event = "inst_retired.any", .val = 400 }, |
| 247 | { .event = NULL, }, |
| 248 | }; |
| 249 | |
| 250 | TEST_ASSERT_VAL("failed to compute metric", |
| 251 | compute_metric("cache_miss_cycles", vals, &ratio) == 0); |
| 252 | |
| 253 | TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio", |
| 254 | ratio == 1.25); |
| 255 | return 0; |
| 256 | } |
| 257 | |
| 258 | |
| 259 | /* |
| 260 | * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hi |
| 261 | * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + |
| 262 | * l2_rqsts.pf_miss + l2_rqsts.rfo_miss |
| 263 | * DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss |
| 264 | * DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all) |
| 265 | * DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all) |
| 266 | * |
| 267 | * l2_rqsts.demand_data_rd_hit = 100 |
| 268 | * l2_rqsts.pf_hit = 200 |
| 269 | * l2_rqsts.rfo_hi = 300 |
| 270 | * l2_rqsts.all_demand_data_rd = 400 |
| 271 | * l2_rqsts.pf_miss = 500 |
| 272 | * l2_rqsts.rfo_miss = 600 |
| 273 | * |
| 274 | * DCache_L2_All_Hits = 600 |
| 275 | * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400 |
| 276 | * DCache_L2_All = 600 + 1400 = 2000 |
| 277 | * DCache_L2_Hits = 600 / 2000 = 0.3 |
| 278 | * DCache_L2_Misses = 1400 / 2000 = 0.7 |
| 279 | */ |
| 280 | static int test_dcache_l2(void) |
| 281 | { |
| 282 | double ratio; |
| 283 | struct value vals[] = { |
| 284 | { .event = "l2_rqsts.demand_data_rd_hit", .val = 100 }, |
| 285 | { .event = "l2_rqsts.pf_hit", .val = 200 }, |
| 286 | { .event = "l2_rqsts.rfo_hit", .val = 300 }, |
| 287 | { .event = "l2_rqsts.all_demand_data_rd", .val = 400 }, |
| 288 | { .event = "l2_rqsts.pf_miss", .val = 500 }, |
| 289 | { .event = "l2_rqsts.rfo_miss", .val = 600 }, |
| 290 | { .event = NULL, }, |
| 291 | }; |
| 292 | |
| 293 | TEST_ASSERT_VAL("failed to compute metric", |
| 294 | compute_metric("DCache_L2_Hits", vals, &ratio) == 0); |
| 295 | |
| 296 | TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio", |
| 297 | ratio == 0.3); |
| 298 | |
| 299 | TEST_ASSERT_VAL("failed to compute metric", |
| 300 | compute_metric("DCache_L2_Misses", vals, &ratio) == 0); |
| 301 | |
| 302 | TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio", |
| 303 | ratio == 0.7); |
| 304 | return 0; |
| 305 | } |
| 306 | |
| 307 | static int test_recursion_fail(void) |
| 308 | { |
| 309 | double ratio; |
| 310 | struct value vals[] = { |
| 311 | { .event = "inst_retired.any", .val = 300 }, |
| 312 | { .event = "cpu_clk_unhalted.thread", .val = 200 }, |
| 313 | { .event = NULL, }, |
| 314 | }; |
| 315 | |
| 316 | TEST_ASSERT_VAL("failed to find recursion", |
| 317 | compute_metric("M1", vals, &ratio) == -1); |
| 318 | |
| 319 | TEST_ASSERT_VAL("failed to find recursion", |
| 320 | compute_metric("M3", vals, &ratio) == -1); |
| 321 | return 0; |
| 322 | } |
| 323 | |
| 324 | static int test_metric_group(void) |
| 325 | { |
| 326 | double ratio1, ratio2; |
| 327 | struct value vals[] = { |
| 328 | { .event = "cpu_clk_unhalted.thread", .val = 200 }, |
| 329 | { .event = "l1d-loads-misses", .val = 300 }, |
| 330 | { .event = "l1i-loads-misses", .val = 200 }, |
| 331 | { .event = "inst_retired.any", .val = 400 }, |
| 332 | { .event = NULL, }, |
| 333 | }; |
| 334 | |
| 335 | TEST_ASSERT_VAL("failed to find recursion", |
| 336 | compute_metric_group("group1", vals, |
| 337 | "IPC", &ratio1, |
| 338 | "cache_miss_cycles", &ratio2) == 0); |
| 339 | |
| 340 | TEST_ASSERT_VAL("group IPC failed, wrong ratio", |
| 341 | ratio1 == 2.0); |
| 342 | |
| 343 | TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio", |
| 344 | ratio2 == 1.25); |
| 345 | return 0; |
| 346 | } |
| 347 | |
| 348 | int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused) |
| 349 | { |
| 350 | TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); |
| 351 | TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); |
| 352 | TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); |
| 353 | TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); |
| 354 | TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); |
| 355 | TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); |
| 356 | return 0; |
| 357 | } |