Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright(C) 2015-2018 Linaro Limited. |
| 4 | * |
| 5 | * Author: Tor Jeremiassen <tor@ti.com> |
| 6 | * Author: Mathieu Poirier <mathieu.poirier@linaro.org> |
| 7 | */ |
| 8 | |
| 9 | #include <linux/bitops.h> |
| 10 | #include <linux/err.h> |
| 11 | #include <linux/kernel.h> |
| 12 | #include <linux/log2.h> |
| 13 | #include <linux/types.h> |
| 14 | |
| 15 | #include <stdlib.h> |
| 16 | |
| 17 | #include "auxtrace.h" |
| 18 | #include "color.h" |
| 19 | #include "cs-etm.h" |
| 20 | #include "cs-etm-decoder/cs-etm-decoder.h" |
| 21 | #include "debug.h" |
| 22 | #include "evlist.h" |
| 23 | #include "intlist.h" |
| 24 | #include "machine.h" |
| 25 | #include "map.h" |
| 26 | #include "perf.h" |
| 27 | #include "thread.h" |
| 28 | #include "thread_map.h" |
| 29 | #include "thread-stack.h" |
| 30 | #include "util.h" |
| 31 | |
/* Largest value representable in a 64-bit unsigned timestamp */
#define MAX_TIMESTAMP (~0ULL)
| 33 | |
/*
 * A64 instructions are always 4 bytes
 *
 * Only A64 is supported, so can use this constant for converting between
 * addresses and instruction counts, calculating offsets etc
 */
#define A64_INSTR_SIZE 4
| 41 | |
/*
 * Per-session CoreSight ETM decoding state.
 *
 * The embedded 'auxtrace' member is what the session points at; the
 * callbacks in this file recover the enclosing structure from
 * session->auxtrace with container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	/* Array of trace queues; each queue's ->priv holds a cs_etm_queue */
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	/* Synthesis options (branches, instructions, last_branch, inject...) */
	struct itrace_synth_opts synth_opts;
	/* Session that synthesized events are delivered to */
	struct perf_session *session;
	struct machine *machine;
	/* Fallback thread used for kernel addresses when a queue has no thread */
	struct thread *unknown_thread;

	/* Non-zero when decoding without timestamps */
	u8 timeless_decoding;
	u8 snapshot_mode;
	u8 data_queued;
	/* Set once the branch / instruction events have been synthesized */
	u8 sample_branches;
	u8 sample_instructions;

	/* Number of entries in 'metadata' (one per traced CPU) */
	int num_cpu;
	u32 auxtrace_type;
	/* sample_type and id used for synthesized branch samples */
	u64 branches_sample_type;
	u64 branches_id;
	/* sample_type, period and id used for synthesized instruction samples */
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	/* Per-CPU register arrays, indexed with the CS_ETMV4_* constants */
	u64 **metadata;
	/* Addresses at or above this value are treated as kernel addresses */
	u64 kernel_start;
	/* PMU type matched against evsel->attr.type to find the trace event */
	unsigned int pmu_type;
};
| 68 | |
/*
 * Per-queue decoding state, stored in auxtrace_queue->priv.
 */
struct cs_etm_queue {
	/* Back-pointer to the owning session-wide state */
	struct cs_etm_auxtrace *etm;
	/* Thread looked up from 'tid'; NULL until resolved */
	struct thread *thread;
	struct cs_etm_decoder *decoder;
	/* Auxtrace buffer currently being consumed */
	struct auxtrace_buffer *buffer;
	const struct cs_etm_state *state;
	/* Scratch event of PERF_SAMPLE_MAX_SIZE bytes for synthesized samples */
	union perf_event *event_buf;
	/* Index of this queue in etm->queues.queue_array */
	unsigned int queue_nr;
	/* Initialized to -1 until known */
	pid_t pid, tid;
	int cpu;
	u64 time;
	u64 timestamp;
	u64 offset;
	/* Instructions executed since the last instruction sample */
	u64 period_instructions;
	/* Linear copy of the branch ring buffer, handed to samples */
	struct branch_stack *last_branch;
	/* Circular buffer of the most recent branches */
	struct branch_stack *last_branch_rb;
	/* Index of the most recently inserted entry in last_branch_rb */
	size_t last_branch_pos;
	/* Previous and current decoded packets; swapped after each sample */
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
};
| 89 | |
/* Forward declarations: both are called by cs_etm__flush_events() below */
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid, u64 time_);
| 93 | |
| 94 | static void cs_etm__packet_dump(const char *pkt_string) |
| 95 | { |
| 96 | const char *color = PERF_COLOR_BLUE; |
| 97 | int len = strlen(pkt_string); |
| 98 | |
| 99 | if (len && (pkt_string[len-1] == '\n')) |
| 100 | color_fprintf(stdout, color, " %s", pkt_string); |
| 101 | else |
| 102 | color_fprintf(stdout, color, " %s\n", pkt_string); |
| 103 | |
| 104 | fflush(stdout); |
| 105 | } |
| 106 | |
| 107 | static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, |
| 108 | struct auxtrace_buffer *buffer) |
| 109 | { |
| 110 | int i, ret; |
| 111 | const char *color = PERF_COLOR_BLUE; |
| 112 | struct cs_etm_decoder_params d_params; |
| 113 | struct cs_etm_trace_params *t_params; |
| 114 | struct cs_etm_decoder *decoder; |
| 115 | size_t buffer_used = 0; |
| 116 | |
| 117 | fprintf(stdout, "\n"); |
| 118 | color_fprintf(stdout, color, |
| 119 | ". ... CoreSight ETM Trace data: size %zu bytes\n", |
| 120 | buffer->size); |
| 121 | |
| 122 | /* Use metadata to fill in trace parameters for trace decoder */ |
| 123 | t_params = zalloc(sizeof(*t_params) * etm->num_cpu); |
| 124 | for (i = 0; i < etm->num_cpu; i++) { |
| 125 | t_params[i].protocol = CS_ETM_PROTO_ETMV4i; |
| 126 | t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; |
| 127 | t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; |
| 128 | t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; |
| 129 | t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; |
| 130 | t_params[i].etmv4.reg_configr = |
| 131 | etm->metadata[i][CS_ETMV4_TRCCONFIGR]; |
| 132 | t_params[i].etmv4.reg_traceidr = |
| 133 | etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; |
| 134 | } |
| 135 | |
| 136 | /* Set decoder parameters to simply print the trace packets */ |
| 137 | d_params.packet_printer = cs_etm__packet_dump; |
| 138 | d_params.operation = CS_ETM_OPERATION_PRINT; |
| 139 | d_params.formatted = true; |
| 140 | d_params.fsyncs = false; |
| 141 | d_params.hsyncs = false; |
| 142 | d_params.frame_aligned = true; |
| 143 | |
| 144 | decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); |
| 145 | |
| 146 | zfree(&t_params); |
| 147 | |
| 148 | if (!decoder) |
| 149 | return; |
| 150 | do { |
| 151 | size_t consumed; |
| 152 | |
| 153 | ret = cs_etm_decoder__process_data_block( |
| 154 | decoder, buffer->offset, |
| 155 | &((u8 *)buffer->data)[buffer_used], |
| 156 | buffer->size - buffer_used, &consumed); |
| 157 | if (ret) |
| 158 | break; |
| 159 | |
| 160 | buffer_used += consumed; |
| 161 | } while (buffer_used < buffer->size); |
| 162 | |
| 163 | cs_etm_decoder__free(decoder); |
| 164 | } |
| 165 | |
| 166 | static int cs_etm__flush_events(struct perf_session *session, |
| 167 | struct perf_tool *tool) |
| 168 | { |
| 169 | int ret; |
| 170 | struct cs_etm_auxtrace *etm = container_of(session->auxtrace, |
| 171 | struct cs_etm_auxtrace, |
| 172 | auxtrace); |
| 173 | if (dump_trace) |
| 174 | return 0; |
| 175 | |
| 176 | if (!tool->ordered_events) |
| 177 | return -EINVAL; |
| 178 | |
| 179 | if (!etm->timeless_decoding) |
| 180 | return -EINVAL; |
| 181 | |
| 182 | ret = cs_etm__update_queues(etm); |
| 183 | |
| 184 | if (ret < 0) |
| 185 | return ret; |
| 186 | |
| 187 | return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); |
| 188 | } |
| 189 | |
| 190 | static void cs_etm__free_queue(void *priv) |
| 191 | { |
| 192 | struct cs_etm_queue *etmq = priv; |
| 193 | |
| 194 | if (!etmq) |
| 195 | return; |
| 196 | |
| 197 | thread__zput(etmq->thread); |
| 198 | cs_etm_decoder__free(etmq->decoder); |
| 199 | zfree(&etmq->event_buf); |
| 200 | zfree(&etmq->last_branch); |
| 201 | zfree(&etmq->last_branch_rb); |
| 202 | zfree(&etmq->prev_packet); |
| 203 | zfree(&etmq->packet); |
| 204 | free(etmq); |
| 205 | } |
| 206 | |
| 207 | static void cs_etm__free_events(struct perf_session *session) |
| 208 | { |
| 209 | unsigned int i; |
| 210 | struct cs_etm_auxtrace *aux = container_of(session->auxtrace, |
| 211 | struct cs_etm_auxtrace, |
| 212 | auxtrace); |
| 213 | struct auxtrace_queues *queues = &aux->queues; |
| 214 | |
| 215 | for (i = 0; i < queues->nr_queues; i++) { |
| 216 | cs_etm__free_queue(queues->queue_array[i].priv); |
| 217 | queues->queue_array[i].priv = NULL; |
| 218 | } |
| 219 | |
| 220 | auxtrace_queues__free(queues); |
| 221 | } |
| 222 | |
| 223 | static void cs_etm__free(struct perf_session *session) |
| 224 | { |
| 225 | int i; |
| 226 | struct int_node *inode, *tmp; |
| 227 | struct cs_etm_auxtrace *aux = container_of(session->auxtrace, |
| 228 | struct cs_etm_auxtrace, |
| 229 | auxtrace); |
| 230 | cs_etm__free_events(session); |
| 231 | session->auxtrace = NULL; |
| 232 | |
| 233 | /* First remove all traceID/CPU# nodes for the RB tree */ |
| 234 | intlist__for_each_entry_safe(inode, tmp, traceid_list) |
| 235 | intlist__remove(traceid_list, inode); |
| 236 | /* Then the RB tree itself */ |
| 237 | intlist__delete(traceid_list); |
| 238 | |
| 239 | for (i = 0; i < aux->num_cpu; i++) |
| 240 | zfree(&aux->metadata[i]); |
| 241 | |
| 242 | thread__zput(aux->unknown_thread); |
| 243 | zfree(&aux->metadata); |
| 244 | zfree(&aux); |
| 245 | } |
| 246 | |
| 247 | static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) |
| 248 | { |
| 249 | struct machine *machine; |
| 250 | |
| 251 | machine = etmq->etm->machine; |
| 252 | |
| 253 | if (address >= etmq->etm->kernel_start) { |
| 254 | if (machine__is_host(machine)) |
| 255 | return PERF_RECORD_MISC_KERNEL; |
| 256 | else |
| 257 | return PERF_RECORD_MISC_GUEST_KERNEL; |
| 258 | } else { |
| 259 | if (machine__is_host(machine)) |
| 260 | return PERF_RECORD_MISC_USER; |
| 261 | else if (perf_guest) |
| 262 | return PERF_RECORD_MISC_GUEST_USER; |
| 263 | else |
| 264 | return PERF_RECORD_MISC_HYPERVISOR; |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, |
| 269 | size_t size, u8 *buffer) |
| 270 | { |
| 271 | u8 cpumode; |
| 272 | u64 offset; |
| 273 | int len; |
| 274 | struct thread *thread; |
| 275 | struct machine *machine; |
| 276 | struct addr_location al; |
| 277 | |
| 278 | if (!etmq) |
| 279 | return -1; |
| 280 | |
| 281 | machine = etmq->etm->machine; |
| 282 | cpumode = cs_etm__cpu_mode(etmq, address); |
| 283 | |
| 284 | thread = etmq->thread; |
| 285 | if (!thread) { |
| 286 | if (cpumode != PERF_RECORD_MISC_KERNEL) |
| 287 | return -EINVAL; |
| 288 | thread = etmq->etm->unknown_thread; |
| 289 | } |
| 290 | |
| 291 | if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso) |
| 292 | return 0; |
| 293 | |
| 294 | if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && |
| 295 | dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) |
| 296 | return 0; |
| 297 | |
| 298 | offset = al.map->map_ip(al.map, address); |
| 299 | |
| 300 | map__load(al.map); |
| 301 | |
| 302 | len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); |
| 303 | |
| 304 | if (len <= 0) |
| 305 | return 0; |
| 306 | |
| 307 | return len; |
| 308 | } |
| 309 | |
| 310 | static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, |
| 311 | unsigned int queue_nr) |
| 312 | { |
| 313 | int i; |
| 314 | struct cs_etm_decoder_params d_params; |
| 315 | struct cs_etm_trace_params *t_params; |
| 316 | struct cs_etm_queue *etmq; |
| 317 | size_t szp = sizeof(struct cs_etm_packet); |
| 318 | |
| 319 | etmq = zalloc(sizeof(*etmq)); |
| 320 | if (!etmq) |
| 321 | return NULL; |
| 322 | |
| 323 | etmq->packet = zalloc(szp); |
| 324 | if (!etmq->packet) |
| 325 | goto out_free; |
| 326 | |
| 327 | if (etm->synth_opts.last_branch || etm->sample_branches) { |
| 328 | etmq->prev_packet = zalloc(szp); |
| 329 | if (!etmq->prev_packet) |
| 330 | goto out_free; |
| 331 | } |
| 332 | |
| 333 | if (etm->synth_opts.last_branch) { |
| 334 | size_t sz = sizeof(struct branch_stack); |
| 335 | |
| 336 | sz += etm->synth_opts.last_branch_sz * |
| 337 | sizeof(struct branch_entry); |
| 338 | etmq->last_branch = zalloc(sz); |
| 339 | if (!etmq->last_branch) |
| 340 | goto out_free; |
| 341 | etmq->last_branch_rb = zalloc(sz); |
| 342 | if (!etmq->last_branch_rb) |
| 343 | goto out_free; |
| 344 | } |
| 345 | |
| 346 | etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); |
| 347 | if (!etmq->event_buf) |
| 348 | goto out_free; |
| 349 | |
| 350 | etmq->etm = etm; |
| 351 | etmq->queue_nr = queue_nr; |
| 352 | etmq->pid = -1; |
| 353 | etmq->tid = -1; |
| 354 | etmq->cpu = -1; |
| 355 | |
| 356 | /* Use metadata to fill in trace parameters for trace decoder */ |
| 357 | t_params = zalloc(sizeof(*t_params) * etm->num_cpu); |
| 358 | |
| 359 | if (!t_params) |
| 360 | goto out_free; |
| 361 | |
| 362 | for (i = 0; i < etm->num_cpu; i++) { |
| 363 | t_params[i].protocol = CS_ETM_PROTO_ETMV4i; |
| 364 | t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; |
| 365 | t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; |
| 366 | t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; |
| 367 | t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; |
| 368 | t_params[i].etmv4.reg_configr = |
| 369 | etm->metadata[i][CS_ETMV4_TRCCONFIGR]; |
| 370 | t_params[i].etmv4.reg_traceidr = |
| 371 | etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; |
| 372 | } |
| 373 | |
| 374 | /* Set decoder parameters to simply print the trace packets */ |
| 375 | d_params.packet_printer = cs_etm__packet_dump; |
| 376 | d_params.operation = CS_ETM_OPERATION_DECODE; |
| 377 | d_params.formatted = true; |
| 378 | d_params.fsyncs = false; |
| 379 | d_params.hsyncs = false; |
| 380 | d_params.frame_aligned = true; |
| 381 | d_params.data = etmq; |
| 382 | |
| 383 | etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); |
| 384 | |
| 385 | zfree(&t_params); |
| 386 | |
| 387 | if (!etmq->decoder) |
| 388 | goto out_free; |
| 389 | |
| 390 | /* |
| 391 | * Register a function to handle all memory accesses required by |
| 392 | * the trace decoder library. |
| 393 | */ |
| 394 | if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, |
| 395 | 0x0L, ((u64) -1L), |
| 396 | cs_etm__mem_access)) |
| 397 | goto out_free_decoder; |
| 398 | |
| 399 | etmq->offset = 0; |
| 400 | etmq->period_instructions = 0; |
| 401 | |
| 402 | return etmq; |
| 403 | |
| 404 | out_free_decoder: |
| 405 | cs_etm_decoder__free(etmq->decoder); |
| 406 | out_free: |
| 407 | zfree(&etmq->event_buf); |
| 408 | zfree(&etmq->last_branch); |
| 409 | zfree(&etmq->last_branch_rb); |
| 410 | zfree(&etmq->prev_packet); |
| 411 | zfree(&etmq->packet); |
| 412 | free(etmq); |
| 413 | |
| 414 | return NULL; |
| 415 | } |
| 416 | |
| 417 | static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, |
| 418 | struct auxtrace_queue *queue, |
| 419 | unsigned int queue_nr) |
| 420 | { |
| 421 | struct cs_etm_queue *etmq = queue->priv; |
| 422 | |
| 423 | if (list_empty(&queue->head) || etmq) |
| 424 | return 0; |
| 425 | |
| 426 | etmq = cs_etm__alloc_queue(etm, queue_nr); |
| 427 | |
| 428 | if (!etmq) |
| 429 | return -ENOMEM; |
| 430 | |
| 431 | queue->priv = etmq; |
| 432 | |
| 433 | if (queue->cpu != -1) |
| 434 | etmq->cpu = queue->cpu; |
| 435 | |
| 436 | etmq->tid = queue->tid; |
| 437 | |
| 438 | return 0; |
| 439 | } |
| 440 | |
| 441 | static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) |
| 442 | { |
| 443 | unsigned int i; |
| 444 | int ret; |
| 445 | |
| 446 | for (i = 0; i < etm->queues.nr_queues; i++) { |
| 447 | ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); |
| 448 | if (ret) |
| 449 | return ret; |
| 450 | } |
| 451 | |
| 452 | return 0; |
| 453 | } |
| 454 | |
| 455 | static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) |
| 456 | { |
| 457 | if (etm->queues.new_data) { |
| 458 | etm->queues.new_data = false; |
| 459 | return cs_etm__setup_queues(etm); |
| 460 | } |
| 461 | |
| 462 | return 0; |
| 463 | } |
| 464 | |
| 465 | static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) |
| 466 | { |
| 467 | struct branch_stack *bs_src = etmq->last_branch_rb; |
| 468 | struct branch_stack *bs_dst = etmq->last_branch; |
| 469 | size_t nr = 0; |
| 470 | |
| 471 | /* |
| 472 | * Set the number of records before early exit: ->nr is used to |
| 473 | * determine how many branches to copy from ->entries. |
| 474 | */ |
| 475 | bs_dst->nr = bs_src->nr; |
| 476 | |
| 477 | /* |
| 478 | * Early exit when there is nothing to copy. |
| 479 | */ |
| 480 | if (!bs_src->nr) |
| 481 | return; |
| 482 | |
| 483 | /* |
| 484 | * As bs_src->entries is a circular buffer, we need to copy from it in |
| 485 | * two steps. First, copy the branches from the most recently inserted |
| 486 | * branch ->last_branch_pos until the end of bs_src->entries buffer. |
| 487 | */ |
| 488 | nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; |
| 489 | memcpy(&bs_dst->entries[0], |
| 490 | &bs_src->entries[etmq->last_branch_pos], |
| 491 | sizeof(struct branch_entry) * nr); |
| 492 | |
| 493 | /* |
| 494 | * If we wrapped around at least once, the branches from the beginning |
| 495 | * of the bs_src->entries buffer and until the ->last_branch_pos element |
| 496 | * are older valid branches: copy them over. The total number of |
| 497 | * branches copied over will be equal to the number of branches asked by |
| 498 | * the user in last_branch_sz. |
| 499 | */ |
| 500 | if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { |
| 501 | memcpy(&bs_dst->entries[nr], |
| 502 | &bs_src->entries[0], |
| 503 | sizeof(struct branch_entry) * etmq->last_branch_pos); |
| 504 | } |
| 505 | } |
| 506 | |
| 507 | static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) |
| 508 | { |
| 509 | etmq->last_branch_pos = 0; |
| 510 | etmq->last_branch_rb->nr = 0; |
| 511 | } |
| 512 | |
| 513 | static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) |
| 514 | { |
| 515 | /* Returns 0 for the CS_ETM_TRACE_ON packet */ |
| 516 | if (packet->sample_type == CS_ETM_TRACE_ON) |
| 517 | return 0; |
| 518 | |
| 519 | /* |
| 520 | * The packet records the execution range with an exclusive end address |
| 521 | * |
| 522 | * A64 instructions are constant size, so the last executed |
| 523 | * instruction is A64_INSTR_SIZE before the end address |
| 524 | * Will need to do instruction level decode for T32 instructions as |
| 525 | * they can be variable size (not yet supported). |
| 526 | */ |
| 527 | return packet->end_addr - A64_INSTR_SIZE; |
| 528 | } |
| 529 | |
| 530 | static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) |
| 531 | { |
| 532 | /* Returns 0 for the CS_ETM_TRACE_ON packet */ |
| 533 | if (packet->sample_type == CS_ETM_TRACE_ON) |
| 534 | return 0; |
| 535 | |
| 536 | return packet->start_addr; |
| 537 | } |
| 538 | |
| 539 | static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) |
| 540 | { |
| 541 | /* |
| 542 | * Only A64 instructions are currently supported, so can get |
| 543 | * instruction count by dividing. |
| 544 | * Will need to do instruction level decode for T32 instructions as |
| 545 | * they can be variable size (not yet supported). |
| 546 | */ |
| 547 | return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE; |
| 548 | } |
| 549 | |
| 550 | static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, |
| 551 | u64 offset) |
| 552 | { |
| 553 | /* |
| 554 | * Only A64 instructions are currently supported, so can get |
| 555 | * instruction address by muliplying. |
| 556 | * Will need to do instruction level decode for T32 instructions as |
| 557 | * they can be variable size (not yet supported). |
| 558 | */ |
| 559 | return packet->start_addr + offset * A64_INSTR_SIZE; |
| 560 | } |
| 561 | |
| 562 | static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) |
| 563 | { |
| 564 | struct branch_stack *bs = etmq->last_branch_rb; |
| 565 | struct branch_entry *be; |
| 566 | |
| 567 | /* |
| 568 | * The branches are recorded in a circular buffer in reverse |
| 569 | * chronological order: we start recording from the last element of the |
| 570 | * buffer down. After writing the first element of the stack, move the |
| 571 | * insert position back to the end of the buffer. |
| 572 | */ |
| 573 | if (!etmq->last_branch_pos) |
| 574 | etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; |
| 575 | |
| 576 | etmq->last_branch_pos -= 1; |
| 577 | |
| 578 | be = &bs->entries[etmq->last_branch_pos]; |
| 579 | be->from = cs_etm__last_executed_instr(etmq->prev_packet); |
| 580 | be->to = cs_etm__first_executed_instr(etmq->packet); |
| 581 | /* No support for mispredict */ |
| 582 | be->flags.mispred = 0; |
| 583 | be->flags.predicted = 1; |
| 584 | |
| 585 | /* |
| 586 | * Increment bs->nr until reaching the number of last branches asked by |
| 587 | * the user on the command line. |
| 588 | */ |
| 589 | if (bs->nr < etmq->etm->synth_opts.last_branch_sz) |
| 590 | bs->nr += 1; |
| 591 | } |
| 592 | |
| 593 | static int cs_etm__inject_event(union perf_event *event, |
| 594 | struct perf_sample *sample, u64 type) |
| 595 | { |
| 596 | event->header.size = perf_event__sample_event_size(sample, type, 0); |
| 597 | return perf_event__synthesize_sample(event, type, 0, sample); |
| 598 | } |
| 599 | |
| 600 | |
| 601 | static int |
| 602 | cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) |
| 603 | { |
| 604 | struct auxtrace_buffer *aux_buffer = etmq->buffer; |
| 605 | struct auxtrace_buffer *old_buffer = aux_buffer; |
| 606 | struct auxtrace_queue *queue; |
| 607 | |
| 608 | queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; |
| 609 | |
| 610 | aux_buffer = auxtrace_buffer__next(queue, aux_buffer); |
| 611 | |
| 612 | /* If no more data, drop the previous auxtrace_buffer and return */ |
| 613 | if (!aux_buffer) { |
| 614 | if (old_buffer) |
| 615 | auxtrace_buffer__drop_data(old_buffer); |
| 616 | buff->len = 0; |
| 617 | return 0; |
| 618 | } |
| 619 | |
| 620 | etmq->buffer = aux_buffer; |
| 621 | |
| 622 | /* If the aux_buffer doesn't have data associated, try to load it */ |
| 623 | if (!aux_buffer->data) { |
| 624 | /* get the file desc associated with the perf data file */ |
| 625 | int fd = perf_data__fd(etmq->etm->session->data); |
| 626 | |
| 627 | aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); |
| 628 | if (!aux_buffer->data) |
| 629 | return -ENOMEM; |
| 630 | } |
| 631 | |
| 632 | /* If valid, drop the previous buffer */ |
| 633 | if (old_buffer) |
| 634 | auxtrace_buffer__drop_data(old_buffer); |
| 635 | |
| 636 | buff->offset = aux_buffer->offset; |
| 637 | buff->len = aux_buffer->size; |
| 638 | buff->buf = aux_buffer->data; |
| 639 | |
| 640 | buff->ref_timestamp = aux_buffer->reference; |
| 641 | |
| 642 | return buff->len; |
| 643 | } |
| 644 | |
| 645 | static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, |
| 646 | struct auxtrace_queue *queue) |
| 647 | { |
| 648 | struct cs_etm_queue *etmq = queue->priv; |
| 649 | |
| 650 | /* CPU-wide tracing isn't supported yet */ |
| 651 | if (queue->tid == -1) |
| 652 | return; |
| 653 | |
| 654 | if ((!etmq->thread) && (etmq->tid != -1)) |
| 655 | etmq->thread = machine__find_thread(etm->machine, -1, |
| 656 | etmq->tid); |
| 657 | |
| 658 | if (etmq->thread) { |
| 659 | etmq->pid = etmq->thread->pid_; |
| 660 | if (queue->cpu == -1) |
| 661 | etmq->cpu = etmq->thread->cpu; |
| 662 | } |
| 663 | } |
| 664 | |
| 665 | static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, |
| 666 | u64 addr, u64 period) |
| 667 | { |
| 668 | int ret = 0; |
| 669 | struct cs_etm_auxtrace *etm = etmq->etm; |
| 670 | union perf_event *event = etmq->event_buf; |
| 671 | struct perf_sample sample = {.ip = 0,}; |
| 672 | |
| 673 | event->sample.header.type = PERF_RECORD_SAMPLE; |
| 674 | event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); |
| 675 | event->sample.header.size = sizeof(struct perf_event_header); |
| 676 | |
| 677 | sample.ip = addr; |
| 678 | sample.pid = etmq->pid; |
| 679 | sample.tid = etmq->tid; |
| 680 | sample.id = etmq->etm->instructions_id; |
| 681 | sample.stream_id = etmq->etm->instructions_id; |
| 682 | sample.period = period; |
| 683 | sample.cpu = etmq->packet->cpu; |
| 684 | sample.flags = 0; |
| 685 | sample.insn_len = 1; |
| 686 | sample.cpumode = event->sample.header.misc; |
| 687 | |
| 688 | if (etm->synth_opts.last_branch) { |
| 689 | cs_etm__copy_last_branch_rb(etmq); |
| 690 | sample.branch_stack = etmq->last_branch; |
| 691 | } |
| 692 | |
| 693 | if (etm->synth_opts.inject) { |
| 694 | ret = cs_etm__inject_event(event, &sample, |
| 695 | etm->instructions_sample_type); |
| 696 | if (ret) |
| 697 | return ret; |
| 698 | } |
| 699 | |
| 700 | ret = perf_session__deliver_synth_event(etm->session, event, &sample); |
| 701 | |
| 702 | if (ret) |
| 703 | pr_err( |
| 704 | "CS ETM Trace: failed to deliver instruction event, error %d\n", |
| 705 | ret); |
| 706 | |
| 707 | if (etm->synth_opts.last_branch) |
| 708 | cs_etm__reset_last_branch_rb(etmq); |
| 709 | |
| 710 | return ret; |
| 711 | } |
| 712 | |
| 713 | /* |
| 714 | * The cs etm packet encodes an instruction range between a branch target |
| 715 | * and the next taken branch. Generate sample accordingly. |
| 716 | */ |
| 717 | static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) |
| 718 | { |
| 719 | int ret = 0; |
| 720 | struct cs_etm_auxtrace *etm = etmq->etm; |
| 721 | struct perf_sample sample = {.ip = 0,}; |
| 722 | union perf_event *event = etmq->event_buf; |
| 723 | struct dummy_branch_stack { |
| 724 | u64 nr; |
| 725 | struct branch_entry entries; |
| 726 | } dummy_bs; |
| 727 | u64 ip; |
| 728 | |
| 729 | ip = cs_etm__last_executed_instr(etmq->prev_packet); |
| 730 | |
| 731 | event->sample.header.type = PERF_RECORD_SAMPLE; |
| 732 | event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); |
| 733 | event->sample.header.size = sizeof(struct perf_event_header); |
| 734 | |
| 735 | sample.ip = ip; |
| 736 | sample.pid = etmq->pid; |
| 737 | sample.tid = etmq->tid; |
| 738 | sample.addr = cs_etm__first_executed_instr(etmq->packet); |
| 739 | sample.id = etmq->etm->branches_id; |
| 740 | sample.stream_id = etmq->etm->branches_id; |
| 741 | sample.period = 1; |
| 742 | sample.cpu = etmq->packet->cpu; |
| 743 | sample.flags = 0; |
| 744 | sample.cpumode = event->sample.header.misc; |
| 745 | |
| 746 | /* |
| 747 | * perf report cannot handle events without a branch stack |
| 748 | */ |
| 749 | if (etm->synth_opts.last_branch) { |
| 750 | dummy_bs = (struct dummy_branch_stack){ |
| 751 | .nr = 1, |
| 752 | .entries = { |
| 753 | .from = sample.ip, |
| 754 | .to = sample.addr, |
| 755 | }, |
| 756 | }; |
| 757 | sample.branch_stack = (struct branch_stack *)&dummy_bs; |
| 758 | } |
| 759 | |
| 760 | if (etm->synth_opts.inject) { |
| 761 | ret = cs_etm__inject_event(event, &sample, |
| 762 | etm->branches_sample_type); |
| 763 | if (ret) |
| 764 | return ret; |
| 765 | } |
| 766 | |
| 767 | ret = perf_session__deliver_synth_event(etm->session, event, &sample); |
| 768 | |
| 769 | if (ret) |
| 770 | pr_err( |
| 771 | "CS ETM Trace: failed to deliver instruction event, error %d\n", |
| 772 | ret); |
| 773 | |
| 774 | return ret; |
| 775 | } |
| 776 | |
/*
 * Context for attribute synthesis: cs_etm__event_synth() receives only the
 * tool pointer and uses container_of() on 'dummy_tool' to reach 'session'.
 */
struct cs_etm_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
| 781 | |
| 782 | static int cs_etm__event_synth(struct perf_tool *tool, |
| 783 | union perf_event *event, |
| 784 | struct perf_sample *sample __maybe_unused, |
| 785 | struct machine *machine __maybe_unused) |
| 786 | { |
| 787 | struct cs_etm_synth *cs_etm_synth = |
| 788 | container_of(tool, struct cs_etm_synth, dummy_tool); |
| 789 | |
| 790 | return perf_session__deliver_synth_event(cs_etm_synth->session, |
| 791 | event, NULL); |
| 792 | } |
| 793 | |
| 794 | static int cs_etm__synth_event(struct perf_session *session, |
| 795 | struct perf_event_attr *attr, u64 id) |
| 796 | { |
| 797 | struct cs_etm_synth cs_etm_synth; |
| 798 | |
| 799 | memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); |
| 800 | cs_etm_synth.session = session; |
| 801 | |
| 802 | return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, |
| 803 | &id, cs_etm__event_synth); |
| 804 | } |
| 805 | |
| 806 | static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, |
| 807 | struct perf_session *session) |
| 808 | { |
| 809 | struct perf_evlist *evlist = session->evlist; |
| 810 | struct perf_evsel *evsel; |
| 811 | struct perf_event_attr attr; |
| 812 | bool found = false; |
| 813 | u64 id; |
| 814 | int err; |
| 815 | |
| 816 | evlist__for_each_entry(evlist, evsel) { |
| 817 | if (evsel->attr.type == etm->pmu_type) { |
| 818 | found = true; |
| 819 | break; |
| 820 | } |
| 821 | } |
| 822 | |
| 823 | if (!found) { |
| 824 | pr_debug("No selected events with CoreSight Trace data\n"); |
| 825 | return 0; |
| 826 | } |
| 827 | |
| 828 | memset(&attr, 0, sizeof(struct perf_event_attr)); |
| 829 | attr.size = sizeof(struct perf_event_attr); |
| 830 | attr.type = PERF_TYPE_HARDWARE; |
| 831 | attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; |
| 832 | attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | |
| 833 | PERF_SAMPLE_PERIOD; |
| 834 | if (etm->timeless_decoding) |
| 835 | attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; |
| 836 | else |
| 837 | attr.sample_type |= PERF_SAMPLE_TIME; |
| 838 | |
| 839 | attr.exclude_user = evsel->attr.exclude_user; |
| 840 | attr.exclude_kernel = evsel->attr.exclude_kernel; |
| 841 | attr.exclude_hv = evsel->attr.exclude_hv; |
| 842 | attr.exclude_host = evsel->attr.exclude_host; |
| 843 | attr.exclude_guest = evsel->attr.exclude_guest; |
| 844 | attr.sample_id_all = evsel->attr.sample_id_all; |
| 845 | attr.read_format = evsel->attr.read_format; |
| 846 | |
| 847 | /* create new id val to be a fixed offset from evsel id */ |
| 848 | id = evsel->id[0] + 1000000000; |
| 849 | |
| 850 | if (!id) |
| 851 | id = 1; |
| 852 | |
| 853 | if (etm->synth_opts.branches) { |
| 854 | attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; |
| 855 | attr.sample_period = 1; |
| 856 | attr.sample_type |= PERF_SAMPLE_ADDR; |
| 857 | err = cs_etm__synth_event(session, &attr, id); |
| 858 | if (err) |
| 859 | return err; |
| 860 | etm->sample_branches = true; |
| 861 | etm->branches_sample_type = attr.sample_type; |
| 862 | etm->branches_id = id; |
| 863 | id += 1; |
| 864 | attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; |
| 865 | } |
| 866 | |
| 867 | if (etm->synth_opts.last_branch) |
| 868 | attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; |
| 869 | |
| 870 | if (etm->synth_opts.instructions) { |
| 871 | attr.config = PERF_COUNT_HW_INSTRUCTIONS; |
| 872 | attr.sample_period = etm->synth_opts.period; |
| 873 | etm->instructions_sample_period = attr.sample_period; |
| 874 | err = cs_etm__synth_event(session, &attr, id); |
| 875 | if (err) |
| 876 | return err; |
| 877 | etm->sample_instructions = true; |
| 878 | etm->instructions_sample_type = attr.sample_type; |
| 879 | etm->instructions_id = id; |
| 880 | id += 1; |
| 881 | } |
| 882 | |
| 883 | return 0; |
| 884 | } |
| 885 | |
| 886 | static int cs_etm__sample(struct cs_etm_queue *etmq) |
| 887 | { |
| 888 | struct cs_etm_auxtrace *etm = etmq->etm; |
| 889 | struct cs_etm_packet *tmp; |
| 890 | int ret; |
| 891 | u64 instrs_executed; |
| 892 | |
| 893 | instrs_executed = cs_etm__instr_count(etmq->packet); |
| 894 | etmq->period_instructions += instrs_executed; |
| 895 | |
| 896 | /* |
| 897 | * Record a branch when the last instruction in |
| 898 | * PREV_PACKET is a branch. |
| 899 | */ |
| 900 | if (etm->synth_opts.last_branch && |
| 901 | etmq->prev_packet && |
| 902 | etmq->prev_packet->sample_type == CS_ETM_RANGE && |
| 903 | etmq->prev_packet->last_instr_taken_branch) |
| 904 | cs_etm__update_last_branch_rb(etmq); |
| 905 | |
| 906 | if (etm->sample_instructions && |
| 907 | etmq->period_instructions >= etm->instructions_sample_period) { |
| 908 | /* |
| 909 | * Emit instruction sample periodically |
| 910 | * TODO: allow period to be defined in cycles and clock time |
| 911 | */ |
| 912 | |
| 913 | /* Get number of instructions executed after the sample point */ |
| 914 | u64 instrs_over = etmq->period_instructions - |
| 915 | etm->instructions_sample_period; |
| 916 | |
| 917 | /* |
| 918 | * Calculate the address of the sampled instruction (-1 as |
| 919 | * sample is reported as though instruction has just been |
| 920 | * executed, but PC has not advanced to next instruction) |
| 921 | */ |
| 922 | u64 offset = (instrs_executed - instrs_over - 1); |
| 923 | u64 addr = cs_etm__instr_addr(etmq->packet, offset); |
| 924 | |
| 925 | ret = cs_etm__synth_instruction_sample( |
| 926 | etmq, addr, etm->instructions_sample_period); |
| 927 | if (ret) |
| 928 | return ret; |
| 929 | |
| 930 | /* Carry remaining instructions into next sample period */ |
| 931 | etmq->period_instructions = instrs_over; |
| 932 | } |
| 933 | |
| 934 | if (etm->sample_branches && etmq->prev_packet) { |
| 935 | bool generate_sample = false; |
| 936 | |
| 937 | /* Generate sample for tracing on packet */ |
| 938 | if (etmq->prev_packet->sample_type == CS_ETM_TRACE_ON) |
| 939 | generate_sample = true; |
| 940 | |
| 941 | /* Generate sample for branch taken packet */ |
| 942 | if (etmq->prev_packet->sample_type == CS_ETM_RANGE && |
| 943 | etmq->prev_packet->last_instr_taken_branch) |
| 944 | generate_sample = true; |
| 945 | |
| 946 | if (generate_sample) { |
| 947 | ret = cs_etm__synth_branch_sample(etmq); |
| 948 | if (ret) |
| 949 | return ret; |
| 950 | } |
| 951 | } |
| 952 | |
| 953 | if (etm->sample_branches || etm->synth_opts.last_branch) { |
| 954 | /* |
| 955 | * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for |
| 956 | * the next incoming packet. |
| 957 | */ |
| 958 | tmp = etmq->packet; |
| 959 | etmq->packet = etmq->prev_packet; |
| 960 | etmq->prev_packet = tmp; |
| 961 | } |
| 962 | |
| 963 | return 0; |
| 964 | } |
| 965 | |
| 966 | static int cs_etm__flush(struct cs_etm_queue *etmq) |
| 967 | { |
| 968 | int err = 0; |
| 969 | struct cs_etm_auxtrace *etm = etmq->etm; |
| 970 | struct cs_etm_packet *tmp; |
| 971 | |
| 972 | if (!etmq->prev_packet) |
| 973 | return 0; |
| 974 | |
| 975 | /* Handle start tracing packet */ |
| 976 | if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) |
| 977 | goto swap_packet; |
| 978 | |
| 979 | if (etmq->etm->synth_opts.last_branch && |
| 980 | etmq->prev_packet->sample_type == CS_ETM_RANGE) { |
| 981 | /* |
| 982 | * Generate a last branch event for the branches left in the |
| 983 | * circular buffer at the end of the trace. |
| 984 | * |
| 985 | * Use the address of the end of the last reported execution |
| 986 | * range |
| 987 | */ |
| 988 | u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); |
| 989 | |
| 990 | err = cs_etm__synth_instruction_sample( |
| 991 | etmq, addr, |
| 992 | etmq->period_instructions); |
| 993 | if (err) |
| 994 | return err; |
| 995 | |
| 996 | etmq->period_instructions = 0; |
| 997 | |
| 998 | } |
| 999 | |
| 1000 | if (etm->sample_branches && |
| 1001 | etmq->prev_packet->sample_type == CS_ETM_RANGE) { |
| 1002 | err = cs_etm__synth_branch_sample(etmq); |
| 1003 | if (err) |
| 1004 | return err; |
| 1005 | } |
| 1006 | |
| 1007 | swap_packet: |
| 1008 | if (etmq->etm->synth_opts.last_branch) { |
| 1009 | /* |
| 1010 | * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for |
| 1011 | * the next incoming packet. |
| 1012 | */ |
| 1013 | tmp = etmq->packet; |
| 1014 | etmq->packet = etmq->prev_packet; |
| 1015 | etmq->prev_packet = tmp; |
| 1016 | } |
| 1017 | |
| 1018 | return err; |
| 1019 | } |
| 1020 | |
| 1021 | static int cs_etm__run_decoder(struct cs_etm_queue *etmq) |
| 1022 | { |
| 1023 | struct cs_etm_auxtrace *etm = etmq->etm; |
| 1024 | struct cs_etm_buffer buffer; |
| 1025 | size_t buffer_used, processed; |
| 1026 | int err = 0; |
| 1027 | |
| 1028 | if (!etm->kernel_start) |
| 1029 | etm->kernel_start = machine__kernel_start(etm->machine); |
| 1030 | |
| 1031 | /* Go through each buffer in the queue and decode them one by one */ |
| 1032 | while (1) { |
| 1033 | buffer_used = 0; |
| 1034 | memset(&buffer, 0, sizeof(buffer)); |
| 1035 | err = cs_etm__get_trace(&buffer, etmq); |
| 1036 | if (err <= 0) |
| 1037 | return err; |
| 1038 | /* |
| 1039 | * We cannot assume consecutive blocks in the data file are |
| 1040 | * contiguous, reset the decoder to force re-sync. |
| 1041 | */ |
| 1042 | err = cs_etm_decoder__reset(etmq->decoder); |
| 1043 | if (err != 0) |
| 1044 | return err; |
| 1045 | |
| 1046 | /* Run trace decoder until buffer consumed or end of trace */ |
| 1047 | do { |
| 1048 | processed = 0; |
| 1049 | err = cs_etm_decoder__process_data_block( |
| 1050 | etmq->decoder, |
| 1051 | etmq->offset, |
| 1052 | &buffer.buf[buffer_used], |
| 1053 | buffer.len - buffer_used, |
| 1054 | &processed); |
| 1055 | if (err) |
| 1056 | return err; |
| 1057 | |
| 1058 | etmq->offset += processed; |
| 1059 | buffer_used += processed; |
| 1060 | |
| 1061 | /* Process each packet in this chunk */ |
| 1062 | while (1) { |
| 1063 | err = cs_etm_decoder__get_packet(etmq->decoder, |
| 1064 | etmq->packet); |
| 1065 | if (err <= 0) |
| 1066 | /* |
| 1067 | * Stop processing this chunk on |
| 1068 | * end of data or error |
| 1069 | */ |
| 1070 | break; |
| 1071 | |
| 1072 | switch (etmq->packet->sample_type) { |
| 1073 | case CS_ETM_RANGE: |
| 1074 | /* |
| 1075 | * If the packet contains an instruction |
| 1076 | * range, generate instruction sequence |
| 1077 | * events. |
| 1078 | */ |
| 1079 | cs_etm__sample(etmq); |
| 1080 | break; |
| 1081 | case CS_ETM_TRACE_ON: |
| 1082 | /* |
| 1083 | * Discontinuity in trace, flush |
| 1084 | * previous branch stack |
| 1085 | */ |
| 1086 | cs_etm__flush(etmq); |
| 1087 | break; |
| 1088 | case CS_ETM_EMPTY: |
| 1089 | /* |
| 1090 | * Should not receive empty packet, |
| 1091 | * report error. |
| 1092 | */ |
| 1093 | pr_err("CS ETM Trace: empty packet\n"); |
| 1094 | return -EINVAL; |
| 1095 | default: |
| 1096 | break; |
| 1097 | } |
| 1098 | } |
| 1099 | } while (buffer.len > buffer_used); |
| 1100 | |
| 1101 | if (err == 0) |
| 1102 | /* Flush any remaining branch stack entries */ |
| 1103 | err = cs_etm__flush(etmq); |
| 1104 | } |
| 1105 | |
| 1106 | return err; |
| 1107 | } |
| 1108 | |
| 1109 | static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, |
| 1110 | pid_t tid, u64 time_) |
| 1111 | { |
| 1112 | unsigned int i; |
| 1113 | struct auxtrace_queues *queues = &etm->queues; |
| 1114 | |
| 1115 | for (i = 0; i < queues->nr_queues; i++) { |
| 1116 | struct auxtrace_queue *queue = &etm->queues.queue_array[i]; |
| 1117 | struct cs_etm_queue *etmq = queue->priv; |
| 1118 | |
| 1119 | if (etmq && ((tid == -1) || (etmq->tid == tid))) { |
| 1120 | etmq->time = time_; |
| 1121 | cs_etm__set_pid_tid_cpu(etm, queue); |
| 1122 | cs_etm__run_decoder(etmq); |
| 1123 | } |
| 1124 | } |
| 1125 | |
| 1126 | return 0; |
| 1127 | } |
| 1128 | |
| 1129 | static int cs_etm__process_event(struct perf_session *session, |
| 1130 | union perf_event *event, |
| 1131 | struct perf_sample *sample, |
| 1132 | struct perf_tool *tool) |
| 1133 | { |
| 1134 | int err = 0; |
| 1135 | u64 timestamp; |
| 1136 | struct cs_etm_auxtrace *etm = container_of(session->auxtrace, |
| 1137 | struct cs_etm_auxtrace, |
| 1138 | auxtrace); |
| 1139 | |
| 1140 | if (dump_trace) |
| 1141 | return 0; |
| 1142 | |
| 1143 | if (!tool->ordered_events) { |
| 1144 | pr_err("CoreSight ETM Trace requires ordered events\n"); |
| 1145 | return -EINVAL; |
| 1146 | } |
| 1147 | |
| 1148 | if (!etm->timeless_decoding) |
| 1149 | return -EINVAL; |
| 1150 | |
| 1151 | if (sample->time && (sample->time != (u64) -1)) |
| 1152 | timestamp = sample->time; |
| 1153 | else |
| 1154 | timestamp = 0; |
| 1155 | |
| 1156 | if (timestamp || etm->timeless_decoding) { |
| 1157 | err = cs_etm__update_queues(etm); |
| 1158 | if (err) |
| 1159 | return err; |
| 1160 | } |
| 1161 | |
| 1162 | if (event->header.type == PERF_RECORD_EXIT) |
| 1163 | return cs_etm__process_timeless_queues(etm, |
| 1164 | event->fork.tid, |
| 1165 | sample->time); |
| 1166 | |
| 1167 | return 0; |
| 1168 | } |
| 1169 | |
| 1170 | static int cs_etm__process_auxtrace_event(struct perf_session *session, |
| 1171 | union perf_event *event, |
| 1172 | struct perf_tool *tool __maybe_unused) |
| 1173 | { |
| 1174 | struct cs_etm_auxtrace *etm = container_of(session->auxtrace, |
| 1175 | struct cs_etm_auxtrace, |
| 1176 | auxtrace); |
| 1177 | if (!etm->data_queued) { |
| 1178 | struct auxtrace_buffer *buffer; |
| 1179 | off_t data_offset; |
| 1180 | int fd = perf_data__fd(session->data); |
| 1181 | bool is_pipe = perf_data__is_pipe(session->data); |
| 1182 | int err; |
| 1183 | |
| 1184 | if (is_pipe) |
| 1185 | data_offset = 0; |
| 1186 | else { |
| 1187 | data_offset = lseek(fd, 0, SEEK_CUR); |
| 1188 | if (data_offset == -1) |
| 1189 | return -errno; |
| 1190 | } |
| 1191 | |
| 1192 | err = auxtrace_queues__add_event(&etm->queues, session, |
| 1193 | event, data_offset, &buffer); |
| 1194 | if (err) |
| 1195 | return err; |
| 1196 | |
| 1197 | if (dump_trace) |
| 1198 | if (auxtrace_buffer__get_data(buffer, fd)) { |
| 1199 | cs_etm__dump_event(etm, buffer); |
| 1200 | auxtrace_buffer__put_data(buffer); |
| 1201 | } |
| 1202 | } |
| 1203 | |
| 1204 | return 0; |
| 1205 | } |
| 1206 | |
| 1207 | static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) |
| 1208 | { |
| 1209 | struct perf_evsel *evsel; |
| 1210 | struct perf_evlist *evlist = etm->session->evlist; |
| 1211 | bool timeless_decoding = true; |
| 1212 | |
| 1213 | /* |
| 1214 | * Circle through the list of event and complain if we find one |
| 1215 | * with the time bit set. |
| 1216 | */ |
| 1217 | evlist__for_each_entry(evlist, evsel) { |
| 1218 | if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) |
| 1219 | timeless_decoding = false; |
| 1220 | } |
| 1221 | |
| 1222 | return timeless_decoding; |
| 1223 | } |
| 1224 | |
| 1225 | static const char * const cs_etm_global_header_fmts[] = { |
| 1226 | [CS_HEADER_VERSION_0] = " Header version %llx\n", |
| 1227 | [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", |
| 1228 | [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", |
| 1229 | }; |
| 1230 | |
| 1231 | static const char * const cs_etm_priv_fmts[] = { |
| 1232 | [CS_ETM_MAGIC] = " Magic number %llx\n", |
| 1233 | [CS_ETM_CPU] = " CPU %lld\n", |
| 1234 | [CS_ETM_ETMCR] = " ETMCR %llx\n", |
| 1235 | [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", |
| 1236 | [CS_ETM_ETMCCER] = " ETMCCER %llx\n", |
| 1237 | [CS_ETM_ETMIDR] = " ETMIDR %llx\n", |
| 1238 | }; |
| 1239 | |
| 1240 | static const char * const cs_etmv4_priv_fmts[] = { |
| 1241 | [CS_ETM_MAGIC] = " Magic number %llx\n", |
| 1242 | [CS_ETM_CPU] = " CPU %lld\n", |
| 1243 | [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", |
| 1244 | [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", |
| 1245 | [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", |
| 1246 | [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n", |
| 1247 | [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", |
| 1248 | [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", |
| 1249 | [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", |
| 1250 | }; |
| 1251 | |
| 1252 | static void cs_etm__print_auxtrace_info(u64 *val, int num) |
| 1253 | { |
| 1254 | int i, j, cpu = 0; |
| 1255 | |
| 1256 | for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) |
| 1257 | fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); |
| 1258 | |
| 1259 | for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { |
| 1260 | if (val[i] == __perf_cs_etmv3_magic) |
| 1261 | for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) |
| 1262 | fprintf(stdout, cs_etm_priv_fmts[j], val[i]); |
| 1263 | else if (val[i] == __perf_cs_etmv4_magic) |
| 1264 | for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) |
| 1265 | fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); |
| 1266 | else |
| 1267 | /* failure.. return */ |
| 1268 | return; |
| 1269 | } |
| 1270 | } |
| 1271 | |
| 1272 | int cs_etm__process_auxtrace_info(union perf_event *event, |
| 1273 | struct perf_session *session) |
| 1274 | { |
| 1275 | struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; |
| 1276 | struct cs_etm_auxtrace *etm = NULL; |
| 1277 | struct int_node *inode; |
| 1278 | unsigned int pmu_type; |
| 1279 | int event_header_size = sizeof(struct perf_event_header); |
| 1280 | int info_header_size; |
| 1281 | int total_size = auxtrace_info->header.size; |
| 1282 | int priv_size = 0; |
| 1283 | int num_cpu; |
| 1284 | int err = 0, idx = -1; |
| 1285 | int i, j, k; |
| 1286 | u64 *ptr, *hdr = NULL; |
| 1287 | u64 **metadata = NULL; |
| 1288 | |
| 1289 | /* |
| 1290 | * sizeof(auxtrace_info_event::type) + |
| 1291 | * sizeof(auxtrace_info_event::reserved) == 8 |
| 1292 | */ |
| 1293 | info_header_size = 8; |
| 1294 | |
| 1295 | if (total_size < (event_header_size + info_header_size)) |
| 1296 | return -EINVAL; |
| 1297 | |
| 1298 | priv_size = total_size - event_header_size - info_header_size; |
| 1299 | |
| 1300 | /* First the global part */ |
| 1301 | ptr = (u64 *) auxtrace_info->priv; |
| 1302 | |
| 1303 | /* Look for version '0' of the header */ |
| 1304 | if (ptr[0] != 0) |
| 1305 | return -EINVAL; |
| 1306 | |
| 1307 | hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); |
| 1308 | if (!hdr) |
| 1309 | return -ENOMEM; |
| 1310 | |
| 1311 | /* Extract header information - see cs-etm.h for format */ |
| 1312 | for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) |
| 1313 | hdr[i] = ptr[i]; |
| 1314 | num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; |
| 1315 | pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & |
| 1316 | 0xffffffff); |
| 1317 | |
| 1318 | /* |
| 1319 | * Create an RB tree for traceID-CPU# tuple. Since the conversion has |
| 1320 | * to be made for each packet that gets decoded, optimizing access in |
| 1321 | * anything other than a sequential array is worth doing. |
| 1322 | */ |
| 1323 | traceid_list = intlist__new(NULL); |
| 1324 | if (!traceid_list) { |
| 1325 | err = -ENOMEM; |
| 1326 | goto err_free_hdr; |
| 1327 | } |
| 1328 | |
| 1329 | metadata = zalloc(sizeof(*metadata) * num_cpu); |
| 1330 | if (!metadata) { |
| 1331 | err = -ENOMEM; |
| 1332 | goto err_free_traceid_list; |
| 1333 | } |
| 1334 | |
| 1335 | /* |
| 1336 | * The metadata is stored in the auxtrace_info section and encodes |
| 1337 | * the configuration of the ARM embedded trace macrocell which is |
| 1338 | * required by the trace decoder to properly decode the trace due |
| 1339 | * to its highly compressed nature. |
| 1340 | */ |
| 1341 | for (j = 0; j < num_cpu; j++) { |
| 1342 | if (ptr[i] == __perf_cs_etmv3_magic) { |
| 1343 | metadata[j] = zalloc(sizeof(*metadata[j]) * |
| 1344 | CS_ETM_PRIV_MAX); |
| 1345 | if (!metadata[j]) { |
| 1346 | err = -ENOMEM; |
| 1347 | goto err_free_metadata; |
| 1348 | } |
| 1349 | for (k = 0; k < CS_ETM_PRIV_MAX; k++) |
| 1350 | metadata[j][k] = ptr[i + k]; |
| 1351 | |
| 1352 | /* The traceID is our handle */ |
| 1353 | idx = metadata[j][CS_ETM_ETMTRACEIDR]; |
| 1354 | i += CS_ETM_PRIV_MAX; |
| 1355 | } else if (ptr[i] == __perf_cs_etmv4_magic) { |
| 1356 | metadata[j] = zalloc(sizeof(*metadata[j]) * |
| 1357 | CS_ETMV4_PRIV_MAX); |
| 1358 | if (!metadata[j]) { |
| 1359 | err = -ENOMEM; |
| 1360 | goto err_free_metadata; |
| 1361 | } |
| 1362 | for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) |
| 1363 | metadata[j][k] = ptr[i + k]; |
| 1364 | |
| 1365 | /* The traceID is our handle */ |
| 1366 | idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; |
| 1367 | i += CS_ETMV4_PRIV_MAX; |
| 1368 | } |
| 1369 | |
| 1370 | /* Get an RB node for this CPU */ |
| 1371 | inode = intlist__findnew(traceid_list, idx); |
| 1372 | |
| 1373 | /* Something went wrong, no need to continue */ |
| 1374 | if (!inode) { |
| 1375 | err = PTR_ERR(inode); |
| 1376 | goto err_free_metadata; |
| 1377 | } |
| 1378 | |
| 1379 | /* |
| 1380 | * The node for that CPU should not be taken. |
| 1381 | * Back out if that's the case. |
| 1382 | */ |
| 1383 | if (inode->priv) { |
| 1384 | err = -EINVAL; |
| 1385 | goto err_free_metadata; |
| 1386 | } |
| 1387 | /* All good, associate the traceID with the CPU# */ |
| 1388 | inode->priv = &metadata[j][CS_ETM_CPU]; |
| 1389 | } |
| 1390 | |
| 1391 | /* |
| 1392 | * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and |
| 1393 | * CS_ETMV4_PRIV_MAX mark how many double words are in the |
| 1394 | * global metadata, and each cpu's metadata respectively. |
| 1395 | * The following tests if the correct number of double words was |
| 1396 | * present in the auxtrace info section. |
| 1397 | */ |
| 1398 | if (i * 8 != priv_size) { |
| 1399 | err = -EINVAL; |
| 1400 | goto err_free_metadata; |
| 1401 | } |
| 1402 | |
| 1403 | etm = zalloc(sizeof(*etm)); |
| 1404 | |
| 1405 | if (!etm) { |
| 1406 | err = -ENOMEM; |
| 1407 | goto err_free_metadata; |
| 1408 | } |
| 1409 | |
| 1410 | err = auxtrace_queues__init(&etm->queues); |
| 1411 | if (err) |
| 1412 | goto err_free_etm; |
| 1413 | |
| 1414 | etm->session = session; |
| 1415 | etm->machine = &session->machines.host; |
| 1416 | |
| 1417 | etm->num_cpu = num_cpu; |
| 1418 | etm->pmu_type = pmu_type; |
| 1419 | etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); |
| 1420 | etm->metadata = metadata; |
| 1421 | etm->auxtrace_type = auxtrace_info->type; |
| 1422 | etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); |
| 1423 | |
| 1424 | etm->auxtrace.process_event = cs_etm__process_event; |
| 1425 | etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; |
| 1426 | etm->auxtrace.flush_events = cs_etm__flush_events; |
| 1427 | etm->auxtrace.free_events = cs_etm__free_events; |
| 1428 | etm->auxtrace.free = cs_etm__free; |
| 1429 | session->auxtrace = &etm->auxtrace; |
| 1430 | |
| 1431 | etm->unknown_thread = thread__new(999999999, 999999999); |
| 1432 | if (!etm->unknown_thread) |
| 1433 | goto err_free_queues; |
| 1434 | |
| 1435 | /* |
| 1436 | * Initialize list node so that at thread__zput() we can avoid |
| 1437 | * segmentation fault at list_del_init(). |
| 1438 | */ |
| 1439 | INIT_LIST_HEAD(&etm->unknown_thread->node); |
| 1440 | |
| 1441 | err = thread__set_comm(etm->unknown_thread, "unknown", 0); |
| 1442 | if (err) |
| 1443 | goto err_delete_thread; |
| 1444 | |
| 1445 | if (thread__init_map_groups(etm->unknown_thread, etm->machine)) |
| 1446 | goto err_delete_thread; |
| 1447 | |
| 1448 | if (dump_trace) { |
| 1449 | cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); |
| 1450 | return 0; |
| 1451 | } |
| 1452 | |
| 1453 | if (session->itrace_synth_opts && session->itrace_synth_opts->set) { |
| 1454 | etm->synth_opts = *session->itrace_synth_opts; |
| 1455 | } else { |
| 1456 | itrace_synth_opts__set_default(&etm->synth_opts); |
| 1457 | etm->synth_opts.callchain = false; |
| 1458 | } |
| 1459 | |
| 1460 | err = cs_etm__synth_events(etm, session); |
| 1461 | if (err) |
| 1462 | goto err_delete_thread; |
| 1463 | |
| 1464 | err = auxtrace_queues__process_index(&etm->queues, session); |
| 1465 | if (err) |
| 1466 | goto err_delete_thread; |
| 1467 | |
| 1468 | etm->data_queued = etm->queues.populated; |
| 1469 | |
| 1470 | return 0; |
| 1471 | |
| 1472 | err_delete_thread: |
| 1473 | thread__zput(etm->unknown_thread); |
| 1474 | err_free_queues: |
| 1475 | auxtrace_queues__free(&etm->queues); |
| 1476 | session->auxtrace = NULL; |
| 1477 | err_free_etm: |
| 1478 | zfree(&etm); |
| 1479 | err_free_metadata: |
| 1480 | /* No need to check @metadata[j], free(NULL) is supported */ |
| 1481 | for (j = 0; j < num_cpu; j++) |
| 1482 | free(metadata[j]); |
| 1483 | zfree(&metadata); |
| 1484 | err_free_traceid_list: |
| 1485 | intlist__delete(traceid_list); |
| 1486 | err_free_hdr: |
| 1487 | zfree(&hdr); |
| 1488 | |
| 1489 | return -EINVAL; |
| 1490 | } |