Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright IBM Corp. 2018 |
| 4 | * Auxtrace support for s390 CPU-Measurement Sampling Facility |
| 5 | * |
| 6 | * Author(s): Thomas Richter <tmricht@linux.ibm.com> |
| 7 | * |
| 8 | * Auxiliary traces are collected during 'perf record' using rbd000 event. |
| 9 | * Several PERF_RECORD_XXX are generated during recording: |
| 10 | * |
| 11 | * PERF_RECORD_AUX: |
| 12 | * Records that new data landed in the AUX buffer part. |
| 13 | * PERF_RECORD_AUXTRACE: |
| 14 | * Defines auxtrace data. Followed by the actual data. The contents of |
| 15 | * the auxtrace data is dependent on the event and the CPU. |
| 16 | * This record is generated by perf record command. For details |
| 17 | * see Documentation/perf.data-file-format.txt. |
| 18 | * PERF_RECORD_AUXTRACE_INFO: |
| 19 | * Defines a table of contents for PERF_RECORD_AUXTRACE records. This |
| 20 | * record is generated during 'perf record' command. Each record contains up |
| 21 | * to 256 entries describing offset and size of the AUXTRACE data in the |
| 22 | * perf.data file. |
| 23 | * PERF_RECORD_AUXTRACE_ERROR: |
| 24 | * Indicates an error during AUXTRACE collection such as buffer overflow. |
| 25 | * PERF_RECORD_FINISHED_ROUND: |
| 26 | * Perf events are not necessarily in time stamp order, as they can be |
| 27 | * collected in parallel on different CPUs. If the events should be |
| 28 | * processed in time order they need to be sorted first. |
| 29 | * Perf report guarantees that there is no reordering over a |
| 30 | * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a |
| 31 | * time stamp lower than this record are processed (and displayed) before |
| 32 | * the succeeding perf record are processed. |
| 33 | * |
| 34 | * These records are evaluated during perf report command. |
| 35 | * |
| 36 | * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for |
| 37 | * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() |
| 38 | * below. |
| 39 | * Auxiliary trace data is collected per CPU. To merge the data into the report |
| 40 | * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace |
| 41 | * data is in ascending order. |
| 42 | * |
| 43 | * Each queue has a double linked list of auxtrace_buffers. This list contains |
| 44 | * the offset and size of a CPU's auxtrace data. During auxtrace processing |
| 45 | * the data portion is mmap()'ed. |
| 46 | * |
| 47 | * To sort the queues in chronological order, all queue access is controlled |
| 48 | * by the auxtrace_heap. This is basically a stack; each stack element has two |
| 49 | * entries, the queue number and a time stamp. However, the stack is sorted by |
| 50 | * the time stamps. The highest time stamp is at the bottom, the lowest |
| 51 | * (nearest) time stamp is at the top. That sort order is maintained at all |
| 52 | * times! |
| 53 | * |
| 54 | * After the auxtrace infrastructure has been setup, the auxtrace queues are |
| 55 | * filled with data (offset/size pairs) and the auxtrace_heap is populated. |
| 56 | * |
| 57 | * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. |
| 58 | * Each record is handled by s390_cpumsf_process_event(). The time stamp of |
| 59 | * the perf record is compared with the time stamp located on the auxtrace_heap |
| 60 | * top element. If that time stamp is lower than the time stamp from the |
| 61 | * record sample, the auxtrace queues will be processed. As auxtrace queues |
| 62 | * control many auxtrace_buffers and each buffer can be quite large, the |
| 63 | * auxtrace buffer might be processed only partially. In this case the |
| 64 | * position in the auxtrace_buffer of that queue is remembered and the time |
| 65 | * stamp of the last processed entry of the auxtrace_buffer replaces the |
| 66 | * current auxtrace_heap top. |
| 67 | * |
| 68 | * 3. Auxtrace_queues might run out of data and are fed by the |
| 69 | * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). |
| 70 | * |
| 71 | * Event Generation |
| 72 | * Each sampling-data entry in the auxiliary trace data generates a perf sample. |
| 73 | * This sample is filled |
| 74 | * with data from the auxtrace such as PID/TID, instruction address, CPU state, |
| 75 | * etc. This sample is processed with perf_session__deliver_synth_event() to |
| 76 | * be included into the GUI. |
| 77 | * |
| 78 | * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining |
| 79 | * auxiliary trace entries until the auxtrace_heap top reaches the time |
| 80 | * stamp of this record. This is triggered by ordered_event->deliver(). |
| 81 | * |
| 82 | * |
| 83 | * Perf event processing. |
| 84 | * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. |
| 85 | * This is the function call sequence: |
| 86 | * |
| 87 | * __cmd_report() |
| 88 | * | |
| 89 | * perf_session__process_events() |
| 90 | * | |
| 91 | * __perf_session__process_events() |
| 92 | * | |
| 93 | * perf_session__process_event() |
| 94 | * | This function splits the PERF_RECORD_XXX records. |
| 95 | * | - Those generated by perf record command (type number equal or higher |
| 96 | * | than PERF_RECORD_USER_TYPE_START) are handled by |
| 97 | * | perf_session__process_user_event() (see below) |
| 98 | * | - Those generated by the kernel are handled by |
| 99 | * | perf_evlist__parse_sample_timestamp() |
| 100 | * | |
| 101 | * perf_evlist__parse_sample_timestamp() |
| 102 | * | Extract time stamp from sample data. |
| 103 | * | |
| 104 | * perf_session__queue_event() |
| 105 | * | If timestamp is positive the sample is entered into an ordered_event |
| 106 | * | list, sort order is the timestamp. The event processing is deferred until |
| 107 | * | later (see perf_session__process_user_event()). |
| 108 | * | Other timestamps (0 or -1) are handled immediately by |
| 109 | * | perf_session__deliver_event(). These are events generated at start up |
| 110 | * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* |
| 111 | * | records. They are needed to create a list of running processes and its |
| 112 | * | memory mappings and layout. They are needed at the beginning to enable |
| 113 | * | command perf report to create process trees and memory mappings. |
| 114 | * | |
| 115 | * perf_session__deliver_event() |
| 116 | * | Delivers a PERF_RECORD_XXX entry for handling. |
| 117 | * | |
| 118 | * auxtrace__process_event() |
| 119 | * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with |
| 120 | * | time stamps from the auxiliary trace buffers. This enables |
| 121 | * | synchronization between auxiliary trace data and the events on the |
| 122 | * | perf.data file. |
| 123 | * | |
| 124 | * machine__deliver_event() |
| 125 | * | Handles the PERF_RECORD_XXX event. This depends on the record type. |
| 126 | * It might update the process tree, update a process memory map or enter |
| 127 | * a sample with IP and call back chain data into GUI data pool. |
| 128 | * |
| 129 | * |
| 130 | * Deferred processing determined by perf_session__process_user_event() is |
| 131 | * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These |
| 132 | * are generated during command perf record. |
| 133 | * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all |
| 134 | * PERF_RECORD_XXX entries stored in the ordered_event list. This list was |
| 135 | * built up while reading the perf.data file. |
| 136 | * Each event is now processed by calling perf_session__deliver_event(). |
| 137 | * This enables time synchronization between the data in the perf.data file and |
| 138 | * the data in the auxiliary trace buffers. |
| 139 | */ |
| 140 | |
| 141 | #include <endian.h> |
| 142 | #include <errno.h> |
| 143 | #include <byteswap.h> |
| 144 | #include <inttypes.h> |
| 145 | #include <linux/kernel.h> |
| 146 | #include <linux/types.h> |
| 147 | #include <linux/bitops.h> |
| 148 | #include <linux/log2.h> |
| 149 | |
| 150 | #include "cpumap.h" |
| 151 | #include "color.h" |
| 152 | #include "evsel.h" |
| 153 | #include "evlist.h" |
| 154 | #include "machine.h" |
| 155 | #include "session.h" |
| 156 | #include "util.h" |
| 157 | #include "thread.h" |
| 158 | #include "debug.h" |
| 159 | #include "auxtrace.h" |
| 160 | #include "s390-cpumsf.h" |
| 161 | #include "s390-cpumsf-kernel.h" |
| 162 | |
/* Decoder state for s390 CPU-Measurement Sampling Facility auxiliary trace
 * data. One instance is allocated in s390_cpumsf_process_auxtrace_info()
 * and released in s390_cpumsf_free().
 */
struct s390_cpumsf {
	struct auxtrace auxtrace;	/* Callbacks; session->auxtrace points at this member */
	struct auxtrace_queues queues;	/* Queues holding the auxtrace buffers */
	struct auxtrace_heap heap;	/* Queue numbers paired with a time stamp */
	struct perf_session *session;	/* Session this decoder is attached to */
	struct machine *machine;	/* Set to the session's host machine */
	u32 auxtrace_type;		/* Copied from the AUXTRACE_INFO event type */
	u32 pmu_type;			/* Set to PERF_TYPE_RAW */
	u16 machine_type;		/* Number parsed from the cpuid string */
	bool data_queued;		/* True once the queues were populated from the index */
};
| 174 | |
/* Private data attached to one auxtrace_queue (stored in queue->priv). */
struct s390_cpumsf_queue {
	struct s390_cpumsf *sf;		/* Back pointer to the decoder state */
	unsigned int queue_nr;		/* Index into sf->queues.queue_array */
	struct auxtrace_buffer *buffer;	/* Buffer being decoded, NULL if none */
	int cpu;			/* CPU of the queue, -1 if not known */
};
| 181 | |
| 182 | /* Display s390 CPU measurement facility basic-sampling data entry */ |
| 183 | static bool s390_cpumsf_basic_show(const char *color, size_t pos, |
| 184 | struct hws_basic_entry *basic) |
| 185 | { |
| 186 | if (basic->def != 1) { |
| 187 | pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); |
| 188 | return false; |
| 189 | } |
| 190 | color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x" |
| 191 | " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" |
| 192 | "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", |
| 193 | pos, basic->def, basic->U, |
| 194 | basic->T ? 'T' : ' ', |
| 195 | basic->W ? 'W' : ' ', |
| 196 | basic->P ? 'P' : ' ', |
| 197 | basic->I ? 'I' : ' ', |
| 198 | basic->AS, basic->prim_asn, basic->ia, basic->CL, |
| 199 | basic->hpp, basic->gpp); |
| 200 | return true; |
| 201 | } |
| 202 | |
| 203 | /* Display s390 CPU measurement facility diagnostic-sampling data entry */ |
| 204 | static bool s390_cpumsf_diag_show(const char *color, size_t pos, |
| 205 | struct hws_diag_entry *diag) |
| 206 | { |
| 207 | if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { |
| 208 | pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); |
| 209 | return false; |
| 210 | } |
| 211 | color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n", |
| 212 | pos, diag->def, diag->I ? 'I' : ' '); |
| 213 | return true; |
| 214 | } |
| 215 | |
| 216 | /* Return TOD timestamp contained in an trailer entry */ |
| 217 | static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) |
| 218 | { |
| 219 | /* te->t set: TOD in STCKE format, bytes 8-15 |
| 220 | * to->t not set: TOD in STCK format, bytes 0-7 |
| 221 | */ |
| 222 | unsigned long long ts; |
| 223 | |
| 224 | memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); |
| 225 | return ts; |
| 226 | } |
| 227 | |
| 228 | /* Display s390 CPU measurement facility trailer entry */ |
| 229 | static bool s390_cpumsf_trailer_show(const char *color, size_t pos, |
| 230 | struct hws_trailer_entry *te) |
| 231 | { |
| 232 | if (te->bsdes != sizeof(struct hws_basic_entry)) { |
| 233 | pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); |
| 234 | return false; |
| 235 | } |
| 236 | color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" |
| 237 | " dsdes:%d Overflow:%lld Time:%#llx\n" |
| 238 | "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", |
| 239 | pos, |
| 240 | te->f ? 'F' : ' ', |
| 241 | te->a ? 'A' : ' ', |
| 242 | te->t ? 'T' : ' ', |
| 243 | te->bsdes, te->dsdes, te->overflow, |
| 244 | trailer_timestamp(te), te->clock_base, te->progusage2, |
| 245 | te->progusage[0], te->progusage[1]); |
| 246 | return true; |
| 247 | } |
| 248 | |
| 249 | /* Test a sample data block. It must be 4KB or a multiple thereof in size and |
| 250 | * 4KB page aligned. Each sample data page has a trailer entry at the |
| 251 | * end which contains the sample entry data sizes. |
| 252 | * |
| 253 | * Return true if the sample data block passes the checks and set the |
| 254 | * basic set entry size and diagnostic set entry size. |
| 255 | * |
| 256 | * Return false on failure. |
| 257 | * |
| 258 | * Note: Old hardware does not set the basic or diagnostic entry sizes |
| 259 | * in the trailer entry. Use the type number instead. |
| 260 | */ |
| 261 | static bool s390_cpumsf_validate(int machine_type, |
| 262 | unsigned char *buf, size_t len, |
| 263 | unsigned short *bsdes, |
| 264 | unsigned short *dsdes) |
| 265 | { |
| 266 | struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; |
| 267 | struct hws_trailer_entry *te; |
| 268 | |
| 269 | *dsdes = *bsdes = 0; |
| 270 | if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ |
| 271 | return false; |
| 272 | if (basic->def != 1) /* No basic set entry, must be first */ |
| 273 | return false; |
| 274 | /* Check for trailer entry at end of SDB */ |
| 275 | te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ |
| 276 | - sizeof(*te)); |
| 277 | *bsdes = te->bsdes; |
| 278 | *dsdes = te->dsdes; |
| 279 | if (!te->bsdes && !te->dsdes) { |
| 280 | /* Very old hardware, use CPUID */ |
| 281 | switch (machine_type) { |
| 282 | case 2097: |
| 283 | case 2098: |
| 284 | *dsdes = 64; |
| 285 | *bsdes = 32; |
| 286 | break; |
| 287 | case 2817: |
| 288 | case 2818: |
| 289 | *dsdes = 74; |
| 290 | *bsdes = 32; |
| 291 | break; |
| 292 | case 2827: |
| 293 | case 2828: |
| 294 | *dsdes = 85; |
| 295 | *bsdes = 32; |
| 296 | break; |
| 297 | default: |
| 298 | /* Illegal trailer entry */ |
| 299 | return false; |
| 300 | } |
| 301 | } |
| 302 | return true; |
| 303 | } |
| 304 | |
| 305 | /* Return true if there is room for another entry */ |
| 306 | static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) |
| 307 | { |
| 308 | size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); |
| 309 | |
| 310 | if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) |
| 311 | return false; |
| 312 | return true; |
| 313 | } |
| 314 | |
| 315 | /* Dump an auxiliary buffer. These buffers are multiple of |
| 316 | * 4KB SDB pages. |
| 317 | */ |
| 318 | static void s390_cpumsf_dump(struct s390_cpumsf *sf, |
| 319 | unsigned char *buf, size_t len) |
| 320 | { |
| 321 | const char *color = PERF_COLOR_BLUE; |
| 322 | struct hws_basic_entry *basic; |
| 323 | struct hws_diag_entry *diag; |
| 324 | unsigned short bsdes, dsdes; |
| 325 | size_t pos = 0; |
| 326 | |
| 327 | color_fprintf(stdout, color, |
| 328 | ". ... s390 AUX data: size %zu bytes\n", |
| 329 | len); |
| 330 | |
| 331 | if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, |
| 332 | &dsdes)) { |
| 333 | pr_err("Invalid AUX trace data block size:%zu" |
| 334 | " (type:%d bsdes:%hd dsdes:%hd)\n", |
| 335 | len, sf->machine_type, bsdes, dsdes); |
| 336 | return; |
| 337 | } |
| 338 | |
| 339 | /* s390 kernel always returns 4KB blocks fully occupied, |
| 340 | * no partially filled SDBs. |
| 341 | */ |
| 342 | while (pos < len) { |
| 343 | /* Handle Basic entry */ |
| 344 | basic = (struct hws_basic_entry *)(buf + pos); |
| 345 | if (s390_cpumsf_basic_show(color, pos, basic)) |
| 346 | pos += bsdes; |
| 347 | else |
| 348 | return; |
| 349 | |
| 350 | /* Handle Diagnostic entry */ |
| 351 | diag = (struct hws_diag_entry *)(buf + pos); |
| 352 | if (s390_cpumsf_diag_show(color, pos, diag)) |
| 353 | pos += dsdes; |
| 354 | else |
| 355 | return; |
| 356 | |
| 357 | /* Check for trailer entry */ |
| 358 | if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { |
| 359 | /* Show trailer entry */ |
| 360 | struct hws_trailer_entry te; |
| 361 | |
| 362 | pos = (pos + S390_CPUMSF_PAGESZ) |
| 363 | & ~(S390_CPUMSF_PAGESZ - 1); |
| 364 | pos -= sizeof(te); |
| 365 | memcpy(&te, buf + pos, sizeof(te)); |
| 366 | /* Set descriptor sizes in case of old hardware |
| 367 | * where these values are not set. |
| 368 | */ |
| 369 | te.bsdes = bsdes; |
| 370 | te.dsdes = dsdes; |
| 371 | if (s390_cpumsf_trailer_show(color, pos, &te)) |
| 372 | pos += sizeof(te); |
| 373 | else |
| 374 | return; |
| 375 | } |
| 376 | } |
| 377 | } |
| 378 | |
/* Dump one auxtrace buffer: print a separator line containing a single
 * dot, then hand the buffer to s390_cpumsf_dump().
 */
static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
				   size_t len)
{
	printf(".\n");

	s390_cpumsf_dump(sf, buf, len);
}
| 385 | |
| 386 | #define S390_LPP_PID_MASK 0xffffffff |
| 387 | |
| 388 | static bool s390_cpumsf_make_event(size_t pos, |
| 389 | struct hws_basic_entry *basic, |
| 390 | struct s390_cpumsf_queue *sfq) |
| 391 | { |
| 392 | struct perf_sample sample = { |
| 393 | .ip = basic->ia, |
| 394 | .pid = basic->hpp & S390_LPP_PID_MASK, |
| 395 | .tid = basic->hpp & S390_LPP_PID_MASK, |
| 396 | .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, |
| 397 | .cpu = sfq->cpu, |
| 398 | .period = 1 |
| 399 | }; |
| 400 | union perf_event event; |
| 401 | |
| 402 | memset(&event, 0, sizeof(event)); |
| 403 | if (basic->CL == 1) /* Native LPAR mode */ |
| 404 | sample.cpumode = basic->P ? PERF_RECORD_MISC_USER |
| 405 | : PERF_RECORD_MISC_KERNEL; |
| 406 | else if (basic->CL == 2) /* Guest kernel/user space */ |
| 407 | sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER |
| 408 | : PERF_RECORD_MISC_GUEST_KERNEL; |
| 409 | else if (basic->gpp || basic->prim_asn != 0xffff) |
| 410 | /* Use heuristics on old hardware */ |
| 411 | sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER |
| 412 | : PERF_RECORD_MISC_GUEST_KERNEL; |
| 413 | else |
| 414 | sample.cpumode = basic->P ? PERF_RECORD_MISC_USER |
| 415 | : PERF_RECORD_MISC_KERNEL; |
| 416 | |
| 417 | event.sample.header.type = PERF_RECORD_SAMPLE; |
| 418 | event.sample.header.misc = sample.cpumode; |
| 419 | event.sample.header.size = sizeof(struct perf_event_header); |
| 420 | |
| 421 | pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", |
| 422 | __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, |
| 423 | sample.tid, sample.cpumode, sample.cpu); |
| 424 | if (perf_session__deliver_synth_event(sfq->sf->session, &event, |
| 425 | &sample)) { |
| 426 | pr_err("s390 Auxiliary Trace: failed to deliver event\n"); |
| 427 | return false; |
| 428 | } |
| 429 | return true; |
| 430 | } |
| 431 | |
| 432 | static unsigned long long get_trailer_time(const unsigned char *buf) |
| 433 | { |
| 434 | struct hws_trailer_entry *te; |
| 435 | unsigned long long aux_time; |
| 436 | |
| 437 | te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ |
| 438 | - sizeof(*te)); |
| 439 | |
| 440 | if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ |
| 441 | return 0; |
| 442 | |
| 443 | /* Correct calculation to convert time stamp in trailer entry to |
| 444 | * nano seconds (taken from arch/s390 function tod_to_ns()). |
| 445 | * TOD_CLOCK_BASE is stored in trailer entry member progusage2. |
| 446 | */ |
| 447 | aux_time = trailer_timestamp(te) - te->progusage2; |
| 448 | aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); |
| 449 | return aux_time; |
| 450 | } |
| 451 | |
| 452 | /* Process the data samples of a single queue. The first parameter is a |
| 453 | * pointer to the queue, the second parameter is the time stamp. This |
| 454 | * is the time stamp: |
| 455 | * - of the event that triggered this processing. |
| 456 | * - or the time stamp when the last proccesing of this queue stopped. |
| 457 | * In this case it stopped at a 4KB page boundary and record the |
| 458 | * position on where to continue processing on the next invocation |
| 459 | * (see buffer->use_data and buffer->use_size). |
| 460 | * |
| 461 | * When this function returns the second parameter is updated to |
| 462 | * reflect the time stamp of the last processed auxiliary data entry |
| 463 | * (taken from the trailer entry of that page). The caller uses this |
| 464 | * returned time stamp to record the last processed entry in this |
| 465 | * queue. |
| 466 | * |
| 467 | * The function returns: |
| 468 | * 0: Processing successful. The second parameter returns the |
| 469 | * time stamp from the trailer entry until which position |
| 470 | * processing took place. Subsequent calls resume from this |
| 471 | * position. |
| 472 | * <0: An error occurred during processing. The second parameter |
| 473 | * returns the maximum time stamp. |
| 474 | * >0: Done on this queue. The second parameter returns the |
| 475 | * maximum time stamp. |
| 476 | */ |
| 477 | static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) |
| 478 | { |
| 479 | struct s390_cpumsf *sf = sfq->sf; |
| 480 | unsigned char *buf = sfq->buffer->use_data; |
| 481 | size_t len = sfq->buffer->use_size; |
| 482 | struct hws_basic_entry *basic; |
| 483 | unsigned short bsdes, dsdes; |
| 484 | size_t pos = 0; |
| 485 | int err = 1; |
| 486 | u64 aux_ts; |
| 487 | |
| 488 | if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, |
| 489 | &dsdes)) { |
| 490 | *ts = ~0ULL; |
| 491 | return -1; |
| 492 | } |
| 493 | |
| 494 | /* Get trailer entry time stamp and check if entries in |
| 495 | * this auxiliary page are ready for processing. If the |
| 496 | * time stamp of the first entry is too high, whole buffer |
| 497 | * can be skipped. In this case return time stamp. |
| 498 | */ |
| 499 | aux_ts = get_trailer_time(buf); |
| 500 | if (!aux_ts) { |
| 501 | pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", |
| 502 | sfq->buffer->data_offset); |
| 503 | aux_ts = ~0ULL; |
| 504 | goto out; |
| 505 | } |
| 506 | if (aux_ts > *ts) { |
| 507 | *ts = aux_ts; |
| 508 | return 0; |
| 509 | } |
| 510 | |
| 511 | while (pos < len) { |
| 512 | /* Handle Basic entry */ |
| 513 | basic = (struct hws_basic_entry *)(buf + pos); |
| 514 | if (s390_cpumsf_make_event(pos, basic, sfq)) |
| 515 | pos += bsdes; |
| 516 | else { |
| 517 | err = -EBADF; |
| 518 | goto out; |
| 519 | } |
| 520 | |
| 521 | pos += dsdes; /* Skip diagnositic entry */ |
| 522 | |
| 523 | /* Check for trailer entry */ |
| 524 | if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { |
| 525 | pos = (pos + S390_CPUMSF_PAGESZ) |
| 526 | & ~(S390_CPUMSF_PAGESZ - 1); |
| 527 | /* Check existence of next page */ |
| 528 | if (pos >= len) |
| 529 | break; |
| 530 | aux_ts = get_trailer_time(buf + pos); |
| 531 | if (!aux_ts) { |
| 532 | aux_ts = ~0ULL; |
| 533 | goto out; |
| 534 | } |
| 535 | if (aux_ts > *ts) { |
| 536 | *ts = aux_ts; |
| 537 | sfq->buffer->use_data += pos; |
| 538 | sfq->buffer->use_size -= pos; |
| 539 | return 0; |
| 540 | } |
| 541 | } |
| 542 | } |
| 543 | out: |
| 544 | *ts = aux_ts; |
| 545 | sfq->buffer->use_size = 0; |
| 546 | sfq->buffer->use_data = NULL; |
| 547 | return err; /* Buffer completely scanned or error */ |
| 548 | } |
| 549 | |
| 550 | /* Run the s390 auxiliary trace decoder. |
| 551 | * Select the queue buffer to operate on, the caller already selected |
| 552 | * the proper queue, depending on second parameter 'ts'. |
| 553 | * This is the time stamp until which the auxiliary entries should |
| 554 | * be processed. This value is updated by called functions and |
| 555 | * returned to the caller. |
| 556 | * |
| 557 | * Resume processing in the current buffer. If there is no buffer |
| 558 | * get a new buffer from the queue and setup start position for |
| 559 | * processing. |
| 560 | * When a buffer is completely processed remove it from the queue |
| 561 | * before returning. |
| 562 | * |
| 563 | * This function returns |
| 564 | * 1: When the queue is empty. Second parameter will be set to |
| 565 | * maximum time stamp. |
| 566 | * 0: Normal processing done. |
| 567 | * <0: Error during queue buffer setup. This causes the caller |
| 568 | * to stop processing completely. |
| 569 | */ |
| 570 | static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, |
| 571 | u64 *ts) |
| 572 | { |
| 573 | |
| 574 | struct auxtrace_buffer *buffer; |
| 575 | struct auxtrace_queue *queue; |
| 576 | int err; |
| 577 | |
| 578 | queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; |
| 579 | |
| 580 | /* Get buffer and last position in buffer to resume |
| 581 | * decoding the auxiliary entries. One buffer might be large |
| 582 | * and decoding might stop in between. This depends on the time |
| 583 | * stamp of the trailer entry in each page of the auxiliary |
| 584 | * data and the time stamp of the event triggering the decoding. |
| 585 | */ |
| 586 | if (sfq->buffer == NULL) { |
| 587 | sfq->buffer = buffer = auxtrace_buffer__next(queue, |
| 588 | sfq->buffer); |
| 589 | if (!buffer) { |
| 590 | *ts = ~0ULL; |
| 591 | return 1; /* Processing done on this queue */ |
| 592 | } |
| 593 | /* Start with a new buffer on this queue */ |
| 594 | if (buffer->data) { |
| 595 | buffer->use_size = buffer->size; |
| 596 | buffer->use_data = buffer->data; |
| 597 | } |
| 598 | } else |
| 599 | buffer = sfq->buffer; |
| 600 | |
| 601 | if (!buffer->data) { |
| 602 | int fd = perf_data__fd(sfq->sf->session->data); |
| 603 | |
| 604 | buffer->data = auxtrace_buffer__get_data(buffer, fd); |
| 605 | if (!buffer->data) |
| 606 | return -ENOMEM; |
| 607 | buffer->use_size = buffer->size; |
| 608 | buffer->use_data = buffer->data; |
| 609 | } |
| 610 | pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", |
| 611 | __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, |
| 612 | buffer->size, buffer->use_size); |
| 613 | err = s390_cpumsf_samples(sfq, ts); |
| 614 | |
| 615 | /* If non-zero, there is either an error (err < 0) or the buffer is |
| 616 | * completely done (err > 0). The error is unrecoverable, usually |
| 617 | * some descriptors could not be read successfully, so continue with |
| 618 | * the next buffer. |
| 619 | * In both cases the parameter 'ts' has been updated. |
| 620 | */ |
| 621 | if (err) { |
| 622 | sfq->buffer = NULL; |
| 623 | list_del(&buffer->list); |
| 624 | auxtrace_buffer__free(buffer); |
| 625 | if (err > 0) /* Buffer done, no error */ |
| 626 | err = 0; |
| 627 | } |
| 628 | return err; |
| 629 | } |
| 630 | |
| 631 | static struct s390_cpumsf_queue * |
| 632 | s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) |
| 633 | { |
| 634 | struct s390_cpumsf_queue *sfq; |
| 635 | |
| 636 | sfq = zalloc(sizeof(struct s390_cpumsf_queue)); |
| 637 | if (sfq == NULL) |
| 638 | return NULL; |
| 639 | |
| 640 | sfq->sf = sf; |
| 641 | sfq->queue_nr = queue_nr; |
| 642 | sfq->cpu = -1; |
| 643 | return sfq; |
| 644 | } |
| 645 | |
| 646 | static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, |
| 647 | struct auxtrace_queue *queue, |
| 648 | unsigned int queue_nr, u64 ts) |
| 649 | { |
| 650 | struct s390_cpumsf_queue *sfq = queue->priv; |
| 651 | |
| 652 | if (list_empty(&queue->head)) |
| 653 | return 0; |
| 654 | |
| 655 | if (sfq == NULL) { |
| 656 | sfq = s390_cpumsf_alloc_queue(sf, queue_nr); |
| 657 | if (!sfq) |
| 658 | return -ENOMEM; |
| 659 | queue->priv = sfq; |
| 660 | |
| 661 | if (queue->cpu != -1) |
| 662 | sfq->cpu = queue->cpu; |
| 663 | } |
| 664 | return auxtrace_heap__add(&sf->heap, queue_nr, ts); |
| 665 | } |
| 666 | |
| 667 | static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) |
| 668 | { |
| 669 | unsigned int i; |
| 670 | int ret = 0; |
| 671 | |
| 672 | for (i = 0; i < sf->queues.nr_queues; i++) { |
| 673 | ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], |
| 674 | i, ts); |
| 675 | if (ret) |
| 676 | break; |
| 677 | } |
| 678 | return ret; |
| 679 | } |
| 680 | |
| 681 | static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) |
| 682 | { |
| 683 | if (!sf->queues.new_data) |
| 684 | return 0; |
| 685 | |
| 686 | sf->queues.new_data = false; |
| 687 | return s390_cpumsf_setup_queues(sf, ts); |
| 688 | } |
| 689 | |
| 690 | static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) |
| 691 | { |
| 692 | unsigned int queue_nr; |
| 693 | u64 ts; |
| 694 | int ret; |
| 695 | |
| 696 | while (1) { |
| 697 | struct auxtrace_queue *queue; |
| 698 | struct s390_cpumsf_queue *sfq; |
| 699 | |
| 700 | if (!sf->heap.heap_cnt) |
| 701 | return 0; |
| 702 | |
| 703 | if (sf->heap.heap_array[0].ordinal >= timestamp) |
| 704 | return 0; |
| 705 | |
| 706 | queue_nr = sf->heap.heap_array[0].queue_nr; |
| 707 | queue = &sf->queues.queue_array[queue_nr]; |
| 708 | sfq = queue->priv; |
| 709 | |
| 710 | auxtrace_heap__pop(&sf->heap); |
| 711 | if (sf->heap.heap_cnt) { |
| 712 | ts = sf->heap.heap_array[0].ordinal + 1; |
| 713 | if (ts > timestamp) |
| 714 | ts = timestamp; |
| 715 | } else { |
| 716 | ts = timestamp; |
| 717 | } |
| 718 | |
| 719 | ret = s390_cpumsf_run_decoder(sfq, &ts); |
| 720 | if (ret < 0) { |
| 721 | auxtrace_heap__add(&sf->heap, queue_nr, ts); |
| 722 | return ret; |
| 723 | } |
| 724 | if (!ret) { |
| 725 | ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); |
| 726 | if (ret < 0) |
| 727 | return ret; |
| 728 | } |
| 729 | } |
| 730 | return 0; |
| 731 | } |
| 732 | |
| 733 | static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, |
| 734 | pid_t pid, pid_t tid, u64 ip) |
| 735 | { |
| 736 | char msg[MAX_AUXTRACE_ERROR_MSG]; |
| 737 | union perf_event event; |
| 738 | int err; |
| 739 | |
| 740 | strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); |
| 741 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, |
| 742 | code, cpu, pid, tid, ip, msg); |
| 743 | |
| 744 | err = perf_session__deliver_synth_event(sf->session, &event, NULL); |
| 745 | if (err) |
| 746 | pr_err("s390 Auxiliary Trace: failed to deliver error event," |
| 747 | "error %d\n", err); |
| 748 | return err; |
| 749 | } |
| 750 | |
| 751 | static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) |
| 752 | { |
| 753 | return s390_cpumsf_synth_error(sf, 1, sample->cpu, |
| 754 | sample->pid, sample->tid, 0); |
| 755 | } |
| 756 | |
| 757 | static int |
| 758 | s390_cpumsf_process_event(struct perf_session *session __maybe_unused, |
| 759 | union perf_event *event, |
| 760 | struct perf_sample *sample, |
| 761 | struct perf_tool *tool) |
| 762 | { |
| 763 | struct s390_cpumsf *sf = container_of(session->auxtrace, |
| 764 | struct s390_cpumsf, |
| 765 | auxtrace); |
| 766 | u64 timestamp = sample->time; |
| 767 | int err = 0; |
| 768 | |
| 769 | if (dump_trace) |
| 770 | return 0; |
| 771 | |
| 772 | if (!tool->ordered_events) { |
| 773 | pr_err("s390 Auxiliary Trace requires ordered events\n"); |
| 774 | return -EINVAL; |
| 775 | } |
| 776 | |
| 777 | if (event->header.type == PERF_RECORD_AUX && |
| 778 | event->aux.flags & PERF_AUX_FLAG_TRUNCATED) |
| 779 | return s390_cpumsf_lost(sf, sample); |
| 780 | |
| 781 | if (timestamp) { |
| 782 | err = s390_cpumsf_update_queues(sf, timestamp); |
| 783 | if (!err) |
| 784 | err = s390_cpumsf_process_queues(sf, timestamp); |
| 785 | } |
| 786 | return err; |
| 787 | } |
| 788 | |
/* Pairs a perf_tool with a session.
 * NOTE(review): not referenced by any code visible in this part of the
 * file - confirm whether it is still needed.
 */
struct s390_cpumsf_synth {
	struct perf_tool cpumsf_tool;
	struct perf_session *session;
};
| 793 | |
| 794 | static int |
| 795 | s390_cpumsf_process_auxtrace_event(struct perf_session *session, |
| 796 | union perf_event *event __maybe_unused, |
| 797 | struct perf_tool *tool __maybe_unused) |
| 798 | { |
| 799 | struct s390_cpumsf *sf = container_of(session->auxtrace, |
| 800 | struct s390_cpumsf, |
| 801 | auxtrace); |
| 802 | |
| 803 | int fd = perf_data__fd(session->data); |
| 804 | struct auxtrace_buffer *buffer; |
| 805 | off_t data_offset; |
| 806 | int err; |
| 807 | |
| 808 | if (sf->data_queued) |
| 809 | return 0; |
| 810 | |
| 811 | if (perf_data__is_pipe(session->data)) { |
| 812 | data_offset = 0; |
| 813 | } else { |
| 814 | data_offset = lseek(fd, 0, SEEK_CUR); |
| 815 | if (data_offset == -1) |
| 816 | return -errno; |
| 817 | } |
| 818 | |
| 819 | err = auxtrace_queues__add_event(&sf->queues, session, event, |
| 820 | data_offset, &buffer); |
| 821 | if (err) |
| 822 | return err; |
| 823 | |
| 824 | /* Dump here after copying piped trace out of the pipe */ |
| 825 | if (dump_trace) { |
| 826 | if (auxtrace_buffer__get_data(buffer, fd)) { |
| 827 | s390_cpumsf_dump_event(sf, buffer->data, |
| 828 | buffer->size); |
| 829 | auxtrace_buffer__put_data(buffer); |
| 830 | } |
| 831 | } |
| 832 | return 0; |
| 833 | } |
| 834 | |
/* auxtrace free_events callback. Intentionally empty: this decoder does
 * nothing when asked to free events.
 */
static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
{
}
| 838 | |
/* auxtrace flush_events callback. Does nothing and always returns 0. */
static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
			     struct perf_tool *tool __maybe_unused)
{
	return 0;
}
| 844 | |
| 845 | static void s390_cpumsf_free_queues(struct perf_session *session) |
| 846 | { |
| 847 | struct s390_cpumsf *sf = container_of(session->auxtrace, |
| 848 | struct s390_cpumsf, |
| 849 | auxtrace); |
| 850 | struct auxtrace_queues *queues = &sf->queues; |
| 851 | unsigned int i; |
| 852 | |
| 853 | for (i = 0; i < queues->nr_queues; i++) |
| 854 | zfree(&queues->queue_array[i].priv); |
| 855 | auxtrace_queues__free(queues); |
| 856 | } |
| 857 | |
| 858 | static void s390_cpumsf_free(struct perf_session *session) |
| 859 | { |
| 860 | struct s390_cpumsf *sf = container_of(session->auxtrace, |
| 861 | struct s390_cpumsf, |
| 862 | auxtrace); |
| 863 | |
| 864 | auxtrace_heap__free(&sf->heap); |
| 865 | s390_cpumsf_free_queues(session); |
| 866 | session->auxtrace = NULL; |
| 867 | free(sf); |
| 868 | } |
| 869 | |
/* Extract the machine type number from a cpuid string.
 *
 * The number is the unsigned decimal value that follows the first comma
 * of @cpuid; the (non-empty) text before that comma is skipped.
 *
 * Returns the number, or 0 when @cpuid is NULL or does not have that form.
 */
static int s390_cpumsf_get_type(const char *cpuid)
{
	/* %u requires an unsigned int to store into */
	unsigned int family = 0;

	if (!cpuid)
		return 0;

	if (sscanf(cpuid, "%*[^,],%u", &family) != 1)
		return 0;

	return (int)family;
}
| 877 | |
| 878 | /* Check itrace options set on perf report command. |
| 879 | * Return true, if none are set or all options specified can be |
| 880 | * handled on s390. |
| 881 | * Return false otherwise. |
| 882 | */ |
| 883 | static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) |
| 884 | { |
| 885 | if (!itops || !itops->set) |
| 886 | return true; |
| 887 | pr_err("No --itrace options supported\n"); |
| 888 | return false; |
| 889 | } |
| 890 | |
| 891 | int s390_cpumsf_process_auxtrace_info(union perf_event *event, |
| 892 | struct perf_session *session) |
| 893 | { |
| 894 | struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; |
| 895 | struct s390_cpumsf *sf; |
| 896 | int err; |
| 897 | |
| 898 | if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event)) |
| 899 | return -EINVAL; |
| 900 | |
| 901 | sf = zalloc(sizeof(struct s390_cpumsf)); |
| 902 | if (sf == NULL) |
| 903 | return -ENOMEM; |
| 904 | |
| 905 | if (!check_auxtrace_itrace(session->itrace_synth_opts)) { |
| 906 | err = -EINVAL; |
| 907 | goto err_free; |
| 908 | } |
| 909 | |
| 910 | err = auxtrace_queues__init(&sf->queues); |
| 911 | if (err) |
| 912 | goto err_free; |
| 913 | |
| 914 | sf->session = session; |
| 915 | sf->machine = &session->machines.host; /* No kvm support */ |
| 916 | sf->auxtrace_type = auxtrace_info->type; |
| 917 | sf->pmu_type = PERF_TYPE_RAW; |
| 918 | sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); |
| 919 | |
| 920 | sf->auxtrace.process_event = s390_cpumsf_process_event; |
| 921 | sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; |
| 922 | sf->auxtrace.flush_events = s390_cpumsf_flush; |
| 923 | sf->auxtrace.free_events = s390_cpumsf_free_events; |
| 924 | sf->auxtrace.free = s390_cpumsf_free; |
| 925 | session->auxtrace = &sf->auxtrace; |
| 926 | |
| 927 | if (dump_trace) |
| 928 | return 0; |
| 929 | |
| 930 | err = auxtrace_queues__process_index(&sf->queues, session); |
| 931 | if (err) |
| 932 | goto err_free_queues; |
| 933 | |
| 934 | if (sf->queues.populated) |
| 935 | sf->data_queued = true; |
| 936 | |
| 937 | return 0; |
| 938 | |
| 939 | err_free_queues: |
| 940 | auxtrace_queues__free(&sf->queues); |
| 941 | session->auxtrace = NULL; |
| 942 | err_free: |
| 943 | free(sf); |
| 944 | return err; |
| 945 | } |