David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 2 | /* Copyright (c) 2016 Facebook |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 3 | */ |
| 4 | #include <linux/bpf.h> |
| 5 | #include <linux/jhash.h> |
| 6 | #include <linux/filter.h> |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 7 | #include <linux/kernel.h> |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 8 | #include <linux/stacktrace.h> |
| 9 | #include <linux/perf_event.h> |
| 10 | #include <linux/elf.h> |
| 11 | #include <linux/pagemap.h> |
| 12 | #include <linux/irq_work.h> |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 13 | #include <linux/btf_ids.h> |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 14 | #include "percpu_freelist.h" |
| 15 | |
/* Flags accepted in attr->map_flags when a stack map is created. */
#define STACK_CREATE_FLAG_MASK						\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_STACK_BUILD_ID)
| 19 | |
/*
 * One stored stack trace. Buckets live in a preallocated area and are
 * recycled through the owning map's per-cpu freelist.
 */
struct stack_map_bucket {
	/* freelist linkage; kept first because popped nodes are cast to buckets */
	struct pcpu_freelist_node fnode;
	/* jhash2() of the captured instruction pointers */
	u32 hash;
	/* number of entries stored in data[] */
	u32 nr;
	/* raw u64 ips, or struct bpf_stack_build_id records for build-id maps */
	u64 data[];
};
| 26 | |
/* Stack trace map: a table of buckets indexed by the hash of the trace. */
struct bpf_stack_map {
	/* generic map header; container_of() recovers the stack map from it */
	struct bpf_map map;
	/* backing storage for all preallocated buckets */
	void *elems;
	/* buckets that are available for reuse */
	struct pcpu_freelist freelist;
	/* table size; a power of two so (hash & (n_buckets - 1)) indexes it */
	u32 n_buckets;
	/* published buckets; an empty slot is NULL */
	struct stack_map_bucket *buckets[];
};
| 34 | |
/* irq_work to run up_read() for build_id lookup in nmi context */
struct stack_map_irq_work {
	/* work item whose callback performs the deferred unlock */
	struct irq_work irq_work;
	/* mm whose mmap read lock is released when the work runs */
	struct mm_struct *mm;
};
| 40 | |
| 41 | static void do_up_read(struct irq_work *entry) |
| 42 | { |
| 43 | struct stack_map_irq_work *work; |
| 44 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 45 | if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) |
| 46 | return; |
| 47 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 48 | work = container_of(entry, struct stack_map_irq_work, irq_work); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 49 | mmap_read_unlock_non_owner(work->mm); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 50 | } |
| 51 | |
/* Per-CPU slot used to defer the mmap read unlock to irq_work context. */
static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
| 53 | |
| 54 | static inline bool stack_map_use_build_id(struct bpf_map *map) |
| 55 | { |
| 56 | return (map->map_flags & BPF_F_STACK_BUILD_ID); |
| 57 | } |
| 58 | |
| 59 | static inline int stack_map_data_size(struct bpf_map *map) |
| 60 | { |
| 61 | return stack_map_use_build_id(map) ? |
| 62 | sizeof(struct bpf_stack_build_id) : sizeof(u64); |
| 63 | } |
| 64 | |
| 65 | static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) |
| 66 | { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 67 | u64 elem_size = sizeof(struct stack_map_bucket) + |
| 68 | (u64)smap->map.value_size; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 69 | int err; |
| 70 | |
| 71 | smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, |
| 72 | smap->map.numa_node); |
| 73 | if (!smap->elems) |
| 74 | return -ENOMEM; |
| 75 | |
| 76 | err = pcpu_freelist_init(&smap->freelist); |
| 77 | if (err) |
| 78 | goto free_elems; |
| 79 | |
| 80 | pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, |
| 81 | smap->map.max_entries); |
| 82 | return 0; |
| 83 | |
| 84 | free_elems: |
| 85 | bpf_map_area_free(smap->elems); |
| 86 | return err; |
| 87 | } |
| 88 | |
| 89 | /* Called from syscall */ |
| 90 | static struct bpf_map *stack_map_alloc(union bpf_attr *attr) |
| 91 | { |
| 92 | u32 value_size = attr->value_size; |
| 93 | struct bpf_stack_map *smap; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 94 | struct bpf_map_memory mem; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 95 | u64 cost, n_buckets; |
| 96 | int err; |
| 97 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 98 | if (!bpf_capable()) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 99 | return ERR_PTR(-EPERM); |
| 100 | |
| 101 | if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) |
| 102 | return ERR_PTR(-EINVAL); |
| 103 | |
| 104 | /* check sanity of attributes */ |
| 105 | if (attr->max_entries == 0 || attr->key_size != 4 || |
| 106 | value_size < 8 || value_size % 8) |
| 107 | return ERR_PTR(-EINVAL); |
| 108 | |
| 109 | BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); |
| 110 | if (attr->map_flags & BPF_F_STACK_BUILD_ID) { |
| 111 | if (value_size % sizeof(struct bpf_stack_build_id) || |
| 112 | value_size / sizeof(struct bpf_stack_build_id) |
| 113 | > sysctl_perf_event_max_stack) |
| 114 | return ERR_PTR(-EINVAL); |
| 115 | } else if (value_size / 8 > sysctl_perf_event_max_stack) |
| 116 | return ERR_PTR(-EINVAL); |
| 117 | |
| 118 | /* hash table size must be power of 2 */ |
| 119 | n_buckets = roundup_pow_of_two(attr->max_entries); |
Olivier Deprez | 0e64123 | 2021-09-23 10:07:05 +0200 | [diff] [blame] | 120 | if (!n_buckets) |
| 121 | return ERR_PTR(-E2BIG); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 122 | |
| 123 | cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 124 | err = bpf_map_charge_init(&mem, cost + attr->max_entries * |
| 125 | (sizeof(struct stack_map_bucket) + (u64)value_size)); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 126 | if (err) |
| 127 | return ERR_PTR(err); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 128 | |
| 129 | smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 130 | if (!smap) { |
| 131 | bpf_map_charge_finish(&mem); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 132 | return ERR_PTR(-ENOMEM); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 133 | } |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 134 | |
| 135 | bpf_map_init_from_attr(&smap->map, attr); |
| 136 | smap->map.value_size = value_size; |
| 137 | smap->n_buckets = n_buckets; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 138 | |
| 139 | err = get_callchain_buffers(sysctl_perf_event_max_stack); |
| 140 | if (err) |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 141 | goto free_charge; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 142 | |
| 143 | err = prealloc_elems_and_freelist(smap); |
| 144 | if (err) |
| 145 | goto put_buffers; |
| 146 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 147 | bpf_map_charge_move(&smap->map.memory, &mem); |
| 148 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 149 | return &smap->map; |
| 150 | |
| 151 | put_buffers: |
| 152 | put_callchain_buffers(); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 153 | free_charge: |
| 154 | bpf_map_charge_finish(&mem); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 155 | bpf_map_area_free(smap); |
| 156 | return ERR_PTR(err); |
| 157 | } |
| 158 | |
| 159 | #define BPF_BUILD_ID 3 |
| 160 | /* |
| 161 | * Parse build id from the note segment. This logic can be shared between |
| 162 | * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are |
| 163 | * identical. |
| 164 | */ |
| 165 | static inline int stack_map_parse_build_id(void *page_addr, |
| 166 | unsigned char *build_id, |
| 167 | void *note_start, |
| 168 | Elf32_Word note_size) |
| 169 | { |
| 170 | Elf32_Word note_offs = 0, new_offs; |
| 171 | |
| 172 | /* check for overflow */ |
| 173 | if (note_start < page_addr || note_start + note_size < note_start) |
| 174 | return -EINVAL; |
| 175 | |
| 176 | /* only supports note that fits in the first page */ |
| 177 | if (note_start + note_size > page_addr + PAGE_SIZE) |
| 178 | return -EINVAL; |
| 179 | |
| 180 | while (note_offs + sizeof(Elf32_Nhdr) < note_size) { |
| 181 | Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); |
| 182 | |
| 183 | if (nhdr->n_type == BPF_BUILD_ID && |
| 184 | nhdr->n_namesz == sizeof("GNU") && |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 185 | nhdr->n_descsz > 0 && |
| 186 | nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 187 | memcpy(build_id, |
| 188 | note_start + note_offs + |
| 189 | ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 190 | nhdr->n_descsz); |
| 191 | memset(build_id + nhdr->n_descsz, 0, |
| 192 | BPF_BUILD_ID_SIZE - nhdr->n_descsz); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 193 | return 0; |
| 194 | } |
| 195 | new_offs = note_offs + sizeof(Elf32_Nhdr) + |
| 196 | ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); |
| 197 | if (new_offs <= note_offs) /* overflow */ |
| 198 | break; |
| 199 | note_offs = new_offs; |
| 200 | } |
| 201 | return -EINVAL; |
| 202 | } |
| 203 | |
| 204 | /* Parse build ID from 32-bit ELF */ |
| 205 | static int stack_map_get_build_id_32(void *page_addr, |
| 206 | unsigned char *build_id) |
| 207 | { |
| 208 | Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; |
| 209 | Elf32_Phdr *phdr; |
| 210 | int i; |
| 211 | |
| 212 | /* only supports phdr that fits in one page */ |
| 213 | if (ehdr->e_phnum > |
| 214 | (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) |
| 215 | return -EINVAL; |
| 216 | |
| 217 | phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); |
| 218 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 219 | for (i = 0; i < ehdr->e_phnum; ++i) { |
| 220 | if (phdr[i].p_type == PT_NOTE && |
| 221 | !stack_map_parse_build_id(page_addr, build_id, |
| 222 | page_addr + phdr[i].p_offset, |
| 223 | phdr[i].p_filesz)) |
| 224 | return 0; |
| 225 | } |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 226 | return -EINVAL; |
| 227 | } |
| 228 | |
| 229 | /* Parse build ID from 64-bit ELF */ |
| 230 | static int stack_map_get_build_id_64(void *page_addr, |
| 231 | unsigned char *build_id) |
| 232 | { |
| 233 | Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; |
| 234 | Elf64_Phdr *phdr; |
| 235 | int i; |
| 236 | |
| 237 | /* only supports phdr that fits in one page */ |
| 238 | if (ehdr->e_phnum > |
| 239 | (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) |
| 240 | return -EINVAL; |
| 241 | |
| 242 | phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); |
| 243 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 244 | for (i = 0; i < ehdr->e_phnum; ++i) { |
| 245 | if (phdr[i].p_type == PT_NOTE && |
| 246 | !stack_map_parse_build_id(page_addr, build_id, |
| 247 | page_addr + phdr[i].p_offset, |
| 248 | phdr[i].p_filesz)) |
| 249 | return 0; |
| 250 | } |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 251 | return -EINVAL; |
| 252 | } |
| 253 | |
| 254 | /* Parse build ID of ELF file mapped to vma */ |
| 255 | static int stack_map_get_build_id(struct vm_area_struct *vma, |
| 256 | unsigned char *build_id) |
| 257 | { |
| 258 | Elf32_Ehdr *ehdr; |
| 259 | struct page *page; |
| 260 | void *page_addr; |
| 261 | int ret; |
| 262 | |
| 263 | /* only works for page backed storage */ |
| 264 | if (!vma->vm_file) |
| 265 | return -EINVAL; |
| 266 | |
| 267 | page = find_get_page(vma->vm_file->f_mapping, 0); |
| 268 | if (!page) |
| 269 | return -EFAULT; /* page not mapped */ |
| 270 | |
| 271 | ret = -EINVAL; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 272 | page_addr = kmap_atomic(page); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 273 | ehdr = (Elf32_Ehdr *)page_addr; |
| 274 | |
| 275 | /* compare magic x7f "ELF" */ |
| 276 | if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) |
| 277 | goto out; |
| 278 | |
| 279 | /* only support executable file and shared object file */ |
| 280 | if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) |
| 281 | goto out; |
| 282 | |
| 283 | if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) |
| 284 | ret = stack_map_get_build_id_32(page_addr, build_id); |
| 285 | else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) |
| 286 | ret = stack_map_get_build_id_64(page_addr, build_id); |
| 287 | out: |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 288 | kunmap_atomic(page_addr); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 289 | put_page(page); |
| 290 | return ret; |
| 291 | } |
| 292 | |
| 293 | static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, |
| 294 | u64 *ips, u32 trace_nr, bool user) |
| 295 | { |
| 296 | int i; |
| 297 | struct vm_area_struct *vma; |
| 298 | bool irq_work_busy = false; |
| 299 | struct stack_map_irq_work *work = NULL; |
| 300 | |
Olivier Deprez | 0e64123 | 2021-09-23 10:07:05 +0200 | [diff] [blame] | 301 | if (irqs_disabled()) { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 302 | if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { |
| 303 | work = this_cpu_ptr(&up_read_work); |
| 304 | if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) { |
| 305 | /* cannot queue more up_read, fallback */ |
| 306 | irq_work_busy = true; |
| 307 | } |
| 308 | } else { |
| 309 | /* |
| 310 | * PREEMPT_RT does not allow to trylock mmap sem in |
| 311 | * interrupt disabled context. Force the fallback code. |
| 312 | */ |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 313 | irq_work_busy = true; |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 314 | } |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 315 | } |
| 316 | |
| 317 | /* |
Olivier Deprez | 0e64123 | 2021-09-23 10:07:05 +0200 | [diff] [blame] | 318 | * We cannot do up_read() when the irq is disabled, because of |
| 319 | * risk to deadlock with rq_lock. To do build_id lookup when the |
| 320 | * irqs are disabled, we need to run up_read() in irq_work. We use |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 321 | * a percpu variable to do the irq_work. If the irq_work is |
| 322 | * already used by another lookup, we fall back to report ips. |
| 323 | * |
| 324 | * Same fallback is used for kernel stack (!user) on a stackmap |
| 325 | * with build_id. |
| 326 | */ |
| 327 | if (!user || !current || !current->mm || irq_work_busy || |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 328 | !mmap_read_trylock_non_owner(current->mm)) { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 329 | /* cannot access current->mm, fall back to ips */ |
| 330 | for (i = 0; i < trace_nr; i++) { |
| 331 | id_offs[i].status = BPF_STACK_BUILD_ID_IP; |
| 332 | id_offs[i].ip = ips[i]; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 333 | memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 334 | } |
| 335 | return; |
| 336 | } |
| 337 | |
| 338 | for (i = 0; i < trace_nr; i++) { |
| 339 | vma = find_vma(current->mm, ips[i]); |
| 340 | if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) { |
| 341 | /* per entry fall back to ips */ |
| 342 | id_offs[i].status = BPF_STACK_BUILD_ID_IP; |
| 343 | id_offs[i].ip = ips[i]; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 344 | memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 345 | continue; |
| 346 | } |
| 347 | id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] |
| 348 | - vma->vm_start; |
| 349 | id_offs[i].status = BPF_STACK_BUILD_ID_VALID; |
| 350 | } |
| 351 | |
| 352 | if (!work) { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 353 | mmap_read_unlock_non_owner(current->mm); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 354 | } else { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 355 | work->mm = current->mm; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 356 | irq_work_queue(&work->irq_work); |
| 357 | } |
| 358 | } |
| 359 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 360 | static struct perf_callchain_entry * |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 361 | get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 362 | { |
| 363 | #ifdef CONFIG_STACKTRACE |
| 364 | struct perf_callchain_entry *entry; |
| 365 | int rctx; |
| 366 | |
| 367 | entry = get_callchain_entry(&rctx); |
| 368 | |
| 369 | if (!entry) |
| 370 | return NULL; |
| 371 | |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 372 | entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, |
| 373 | max_depth, 0); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 374 | |
| 375 | /* stack_trace_save_tsk() works on unsigned long array, while |
| 376 | * perf_callchain_entry uses u64 array. For 32-bit systems, it is |
| 377 | * necessary to fix this mismatch. |
| 378 | */ |
| 379 | if (__BITS_PER_LONG != 64) { |
| 380 | unsigned long *from = (unsigned long *) entry->ip; |
| 381 | u64 *to = entry->ip; |
| 382 | int i; |
| 383 | |
| 384 | /* copy data from the end to avoid using extra buffer */ |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 385 | for (i = entry->nr - 1; i >= 0; i--) |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 386 | to[i] = (u64)(from[i]); |
| 387 | } |
| 388 | |
| 389 | put_callchain_entry(rctx); |
| 390 | |
| 391 | return entry; |
| 392 | #else /* CONFIG_STACKTRACE */ |
| 393 | return NULL; |
| 394 | #endif |
| 395 | } |
| 396 | |
| 397 | static long __bpf_get_stackid(struct bpf_map *map, |
| 398 | struct perf_callchain_entry *trace, u64 flags) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 399 | { |
| 400 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 401 | struct stack_map_bucket *bucket, *new_bucket, *old_bucket; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 402 | u32 skip = flags & BPF_F_SKIP_FIELD_MASK; |
| 403 | u32 hash, id, trace_nr, trace_len; |
| 404 | bool user = flags & BPF_F_USER_STACK; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 405 | u64 *ips; |
| 406 | bool hash_matches; |
| 407 | |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 408 | if (trace->nr <= skip) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 409 | /* skipping more than usable stack trace */ |
| 410 | return -EFAULT; |
| 411 | |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 412 | trace_nr = trace->nr - skip; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 413 | trace_len = trace_nr * sizeof(u64); |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 414 | ips = trace->ip + skip; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 415 | hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); |
| 416 | id = hash & (smap->n_buckets - 1); |
| 417 | bucket = READ_ONCE(smap->buckets[id]); |
| 418 | |
| 419 | hash_matches = bucket && bucket->hash == hash; |
| 420 | /* fast cmp */ |
| 421 | if (hash_matches && flags & BPF_F_FAST_STACK_CMP) |
| 422 | return id; |
| 423 | |
| 424 | if (stack_map_use_build_id(map)) { |
| 425 | /* for build_id+offset, pop a bucket before slow cmp */ |
| 426 | new_bucket = (struct stack_map_bucket *) |
| 427 | pcpu_freelist_pop(&smap->freelist); |
| 428 | if (unlikely(!new_bucket)) |
| 429 | return -ENOMEM; |
| 430 | new_bucket->nr = trace_nr; |
| 431 | stack_map_get_build_id_offset( |
| 432 | (struct bpf_stack_build_id *)new_bucket->data, |
| 433 | ips, trace_nr, user); |
| 434 | trace_len = trace_nr * sizeof(struct bpf_stack_build_id); |
| 435 | if (hash_matches && bucket->nr == trace_nr && |
| 436 | memcmp(bucket->data, new_bucket->data, trace_len) == 0) { |
| 437 | pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); |
| 438 | return id; |
| 439 | } |
| 440 | if (bucket && !(flags & BPF_F_REUSE_STACKID)) { |
| 441 | pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); |
| 442 | return -EEXIST; |
| 443 | } |
| 444 | } else { |
| 445 | if (hash_matches && bucket->nr == trace_nr && |
| 446 | memcmp(bucket->data, ips, trace_len) == 0) |
| 447 | return id; |
| 448 | if (bucket && !(flags & BPF_F_REUSE_STACKID)) |
| 449 | return -EEXIST; |
| 450 | |
| 451 | new_bucket = (struct stack_map_bucket *) |
| 452 | pcpu_freelist_pop(&smap->freelist); |
| 453 | if (unlikely(!new_bucket)) |
| 454 | return -ENOMEM; |
| 455 | memcpy(new_bucket->data, ips, trace_len); |
| 456 | } |
| 457 | |
| 458 | new_bucket->hash = hash; |
| 459 | new_bucket->nr = trace_nr; |
| 460 | |
| 461 | old_bucket = xchg(&smap->buckets[id], new_bucket); |
| 462 | if (old_bucket) |
| 463 | pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); |
| 464 | return id; |
| 465 | } |
| 466 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 467 | BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, |
| 468 | u64, flags) |
| 469 | { |
| 470 | u32 max_depth = map->value_size / stack_map_data_size(map); |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 471 | u32 skip = flags & BPF_F_SKIP_FIELD_MASK; |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 472 | bool user = flags & BPF_F_USER_STACK; |
| 473 | struct perf_callchain_entry *trace; |
| 474 | bool kernel = !user; |
| 475 | |
| 476 | if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | |
| 477 | BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) |
| 478 | return -EINVAL; |
| 479 | |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 480 | max_depth += skip; |
| 481 | if (max_depth > sysctl_perf_event_max_stack) |
| 482 | max_depth = sysctl_perf_event_max_stack; |
| 483 | |
| 484 | trace = get_perf_callchain(regs, 0, kernel, user, max_depth, |
| 485 | false, false); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 486 | |
| 487 | if (unlikely(!trace)) |
| 488 | /* couldn't fetch the stack trace */ |
| 489 | return -EFAULT; |
| 490 | |
| 491 | return __bpf_get_stackid(map, trace, flags); |
| 492 | } |
| 493 | |
/*
 * Helper prototype for bpf_get_stackid(): takes (ctx, map, flags), returns
 * an integer, and is marked GPL-only.
 */
const struct bpf_func_proto bpf_get_stackid_proto = {
	.func		= bpf_get_stackid,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
| 502 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 503 | static __u64 count_kernel_ip(struct perf_callchain_entry *trace) |
| 504 | { |
| 505 | __u64 nr_kernel = 0; |
| 506 | |
| 507 | while (nr_kernel < trace->nr) { |
| 508 | if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) |
| 509 | break; |
| 510 | nr_kernel++; |
| 511 | } |
| 512 | return nr_kernel; |
| 513 | } |
| 514 | |
| 515 | BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, |
| 516 | struct bpf_map *, map, u64, flags) |
| 517 | { |
| 518 | struct perf_event *event = ctx->event; |
| 519 | struct perf_callchain_entry *trace; |
| 520 | bool kernel, user; |
| 521 | __u64 nr_kernel; |
| 522 | int ret; |
| 523 | |
| 524 | /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ |
| 525 | if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) |
| 526 | return bpf_get_stackid((unsigned long)(ctx->regs), |
| 527 | (unsigned long) map, flags, 0, 0); |
| 528 | |
| 529 | if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | |
| 530 | BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) |
| 531 | return -EINVAL; |
| 532 | |
| 533 | user = flags & BPF_F_USER_STACK; |
| 534 | kernel = !user; |
| 535 | |
| 536 | trace = ctx->data->callchain; |
| 537 | if (unlikely(!trace)) |
| 538 | return -EFAULT; |
| 539 | |
| 540 | nr_kernel = count_kernel_ip(trace); |
| 541 | |
| 542 | if (kernel) { |
| 543 | __u64 nr = trace->nr; |
| 544 | |
| 545 | trace->nr = nr_kernel; |
| 546 | ret = __bpf_get_stackid(map, trace, flags); |
| 547 | |
| 548 | /* restore nr */ |
| 549 | trace->nr = nr; |
| 550 | } else { /* user */ |
| 551 | u64 skip = flags & BPF_F_SKIP_FIELD_MASK; |
| 552 | |
| 553 | skip += nr_kernel; |
| 554 | if (skip > BPF_F_SKIP_FIELD_MASK) |
| 555 | return -EFAULT; |
| 556 | |
| 557 | flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; |
| 558 | ret = __bpf_get_stackid(map, trace, flags); |
| 559 | } |
| 560 | return ret; |
| 561 | } |
| 562 | |
/*
 * Helper prototype for bpf_get_stackid_pe(): takes (ctx, map, flags) and
 * returns an integer; not marked GPL-only.
 */
const struct bpf_func_proto bpf_get_stackid_proto_pe = {
	.func		= bpf_get_stackid_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
| 571 | |
| 572 | static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, |
| 573 | struct perf_callchain_entry *trace_in, |
| 574 | void *buf, u32 size, u64 flags) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 575 | { |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 576 | u32 trace_nr, copy_len, elem_size, num_elem, max_depth; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 577 | bool user_build_id = flags & BPF_F_USER_BUILD_ID; |
| 578 | u32 skip = flags & BPF_F_SKIP_FIELD_MASK; |
| 579 | bool user = flags & BPF_F_USER_STACK; |
| 580 | struct perf_callchain_entry *trace; |
| 581 | bool kernel = !user; |
| 582 | int err = -EINVAL; |
| 583 | u64 *ips; |
| 584 | |
| 585 | if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | |
| 586 | BPF_F_USER_BUILD_ID))) |
| 587 | goto clear; |
| 588 | if (kernel && user_build_id) |
| 589 | goto clear; |
| 590 | |
| 591 | elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) |
| 592 | : sizeof(u64); |
| 593 | if (unlikely(size % elem_size)) |
| 594 | goto clear; |
| 595 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 596 | /* cannot get valid user stack for task without user_mode regs */ |
| 597 | if (task && user && !user_mode(regs)) |
| 598 | goto err_fault; |
| 599 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 600 | num_elem = size / elem_size; |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 601 | max_depth = num_elem + skip; |
| 602 | if (sysctl_perf_event_max_stack < max_depth) |
| 603 | max_depth = sysctl_perf_event_max_stack; |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 604 | |
| 605 | if (trace_in) |
| 606 | trace = trace_in; |
| 607 | else if (kernel && task) |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 608 | trace = get_callchain_entry_for_task(task, max_depth); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 609 | else |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 610 | trace = get_perf_callchain(regs, 0, kernel, user, max_depth, |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 611 | false, false); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 612 | if (unlikely(!trace)) |
| 613 | goto err_fault; |
| 614 | |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 615 | if (trace->nr < skip) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 616 | goto err_fault; |
| 617 | |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 618 | trace_nr = trace->nr - skip; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 619 | trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; |
| 620 | copy_len = trace_nr * elem_size; |
Olivier Deprez | 92d4c21 | 2022-12-06 15:05:30 +0100 | [diff] [blame] | 621 | |
| 622 | ips = trace->ip + skip; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 623 | if (user && user_build_id) |
| 624 | stack_map_get_build_id_offset(buf, ips, trace_nr, user); |
| 625 | else |
| 626 | memcpy(buf, ips, copy_len); |
| 627 | |
| 628 | if (size > copy_len) |
| 629 | memset(buf + copy_len, 0, size - copy_len); |
| 630 | return copy_len; |
| 631 | |
| 632 | err_fault: |
| 633 | err = -EFAULT; |
| 634 | clear: |
| 635 | memset(buf, 0, size); |
| 636 | return err; |
| 637 | } |
| 638 | |
/*
 * bpf_get_stack() helper: thin wrapper that forwards to __bpf_get_stack()
 * with no task and no pre-collected callchain, so the stack is captured
 * from @regs into @buf (@size bytes) according to @flags.
 */
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
}
| 644 | |
/*
 * Helper prototype for bpf_get_stack(): takes (ctx, buf, size, flags),
 * returns an integer, and is marked GPL-only. The buffer may be passed in
 * uninitialized and the size may be zero.
 */
const struct bpf_func_proto bpf_get_stack_proto = {
	.func		= bpf_get_stack,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
| 654 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 655 | BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, |
| 656 | u32, size, u64, flags) |
| 657 | { |
| 658 | struct pt_regs *regs; |
| 659 | long res = -EINVAL; |
| 660 | |
| 661 | if (!try_get_task_stack(task)) |
| 662 | return -EFAULT; |
| 663 | |
| 664 | regs = task_pt_regs(task); |
| 665 | if (regs) |
| 666 | res = __bpf_get_stack(regs, task, NULL, buf, size, flags); |
| 667 | put_task_stack(task); |
| 668 | |
| 669 | return res; |
| 670 | } |
| 671 | |
/* BTF id of struct task_struct, referenced by the prototype below. */
BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)

/*
 * Helper prototype for bpf_get_task_stack(): takes (task, buf, size, flags)
 * and returns an integer; not marked GPL-only. The first argument is a BTF
 * pointer identified by bpf_get_task_stack_btf_ids[0]; the buffer may be
 * passed in uninitialized and the size may be zero.
 */
const struct bpf_func_proto bpf_get_task_stack_proto = {
	.func		= bpf_get_task_stack,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_get_task_stack_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
| 684 | |
| 685 | BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, |
| 686 | void *, buf, u32, size, u64, flags) |
| 687 | { |
| 688 | struct pt_regs *regs = (struct pt_regs *)(ctx->regs); |
| 689 | struct perf_event *event = ctx->event; |
| 690 | struct perf_callchain_entry *trace; |
| 691 | bool kernel, user; |
| 692 | int err = -EINVAL; |
| 693 | __u64 nr_kernel; |
| 694 | |
| 695 | if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) |
| 696 | return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); |
| 697 | |
| 698 | if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | |
| 699 | BPF_F_USER_BUILD_ID))) |
| 700 | goto clear; |
| 701 | |
| 702 | user = flags & BPF_F_USER_STACK; |
| 703 | kernel = !user; |
| 704 | |
| 705 | err = -EFAULT; |
| 706 | trace = ctx->data->callchain; |
| 707 | if (unlikely(!trace)) |
| 708 | goto clear; |
| 709 | |
| 710 | nr_kernel = count_kernel_ip(trace); |
| 711 | |
| 712 | if (kernel) { |
| 713 | __u64 nr = trace->nr; |
| 714 | |
| 715 | trace->nr = nr_kernel; |
| 716 | err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); |
| 717 | |
| 718 | /* restore nr */ |
| 719 | trace->nr = nr; |
| 720 | } else { /* user */ |
| 721 | u64 skip = flags & BPF_F_SKIP_FIELD_MASK; |
| 722 | |
| 723 | skip += nr_kernel; |
| 724 | if (skip > BPF_F_SKIP_FIELD_MASK) |
| 725 | goto clear; |
| 726 | |
| 727 | flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; |
| 728 | err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); |
| 729 | } |
| 730 | return err; |
| 731 | |
| 732 | clear: |
| 733 | memset(buf, 0, size); |
| 734 | return err; |
| 735 | |
| 736 | } |
| 737 | |
| 738 | const struct bpf_func_proto bpf_get_stack_proto_pe = { |
| 739 | .func = bpf_get_stack_pe, |
| 740 | .gpl_only = true, |
| 741 | .ret_type = RET_INTEGER, |
| 742 | .arg1_type = ARG_PTR_TO_CTX, |
| 743 | .arg2_type = ARG_PTR_TO_UNINIT_MEM, |
| 744 | .arg3_type = ARG_CONST_SIZE_OR_ZERO, |
| 745 | .arg4_type = ARG_ANYTHING, |
| 746 | }; |
| 747 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 748 | /* Called from eBPF program */ |
| 749 | static void *stack_map_lookup_elem(struct bpf_map *map, void *key) |
| 750 | { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 751 | return ERR_PTR(-EOPNOTSUPP); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 752 | } |
| 753 | |
| 754 | /* Called from syscall */ |
| 755 | int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) |
| 756 | { |
| 757 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); |
| 758 | struct stack_map_bucket *bucket, *old_bucket; |
| 759 | u32 id = *(u32 *)key, trace_len; |
| 760 | |
| 761 | if (unlikely(id >= smap->n_buckets)) |
| 762 | return -ENOENT; |
| 763 | |
| 764 | bucket = xchg(&smap->buckets[id], NULL); |
| 765 | if (!bucket) |
| 766 | return -ENOENT; |
| 767 | |
| 768 | trace_len = bucket->nr * stack_map_data_size(map); |
| 769 | memcpy(value, bucket->data, trace_len); |
| 770 | memset(value + trace_len, 0, map->value_size - trace_len); |
| 771 | |
| 772 | old_bucket = xchg(&smap->buckets[id], bucket); |
| 773 | if (old_bucket) |
| 774 | pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); |
| 775 | return 0; |
| 776 | } |
| 777 | |
| 778 | static int stack_map_get_next_key(struct bpf_map *map, void *key, |
| 779 | void *next_key) |
| 780 | { |
| 781 | struct bpf_stack_map *smap = container_of(map, |
| 782 | struct bpf_stack_map, map); |
| 783 | u32 id; |
| 784 | |
| 785 | WARN_ON_ONCE(!rcu_read_lock_held()); |
| 786 | |
| 787 | if (!key) { |
| 788 | id = 0; |
| 789 | } else { |
| 790 | id = *(u32 *)key; |
| 791 | if (id >= smap->n_buckets || !smap->buckets[id]) |
| 792 | id = 0; |
| 793 | else |
| 794 | id++; |
| 795 | } |
| 796 | |
| 797 | while (id < smap->n_buckets && !smap->buckets[id]) |
| 798 | id++; |
| 799 | |
| 800 | if (id >= smap->n_buckets) |
| 801 | return -ENOENT; |
| 802 | |
| 803 | *(u32 *)next_key = id; |
| 804 | return 0; |
| 805 | } |
| 806 | |
| 807 | static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, |
| 808 | u64 map_flags) |
| 809 | { |
| 810 | return -EINVAL; |
| 811 | } |
| 812 | |
| 813 | /* Called from syscall or from eBPF program */ |
| 814 | static int stack_map_delete_elem(struct bpf_map *map, void *key) |
| 815 | { |
| 816 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); |
| 817 | struct stack_map_bucket *old_bucket; |
| 818 | u32 id = *(u32 *)key; |
| 819 | |
| 820 | if (unlikely(id >= smap->n_buckets)) |
| 821 | return -E2BIG; |
| 822 | |
| 823 | old_bucket = xchg(&smap->buckets[id], NULL); |
| 824 | if (old_bucket) { |
| 825 | pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); |
| 826 | return 0; |
| 827 | } else { |
| 828 | return -ENOENT; |
| 829 | } |
| 830 | } |
| 831 | |
| 832 | /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ |
| 833 | static void stack_map_free(struct bpf_map *map) |
| 834 | { |
| 835 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); |
| 836 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 837 | bpf_map_area_free(smap->elems); |
| 838 | pcpu_freelist_destroy(&smap->freelist); |
| 839 | bpf_map_area_free(smap); |
| 840 | put_callchain_buffers(); |
| 841 | } |
| 842 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 843 | static int stack_trace_map_btf_id; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 844 | const struct bpf_map_ops stack_trace_map_ops = { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 845 | .map_meta_equal = bpf_map_meta_equal, |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 846 | .map_alloc = stack_map_alloc, |
| 847 | .map_free = stack_map_free, |
| 848 | .map_get_next_key = stack_map_get_next_key, |
| 849 | .map_lookup_elem = stack_map_lookup_elem, |
| 850 | .map_update_elem = stack_map_update_elem, |
| 851 | .map_delete_elem = stack_map_delete_elem, |
| 852 | .map_check_btf = map_check_no_btf, |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame] | 853 | .map_btf_name = "bpf_stack_map", |
| 854 | .map_btf_id = &stack_trace_map_btf_id, |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 855 | }; |
| 856 | |
| 857 | static int __init stack_map_init(void) |
| 858 | { |
| 859 | int cpu; |
| 860 | struct stack_map_irq_work *work; |
| 861 | |
| 862 | for_each_possible_cpu(cpu) { |
| 863 | work = per_cpu_ptr(&up_read_work, cpu); |
| 864 | init_irq_work(&work->irq_work, do_up_read); |
| 865 | } |
| 866 | return 0; |
| 867 | } |
| 868 | subsys_initcall(stack_map_init); |