Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1d9adb..b0d78bc 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := core.o
-CFLAGS_core.o += $(call cc-disable-warning, override-init)
+ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
+# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
+cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
+endif
+CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index 1b6b934..d99e89f 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -502,13 +502,14 @@
static void bpf_common_lru_push_free(struct bpf_lru *lru,
struct bpf_lru_node *node)
{
+ u8 node_type = READ_ONCE(node->type);
unsigned long flags;
- if (WARN_ON_ONCE(node->type == BPF_LRU_LIST_T_FREE) ||
- WARN_ON_ONCE(node->type == BPF_LRU_LOCAL_LIST_T_FREE))
+ if (WARN_ON_ONCE(node_type == BPF_LRU_LIST_T_FREE) ||
+ WARN_ON_ONCE(node_type == BPF_LRU_LOCAL_LIST_T_FREE))
return;
- if (node->type == BPF_LRU_LOCAL_LIST_T_PENDING) {
+ if (node_type == BPF_LRU_LOCAL_LIST_T_PENDING) {
struct bpf_lru_locallist *loc_l;
loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 29c7c06..b03087f 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2309,7 +2309,7 @@
struct_size = struct_type->size;
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
- if (struct_size - bytes_offset < sizeof(int)) {
+ if (struct_size - bytes_offset < member_type->size) {
btf_verifier_log_member(env, struct_type, member,
"Member exceeds struct_size");
return -EINVAL;
@@ -3460,7 +3460,7 @@
union bpf_attr __user *uattr)
{
struct bpf_btf_info __user *uinfo;
- struct bpf_btf_info info = {};
+ struct bpf_btf_info info;
u32 info_copy, btf_copy;
void __user *ubtf;
u32 uinfo_len;
@@ -3469,6 +3469,7 @@
uinfo_len = attr->info.info_len;
info_copy = min_t(u32, uinfo_len, sizeof(info));
+ memset(&info, 0, sizeof(info));
if (copy_from_user(&info, uinfo, info_copy))
return -EFAULT;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index a3eaf08..c2f0aa8 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -35,8 +35,8 @@
*/
static void cgroup_bpf_release(struct work_struct *work)
{
- struct cgroup *cgrp = container_of(work, struct cgroup,
- bpf.release_work);
+ struct cgroup *p, *cgrp = container_of(work, struct cgroup,
+ bpf.release_work);
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *old_array;
unsigned int type;
@@ -65,6 +65,9 @@
mutex_unlock(&cgroup_mutex);
+ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
+ cgroup_bpf_put(p);
+
percpu_ref_exit(&cgrp->bpf.refcnt);
cgroup_put(cgrp);
}
@@ -199,6 +202,7 @@
*/
#define NR ARRAY_SIZE(cgrp->bpf.effective)
struct bpf_prog_array *arrays[NR] = {};
+ struct cgroup *p;
int ret, i;
ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
@@ -206,6 +210,9 @@
if (ret)
return ret;
+ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
+ cgroup_bpf_get(p);
+
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -221,6 +228,9 @@
for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]);
+ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
+ cgroup_bpf_put(p);
+
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM;
@@ -293,8 +303,8 @@
{
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
- *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
enum bpf_cgroup_storage_type stype;
struct bpf_prog_list *pl;
bool pl_was_allocated;
@@ -956,16 +966,23 @@
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
- if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
+ if (unlikely(max_optlen < 0))
return -EINVAL;
+ if (unlikely(max_optlen > PAGE_SIZE)) {
+ /* We don't expose optvals that are greater than PAGE_SIZE
+ * to the BPF program.
+ */
+ max_optlen = PAGE_SIZE;
+ }
+
ctx->optval = kzalloc(max_optlen, GFP_USER);
if (!ctx->optval)
return -ENOMEM;
ctx->optval_end = ctx->optval + max_optlen;
- return 0;
+ return max_optlen;
}
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
@@ -999,13 +1016,13 @@
*/
max_optlen = max_t(int, 16, *optlen);
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
ctx.optlen = *optlen;
- if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1033,13 +1050,20 @@
/* export any potential modifications */
*level = ctx.level;
*optname = ctx.optname;
- *optlen = ctx.optlen;
- *kernel_optval = ctx.optval;
+
+ /* optlen == 0 from BPF indicates that we should
+ * use original userspace data.
+ */
+ if (ctx.optlen != 0) {
+ *optlen = ctx.optlen;
+ *kernel_optval = ctx.optval;
+ /* export and don't free sockopt buf */
+ return 0;
+ }
}
out:
- if (ret)
- sockopt_free_buf(&ctx);
+ sockopt_free_buf(&ctx);
return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
@@ -1066,12 +1090,12 @@
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
return retval;
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
-
ctx.optlen = max_optlen;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
+
if (!retval) {
/* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back
@@ -1085,10 +1109,13 @@
goto out;
}
- if (ctx.optlen > max_optlen)
- ctx.optlen = max_optlen;
+ if (ctx.optlen < 0) {
+ ret = -EFAULT;
+ goto out;
+ }
- if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval,
+ min(ctx.optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1104,7 +1131,7 @@
goto out;
}
- if (ctx.optlen > max_optlen) {
+ if (ctx.optlen > max_optlen || ctx.optlen < 0) {
ret = -EFAULT;
goto out;
}
@@ -1117,10 +1144,12 @@
goto out;
}
- if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
- put_user(ctx.optlen, optlen)) {
- ret = -EFAULT;
- goto out;
+ if (ctx.optlen != 0) {
+ if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
+ put_user(ctx.optlen, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
ret = ctx.retval;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ef0e1e3..d9a3d99 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -31,6 +31,7 @@
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
+#include <asm/barrier.h>
#include <asm/unaligned.h>
/* Registers */
@@ -1299,7 +1300,7 @@
*
* Decode and execute eBPF instructions.
*/
-static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
@@ -1310,6 +1311,7 @@
/* Non-UAPI available opcodes. */
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+ [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC,
};
#undef BPF_INSN_3_LBL
#undef BPF_INSN_2_LBL
@@ -1321,29 +1323,54 @@
select_insn:
goto *jumptable[insn->code];
- /* ALU */
-#define ALU(OPCODE, OP) \
- ALU64_##OPCODE##_X: \
- DST = DST OP SRC; \
- CONT; \
- ALU_##OPCODE##_X: \
- DST = (u32) DST OP (u32) SRC; \
- CONT; \
- ALU64_##OPCODE##_K: \
- DST = DST OP IMM; \
- CONT; \
- ALU_##OPCODE##_K: \
- DST = (u32) DST OP (u32) IMM; \
+ /* Explicitly mask the register-based shift amounts with 63 or 31
+ * to avoid undefined behavior. Normally this won't affect the
+ * generated code, for example, in case of native 64 bit archs such
+ * as x86-64 or arm64, the compiler is optimizing the AND away for
+ * the interpreter. In case of JITs, each of the JIT backends compiles
+ * the BPF shift operations to machine instructions which produce
+ * implementation-defined results in such a case; the resulting
+ * contents of the register may be arbitrary, but program behaviour
+ * as a whole remains defined. In other words, in case of JIT backends,
+ * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
+ */
+ /* ALU (shifts) */
+#define SHT(OPCODE, OP) \
+ ALU64_##OPCODE##_X: \
+ DST = DST OP (SRC & 63); \
+ CONT; \
+ ALU_##OPCODE##_X: \
+ DST = (u32) DST OP ((u32) SRC & 31); \
+ CONT; \
+ ALU64_##OPCODE##_K: \
+ DST = DST OP IMM; \
+ CONT; \
+ ALU_##OPCODE##_K: \
+ DST = (u32) DST OP (u32) IMM; \
CONT;
-
+ /* ALU (rest) */
+#define ALU(OPCODE, OP) \
+ ALU64_##OPCODE##_X: \
+ DST = DST OP SRC; \
+ CONT; \
+ ALU_##OPCODE##_X: \
+ DST = (u32) DST OP (u32) SRC; \
+ CONT; \
+ ALU64_##OPCODE##_K: \
+ DST = DST OP IMM; \
+ CONT; \
+ ALU_##OPCODE##_K: \
+ DST = (u32) DST OP (u32) IMM; \
+ CONT;
ALU(ADD, +)
ALU(SUB, -)
ALU(AND, &)
ALU(OR, |)
- ALU(LSH, <<)
- ALU(RSH, >>)
ALU(XOR, ^)
ALU(MUL, *)
+ SHT(LSH, <<)
+ SHT(RSH, >>)
+#undef SHT
#undef ALU
ALU_NEG:
DST = (u32) -DST;
@@ -1368,13 +1395,13 @@
insn++;
CONT;
ALU_ARSH_X:
- DST = (u64) (u32) (((s32) DST) >> SRC);
+ DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
CONT;
ALU_ARSH_K:
DST = (u64) (u32) (((s32) DST) >> IMM);
CONT;
ALU64_ARSH_X:
- (*(s64 *) &DST) >>= SRC;
+ (*(s64 *) &DST) >>= (SRC & 63);
CONT;
ALU64_ARSH_K:
(*(s64 *) &DST) >>= IMM;
@@ -1525,7 +1552,21 @@
COND_JMP(s, JSGE, >=)
COND_JMP(s, JSLE, <=)
#undef COND_JMP
- /* STX and ST and LDX*/
+ /* ST, STX and LDX*/
+ ST_NOSPEC:
+ /* Speculation barrier for mitigating Speculative Store Bypass.
+ * In case of arm64, we rely on the firmware mitigation as
+ * controlled via the ssbd kernel parameter. Whenever the
+ * mitigation is enabled, it works for all of the kernel code
+ * with no need to provide any additional instructions here.
+ * In case of x86, we use 'lfence' insn for mitigation. We
+ * reuse preexisting logic from Spectre v1 mitigation that
+ * happens to produce the required code on x86 for v4 as well.
+ */
+#ifdef CONFIG_X86
+ barrier_nospec();
+#endif
+ CONT;
#define LDST(SIZEOP, SIZE) \
STX_MEM_##SIZEOP: \
*(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ef49e17..a367fc8 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -486,7 +486,7 @@
return -EOVERFLOW;
/* Make sure CPU is a valid possible cpu */
- if (!cpu_possible(key_cpu))
+ if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
return -ENODEV;
if (qsize == 0) {
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3867864..6684696 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -74,7 +74,7 @@
struct bpf_dtab {
struct bpf_map map;
- struct bpf_dtab_netdev **netdev_map;
+ struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
struct list_head __percpu *flush_list;
struct list_head list;
@@ -88,12 +88,13 @@
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
-static struct hlist_head *dev_map_create_hash(unsigned int entries)
+static struct hlist_head *dev_map_create_hash(unsigned int entries,
+ int numa_node)
{
int i;
struct hlist_head *hash;
- hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
+ hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
@@ -101,6 +102,12 @@
return hash;
}
+static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
+ int idx)
+{
+ return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
+}
+
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
int err, cpu;
@@ -120,8 +127,7 @@
bpf_map_init_from_attr(&dtab->map, attr);
/* make sure page count doesn't overflow */
- cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
- cost += sizeof(struct list_head) * num_possible_cpus();
+ cost = (u64) sizeof(struct list_head) * num_possible_cpus();
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
@@ -129,6 +135,8 @@
if (!dtab->n_buckets) /* Overflow check */
return -EINVAL;
cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
+ } else {
+ cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
}
/* if map size is larger than memlock limit, reject it */
@@ -143,24 +151,23 @@
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu));
- dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
- sizeof(struct bpf_dtab_netdev *),
- dtab->map.numa_node);
- if (!dtab->netdev_map)
- goto free_percpu;
-
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
- dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
+ dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
+ dtab->map.numa_node);
if (!dtab->dev_index_head)
- goto free_map_area;
+ goto free_percpu;
spin_lock_init(&dtab->index_lock);
+ } else {
+ dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
+ sizeof(struct bpf_dtab_netdev *),
+ dtab->map.numa_node);
+ if (!dtab->netdev_map)
+ goto free_percpu;
}
return 0;
-free_map_area:
- bpf_map_area_free(dtab->netdev_map);
free_percpu:
free_percpu(dtab->flush_list);
free_charge:
@@ -228,21 +235,40 @@
cond_resched();
}
- for (i = 0; i < dtab->map.max_entries; i++) {
- struct bpf_dtab_netdev *dev;
+ if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+ for (i = 0; i < dtab->n_buckets; i++) {
+ struct bpf_dtab_netdev *dev;
+ struct hlist_head *head;
+ struct hlist_node *next;
- dev = dtab->netdev_map[i];
- if (!dev)
- continue;
+ head = dev_map_index_hash(dtab, i);
- free_percpu(dev->bulkq);
- dev_put(dev->dev);
- kfree(dev);
+ hlist_for_each_entry_safe(dev, next, head, index_hlist) {
+ hlist_del_rcu(&dev->index_hlist);
+ free_percpu(dev->bulkq);
+ dev_put(dev->dev);
+ kfree(dev);
+ }
+ }
+
+ bpf_map_area_free(dtab->dev_index_head);
+ } else {
+ for (i = 0; i < dtab->map.max_entries; i++) {
+ struct bpf_dtab_netdev *dev;
+
+ dev = dtab->netdev_map[i];
+ if (!dev)
+ continue;
+
+ free_percpu(dev->bulkq);
+ dev_put(dev->dev);
+ kfree(dev);
+ }
+
+ bpf_map_area_free(dtab->netdev_map);
}
free_percpu(dtab->flush_list);
- bpf_map_area_free(dtab->netdev_map);
- kfree(dtab->dev_index_head);
kfree(dtab);
}
@@ -263,19 +289,14 @@
return 0;
}
-static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
- int idx)
-{
- return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
-}
-
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct hlist_head *head = dev_map_index_hash(dtab, key);
struct bpf_dtab_netdev *dev;
- hlist_for_each_entry_rcu(dev, head, index_hlist)
+ hlist_for_each_entry_rcu(dev, head, index_hlist,
+ lockdep_is_held(&dtab->index_lock))
if (dev->idx == key)
return dev;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index b44d8c4..ff1dd7d 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -162,15 +162,17 @@
else
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
} else if (class == BPF_ST) {
- if (BPF_MODE(insn->code) != BPF_MEM) {
+ if (BPF_MODE(insn->code) == BPF_MEM) {
+ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg,
+ insn->off, insn->imm);
+ } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) {
+ verbose(cbs->private_data, "(%02x) nospec\n", insn->code);
+ } else {
verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
- return;
}
- verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->dst_reg,
- insn->off, insn->imm);
} else if (class == BPF_LDX) {
if (BPF_MODE(insn->code) != BPF_MEM) {
verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 22066a6..03a6758 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -664,26 +664,23 @@
struct htab_elem *l = container_of(head, struct htab_elem, rcu);
struct bpf_htab *htab = l->htab;
- /* must increment bpf_prog_active to avoid kprobe+bpf triggering while
- * we're calling kfree, otherwise deadlock is possible if kprobes
- * are placed somewhere inside of slub
- */
- preempt_disable();
- __this_cpu_inc(bpf_prog_active);
htab_elem_free(htab, l);
- __this_cpu_dec(bpf_prog_active);
- preempt_enable();
+}
+
+static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
+{
+ struct bpf_map *map = &htab->map;
+ void *ptr;
+
+ if (map->ops->map_fd_put_ptr) {
+ ptr = fd_htab_map_get_ptr(map, l);
+ map->ops->map_fd_put_ptr(ptr);
+ }
}
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
- struct bpf_map *map = &htab->map;
-
- if (map->ops->map_fd_put_ptr) {
- void *ptr = fd_htab_map_get_ptr(map, l);
-
- map->ops->map_fd_put_ptr(ptr);
- }
+ htab_put_fd_value(htab, l);
if (htab_is_prealloc(htab)) {
__pcpu_freelist_push(&htab->freelist, &l->fnode);
@@ -712,6 +709,32 @@
}
}
+static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
+ void *value, bool onallcpus)
+{
+ /* When using prealloc and not setting the initial value on all cpus,
+ * zero-fill element values for other cpus (just as what happens when
+ * not using prealloc). Otherwise, bpf program has no way to ensure
+ * known initial values for cpus other than current one
+ * (onallcpus=false always when coming from bpf prog).
+ */
+ if (htab_is_prealloc(htab) && !onallcpus) {
+ u32 size = round_up(htab->map.value_size, 8);
+ int current_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == current_cpu)
+ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
+ size);
+ else
+ memset(per_cpu_ptr(pptr, cpu), 0, size);
+ }
+ } else {
+ pcpu_copy_value(htab, pptr, value, onallcpus);
+ }
+}
+
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
@@ -735,6 +758,7 @@
*/
pl_new = this_cpu_ptr(htab->extra_elems);
l_new = *pl_new;
+ htab_put_fd_value(htab, old_elem);
*pl_new = old_elem;
} else {
struct pcpu_freelist_node *l;
@@ -781,7 +805,7 @@
}
}
- pcpu_copy_value(htab, pptr, value, onallcpus);
+ pcpu_init_value(htab, pptr, value, onallcpus);
if (!prealloc)
htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1077,7 +1101,7 @@
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
value, onallcpus);
} else {
- pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+ pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5e28718..a77d281 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -105,7 +105,7 @@
}
const struct bpf_func_proto bpf_map_peek_elem_proto = {
- .func = bpf_map_pop_elem,
+ .func = bpf_map_peek_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index a70f720..375d93e 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -196,6 +196,7 @@
void *key = map_iter(m)->key;
void *prev_key;
+ (*pos)++;
if (map_iter(m)->done)
return NULL;
@@ -204,12 +205,12 @@
else
prev_key = key;
+ rcu_read_lock();
if (map->ops->map_get_next_key(map, prev_key, key)) {
map_iter(m)->done = true;
- return NULL;
+ key = NULL;
}
-
- ++(*pos);
+ rcu_read_unlock();
return key;
}
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 5b9da09..3668a0b 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -321,7 +321,7 @@
ulen = info->jited_prog_len;
info->jited_prog_len = aux->offload->jited_len;
- if (info->jited_prog_len & ulen) {
+ if (info->jited_prog_len && ulen) {
uinsns = u64_to_user_ptr(info->jited_prog_insns);
ulen = min_t(u32, info->jited_prog_len, ulen);
if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 052580c..fba2ade 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -112,6 +112,8 @@
/* hash table size must be power of 2 */
n_buckets = roundup_pow_of_two(attr->max_entries);
+ if (!n_buckets)
+ return ERR_PTR(-E2BIG);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
@@ -287,7 +289,7 @@
bool irq_work_busy = false;
struct stack_map_irq_work *work = NULL;
- if (in_nmi()) {
+ if (irqs_disabled()) {
work = this_cpu_ptr(&up_read_work);
if (work->irq_work.flags & IRQ_WORK_BUSY)
/* cannot queue more up_read, fallback */
@@ -295,8 +297,9 @@
}
/*
- * We cannot do up_read() in nmi context. To do build_id lookup
- * in nmi context, we need to run up_read() in irq_work. We use
+ * We cannot do up_read() when the irq is disabled, because of
+ * risk to deadlock with rq_lock. To do build_id lookup when the
+ * irqs are disabled, we need to run up_read() in irq_work. We use
* a percpu variable to do the irq_work. If the irq_work is
* already used by another lookup, we fall back to report ips.
*
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ace1cfa..bf03d04 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1118,7 +1118,8 @@
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
+ !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -1146,8 +1147,10 @@
if (err)
goto free_value;
- if (copy_to_user(uvalue, value, value_size) != 0)
+ if (copy_to_user(uvalue, value, value_size) != 0) {
+ err = -EFAULT;
goto free_value;
+ }
err = 0;
@@ -2026,10 +2029,10 @@
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
case BPF_SK_MSG_VERDICT:
- return sock_map_get_from_fd(attr, NULL);
+ return sock_map_prog_detach(attr, BPF_PROG_TYPE_SK_MSG);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sock_map_get_from_fd(attr, NULL);
+ return sock_map_prog_detach(attr, BPF_PROG_TYPE_SK_SKB);
case BPF_LIRC_MODE2:
return lirc_prog_detach(attr);
case BPF_FLOW_DISSECTOR:
@@ -2245,7 +2248,8 @@
return NULL;
}
-static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
+static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
+ const struct cred *f_cred)
{
const struct bpf_map *map;
struct bpf_insn *insns;
@@ -2268,7 +2272,7 @@
insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
insns[i].code = BPF_JMP | BPF_CALL;
- if (!bpf_dump_raw_ok())
+ if (!bpf_dump_raw_ok(f_cred))
insns[i].imm = 0;
continue;
}
@@ -2320,12 +2324,13 @@
return 0;
}
-static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
+static int bpf_prog_get_info_by_fd(struct file *file,
+ struct bpf_prog *prog,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
- struct bpf_prog_info info = {};
+ struct bpf_prog_info info;
u32 info_len = attr->info.info_len;
struct bpf_prog_stats stats;
char __user *uinsns;
@@ -2337,6 +2342,7 @@
return err;
info_len = min_t(u32, sizeof(info), info_len);
+ memset(&info, 0, sizeof(info));
if (copy_from_user(&info, uinfo, info_len))
return -EFAULT;
@@ -2388,11 +2394,11 @@
struct bpf_insn *insns_sanitized;
bool fault;
- if (prog->blinded && !bpf_dump_raw_ok()) {
+ if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
info.xlated_prog_insns = 0;
goto done;
}
- insns_sanitized = bpf_insn_prepare_dump(prog);
+ insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
if (!insns_sanitized)
return -ENOMEM;
uinsns = u64_to_user_ptr(info.xlated_prog_insns);
@@ -2426,7 +2432,7 @@
}
if (info.jited_prog_len && ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen);
@@ -2461,7 +2467,7 @@
ulen = info.nr_jited_ksyms;
info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
if (ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
unsigned long ksym_addr;
u64 __user *user_ksyms;
u32 i;
@@ -2492,7 +2498,7 @@
ulen = info.nr_jited_func_lens;
info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
if (ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
u32 __user *user_lens;
u32 func_len, i;
@@ -2549,7 +2555,7 @@
else
info.nr_jited_line_info = 0;
if (info.nr_jited_line_info && ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
__u64 __user *user_linfo;
u32 i;
@@ -2595,12 +2601,13 @@
return 0;
}
-static int bpf_map_get_info_by_fd(struct bpf_map *map,
+static int bpf_map_get_info_by_fd(struct file *file,
+ struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
- struct bpf_map_info info = {};
+ struct bpf_map_info info;
u32 info_len = attr->info.info_len;
int err;
@@ -2609,6 +2616,7 @@
return err;
info_len = min_t(u32, sizeof(info), info_len);
+ memset(&info, 0, sizeof(info));
info.type = map->map_type;
info.id = map->id;
info.key_size = map->key_size;
@@ -2636,7 +2644,8 @@
return 0;
}
-static int bpf_btf_get_info_by_fd(struct btf *btf,
+static int bpf_btf_get_info_by_fd(struct file *file,
+ struct btf *btf,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
@@ -2668,13 +2677,13 @@
return -EBADFD;
if (f.file->f_op == &bpf_prog_fops)
- err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
+ err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
uattr);
else if (f.file->f_op == &bpf_map_fops)
- err = bpf_map_get_info_by_fd(f.file->private_data, attr,
+ err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
uattr);
else if (f.file->f_op == &btf_fops)
- err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
+ err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
else
err = -EINVAL;
@@ -2836,7 +2845,7 @@
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
- union bpf_attr attr = {};
+ union bpf_attr attr;
int err;
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
@@ -2848,6 +2857,7 @@
size = min_t(u32, size, sizeof(attr));
/* copy attributes from user space, may be less than sizeof(bpf_attr) */
+ memset(&attr, 0, sizeof(attr));
if (copy_from_user(&attr, uattr, size) != 0)
return -EFAULT;
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 7ae5ddd..11b3380 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -9,15 +9,15 @@
#include <linux/sysfs.h>
/* See scripts/link-vmlinux.sh, gen_btf() func for details */
-extern char __weak _binary__btf_vmlinux_bin_start[];
-extern char __weak _binary__btf_vmlinux_bin_end[];
+extern char __weak __start_BTF[];
+extern char __weak __stop_BTF[];
static ssize_t
btf_vmlinux_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
- memcpy(buf, _binary__btf_vmlinux_bin_start + off, len);
+ memcpy(buf, __start_BTF + off, len);
return len;
}
@@ -30,16 +30,15 @@
static int __init btf_vmlinux_init(void)
{
- if (!_binary__btf_vmlinux_bin_start)
+ bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF;
+
+ if (!__start_BTF || bin_attr_btf_vmlinux.size == 0)
return 0;
btf_kobj = kobject_create_and_add("btf", kernel_kobj);
if (!btf_kobj)
return -ENOMEM;
- bin_attr_btf_vmlinux.size = _binary__btf_vmlinux_bin_end -
- _binary__btf_vmlinux_bin_start;
-
return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux);
}
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index ca52b96..d4f335a 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -44,14 +44,19 @@
return TNUM(a.value >> shift, a.mask >> shift);
}
-struct tnum tnum_arshift(struct tnum a, u8 min_shift)
+struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness)
{
/* if a.value is negative, arithmetic shifting by minimum shift
* will have larger negative offset compared to more shifting.
* If a.value is nonnegative, arithmetic shifting by minimum shift
* will have larger positive offset compare to more shifting.
*/
- return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
+ if (insn_bitness == 32)
+ return TNUM((u32)(((s32)a.value) >> min_shift),
+ (u32)(((s32)a.mask) >> min_shift));
+ else
+ return TNUM((s64)a.value >> min_shift,
+ (s64)a.mask >> min_shift);
}
struct tnum tnum_add(struct tnum a, struct tnum b)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ffc3e53..60383b2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -201,8 +201,7 @@
bool pkt_access;
int regno;
int access_size;
- s64 msize_smax_value;
- u64 msize_umax_value;
+ u64 msize_max_value;
int ref_obj_id;
int func_id;
};
@@ -852,7 +851,8 @@
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void __mark_reg_not_init(struct bpf_reg_state *reg);
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
/* Mark the unknown part of a register (variable offset or scalar value) as
* known to have the value @imm.
@@ -890,7 +890,7 @@
verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs */
for (regno = 0; regno < MAX_BPF_REG; regno++)
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
return;
}
__mark_reg_known_zero(regs + regno);
@@ -988,7 +988,8 @@
}
/* Mark a register as having a completely unknown (scalar) value. */
-static void __mark_reg_unknown(struct bpf_reg_state *reg)
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
{
/*
* Clear type, id, off, and union(map_ptr, range) and
@@ -998,6 +999,8 @@
reg->type = SCALAR_VALUE;
reg->var_off = tnum_unknown;
reg->frameno = 0;
+ reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
+ true : false;
__mark_reg_unbounded(reg);
}
@@ -1008,19 +1011,16 @@
verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs except FP */
for (regno = 0; regno < BPF_REG_FP; regno++)
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
return;
}
- regs += regno;
- __mark_reg_unknown(regs);
- /* constant backtracking is enabled for root without bpf2bpf calls */
- regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
- true : false;
+ __mark_reg_unknown(env, regs + regno);
}
-static void __mark_reg_not_init(struct bpf_reg_state *reg)
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
{
- __mark_reg_unknown(reg);
+ __mark_reg_unknown(env, reg);
reg->type = NOT_INIT;
}
@@ -1031,10 +1031,10 @@
verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs except FP */
for (regno = 0; regno < BPF_REG_FP; regno++)
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
return;
}
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
}
#define DEF_NOT_SUBREG (0)
@@ -1160,6 +1160,10 @@
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
+ if (code == (BPF_JMP | BPF_CALL) &&
+ insn[i].imm == BPF_FUNC_tail_call &&
+ insn[i].src_reg != BPF_PSEUDO_CALL)
+ subprog[cur_subprog].has_tail_call = true;
if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
@@ -1866,6 +1870,15 @@
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}
+static bool __is_pointer_value(bool allow_ptr_leaks,
+ const struct bpf_reg_state *reg)
+{
+ if (allow_ptr_leaks)
+ return false;
+
+ return reg->type != SCALAR_VALUE;
+}
+
static void save_register_state(struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg)
{
@@ -1907,6 +1920,19 @@
cur = env->cur_state->frame[env->cur_state->curframe];
if (value_regno >= 0)
reg = &cur->regs[value_regno];
+ if (!env->allow_ptr_leaks) {
+ bool sanitize = reg && is_spillable_regtype(reg->type);
+
+ for (i = 0; i < size; i++) {
+ if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+ sanitize = true;
+ break;
+ }
+ }
+
+ if (sanitize)
+ env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+ }
if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
!register_is_null(reg) && env->allow_ptr_leaks) {
@@ -1929,47 +1955,10 @@
verbose(env, "invalid size of register spill\n");
return -EACCES;
}
-
if (state != cur && reg->type == PTR_TO_STACK) {
verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
return -EINVAL;
}
-
- if (!env->allow_ptr_leaks) {
- bool sanitize = false;
-
- if (state->stack[spi].slot_type[0] == STACK_SPILL &&
- register_is_const(&state->stack[spi].spilled_ptr))
- sanitize = true;
- for (i = 0; i < BPF_REG_SIZE; i++)
- if (state->stack[spi].slot_type[i] == STACK_MISC) {
- sanitize = true;
- break;
- }
- if (sanitize) {
- int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
- int soff = (-spi - 1) * BPF_REG_SIZE;
-
- /* detected reuse of integer stack slot with a pointer
- * which means either llvm is reusing stack slot or
- * an attacker is trying to exploit CVE-2018-3639
- * (speculative store bypass)
- * Have to sanitize that slot with preemptive
- * store of zero.
- */
- if (*poff && *poff != soff) {
- /* disallow programs where single insn stores
- * into two different stack slots, since verifier
- * cannot sanitize them
- */
- verbose(env,
- "insn %d cannot access two stack slots fp%d and fp%d",
- insn_idx, *poff, soff);
- return -EINVAL;
- }
- *poff = soff;
- }
- }
save_register_state(state, spi, reg);
} else {
u8 type = STACK_MISC;
@@ -2056,6 +2045,16 @@
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
+ /* If value_regno==-1, the caller is asking us whether
+ * it is acceptable to use this value as a SCALAR_VALUE
+ * (e.g. for XADD).
+ * We must not allow unprivileged callers to do that
+ * with spilled pointers.
+ */
+ verbose(env, "leaking pointer from stack off %d\n",
+ off);
+ return -EACCES;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
@@ -2416,15 +2415,6 @@
return -EACCES;
}
-static bool __is_pointer_value(bool allow_ptr_leaks,
- const struct bpf_reg_state *reg)
-{
- if (allow_ptr_leaks)
- return false;
-
- return reg->type != SCALAR_VALUE;
-}
-
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
return cur_regs(env) + regno;
@@ -2602,6 +2592,31 @@
int ret_prog[MAX_CALL_FRAMES];
process_func:
+ /* protect against potential stack overflow that might happen when
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
+ * depth for such case down to 256 so that the worst case scenario
+ * would result in 8k stack size (32 which is tailcall limit * 256 =
+ * 8k).
+ *
+ * To get the idea what might happen, see an example:
+ * func1 -> sub rsp, 128
+ * subfunc1 -> sub rsp, 256
+ * tailcall1 -> add rsp, 256
+ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
+ * subfunc2 -> sub rsp, 64
+ * subfunc22 -> sub rsp, 128
+ * tailcall2 -> add rsp, 128
+ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
+ *
+ * tailcall will unwind the current stack frame but it will not get rid
+ * of caller's stack as shown on the example above.
+ */
+ if (idx && subprog[idx].has_tail_call && depth >= 256) {
+ verbose(env,
+ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
+ depth);
+ return -EACCES;
+ }
/* round up to 32-bytes, since this is granularity
* of interpreter stack size
*/
@@ -2739,6 +2754,41 @@
reg->smax_value = reg->umax_value;
}
+static bool bpf_map_is_rdonly(const struct bpf_map *map)
+{
+ return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+}
+
+static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
+{
+ void *ptr;
+ u64 addr;
+ int err;
+
+ err = map->ops->map_direct_value_addr(map, &addr, off);
+ if (err)
+ return err;
+ ptr = (void *)(long)addr + off;
+
+ switch (size) {
+ case sizeof(u8):
+ *val = (u64)*(u8 *)ptr;
+ break;
+ case sizeof(u16):
+ *val = (u64)*(u16 *)ptr;
+ break;
+ case sizeof(u32):
+ *val = (u64)*(u32 *)ptr;
+ break;
+ case sizeof(u64):
+ *val = *(u64 *)ptr;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -2776,9 +2826,27 @@
if (err)
return err;
err = check_map_access(env, regno, off, size, false);
- if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown(env, regs, value_regno);
+ if (!err && t == BPF_READ && value_regno >= 0) {
+ struct bpf_map *map = reg->map_ptr;
+ /* if map is read-only, track its contents as scalars */
+ if (tnum_is_const(reg->var_off) &&
+ bpf_map_is_rdonly(map) &&
+ map->ops->map_direct_value_addr) {
+ int map_off = off + reg->var_off.value;
+ u64 val = 0;
+
+ err = bpf_map_direct_read(map, map_off, size,
+ &val);
+ if (err)
+ return err;
+
+ regs[value_regno].type = SCALAR_VALUE;
+ __mark_reg_known(®s[value_regno], val);
+ } else {
+ mark_reg_unknown(env, regs, value_regno);
+ }
+ }
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;
@@ -3055,7 +3123,7 @@
}
if (state->stack[spi].slot_type[0] == STACK_SPILL &&
state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
- __mark_reg_unknown(&state->stack[spi].spilled_ptr);
+ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
for (j = 0; j < BPF_REG_SIZE; j++)
state->stack[spi].slot_type[j] = STACK_MISC;
goto mark;
@@ -3376,8 +3444,7 @@
/* remember the mem_size which may be used later
* to refine return values.
*/
- meta->msize_smax_value = reg->smax_value;
- meta->msize_umax_value = reg->umax_value;
+ meta->msize_max_value = reg->umax_value;
/* The register is SCALAR_VALUE; the access check
* happens using its boundaries.
@@ -3695,7 +3762,7 @@
if (!reg)
continue;
if (reg_is_pkt_pointer_any(reg))
- __mark_reg_unknown(reg);
+ __mark_reg_unknown(env, reg);
}
}
@@ -3723,7 +3790,7 @@
if (!reg)
continue;
if (reg->ref_obj_id == ref_obj_id)
- __mark_reg_unknown(reg);
+ __mark_reg_unknown(env, reg);
}
}
@@ -3865,21 +3932,44 @@
return 0;
}
-static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
- int func_id,
- struct bpf_call_arg_meta *meta)
+static int do_refine_retval_range(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs, int ret_type,
+ int func_id, struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *ret_reg = ®s[BPF_REG_0];
+ struct bpf_reg_state tmp_reg = *ret_reg;
+ bool ret;
if (ret_type != RET_INTEGER ||
(func_id != BPF_FUNC_get_stack &&
func_id != BPF_FUNC_probe_read_str))
- return;
+ return 0;
- ret_reg->smax_value = meta->msize_smax_value;
- ret_reg->umax_value = meta->msize_umax_value;
+ /* Error case where ret is in interval [S32MIN, -1]. */
+ ret_reg->smin_value = S32_MIN;
+ ret_reg->smax_value = -1;
+
__reg_deduce_bounds(ret_reg);
__reg_bound_offset(ret_reg);
+ __update_reg_bounds(ret_reg);
+
+ ret = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
+ if (!ret)
+ return -EFAULT;
+
+ *ret_reg = tmp_reg;
+
+ /* Success case where ret is in range [0, msize_max_value]. */
+ ret_reg->smin_value = 0;
+ ret_reg->smax_value = meta->msize_max_value;
+ ret_reg->umin_value = ret_reg->smin_value;
+ ret_reg->umax_value = ret_reg->smax_value;
+
+ __reg_deduce_bounds(ret_reg);
+ __reg_bound_offset(ret_reg);
+ __update_reg_bounds(ret_reg);
+
+ return 0;
}
static int
@@ -4111,7 +4201,9 @@
regs[BPF_REG_0].ref_obj_id = id;
}
- do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
+ err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
+ if (err)
+ return err;
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
if (err)
@@ -4200,35 +4292,43 @@
return &env->insn_aux_data[env->insn_idx];
}
+enum {
+ REASON_BOUNDS = -1,
+ REASON_TYPE = -2,
+ REASON_PATHS = -3,
+ REASON_LIMIT = -4,
+ REASON_STACK = -5,
+};
+
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
- u32 *ptr_limit, u8 opcode, bool off_is_neg)
+ u32 *alu_limit, bool mask_to_left)
{
- bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
- (opcode == BPF_SUB && !off_is_neg);
- u32 off;
+ u32 max = 0, ptr_limit = 0;
switch (ptr_reg->type) {
case PTR_TO_STACK:
- /* Indirect variable offset stack access is prohibited in
- * unprivileged mode so it's not handled here.
+ /* Offset 0 is out-of-bounds, but acceptable start for the
+ * left direction, see BPF_REG_FP. Also, unknown scalar
+ * offset where we would need to deal with min/max bounds is
+ * currently prohibited for unprivileged.
*/
- off = ptr_reg->off + ptr_reg->var_off.value;
- if (mask_to_left)
- *ptr_limit = MAX_BPF_STACK + off;
- else
- *ptr_limit = -off;
- return 0;
+ max = MAX_BPF_STACK + mask_to_left;
+ ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
+ break;
case PTR_TO_MAP_VALUE:
- if (mask_to_left) {
- *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
- } else {
- off = ptr_reg->smin_value + ptr_reg->off;
- *ptr_limit = ptr_reg->map_ptr->value_size - off;
- }
- return 0;
+ max = ptr_reg->map_ptr->value_size;
+ ptr_limit = (mask_to_left ?
+ ptr_reg->smin_value :
+ ptr_reg->umax_value) + ptr_reg->off;
+ break;
default:
- return -EINVAL;
+ return REASON_TYPE;
}
+
+ if (ptr_limit >= max)
+ return REASON_LIMIT;
+ *alu_limit = ptr_limit;
+ return 0;
}
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
@@ -4246,7 +4346,7 @@
if (aux->alu_state &&
(aux->alu_state != alu_state ||
aux->alu_limit != alu_limit))
- return -EACCES;
+ return REASON_PATHS;
/* Corresponding fixup done in fixup_bpf_calls(). */
aux->alu_state = alu_state;
@@ -4265,19 +4365,55 @@
return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
}
+static bool sanitize_needed(u8 opcode)
+{
+ return opcode == BPF_ADD || opcode == BPF_SUB;
+}
+
+struct bpf_sanitize_info {
+ struct bpf_insn_aux_data aux;
+ bool mask_to_left;
+};
+
+static struct bpf_verifier_state *
+sanitize_speculative_path(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn,
+ u32 next_idx, u32 curr_idx)
+{
+ struct bpf_verifier_state *branch;
+ struct bpf_reg_state *regs;
+
+ branch = push_stack(env, next_idx, curr_idx, true);
+ if (branch && insn) {
+ regs = branch->frame[branch->curframe]->regs;
+ if (BPF_SRC(insn->code) == BPF_K) {
+ mark_reg_unknown(env, regs, insn->dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X) {
+ mark_reg_unknown(env, regs, insn->dst_reg);
+ mark_reg_unknown(env, regs, insn->src_reg);
+ }
+ }
+ return branch;
+}
+
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
struct bpf_insn *insn,
const struct bpf_reg_state *ptr_reg,
+ const struct bpf_reg_state *off_reg,
struct bpf_reg_state *dst_reg,
- bool off_is_neg)
+ struct bpf_sanitize_info *info,
+ const bool commit_window)
{
+ struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
struct bpf_verifier_state *vstate = env->cur_state;
- struct bpf_insn_aux_data *aux = cur_aux(env);
+ bool off_is_imm = tnum_is_const(off_reg->var_off);
+ bool off_is_neg = off_reg->smin_value < 0;
bool ptr_is_dst_reg = ptr_reg == dst_reg;
u8 opcode = BPF_OP(insn->code);
u32 alu_state, alu_limit;
struct bpf_reg_state tmp;
bool ret;
+ int err;
if (can_skip_alu_sanitation(env, insn))
return 0;
@@ -4289,15 +4425,53 @@
if (vstate->speculative)
goto do_sim;
- alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
- alu_state |= ptr_is_dst_reg ?
- BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+ if (!commit_window) {
+ if (!tnum_is_const(off_reg->var_off) &&
+ (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
+ return REASON_BOUNDS;
- if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
- return 0;
- if (update_alu_sanitation_state(aux, alu_state, alu_limit))
- return -EACCES;
+ info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
+ (opcode == BPF_SUB && !off_is_neg);
+ }
+
+ err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
+ if (err < 0)
+ return err;
+
+ if (commit_window) {
+ /* In commit phase we narrow the masking window based on
+ * the observed pointer move after the simulated operation.
+ */
+ alu_state = info->aux.alu_state;
+ alu_limit = abs(info->aux.alu_limit - alu_limit);
+ } else {
+ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
+ alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
+ alu_state |= ptr_is_dst_reg ?
+ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+ /* Limit pruning on unknown scalars to enable deep search for
+ * potential masking differences from other program paths.
+ */
+ if (!off_is_imm)
+ env->explore_alu_limits = true;
+ }
+
+ err = update_alu_sanitation_state(aux, alu_state, alu_limit);
+ if (err < 0)
+ return err;
do_sim:
+ /* If we're in commit phase, we're done here given we already
+ * pushed the truncated dst_reg into the speculative verification
+ * stack.
+ *
+ * Also, when register is a known constant, we rewrite register-based
+ * operation to immediate-based, and thus do not need masking (and as
+ * a consequence, do not need to simulate the zero-truncation either).
+ */
+ if (commit_window || off_is_imm)
+ return 0;
+
/* Simulate and find potential out-of-bounds access under
* speculative execution from truncation as a result of
* masking when off was not within expected range. If off
@@ -4311,10 +4485,98 @@
tmp = *dst_reg;
*dst_reg = *ptr_reg;
}
- ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
+ ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
+ env->insn_idx);
if (!ptr_is_dst_reg && ret)
*dst_reg = tmp;
- return !ret ? -EFAULT : 0;
+ return !ret ? REASON_STACK : 0;
+}
+
+static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
+{
+ struct bpf_verifier_state *vstate = env->cur_state;
+
+ /* If we simulate paths under speculation, we don't update the
+ * insn as 'seen' such that when we verify unreachable paths in
+ * the non-speculative domain, sanitize_dead_code() can still
+ * rewrite/sanitize them.
+ */
+ if (!vstate->speculative)
+ env->insn_aux_data[env->insn_idx].seen = true;
+}
+
+static int sanitize_err(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, int reason,
+ const struct bpf_reg_state *off_reg,
+ const struct bpf_reg_state *dst_reg)
+{
+ static const char *err = "pointer arithmetic with it prohibited for !root";
+ const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
+ u32 dst = insn->dst_reg, src = insn->src_reg;
+
+ switch (reason) {
+ case REASON_BOUNDS:
+ verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
+ off_reg == dst_reg ? dst : src, err);
+ break;
+ case REASON_TYPE:
+ verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
+ off_reg == dst_reg ? src : dst, err);
+ break;
+ case REASON_PATHS:
+ verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
+ dst, op, err);
+ break;
+ case REASON_LIMIT:
+ verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
+ dst, op, err);
+ break;
+ case REASON_STACK:
+ verbose(env, "R%d could not be pushed for speculative verification, %s\n",
+ dst, err);
+ break;
+ default:
+ verbose(env, "verifier internal error: unknown reason (%d)\n",
+ reason);
+ break;
+ }
+
+ return -EACCES;
+}
+
+static int sanitize_check_bounds(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn,
+ const struct bpf_reg_state *dst_reg)
+{
+ u32 dst = insn->dst_reg;
+
+ /* For unprivileged we require that resulting offset must be in bounds
+ * in order to be able to sanitize access later on.
+ */
+ if (env->allow_ptr_leaks)
+ return 0;
+
+ switch (dst_reg->type) {
+ case PTR_TO_STACK:
+ if (check_stack_access(env, dst_reg, dst_reg->off +
+ dst_reg->var_off.value, 1)) {
+ verbose(env, "R%d stack pointer arithmetic goes out of range, "
+ "prohibited for !root\n", dst);
+ return -EACCES;
+ }
+ break;
+ case PTR_TO_MAP_VALUE:
+ if (check_map_access(env, dst, dst_reg->off, 1, false)) {
+ verbose(env, "R%d pointer arithmetic of map value goes out of range, "
+ "prohibited for !root\n", dst);
+ return -EACCES;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
}
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
@@ -4335,8 +4597,9 @@
smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
- u32 dst = insn->dst_reg, src = insn->src_reg;
+ struct bpf_sanitize_info info = {};
u8 opcode = BPF_OP(insn->code);
+ u32 dst = insn->dst_reg;
int ret;
dst_reg = ®s[dst];
@@ -4346,7 +4609,7 @@
/* Taint dst register if offset had invalid bounds derived from
* e.g. dead branches.
*/
- __mark_reg_unknown(dst_reg);
+ __mark_reg_unknown(env, dst_reg);
return 0;
}
@@ -4364,6 +4627,10 @@
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case CONST_PTR_TO_MAP:
+ /* smin_val represents the known value */
+ if (known && smin_val == 0 && opcode == BPF_ADD)
+ break;
+ /* fall-through */
case PTR_TO_PACKET_END:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
@@ -4375,13 +4642,6 @@
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
- case PTR_TO_MAP_VALUE:
- if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
- verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
- off_reg == dst_reg ? dst : src);
- return -EACCES;
- }
- /* fall-through */
default:
break;
}
@@ -4396,13 +4656,15 @@
!check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
return -EINVAL;
+ if (sanitize_needed(opcode)) {
+ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
+ &info, false);
+ if (ret < 0)
+ return sanitize_err(env, insn, ret, off_reg, dst_reg);
+ }
+
switch (opcode) {
case BPF_ADD:
- ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
- if (ret < 0) {
- verbose(env, "R%d tried to add from different maps or paths\n", dst);
- return ret;
- }
/* We can take a fixed offset as long as it doesn't overflow
* the s32 'off' field
*/
@@ -4453,11 +4715,6 @@
}
break;
case BPF_SUB:
- ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
- if (ret < 0) {
- verbose(env, "R%d tried to sub from different maps or paths\n", dst);
- return ret;
- }
if (dst_reg == off_reg) {
/* scalar -= pointer. Creates an unknown scalar */
verbose(env, "R%d tried to subtract pointer from scalar\n",
@@ -4538,22 +4795,13 @@
__reg_deduce_bounds(dst_reg);
__reg_bound_offset(dst_reg);
- /* For unprivileged we require that resulting offset must be in bounds
- * in order to be able to sanitize access later on.
- */
- if (!env->allow_ptr_leaks) {
- if (dst_reg->type == PTR_TO_MAP_VALUE &&
- check_map_access(env, dst, dst_reg->off, 1, false)) {
- verbose(env, "R%d pointer arithmetic of map value goes out of range, "
- "prohibited for !root\n", dst);
- return -EACCES;
- } else if (dst_reg->type == PTR_TO_STACK &&
- check_stack_access(env, dst_reg, dst_reg->off +
- dst_reg->var_off.value, 1)) {
- verbose(env, "R%d stack pointer arithmetic goes out of range, "
- "prohibited for !root\n", dst);
- return -EACCES;
- }
+ if (sanitize_check_bounds(env, insn, dst_reg) < 0)
+ return -EACCES;
+ if (sanitize_needed(opcode)) {
+ ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
+ &info, true);
+ if (ret < 0)
+ return sanitize_err(env, insn, ret, off_reg, dst_reg);
}
return 0;
@@ -4574,7 +4822,6 @@
s64 smin_val, smax_val;
u64 umin_val, umax_val;
u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
- u32 dst = insn->dst_reg;
int ret;
if (insn_bitness == 32) {
@@ -4598,23 +4845,24 @@
/* Taint dst register if offset had invalid bounds derived from
* e.g. dead branches.
*/
- __mark_reg_unknown(dst_reg);
+ __mark_reg_unknown(env, dst_reg);
return 0;
}
if (!src_known &&
opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
- __mark_reg_unknown(dst_reg);
+ __mark_reg_unknown(env, dst_reg);
return 0;
}
+ if (sanitize_needed(opcode)) {
+ ret = sanitize_val_alu(env, insn);
+ if (ret < 0)
+ return sanitize_err(env, insn, ret, NULL, NULL);
+ }
+
switch (opcode) {
case BPF_ADD:
- ret = sanitize_val_alu(env, insn);
- if (ret < 0) {
- verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
- return ret;
- }
if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
signed_add_overflows(dst_reg->smax_value, smax_val)) {
dst_reg->smin_value = S64_MIN;
@@ -4634,11 +4882,6 @@
dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
break;
case BPF_SUB:
- ret = sanitize_val_alu(env, insn);
- if (ret < 0) {
- verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
- return ret;
- }
if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
signed_sub_overflows(dst_reg->smax_value, smin_val)) {
/* Overflow possible, we know nothing */
@@ -4812,9 +5055,16 @@
/* Upon reaching here, src_known is true and
* umax_val is equal to umin_val.
*/
- dst_reg->smin_value >>= umin_val;
- dst_reg->smax_value >>= umin_val;
- dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
+ if (insn_bitness == 32) {
+ dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val);
+ dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val);
+ } else {
+ dst_reg->smin_value >>= umin_val;
+ dst_reg->smax_value >>= umin_val;
+ }
+
+ dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val,
+ insn_bitness);
/* blow away the dst_reg umin_value/umax_value and rely on
* dst_reg var_off to refine the result.
@@ -5317,6 +5567,70 @@
reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
}
+/* Constrain the possible values of @reg with unsigned upper bound @bound.
+ * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive.
+ * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits
+ * of @reg.
+ */
+static void set_upper_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32,
+ bool is_exclusive)
+{
+ if (is_exclusive) {
+ /* There are no values for `reg` that make `reg<0` true. */
+ if (bound == 0)
+ return;
+ bound--;
+ }
+ if (is_jmp32) {
+ /* Constrain the register's value in the tnum representation.
+ * For 64-bit comparisons this happens later in
+ * __reg_bound_offset(), but for 32-bit comparisons, we can be
+ * more precise than what can be derived from the updated
+ * numeric bounds.
+ */
+ struct tnum t = tnum_range(0, bound);
+
+ t.mask |= ~0xffffffffULL; /* upper half is unknown */
+ reg->var_off = tnum_intersect(reg->var_off, t);
+
+ /* Compute the 64-bit bound from the 32-bit bound. */
+ bound += gen_hi_max(reg->var_off);
+ }
+ reg->umax_value = min(reg->umax_value, bound);
+}
+
+/* Constrain the possible values of @reg with unsigned lower bound @bound.
+ * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive.
+ * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits
+ * of @reg.
+ */
+static void set_lower_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32,
+ bool is_exclusive)
+{
+ if (is_exclusive) {
+ /* There are no values for `reg` that make `reg>MAX` true. */
+ if (bound == (is_jmp32 ? U32_MAX : U64_MAX))
+ return;
+ bound++;
+ }
+ if (is_jmp32) {
+ /* Constrain the register's value in the tnum representation.
+ * For 64-bit comparisons this happens later in
+ * __reg_bound_offset(), but for 32-bit comparisons, we can be
+ * more precise than what can be derived from the updated
+ * numeric bounds.
+ */
+ struct tnum t = tnum_range(bound, U32_MAX);
+
+ t.mask |= ~0xffffffffULL; /* upper half is unknown */
+ reg->var_off = tnum_intersect(reg->var_off, t);
+
+ /* Compute the 64-bit bound from the 32-bit bound. */
+ bound += gen_hi_min(reg->var_off);
+ }
+ reg->umin_value = max(reg->umin_value, bound);
+}
+
/* Adjusts the register min/max values in the case that the dst_reg is the
* variable register that we are working on, and src_reg is a constant or we're
* simply doing a BPF_K check.
@@ -5372,15 +5686,8 @@
case BPF_JGE:
case BPF_JGT:
{
- u64 false_umax = opcode == BPF_JGT ? val : val - 1;
- u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
-
- if (is_jmp32) {
- false_umax += gen_hi_max(false_reg->var_off);
- true_umin += gen_hi_min(true_reg->var_off);
- }
- false_reg->umax_value = min(false_reg->umax_value, false_umax);
- true_reg->umin_value = max(true_reg->umin_value, true_umin);
+ set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JGE);
+ set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JGT);
break;
}
case BPF_JSGE:
@@ -5401,15 +5708,8 @@
case BPF_JLE:
case BPF_JLT:
{
- u64 false_umin = opcode == BPF_JLT ? val : val + 1;
- u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
-
- if (is_jmp32) {
- false_umin += gen_hi_min(false_reg->var_off);
- true_umax += gen_hi_max(true_reg->var_off);
- }
- false_reg->umin_value = max(false_reg->umin_value, false_umin);
- true_reg->umax_value = min(true_reg->umax_value, true_umax);
+ set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JLE);
+ set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JLT);
break;
}
case BPF_JSLE:
@@ -5484,15 +5784,8 @@
case BPF_JGE:
case BPF_JGT:
{
- u64 false_umin = opcode == BPF_JGT ? val : val + 1;
- u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
-
- if (is_jmp32) {
- false_umin += gen_hi_min(false_reg->var_off);
- true_umax += gen_hi_max(true_reg->var_off);
- }
- false_reg->umin_value = max(false_reg->umin_value, false_umin);
- true_reg->umax_value = min(true_reg->umax_value, true_umax);
+ set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JGE);
+ set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JGT);
break;
}
case BPF_JSGE:
@@ -5510,15 +5803,8 @@
case BPF_JLE:
case BPF_JLT:
{
- u64 false_umax = opcode == BPF_JLT ? val : val - 1;
- u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
-
- if (is_jmp32) {
- false_umax += gen_hi_max(false_reg->var_off);
- true_umin += gen_hi_min(true_reg->var_off);
- }
- false_reg->umax_value = min(false_reg->umax_value, false_umax);
- true_reg->umin_value = max(true_reg->umin_value, true_umin);
+ set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JLE);
+ set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JLT);
break;
}
case BPF_JSLE:
@@ -5850,14 +6136,28 @@
if (err)
return err;
}
+
if (pred == 1) {
- /* only follow the goto, ignore fall-through */
+ /* Only follow the goto, ignore fall-through. If needed, push
+ * the fall-through branch for simulation under speculative
+ * execution.
+ */
+ if (!env->allow_ptr_leaks &&
+ !sanitize_speculative_path(env, insn, *insn_idx + 1,
+ *insn_idx))
+ return -EFAULT;
*insn_idx += insn->off;
return 0;
} else if (pred == 0) {
- /* only follow fall-through branch, since
- * that's where the program will go
+ /* Only follow the fall-through branch, since that's where the
+ * program will go. If needed, push the goto branch for
+ * simulation under speculative execution.
*/
+ if (!env->allow_ptr_leaks &&
+ !sanitize_speculative_path(env, insn,
+ *insn_idx + insn->off + 1,
+ *insn_idx))
+ return -EFAULT;
return 0;
}
@@ -6019,6 +6319,7 @@
static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct bpf_reg_state *regs = cur_regs(env);
+ static const int ctx_reg = BPF_REG_6;
u8 mode = BPF_MODE(insn->code);
int i, err;
@@ -6052,7 +6353,7 @@
}
/* check whether implicit source operand (register R6) is readable */
- err = check_reg_arg(env, BPF_REG_6, SRC_OP);
+ err = check_reg_arg(env, ctx_reg, SRC_OP);
if (err)
return err;
@@ -6071,7 +6372,7 @@
return -EINVAL;
}
- if (regs[BPF_REG_6].type != PTR_TO_CTX) {
+ if (regs[ctx_reg].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
return -EINVAL;
@@ -6084,6 +6385,10 @@
return err;
}
+ err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg);
+ if (err < 0)
+ return err;
+
/* reset caller saved regs to unreadable */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -6677,13 +6982,6 @@
old->smax_value >= cur->smax_value;
}
-/* Maximum number of register states that can exist at once */
-#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
-struct idpair {
- u32 old;
- u32 cur;
-};
-
/* If in the old state two registers had the same id, then they need to have
* the same id in the new state as well. But that id could be different from
* the old state, so we need to track the mapping from old to new ids.
@@ -6694,11 +6992,11 @@
* So we look through our idmap to see if this old id has been seen before. If
* so, we require the new id to match; otherwise, we add the id pair to the map.
*/
-static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
+static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
{
unsigned int i;
- for (i = 0; i < ID_MAP_SIZE; i++) {
+ for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
if (!idmap[i].old) {
/* Reached an empty slot; haven't seen this id before */
idmap[i].old = old_id;
@@ -6727,7 +7025,7 @@
/* since the register is unused, clear its state
* to make further comparison simpler
*/
- __mark_reg_not_init(&st->regs[i]);
+ __mark_reg_not_init(env, &st->regs[i]);
}
for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
@@ -6735,7 +7033,7 @@
/* liveness must not touch this stack slot anymore */
st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
if (!(live & REG_LIVE_READ)) {
- __mark_reg_not_init(&st->stack[i].spilled_ptr);
+ __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
for (j = 0; j < BPF_REG_SIZE; j++)
st->stack[i].slot_type[j] = STACK_INVALID;
}
@@ -6810,8 +7108,8 @@
}
/* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
- struct idpair *idmap)
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
{
bool equal;
@@ -6837,6 +7135,8 @@
return false;
switch (rold->type) {
case SCALAR_VALUE:
+ if (env->explore_alu_limits)
+ return false;
if (rcur->type == SCALAR_VALUE) {
if (!rold->precise && !rcur->precise)
return true;
@@ -6926,9 +7226,8 @@
return false;
}
-static bool stacksafe(struct bpf_func_state *old,
- struct bpf_func_state *cur,
- struct idpair *idmap)
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+ struct bpf_func_state *cur, struct bpf_id_pair *idmap)
{
int i, spi;
@@ -6973,9 +7272,8 @@
continue;
if (old->stack[spi].slot_type[0] != STACK_SPILL)
continue;
- if (!regsafe(&old->stack[spi].spilled_ptr,
- &cur->stack[spi].spilled_ptr,
- idmap))
+ if (!regsafe(env, &old->stack[spi].spilled_ptr,
+ &cur->stack[spi].spilled_ptr, idmap))
/* when explored and current stack slot are both storing
* spilled registers, check that stored pointers types
* are the same as well.
@@ -7025,32 +7323,24 @@
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
-static bool func_states_equal(struct bpf_func_state *old,
+static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
struct bpf_func_state *cur)
{
- struct idpair *idmap;
- bool ret = false;
int i;
- idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
- /* If we failed to allocate the idmap, just say it's not safe */
- if (!idmap)
+ memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+ for (i = 0; i < MAX_BPF_REG; i++)
+ if (!regsafe(env, &old->regs[i], &cur->regs[i],
+ env->idmap_scratch))
+ return false;
+
+ if (!stacksafe(env, old, cur, env->idmap_scratch))
return false;
- for (i = 0; i < MAX_BPF_REG; i++) {
- if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
- goto out_free;
- }
-
- if (!stacksafe(old, cur, idmap))
- goto out_free;
-
if (!refsafe(old, cur))
- goto out_free;
- ret = true;
-out_free:
- kfree(idmap);
- return ret;
+ return false;
+
+ return true;
}
static bool states_equal(struct bpf_verifier_env *env,
@@ -7077,7 +7367,7 @@
for (i = 0; i <= old->curframe; i++) {
if (old->frame[i]->callsite != cur->frame[i]->callsite)
return false;
- if (!func_states_equal(old->frame[i], cur->frame[i]))
+ if (!func_states_equal(env, old->frame[i], cur->frame[i]))
return false;
}
return true;
@@ -7569,7 +7859,7 @@
}
regs = cur_regs(env);
- env->insn_aux_data[env->insn_idx].seen = true;
+ sanitize_mark_insn_seen(env);
prev_insn_idx = env->insn_idx;
if (class == BPF_ALU || class == BPF_ALU64) {
@@ -7804,7 +8094,7 @@
return err;
env->insn_idx++;
- env->insn_aux_data[env->insn_idx].seen = true;
+ sanitize_mark_insn_seen(env);
} else {
verbose(env, "invalid BPF_LD mode\n");
return -EINVAL;
@@ -8078,11 +8368,13 @@
* insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
* [0, off) and [off, end) to new locations, so the patched range stays zero
*/
-static int adjust_insn_aux_data(struct bpf_verifier_env *env,
- struct bpf_prog *new_prog, u32 off, u32 cnt)
+static void adjust_insn_aux_data(struct bpf_verifier_env *env,
+ struct bpf_insn_aux_data *new_data,
+ struct bpf_prog *new_prog, u32 off, u32 cnt)
{
- struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+ struct bpf_insn_aux_data *old_data = env->insn_aux_data;
struct bpf_insn *insn = new_prog->insnsi;
+ bool old_seen = old_data[off].seen;
u32 prog_len;
int i;
@@ -8093,22 +8385,19 @@
old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
if (cnt == 1)
- return 0;
+ return;
prog_len = new_prog->len;
- new_data = vzalloc(array_size(prog_len,
- sizeof(struct bpf_insn_aux_data)));
- if (!new_data)
- return -ENOMEM;
+
memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
memcpy(new_data + off + cnt - 1, old_data + off,
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
for (i = off; i < off + cnt - 1; i++) {
- new_data[i].seen = true;
+ /* Expand insni[off]'s seen count to the patched range. */
+ new_data[i].seen = old_seen;
new_data[i].zext_dst = insn_has_def32(env, insn + i);
}
env->insn_aux_data = new_data;
vfree(old_data);
- return 0;
}
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
@@ -8129,6 +8418,14 @@
const struct bpf_insn *patch, u32 len)
{
struct bpf_prog *new_prog;
+ struct bpf_insn_aux_data *new_data = NULL;
+
+ if (len > 1) {
+ new_data = vzalloc(array_size(env->prog->len + len - 1,
+ sizeof(struct bpf_insn_aux_data)));
+ if (!new_data)
+ return NULL;
+ }
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
if (IS_ERR(new_prog)) {
@@ -8136,10 +8433,10 @@
verbose(env,
"insn %d cannot be patched due to 16-bit range\n",
env->insn_aux_data[off].orig_idx);
+ vfree(new_data);
return NULL;
}
- if (adjust_insn_aux_data(env, new_prog, off, len))
- return NULL;
+ adjust_insn_aux_data(env, new_data, new_prog, off, len);
adjust_subprog_starts(env, off, len);
return new_prog;
}
@@ -8314,6 +8611,7 @@
if (aux_data[i].seen)
continue;
memcpy(insn + i, &trap, sizeof(trap));
+ aux_data[i].zext_dst = false;
}
}
@@ -8523,35 +8821,33 @@
for (i = 0; i < insn_cnt; i++, insn++) {
bpf_convert_ctx_access_t convert_ctx_access;
+ bool ctx_access;
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
type = BPF_READ;
- else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_DW))
+ ctx_access = true;
+ } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- else
+ ctx_access = BPF_CLASS(insn->code) == BPF_STX;
+ } else {
continue;
+ }
if (type == BPF_WRITE &&
- env->insn_aux_data[i + delta].sanitize_stack_off) {
+ env->insn_aux_data[i + delta].sanitize_stack_spill) {
struct bpf_insn patch[] = {
- /* Sanitize suspicious stack slot with zero.
- * There are no memory dependencies for this store,
- * since it's only using frame pointer and immediate
- * constant of zero
- */
- BPF_ST_MEM(BPF_DW, BPF_REG_FP,
- env->insn_aux_data[i + delta].sanitize_stack_off,
- 0),
- /* the original STX instruction will immediately
- * overwrite the same stack slot with appropriate value
- */
*insn,
+ BPF_ST_NOSPEC(),
};
cnt = ARRAY_SIZE(patch);
@@ -8565,6 +8861,9 @@
continue;
}
+ if (!ctx_access)
+ continue;
+
switch (env->insn_aux_data[i + delta].ptr_type) {
case PTR_TO_CTX:
if (!ops->convert_ctx_access)
@@ -8626,6 +8925,10 @@
if (is_narrower_load && size < target_size) {
u8 shift = bpf_ctx_narrow_access_offset(
off, size, size_default) * 8;
+ if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
+ verbose(env, "bpf verifier narrow ctx load misconfigured\n");
+ return -EINVAL;
+ }
if (ctx_field_size <= 4) {
if (shift)
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
@@ -8887,30 +9190,30 @@
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
- struct bpf_insn mask_and_div[] = {
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
- /* Rx div 0 -> 0 */
- BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ struct bpf_insn *patchlet;
+ struct bpf_insn chk_and_div[] = {
+ /* [R,W]x div 0 -> 0 */
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JNE | BPF_K, insn->src_reg,
+ 0, 2, 0),
BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
*insn,
};
- struct bpf_insn mask_and_mod[] = {
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
- /* Rx mod 0 -> Rx */
- BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+ struct bpf_insn chk_and_mod[] = {
+ /* [R,W]x mod 0 -> [R,W]x */
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, insn->src_reg,
+ 0, 1 + (is64 ? 0 : 1), 0),
*insn,
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
};
- struct bpf_insn *patchlet;
- if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
- insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
- patchlet = mask_and_div + (is64 ? 1 : 0);
- cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
- } else {
- patchlet = mask_and_mod + (is64 ? 1 : 0);
- cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
- }
+ patchlet = isdiv ? chk_and_div : chk_and_mod;
+ cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+ ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)
@@ -8947,7 +9250,7 @@
const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
struct bpf_insn insn_buf[16];
struct bpf_insn *patch = &insn_buf[0];
- bool issrc, isneg;
+ bool issrc, isneg, isimm;
u32 off_reg;
aux = &env->insn_aux_data[i + delta];
@@ -8958,28 +9261,29 @@
isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
BPF_ALU_SANITIZE_SRC;
+ isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
off_reg = issrc ? insn->src_reg : insn->dst_reg;
- if (isneg)
- *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
- *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
- *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
- *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
- if (issrc) {
- *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
- off_reg);
- insn->src_reg = BPF_REG_AX;
+ if (isimm) {
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
} else {
- *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
- BPF_REG_AX);
+ if (isneg)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
+ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
+ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
}
+ if (!issrc)
+ *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
+ insn->src_reg = BPF_REG_AX;
if (isneg)
insn->code = insn->code == code_add ?
code_sub : code_add;
*patch++ = *insn;
- if (issrc && isneg)
+ if (issrc && isneg && !isimm)
*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
cnt = patch - insn_buf;