David Brazdil0f672f62019-12-10 10:32:29 +00001// SPDX-License-Identifier: GPL-2.0-only
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002/*
3 * Functions to manage eBPF programs attached to cgroups
4 *
5 * Copyright (c) 2016 Daniel Mack
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006 */
7
8#include <linux/kernel.h>
9#include <linux/atomic.h>
10#include <linux/cgroup.h>
David Brazdil0f672f62019-12-10 10:32:29 +000011#include <linux/filter.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000012#include <linux/slab.h>
David Brazdil0f672f62019-12-10 10:32:29 +000013#include <linux/sysctl.h>
14#include <linux/string.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000015#include <linux/bpf.h>
16#include <linux/bpf-cgroup.h>
17#include <net/sock.h>
David Brazdil0f672f62019-12-10 10:32:29 +000018#include <net/bpf_sk_storage.h>
19
20#include "../cgroup/cgroup-internal.h"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000021
22DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
23EXPORT_SYMBOL(cgroup_bpf_enabled_key);
24
David Brazdil0f672f62019-12-10 10:32:29 +000025void cgroup_bpf_offline(struct cgroup *cgrp)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000026{
David Brazdil0f672f62019-12-10 10:32:29 +000027 cgroup_get(cgrp);
28 percpu_ref_kill(&cgrp->bpf.refcnt);
29}
30
31/**
32 * cgroup_bpf_release() - put references of all bpf programs and
33 * release all cgroup bpf data
34 * @work: work structure embedded into the cgroup to modify
35 */
36static void cgroup_bpf_release(struct work_struct *work)
37{
Olivier Deprez0e641232021-09-23 10:07:05 +020038 struct cgroup *p, *cgrp = container_of(work, struct cgroup,
39 bpf.release_work);
David Brazdil0f672f62019-12-10 10:32:29 +000040 enum bpf_cgroup_storage_type stype;
41 struct bpf_prog_array *old_array;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000042 unsigned int type;
43
David Brazdil0f672f62019-12-10 10:32:29 +000044 mutex_lock(&cgroup_mutex);
45
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000046 for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
47 struct list_head *progs = &cgrp->bpf.progs[type];
48 struct bpf_prog_list *pl, *tmp;
49
50 list_for_each_entry_safe(pl, tmp, progs, node) {
51 list_del(&pl->node);
52 bpf_prog_put(pl->prog);
David Brazdil0f672f62019-12-10 10:32:29 +000053 for_each_cgroup_storage_type(stype) {
54 bpf_cgroup_storage_unlink(pl->storage[stype]);
55 bpf_cgroup_storage_free(pl->storage[stype]);
56 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000057 kfree(pl);
58 static_branch_dec(&cgroup_bpf_enabled_key);
59 }
David Brazdil0f672f62019-12-10 10:32:29 +000060 old_array = rcu_dereference_protected(
61 cgrp->bpf.effective[type],
62 lockdep_is_held(&cgroup_mutex));
63 bpf_prog_array_free(old_array);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000064 }
David Brazdil0f672f62019-12-10 10:32:29 +000065
66 mutex_unlock(&cgroup_mutex);
67
Olivier Deprez0e641232021-09-23 10:07:05 +020068 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
69 cgroup_bpf_put(p);
70
David Brazdil0f672f62019-12-10 10:32:29 +000071 percpu_ref_exit(&cgrp->bpf.refcnt);
72 cgroup_put(cgrp);
73}
74
75/**
76 * cgroup_bpf_release_fn() - callback used to schedule releasing
77 * of bpf cgroup data
78 * @ref: percpu ref counter structure
79 */
80static void cgroup_bpf_release_fn(struct percpu_ref *ref)
81{
82 struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
83
84 INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
85 queue_work(system_wq, &cgrp->bpf.release_work);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000086}
87
88/* count number of elements in the list.
89 * it's slow but the list cannot be long
90 */
91static u32 prog_list_length(struct list_head *head)
92{
93 struct bpf_prog_list *pl;
94 u32 cnt = 0;
95
96 list_for_each_entry(pl, head, node) {
97 if (!pl->prog)
98 continue;
99 cnt++;
100 }
101 return cnt;
102}
103
104/* if parent has non-overridable prog attached,
105 * disallow attaching new programs to the descendant cgroup.
106 * if parent has overridable or multi-prog, allow attaching
107 */
108static bool hierarchy_allows_attach(struct cgroup *cgrp,
109 enum bpf_attach_type type,
110 u32 new_flags)
111{
112 struct cgroup *p;
113
114 p = cgroup_parent(cgrp);
115 if (!p)
116 return true;
117 do {
118 u32 flags = p->bpf.flags[type];
119 u32 cnt;
120
121 if (flags & BPF_F_ALLOW_MULTI)
122 return true;
123 cnt = prog_list_length(&p->bpf.progs[type]);
124 WARN_ON_ONCE(cnt > 1);
125 if (cnt == 1)
126 return !!(flags & BPF_F_ALLOW_OVERRIDE);
127 p = cgroup_parent(p);
128 } while (p);
129 return true;
130}
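
/* Illustrative scenario (not from the original source): if cgroup A is the
 * parent of B and A has one program attached with no flags (non-overridable),
 * attaching to B is rejected with -EPERM.  Had A's program been attached with
 * BPF_F_ALLOW_OVERRIDE or BPF_F_ALLOW_MULTI, attaching to B would be allowed.
 */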
131
132/* compute a chain of effective programs for a given cgroup:
133 * start from the list of programs in this cgroup and add
134 * all parent programs.
135 * Note that a parent's F_ALLOW_OVERRIDE-type program yields
136 * to programs in this cgroup
137 */
138static int compute_effective_progs(struct cgroup *cgrp,
139 enum bpf_attach_type type,
David Brazdil0f672f62019-12-10 10:32:29 +0000140 struct bpf_prog_array **array)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000141{
David Brazdil0f672f62019-12-10 10:32:29 +0000142 enum bpf_cgroup_storage_type stype;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000143 struct bpf_prog_array *progs;
144 struct bpf_prog_list *pl;
145 struct cgroup *p = cgrp;
146 int cnt = 0;
147
148 /* count number of effective programs by walking parents */
149 do {
150 if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
151 cnt += prog_list_length(&p->bpf.progs[type]);
152 p = cgroup_parent(p);
153 } while (p);
154
155 progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
156 if (!progs)
157 return -ENOMEM;
158
159 /* populate the array with effective progs */
160 cnt = 0;
161 p = cgrp;
162 do {
163 if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
164 continue;
165
166 list_for_each_entry(pl, &p->bpf.progs[type], node) {
167 if (!pl->prog)
168 continue;
169
170 progs->items[cnt].prog = pl->prog;
David Brazdil0f672f62019-12-10 10:32:29 +0000171 for_each_cgroup_storage_type(stype)
172 progs->items[cnt].cgroup_storage[stype] =
173 pl->storage[stype];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000174 cnt++;
175 }
176 } while ((p = cgroup_parent(p)));
177
David Brazdil0f672f62019-12-10 10:32:29 +0000178 *array = progs;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000179 return 0;
180}
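
/* Worked example of the ordering produced above (assuming a hierarchy
 * root -> A -> B with BPF_F_ALLOW_MULTI used everywhere): B's effective array
 * is filled as [B's own progs, A's progs, root's progs], and
 * BPF_PROG_RUN_ARRAY() later executes it from index 0 upwards.
 */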
181
182static void activate_effective_progs(struct cgroup *cgrp,
183 enum bpf_attach_type type,
David Brazdil0f672f62019-12-10 10:32:29 +0000184 struct bpf_prog_array *old_array)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000185{
David Brazdil0f672f62019-12-10 10:32:29 +0000186 rcu_swap_protected(cgrp->bpf.effective[type], old_array,
187 lockdep_is_held(&cgroup_mutex));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000188 /* free prog array after grace period, since __cgroup_bpf_run_*()
189 * might still be walking the array
190 */
191 bpf_prog_array_free(old_array);
192}
193
194/**
195 * cgroup_bpf_inherit() - inherit effective programs from parent
196 * @cgrp: the cgroup to modify
197 */
198int cgroup_bpf_inherit(struct cgroup *cgrp)
199{
200/* have to use a macro instead of const int, since the compiler thinks
201 * that the array below is variable length
202 */
203#define NR ARRAY_SIZE(cgrp->bpf.effective)
David Brazdil0f672f62019-12-10 10:32:29 +0000204 struct bpf_prog_array *arrays[NR] = {};
Olivier Deprez0e641232021-09-23 10:07:05 +0200205 struct cgroup *p;
David Brazdil0f672f62019-12-10 10:32:29 +0000206 int ret, i;
207
208 ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
209 GFP_KERNEL);
210 if (ret)
211 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000212
Olivier Deprez0e641232021-09-23 10:07:05 +0200213 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
214 cgroup_bpf_get(p);
215
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000216 for (i = 0; i < NR; i++)
217 INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
218
219 for (i = 0; i < NR; i++)
220 if (compute_effective_progs(cgrp, i, &arrays[i]))
221 goto cleanup;
222
223 for (i = 0; i < NR; i++)
224 activate_effective_progs(cgrp, i, arrays[i]);
225
226 return 0;
227cleanup:
228 for (i = 0; i < NR; i++)
229 bpf_prog_array_free(arrays[i]);
David Brazdil0f672f62019-12-10 10:32:29 +0000230
Olivier Deprez0e641232021-09-23 10:07:05 +0200231 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
232 cgroup_bpf_put(p);
233
David Brazdil0f672f62019-12-10 10:32:29 +0000234 percpu_ref_exit(&cgrp->bpf.refcnt);
235
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000236 return -ENOMEM;
237}
238
239static int update_effective_progs(struct cgroup *cgrp,
240 enum bpf_attach_type type)
241{
242 struct cgroup_subsys_state *css;
243 int err;
244
245 /* allocate and recompute effective prog arrays */
246 css_for_each_descendant_pre(css, &cgrp->self) {
247 struct cgroup *desc = container_of(css, struct cgroup, self);
248
David Brazdil0f672f62019-12-10 10:32:29 +0000249 if (percpu_ref_is_zero(&desc->bpf.refcnt))
250 continue;
251
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000252 err = compute_effective_progs(desc, type, &desc->bpf.inactive);
253 if (err)
254 goto cleanup;
255 }
256
257 /* all allocations were successful. Activate all prog arrays */
258 css_for_each_descendant_pre(css, &cgrp->self) {
259 struct cgroup *desc = container_of(css, struct cgroup, self);
260
David Brazdil0f672f62019-12-10 10:32:29 +0000261 if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
262 if (unlikely(desc->bpf.inactive)) {
263 bpf_prog_array_free(desc->bpf.inactive);
264 desc->bpf.inactive = NULL;
265 }
266 continue;
267 }
268
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000269 activate_effective_progs(desc, type, desc->bpf.inactive);
270 desc->bpf.inactive = NULL;
271 }
272
273 return 0;
274
275cleanup:
276 /* oom while computing effective. Free all computed effective arrays
277 * since they were not activated
278 */
279 css_for_each_descendant_pre(css, &cgrp->self) {
280 struct cgroup *desc = container_of(css, struct cgroup, self);
281
282 bpf_prog_array_free(desc->bpf.inactive);
283 desc->bpf.inactive = NULL;
284 }
285
286 return err;
287}
288
289#define BPF_CGROUP_MAX_PROGS 64
290
291/**
292 * __cgroup_bpf_attach() - Attach the program to a cgroup, and
293 * propagate the change to descendants
294 * @cgrp: The cgroup whose descendants to traverse
295 * @prog: A program to attach
296 * @type: Type of attach operation
David Brazdil0f672f62019-12-10 10:32:29 +0000297 * @flags: Option flags
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000298 *
299 * Must be called with cgroup_mutex held.
300 */
301int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
302 enum bpf_attach_type type, u32 flags)
303{
304 struct list_head *progs = &cgrp->bpf.progs[type];
305 struct bpf_prog *old_prog = NULL;
Olivier Deprez0e641232021-09-23 10:07:05 +0200306 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
307 struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
David Brazdil0f672f62019-12-10 10:32:29 +0000308 enum bpf_cgroup_storage_type stype;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000309 struct bpf_prog_list *pl;
310 bool pl_was_allocated;
311 int err;
312
313 if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
314 /* invalid combination */
315 return -EINVAL;
316
317 if (!hierarchy_allows_attach(cgrp, type, flags))
318 return -EPERM;
319
320 if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
321 /* Disallow attaching non-overridable on top
322 * of existing overridable in this cgroup.
323 * Disallow attaching multi-prog if overridable or none
324 */
325 return -EPERM;
326
327 if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
328 return -E2BIG;
329
David Brazdil0f672f62019-12-10 10:32:29 +0000330 for_each_cgroup_storage_type(stype) {
331 storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
332 if (IS_ERR(storage[stype])) {
333 storage[stype] = NULL;
334 for_each_cgroup_storage_type(stype)
335 bpf_cgroup_storage_free(storage[stype]);
336 return -ENOMEM;
337 }
338 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000339
340 if (flags & BPF_F_ALLOW_MULTI) {
341 list_for_each_entry(pl, progs, node) {
342 if (pl->prog == prog) {
343 /* disallow attaching the same prog twice */
David Brazdil0f672f62019-12-10 10:32:29 +0000344 for_each_cgroup_storage_type(stype)
345 bpf_cgroup_storage_free(storage[stype]);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000346 return -EINVAL;
347 }
348 }
349
350 pl = kmalloc(sizeof(*pl), GFP_KERNEL);
351 if (!pl) {
David Brazdil0f672f62019-12-10 10:32:29 +0000352 for_each_cgroup_storage_type(stype)
353 bpf_cgroup_storage_free(storage[stype]);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000354 return -ENOMEM;
355 }
356
357 pl_was_allocated = true;
358 pl->prog = prog;
David Brazdil0f672f62019-12-10 10:32:29 +0000359 for_each_cgroup_storage_type(stype)
360 pl->storage[stype] = storage[stype];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000361 list_add_tail(&pl->node, progs);
362 } else {
363 if (list_empty(progs)) {
364 pl = kmalloc(sizeof(*pl), GFP_KERNEL);
365 if (!pl) {
David Brazdil0f672f62019-12-10 10:32:29 +0000366 for_each_cgroup_storage_type(stype)
367 bpf_cgroup_storage_free(storage[stype]);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000368 return -ENOMEM;
369 }
370 pl_was_allocated = true;
371 list_add_tail(&pl->node, progs);
372 } else {
373 pl = list_first_entry(progs, typeof(*pl), node);
374 old_prog = pl->prog;
David Brazdil0f672f62019-12-10 10:32:29 +0000375 for_each_cgroup_storage_type(stype) {
376 old_storage[stype] = pl->storage[stype];
377 bpf_cgroup_storage_unlink(old_storage[stype]);
378 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000379 pl_was_allocated = false;
380 }
381 pl->prog = prog;
David Brazdil0f672f62019-12-10 10:32:29 +0000382 for_each_cgroup_storage_type(stype)
383 pl->storage[stype] = storage[stype];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000384 }
385
386 cgrp->bpf.flags[type] = flags;
387
388 err = update_effective_progs(cgrp, type);
389 if (err)
390 goto cleanup;
391
392 static_branch_inc(&cgroup_bpf_enabled_key);
David Brazdil0f672f62019-12-10 10:32:29 +0000393 for_each_cgroup_storage_type(stype) {
394 if (!old_storage[stype])
395 continue;
396 bpf_cgroup_storage_free(old_storage[stype]);
397 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000398 if (old_prog) {
399 bpf_prog_put(old_prog);
400 static_branch_dec(&cgroup_bpf_enabled_key);
401 }
David Brazdil0f672f62019-12-10 10:32:29 +0000402 for_each_cgroup_storage_type(stype)
403 bpf_cgroup_storage_link(storage[stype], cgrp, type);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000404 return 0;
405
406cleanup:
407 /* and cleanup the prog list */
408 pl->prog = old_prog;
David Brazdil0f672f62019-12-10 10:32:29 +0000409 for_each_cgroup_storage_type(stype) {
410 bpf_cgroup_storage_free(pl->storage[stype]);
411 pl->storage[stype] = old_storage[stype];
412 bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
413 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000414 if (pl_was_allocated) {
415 list_del(&pl->node);
416 kfree(pl);
417 }
418 return err;
419}
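
/* Illustrative userspace-side sketch of how this attach path is reached via
 * the bpf() syscall (cgroup_fd/prog_fd are assumed to come from opening a
 * cgroup directory and from BPF_PROG_LOAD; this is not part of this file):
 *
 *	union bpf_attr attr = {};
 *
 *	attr.target_fd	   = cgroup_fd;
 *	attr.attach_bpf_fd = prog_fd;
 *	attr.attach_type   = BPF_CGROUP_INET_EGRESS;
 *	attr.attach_flags  = BPF_F_ALLOW_MULTI;
 *	syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 */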
420
421/**
422 * __cgroup_bpf_detach() - Detach the program from a cgroup, and
423 * propagate the change to descendants
424 * @cgrp: The cgroup whose descendants to traverse
425 * @prog: A program to detach or NULL
426 * @type: Type of detach operation
427 *
428 * Must be called with cgroup_mutex held.
429 */
430int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
David Brazdil0f672f62019-12-10 10:32:29 +0000431 enum bpf_attach_type type)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000432{
433 struct list_head *progs = &cgrp->bpf.progs[type];
David Brazdil0f672f62019-12-10 10:32:29 +0000434 enum bpf_cgroup_storage_type stype;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000435 u32 flags = cgrp->bpf.flags[type];
436 struct bpf_prog *old_prog = NULL;
437 struct bpf_prog_list *pl;
438 int err;
439
440 if (flags & BPF_F_ALLOW_MULTI) {
441 if (!prog)
442 /* to detach a MULTI prog the user has to specify a valid FD
443 * of the program to be detached
444 */
445 return -EINVAL;
446 } else {
447 if (list_empty(progs))
448 /* report error when trying to detach and nothing is attached */
449 return -ENOENT;
450 }
451
452 if (flags & BPF_F_ALLOW_MULTI) {
453 /* find the prog and detach it */
454 list_for_each_entry(pl, progs, node) {
455 if (pl->prog != prog)
456 continue;
457 old_prog = prog;
458 /* mark it deleted, so it's ignored while
459 * recomputing effective
460 */
461 pl->prog = NULL;
462 break;
463 }
464 if (!old_prog)
465 return -ENOENT;
466 } else {
467 /* to maintain backward compatibility NONE and OVERRIDE cgroups
468 * allow detaching with invalid FD (prog==NULL)
469 */
470 pl = list_first_entry(progs, typeof(*pl), node);
471 old_prog = pl->prog;
472 pl->prog = NULL;
473 }
474
475 err = update_effective_progs(cgrp, type);
476 if (err)
477 goto cleanup;
478
479 /* now can actually delete it from this cgroup list */
480 list_del(&pl->node);
David Brazdil0f672f62019-12-10 10:32:29 +0000481 for_each_cgroup_storage_type(stype) {
482 bpf_cgroup_storage_unlink(pl->storage[stype]);
483 bpf_cgroup_storage_free(pl->storage[stype]);
484 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000485 kfree(pl);
486 if (list_empty(progs))
487 /* last program was detached, reset flags to zero */
488 cgrp->bpf.flags[type] = 0;
489
490 bpf_prog_put(old_prog);
491 static_branch_dec(&cgroup_bpf_enabled_key);
492 return 0;
493
494cleanup:
495 /* and restore back old_prog */
496 pl->prog = old_prog;
497 return err;
498}
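
/* Illustrative userspace-side note: detaching goes through BPF_PROG_DETACH
 * with the same target_fd/attach_type; attach_bpf_fd must name the program
 * when BPF_F_ALLOW_MULTI was used, and may be left at 0 otherwise, which
 * corresponds to the prog == NULL case handled above.
 */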
499
500/* Must be called with cgroup_mutex held to avoid races. */
501int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
502 union bpf_attr __user *uattr)
503{
504 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
505 enum bpf_attach_type type = attr->query.attach_type;
506 struct list_head *progs = &cgrp->bpf.progs[type];
507 u32 flags = cgrp->bpf.flags[type];
David Brazdil0f672f62019-12-10 10:32:29 +0000508 struct bpf_prog_array *effective;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000509 int cnt, ret = 0, i;
510
David Brazdil0f672f62019-12-10 10:32:29 +0000511 effective = rcu_dereference_protected(cgrp->bpf.effective[type],
512 lockdep_is_held(&cgroup_mutex));
513
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000514 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
David Brazdil0f672f62019-12-10 10:32:29 +0000515 cnt = bpf_prog_array_length(effective);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000516 else
517 cnt = prog_list_length(progs);
518
519 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
520 return -EFAULT;
521 if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
522 return -EFAULT;
523 if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
524 /* return early if user requested only program count + flags */
525 return 0;
526 if (attr->query.prog_cnt < cnt) {
527 cnt = attr->query.prog_cnt;
528 ret = -ENOSPC;
529 }
530
531 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
David Brazdil0f672f62019-12-10 10:32:29 +0000532 return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000533 } else {
534 struct bpf_prog_list *pl;
535 u32 id;
536
537 i = 0;
538 list_for_each_entry(pl, progs, node) {
539 id = pl->prog->aux->id;
540 if (copy_to_user(prog_ids + i, &id, sizeof(id)))
541 return -EFAULT;
542 if (++i == cnt)
543 break;
544 }
545 }
546 return ret;
547}
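
/* Illustrative userspace-side sketch of a query ("ids" is an assumed
 * caller-provided array, not something defined here):
 *
 *	attr.query.target_fd	= cgroup_fd;
 *	attr.query.attach_type	= BPF_CGROUP_INET_EGRESS;
 *	attr.query.query_flags	= BPF_F_QUERY_EFFECTIVE;
 *	attr.query.prog_ids	= (__u64)(unsigned long)ids;
 *	attr.query.prog_cnt	= ARRAY_SIZE(ids);
 *	syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
 */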
548
549int cgroup_bpf_prog_attach(const union bpf_attr *attr,
550 enum bpf_prog_type ptype, struct bpf_prog *prog)
551{
552 struct cgroup *cgrp;
553 int ret;
554
555 cgrp = cgroup_get_from_fd(attr->target_fd);
556 if (IS_ERR(cgrp))
557 return PTR_ERR(cgrp);
558
559 ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
560 attr->attach_flags);
561 cgroup_put(cgrp);
562 return ret;
563}
564
565int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
566{
567 struct bpf_prog *prog;
568 struct cgroup *cgrp;
569 int ret;
570
571 cgrp = cgroup_get_from_fd(attr->target_fd);
572 if (IS_ERR(cgrp))
573 return PTR_ERR(cgrp);
574
575 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
576 if (IS_ERR(prog))
577 prog = NULL;
578
579 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
580 if (prog)
581 bpf_prog_put(prog);
582
583 cgroup_put(cgrp);
584 return ret;
585}
586
587int cgroup_bpf_prog_query(const union bpf_attr *attr,
588 union bpf_attr __user *uattr)
589{
590 struct cgroup *cgrp;
591 int ret;
592
593 cgrp = cgroup_get_from_fd(attr->query.target_fd);
594 if (IS_ERR(cgrp))
595 return PTR_ERR(cgrp);
596
597 ret = cgroup_bpf_query(cgrp, attr, uattr);
598
599 cgroup_put(cgrp);
600 return ret;
601}
602
603/**
604 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
605 * @sk: The socket sending or receiving traffic
606 * @skb: The skb that is being sent or received
607 * @type: The type of program to be executed
608 *
609 * If no socket is passed, or the socket is not of type INET or INET6,
610 * this function does nothing and returns 0.
611 *
612 * The program type passed in via @type must be suitable for network
613 * filtering. No further check is performed to assert that.
614 *
David Brazdil0f672f62019-12-10 10:32:29 +0000615 * For egress packets, this function can return:
616 * NET_XMIT_SUCCESS (0) - continue with packet output
617 * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
618 * NET_XMIT_CN (2) - continue with packet output and notify TCP
619 * to call cwr
620 * -EPERM - drop packet
621 *
622 * For ingress packets, this function will return -EPERM if any
623 * attached program was found and if it returned != 1 during execution.
624 * Otherwise 0 is returned.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000625 */
626int __cgroup_bpf_run_filter_skb(struct sock *sk,
627 struct sk_buff *skb,
628 enum bpf_attach_type type)
629{
630 unsigned int offset = skb->data - skb_network_header(skb);
631 struct sock *save_sk;
David Brazdil0f672f62019-12-10 10:32:29 +0000632 void *saved_data_end;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000633 struct cgroup *cgrp;
634 int ret;
635
636 if (!sk || !sk_fullsock(sk))
637 return 0;
638
639 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
640 return 0;
641
642 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
643 save_sk = skb->sk;
644 skb->sk = sk;
645 __skb_push(skb, offset);
David Brazdil0f672f62019-12-10 10:32:29 +0000646
647 /* compute pointers for the bpf prog */
648 bpf_compute_and_save_data_end(skb, &saved_data_end);
649
650 if (type == BPF_CGROUP_INET_EGRESS) {
651 ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
652 cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
653 } else {
654 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
655 __bpf_prog_run_save_cb);
656 ret = (ret == 1 ? 0 : -EPERM);
657 }
658 bpf_restore_data_end(skb, saved_data_end);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000659 __skb_pull(skb, offset);
660 skb->sk = save_sk;
David Brazdil0f672f62019-12-10 10:32:29 +0000661
662 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000663}
664EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
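
/* Minimal BPF-side sketch (libbpf-style section name, not part of this file)
 * of a program this helper would run on egress:
 *
 *	SEC("cgroup_skb/egress")
 *	int egress_allow_all(struct __sk_buff *skb)
 *	{
 *		return 1;	// 1 = let the packet pass, 0 = drop it
 *	}
 */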
665
666/**
667 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
668 * @sk: sock structure to manipulate
669 * @type: The type of program to be executed
670 *
671 * The socket passed is expected to be of type INET or INET6.
672 *
673 * The program type passed in via @type must be suitable for sock
674 * filtering. No further check is performed to assert that.
675 *
676 * This function will return %-EPERM if an attached program was found
677 * and it returned != 1 during execution. In all other cases, 0 is returned.
678 */
679int __cgroup_bpf_run_filter_sk(struct sock *sk,
680 enum bpf_attach_type type)
681{
682 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
683 int ret;
684
685 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
686 return ret == 1 ? 0 : -EPERM;
687}
688EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
689
690/**
691 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
692 * provided by user sockaddr
693 * @sk: sock struct that will use sockaddr
694 * @uaddr: sockaddr struct provided by user
695 * @type: The type of program to be executed
696 * @t_ctx: Pointer to attach type specific context
697 *
698 * The socket is expected to be of type INET or INET6.
699 *
700 * This function will return %-EPERM if an attached program is found and
701 * returned value != 1 during execution. In all other cases, 0 is returned.
702 */
703int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
704 struct sockaddr *uaddr,
705 enum bpf_attach_type type,
706 void *t_ctx)
707{
708 struct bpf_sock_addr_kern ctx = {
709 .sk = sk,
710 .uaddr = uaddr,
711 .t_ctx = t_ctx,
712 };
713 struct sockaddr_storage unspec;
714 struct cgroup *cgrp;
715 int ret;
716
717 /* Check socket family since not all sockets represent network
718 * endpoint (e.g. AF_UNIX).
719 */
720 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
721 return 0;
722
723 if (!ctx.uaddr) {
724 memset(&unspec, 0, sizeof(unspec));
725 ctx.uaddr = (struct sockaddr *)&unspec;
726 }
727
728 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
729 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
730
731 return ret == 1 ? 0 : -EPERM;
732}
733EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
734
735/**
736 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
737 * @sk: socket to get cgroup from
738 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
739 * sk with connection information (IP addresses, etc.) May not contain
740 * cgroup info if it is a req sock.
741 * @type: The type of program to be executed
742 *
743 * The socket passed is expected to be of type INET or INET6.
744 *
745 * The program type passed in via @type must be suitable for sock_ops
746 * filtering. No further check is performed to assert that.
747 *
748 * This function will return %-EPERM if an attached program was found
749 * and it returned != 1 during execution. In all other cases, 0 is returned.
750 */
751int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
752 struct bpf_sock_ops_kern *sock_ops,
753 enum bpf_attach_type type)
754{
755 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
756 int ret;
757
758 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
759 BPF_PROG_RUN);
760 return ret == 1 ? 0 : -EPERM;
761}
762EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
763
764int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
765 short access, enum bpf_attach_type type)
766{
767 struct cgroup *cgrp;
768 struct bpf_cgroup_dev_ctx ctx = {
769 .access_type = (access << 16) | dev_type,
770 .major = major,
771 .minor = minor,
772 };
773 int allow = 1;
774
775 rcu_read_lock();
776 cgrp = task_dfl_cgroup(current);
777 allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
778 BPF_PROG_RUN);
779 rcu_read_unlock();
780
781 return !allow;
782}
783EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
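
/* Example of the access_type encoding above: a mknod() of a character device
 * reaches the BPF_CGROUP_DEVICE program with
 * access_type == (BPF_DEVCG_ACC_MKNOD << 16) | BPF_DEVCG_DEV_CHAR,
 * and the program returns 1 to allow the access or 0 to deny it.
 */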
784
785static const struct bpf_func_proto *
David Brazdil0f672f62019-12-10 10:32:29 +0000786cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000787{
788 switch (func_id) {
789 case BPF_FUNC_map_lookup_elem:
790 return &bpf_map_lookup_elem_proto;
791 case BPF_FUNC_map_update_elem:
792 return &bpf_map_update_elem_proto;
793 case BPF_FUNC_map_delete_elem:
794 return &bpf_map_delete_elem_proto;
David Brazdil0f672f62019-12-10 10:32:29 +0000795 case BPF_FUNC_map_push_elem:
796 return &bpf_map_push_elem_proto;
797 case BPF_FUNC_map_pop_elem:
798 return &bpf_map_pop_elem_proto;
799 case BPF_FUNC_map_peek_elem:
800 return &bpf_map_peek_elem_proto;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000801 case BPF_FUNC_get_current_uid_gid:
802 return &bpf_get_current_uid_gid_proto;
803 case BPF_FUNC_get_local_storage:
804 return &bpf_get_local_storage_proto;
David Brazdil0f672f62019-12-10 10:32:29 +0000805 case BPF_FUNC_get_current_cgroup_id:
806 return &bpf_get_current_cgroup_id_proto;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000807 case BPF_FUNC_trace_printk:
808 if (capable(CAP_SYS_ADMIN))
809 return bpf_get_trace_printk_proto();
David Brazdil0f672f62019-12-10 10:32:29 +0000810 /* fall through */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000811 default:
812 return NULL;
813 }
814}
815
David Brazdil0f672f62019-12-10 10:32:29 +0000816static const struct bpf_func_proto *
817cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
818{
819 return cgroup_base_func_proto(func_id, prog);
820}
821
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000822static bool cgroup_dev_is_valid_access(int off, int size,
823 enum bpf_access_type type,
824 const struct bpf_prog *prog,
825 struct bpf_insn_access_aux *info)
826{
827 const int size_default = sizeof(__u32);
828
829 if (type == BPF_WRITE)
830 return false;
831
832 if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
833 return false;
834 /* The verifier guarantees that size > 0. */
835 if (off % size != 0)
836 return false;
837
838 switch (off) {
839 case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
840 bpf_ctx_record_field_size(info, size_default);
841 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
842 return false;
843 break;
844 default:
845 if (size != size_default)
846 return false;
847 }
848
849 return true;
850}
851
852const struct bpf_prog_ops cg_dev_prog_ops = {
853};
854
855const struct bpf_verifier_ops cg_dev_verifier_ops = {
856 .get_func_proto = cgroup_dev_func_proto,
857 .is_valid_access = cgroup_dev_is_valid_access,
858};
David Brazdil0f672f62019-12-10 10:32:29 +0000859
860/**
861 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
862 *
863 * @head: sysctl table header
864 * @table: sysctl table
865 * @write: sysctl is being read (= 0) or written (= 1)
866 * @buf: pointer to buffer passed by user space
867 * @pcount: value-result argument: value is size of buffer pointed to by @buf,
868 * result is size of @new_buf if program set new value, initial value
869 * otherwise
870 * @ppos: value-result argument: value is position at which read from or write
871 * to sysctl is happening, result is new position if program overrode it,
872 * initial value otherwise
873 * @new_buf: pointer to pointer to new buffer that will be allocated if program
874 * overrides new value provided by user space on sysctl write
875 * NOTE: it's the caller's responsibility to free *new_buf if it was set
876 * @type: type of program to be executed
877 *
878 * Program is run when sysctl is being accessed, either read or written, and
879 * can allow or deny such access.
880 *
881 * This function will return %-EPERM if an attached program is found and
882 * returned value != 1 during execution. In all other cases 0 is returned.
883 */
884int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
885 struct ctl_table *table, int write,
886 void __user *buf, size_t *pcount,
887 loff_t *ppos, void **new_buf,
888 enum bpf_attach_type type)
889{
890 struct bpf_sysctl_kern ctx = {
891 .head = head,
892 .table = table,
893 .write = write,
894 .ppos = ppos,
895 .cur_val = NULL,
896 .cur_len = PAGE_SIZE,
897 .new_val = NULL,
898 .new_len = 0,
899 .new_updated = 0,
900 };
901 struct cgroup *cgrp;
902 int ret;
903
904 ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
905 if (ctx.cur_val) {
906 mm_segment_t old_fs;
907 loff_t pos = 0;
908
909 old_fs = get_fs();
910 set_fs(KERNEL_DS);
911 if (table->proc_handler(table, 0, (void __user *)ctx.cur_val,
912 &ctx.cur_len, &pos)) {
913 /* Let BPF program decide how to proceed. */
914 ctx.cur_len = 0;
915 }
916 set_fs(old_fs);
917 } else {
918 /* Let BPF program decide how to proceed. */
919 ctx.cur_len = 0;
920 }
921
922 if (write && buf && *pcount) {
923 /* BPF program should be able to override new value with a
924 * buffer bigger than provided by user.
925 */
926 ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
927 ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
928 if (!ctx.new_val ||
929 copy_from_user(ctx.new_val, buf, ctx.new_len))
930 /* Let BPF program decide how to proceed. */
931 ctx.new_len = 0;
932 }
933
934 rcu_read_lock();
935 cgrp = task_dfl_cgroup(current);
936 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
937 rcu_read_unlock();
938
939 kfree(ctx.cur_val);
940
941 if (ret == 1 && ctx.new_updated) {
942 *new_buf = ctx.new_val;
943 *pcount = ctx.new_len;
944 } else {
945 kfree(ctx.new_val);
946 }
947
948 return ret == 1 ? 0 : -EPERM;
949}
950EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
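
/* Minimal BPF-side sketch (libbpf-style section name, not part of this file)
 * of a program run by the hook above; it makes every sysctl under the cgroup
 * read-only:
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_deny_writes(struct bpf_sysctl *ctx)
 *	{
 *		return ctx->write ? 0 : 1;	// 0 = reject (-EPERM), 1 = proceed
 *	}
 */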
951
952#ifdef CONFIG_NET
953static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
954 enum bpf_attach_type attach_type)
955{
956 struct bpf_prog_array *prog_array;
957 bool empty;
958
959 rcu_read_lock();
960 prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
961 empty = bpf_prog_array_is_empty(prog_array);
962 rcu_read_unlock();
963
964 return empty;
965}
966
967static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
968{
Olivier Deprez0e641232021-09-23 10:07:05 +0200969 if (unlikely(max_optlen < 0))
David Brazdil0f672f62019-12-10 10:32:29 +0000970 return -EINVAL;
971
Olivier Deprez0e641232021-09-23 10:07:05 +0200972 if (unlikely(max_optlen > PAGE_SIZE)) {
973 /* We don't expose optvals that are greater than PAGE_SIZE
974 * to the BPF program.
975 */
976 max_optlen = PAGE_SIZE;
977 }
978
David Brazdil0f672f62019-12-10 10:32:29 +0000979 ctx->optval = kzalloc(max_optlen, GFP_USER);
980 if (!ctx->optval)
981 return -ENOMEM;
982
983 ctx->optval_end = ctx->optval + max_optlen;
984
Olivier Deprez0e641232021-09-23 10:07:05 +0200985 return max_optlen;
David Brazdil0f672f62019-12-10 10:32:29 +0000986}
987
988static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
989{
990 kfree(ctx->optval);
991}
992
993int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
994 int *optname, char __user *optval,
995 int *optlen, char **kernel_optval)
996{
997 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
998 struct bpf_sockopt_kern ctx = {
999 .sk = sk,
1000 .level = *level,
1001 .optname = *optname,
1002 };
1003 int ret, max_optlen;
1004
1005 /* Opportunistic check to see whether we have any BPF program
1006 * attached to the hook so we don't waste time allocating
1007 * memory and locking the socket.
1008 */
1009 if (!cgroup_bpf_enabled ||
1010 __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
1011 return 0;
1012
1013 /* Allocate a bit more than the initial user buffer for
1014 * BPF program. The canonical use case is overriding
1015 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
1016 */
1017 max_optlen = max_t(int, 16, *optlen);
1018
Olivier Deprez0e641232021-09-23 10:07:05 +02001019 max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
1020 if (max_optlen < 0)
1021 return max_optlen;
David Brazdil0f672f62019-12-10 10:32:29 +00001022
1023 ctx.optlen = *optlen;
1024
Olivier Deprez0e641232021-09-23 10:07:05 +02001025 if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
David Brazdil0f672f62019-12-10 10:32:29 +00001026 ret = -EFAULT;
1027 goto out;
1028 }
1029
1030 lock_sock(sk);
1031 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
1032 &ctx, BPF_PROG_RUN);
1033 release_sock(sk);
1034
1035 if (!ret) {
1036 ret = -EPERM;
1037 goto out;
1038 }
1039
1040 if (ctx.optlen == -1) {
1041 /* optlen set to -1, bypass kernel */
1042 ret = 1;
1043 } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
1044 /* optlen is out of bounds */
1045 ret = -EFAULT;
1046 } else {
1047 /* optlen within bounds, run kernel handler */
1048 ret = 0;
1049
1050 /* export any potential modifications */
1051 *level = ctx.level;
1052 *optname = ctx.optname;
Olivier Deprez0e641232021-09-23 10:07:05 +02001053
1054 /* optlen == 0 from BPF indicates that we should
1055 * use original userspace data.
1056 */
1057 if (ctx.optlen != 0) {
1058 *optlen = ctx.optlen;
1059 *kernel_optval = ctx.optval;
1060 /* export and don't free sockopt buf */
1061 return 0;
1062 }
David Brazdil0f672f62019-12-10 10:32:29 +00001063 }
1064
1065out:
Olivier Deprez0e641232021-09-23 10:07:05 +02001066 sockopt_free_buf(&ctx);
David Brazdil0f672f62019-12-10 10:32:29 +00001067 return ret;
1068}
1069EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
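
/* Summary of the BPF-side contract enforced above (derived from this
 * function, phrased informally): a BPF_CGROUP_SETSOCKOPT program returns 0 to
 * reject the setsockopt() with -EPERM and 1 to let it proceed; it may rewrite
 * ctx->level, ctx->optname, ctx->optval and ctx->optlen, and setting
 * ctx->optlen to -1 bypasses the kernel handler entirely.
 */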
1070
1071int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
1072 int optname, char __user *optval,
1073 int __user *optlen, int max_optlen,
1074 int retval)
1075{
1076 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1077 struct bpf_sockopt_kern ctx = {
1078 .sk = sk,
1079 .level = level,
1080 .optname = optname,
1081 .retval = retval,
1082 };
1083 int ret;
1084
1085 /* Opportunistic check to see whether we have any BPF program
1086 * attached to the hook so we don't waste time allocating
1087 * memory and locking the socket.
1088 */
1089 if (!cgroup_bpf_enabled ||
1090 __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
1091 return retval;
1092
David Brazdil0f672f62019-12-10 10:32:29 +00001093 ctx.optlen = max_optlen;
1094
Olivier Deprez0e641232021-09-23 10:07:05 +02001095 max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
1096 if (max_optlen < 0)
1097 return max_optlen;
1098
David Brazdil0f672f62019-12-10 10:32:29 +00001099 if (!retval) {
1100 /* If kernel getsockopt finished successfully,
1101 * copy whatever was returned to the user back
1102 * into our temporary buffer. Set optlen to the
1103 * one that kernel returned as well to let
1104 * BPF programs inspect the value.
1105 */
1106
1107 if (get_user(ctx.optlen, optlen)) {
1108 ret = -EFAULT;
1109 goto out;
1110 }
1111
Olivier Deprez0e641232021-09-23 10:07:05 +02001112 if (ctx.optlen < 0) {
1113 ret = -EFAULT;
1114 goto out;
1115 }
David Brazdil0f672f62019-12-10 10:32:29 +00001116
Olivier Deprez0e641232021-09-23 10:07:05 +02001117 if (copy_from_user(ctx.optval, optval,
1118 min(ctx.optlen, max_optlen)) != 0) {
David Brazdil0f672f62019-12-10 10:32:29 +00001119 ret = -EFAULT;
1120 goto out;
1121 }
1122 }
1123
1124 lock_sock(sk);
1125 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
1126 &ctx, BPF_PROG_RUN);
1127 release_sock(sk);
1128
1129 if (!ret) {
1130 ret = -EPERM;
1131 goto out;
1132 }
1133
Olivier Deprez0e641232021-09-23 10:07:05 +02001134 if (ctx.optlen > max_optlen || ctx.optlen < 0) {
David Brazdil0f672f62019-12-10 10:32:29 +00001135 ret = -EFAULT;
1136 goto out;
1137 }
1138
1139 /* BPF programs are only allowed to set retval to 0, not some
1140 * arbitrary value.
1141 */
1142 if (ctx.retval != 0 && ctx.retval != retval) {
1143 ret = -EFAULT;
1144 goto out;
1145 }
1146
Olivier Deprez0e641232021-09-23 10:07:05 +02001147 if (ctx.optlen != 0) {
1148 if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
1149 put_user(ctx.optlen, optlen)) {
1150 ret = -EFAULT;
1151 goto out;
1152 }
David Brazdil0f672f62019-12-10 10:32:29 +00001153 }
1154
1155 ret = ctx.retval;
1156
1157out:
1158 sockopt_free_buf(&ctx);
1159 return ret;
1160}
1161EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
1162#endif
1163
1164static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
1165 size_t *lenp)
1166{
1167 ssize_t tmp_ret = 0, ret;
1168
1169 if (dir->header.parent) {
1170 tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
1171 if (tmp_ret < 0)
1172 return tmp_ret;
1173 }
1174
1175 ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
1176 if (ret < 0)
1177 return ret;
1178 *bufp += ret;
1179 *lenp -= ret;
1180 ret += tmp_ret;
1181
1182 /* Avoid leading slash. */
1183 if (!ret)
1184 return ret;
1185
1186 tmp_ret = strscpy(*bufp, "/", *lenp);
1187 if (tmp_ret < 0)
1188 return tmp_ret;
1189 *bufp += tmp_ret;
1190 *lenp -= tmp_ret;
1191
1192 return ret + tmp_ret;
1193}
1194
1195BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
1196 size_t, buf_len, u64, flags)
1197{
1198 ssize_t tmp_ret = 0, ret;
1199
1200 if (!buf)
1201 return -EINVAL;
1202
1203 if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
1204 if (!ctx->head)
1205 return -EINVAL;
1206 tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
1207 if (tmp_ret < 0)
1208 return tmp_ret;
1209 }
1210
1211 ret = strscpy(buf, ctx->table->procname, buf_len);
1212
1213 return ret < 0 ? ret : tmp_ret + ret;
1214}
1215
1216static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
1217 .func = bpf_sysctl_get_name,
1218 .gpl_only = false,
1219 .ret_type = RET_INTEGER,
1220 .arg1_type = ARG_PTR_TO_CTX,
1221 .arg2_type = ARG_PTR_TO_MEM,
1222 .arg3_type = ARG_CONST_SIZE,
1223 .arg4_type = ARG_ANYTHING,
1224};
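
/* Example of the helper's output: for /proc/sys/net/ipv4/tcp_mem the name
 * copied into buf is "net/ipv4/tcp_mem", while passing BPF_F_SYSCTL_BASE_NAME
 * yields just "tcp_mem".
 */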
1225
1226static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
1227 size_t src_len)
1228{
1229 if (!dst)
1230 return -EINVAL;
1231
1232 if (!dst_len)
1233 return -E2BIG;
1234
1235 if (!src || !src_len) {
1236 memset(dst, 0, dst_len);
1237 return -EINVAL;
1238 }
1239
1240 memcpy(dst, src, min(dst_len, src_len));
1241
1242 if (dst_len > src_len) {
1243 memset(dst + src_len, '\0', dst_len - src_len);
1244 return src_len;
1245 }
1246
1247 dst[dst_len - 1] = '\0';
1248
1249 return -E2BIG;
1250}
1251
1252BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
1253 char *, buf, size_t, buf_len)
1254{
1255 return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
1256}
1257
1258static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
1259 .func = bpf_sysctl_get_current_value,
1260 .gpl_only = false,
1261 .ret_type = RET_INTEGER,
1262 .arg1_type = ARG_PTR_TO_CTX,
1263 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
1264 .arg3_type = ARG_CONST_SIZE,
1265};
1266
1267BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
1268 size_t, buf_len)
1269{
1270 if (!ctx->write) {
1271 if (buf && buf_len)
1272 memset(buf, '\0', buf_len);
1273 return -EINVAL;
1274 }
1275 return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
1276}
1277
1278static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
1279 .func = bpf_sysctl_get_new_value,
1280 .gpl_only = false,
1281 .ret_type = RET_INTEGER,
1282 .arg1_type = ARG_PTR_TO_CTX,
1283 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
1284 .arg3_type = ARG_CONST_SIZE,
1285};
1286
1287BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
1288 const char *, buf, size_t, buf_len)
1289{
1290 if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
1291 return -EINVAL;
1292
1293 if (buf_len > PAGE_SIZE - 1)
1294 return -E2BIG;
1295
1296 memcpy(ctx->new_val, buf, buf_len);
1297 ctx->new_len = buf_len;
1298 ctx->new_updated = 1;
1299
1300 return 0;
1301}
1302
1303static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
1304 .func = bpf_sysctl_set_new_value,
1305 .gpl_only = false,
1306 .ret_type = RET_INTEGER,
1307 .arg1_type = ARG_PTR_TO_CTX,
1308 .arg2_type = ARG_PTR_TO_MEM,
1309 .arg3_type = ARG_CONST_SIZE,
1310};
1311
1312static const struct bpf_func_proto *
1313sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1314{
1315 switch (func_id) {
1316 case BPF_FUNC_strtol:
1317 return &bpf_strtol_proto;
1318 case BPF_FUNC_strtoul:
1319 return &bpf_strtoul_proto;
1320 case BPF_FUNC_sysctl_get_name:
1321 return &bpf_sysctl_get_name_proto;
1322 case BPF_FUNC_sysctl_get_current_value:
1323 return &bpf_sysctl_get_current_value_proto;
1324 case BPF_FUNC_sysctl_get_new_value:
1325 return &bpf_sysctl_get_new_value_proto;
1326 case BPF_FUNC_sysctl_set_new_value:
1327 return &bpf_sysctl_set_new_value_proto;
1328 default:
1329 return cgroup_base_func_proto(func_id, prog);
1330 }
1331}
1332
1333static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
1334 const struct bpf_prog *prog,
1335 struct bpf_insn_access_aux *info)
1336{
1337 const int size_default = sizeof(__u32);
1338
1339 if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
1340 return false;
1341
1342 switch (off) {
1343 case bpf_ctx_range(struct bpf_sysctl, write):
1344 if (type != BPF_READ)
1345 return false;
1346 bpf_ctx_record_field_size(info, size_default);
1347 return bpf_ctx_narrow_access_ok(off, size, size_default);
1348 case bpf_ctx_range(struct bpf_sysctl, file_pos):
1349 if (type == BPF_READ) {
1350 bpf_ctx_record_field_size(info, size_default);
1351 return bpf_ctx_narrow_access_ok(off, size, size_default);
1352 } else {
1353 return size == size_default;
1354 }
1355 default:
1356 return false;
1357 }
1358}
1359
1360static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
1361 const struct bpf_insn *si,
1362 struct bpf_insn *insn_buf,
1363 struct bpf_prog *prog, u32 *target_size)
1364{
1365 struct bpf_insn *insn = insn_buf;
1366 u32 read_size;
1367
1368 switch (si->off) {
1369 case offsetof(struct bpf_sysctl, write):
1370 *insn++ = BPF_LDX_MEM(
1371 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
1372 bpf_target_off(struct bpf_sysctl_kern, write,
1373 FIELD_SIZEOF(struct bpf_sysctl_kern,
1374 write),
1375 target_size));
1376 break;
1377 case offsetof(struct bpf_sysctl, file_pos):
1378 /* ppos is a pointer so it should be accessed via indirect
1379 * loads and stores. Also for stores an additional temporary
1380 * register is used since neither src_reg nor dst_reg can be
1381 * overridden.
1382 */
1383 if (type == BPF_WRITE) {
1384 int treg = BPF_REG_9;
1385
1386 if (si->src_reg == treg || si->dst_reg == treg)
1387 --treg;
1388 if (si->src_reg == treg || si->dst_reg == treg)
1389 --treg;
1390 *insn++ = BPF_STX_MEM(
1391 BPF_DW, si->dst_reg, treg,
1392 offsetof(struct bpf_sysctl_kern, tmp_reg));
1393 *insn++ = BPF_LDX_MEM(
1394 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
1395 treg, si->dst_reg,
1396 offsetof(struct bpf_sysctl_kern, ppos));
1397 *insn++ = BPF_STX_MEM(
1398 BPF_SIZEOF(u32), treg, si->src_reg,
1399 bpf_ctx_narrow_access_offset(
1400 0, sizeof(u32), sizeof(loff_t)));
1401 *insn++ = BPF_LDX_MEM(
1402 BPF_DW, treg, si->dst_reg,
1403 offsetof(struct bpf_sysctl_kern, tmp_reg));
1404 } else {
1405 *insn++ = BPF_LDX_MEM(
1406 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
1407 si->dst_reg, si->src_reg,
1408 offsetof(struct bpf_sysctl_kern, ppos));
1409 read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
1410 *insn++ = BPF_LDX_MEM(
1411 BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
1412 bpf_ctx_narrow_access_offset(
1413 0, read_size, sizeof(loff_t)));
1414 }
1415 *target_size = sizeof(u32);
1416 break;
1417 }
1418
1419 return insn - insn_buf;
1420}
1421
1422const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
1423 .get_func_proto = sysctl_func_proto,
1424 .is_valid_access = sysctl_is_valid_access,
1425 .convert_ctx_access = sysctl_convert_ctx_access,
1426};
1427
1428const struct bpf_prog_ops cg_sysctl_prog_ops = {
1429};
1430
1431static const struct bpf_func_proto *
1432cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1433{
1434 switch (func_id) {
1435#ifdef CONFIG_NET
1436 case BPF_FUNC_sk_storage_get:
1437 return &bpf_sk_storage_get_proto;
1438 case BPF_FUNC_sk_storage_delete:
1439 return &bpf_sk_storage_delete_proto;
1440#endif
1441#ifdef CONFIG_INET
1442 case BPF_FUNC_tcp_sock:
1443 return &bpf_tcp_sock_proto;
1444#endif
1445 default:
1446 return cgroup_base_func_proto(func_id, prog);
1447 }
1448}
1449
1450static bool cg_sockopt_is_valid_access(int off, int size,
1451 enum bpf_access_type type,
1452 const struct bpf_prog *prog,
1453 struct bpf_insn_access_aux *info)
1454{
1455 const int size_default = sizeof(__u32);
1456
1457 if (off < 0 || off >= sizeof(struct bpf_sockopt))
1458 return false;
1459
1460 if (off % size != 0)
1461 return false;
1462
1463 if (type == BPF_WRITE) {
1464 switch (off) {
1465 case offsetof(struct bpf_sockopt, retval):
1466 if (size != size_default)
1467 return false;
1468 return prog->expected_attach_type ==
1469 BPF_CGROUP_GETSOCKOPT;
1470 case offsetof(struct bpf_sockopt, optname):
1471 /* fallthrough */
1472 case offsetof(struct bpf_sockopt, level):
1473 if (size != size_default)
1474 return false;
1475 return prog->expected_attach_type ==
1476 BPF_CGROUP_SETSOCKOPT;
1477 case offsetof(struct bpf_sockopt, optlen):
1478 return size == size_default;
1479 default:
1480 return false;
1481 }
1482 }
1483
1484 switch (off) {
1485 case offsetof(struct bpf_sockopt, sk):
1486 if (size != sizeof(__u64))
1487 return false;
1488 info->reg_type = PTR_TO_SOCKET;
1489 break;
1490 case offsetof(struct bpf_sockopt, optval):
1491 if (size != sizeof(__u64))
1492 return false;
1493 info->reg_type = PTR_TO_PACKET;
1494 break;
1495 case offsetof(struct bpf_sockopt, optval_end):
1496 if (size != sizeof(__u64))
1497 return false;
1498 info->reg_type = PTR_TO_PACKET_END;
1499 break;
1500 case offsetof(struct bpf_sockopt, retval):
1501 if (size != size_default)
1502 return false;
1503 return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
1504 default:
1505 if (size != size_default)
1506 return false;
1507 break;
1508 }
1509 return true;
1510}
1511
1512#define CG_SOCKOPT_ACCESS_FIELD(T, F) \
1513 T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
1514 si->dst_reg, si->src_reg, \
1515 offsetof(struct bpf_sockopt_kern, F))
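
/* For example, CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen) expands to
 *
 *	BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, optlen),
 *		    si->dst_reg, si->src_reg,
 *		    offsetof(struct bpf_sockopt_kern, optlen))
 *
 * i.e. a load of the kernel-side context field into dst_reg.
 */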
1516
1517static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
1518 const struct bpf_insn *si,
1519 struct bpf_insn *insn_buf,
1520 struct bpf_prog *prog,
1521 u32 *target_size)
1522{
1523 struct bpf_insn *insn = insn_buf;
1524
1525 switch (si->off) {
1526 case offsetof(struct bpf_sockopt, sk):
1527 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
1528 break;
1529 case offsetof(struct bpf_sockopt, level):
1530 if (type == BPF_WRITE)
1531 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
1532 else
1533 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
1534 break;
1535 case offsetof(struct bpf_sockopt, optname):
1536 if (type == BPF_WRITE)
1537 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
1538 else
1539 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
1540 break;
1541 case offsetof(struct bpf_sockopt, optlen):
1542 if (type == BPF_WRITE)
1543 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
1544 else
1545 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
1546 break;
1547 case offsetof(struct bpf_sockopt, retval):
1548 if (type == BPF_WRITE)
1549 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
1550 else
1551 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
1552 break;
1553 case offsetof(struct bpf_sockopt, optval):
1554 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
1555 break;
1556 case offsetof(struct bpf_sockopt, optval_end):
1557 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
1558 break;
1559 }
1560
1561 return insn - insn_buf;
1562}
1563
1564static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
1565 bool direct_write,
1566 const struct bpf_prog *prog)
1567{
1568 /* Nothing to do for sockopt argument. The data is kzalloc'ated.
1569 */
1570 return 0;
1571}
1572
1573const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
1574 .get_func_proto = cg_sockopt_func_proto,
1575 .is_valid_access = cg_sockopt_is_valid_access,
1576 .convert_ctx_access = cg_sockopt_convert_ctx_access,
1577 .gen_prologue = cg_sockopt_get_prologue,
1578};
1579
1580const struct bpf_prog_ops cg_sockopt_prog_ops = {
1581};