/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			   (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			   (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			   (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK   (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(void __user *uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end  = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}
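
/* Illustrative sketch (not kernel code): a newer userspace may pass a
 * larger bpf_attr than this kernel knows about, and that is fine as long
 * as the unknown tail is zero:
 *
 *	union bpf_attr attr = {};	// unknown tail implicitly zeroed
 *	attr.map_type = BPF_MAP_TYPE_ARRAY;
 *	syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * If userspace instead sets a field this kernel does not implement, the
 * loop above finds a non-zero byte past expected_size and returns -E2BIG.
 */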

const struct bpf_map_ops bpf_map_offload_ops = {
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	const struct bpf_map_ops *ops;
	u32 type = attr->map_type;
	struct bpf_map *map;
	int err;

	if (type >= ARRAY_SIZE(bpf_map_types))
		return ERR_PTR(-EINVAL);
	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[type];
	if (!ops)
		return ERR_PTR(-EINVAL);

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	map = ops->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = ops;
	map->map_type = type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}
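
/* Sizing note (a worked example, assuming 4 KiB pages and the default
 * PAGE_ALLOC_COSTLY_ORDER of 3): requests up to 32 KiB are first tried
 * with kmalloc_node(); anything larger, or a failed kmalloc, falls back
 * to vmalloc, which does not need physically contiguous pages.
 */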

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
	map->map_type = attr->map_type;
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
	map->map_flags = attr->map_flags;
	map->numa_node = bpf_map_attr_numa_node(attr);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_charge_memlock(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
		atomic_long_sub(pages, &user->locked_vm);
		return -EPERM;
	}
	return 0;
}

static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
{
	atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_map_init_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = bpf_charge_memlock(user, map->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}
	map->user = user;
	return ret;
}

static void bpf_map_release_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	bpf_uncharge_memlock(user, map->pages);
	free_uid(user);
}

int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
{
	int ret;

	ret = bpf_charge_memlock(map->user, pages);
	if (ret)
		return ret;
	map->pages += pages;
	return ret;
}

void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
{
	bpf_uncharge_memlock(map->user, pages);
	map->pages -= pages;
}
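
/* Accounting sketch (illustrative numbers): a map needing 1 MiB on a
 * system with 4 KiB pages charges 256 pages against the owner's
 * RLIMIT_MEMLOCK, roughly:
 *
 *	map->pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
 *	err = bpf_charge_memlock(user, map->pages);	// -EPERM past the rlimit
 */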

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);
	idr_preload_end();

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	/* Offloaded maps are removed from the IDR store when their device
	 * disappears - even if someone holds an fd to them they are unusable,
	 * the memory is gone, all ops will fail; they are simply waiting for
	 * refcnt to drop to be freed.
	 */
	if (!map->id)
		return;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);
	map->id = 0;

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_release_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
}
/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		btf_put(map->btf);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}
EXPORT_SYMBOL_GPL(bpf_map_put);

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n"
		   "map_id:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT,
		   map->id);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
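
/* Expansion example (illustrative): for BPF_MAP_CREATE, whose last field
 * is btf_value_type_id, CHECK_ATTR(BPF_MAP_CREATE) checks that every byte
 * of 'union bpf_attr' past that field is zero, so an older kernel rejects
 * an attr that sets fields it does not implement.
 */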

/* dst and src must have at least BPF_OBJ_NAME_LEN bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	memset(dst, 0, BPF_OBJ_NAME_LEN);

	/* Copy all isalnum() and '_' chars */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in the first BPF_OBJ_NAME_LEN bytes */
	if (src == end)
		return -EINVAL;

	return 0;
}
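
/* Naming examples (illustrative): "my_map" and "Counter_1" are accepted,
 * "my-map" fails on the '-', and a name that fills all BPF_OBJ_NAME_LEN
 * bytes leaves no room for the terminating NUL and fails with -EINVAL.
 */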

int map_check_no_btf(const struct bpf_map *map,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
{
	return -ENOTSUPP;
}

static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
			 u32 btf_key_id, u32 btf_value_id)
{
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	int ret = 0;

	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
	if (!key_type || key_size != map->key_size)
		return -EINVAL;

	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size != map->value_size)
		return -EINVAL;

	if (map->ops->map_check_btf)
		ret = map->ops->map_check_btf(map, key_type, value_type);

	return ret;
}

#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	if (attr->btf_key_type_id || attr->btf_value_type_id) {
		struct btf *btf;

		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
			err = -EINVAL;
			goto free_map_nouncharge;
		}

		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			err = PTR_ERR(btf);
			goto free_map_nouncharge;
		}

		err = map_check_btf(map, btf, attr->btf_key_type_id,
				    attr->btf_value_type_id);
		if (err) {
			btf_put(btf);
			goto free_map_nouncharge;
		}

		map->btf = btf;
		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
	}

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_init_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	return err;

free_map:
	bpf_map_release_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	btf_put(map->btf);
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}
EXPORT_SYMBOL_GPL(bpf_map_inc);
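
/* Refcounting sketch (informal): 'refcnt' pins the map object itself,
 * while 'usercnt' counts user-visible handles such as fds. A loaded
 * program holding the map keeps refcnt elevated without touching usercnt,
 * so map_release_uref() can run once the last fd goes away even though
 * the map memory stays alive until the program is unloaded.
 */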

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
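
/* Per-cpu layout example (illustrative, assuming a 12-byte value and 4
 * possible CPUs): the buffer copied to userspace is
 * round_up(12, 8) * 4 = 64 bytes, one 8-byte-aligned slot per possible
 * CPU, in CPU order.
 */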

static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running BPF programs to complete so that
	 * userspace, when we return to it, knows that all programs
	 * that could be running use the new map value.
	 */
	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
		synchronize_rcu();
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_update_elem(map, key, value, attr->flags);
		goto out;
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete; otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (IS_FD_ARRAY(map)) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		/* rcu_read_lock() is not needed */
		err = bpf_fd_reuseport_array_update_elem(map, key, value,
							 attr->flags);
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
	maybe_wait_bpf_programs(map);
out:
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_delete_elem(map, key);
		goto out;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
	maybe_wait_bpf_programs(map);
out:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_get_next_key(map, key, next_key);
		goto out;
	}

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
out:
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	const struct bpf_prog_ops *ops;

	if (type >= ARRAY_SIZE(bpf_prog_types))
		return -EINVAL;
	type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
	ops = bpf_prog_types[type];
	if (!ops)
		return -EINVAL;

	if (!bpf_prog_is_dev_bound(prog->aux))
		prog->aux->ops = ops;
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	if (aux->cgroup_storage)
		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);
	idr_preload_end();

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store.
	 * Offloaded programs are removed from the store when their device
	 * disappears - even if someone grabs an fd to them they are unusable,
	 * simply waiting for refcnt to drop to be freed.
	 */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);
	prog->aux->id = 0;

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);
		bpf_prog_kallsyms_del_all(prog);

		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n"
		   "prog_id:\t%u\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT,
		   prog->aux->id);
}
#endif

const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

bool bpf_prog_get_ok(struct bpf_prog *prog,
		     enum bpf_prog_type *attach_type, bool attach_drv)
{
	/* not an attachment, just a refcount inc, always allow */
	if (!attach_type)
		return true;

	if (prog->type != *attach_type)
		return false;
	if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
		return false;

	return true;
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
				       bool attach_drv)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL, false);
}

struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       bool attach_drv)
{
	return __bpf_prog_get(ufd, &type, attach_drv);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);

/* Initially all BPF programs could be loaded w/o specifying
 * expected_attach_type. Later for some of them specifying expected_attach_type
 * at load time became required so that the program could be validated properly.
 * Programs of types that are allowed to be loaded both w/ and w/o (for
 * backward compatibility) expected_attach_type should have the default attach
 * type assigned to expected_attach_type for the latter case, so that it can be
 * validated later at attach time.
 *
 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if the
 * prog type requires it but has some attach types that have to be backward
 * compatible.
 */
static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
{
	switch (attr->prog_type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
		/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
		 * exist so checking for non-zero is the way to go here.
		 */
		if (!attr->expected_attach_type)
			attr->expected_attach_type =
				BPF_CGROUP_INET_SOCK_CREATE;
		break;
	}
}

static int
bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
				enum bpf_attach_type expected_attach_type)
{
	switch (prog_type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET_SOCK_CREATE:
		case BPF_CGROUP_INET4_POST_BIND:
		case BPF_CGROUP_INET6_POST_BIND:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET4_BIND:
		case BPF_CGROUP_INET6_BIND:
		case BPF_CGROUP_INET4_CONNECT:
		case BPF_CGROUP_INET6_CONNECT:
		case BPF_CGROUP_UDP4_SENDMSG:
		case BPF_CGROUP_UDP6_SENDMSG:
			return 0;
		default:
			return -EINVAL;
		}
	default:
		return 0;
	}
}
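
/* Usage sketch (illustrative userspace, not kernel code): a cgroup
 * sock_addr program must declare its attach point at load time, e.g.:
 *
 *	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
 *	attr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
 *
 * Any type outside the switch above (say, BPF_CGROUP_INET_INGRESS) makes
 * bpf_prog_load() fail with -EINVAL before the verifier ever runs.
 */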

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	bpf_prog_load_fixup_attach_type(attr);
	if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
		return -EINVAL;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->expected_attach_type = attr->expected_attach_type;

	prog->aux->offload_requested = !!attr->prog_ifindex;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_init(prog, attr);
		if (err)
			goto free_prog;
	}

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	return err;

free_used_maps:
	bpf_prog_kallsyms_del_subprogs(prog);
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

struct bpf_raw_tracepoint {
	struct bpf_raw_event_map *btp;
	struct bpf_prog *prog;
};

static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
{
	struct bpf_raw_tracepoint *raw_tp = filp->private_data;

	if (raw_tp->prog) {
		bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
		bpf_prog_put(raw_tp->prog);
	}
	kfree(raw_tp);
	return 0;
}

static const struct file_operations bpf_raw_tp_fops = {
	.release	= bpf_raw_tracepoint_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd

static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
	struct bpf_raw_tracepoint *raw_tp;
	struct bpf_raw_event_map *btp;
	struct bpf_prog *prog;
	char tp_name[128];
	int tp_fd, err;

	if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
			      sizeof(tp_name) - 1) < 0)
		return -EFAULT;
	tp_name[sizeof(tp_name) - 1] = 0;

	btp = bpf_find_raw_tracepoint(tp_name);
	if (!btp)
		return -ENOENT;

	raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
	if (!raw_tp)
		return -ENOMEM;
	raw_tp->btp = btp;

	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
				 BPF_PROG_TYPE_RAW_TRACEPOINT);
	if (IS_ERR(prog)) {
		err = PTR_ERR(prog);
		goto out_free_tp;
	}

	err = bpf_probe_register(raw_tp->btp, prog);
	if (err)
		goto out_put_prog;

	raw_tp->prog = prog;
	tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
				 O_CLOEXEC);
	if (tp_fd < 0) {
		bpf_probe_unregister(raw_tp->btp, prog);
		err = tp_fd;
		goto out_put_prog;
	}
	return tp_fd;

out_put_prog:
	bpf_prog_put(prog);
out_free_tp:
	kfree(raw_tp);
	return err;
}

static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
					     enum bpf_attach_type attach_type)
{
	switch (prog->type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
	default:
		return 0;
	}
}

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_INET4_POST_BIND:
	case BPF_CGROUP_INET6_POST_BIND:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_INET4_BIND:
	case BPF_CGROUP_INET6_BIND:
	case BPF_CGROUP_INET4_CONNECT:
	case BPF_CGROUP_INET6_CONNECT:
	case BPF_CGROUP_UDP4_SENDMSG:
	case BPF_CGROUP_UDP6_SENDMSG:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_MSG_VERDICT:
		ptype = BPF_PROG_TYPE_SK_MSG;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		ptype = BPF_PROG_TYPE_SK_SKB;
		break;
	case BPF_LIRC_MODE2:
		ptype = BPF_PROG_TYPE_LIRC_MODE2;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
		bpf_prog_put(prog);
		return -EINVAL;
	}

	switch (ptype) {
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		ret = sockmap_get_from_fd(attr, ptype, prog);
		break;
	case BPF_PROG_TYPE_LIRC_MODE2:
		ret = lirc_prog_attach(attr, prog);
		break;
	default:
		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
	}

	if (ret)
		bpf_prog_put(prog);
	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_INET4_POST_BIND:
	case BPF_CGROUP_INET6_POST_BIND:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_INET4_BIND:
	case BPF_CGROUP_INET6_BIND:
	case BPF_CGROUP_INET4_CONNECT:
	case BPF_CGROUP_INET6_CONNECT:
	case BPF_CGROUP_UDP4_SENDMSG:
	case BPF_CGROUP_UDP6_SENDMSG:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_MSG_VERDICT:
		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
	case BPF_LIRC_MODE2:
		return lirc_prog_detach(attr);
	default:
		return -EINVAL;
	}

	return cgroup_bpf_prog_detach(attr, ptype);
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_INET4_BIND:
	case BPF_CGROUP_INET6_BIND:
	case BPF_CGROUP_INET4_POST_BIND:
	case BPF_CGROUP_INET6_POST_BIND:
	case BPF_CGROUP_INET4_CONNECT:
	case BPF_CGROUP_INET6_CONNECT:
	case BPF_CGROUP_UDP4_SENDMSG:
	case BPF_CGROUP_UDP6_SENDMSG:
	case BPF_CGROUP_SOCK_OPS:
	case BPF_CGROUP_DEVICE:
		break;
	case BPF_LIRC_MODE2:
		return lirc_prog_query(attr, uattr);
	default:
		return -EINVAL;
	}

	return cgroup_bpf_prog_query(attr, uattr);
}

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int f_flags;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	f_flags = bpf_get_file_flag(attr->open_flags);
	if (f_flags < 0)
		return f_flags;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map, f_flags);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
					      unsigned long addr)
{
	int i;

	for (i = 0; i < prog->aux->used_map_cnt; i++)
		if (prog->aux->used_maps[i] == (void *)addr)
			return prog->aux->used_maps[i];
	return NULL;
}

static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
{
	const struct bpf_map *map;
	struct bpf_insn *insns;
	u64 imm;
	int i;

	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
			GFP_USER);
	if (!insns)
		return insns;

	for (i = 0; i < prog->len; i++) {
		if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) {
			insns[i].code = BPF_JMP | BPF_CALL;
			insns[i].imm = BPF_FUNC_tail_call;
			/* fall-through */
		}
		if (insns[i].code == (BPF_JMP | BPF_CALL) ||
		    insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
			if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
				insns[i].code = BPF_JMP | BPF_CALL;
			if (!bpf_dump_raw_ok())
				insns[i].imm = 0;
			continue;
		}

		if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW))
			continue;

		imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
		map = bpf_map_from_imm(prog, imm);
		if (map) {
			insns[i].src_reg = BPF_PSEUDO_MAP_FD;
			insns[i].imm = map->id;
			insns[i + 1].imm = 0;
			continue;
		}

		if (!bpf_dump_raw_ok() &&
		    imm == (unsigned long)prog->aux) {
			insns[i].imm = 0;
			insns[i + 1].imm = 0;
			continue;
		}
	}

	return insns;
}
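
/* Sanitizing example (informal): a verifier-rewritten 16-byte ld_imm64
 * (BPF_LD | BPF_IMM | BPF_DW) whose imm64 holds a map pointer is dumped
 * with src_reg = BPF_PSEUDO_MAP_FD and imm = map->id, so the raw kernel
 * address never reaches userspace.
 */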
1930
1931static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
1932 const union bpf_attr *attr,
1933 union bpf_attr __user *uattr)
1934{
1935 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
1936 struct bpf_prog_info info = {};
1937 u32 info_len = attr->info.info_len;
1938 char __user *uinsns;
1939 u32 ulen;
1940 int err;
1941
1942 err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
1943 if (err)
1944 return err;
1945 info_len = min_t(u32, sizeof(info), info_len);
1946
1947 if (copy_from_user(&info, uinfo, info_len))
1948 return -EFAULT;
1949
1950 info.type = prog->type;
1951 info.id = prog->aux->id;
1952 info.load_time = prog->aux->load_time;
1953 info.created_by_uid = from_kuid_munged(current_user_ns(),
1954 prog->aux->user->uid);
1955 info.gpl_compatible = prog->gpl_compatible;
1956
1957 memcpy(info.tag, prog->tag, sizeof(prog->tag));
1958 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
1959
1960 ulen = info.nr_map_ids;
1961 info.nr_map_ids = prog->aux->used_map_cnt;
1962 ulen = min_t(u32, info.nr_map_ids, ulen);
1963 if (ulen) {
1964 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
1965 u32 i;
1966
1967 for (i = 0; i < ulen; i++)
1968 if (put_user(prog->aux->used_maps[i]->id,
1969 &user_map_ids[i]))
1970 return -EFAULT;
1971 }
1972
1973 if (!capable(CAP_SYS_ADMIN)) {
1974 info.jited_prog_len = 0;
1975 info.xlated_prog_len = 0;
1976 info.nr_jited_ksyms = 0;
1977 info.nr_jited_func_lens = 0;
1978 goto done;
1979 }
1980
1981 ulen = info.xlated_prog_len;
1982 info.xlated_prog_len = bpf_prog_insn_size(prog);
1983 if (info.xlated_prog_len && ulen) {
1984 struct bpf_insn *insns_sanitized;
1985 bool fault;
1986
1987 if (prog->blinded && !bpf_dump_raw_ok()) {
1988 info.xlated_prog_insns = 0;
1989 goto done;
1990 }
1991 insns_sanitized = bpf_insn_prepare_dump(prog);
1992 if (!insns_sanitized)
1993 return -ENOMEM;
1994 uinsns = u64_to_user_ptr(info.xlated_prog_insns);
1995 ulen = min_t(u32, info.xlated_prog_len, ulen);
1996 fault = copy_to_user(uinsns, insns_sanitized, ulen);
1997 kfree(insns_sanitized);
1998 if (fault)
1999 return -EFAULT;
2000 }
2001
2002 if (bpf_prog_is_dev_bound(prog->aux)) {
2003 err = bpf_prog_offload_info_fill(&info, prog);
2004 if (err)
2005 return err;
2006 goto done;
2007 }
2008
2009 /* NOTE: the following code is supposed to be skipped for offload.
2010 * bpf_prog_offload_info_fill() is the place to fill similar fields
2011 * for offload.
2012 */
2013 ulen = info.jited_prog_len;
2014 if (prog->aux->func_cnt) {
2015 u32 i;
2016
2017 info.jited_prog_len = 0;
2018 for (i = 0; i < prog->aux->func_cnt; i++)
2019 info.jited_prog_len += prog->aux->func[i]->jited_len;
2020 } else {
2021 info.jited_prog_len = prog->jited_len;
2022 }
2023
2024 if (info.jited_prog_len && ulen) {
2025 if (bpf_dump_raw_ok()) {
2026 uinsns = u64_to_user_ptr(info.jited_prog_insns);
2027 ulen = min_t(u32, info.jited_prog_len, ulen);
2028
2029 /* for multi-function programs, copy the JITed
2030 * instructions for all the functions
2031 */
2032 if (prog->aux->func_cnt) {
2033 u32 len, free, i;
2034 u8 *img;
2035
2036 free = ulen;
2037 for (i = 0; i < prog->aux->func_cnt; i++) {
2038 len = prog->aux->func[i]->jited_len;
2039 len = min_t(u32, len, free);
2040 img = (u8 *) prog->aux->func[i]->bpf_func;
2041 if (copy_to_user(uinsns, img, len))
2042 return -EFAULT;
2043 uinsns += len;
2044 free -= len;
2045 if (!free)
2046 break;
2047 }
2048 } else {
2049 if (copy_to_user(uinsns, prog->bpf_func, ulen))
2050 return -EFAULT;
2051 }
2052 } else {
2053 info.jited_prog_insns = 0;
2054 }
2055 }
2056
2057 ulen = info.nr_jited_ksyms;
2058 info.nr_jited_ksyms = prog->aux->func_cnt;
2059 if (info.nr_jited_ksyms && ulen) {
2060 if (bpf_dump_raw_ok()) {
2061 u64 __user *user_ksyms;
2062 ulong ksym_addr;
2063 u32 i;
2064
2065 /* copy the address of the kernel symbol
2066 * corresponding to each function
2067 */
2068 ulen = min_t(u32, info.nr_jited_ksyms, ulen);
2069 user_ksyms = u64_to_user_ptr(info.jited_ksyms);
2070 for (i = 0; i < ulen; i++) {
2071 ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
2072 ksym_addr &= PAGE_MASK;
2073 if (put_user((u64) ksym_addr, &user_ksyms[i]))
2074 return -EFAULT;
2075 }
2076 } else {
2077 info.jited_ksyms = 0;
2078 }
2079 }
2080
2081 ulen = info.nr_jited_func_lens;
2082 info.nr_jited_func_lens = prog->aux->func_cnt;
2083 if (info.nr_jited_func_lens && ulen) {
2084 if (bpf_dump_raw_ok()) {
2085 u32 __user *user_lens;
2086 u32 func_len, i;
2087
2088 /* copy the JITed image lengths for each function */
2089 ulen = min_t(u32, info.nr_jited_func_lens, ulen);
2090 user_lens = u64_to_user_ptr(info.jited_func_lens);
2091 for (i = 0; i < ulen; i++) {
2092 func_len = prog->aux->func[i]->jited_len;
2093 if (put_user(func_len, &user_lens[i]))
2094 return -EFAULT;
2095 }
2096 } else {
2097 info.jited_func_lens = 0;
2098 }
2099 }
2100
2101done:
2102 if (copy_to_user(uinfo, &info, info_len) ||
2103 put_user(info_len, &uattr->info.info_len))
2104 return -EFAULT;
2105
2106 return 0;
2107}
2108
2109static int bpf_map_get_info_by_fd(struct bpf_map *map,
2110 const union bpf_attr *attr,
2111 union bpf_attr __user *uattr)
2112{
2113 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
2114 struct bpf_map_info info = {};
2115 u32 info_len = attr->info.info_len;
2116 int err;
2117
2118 err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
2119 if (err)
2120 return err;
2121 info_len = min_t(u32, sizeof(info), info_len);
2122
2123 info.type = map->map_type;
2124 info.id = map->id;
2125 info.key_size = map->key_size;
2126 info.value_size = map->value_size;
2127 info.max_entries = map->max_entries;
2128 info.map_flags = map->map_flags;
2129 memcpy(info.name, map->name, sizeof(map->name));
2130
2131 if (map->btf) {
2132 info.btf_id = btf_id(map->btf);
2133 info.btf_key_type_id = map->btf_key_type_id;
2134 info.btf_value_type_id = map->btf_value_type_id;
2135 }
2136
2137 if (bpf_map_is_dev_bound(map)) {
2138 err = bpf_map_offload_info_fill(&info, map);
2139 if (err)
2140 return err;
2141 }
2142
2143 if (copy_to_user(uinfo, &info, info_len) ||
2144 put_user(info_len, &uattr->info.info_len))
2145 return -EFAULT;
2146
2147 return 0;
2148}
2149
static int bpf_btf_get_info_by_fd(struct btf *btf,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
	if (err)
		return err;

	return btf_get_info_by_fd(btf, attr, uattr);
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

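/* Dispatch BPF_OBJ_GET_INFO_BY_FD on the fd's file operations: the fd may
 * refer to a program, a map or a BTF object; any other file type is
 * rejected with -EINVAL.
 */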
static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else if (f.file->f_op == &btf_fops)
		err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

#define BPF_BTF_LOAD_LAST_FIELD btf_log_level

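/* BPF_BTF_LOAD: validate a user-supplied BTF blob and install it behind a
 * new anonymous fd. Restricted to CAP_SYS_ADMIN at this revision.
 */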
static int bpf_btf_load(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_BTF_LOAD))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btf_new_fd(attr);
}

#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id

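/* BPF_BTF_GET_FD_BY_ID: translate a global BTF object id into a new fd,
 * again CAP_SYS_ADMIN only.
 */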
static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btf_get_fd_by_id(attr->btf_id);
}

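/* Copy the result of a task fd query back to user space. buf_len is
 * always updated to the full string length so that a caller which passed
 * a too-small buffer can retry; on truncation the partial copy is still
 * performed, the user buffer is NUL terminated, and -ENOSPC is returned.
 */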
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  u32 prog_id, u32 fd_type,
				  const char *buf, u64 probe_offset,
				  u64 probe_addr)
{
	char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
	u32 len = buf ? strlen(buf) : 0, input_len;
	int err = 0;

	if (put_user(len, &uattr->task_fd_query.buf_len))
		return -EFAULT;
	input_len = attr->task_fd_query.buf_len;
	if (input_len && ubuf) {
		if (!len) {
			/* nothing to copy, just make ubuf NULL terminated */
			char zero = '\0';

			if (put_user(zero, ubuf))
				return -EFAULT;
		} else if (input_len >= len + 1) {
			/* ubuf can hold the string with NULL terminator */
			if (copy_to_user(ubuf, buf, len + 1))
				return -EFAULT;
		} else {
			/* ubuf cannot hold the string with NULL terminator,
			 * do a partial copy with NULL terminator.
			 */
			char zero = '\0';

			err = -ENOSPC;
			if (copy_to_user(ubuf, buf, input_len - 1))
				return -EFAULT;
			if (put_user(zero, ubuf + input_len - 1))
				return -EFAULT;
		}
	}

	if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
	    put_user(fd_type, &uattr->task_fd_query.fd_type) ||
	    put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
	    put_user(probe_addr, &uattr->task_fd_query.probe_addr))
		return -EFAULT;

	return err;
}

#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr

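/* BPF_TASK_FD_QUERY: given a pid and an fd number in that task's file
 * table, report which BPF program the fd refers to and through what kind
 * of attachment. Only raw tracepoint fds and perf event fds are
 * recognized; anything else fails with -ENOTSUPP.
 */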
static int bpf_task_fd_query(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	pid_t pid = attr->task_fd_query.pid;
	u32 fd = attr->task_fd_query.fd;
	const struct perf_event *event;
	struct files_struct *files;
	struct task_struct *task;
	struct file *file;
	int err;

	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (attr->task_fd_query.flags != 0)
		return -EINVAL;

	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
	if (!task)
		return -ENOENT;

	files = get_files_struct(task);
	put_task_struct(task);
	if (!files)
		return -ENOENT;

	err = 0;
	spin_lock(&files->file_lock);
	file = fcheck_files(files, fd);
	if (!file)
		err = -EBADF;
	else
		get_file(file);
	spin_unlock(&files->file_lock);
	put_files_struct(files);

	if (err)
		goto out;

	if (file->f_op == &bpf_raw_tp_fops) {
		struct bpf_raw_tracepoint *raw_tp = file->private_data;
		struct bpf_raw_event_map *btp = raw_tp->btp;

		err = bpf_task_fd_query_copy(attr, uattr,
					     raw_tp->prog->aux->id,
					     BPF_FD_TYPE_RAW_TRACEPOINT,
					     btp->tp->name, 0, 0);
		goto put_file;
	}

	event = perf_get_event(file);
	if (!IS_ERR(event)) {
		u64 probe_offset, probe_addr;
		u32 prog_id, fd_type;
		const char *buf;

		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
					      &buf, &probe_offset,
					      &probe_addr);
		if (!err)
			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
						     fd_type, buf,
						     probe_offset,
						     probe_addr);
		goto put_file;
	}

	err = -ENOTSUPP;
put_file:
	fput(file);
out:
	return err;
}

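/* Single entry point for all BPF commands. The union bpf_attr may grow
 * between kernel versions, so bpf_check_uarg_tail_zero() keeps newer user
 * space compatible by insisting that any trailing bytes the kernel does
 * not know about are zero.
 *
 * Purely as an illustrative sketch (not part of this file), user space
 * would typically reach e.g. BPF_OBJ_GET_INFO_BY_FD through the raw
 * syscall; "map_fd" below is a hypothetical, already-open map fd:
 *
 *	union bpf_attr attr = {};
 *	struct bpf_map_info info = {};
 *
 *	attr.info.bpf_fd   = map_fd;
 *	attr.info.info_len = sizeof(info);
 *	attr.info.info     = (__u64)(unsigned long)&info;
 *	if (syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr,
 *		    sizeof(attr)) == 0)
 *		printf("map id %u\n", info.id);
 */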
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}