/*
 * Copyright 2018 Google LLC
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
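
/*
 * Hafnium driver: creates a kernel thread for each vcpu of each secondary
 * VM, and exposes a datagram socket family (PF_HF) through which the
 * primary VM exchanges messages with secondary VMs via the hypervisor
 * mailbox.
 */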

#include <linux/hrtimer.h>
#include <linux/atomic.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <net/sock.h>

#include <hf/call.h>

/* TODO: Reusing AF_ECONET for now as it's otherwise unused. */
#define AF_HF AF_ECONET
#define PF_HF AF_HF

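/*
 * Interrupt injected into a secondary VM vcpu to signal that a new message is
 * waiting in its mailbox (see hf_handle_wake_up_request()).
 */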
#define MESSAGE_INT_ID 1

#define CONFIG_HAFNIUM_MAX_VMS 16
#define CONFIG_HAFNIUM_MAX_VCPUS 32

struct hf_vcpu {
	struct hf_vm *vm;
	uint32_t vcpu_index;
	struct task_struct *task;
	atomic_t abort_sleep;
	struct hrtimer timer;
};

struct hf_vm {
	uint32_t id;
	uint32_t vcpu_count;
	struct hf_vcpu *vcpu;
};

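/*
 * Header prepended to every datagram: hf_sock_sendmsg() pushes it in front of
 * the payload, and hf_handle_message() uses the ports to route the message to
 * the right socket on the receiving side.
 */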
struct hf_msg_hdr {
	uint64_t src_port;
	uint64_t dst_port;
};

struct hf_sock {
	/* This needs to be the first field. */
	struct sock sk;

	/*
	 * The following fields are immutable after the socket transitions to
	 * the SS_CONNECTED state.
	 */
	uint64_t local_port;
	uint64_t remote_port;
	struct hf_vm *peer_vm;
};

struct sockaddr_hf {
	sa_family_t family;
	uint32_t vm_id;
	uint64_t port;
};

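/*
 * Illustrative user-space sketch (not part of the driver): it assumes user
 * space mirrors struct sockaddr_hf and the AF_HF/AF_ECONET aliasing above;
 * the values used here are examples only.
 *
 *	int fd = socket(AF_HF, SOCK_DGRAM, 0);	// requires CAP_SYS_ADMIN
 *	struct sockaddr_hf addr = {
 *		.family = AF_HF,
 *		.vm_id = 1,	// first secondary VM
 *		.port = 10,	// peer port to talk to
 *	};
 *
 *	// connect() picks a fresh local port and pairs the socket with the
 *	// given VM/port; send()/recv() then exchange datagrams with it.
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	send(fd, buf, len, 0);
 *	recv(fd, buf, sizeof(buf), 0);
 */
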
static struct proto hf_sock_proto = {
	.name = "hafnium",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct hf_sock),
};

static struct hf_vm *hf_vms;
static uint32_t hf_vm_count;
static struct page *hf_send_page;
static struct page *hf_recv_page;
static atomic64_t hf_next_port = ATOMIC64_INIT(0);
static DEFINE_SPINLOCK(hf_send_lock);
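/*
 * Connected sockets hashed by local port (7 bits, so 128 buckets);
 * hf_handle_message() resolves bucket collisions by also matching the peer VM
 * and remote port.
 */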
static DEFINE_HASHTABLE(hf_local_port_hash, 7);
static DEFINE_SPINLOCK(hf_local_port_hash_lock);

/**
 * Wakes up the kernel thread responsible for running the given vcpu.
 *
 * Returns 0 if the thread was already running, 1 otherwise.
 */
static int hf_vcpu_wake_up(struct hf_vcpu *vcpu)
{
	/* Set a flag indicating that the thread should not go to sleep. */
	atomic_set(&vcpu->abort_sleep, 1);

	/* Set the thread to running state. */
	return wake_up_process(vcpu->task);
}

/**
 * Puts the current thread to sleep. The current thread must be responsible for
 * running the given vcpu.
 *
 * Going to sleep will fail if hf_vcpu_wake_up() or kthread_stop() was called on
 * this vcpu/thread since the last time it [re]started running.
 */
static void hf_vcpu_sleep(struct hf_vcpu *vcpu)
{
	int abort;

	set_current_state(TASK_INTERRUPTIBLE);

	/* Check the sleep-abort flag after making the thread interruptible. */
	abort = atomic_read(&vcpu->abort_sleep);
	if (!abort && !kthread_should_stop())
		schedule();

	/* Set state back to running on the way out. */
	set_current_state(TASK_RUNNING);
}

/**
 * Wakes up the thread associated with the vcpu that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);

	/* TODO: Inject interrupt. */
	hf_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

/**
 * Handles a message delivered to this VM by validating that it's well-formed
 * and then queueing it for delivery to the appropriate socket.
 */
static void hf_handle_message(struct hf_vm *sender, const void *ptr, size_t len)
{
	struct hf_sock *hsock;
	const struct hf_msg_hdr *hdr = ptr;
	struct sk_buff *skb;
	int err;

	/* Ignore messages that are too small to hold a header. */
	if (len < sizeof(struct hf_msg_hdr))
		return;

	len -= sizeof(struct hf_msg_hdr);

	/* Go through the colliding sockets. */
	rcu_read_lock();
	hash_for_each_possible_rcu(hf_local_port_hash, hsock, sk.sk_node,
				   hdr->dst_port) {
		if (hsock->peer_vm == sender &&
		    hsock->remote_port == hdr->src_port) {
			sock_hold(&hsock->sk);
			break;
		}
	}
	rcu_read_unlock();

	/* Nothing to do if we couldn't find the target. */
	if (!hsock)
		return;

	/*
	 * TODO: From this point on, there are two failure paths: when we
	 * create the skb below, and when we enqueue it to the socket. What
	 * should we do if they fail? Ideally we would have some form of flow
	 * control to prevent message loss, but how to do it efficiently?
	 *
	 * One option is to have a pre-allocated message that indicates to the
	 * sender that a message was dropped. This way we guarantee that the
	 * sender will be aware of the loss and should back off.
	 */

	/* Create the skb. */
	skb = alloc_skb(len, GFP_KERNEL);
	if (!skb)
		goto exit;

	memcpy(skb_put(skb, len), hdr + 1, len);

	/*
	 * Add the skb to the receive queue of the target socket. On success it
	 * calls sk->sk_data_ready, which is currently set to sock_def_readable,
	 * which wakes up any waiters.
	 */
	err = sock_queue_rcv_skb(&hsock->sk, skb);
	if (err)
		kfree_skb(skb);

exit:
	sock_put(&hsock->sk);
}

/**
 * This function is called when Hafnium requests that the primary VM wake up a
 * vCPU that belongs to a secondary VM.
 *
 * It wakes up the thread if it's sleeping, or kicks it if it's already running.
 *
 * If vcpu is HF_INVALID_VCPU, it injects a MESSAGE_INT_ID interrupt into a vCPU
 * belonging to the specified VM.
 */
static void hf_handle_wake_up_request(uint32_t vm_id, uint16_t vcpu)
{
	struct hf_vm *vm;

	if (vm_id > hf_vm_count) {
		pr_warn("Request to wake up non-existent VM id: %u\n", vm_id);
		return;
	}

	vm = &hf_vms[vm_id - 1];
	if (vcpu >= vm->vcpu_count) {
		int64_t ret;

		if (vcpu != HF_INVALID_VCPU) {
			pr_warn("Request to wake up non-existent vCPU: %u.%u\n",
				vm_id, vcpu);
			return;
		}

		/*
		 * TODO: For now we're picking the first vcpu to interrupt, but
		 * we want to be smarter.
		 */
		vcpu = 0;
		ret = hf_inject_interrupt(vm_id, vcpu, MESSAGE_INT_ID);
		if (ret != 1) {
			/* We don't need to wake up the vcpu. */
			return;
		}
	}

	if (hf_vcpu_wake_up(&vm->vcpu[vcpu]) == 0) {
		/*
		 * The task was already running (presumably on a different
		 * physical CPU); interrupt it. This gives Hafnium a chance to
		 * inject any new interrupts.
		 */
		kick_process(vm->vcpu[vcpu].task);
	}
}

/**
 * This is the main loop of each vcpu.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	struct hf_vcpu_run_return ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		/*
		 * We're about to run the vcpu, so we can reset the abort-sleep
		 * flag.
		 */
		atomic_set(&vcpu->abort_sleep, 0);

		/* Call into Hafnium to run vcpu. */
		ret = hf_vcpu_run(vcpu->vm->id, vcpu->vcpu_index);

		switch (ret.code) {
		/* Yield (forcibly or voluntarily). */
		case HF_VCPU_RUN_YIELD:
			break;

		/* WFI. */
		case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
			hf_vcpu_sleep(vcpu);
			break;

		/* Wake up another vcpu. */
		case HF_VCPU_RUN_WAKE_UP:
			hf_handle_wake_up_request(ret.wake_up.vm_id,
						  ret.wake_up.vcpu);
			break;

		/* Response available. */
		case HF_VCPU_RUN_MESSAGE:
			hf_handle_message(vcpu->vm, page_address(hf_recv_page),
					  ret.message.size);
			hf_mailbox_clear();
			break;

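		/*
		 * The vcpu asked to sleep: arm the timer (its expiry handler
		 * wakes this thread), sleep until it fires or something else
		 * wakes us, then cancel the timer on the way out.
		 */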
		case HF_VCPU_RUN_SLEEP:
			hrtimer_start(&vcpu->timer, ret.sleep.ns,
				      HRTIMER_MODE_REL);
			hf_vcpu_sleep(vcpu);
			hrtimer_cancel(&vcpu->timer);
			break;
		}
	}

	return 0;
}

/**
 * Converts a pointer to a struct sock into a pointer to a struct hf_sock. It
 * relies on the fact that the first field of hf_sock is a sock.
 */
static struct hf_sock *hsock_from_sk(struct sock *sk)
{
	return (struct hf_sock *)sk;
}

/**
 * This is called when the last reference to the outer socket is released. For
 * example, if it's a user-space socket, when the last file descriptor pointing
 * to this socket is closed.
 *
 * It begins cleaning up resources, though some can only be cleaned up after all
 * references to the underlying socket are released, which is handled by
 * hf_sock_destruct().
 */
static int hf_sock_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct hf_sock *hsock = hsock_from_sk(sk);
	unsigned long flags;

	if (!sk)
		return 0;

	/* Shutdown for both send and receive. */
	lock_sock(sk);
	sk->sk_shutdown |= RCV_SHUTDOWN | SEND_SHUTDOWN;
	sk->sk_state_change(sk);
	release_sock(sk);

	/* Remove from the hash table, so lookups from now on won't find it. */
	spin_lock_irqsave(&hf_local_port_hash_lock, flags);
	hash_del_rcu(&hsock->sk.sk_node);
	spin_unlock_irqrestore(&hf_local_port_hash_lock, flags);

	/*
	 * TODO: When we implement a tx queue, we need to clear it here so that
	 * sk_wmem_alloc will not prevent sk from being freed (sk_free).
	 */

	/*
	 * Wait for in-flight lookups to finish. We need to do this here because
	 * in-flight lookups rely on the reference to the socket we're about to
	 * release.
	 */
	synchronize_rcu();
	sock_put(sk);
	sock->sk = NULL;

	return 0;
}

/**
 * This is called when there are no more references to the socket. It frees all
 * resources that haven't been freed during release.
 */
static void hf_sock_destruct(struct sock *sk)
{
	/*
	 * Clear the receive queue now that the handler cannot add any more
	 * skbs to it.
	 */
	skb_queue_purge(&sk->sk_receive_queue);
}

/**
 * Connects the Hafnium socket to the provided VM and port. After the socket is
 * connected, it can be used to exchange datagrams with the specified peer.
 */
static int hf_sock_connect(struct socket *sock, struct sockaddr *saddr,
			   int len, int connect_flags)
{
	struct sock *sk = sock->sk;
	struct hf_sock *hsock = hsock_from_sk(sk);
	struct hf_vm *vm;
	struct sockaddr_hf *addr;
	int err;
	unsigned long flags;

	/* Basic address validation. */
	if (len < sizeof(struct sockaddr_hf) || saddr->sa_family != AF_HF)
		return -EINVAL;

	addr = (struct sockaddr_hf *)saddr;

	/* Reject vm_id 0: only secondary VMs (IDs 1..hf_vm_count) are tracked. */
	if (addr->vm_id < 1 || addr->vm_id > hf_vm_count)
		return -ENETUNREACH;

	vm = &hf_vms[addr->vm_id - 1];

	/*
	 * TODO: Once we implement access control in Hafnium, check that the
	 * caller is allowed to contact the specified VM. Return -ECONNREFUSED
	 * if access is denied.
	 */

	/* Take lock to make sure state doesn't change as we connect. */
	lock_sock(sk);

	/* Only unconnected sockets are allowed to become connected. */
	if (sock->state != SS_UNCONNECTED) {
		err = -EISCONN;
		goto exit;
	}

	hsock->local_port = atomic64_inc_return(&hf_next_port);
	hsock->remote_port = addr->port;
	hsock->peer_vm = vm;

	sock->state = SS_CONNECTED;

	/* Add socket to hash table now that it's fully initialised. */
	spin_lock_irqsave(&hf_local_port_hash_lock, flags);
	hash_add_rcu(hf_local_port_hash, &sk->sk_node, hsock->local_port);
	spin_unlock_irqrestore(&hf_local_port_hash_lock, flags);

	err = 0;
exit:
	release_sock(sk);
	return err;
}

/**
 * Sends the given skb to the appropriate VM by calling Hafnium. It will also
 * trigger the wake up of a recipient VM.
 *
 * Takes ownership of the skb on success.
 */
static int hf_send_skb(struct sk_buff *skb)
{
	unsigned long flags;
	int64_t ret;
	struct hf_sock *hsock = hsock_from_sk(skb->sk);
	struct hf_vm *vm = hsock->peer_vm;

	/*
	 * Call Hafnium under the send lock so that we serialize the use of the
	 * global send buffer.
	 */
	spin_lock_irqsave(&hf_send_lock, flags);
	memcpy(page_address(hf_send_page), skb->data, skb->len);
	ret = hf_mailbox_send(vm->id, skb->len);
	spin_unlock_irqrestore(&hf_send_lock, flags);

	if (ret < 0)
		return -EAGAIN;

	/* Wake some vcpu up to handle the new message. */
	hf_handle_wake_up_request(vm->id, ret);

	kfree_skb(skb);

	return 0;
}

/**
 * Determines if the given socket is in the connected state. It acquires and
 * releases the socket lock.
 */
static bool hf_sock_is_connected(struct socket *sock)
{
	bool ret;

	lock_sock(sock->sk);
	ret = sock->state == SS_CONNECTED;
	release_sock(sock->sk);

	return ret;
}

/**
 * Sends a message to the VM & port the socket is connected to. All variants
 * of write/send/sendto/sendmsg eventually call this function.
 */
static int hf_sock_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int err;
	struct hf_msg_hdr *hdr;
	struct hf_sock *hsock = hsock_from_sk(sk);

	/* Check length. */
	if (len > HF_MAILBOX_SIZE - sizeof(struct hf_msg_hdr))
		return -EMSGSIZE;

	/* We don't allow the destination address to be specified. */
	if (m->msg_namelen > 0)
		return -EISCONN;

	/* We don't support out of band messages. */
	if (m->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	/*
	 * Ensure that the socket is connected. We don't need to hold the socket
	 * lock (acquired and released by hf_sock_is_connected) for the
	 * remainder of the function because the fields we care about are
	 * immutable once the state is SS_CONNECTED.
	 */
	if (!hf_sock_is_connected(sock))
		return -ENOTCONN;

	/*
	 * Allocate an skb for this write. If there isn't enough room in the
	 * socket's send buffer (sk_wmem_alloc >= sk_sndbuf), this will block
	 * (if it's a blocking call). On success, it increments sk_wmem_alloc
	 * and sets up the skb such that sk_wmem_alloc gets decremented when
	 * the skb is freed (sock_wfree gets called).
	 */
	skb = sock_alloc_send_skb(sk, len + sizeof(struct hf_msg_hdr),
				  m->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	/* Reserve room for the header and initialise it. */
	skb_reserve(skb, sizeof(struct hf_msg_hdr));
	hdr = skb_push(skb, sizeof(struct hf_msg_hdr));
	hdr->src_port = hsock->local_port;
	hdr->dst_port = hsock->remote_port;

	/* Allocate area for the contents, then copy into skb. */
	if (!copy_from_iter_full(skb_put(skb, len), len, &m->msg_iter)) {
		err = -EFAULT;
		goto err_cleanup;
	}

	/*
	 * TODO: We currently do this inline, but when we have support for
	 * readiness notification from Hafnium, we must add this to a per-VM tx
	 * queue that can make progress when the VM becomes writable. This will
	 * fix send buffering and poll readiness notification.
	 */
	err = hf_send_skb(skb);
	if (err)
		goto err_cleanup;

	return 0;

err_cleanup:
	kfree_skb(skb);
	return err;
}

/**
 * Receives a message originating from the VM & port the socket is connected
 * to. All variants of read/recv/recvfrom/recvmsg eventually call this
 * function.
 */
static int hf_sock_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
			   int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int err;
	size_t copy_len;

	if (!hf_sock_is_connected(sock))
		return -ENOTCONN;

	/* Grab the next skb from the receive queue. */
	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	/* Make sure we don't copy more than what fits in the output buffer. */
	copy_len = skb->len;
	if (copy_len > len) {
		copy_len = len;
		m->msg_flags |= MSG_TRUNC;
	}

	/* Make sure we don't overflow the return value type. */
	if (copy_len > INT_MAX) {
		copy_len = INT_MAX;
		m->msg_flags |= MSG_TRUNC;
	}

	/* Copy skb to output iterator, then free it. */
	err = skb_copy_datagram_msg(skb, 0, m, copy_len);
	skb_free_datagram(sk, skb);
	if (err)
		return err;

	return copy_len;
}

/**
 * This function is called when a Hafnium socket is created. It initialises all
 * state such that the caller will be able to connect the socket and then send
 * and receive messages through it.
 */
static int hf_sock_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	static const struct proto_ops ops = {
		.family = PF_HF,
		.owner = THIS_MODULE,
		.release = hf_sock_release,
		.bind = sock_no_bind,
		.connect = hf_sock_connect,
		.socketpair = sock_no_socketpair,
		.accept = sock_no_accept,
		.ioctl = sock_no_ioctl,
		.listen = sock_no_listen,
		.shutdown = sock_no_shutdown,
		.setsockopt = sock_no_setsockopt,
		.getsockopt = sock_no_getsockopt,
		.sendmsg = hf_sock_sendmsg,
		.recvmsg = hf_sock_recvmsg,
		.mmap = sock_no_mmap,
		.sendpage = sock_no_sendpage,
		.poll = datagram_poll,
	};
	struct sock *sk;

	if (sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol != 0)
		return -EPROTONOSUPPORT;

	/*
	 * For now we only allow callers with sys admin capability to create
	 * Hafnium sockets.
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Allocate and initialise socket. */
	sk = sk_alloc(net, PF_HF, GFP_KERNEL, &hf_sock_proto, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	sk->sk_destruct = hf_sock_destruct;
	sock->ops = &ops;
	sock->state = SS_UNCONNECTED;

	return 0;
}

/**
 * Frees all resources, including threads, associated with the Hafnium driver.
 */
static void hf_free_resources(void)
{
	uint32_t i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/* Free resources. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

/**
 * Initializes the Hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	static const struct net_proto_family proto_family = {
		.family = PF_HF,
		.create = hf_sock_create,
		.owner = THIS_MODULE,
	};
	int64_t ret;
	uint32_t i, j;
	uint32_t total_vm_count;
	uint32_t total_vcpu_count;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these pages
	 * because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_vm_configure(page_to_phys(hf_send_page),
			      page_to_phys(hf_recv_page));
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from the
		 * hypervisor and go from there.
		 */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs. */
	ret = hf_vm_get_count();
	if (ret < 0) {
		pr_err("Unable to retrieve number of VMs: %lld\n", ret);
		return -EIO;
	}

	/* Confirm the maximum number of VMs looks sane. */
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS < 1);
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS > U16_MAX);

	/* Validate the number of VMs. There must at least be the primary. */
	if (ret < 1 || ret > CONFIG_HAFNIUM_MAX_VMS) {
		pr_err("Number of VMs is out of range: %lld\n", ret);
		return -EDQUOT;
	}

	/* Only track the secondary VMs. */
	total_vm_count = ret - 1;
	hf_vms = kmalloc_array(total_vm_count, sizeof(struct hf_vm),
			       GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Initialize each VM. */
	total_vcpu_count = 0;
	for (i = 0; i < total_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		/* Adjust the ID as only the secondaries are tracked. */
		vm->id = i + 1;

		ret = hf_vcpu_get_count(vm->id);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%u: %lld\n",
			       vm->id, ret);
			ret = -EIO;
			goto fail_with_cleanup;
		}

		/* Avoid overflowing the vcpu count. */
		if (ret > (U32_MAX - total_vcpu_count)) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		/* Confirm the maximum number of VCPUs looks sane. */
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS < 1);
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS > U16_MAX);

		/* Enforce the limit on vcpus. */
		total_vcpu_count += ret;
		if (total_vcpu_count > CONFIG_HAFNIUM_MAX_VCPUS) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc_array(vm->vcpu_count, sizeof(struct hf_vcpu),
					 GFP_KERNEL);
		if (!vm->vcpu) {
			pr_err("No memory for %u vcpus for vm %u\n",
			       vm->vcpu_count, vm->id);
			ret = -ENOMEM;
			goto fail_with_cleanup;
		}

		/* Update the number of initialized VMs. */
		hf_vm_count = i + 1;

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = &vm->vcpu[j];

			vcpu->task = kthread_create(hf_vcpu_thread, vcpu,
						    "vcpu_thread_%u_%u",
						    vm->id, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%u,vcpu=%u): %ld\n",
				       vm->id, j, PTR_ERR(vcpu->task));
				vm->vcpu_count = j;
				ret = PTR_ERR(vcpu->task);
				goto fail_with_cleanup;
			}

			get_task_struct(vcpu->task);
			vcpu->vm = vm;
			vcpu->vcpu_index = j;
			atomic_set(&vcpu->abort_sleep, 0);
		}
	}

	/* Register protocol and socket family. */
	ret = proto_register(&hf_sock_proto, 0);
	if (ret) {
		pr_err("Unable to register protocol: %lld\n", ret);
		goto fail_with_cleanup;
	}

	ret = sock_register(&proto_family);
	if (ret) {
		pr_err("Unable to register Hafnium's socket family: %lld\n",
		       ret);
		goto fail_unregister_proto;
	}

	/*
	 * Start running threads now that all is initialized.
	 *
	 * Any failures from this point on must also unregister the socket
	 * family with a call to sock_unregister().
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %u VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		pr_info("\tVM %u: %u vCPUS\n", vm->id, vm->vcpu_count);
	}

	return 0;

fail_unregister_proto:
	proto_unregister(&hf_sock_proto);
fail_with_cleanup:
	hf_free_resources();
	return ret;
}

/**
 * Frees up all resources used by the Hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	pr_info("Preparing to unload Hafnium\n");
	sock_unregister(PF_HF);
	proto_unregister(&hf_sock_proto);
	hf_free_resources();
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL v2");

module_init(hf_init);
module_exit(hf_exit);