// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2018 The Hafnium Authors.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <clocksource/arm_arch_timer.h>
#include <linux/atomic.h>
#include <linux/cpuhotplug.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <net/sock.h>

#include <hf/call.h>

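/*
 * Hypercall wrappers used by this driver (all declared in <hf/call.h>):
 * hf_vm_configure(), hf_vm_get_count(), hf_vcpu_get_count(), hf_vcpu_run(),
 * hf_mailbox_send(), hf_mailbox_clear(), hf_mailbox_waiter_get() and
 * hf_interrupt_inject().
 */
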
/* TODO: Reusing AF_ECONET for now as it's otherwise unused. */
#define AF_HF AF_ECONET
#define PF_HF AF_HF

#define HYPERVISOR_TIMER_NAME "el2_timer"

#define CONFIG_HAFNIUM_MAX_VMS 16
#define CONFIG_HAFNIUM_MAX_VCPUS 32

#define FIRST_SECONDARY_VM_ID 1

struct hf_vcpu {
	struct hf_vm *vm;
	uint32_t vcpu_index;
	struct task_struct *task;
	/*
	 * Set by hf_vcpu_wake_up() so that a pending hf_vcpu_sleep() returns
	 * without sleeping; cleared before each run of the vCPU.
	 */
	atomic_t abort_sleep;
	/* Set while the vCPU thread is blocked waiting for a message. */
	atomic_t waiting_for_message;
	struct hrtimer timer;
};

struct hf_vm {
	uint32_t id;
	uint32_t vcpu_count;
	struct hf_vcpu *vcpu;
};

struct hf_msg_hdr {
	uint64_t src_port;
	uint64_t dst_port;
};

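/*
 * Wire-format sketch: every message exchanged through a Hafnium socket is the
 * header above followed immediately by the payload, so a mailbox message of
 * total size len carries len - sizeof(struct hf_msg_hdr) payload bytes.
 *
 *	+-----------------+-----------------+--------------------------+
 *	| src_port (8 B)  | dst_port (8 B)  | payload                  |
 *	+-----------------+-----------------+--------------------------+
 */
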
struct hf_sock {
	/* This needs to be the first field. */
	struct sock sk;

	/*
	 * The following fields are immutable after the socket transitions to
	 * the SS_CONNECTED state.
	 */
	uint64_t local_port;
	uint64_t remote_port;
	struct hf_vm *peer_vm;
};

struct sockaddr_hf {
	sa_family_t family;
	uint32_t vm_id;
	uint64_t port;
};

static struct proto hf_sock_proto = {
	.name = "hafnium",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct hf_sock),
};
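
/*
 * Hypothetical userspace usage sketch (illustration only, not part of the
 * driver): a privileged process in the primary VM creates a datagram socket,
 * connects it to a port on the first secondary VM and exchanges one message.
 * The port number and buffer names here are assumptions.
 *
 *	int fd = socket(AF_ECONET, SOCK_DGRAM, 0);	// AF_HF == AF_ECONET
 *	struct sockaddr_hf addr = {
 *		.family = AF_ECONET,
 *		.vm_id = 1,	// FIRST_SECONDARY_VM_ID
 *		.port = 10,
 *	};
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	send(fd, buf, buf_len, 0);
 *	recv(fd, reply, sizeof(reply), 0);
 */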

static struct hf_vm *hf_vms;
static uint32_t hf_vm_count;
static struct page *hf_send_page;
static struct page *hf_recv_page;
static atomic64_t hf_next_port = ATOMIC64_INIT(0);
static DEFINE_SPINLOCK(hf_send_lock);
/* Connected sockets hashed by local port; 7 bits gives 128 buckets. */
static DEFINE_HASHTABLE(hf_local_port_hash, 7);
static DEFINE_SPINLOCK(hf_local_port_hash_lock);
static int hf_irq;
static enum cpuhp_state hf_cpuhp_state;

/**
 * Retrieves a VM from its ID, returning NULL if the VM doesn't exist.
 */
static struct hf_vm *hf_vm_from_id(uint32_t vm_id)
{
	if (vm_id < FIRST_SECONDARY_VM_ID ||
	    vm_id >= FIRST_SECONDARY_VM_ID + hf_vm_count)
		return NULL;

	return &hf_vms[vm_id - FIRST_SECONDARY_VM_ID];
}

/**
 * Wakes up the kernel thread responsible for running the given vCPU.
 *
 * Returns 0 if the thread was already running, 1 otherwise.
 */
static int hf_vcpu_wake_up(struct hf_vcpu *vcpu)
{
	/* Set a flag indicating that the thread should not go to sleep. */
	atomic_set(&vcpu->abort_sleep, 1);

	/* Set the thread to running state. */
	return wake_up_process(vcpu->task);
}

/**
 * Puts the current thread to sleep. The current thread must be responsible for
 * running the given vCPU.
 *
 * Going to sleep will fail if hf_vcpu_wake_up() or kthread_stop() was called on
 * this vCPU/thread since the last time it [re]started running.
 */
static void hf_vcpu_sleep(struct hf_vcpu *vcpu)
{
	int abort;

	set_current_state(TASK_INTERRUPTIBLE);

	/* Check the sleep-abort flag after making the thread interruptible. */
	abort = atomic_read(&vcpu->abort_sleep);
	if (!abort && !kthread_should_stop())
		schedule();

	/* Set the state back to running on the way out. */
	set_current_state(TASK_RUNNING);
}

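/*
 * Ordering note on the sleep protocol above: hf_vcpu_sleep() marks the thread
 * TASK_INTERRUPTIBLE before it reads abort_sleep, while hf_vcpu_wake_up() sets
 * the flag before calling wake_up_process(). Whichever way the two race,
 * either the sleeper observes the flag and skips schedule(), or the waker
 * observes a sleeping thread and wakes it, so a wake-up request is never lost.
 */
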
/**
 * Wakes up the thread associated with the vCPU that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);

	/* TODO: Inject interrupt. */
	hf_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

/**
 * This function is called when Hafnium requests that the primary VM wake up a
 * vCPU that belongs to a secondary VM.
 *
 * It wakes up the thread if it's sleeping, or kicks it if it's already running.
 */
static void hf_handle_wake_up_request(uint32_t vm_id, uint16_t vcpu)
{
	struct hf_vm *vm = hf_vm_from_id(vm_id);

	if (!vm) {
		pr_warn("Request to wake up non-existent VM id: %u\n", vm_id);
		return;
	}

	if (vcpu >= vm->vcpu_count) {
		pr_warn("Request to wake up non-existent vCPU: %u.%u\n",
			vm_id, vcpu);
		return;
	}

	if (hf_vcpu_wake_up(&vm->vcpu[vcpu]) == 0) {
		/*
		 * The task was already running (presumably on a different
		 * physical CPU); interrupt it. This gives Hafnium a chance to
		 * inject any new interrupts.
		 */
		kick_process(vm->vcpu[vcpu].task);
	}
}

/**
 * Injects an interrupt into a vCPU of the VM and ensures the vCPU will run to
 * handle the interrupt.
 */
static void hf_interrupt_vm(uint32_t vm_id, uint64_t int_id)
{
	struct hf_vm *vm = hf_vm_from_id(vm_id);
	uint16_t vcpu;
	int64_t ret;

	if (!vm) {
		pr_warn("Request to interrupt non-existent VM id: %u\n",
			vm_id);
		return;
	}

	/*
	 * TODO: For now we're picking the first vCPU to interrupt, but
	 * we want to be smarter.
	 */
	vcpu = 0;
	ret = hf_interrupt_inject(vm_id, vcpu, int_id);

	if (ret == -1) {
		pr_warn("Failed to inject interrupt %llu to vCPU %u of VM %u\n",
			int_id, vcpu, vm_id);
		return;
	}

	if (ret != 1) {
		/* We don't need to wake up the vCPU. */
		return;
	}

	hf_handle_wake_up_request(vm_id, vcpu);
}

/**
 * Notifies all VMs waiting on the given VM's mailbox becoming writable, by
 * injecting a "mailbox writable" interrupt into each secondary waiter.
 */
static void hf_notify_waiters(uint32_t vm_id)
{
	int64_t waiter_vm_id;

	while ((waiter_vm_id = hf_mailbox_waiter_get(vm_id)) != -1) {
		if (waiter_vm_id == HF_PRIMARY_VM_ID) {
			/*
			 * TODO: Use this information when implementing per-vm
			 * queues.
			 */
		} else {
			hf_interrupt_vm(waiter_vm_id,
					HF_MAILBOX_WRITABLE_INTID);
		}
	}
}

/**
 * Delivers a message to a VM.
 */
static void hf_deliver_message(uint32_t vm_id)
{
	struct hf_vm *vm = hf_vm_from_id(vm_id);
	uint32_t i;

	if (!vm) {
		pr_warn("Tried to deliver message to non-existent VM id: %u\n",
			vm_id);
		return;
	}

	/* Try to wake a vCPU that is waiting for a message. */
	for (i = 0; i < vm->vcpu_count; i++) {
		if (atomic_read(&vm->vcpu[i].waiting_for_message)) {
			hf_handle_wake_up_request(vm->id,
						  vm->vcpu[i].vcpu_index);
			return;
		}
	}

	/* None were waiting for a message so interrupt one. */
	hf_interrupt_vm(vm->id, HF_MAILBOX_READABLE_INTID);
}

/**
 * Handles a message delivered to this VM by validating that it's well-formed
 * and then queueing it for delivery to the appropriate socket.
 */
static void hf_handle_message(struct hf_vm *sender, const void *ptr, size_t len)
{
	struct hf_sock *hsock;
	const struct hf_msg_hdr *hdr = ptr;
	struct sk_buff *skb;
	int err;

	/* Ignore messages that are too small to hold a header. */
	if (len < sizeof(struct hf_msg_hdr))
		return;

	len -= sizeof(struct hf_msg_hdr);

	/* Go through the colliding sockets. */
	rcu_read_lock();
	hash_for_each_possible_rcu(hf_local_port_hash, hsock, sk.sk_node,
				   hdr->dst_port) {
		if (hsock->peer_vm == sender &&
		    hsock->remote_port == hdr->src_port) {
			sock_hold(&hsock->sk);
			break;
		}
	}
	rcu_read_unlock();

	/* Nothing to do if we couldn't find the target. */
	if (!hsock)
		return;

	/*
	 * TODO: From this point on, there are two failure paths: when we
	 * create the skb below, and when we enqueue it to the socket. What
	 * should we do if they fail? Ideally we would have some form of flow
	 * control to prevent message loss, but how to do it efficiently?
	 *
	 * One option is to have a pre-allocated message that indicates to the
	 * sender that a message was dropped. This way we guarantee that the
	 * sender will be aware of the loss and should back off.
	 */

	/* Create the skb. */
	skb = alloc_skb(len, GFP_KERNEL);
	if (!skb)
		goto exit;

	memcpy(skb_put(skb, len), hdr + 1, len);

	/*
	 * Add the skb to the receive queue of the target socket. On success it
	 * calls sk->sk_data_ready, which is currently set to sock_def_readable,
	 * which wakes up any waiters.
	 */
	err = sock_queue_rcv_skb(&hsock->sk, skb);
	if (err)
		kfree_skb(skb);

exit:
	sock_put(&hsock->sk);

	if (hf_mailbox_clear() == 1)
		hf_notify_waiters(HF_PRIMARY_VM_ID);
}

/**
 * This is the main loop of each vCPU.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	struct hf_vcpu_run_return ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		uint32_t i;

		/*
		 * We're about to run the vCPU, so we can reset the abort-sleep
		 * flag.
		 */
		atomic_set(&vcpu->abort_sleep, 0);

		/* Call into Hafnium to run the vCPU. */
		ret = hf_vcpu_run(vcpu->vm->id, vcpu->vcpu_index);

		switch (ret.code) {
		/* Preempted. */
		case HF_VCPU_RUN_PREEMPTED:
			if (need_resched())
				schedule();
			break;

		/* Yield. */
		case HF_VCPU_RUN_YIELD:
			if (!kthread_should_stop())
				schedule();
			break;

		/* WFI. */
		case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
			if (ret.sleep.ns != HF_SLEEP_INDEFINITE) {
				hrtimer_start(&vcpu->timer, ret.sleep.ns,
					      HRTIMER_MODE_REL);
			}
			hf_vcpu_sleep(vcpu);
			hrtimer_cancel(&vcpu->timer);
			break;

		/* Waiting for a message. */
		case HF_VCPU_RUN_WAIT_FOR_MESSAGE:
			atomic_set(&vcpu->waiting_for_message, 1);
			if (ret.sleep.ns != HF_SLEEP_INDEFINITE) {
				hrtimer_start(&vcpu->timer, ret.sleep.ns,
					      HRTIMER_MODE_REL);
			}
			hf_vcpu_sleep(vcpu);
			hrtimer_cancel(&vcpu->timer);
			atomic_set(&vcpu->waiting_for_message, 0);
			break;

		/* Wake up another vCPU. */
		case HF_VCPU_RUN_WAKE_UP:
			hf_handle_wake_up_request(ret.wake_up.vm_id,
						  ret.wake_up.vcpu);
			break;

		/* Response available. */
		case HF_VCPU_RUN_MESSAGE:
			if (ret.message.vm_id == HF_PRIMARY_VM_ID) {
				hf_handle_message(vcpu->vm,
						  page_address(hf_recv_page),
						  ret.message.size);
			} else {
				hf_deliver_message(ret.message.vm_id);
			}
			break;

		/* Notify all waiters. */
		case HF_VCPU_RUN_NOTIFY_WAITERS:
			hf_notify_waiters(vcpu->vm->id);
			break;

		/* Abort was triggered. */
		case HF_VCPU_RUN_ABORTED:
			for (i = 0; i < vcpu->vm->vcpu_count; i++) {
				if (i == vcpu->vcpu_index)
					continue;
				hf_handle_wake_up_request(vcpu->vm->id, i);
			}
			hf_vcpu_sleep(vcpu);
			break;
		}
	}

	return 0;
}

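/*
 * Scheduling note: each vCPU thread is an ordinary kernel thread, so the Linux
 * scheduler decides when a secondary VM actually runs. Hafnium only reports,
 * through hf_vcpu_run(), why the vCPU stopped and what would let it make
 * progress again (timer expiry, message arrival, or an explicit wake-up), and
 * the loop above translates that into sleeps and wake-ups.
 */
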
/**
 * Converts a pointer to a struct sock into a pointer to a struct hf_sock. It
 * relies on the fact that the first field of hf_sock is a sock.
 */
static struct hf_sock *hsock_from_sk(struct sock *sk)
{
	return (struct hf_sock *)sk;
}

/**
 * This is called when the last reference to the outer socket is released. For
 * example, if it's a user-space socket, when the last file descriptor pointing
 * to this socket is closed.
 *
 * It begins cleaning up resources, though some can only be cleaned up after all
 * references to the underlying socket are released, which is handled by
 * hf_sock_destruct().
 */
static int hf_sock_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct hf_sock *hsock = hsock_from_sk(sk);
	unsigned long flags;

	if (!sk)
		return 0;

	/* Shutdown for both send and receive. */
	lock_sock(sk);
	sk->sk_shutdown |= RCV_SHUTDOWN | SEND_SHUTDOWN;
	sk->sk_state_change(sk);
	release_sock(sk);

	/* Remove from the hash table, so lookups from now on won't find it. */
	spin_lock_irqsave(&hf_local_port_hash_lock, flags);
	hash_del_rcu(&hsock->sk.sk_node);
	spin_unlock_irqrestore(&hf_local_port_hash_lock, flags);

	/*
	 * TODO: When we implement a tx queue, we need to clear it here so that
	 * sk_wmem_alloc will not prevent sk from being freed (sk_free).
	 */

	/*
	 * Wait for in-flight lookups to finish. We need to do this here because
	 * in-flight lookups rely on the reference to the socket we're about to
	 * release.
	 */
	synchronize_rcu();
	sock_put(sk);
	sock->sk = NULL;

	return 0;
}

/**
 * This is called when there are no more references to the socket. It frees all
 * resources that haven't been freed during release.
 */
static void hf_sock_destruct(struct sock *sk)
{
	/*
	 * Clear the receive queue now that the handler cannot add any more
	 * skbs to it.
	 */
	skb_queue_purge(&sk->sk_receive_queue);
}

/**
 * Connects the Hafnium socket to the provided VM and port. After the socket is
 * connected, it can be used to exchange datagrams with the specified peer.
 */
static int hf_sock_connect(struct socket *sock, struct sockaddr *saddr, int len,
			   int connect_flags)
{
	struct sock *sk = sock->sk;
	struct hf_sock *hsock = hsock_from_sk(sk);
	struct hf_vm *vm;
	struct sockaddr_hf *addr;
	int err;
	unsigned long flags;

	/* Basic address validation. */
	if (len < sizeof(struct sockaddr_hf) || saddr->sa_family != AF_HF)
		return -EINVAL;

	addr = (struct sockaddr_hf *)saddr;
	vm = hf_vm_from_id(addr->vm_id);
	if (!vm)
		return -ENETUNREACH;

	/*
	 * TODO: Once we implement access control in Hafnium, check that the
	 * caller is allowed to contact the specified VM. Return -ECONNREFUSED
	 * if access is denied.
	 */

	/* Take lock to make sure state doesn't change as we connect. */
	lock_sock(sk);

	/* Only unconnected sockets are allowed to become connected. */
	if (sock->state != SS_UNCONNECTED) {
		err = -EISCONN;
		goto exit;
	}

	hsock->local_port = atomic64_inc_return(&hf_next_port);
	hsock->remote_port = addr->port;
	hsock->peer_vm = vm;

	sock->state = SS_CONNECTED;

	/* Add socket to hash table now that it's fully initialised. */
	spin_lock_irqsave(&hf_local_port_hash_lock, flags);
	hash_add_rcu(hf_local_port_hash, &sk->sk_node, hsock->local_port);
	spin_unlock_irqrestore(&hf_local_port_hash_lock, flags);

	err = 0;
exit:
	release_sock(sk);
	return err;
}

/**
 * Sends the given skb to the appropriate VM by calling Hafnium. It will also
 * trigger the wake up of a recipient VM.
 *
 * Takes ownership of the skb on success.
 */
static int hf_send_skb(struct sk_buff *skb)
{
	unsigned long flags;
	int64_t ret;
	struct hf_sock *hsock = hsock_from_sk(skb->sk);
	struct hf_vm *vm = hsock->peer_vm;

	/*
	 * Call Hafnium under the send lock so that we serialize the use of the
	 * global send buffer.
	 */
	spin_lock_irqsave(&hf_send_lock, flags);
	memcpy(page_address(hf_send_page), skb->data, skb->len);
	ret = hf_mailbox_send(vm->id, skb->len, false);
	spin_unlock_irqrestore(&hf_send_lock, flags);

	if (ret < 0)
		return -EAGAIN;

	/* Ensure the VM will run to pick up the message. */
	hf_deliver_message(vm->id);

	kfree_skb(skb);

	return 0;
}

/**
 * Determines if the given socket is in the connected state. It acquires and
 * releases the socket lock.
 */
static bool hf_sock_is_connected(struct socket *sock)
{
	bool ret;

	lock_sock(sock->sk);
	ret = sock->state == SS_CONNECTED;
	release_sock(sock->sk);

	return ret;
}

/**
 * Sends a message to the VM & port the socket is connected to. All variants
 * of write/send/sendto/sendmsg eventually call this function.
 */
static int hf_sock_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int err;
	struct hf_msg_hdr *hdr;
	struct hf_sock *hsock = hsock_from_sk(sk);

	/* Check length. */
	if (len > HF_MAILBOX_SIZE - sizeof(struct hf_msg_hdr))
		return -EMSGSIZE;

	/* We don't allow the destination address to be specified. */
	if (m->msg_namelen > 0)
		return -EISCONN;

	/* We don't support out-of-band messages. */
	if (m->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	/*
	 * Ensure that the socket is connected. We don't need to hold the socket
	 * lock (acquired and released by hf_sock_is_connected) for the
	 * remainder of the function because the fields we care about are
	 * immutable once the state is SS_CONNECTED.
	 */
	if (!hf_sock_is_connected(sock))
		return -ENOTCONN;

	/*
	 * Allocate an skb for this write. If there isn't enough room in the
	 * socket's send buffer (sk_wmem_alloc >= sk_sndbuf), this will block
	 * (if it's a blocking call). On success, it increments sk_wmem_alloc
	 * and sets up the skb such that sk_wmem_alloc gets decremented when
	 * the skb is freed (sock_wfree gets called).
	 */
	skb = sock_alloc_send_skb(sk, len + sizeof(struct hf_msg_hdr),
				  m->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	/* Reserve room for the header and initialise it. */
	skb_reserve(skb, sizeof(struct hf_msg_hdr));
	hdr = skb_push(skb, sizeof(struct hf_msg_hdr));
	hdr->src_port = hsock->local_port;
	hdr->dst_port = hsock->remote_port;

	/* Allocate area for the contents, then copy into skb. */
	if (!copy_from_iter_full(skb_put(skb, len), len, &m->msg_iter)) {
		err = -EFAULT;
		goto err_cleanup;
	}

	/*
	 * TODO: We currently do this inline, but when we have support for
	 * readiness notification from Hafnium, we must add this to a per-VM tx
	 * queue that can make progress when the VM becomes writable. This will
	 * fix send buffering and poll readiness notification.
	 */
	err = hf_send_skb(skb);
	if (err)
		goto err_cleanup;

	return 0;

err_cleanup:
	kfree_skb(skb);
	return err;
}

/**
 * Receives a message originating from the VM & port the socket is connected
 * to. All variants of read/recv/recvfrom/recvmsg eventually call this
 * function.
 */
static int hf_sock_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
			   int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int err;
	size_t copy_len;

	if (!hf_sock_is_connected(sock))
		return -ENOTCONN;

	/* Grab the next skb from the receive queue. */
	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	/* Make sure we don't copy more than what fits in the output buffer. */
	copy_len = skb->len;
	if (copy_len > len) {
		copy_len = len;
		m->msg_flags |= MSG_TRUNC;
	}

	/* Make sure we don't overflow the return value type. */
	if (copy_len > INT_MAX) {
		copy_len = INT_MAX;
		m->msg_flags |= MSG_TRUNC;
	}

	/* Copy skb to the output iterator, then free it. */
	err = skb_copy_datagram_msg(skb, 0, m, copy_len);
	skb_free_datagram(sk, skb);
	if (err)
		return err;

	return copy_len;
}

/**
 * This function is called when a Hafnium socket is created. It initialises all
 * state such that the caller will be able to connect the socket and then send
 * and receive messages through it.
 */
static int hf_sock_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	static const struct proto_ops ops = {
		.family = PF_HF,
		.owner = THIS_MODULE,
		.release = hf_sock_release,
		.bind = sock_no_bind,
		.connect = hf_sock_connect,
		.socketpair = sock_no_socketpair,
		.accept = sock_no_accept,
		.ioctl = sock_no_ioctl,
		.listen = sock_no_listen,
		.shutdown = sock_no_shutdown,
		.setsockopt = sock_no_setsockopt,
		.getsockopt = sock_no_getsockopt,
		.sendmsg = hf_sock_sendmsg,
		.recvmsg = hf_sock_recvmsg,
		.mmap = sock_no_mmap,
		.sendpage = sock_no_sendpage,
		.poll = datagram_poll,
	};
	struct sock *sk;

	if (sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol != 0)
		return -EPROTONOSUPPORT;

	/*
	 * For now we only allow callers with sys admin capability to create
	 * Hafnium sockets.
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Allocate and initialise socket. */
	sk = sk_alloc(net, PF_HF, GFP_KERNEL, &hf_sock_proto, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	sk->sk_destruct = hf_sock_destruct;
	sock->ops = &ops;
	sock->state = SS_UNCONNECTED;

	return 0;
}

/**
 * Frees all resources, including threads, associated with the Hafnium driver.
 */
static void hf_free_resources(void)
{
	uint32_t i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/* Free resources. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

/**
 * Handles the hypervisor timer interrupt.
 */
static irqreturn_t hf_nop_irq_handler(int irq, void *dev)
{
	/*
	 * No need to do anything, the interrupt only exists to return to the
	 * primary vCPU so that the virtual timer will be restored and fire as
	 * normal.
	 */
	return IRQ_HANDLED;
}

/**
 * Enables the hypervisor timer interrupt on a CPU, when it starts or after the
 * driver is first loaded.
 */
static int hf_starting_cpu(unsigned int cpu)
{
	if (hf_irq != 0) {
		/* Enable the interrupt, and set it to be edge-triggered. */
		enable_percpu_irq(hf_irq, IRQ_TYPE_EDGE_RISING);
	}

	return 0;
}

/**
 * Disables the hypervisor timer interrupt on a CPU when it is powered down.
 */
static int hf_dying_cpu(unsigned int cpu)
{
	if (hf_irq != 0) {
		/* Disable the interrupt while the CPU is asleep. */
		disable_percpu_irq(hf_irq);
	}

	return 0;
}

/**
 * Registers for the hypervisor timer interrupt.
 */
static int hf_int_driver_probe(struct platform_device *pdev)
{
	int irq;
	int ret;

	/*
	 * Register a handler for the hypervisor timer IRQ, as it is needed for
	 * Hafnium to emulate the virtual timer for Linux while a secondary vCPU
	 * is running.
	 */
	irq = platform_get_irq(pdev, ARCH_TIMER_HYP_PPI);
	if (irq < 0) {
		pr_err("Error getting hypervisor timer IRQ: %d\n", irq);
		return irq;
	}
	hf_irq = irq;

	ret = request_percpu_irq(irq, hf_nop_irq_handler, HYPERVISOR_TIMER_NAME,
				 pdev);
	if (ret != 0) {
		pr_err("Error registering hypervisor timer IRQ %d: %d\n",
		       irq, ret);
		return ret;
	}
	pr_info("Hafnium registered for IRQ %d\n", irq);
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				"hafnium/hypervisor_timer:starting",
				hf_starting_cpu, hf_dying_cpu);
	if (ret < 0) {
		pr_err("Error enabling timer on all CPUs: %d\n", ret);
		free_percpu_irq(irq, pdev);
		return ret;
	}
	hf_cpuhp_state = ret;

	return 0;
}

/**
 * Unregisters for the hypervisor timer interrupt.
 */
static int hf_int_driver_remove(struct platform_device *pdev)
{
	/*
	 * This will cause hf_dying_cpu to be called on each CPU, which will
	 * disable the IRQs.
	 */
	cpuhp_remove_state(hf_cpuhp_state);
	free_percpu_irq(hf_irq, pdev);

	return 0;
}

static const struct of_device_id hf_int_driver_id[] = {
	{.compatible = "arm,armv7-timer"},
	{.compatible = "arm,armv8-timer"},
	{}
};

static struct platform_driver hf_int_driver = {
	.driver = {
		.name = HYPERVISOR_TIMER_NAME,
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(hf_int_driver_id),
	},
	.probe = hf_int_driver_probe,
	.remove = hf_int_driver_remove,
};

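/*
 * Initialisation-order sketch for hf_init() below: configure the shared
 * mailbox pages, enumerate VMs and their vCPUs, create (but don't yet start)
 * one kernel thread per vCPU, register the socket protocol and family,
 * register the timer platform driver, and only then wake the vCPU threads.
 * The failure labels at the end unwind whatever had been set up so far.
 */
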
/**
 * Initializes the Hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	static const struct net_proto_family proto_family = {
		.family = PF_HF,
		.create = hf_sock_create,
		.owner = THIS_MODULE,
	};
	int64_t ret;
	uint32_t i, j;
	uint32_t total_vm_count;
	uint32_t total_vcpu_count;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these pages
	 * because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_vm_configure(page_to_phys(hf_send_page),
			      page_to_phys(hf_recv_page));
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from the
		 * hypervisor and go from there.
		 */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs. */
	ret = hf_vm_get_count();
	if (ret < 0) {
		pr_err("Unable to retrieve number of VMs: %lld\n", ret);
		return -EIO;
	}

	/* Confirm the maximum number of VMs looks sane. */
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS < 1);
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS > U16_MAX);

	/* Validate the number of VMs. There must at least be the primary. */
	if (ret < 1 || ret > CONFIG_HAFNIUM_MAX_VMS) {
		pr_err("Number of VMs is out of range: %lld\n", ret);
		return -EDQUOT;
	}

	/* Only track the secondary VMs. */
	total_vm_count = ret - 1;
	hf_vms = kmalloc_array(total_vm_count, sizeof(struct hf_vm),
			       GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Initialize each VM. */
	total_vcpu_count = 0;
	for (i = 0; i < total_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		/* Adjust the ID as only the secondaries are tracked. */
		vm->id = i + FIRST_SECONDARY_VM_ID;

		ret = hf_vcpu_get_count(vm->id);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%u: %lld\n",
			       vm->id, ret);
			ret = -EIO;
			goto fail_with_cleanup;
		}

		/* Avoid overflowing the vcpu count. */
		if (ret > (U32_MAX - total_vcpu_count)) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		/* Confirm the maximum number of VCPUs looks sane. */
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS < 1);
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS > U16_MAX);

		/* Enforce the limit on vcpus. */
		total_vcpu_count += ret;
		if (total_vcpu_count > CONFIG_HAFNIUM_MAX_VCPUS) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc_array(vm->vcpu_count, sizeof(struct hf_vcpu),
					 GFP_KERNEL);
		if (!vm->vcpu) {
			ret = -ENOMEM;
			goto fail_with_cleanup;
		}

		/* Update the number of initialized VMs. */
		hf_vm_count = i + 1;

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = &vm->vcpu[j];

			vcpu->task =
				kthread_create(hf_vcpu_thread, vcpu,
					       "vcpu_thread_%u_%u", vm->id, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%u,vcpu=%u): %ld\n",
				       vm->id, j, PTR_ERR(vcpu->task));
				vm->vcpu_count = j;
				ret = PTR_ERR(vcpu->task);
				goto fail_with_cleanup;
			}

			get_task_struct(vcpu->task);
			vcpu->vm = vm;
			vcpu->vcpu_index = j;
			atomic_set(&vcpu->abort_sleep, 0);
			/*
			 * The vcpu array is not zeroed on allocation, so the
			 * flag must be initialized before hf_deliver_message()
			 * can read it from another thread.
			 */
			atomic_set(&vcpu->waiting_for_message, 0);
		}
	}

	/* Register the protocol and socket family. */
	ret = proto_register(&hf_sock_proto, 0);
	if (ret) {
		pr_err("Unable to register protocol: %lld\n", ret);
		goto fail_with_cleanup;
	}

	ret = sock_register(&proto_family);
	if (ret) {
		pr_err("Unable to register Hafnium's socket family: %lld\n",
		       ret);
		goto fail_unregister_proto;
	}

	/*
	 * Register as a driver for the timer device, so we can register a
	 * handler for the hypervisor timer IRQ.
	 */
	ret = platform_driver_register(&hf_int_driver);
	if (ret != 0) {
		pr_err("Error registering timer driver: %lld\n", ret);
		goto fail_unregister_socket;
	}

	/*
	 * Start running threads now that all is initialized.
	 *
	 * Any failures from this point on must also unregister the driver with
	 * platform_driver_unregister().
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %u VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		pr_info("\tVM %u: %u vCPUs\n", vm->id, vm->vcpu_count);
	}

	return 0;

fail_unregister_socket:
	sock_unregister(PF_HF);
fail_unregister_proto:
	proto_unregister(&hf_sock_proto);
fail_with_cleanup:
	hf_free_resources();
	return ret;
}

/**
 * Frees up all resources used by the Hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	pr_info("Preparing to unload Hafnium\n");
	sock_unregister(PF_HF);
	proto_unregister(&hf_sock_proto);
	hf_free_resources();
	platform_driver_unregister(&hf_int_driver);
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL v2");

module_init(hf_init);
module_exit(hf_exit);