// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2018 The Hafnium Authors.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <clocksource/arm_arch_timer.h>
#include <linux/atomic.h>
#include <linux/cpuhotplug.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <net/sock.h>

#include <hf/call.h>
#include <hf/spci.h>

/* TODO: Reusing AF_ECONET for now as it's otherwise unused. */
#define AF_HF AF_ECONET
#define PF_HF AF_HF

#define HYPERVISOR_TIMER_NAME "el2_timer"

#define CONFIG_HAFNIUM_MAX_VMS 16
#define CONFIG_HAFNIUM_MAX_VCPUS 32

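/*
 * VM IDs are assigned by the hypervisor: HF_PRIMARY_VM_ID names the primary
 * VM and the secondaries follow it, so hf_vms[0] below tracks the VM whose ID
 * is FIRST_SECONDARY_VM_ID (see hf_vm_from_id()).
 */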
#define FIRST_SECONDARY_VM_ID 1

struct hf_vcpu {
	struct hf_vm *vm;
	uint32_t vcpu_index;
	struct task_struct *task;
	atomic_t abort_sleep;
	atomic_t waiting_for_message;
	struct hrtimer timer;
};

struct hf_vm {
	uint32_t id;
	uint32_t vcpu_count;
	struct hf_vcpu *vcpu;
};

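/*
 * Header prepended to every socket message. On the wire, the SPCI mailbox
 * carries an spci_message, whose payload starts with this hf_msg_hdr and is
 * followed by the user data (see hf_handle_message() and hf_send_skb()).
 */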
struct hf_msg_hdr {
	uint64_t src_port;
	uint64_t dst_port;
};

struct hf_sock {
	/* This needs to be the first field. */
	struct sock sk;

	/*
	 * The following fields are immutable after the socket transitions to
	 * SS_CONNECTED state.
	 */
	uint64_t local_port;
	uint64_t remote_port;
	struct hf_vm *peer_vm;
};

struct sockaddr_hf {
	sa_family_t family;
	uint32_t vm_id;
	uint64_t port;
};
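
/*
 * A minimal user-space sketch of how this family is used, illustrative only:
 * it assumes this module is loaded, that the definitions above are visible to
 * the caller, and that the caller has CAP_SYS_ADMIN as hf_sock_create()
 * requires. The port number is hypothetical; datagrams then flow through
 * hf_sock_sendmsg() and hf_sock_recvmsg().
 *
 *	int fd = socket(AF_HF, SOCK_DGRAM, 0);
 *	struct sockaddr_hf addr = {
 *		.family = AF_HF,
 *		.vm_id = FIRST_SECONDARY_VM_ID,
 *		.port = 10,
 *	};
 *
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	send(fd, buf, buf_len, 0);
 *	recv(fd, reply, reply_len, 0);
 */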

static struct proto hf_sock_proto = {
	.name = "hafnium",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct hf_sock),
};

static struct hf_vm *hf_vms;
static uint32_t hf_vm_count;
static struct page *hf_send_page;
static struct page *hf_recv_page;
static atomic64_t hf_next_port = ATOMIC64_INIT(0);
static DEFINE_SPINLOCK(hf_send_lock);
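/*
 * Sockets are hashed by their local port. Mutations take
 * hf_local_port_hash_lock (hash_add_rcu()/hash_del_rcu()), while the receive
 * path in hf_handle_message() does lookups under rcu_read_lock() only.
 */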
static DEFINE_HASHTABLE(hf_local_port_hash, 7);
static DEFINE_SPINLOCK(hf_local_port_hash_lock);
static int hf_irq;
static enum cpuhp_state hf_cpuhp_state;
static spci_vm_id_t current_vm_id;

/**
 * Retrieves a VM from its ID, returning NULL if the VM doesn't exist.
 */
static struct hf_vm *hf_vm_from_id(uint32_t vm_id)
{
	if (vm_id < FIRST_SECONDARY_VM_ID ||
	    vm_id >= FIRST_SECONDARY_VM_ID + hf_vm_count)
		return NULL;

	return &hf_vms[vm_id - FIRST_SECONDARY_VM_ID];
}

/**
 * Wakes up the kernel thread responsible for running the given vcpu.
 *
 * Returns 0 if the thread was already running, 1 otherwise.
 */
static int hf_vcpu_wake_up(struct hf_vcpu *vcpu)
{
	/* Set a flag indicating that the thread should not go to sleep. */
	atomic_set(&vcpu->abort_sleep, 1);

	/* Set the thread to running state. */
	return wake_up_process(vcpu->task);
}

/**
 * Puts the current thread to sleep. The current thread must be responsible for
 * running the given vcpu.
 *
 * Going to sleep will fail if hf_vcpu_wake_up() or kthread_stop() was called on
 * this vcpu/thread since the last time it [re]started running.
 */
static void hf_vcpu_sleep(struct hf_vcpu *vcpu)
{
	int abort;

	set_current_state(TASK_INTERRUPTIBLE);

	/*
	 * Check the sleep-abort flag after making the thread interruptible, so
	 * that a concurrent hf_vcpu_wake_up() either sets the flag before we
	 * read it here or moves the task back to TASK_RUNNING, making the
	 * schedule() below return immediately; either way the wake-up is not
	 * lost.
	 */
	abort = atomic_read(&vcpu->abort_sleep);
	if (!abort && !kthread_should_stop())
		schedule();

	/* Set state back to running on the way out. */
	set_current_state(TASK_RUNNING);
}

/**
 * Wakes up the thread associated with the vcpu that owns the given timer. This
 * is called when the timer the thread is waiting on expires.
 */
static enum hrtimer_restart hf_vcpu_timer_expired(struct hrtimer *timer)
{
	struct hf_vcpu *vcpu = container_of(timer, struct hf_vcpu, timer);
	/* TODO: Inject interrupt. */
	hf_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

/**
 * This function is called when Hafnium requests that the primary VM wake up a
 * vCPU that belongs to a secondary VM.
 *
 * It wakes up the thread if it's sleeping, or kicks it if it's already running.
 */
static void hf_handle_wake_up_request(uint32_t vm_id, uint16_t vcpu)
{
	struct hf_vm *vm = hf_vm_from_id(vm_id);

	if (!vm) {
		pr_warn("Request to wake up non-existent VM id: %u\n", vm_id);
		return;
	}

	if (vcpu >= vm->vcpu_count) {
		pr_warn("Request to wake up non-existent vCPU: %u.%u\n",
			vm_id, vcpu);
		return;
	}

	if (hf_vcpu_wake_up(&vm->vcpu[vcpu]) == 0) {
		/*
		 * The task was already running (presumably on a different
		 * physical CPU); interrupt it. This gives Hafnium a chance to
		 * inject any new interrupts.
		 */
		kick_process(vm->vcpu[vcpu].task);
	}
}

/**
 * Injects an interrupt into a vCPU of the VM and ensures the vCPU will run to
 * handle the interrupt.
 */
static void hf_interrupt_vm(uint32_t vm_id, uint64_t int_id)
{
	struct hf_vm *vm = hf_vm_from_id(vm_id);
	uint16_t vcpu;
	int64_t ret;

	if (!vm) {
		pr_warn("Request to interrupt non-existent VM id: %u\n", vm_id);
		return;
	}

	/*
	 * TODO: For now we're picking the first vcpu to interrupt, but
	 * we want to be smarter.
	 */
	vcpu = 0;
	ret = hf_interrupt_inject(vm_id, vcpu, int_id);

	if (ret == -1) {
		pr_warn("Failed to inject interrupt %lld to vCPU %d of VM %d\n",
			int_id, vcpu, vm_id);
		return;
	}

	if (ret != 1) {
		/* We don't need to wake up the vcpu. */
		return;
	}

	hf_handle_wake_up_request(vm_id, vcpu);
}

/**
 * Notifies all waiters on the given VM.
 */
static void hf_notify_waiters(uint32_t vm_id)
{
	int64_t waiter_vm_id;

	while ((waiter_vm_id = hf_mailbox_waiter_get(vm_id)) != -1) {
		if (waiter_vm_id == HF_PRIMARY_VM_ID) {
			/*
			 * TODO: Use this information when implementing per-vm
			 * queues.
			 */
		} else {
			hf_interrupt_vm(waiter_vm_id,
					HF_MAILBOX_WRITABLE_INTID);
		}
	}
}

/**
 * Delivers a message to a VM.
 */
static void hf_deliver_message(uint32_t vm_id)
{
	struct hf_vm *vm = hf_vm_from_id(vm_id);
	uint32_t i;

	if (!vm) {
		pr_warn("Tried to deliver message to non-existent VM id: %u\n",
			vm_id);
		return;
	}

	/* Try to wake a vCPU that is waiting for a message. */
	for (i = 0; i < vm->vcpu_count; i++) {
		if (atomic_read(&vm->vcpu[i].waiting_for_message)) {
			hf_handle_wake_up_request(vm->id,
						  vm->vcpu[i].vcpu_index);
			return;
		}
	}

	/* None were waiting for a message so interrupt one. */
	hf_interrupt_vm(vm->id, HF_MAILBOX_READABLE_INTID);
}

/**
 * Handles a message delivered to this VM by validating that it's well-formed
 * and then queueing it for delivery to the appropriate socket.
 */
static void hf_handle_message(struct hf_vm *sender,
			      const struct spci_message *message)
{
	struct hf_sock *hsock;
	const struct hf_msg_hdr *hdr = (struct hf_msg_hdr *)message->payload;
	size_t len = message->length;
	struct sk_buff *skb;
	int err;

	/* Ignore messages that are too small to hold a header. */
	if (len < sizeof(struct hf_msg_hdr))
		return;

	len -= sizeof(struct hf_msg_hdr);

	/* Go through the colliding sockets. */
	rcu_read_lock();
	hash_for_each_possible_rcu(hf_local_port_hash, hsock, sk.sk_node,
				   hdr->dst_port) {
		if (hsock->peer_vm == sender &&
		    hsock->remote_port == hdr->src_port) {
			sock_hold(&hsock->sk);
			break;
		}
	}
	rcu_read_unlock();

	/* Nothing to do if we couldn't find the target. */
	if (!hsock)
		return;

	/*
	 * TODO: From this point on, there are two failure paths: when we
	 * create the skb below, and when we enqueue it to the socket. What
	 * should we do if they fail? Ideally we would have some form of flow
	 * control to prevent message loss, but how to do it efficiently?
	 *
	 * One option is to have a pre-allocated message that indicates to the
	 * sender that a message was dropped. This way we guarantee that the
	 * sender will be aware of loss and should back-off.
	 */
	/* Create the skb. */
	skb = alloc_skb(len, GFP_KERNEL);
	if (!skb)
		goto exit;

	memcpy(skb_put(skb, len), hdr + 1, len);

	/*
	 * Add the skb to the receive queue of the target socket. On success it
	 * calls sk->sk_data_ready, which is currently set to sock_def_readable,
	 * which wakes up any waiters.
	 */
	err = sock_queue_rcv_skb(&hsock->sk, skb);
	if (err)
		kfree_skb(skb);

exit:
	sock_put(&hsock->sk);

	if (hf_mailbox_clear() == 1)
		hf_notify_waiters(HF_PRIMARY_VM_ID);
}

/**
 * This is the main loop of each vcpu.
 */
static int hf_vcpu_thread(void *data)
{
	struct hf_vcpu *vcpu = data;
	struct hf_vcpu_run_return ret;

	hrtimer_init(&vcpu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->timer.function = &hf_vcpu_timer_expired;

	while (!kthread_should_stop()) {
		uint32_t i;

		/*
		 * We're about to run the vcpu, so we can reset the abort-sleep
		 * flag.
		 */
		atomic_set(&vcpu->abort_sleep, 0);

		/* Call into Hafnium to run vcpu. */
		ret = hf_vcpu_run(vcpu->vm->id, vcpu->vcpu_index);

		switch (ret.code) {
		/* Preempted. */
		case HF_VCPU_RUN_PREEMPTED:
			if (need_resched())
				schedule();
			break;

		/* Yield. */
		case HF_VCPU_RUN_YIELD:
			if (!kthread_should_stop())
				schedule();
			break;

		/* WFI. */
		case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
			if (ret.sleep.ns != HF_SLEEP_INDEFINITE) {
				hrtimer_start(&vcpu->timer, ret.sleep.ns,
					      HRTIMER_MODE_REL);
			}
			hf_vcpu_sleep(vcpu);
			hrtimer_cancel(&vcpu->timer);
			break;

		/* Waiting for a message. */
		case HF_VCPU_RUN_WAIT_FOR_MESSAGE:
			atomic_set(&vcpu->waiting_for_message, 1);
			if (ret.sleep.ns != HF_SLEEP_INDEFINITE) {
				hrtimer_start(&vcpu->timer, ret.sleep.ns,
					      HRTIMER_MODE_REL);
			}
			hf_vcpu_sleep(vcpu);
			hrtimer_cancel(&vcpu->timer);
			atomic_set(&vcpu->waiting_for_message, 0);
			break;

		/* Wake up another vcpu. */
		case HF_VCPU_RUN_WAKE_UP:
			hf_handle_wake_up_request(ret.wake_up.vm_id,
						  ret.wake_up.vcpu);
			break;

		/* Response available. */
		case HF_VCPU_RUN_MESSAGE:
			if (ret.message.vm_id == HF_PRIMARY_VM_ID) {
				hf_handle_message(vcpu->vm,
						  page_address(hf_recv_page));
			} else {
				hf_deliver_message(ret.message.vm_id);
			}
			break;

		/* Notify all waiters. */
		case HF_VCPU_RUN_NOTIFY_WAITERS:
			hf_notify_waiters(vcpu->vm->id);
			break;

		/* Abort was triggered. */
		case HF_VCPU_RUN_ABORTED:
			for (i = 0; i < vcpu->vm->vcpu_count; i++) {
				if (i == vcpu->vcpu_index)
					continue;
				hf_handle_wake_up_request(vcpu->vm->id, i);
			}
			hf_vcpu_sleep(vcpu);
			break;
		}
	}

	return 0;
}

/**
 * Converts a pointer to a struct sock into a pointer to a struct hf_sock. It
 * relies on the fact that the first field of hf_sock is a sock.
 */
static struct hf_sock *hsock_from_sk(struct sock *sk)
{
	return (struct hf_sock *)sk;
}

/**
 * This is called when the last reference to the outer socket is released. For
 * example, if it's a user-space socket, when the last file descriptor pointing
 * to this socket is closed.
 *
 * It begins cleaning up resources, though some can only be cleaned up after all
 * references to the underlying socket are released, which is handled by
 * hf_sock_destruct().
 */
static int hf_sock_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct hf_sock *hsock = hsock_from_sk(sk);
	unsigned long flags;

	if (!sk)
		return 0;

	/* Shutdown for both send and receive. */
	lock_sock(sk);
	sk->sk_shutdown |= RCV_SHUTDOWN | SEND_SHUTDOWN;
	sk->sk_state_change(sk);
	release_sock(sk);

	/* Remove from the hash table, so lookups from now on won't find it. */
	spin_lock_irqsave(&hf_local_port_hash_lock, flags);
	hash_del_rcu(&hsock->sk.sk_node);
	spin_unlock_irqrestore(&hf_local_port_hash_lock, flags);

	/*
	 * TODO: When we implement a tx queue, we need to clear it here so that
	 * sk_wmem_alloc will not prevent sk from being freed (sk_free).
	 */

	/*
	 * Wait for in-flight lookups to finish. We need to do this here because
	 * in-flight lookups rely on the reference to the socket we're about to
	 * release.
	 */
	synchronize_rcu();
	sock_put(sk);
	sock->sk = NULL;

	return 0;
}

/**
 * This is called when there are no more references to the socket. It frees all
 * resources that haven't been freed during release.
 */
static void hf_sock_destruct(struct sock *sk)
{
	/*
	 * Clear the receive queue now that the handler cannot add any more
	 * skbs to it.
	 */
	skb_queue_purge(&sk->sk_receive_queue);
}

/**
 * Connects the Hafnium socket to the provided VM and port. After the socket is
 * connected, it can be used to exchange datagrams with the specified peer.
 */
static int hf_sock_connect(struct socket *sock, struct sockaddr *saddr, int len,
			   int connect_flags)
{
	struct sock *sk = sock->sk;
	struct hf_sock *hsock = hsock_from_sk(sk);
	struct hf_vm *vm;
	struct sockaddr_hf *addr;
	int err;
	unsigned long flags;

	/* Basic address validation. */
	if (len < sizeof(struct sockaddr_hf) || saddr->sa_family != AF_HF)
		return -EINVAL;

	addr = (struct sockaddr_hf *)saddr;
	vm = hf_vm_from_id(addr->vm_id);
	if (!vm)
		return -ENETUNREACH;

	/*
	 * TODO: Once we implement access control in Hafnium, check that the
	 * caller is allowed to contact the specified VM. Return -ECONNREFUSED
	 * if access is denied.
	 */

	/* Take lock to make sure state doesn't change as we connect. */
	lock_sock(sk);

	/* Only unconnected sockets are allowed to become connected. */
	if (sock->state != SS_UNCONNECTED) {
		err = -EISCONN;
		goto exit;
	}

	hsock->local_port = atomic64_inc_return(&hf_next_port);
	hsock->remote_port = addr->port;
	hsock->peer_vm = vm;

	sock->state = SS_CONNECTED;

	/* Add socket to hash table now that it's fully initialised. */
	spin_lock_irqsave(&hf_local_port_hash_lock, flags);
	hash_add_rcu(hf_local_port_hash, &sk->sk_node, hsock->local_port);
	spin_unlock_irqrestore(&hf_local_port_hash_lock, flags);

	err = 0;
exit:
	release_sock(sk);
	return err;
}

/**
 * Sends the given skb to the appropriate VM by calling Hafnium. It will also
 * trigger the wake up of a recipient VM.
 *
 * Takes ownership of the skb on success.
 */
static int hf_send_skb(struct sk_buff *skb)
{
	unsigned long flags;
	int64_t ret;
	struct hf_sock *hsock = hsock_from_sk(skb->sk);
	struct hf_vm *vm = hsock->peer_vm;
	struct spci_message *message = page_address(hf_send_page);

	/*
	 * Call Hafnium under the send lock so that we serialize the use of the
	 * global send buffer.
	 */
	spin_lock_irqsave(&hf_send_lock, flags);
	memcpy(message->payload, skb->data, skb->len);
	spci_message_init(message, skb->len, vm->id, current_vm_id);

	ret = spci_msg_send(0);
	spin_unlock_irqrestore(&hf_send_lock, flags);

	if (ret < 0)
		return -EAGAIN;

	/* Ensure the VM will run to pick up the message. */
	hf_deliver_message(vm->id);

	kfree_skb(skb);

	return 0;
}

/**
 * Determines if the given socket is in the connected state. It acquires and
 * releases the socket lock.
 */
static bool hf_sock_is_connected(struct socket *sock)
{
	bool ret;

	lock_sock(sock->sk);
	ret = sock->state == SS_CONNECTED;
	release_sock(sock->sk);

	return ret;
}

/**
 * Sends a message to the VM & port the socket is connected to. All variants
 * of write/send/sendto/sendmsg eventually call this function.
 */
static int hf_sock_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int err;
	struct hf_msg_hdr *hdr;
	struct hf_sock *hsock = hsock_from_sk(sk);
	size_t payload_max_len = HF_MAILBOX_SIZE - sizeof(struct spci_message)
				 - sizeof(struct hf_msg_hdr);
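	/*
	 * hf_msg_hdr is 16 bytes (two uint64_t ports), so with, for example, a
	 * 4 KiB mailbox and a 16-byte spci_message header (both illustrative;
	 * the real values come from the hf/ headers), the payload budget would
	 * be 4096 - 16 - 16 = 4064 bytes.
	 */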

	/* Check length. */
	if (len > payload_max_len)
		return -EMSGSIZE;

	/* We don't allow the destination address to be specified. */
	if (m->msg_namelen > 0)
		return -EISCONN;

	/* We don't support out of band messages. */
	if (m->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	/*
	 * Ensure that the socket is connected. We don't need to hold the socket
	 * lock (acquired and released by hf_sock_is_connected) for the
	 * remainder of the function because the fields we care about are
	 * immutable once the state is SS_CONNECTED.
	 */
	if (!hf_sock_is_connected(sock))
		return -ENOTCONN;

	/*
	 * Allocate an skb for this write. If there isn't enough room in the
	 * socket's send buffer (sk_wmem_alloc >= sk_sndbuf), this will block
	 * (if it's a blocking call). On success, it increments sk_wmem_alloc
	 * and sets up the skb such that sk_wmem_alloc gets decremented when
	 * the skb is freed (sock_wfree gets called).
	 */
	skb = sock_alloc_send_skb(sk, len + sizeof(struct hf_msg_hdr),
				  m->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	/* Reserve room for the header and initialise it. */
	skb_reserve(skb, sizeof(struct hf_msg_hdr));
	hdr = skb_push(skb, sizeof(struct hf_msg_hdr));
	hdr->src_port = hsock->local_port;
	hdr->dst_port = hsock->remote_port;

	/* Allocate area for the contents, then copy into skb. */
	if (!copy_from_iter_full(skb_put(skb, len), len, &m->msg_iter)) {
		err = -EFAULT;
		goto err_cleanup;
	}

	/*
	 * TODO: We currently do this inline, but when we have support for
	 * readiness notification from Hafnium, we must add this to a per-VM tx
	 * queue that can make progress when the VM becomes writable. This will
	 * fix send buffering and poll readiness notification.
	 */
	err = hf_send_skb(skb);
	if (err)
		goto err_cleanup;

	return 0;

err_cleanup:
	kfree_skb(skb);
	return err;
}

/**
 * Receives a message originating from the VM & port the socket is connected to.
 * All variants of read/recv/recvfrom/recvmsg eventually call this function.
 */
static int hf_sock_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
			   int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int err;
	size_t copy_len;

	if (!hf_sock_is_connected(sock))
		return -ENOTCONN;

	/* Grab the next skb from the receive queue. */
	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	/* Make sure we don't copy more than what fits in the output buffer. */
	copy_len = skb->len;
	if (copy_len > len) {
		copy_len = len;
		m->msg_flags |= MSG_TRUNC;
	}

	/* Make sure we don't overflow the return value type. */
	if (copy_len > INT_MAX) {
		copy_len = INT_MAX;
		m->msg_flags |= MSG_TRUNC;
	}

	/* Copy skb to output iterator, then free it. */
	err = skb_copy_datagram_msg(skb, 0, m, copy_len);
	skb_free_datagram(sk, skb);
	if (err)
		return err;

	return copy_len;
}

/**
 * This function is called when a Hafnium socket is created. It initialises all
 * state such that the caller will be able to connect the socket and then send
 * and receive messages through it.
 */
static int hf_sock_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	static const struct proto_ops ops = {
		.family = PF_HF,
		.owner = THIS_MODULE,
		.release = hf_sock_release,
		.bind = sock_no_bind,
		.connect = hf_sock_connect,
		.socketpair = sock_no_socketpair,
		.accept = sock_no_accept,
		.ioctl = sock_no_ioctl,
		.listen = sock_no_listen,
		.shutdown = sock_no_shutdown,
		.setsockopt = sock_no_setsockopt,
		.getsockopt = sock_no_getsockopt,
		.sendmsg = hf_sock_sendmsg,
		.recvmsg = hf_sock_recvmsg,
		.mmap = sock_no_mmap,
		.sendpage = sock_no_sendpage,
		.poll = datagram_poll,
	};
	struct sock *sk;

	if (sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol != 0)
		return -EPROTONOSUPPORT;

	/*
	 * For now we only allow callers with sys admin capability to create
	 * Hafnium sockets.
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Allocate and initialise socket. */
	sk = sk_alloc(net, PF_HF, GFP_KERNEL, &hf_sock_proto, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	sk->sk_destruct = hf_sock_destruct;
	sock->ops = &ops;
	sock->state = SS_UNCONNECTED;

	return 0;
}

/**
 * Frees all resources, including threads, associated with the Hafnium driver.
 */
static void hf_free_resources(void)
{
	uint32_t i, j;

	/*
	 * First stop all worker threads. We need to do this before freeing
	 * resources because workers may reference each other, so it is only
	 * safe to free resources after they have all stopped.
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			kthread_stop(vm->vcpu[j].task);
	}

	/* Free resources. */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			put_task_struct(vm->vcpu[j].task);
		kfree(vm->vcpu);
	}

	kfree(hf_vms);
}

/**
 * Handles the hypervisor timer interrupt.
 */
static irqreturn_t hf_nop_irq_handler(int irq, void *dev)
{
	/*
	 * No need to do anything, the interrupt only exists to return to the
	 * primary vCPU so that the virtual timer will be restored and fire as
	 * normal.
	 */
	return IRQ_HANDLED;
}

/**
 * Enables the hypervisor timer interrupt on a CPU, when it starts or after the
 * driver is first loaded.
 */
static int hf_starting_cpu(unsigned int cpu)
{
	if (hf_irq != 0) {
		/* Enable the interrupt, and set it to be edge-triggered. */
		enable_percpu_irq(hf_irq, IRQ_TYPE_EDGE_RISING);
	}

	return 0;
}

/**
 * Disables the hypervisor timer interrupt on a CPU when it is powered down.
 */
static int hf_dying_cpu(unsigned int cpu)
{
	if (hf_irq != 0) {
		/* Disable the interrupt while the CPU is asleep. */
		disable_percpu_irq(hf_irq);
	}

	return 0;
}

/**
 * Registers for the hypervisor timer interrupt.
 */
static int hf_int_driver_probe(struct platform_device *pdev)
{
	int irq;
	int ret;

	/*
	 * Register a handler for the hypervisor timer IRQ, as it is needed for
	 * Hafnium to emulate the virtual timer for Linux while a secondary vCPU
	 * is running.
	 */
	irq = platform_get_irq(pdev, ARCH_TIMER_HYP_PPI);
	if (irq < 0) {
		pr_err("Error getting hypervisor timer IRQ: %d\n", irq);
		return irq;
	}
	hf_irq = irq;

	ret = request_percpu_irq(irq, hf_nop_irq_handler, HYPERVISOR_TIMER_NAME,
				 pdev);
	if (ret != 0) {
		pr_err("Error registering hypervisor timer IRQ %d: %d\n",
		       irq, ret);
		return ret;
	}
	pr_info("Hafnium registered for IRQ %d\n", irq);
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				"hafnium/hypervisor_timer:starting",
				hf_starting_cpu, hf_dying_cpu);
	if (ret < 0) {
		pr_err("Error enabling timer on all CPUs: %d\n", ret);
		free_percpu_irq(irq, pdev);
		return ret;
	}
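	/*
	 * With CPUHP_AP_ONLINE_DYN, a successful cpuhp_setup_state() returns
	 * the dynamically allocated hotplug state number; keep it so that
	 * hf_int_driver_remove() can tear the callbacks down again.
	 */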
	hf_cpuhp_state = ret;

	return 0;
}

/**
 * Unregisters for the hypervisor timer interrupt.
 */
static int hf_int_driver_remove(struct platform_device *pdev)
{
	/*
	 * This will cause hf_dying_cpu to be called on each CPU, which will
	 * disable the IRQs.
	 */
	cpuhp_remove_state(hf_cpuhp_state);
	free_percpu_irq(hf_irq, pdev);

	return 0;
}

static const struct of_device_id hf_int_driver_id[] = {
	{.compatible = "arm,armv7-timer"},
	{.compatible = "arm,armv8-timer"},
	{}
};

static struct platform_driver hf_int_driver = {
	.driver = {
		.name = HYPERVISOR_TIMER_NAME,
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(hf_int_driver_id),
	},
	.probe = hf_int_driver_probe,
	.remove = hf_int_driver_remove,
};

/**
 * Initializes the Hafnium driver by creating a thread for each vCPU of each
 * virtual machine.
 */
static int __init hf_init(void)
{
	static const struct net_proto_family proto_family = {
		.family = PF_HF,
		.create = hf_sock_create,
		.owner = THIS_MODULE,
	};
	int64_t ret;
	uint32_t i, j;
	uint32_t total_vm_count;
	uint32_t total_vcpu_count;

	/* Allocate a page for send and receive buffers. */
	hf_send_page = alloc_page(GFP_KERNEL);
	if (!hf_send_page) {
		pr_err("Unable to allocate send buffer\n");
		return -ENOMEM;
	}

	hf_recv_page = alloc_page(GFP_KERNEL);
	if (!hf_recv_page) {
		__free_page(hf_send_page);
		pr_err("Unable to allocate receive buffer\n");
		return -ENOMEM;
	}

	/*
	 * Configure both addresses. Once configured, we cannot free these pages
	 * because the hypervisor will use them, even if the module is
	 * unloaded.
	 */
	ret = hf_vm_configure(page_to_phys(hf_send_page),
			      page_to_phys(hf_recv_page));
	if (ret) {
		__free_page(hf_send_page);
		__free_page(hf_recv_page);
		/*
		 * TODO: We may want to grab this information from hypervisor
		 * and go from there.
		 */
		pr_err("Unable to configure VM\n");
		return -EIO;
	}

	/* Get the number of VMs. */
	ret = hf_vm_get_count();
	if (ret < 0) {
		pr_err("Unable to retrieve number of VMs: %lld\n", ret);
		return -EIO;
	}

	/* Confirm the maximum number of VMs looks sane. */
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS < 1);
	BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VMS > U16_MAX);

	/* Validate the number of VMs. There must be at least the primary. */
	if (ret < 1 || ret > CONFIG_HAFNIUM_MAX_VMS) {
		pr_err("Number of VMs is out of range: %lld\n", ret);
		return -EDQUOT;
	}

	/* Only track the secondary VMs. */
	total_vm_count = ret - 1;
	hf_vms =
		kmalloc_array(total_vm_count, sizeof(struct hf_vm), GFP_KERNEL);
	if (!hf_vms)
		return -ENOMEM;

	/* Cache the VM id for later usage. */
	current_vm_id = hf_vm_get_id();

	/* Initialize each VM. */
	total_vcpu_count = 0;
	for (i = 0; i < total_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		/* Adjust the ID as only the secondaries are tracked. */
		vm->id = i + FIRST_SECONDARY_VM_ID;

		ret = hf_vcpu_get_count(vm->id);
		if (ret < 0) {
			pr_err("HF_VCPU_GET_COUNT failed for vm=%u: %lld\n",
			       vm->id, ret);
			ret = -EIO;
			goto fail_with_cleanup;
		}

		/* Avoid overflowing the vcpu count. */
		if (ret > (U32_MAX - total_vcpu_count)) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		/* Confirm the maximum number of VCPUs looks sane. */
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS < 1);
		BUILD_BUG_ON(CONFIG_HAFNIUM_MAX_VCPUS > U16_MAX);

		/* Enforce the limit on vcpus. */
		total_vcpu_count += ret;
		if (total_vcpu_count > CONFIG_HAFNIUM_MAX_VCPUS) {
			pr_err("Too many vcpus: %u\n", total_vcpu_count);
			ret = -EDQUOT;
			goto fail_with_cleanup;
		}

		vm->vcpu_count = ret;
		vm->vcpu = kmalloc_array(vm->vcpu_count, sizeof(struct hf_vcpu),
					 GFP_KERNEL);
		if (!vm->vcpu) {
			ret = -ENOMEM;
			goto fail_with_cleanup;
		}

		/* Update the number of initialized VMs. */
		hf_vm_count = i + 1;

		/* Create a kernel thread for each vcpu. */
		for (j = 0; j < vm->vcpu_count; j++) {
			struct hf_vcpu *vcpu = &vm->vcpu[j];

			vcpu->task =
				kthread_create(hf_vcpu_thread, vcpu,
					       "vcpu_thread_%u_%u", vm->id, j);
			if (IS_ERR(vcpu->task)) {
				pr_err("Error creating task (vm=%u,vcpu=%u): %ld\n",
				       vm->id, j, PTR_ERR(vcpu->task));
				vm->vcpu_count = j;
				ret = PTR_ERR(vcpu->task);
				goto fail_with_cleanup;
			}

			get_task_struct(vcpu->task);
			vcpu->vm = vm;
			vcpu->vcpu_index = j;
			atomic_set(&vcpu->abort_sleep, 0);
		}
	}

	/* Register protocol and socket family. */
	ret = proto_register(&hf_sock_proto, 0);
	if (ret) {
		pr_err("Unable to register protocol: %lld\n", ret);
		goto fail_with_cleanup;
	}

	ret = sock_register(&proto_family);
	if (ret) {
		pr_err("Unable to register Hafnium's socket family: %lld\n",
		       ret);
		goto fail_unregister_proto;
	}

	/*
	 * Register as a driver for the timer device, so we can register a
	 * handler for the hypervisor timer IRQ.
	 */
	ret = platform_driver_register(&hf_int_driver);
	if (ret != 0) {
		pr_err("Error registering timer driver: %lld\n", ret);
		goto fail_unregister_socket;
	}

	/*
	 * Start running threads now that all is initialized.
	 *
	 * Any failures from this point on must also unregister the driver with
	 * platform_driver_unregister().
	 */
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		for (j = 0; j < vm->vcpu_count; j++)
			wake_up_process(vm->vcpu[j].task);
	}

	/* Dump vm/vcpu count info. */
	pr_info("Hafnium successfully loaded with %u VMs:\n", hf_vm_count);
	for (i = 0; i < hf_vm_count; i++) {
		struct hf_vm *vm = &hf_vms[i];

		pr_info("\tVM %u: %u vCPUs\n", vm->id, vm->vcpu_count);
	}

	return 0;

fail_unregister_socket:
	sock_unregister(PF_HF);
fail_unregister_proto:
	proto_unregister(&hf_sock_proto);
fail_with_cleanup:
	hf_free_resources();
	return ret;
}

/**
 * Frees up all resources used by the Hafnium driver in preparation for
 * unloading it.
 */
static void __exit hf_exit(void)
{
	pr_info("Preparing to unload Hafnium\n");
	sock_unregister(PF_HF);
	proto_unregister(&hf_sock_proto);
	hf_free_resources();
	platform_driver_unregister(&hf_int_driver);
	pr_info("Hafnium ready to unload\n");
}

MODULE_LICENSE("GPL v2");

module_init(hf_init);
module_exit(hf_exit);