/*
 * Copyright 2018 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "hf/api.h"

#include <assert.h>

#include "hf/arch/cpu.h"
#include "hf/arch/timer.h"

#include "hf/dlog.h"
#include "hf/mm.h"
#include "hf/spinlock.h"
#include "hf/std.h"
#include "hf/vm.h"

#include "vmapi/hf/call.h"

/*
 * To eliminate the risk of deadlocks, we define a partial order for the
 * acquisition of locks held concurrently by the same physical CPU. Our current
 * ordering requirements are as follows:
 *
 * vm::lock -> vcpu::lock
 *
 * Locks of the same kind must be acquired in order of increasing address; see
 * `sl_lock_both()`.
 */

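/*
 * Illustrative sketch (not part of the API; names taken from this file): a
 * path that needs both a VM lock and one of its vCPU locks must respect the
 * order above, e.g.:
 *
 *   sl_lock(&vm->lock);
 *   sl_lock(&vm->vcpus[i].lock);
 *   ...
 *   sl_unlock(&vm->vcpus[i].lock);
 *   sl_unlock(&vm->lock);
 *
 * Two locks of the same kind are taken with `sl_lock_both()`, which orders
 * the acquisitions by address internally.
 */
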
static_assert(HF_MAILBOX_SIZE == PAGE_SIZE,
              "Currently, a page is mapped for the send and receive buffers so "
              "the maximum request is the size of a page.");

static struct mpool api_page_pool;

/**
 * Initialises the API page pool by taking ownership of the contents of the
 * given page pool.
 */
void api_init(struct mpool *ppool)
{
        mpool_init_from(&api_page_pool, ppool);
}

/**
 * Switches the physical CPU back to the corresponding vcpu of the primary VM.
 *
 * This triggers the scheduling logic to run. Run in the context of a secondary
 * VM to cause HF_VCPU_RUN to return and the primary VM to regain control of
 * the CPU.
 */
static struct vcpu *api_switch_to_primary(struct vcpu *current,
                                          struct hf_vcpu_run_return primary_ret,
                                          enum vcpu_state secondary_state)
{
        struct vm *primary = vm_get(HF_PRIMARY_VM_ID);
        struct vcpu *next = &primary->vcpus[cpu_index(current->cpu)];

        /*
         * If the secondary is blocked but has a timer running, sleep until the
         * timer fires rather than indefinitely.
         */
        if (primary_ret.code == HF_VCPU_RUN_WAIT_FOR_INTERRUPT &&
            arch_timer_enabled_current()) {
                primary_ret.code = HF_VCPU_RUN_SLEEP;
                primary_ret.sleep.ns = arch_timer_remaining_ns_current();
        }

        /* Set the return value for the primary VM's call to HF_VCPU_RUN. */
        arch_regs_set_retval(&next->regs,
                             hf_vcpu_run_return_encode(primary_ret));

        /* Mark the current vcpu as waiting. */
        sl_lock(&current->lock);
        current->state = secondary_state;
        sl_unlock(&current->lock);

        return next;
}

/**
 * Returns to the primary vm and signals that the vcpu still has work to do.
 */
struct vcpu *api_preempt(struct vcpu *current)
{
        struct hf_vcpu_run_return ret = {
                .code = HF_VCPU_RUN_PREEMPTED,
        };

        return api_switch_to_primary(current, ret, vcpu_state_ready);
}

/**
 * Puts the current vcpu into wait-for-interrupt mode and returns to the
 * primary vm.
 */
struct vcpu *api_wait_for_interrupt(struct vcpu *current)
{
        struct hf_vcpu_run_return ret = {
                .code = HF_VCPU_RUN_WAIT_FOR_INTERRUPT,
        };

        return api_switch_to_primary(current, ret,
                                     vcpu_state_blocked_interrupt);
}

/**
 * Returns to the primary vm to allow this cpu to be used for other tasks as the
 * vcpu does not have work to do at this moment. The current vcpu is marked as
 * ready to be scheduled again.
 */
struct vcpu *api_yield(struct vcpu *current)
{
        struct hf_vcpu_run_return ret = {
                .code = HF_VCPU_RUN_YIELD,
        };

        if (current->vm->id == HF_PRIMARY_VM_ID) {
                /* Noop on the primary as it makes the scheduling decisions. */
                return NULL;
        }

        return api_switch_to_primary(current, ret, vcpu_state_ready);
}

/**
 * Aborts the vCPU and triggers its VM to abort fully.
 */
struct vcpu *api_abort(struct vcpu *current)
{
        struct hf_vcpu_run_return ret = {
                .code = HF_VCPU_RUN_ABORTED,
        };

        dlog("Aborting VM %u vCPU %u\n", current->vm->id, vcpu_index(current));

        if (current->vm->id == HF_PRIMARY_VM_ID) {
                /* TODO: what to do when the primary aborts? */
                for (;;) {
                        /* Do nothing. */
                }
        }

        atomic_store_explicit(&current->vm->aborting, true,
                              memory_order_relaxed);

        /* TODO: free resources once all vCPUs abort. */

        return api_switch_to_primary(current, ret, vcpu_state_aborted);
}

/**
 * Returns the ID of the VM.
 */
int64_t api_vm_get_id(const struct vcpu *current)
{
        return current->vm->id;
}

/**
 * Returns the number of VMs configured to run.
 */
int64_t api_vm_get_count(void)
{
        return vm_get_count();
}

/**
 * Returns the number of vcpus configured in the given VM.
 */
int64_t api_vcpu_get_count(uint32_t vm_id, const struct vcpu *current)
{
        struct vm *vm;

        /* Only the primary VM needs to know about vcpus for scheduling. */
        if (current->vm->id != HF_PRIMARY_VM_ID) {
                return -1;
        }

        vm = vm_get(vm_id);
        if (vm == NULL) {
                return -1;
        }

        return vm->vcpu_count;
}

/**
 * This function is called by the architecture-specific context switching
 * function to indicate that register state for the given vcpu has been saved
 * and can therefore be used by other pcpus.
 */
void api_regs_state_saved(struct vcpu *vcpu)
{
        sl_lock(&vcpu->lock);
        vcpu->regs_available = true;
        sl_unlock(&vcpu->lock);
}

/**
 * Retrieves the next waiter and removes it from the wait list if the VM's
 * mailbox is in a writable state.
 */
static struct wait_entry *api_fetch_waiter(struct vm_locked locked_vm)
{
        struct wait_entry *entry;
        struct vm *vm = locked_vm.vm;

        if (vm->mailbox.state != mailbox_state_empty ||
            vm->mailbox.recv == NULL || list_empty(&vm->mailbox.waiter_list)) {
                /* The mailbox is not writable or there are no waiters. */
                return NULL;
        }

        /* Remove waiter from the wait list. */
        entry = CONTAINER_OF(vm->mailbox.waiter_list.next, struct wait_entry,
                             wait_links);
        list_remove(&entry->wait_links);
        return entry;
}

/**
 * Assuming that the arguments have already been checked by the caller, injects
 * a virtual interrupt of the given ID into the given target vCPU. This doesn't
 * cause the vCPU to actually be run immediately; it will be taken when the vCPU
 * is next run, which is up to the scheduler.
 *
 * Returns:
 *  - 0 on success if no further action is needed.
 *  - 1 if it was called by the primary VM and the primary VM now needs to wake
 *    up or kick the target vCPU.
 */
static int64_t internal_interrupt_inject(struct vm *target_vm,
                                         struct vcpu *target_vcpu,
                                         uint32_t intid, struct vcpu *current,
                                         struct vcpu **next)
{
        uint32_t intid_index = intid / INTERRUPT_REGISTER_BITS;
        uint32_t intid_mask = 1u << (intid % INTERRUPT_REGISTER_BITS);
        bool need_vm_lock;
        int64_t ret = 0;

        sl_lock(&target_vcpu->lock);
        /*
         * If we need the target_vm lock we need to release the target_vcpu lock
         * first to maintain the correct order of locks. In-between releasing
         * and acquiring it again the state of the vCPU could change in such a
         * way that we don't actually need to touch the target_vm after all;
         * that's alright: we'll take the target_vm lock anyway, which is safe,
         * just perhaps a little slow in this unusual case. The reverse is not
         * possible: if need_vm_lock is false, we don't release the target_vcpu
         * lock until we are done, so nothing should change in such a way that
         * we need the VM lock after all.
         */
        need_vm_lock =
                (target_vcpu->interrupts.interrupt_enabled[intid_index] &
                 ~target_vcpu->interrupts.interrupt_pending[intid_index] &
                 intid_mask) &&
                target_vcpu->state == vcpu_state_blocked_mailbox;
        if (need_vm_lock) {
                sl_unlock(&target_vcpu->lock);
                sl_lock(&target_vm->lock);
                sl_lock(&target_vcpu->lock);
        }

        /*
         * We only need to change state and (maybe) trigger a virtual IRQ if it
         * is enabled and was not previously pending. Otherwise we can skip
         * everything except setting the pending bit.
         *
         * If you change this logic make sure to update the need_vm_lock logic
         * above to match.
         */
        if (!(target_vcpu->interrupts.interrupt_enabled[intid_index] &
              ~target_vcpu->interrupts.interrupt_pending[intid_index] &
              intid_mask)) {
                goto out;
        }

        /* Increment the count. */
        target_vcpu->interrupts.enabled_and_pending_count++;

        /*
         * Only need to update state if there was not already an
         * interrupt enabled and pending.
         */
        if (target_vcpu->interrupts.enabled_and_pending_count != 1) {
                goto out;
        }

        if (target_vcpu->state == vcpu_state_blocked_interrupt) {
                target_vcpu->state = vcpu_state_ready;
        } else if (target_vcpu->state == vcpu_state_blocked_mailbox) {
                /*
                 * need_vm_lock must be true if this path is taken, so if you
                 * change the condition here or those leading up to it make sure
                 * to update the need_vm_lock logic above to match.
                 */

                /* Take target vCPU out of mailbox recv_waiter list. */
                /*
                 * TODO: Consider using a doubly-linked list for the receive
                 * waiter list to avoid the linear search here.
                 */
                struct vcpu **previous_next_pointer =
                        &target_vm->mailbox.recv_waiter;
                while (*previous_next_pointer != NULL &&
                       *previous_next_pointer != target_vcpu) {
                        /*
                         * TODO(qwandor): Do we need to lock the vCPUs somehow
                         * while we walk the linked list, or is the VM lock
                         * enough?
                         */
                        previous_next_pointer =
                                &(*previous_next_pointer)->mailbox_next;
                }
                if (*previous_next_pointer == NULL) {
                        dlog("Target VCPU state is vcpu_state_blocked_mailbox "
                             "but is not in VM mailbox waiter list. This "
                             "should never happen.\n");
                } else {
                        *previous_next_pointer = target_vcpu->mailbox_next;
                }

                target_vcpu->state = vcpu_state_ready;
        }

        if (current->vm->id == HF_PRIMARY_VM_ID) {
                /*
                 * If the call came from the primary VM, let it know that it
                 * should run or kick the target vCPU.
                 */
                ret = 1;
        } else if (current != target_vcpu && next != NULL) {
                /*
                 * Switch to the primary so that it can switch to the target, or
                 * kick it if it is already running on a different physical CPU.
                 */
                struct hf_vcpu_run_return ret = {
                        .code = HF_VCPU_RUN_WAKE_UP,
                        .wake_up.vm_id = target_vm->id,
                        .wake_up.vcpu = target_vcpu - target_vm->vcpus,
                };
                *next = api_switch_to_primary(current, ret, vcpu_state_ready);
        }

out:
        /* Either way, make it pending. */
        target_vcpu->interrupts.interrupt_pending[intid_index] |= intid_mask;

        sl_unlock(&target_vcpu->lock);
        if (need_vm_lock) {
                sl_unlock(&target_vm->lock);
        }

        return ret;
}

/**
 * Prepares the vcpu to run by updating its state and fetching whether a return
 * value needs to be forced onto the vCPU.
 */
static bool api_vcpu_prepare_run(const struct vcpu *current, struct vcpu *vcpu,
                                 struct retval_state *vcpu_retval,
                                 struct hf_vcpu_run_return *run_ret)
{
        bool ret;

        sl_lock(&vcpu->lock);

        if (atomic_load_explicit(&vcpu->vm->aborting, memory_order_relaxed)) {
                if (vcpu->state != vcpu_state_aborted) {
                        dlog("Aborting VM %u vCPU %u\n", vcpu->vm->id,
                             vcpu_index(vcpu));
                        vcpu->state = vcpu_state_aborted;
                }
                ret = false;
                goto out;
        }

        /*
         * Wait until the registers become available. Care must be taken when
         * looping on this: it shouldn't be done while holding other locks to
         * avoid deadlocks.
         */
        while (!vcpu->regs_available) {
                if (vcpu->state == vcpu_state_running) {
                        /*
                         * vCPU is running on another pCPU.
                         *
                         * It's ok to not return HF_VCPU_RUN_SLEEP here because
                         * the other physical CPU that is currently running this
                         * vcpu will return HF_VCPU_RUN_SLEEP if needed. The
                         * default return value is
                         * HF_VCPU_RUN_WAIT_FOR_INTERRUPT, so no need to set it
                         * explicitly.
                         */
                        ret = false;
                        goto out;
                }

                sl_unlock(&vcpu->lock);
                sl_lock(&vcpu->lock);
        }

        switch (vcpu->state) {
        case vcpu_state_running:
        case vcpu_state_off:
        case vcpu_state_aborted:
                ret = false;
                goto out;
        case vcpu_state_blocked_interrupt:
        case vcpu_state_blocked_mailbox:
                if (arch_timer_pending(&vcpu->regs)) {
                        break;
                }

                /*
                 * The vCPU is not ready to run, so return the appropriate code
                 * to the primary, which called vcpu_run.
                 */
                if (arch_timer_enabled(&vcpu->regs)) {
                        run_ret->code = HF_VCPU_RUN_SLEEP;
                        run_ret->sleep.ns =
                                arch_timer_remaining_ns(&vcpu->regs);
                }

                ret = false;
                goto out;
        case vcpu_state_ready:
                break;
        }
        /*
         * If we made it to here then either the state was vcpu_state_ready or
         * the timer is pending, so the vCPU should run to handle the timer
         * firing.
         */

        vcpu->cpu = current->cpu;
        vcpu->state = vcpu_state_running;

        /* Fetch return value to inject into vCPU if there is one. */
        *vcpu_retval = vcpu->retval;
        if (vcpu_retval->force) {
                vcpu->retval.force = false;
        }

        /*
         * Mark the registers as unavailable now that we're about to reflect
         * them onto the real registers. This will also prevent another physical
         * CPU from trying to read these registers.
         */
        vcpu->regs_available = false;

        ret = true;

out:
        sl_unlock(&vcpu->lock);
        return ret;
}

/**
 * Runs the given vcpu of the given vm.
 */
struct hf_vcpu_run_return api_vcpu_run(uint32_t vm_id, uint32_t vcpu_idx,
                                       const struct vcpu *current,
                                       struct vcpu **next)
{
        struct vm *vm;
        struct vcpu *vcpu;
        struct retval_state vcpu_retval;
        struct hf_vcpu_run_return ret = {
                .code = HF_VCPU_RUN_WAIT_FOR_INTERRUPT,
        };

        /* Only the primary VM can switch vcpus. */
        if (current->vm->id != HF_PRIMARY_VM_ID) {
                goto out;
        }

        /* Only secondary VM vcpus can be run. */
        if (vm_id == HF_PRIMARY_VM_ID) {
                goto out;
        }

        /* The requested VM must exist. */
        vm = vm_get(vm_id);
        if (vm == NULL) {
                goto out;
        }

        /* The requested vcpu must exist. */
        if (vcpu_idx >= vm->vcpu_count) {
                goto out;
        }

        /* Update state if allowed. */
        vcpu = &vm->vcpus[vcpu_idx];
        if (!api_vcpu_prepare_run(current, vcpu, &vcpu_retval, &ret)) {
                goto out;
        }

        /*
         * Inject timer interrupt if timer has expired. It's safe to access
         * vcpu->regs here because api_vcpu_prepare_run already made sure that
         * regs_available was true (and then set it to false) before returning
         * true.
         */
        if (arch_timer_pending(&vcpu->regs)) {
                /* Make virtual timer interrupt pending. */
                internal_interrupt_inject(vm, vcpu, HF_VIRTUAL_TIMER_INTID,
                                          vcpu, NULL);

                /*
                 * Set the mask bit so the hardware interrupt doesn't fire
                 * again. Ideally we wouldn't do this because it affects what
                 * the secondary vCPU sees, but if we don't then we end up with
                 * a loop of the interrupt firing each time we try to return to
                 * the secondary vCPU.
                 */
                arch_timer_mask(&vcpu->regs);
        }

        /* Switch to the vcpu. */
        *next = vcpu;

        /*
         * Set a placeholder return code for the scheduler. This will be
         * overwritten when the switch back to the primary occurs.
         */
        ret.code = HF_VCPU_RUN_PREEMPTED;

        /* Update return value for the next vcpu if one was injected. */
        if (vcpu_retval.force) {
                arch_regs_set_retval(&vcpu->regs, vcpu_retval.value);
        }

out:
        return ret;
}

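/*
 * Illustrative sketch only: how the primary VM's scheduler might consume the
 * return values of HF_VCPU_RUN. The vmapi wrapper name and signature
 * (hf_vcpu_run()) are an assumption based on vmapi/hf/call.h, not guaranteed
 * by this file.
 *
 *   struct hf_vcpu_run_return run = hf_vcpu_run(vm_id, vcpu_idx);
 *   switch (run.code) {
 *   case HF_VCPU_RUN_YIELD:
 *   case HF_VCPU_RUN_PREEMPTED:
 *           // Reschedule the vCPU later.
 *           break;
 *   case HF_VCPU_RUN_SLEEP:
 *           // Do not run the vCPU again for run.sleep.ns nanoseconds.
 *           break;
 *   case HF_VCPU_RUN_WAIT_FOR_INTERRUPT:
 *           // Wait until an interrupt is injected for this vCPU.
 *           break;
 *   case HF_VCPU_RUN_MESSAGE:
 *           // run.message.size bytes were delivered to the primary's mailbox.
 *           break;
 *   default:
 *           // Handle the remaining codes (WAKE_UP, NOTIFY_WAITERS, ABORTED).
 *           break;
 *   }
 */
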
/**
 * Checks that the mode indicates memory that is valid, owned and exclusive.
 */
static bool api_mode_valid_owned_and_exclusive(int mode)
{
        return (mode & (MM_MODE_INVALID | MM_MODE_UNOWNED | MM_MODE_SHARED)) ==
               0;
}

/**
 * Determines the value to be returned by api_vm_configure and api_mailbox_clear
 * after they've succeeded. If a secondary VM is running and there are waiters,
 * it also switches back to the primary VM for it to wake waiters up.
 */
static int64_t api_waiter_result(struct vm_locked locked_vm,
                                 struct vcpu *current, struct vcpu **next)
{
        struct vm *vm = locked_vm.vm;
        struct hf_vcpu_run_return ret = {
                .code = HF_VCPU_RUN_NOTIFY_WAITERS,
        };

        if (list_empty(&vm->mailbox.waiter_list)) {
                /* No waiters, nothing else to do. */
                return 0;
        }

        if (vm->id == HF_PRIMARY_VM_ID) {
                /* The caller is the primary VM. Tell it to wake up waiters. */
                return 1;
        }

        /*
         * Switch back to the primary VM, informing it that there are waiters
         * that need to be notified.
         */
        *next = api_switch_to_primary(current, ret, vcpu_state_ready);

        return 0;
}

/**
 * Configures the VM to send/receive data through the specified pages. The pages
 * must not be shared.
 *
 * Returns:
 *  - -1 on failure.
 *  - 0 on success if no further action is needed.
 *  - 1 if it was called by the primary VM and the primary VM now needs to wake
 *    up or kick waiters. Waiters should be retrieved by calling
 *    hf_mailbox_waiter_get.
 */
int64_t api_vm_configure(ipaddr_t send, ipaddr_t recv, struct vcpu *current,
                         struct vcpu **next)
{
        struct vm *vm = current->vm;
        struct vm_locked locked;
        paddr_t pa_send_begin;
        paddr_t pa_send_end;
        paddr_t pa_recv_begin;
        paddr_t pa_recv_end;
        int orig_send_mode;
        int orig_recv_mode;
        struct mpool local_page_pool;
        int64_t ret;

        /* Fail if addresses are not page-aligned. */
        if ((ipa_addr(send) & (PAGE_SIZE - 1)) ||
            (ipa_addr(recv) & (PAGE_SIZE - 1))) {
                return -1;
        }

        /* Convert to physical addresses. */
        pa_send_begin = pa_from_ipa(send);
        pa_send_end = pa_add(pa_send_begin, PAGE_SIZE);

        pa_recv_begin = pa_from_ipa(recv);
        pa_recv_end = pa_add(pa_recv_begin, PAGE_SIZE);

        /* Fail if the same page is used for the send and receive pages. */
        if (pa_addr(pa_send_begin) == pa_addr(pa_recv_begin)) {
                return -1;
        }

        vm_lock(vm, &locked);

        /* We only allow these to be set up once. */
        if (vm->mailbox.send || vm->mailbox.recv) {
                goto fail;
        }

        /*
         * Ensure the pages are valid, owned and exclusive to the VM and that
         * the VM has the required access to the memory.
         */
        if (!mm_vm_get_mode(&vm->ptable, send, ipa_add(send, PAGE_SIZE),
                            &orig_send_mode) ||
            !api_mode_valid_owned_and_exclusive(orig_send_mode) ||
            (orig_send_mode & MM_MODE_R) == 0 ||
            (orig_send_mode & MM_MODE_W) == 0) {
                goto fail;
        }

        if (!mm_vm_get_mode(&vm->ptable, recv, ipa_add(recv, PAGE_SIZE),
                            &orig_recv_mode) ||
            !api_mode_valid_owned_and_exclusive(orig_recv_mode) ||
            (orig_recv_mode & MM_MODE_R) == 0) {
                goto fail;
        }

        /*
         * Create a local pool so any freed memory can't be used by another
         * thread. This is to ensure the original mapping can be restored if any
         * stage of the process fails.
         */
        mpool_init_with_fallback(&local_page_pool, &api_page_pool);

        /* Take memory ownership away from the VM and mark as shared. */
        if (!mm_vm_identity_map(
                    &vm->ptable, pa_send_begin, pa_send_end,
                    MM_MODE_UNOWNED | MM_MODE_SHARED | MM_MODE_R | MM_MODE_W,
                    NULL, &local_page_pool)) {
                goto fail_free_pool;
        }

        if (!mm_vm_identity_map(&vm->ptable, pa_recv_begin, pa_recv_end,
                                MM_MODE_UNOWNED | MM_MODE_SHARED | MM_MODE_R,
                                NULL, &local_page_pool)) {
                /* TODO: partial defrag of failed range. */
                /* Recover any memory consumed in failed mapping. */
                mm_vm_defrag(&vm->ptable, &local_page_pool);
                goto fail_undo_send;
        }

        /* Map the send page as read-only in the hypervisor address space. */
        vm->mailbox.send = mm_identity_map(pa_send_begin, pa_send_end,
                                           MM_MODE_R, &local_page_pool);
        if (!vm->mailbox.send) {
                /* TODO: partial defrag of failed range. */
                /* Recover any memory consumed in failed mapping. */
                mm_defrag(&local_page_pool);
                goto fail_undo_send_and_recv;
        }

        /*
         * Map the receive page as writable in the hypervisor address space. On
         * failure, unmap the send page before returning.
         */
        vm->mailbox.recv = mm_identity_map(pa_recv_begin, pa_recv_end,
                                           MM_MODE_W, &local_page_pool);
        if (!vm->mailbox.recv) {
                /* TODO: partial defrag of failed range. */
                /* Recover any memory consumed in failed mapping. */
                mm_defrag(&local_page_pool);
                goto fail_undo_all;
        }

        /* Tell caller about waiters, if any. */
        ret = api_waiter_result(locked, current, next);
        goto exit;

        /*
         * The following mappings will not require more memory than is available
         * in the local pool.
         */
fail_undo_all:
        vm->mailbox.send = NULL;
        mm_unmap(pa_send_begin, pa_send_end, &local_page_pool);

fail_undo_send_and_recv:
        mm_vm_identity_map(&vm->ptable, pa_recv_begin, pa_recv_end,
                           orig_recv_mode, NULL, &local_page_pool);

fail_undo_send:
        mm_vm_identity_map(&vm->ptable, pa_send_begin, pa_send_end,
                           orig_send_mode, NULL, &local_page_pool);

fail_free_pool:
        mpool_fini(&local_page_pool);

fail:
        ret = -1;

exit:
        vm_unlock(&locked);

        return ret;
}

/**
 * Copies data from the sender's send buffer to the recipient's receive buffer
 * and notifies the recipient.
 *
 * If the recipient's receive buffer is busy, it can optionally register the
 * caller to be notified when the recipient's receive buffer becomes available.
 */
int64_t api_mailbox_send(uint32_t vm_id, size_t size, bool notify,
                         struct vcpu *current, struct vcpu **next)
{
        struct vm *from = current->vm;
        struct vm *to;
        const void *from_buf;
        uint16_t vcpu;
        int64_t ret;

        /* Limit the size of transfer. */
        if (size > HF_MAILBOX_SIZE) {
                return -1;
        }

        /* Disallow reflexive requests as this suggests an error in the VM. */
        if (vm_id == from->id) {
                return -1;
        }

        /* Ensure the target VM exists. */
        to = vm_get(vm_id);
        if (to == NULL) {
                return -1;
        }

        /*
         * Check that the sender has configured its send buffer. It is safe to
         * use from_buf after releasing the lock because the buffer cannot be
         * modified once it's configured.
         */
        sl_lock(&from->lock);
        from_buf = from->mailbox.send;
        sl_unlock(&from->lock);
        if (from_buf == NULL) {
                return -1;
        }

        sl_lock(&to->lock);

        if (to->mailbox.state != mailbox_state_empty ||
            to->mailbox.recv == NULL) {
                /*
                 * Fail if the target isn't currently ready to receive data,
                 * setting up for notification if requested.
                 */
                if (notify) {
                        struct wait_entry *entry =
                                &current->vm->wait_entries[vm_id];

                        /* Append waiter only if it's not there yet. */
                        if (list_empty(&entry->wait_links)) {
                                list_append(&to->mailbox.waiter_list,
                                            &entry->wait_links);
                        }
                }

                ret = -1;
                goto out;
        }

        /* Copy data. */
        memcpy(to->mailbox.recv, from_buf, size);
        to->mailbox.recv_bytes = size;
        to->mailbox.recv_from_id = from->id;
        to->mailbox.state = mailbox_state_read;

        /* Messages for the primary VM are delivered directly. */
        if (to->id == HF_PRIMARY_VM_ID) {
                struct hf_vcpu_run_return primary_ret = {
                        .code = HF_VCPU_RUN_MESSAGE,
                        .message.size = size,
                };

                *next = api_switch_to_primary(current, primary_ret,
                                              vcpu_state_ready);
                ret = 0;
                goto out;
        }

        /*
         * Try to find a vcpu to handle the message and tell the scheduler to
         * run it.
         */
        if (to->mailbox.recv_waiter == NULL) {
                /*
                 * The scheduler must choose a vcpu to interrupt so it can
                 * handle the message.
                 */
                to->mailbox.state = mailbox_state_received;
                vcpu = HF_INVALID_VCPU;
        } else {
                struct vcpu *to_vcpu = to->mailbox.recv_waiter;

                /*
                 * Take target vcpu out of waiter list and mark it as ready to
                 * run again.
                 */
                sl_lock(&to_vcpu->lock);
                to->mailbox.recv_waiter = to_vcpu->mailbox_next;
                to_vcpu->state = vcpu_state_ready;

                /* Return from HF_MAILBOX_RECEIVE. */
                to_vcpu->retval.force = true;
                to_vcpu->retval.value = hf_mailbox_receive_return_encode(
                        (struct hf_mailbox_receive_return){
                                .vm_id = to->mailbox.recv_from_id,
                                .size = size,
                        });

                sl_unlock(&to_vcpu->lock);

                vcpu = to_vcpu - to->vcpus;
        }

        /* Return to the primary VM directly or with a switch. */
        if (from->id == HF_PRIMARY_VM_ID) {
                ret = vcpu;
        } else {
                struct hf_vcpu_run_return primary_ret = {
                        .code = HF_VCPU_RUN_WAKE_UP,
                        .wake_up.vm_id = to->id,
                        .wake_up.vcpu = vcpu,
                };

                *next = api_switch_to_primary(current, primary_ret,
                                              vcpu_state_ready);
                ret = 0;
        }

out:
        sl_unlock(&to->lock);

        return ret;
}

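/*
 * Illustrative sketch only: how a sending VM might use this primitive,
 * assuming a vmapi wrapper hf_mailbox_send(vm_id, size, notify) with matching
 * semantics exists (an assumption, not guaranteed by this file); send_buffer,
 * payload and peer_id are hypothetical names.
 *
 *   memcpy(send_buffer, payload, len);
 *   if (hf_mailbox_send(peer_id, len, true) == -1) {
 *           // The peer's mailbox was busy; because notify was true the
 *           // sender is now registered on the peer's waiter list and will be
 *           // told via a mailbox-writable notification when it can retry.
 *   }
 */
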
/**
 * Receives a message from the mailbox. If one isn't available, this function
 * can optionally block the caller until one becomes available.
 *
 * No new messages can be received until the mailbox has been cleared.
 */
struct hf_mailbox_receive_return api_mailbox_receive(bool block,
                                                     struct vcpu *current,
                                                     struct vcpu **next)
{
        struct vm *vm = current->vm;
        struct hf_mailbox_receive_return ret = {
                .vm_id = HF_INVALID_VM_ID,
        };

        /*
         * The primary VM will receive messages as a status code from running
         * vcpus and must not call this function.
         */
        if (vm->id == HF_PRIMARY_VM_ID) {
                return ret;
        }

        sl_lock(&vm->lock);

        /* Return pending messages without blocking. */
        if (vm->mailbox.state == mailbox_state_received) {
                vm->mailbox.state = mailbox_state_read;
                ret.vm_id = vm->mailbox.recv_from_id;
                ret.size = vm->mailbox.recv_bytes;
                goto out;
        }

        /* No pending message so fail if not allowed to block. */
        if (!block) {
                goto out;
        }

        sl_lock(&current->lock);

        /* Push vcpu into waiter list. */
        current->mailbox_next = vm->mailbox.recv_waiter;
        vm->mailbox.recv_waiter = current;
        sl_unlock(&current->lock);

        /* Switch back to primary vm to block. */
        {
                struct hf_vcpu_run_return run_return = {
                        .code = HF_VCPU_RUN_WAIT_FOR_INTERRUPT,
                };

                *next = api_switch_to_primary(current, run_return,
                                              vcpu_state_blocked_mailbox);
        }
out:
        sl_unlock(&vm->lock);

        return ret;
}

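/*
 * Illustrative sketch only: a secondary VM's receive loop, assuming vmapi
 * wrappers hf_mailbox_receive(block) and hf_mailbox_clear() with matching
 * semantics (assumptions, not guaranteed by this file).
 *
 *   for (;;) {
 *           struct hf_mailbox_receive_return recv = hf_mailbox_receive(true);
 *
 *           if (recv.vm_id == HF_INVALID_VM_ID) {
 *                   continue;
 *           }
 *           // Process recv.size bytes from the receive buffer, then clear
 *           // the mailbox so the next message can be delivered.
 *           hf_mailbox_clear();
 *   }
 */
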
/**
 * Retrieves the next VM whose mailbox became writable. For a VM to be notified
 * by this function, the caller must have called api_mailbox_send before with
 * the notify argument set to true, and this call must have failed because the
 * mailbox was not available.
 *
 * It should be called repeatedly to retrieve a list of VMs.
 *
 * Returns -1 if no VM became writable, or the id of the VM whose mailbox
 * became writable.
 */
int64_t api_mailbox_writable_get(const struct vcpu *current)
{
        struct vm *vm = current->vm;
        struct wait_entry *entry;
        int64_t ret;

        sl_lock(&vm->lock);
        if (list_empty(&vm->mailbox.ready_list)) {
                ret = -1;
                goto exit;
        }

        entry = CONTAINER_OF(vm->mailbox.ready_list.next, struct wait_entry,
                             ready_links);
        list_remove(&entry->ready_links);
        ret = entry - vm->wait_entries;

exit:
        sl_unlock(&vm->lock);
        return ret;
}

/**
 * Retrieves the next VM waiting to be notified that the mailbox of the
 * specified VM became writable. Only primary VMs are allowed to call this.
 *
 * Returns -1 on failure or if there are no waiters; the VM id of the next
 * waiter otherwise.
 */
int64_t api_mailbox_waiter_get(uint32_t vm_id, const struct vcpu *current)
{
        struct vm *vm;
        struct vm_locked locked;
        struct wait_entry *entry;
        struct vm *waiting_vm;

        /* Only primary VMs are allowed to call this function. */
        if (current->vm->id != HF_PRIMARY_VM_ID) {
                return -1;
        }

        vm = vm_get(vm_id);
        if (vm == NULL) {
                return -1;
        }

        /* Check if there are outstanding notifications from the given vm. */
        vm_lock(vm, &locked);
        entry = api_fetch_waiter(locked);
        vm_unlock(&locked);

        if (entry == NULL) {
                return -1;
        }

        /* Enqueue notification to waiting VM. */
        waiting_vm = entry->waiting_vm;

        sl_lock(&waiting_vm->lock);
        if (list_empty(&entry->ready_links)) {
                list_append(&waiting_vm->mailbox.ready_list,
                            &entry->ready_links);
        }
        sl_unlock(&waiting_vm->lock);

        return waiting_vm->id;
}

/**
 * Clears the caller's mailbox so that a new message can be received. The caller
 * must have copied out all data they wish to preserve as new messages will
 * overwrite the old and will arrive asynchronously.
 *
 * Returns:
 *  - -1 on failure, if the mailbox hasn't been read or is already empty.
 *  - 0 on success if no further action is needed.
 *  - 1 if it was called by the primary VM and the primary VM now needs to wake
 *    up or kick waiters. Waiters should be retrieved by calling
 *    hf_mailbox_waiter_get.
 */
int64_t api_mailbox_clear(struct vcpu *current, struct vcpu **next)
{
        struct vm *vm = current->vm;
        struct vm_locked locked;
        int64_t ret;

        vm_lock(vm, &locked);
        if (vm->mailbox.state == mailbox_state_read) {
                ret = api_waiter_result(locked, current, next);
                vm->mailbox.state = mailbox_state_empty;
        } else {
                ret = -1;
        }
        vm_unlock(&locked);

        return ret;
}

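/*
 * Illustrative sketch only: how the primary VM might drain waiter
 * notifications after a vCPU run returns HF_VCPU_RUN_NOTIFY_WAITERS, or after
 * its own mailbox clear returns 1. The vmapi wrapper name
 * hf_mailbox_waiter_get() and the variable readied_vm_id are assumptions for
 * illustration, not guaranteed by this file.
 *
 *   int64_t waiter;
 *
 *   while ((waiter = hf_mailbox_waiter_get(readied_vm_id)) != -1) {
 *           // 'waiter' is the id of a VM whose earlier send failed with
 *           // notify set; wake it (e.g. by running one of its vCPUs) so it
 *           // can call hf_mailbox_writable_get() and retry the send.
 *   }
 */
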
/**
 * Enables or disables a given interrupt ID for the calling vCPU.
 *
 * Returns 0 on success, or -1 if the intid is invalid.
 */
int64_t api_interrupt_enable(uint32_t intid, bool enable, struct vcpu *current)
{
        uint32_t intid_index = intid / INTERRUPT_REGISTER_BITS;
        uint32_t intid_mask = 1u << (intid % INTERRUPT_REGISTER_BITS);

        if (intid >= HF_NUM_INTIDS) {
                return -1;
        }

        sl_lock(&current->lock);
        if (enable) {
                /*
                 * If it is pending and was not enabled before, increment the
                 * count.
                 */
                if (current->interrupts.interrupt_pending[intid_index] &
                    ~current->interrupts.interrupt_enabled[intid_index] &
                    intid_mask) {
                        current->interrupts.enabled_and_pending_count++;
                }
                current->interrupts.interrupt_enabled[intid_index] |=
                        intid_mask;
        } else {
                /*
                 * If it is pending and was enabled before, decrement the count.
                 */
                if (current->interrupts.interrupt_pending[intid_index] &
                    current->interrupts.interrupt_enabled[intid_index] &
                    intid_mask) {
                        current->interrupts.enabled_and_pending_count--;
                }
                current->interrupts.interrupt_enabled[intid_index] &=
                        ~intid_mask;
        }

        sl_unlock(&current->lock);
        return 0;
}

/**
 * Returns the ID of the next pending interrupt for the calling vCPU, and
 * acknowledges it (i.e. marks it as no longer pending). Returns
 * HF_INVALID_INTID if there are no pending interrupts.
 */
uint32_t api_interrupt_get(struct vcpu *current)
{
        uint8_t i;
        uint32_t first_interrupt = HF_INVALID_INTID;

        /*
         * Find the first enabled and pending interrupt ID, return it, and
         * deactivate it.
         */
        sl_lock(&current->lock);
        for (i = 0; i < HF_NUM_INTIDS / INTERRUPT_REGISTER_BITS; ++i) {
                uint32_t enabled_and_pending =
                        current->interrupts.interrupt_enabled[i] &
                        current->interrupts.interrupt_pending[i];

                if (enabled_and_pending != 0) {
                        uint8_t bit_index = ctz(enabled_and_pending);
                        /*
                         * Mark it as no longer pending and decrement the count.
                         */
                        current->interrupts.interrupt_pending[i] &=
                                ~(1u << bit_index);
                        current->interrupts.enabled_and_pending_count--;
                        first_interrupt =
                                i * INTERRUPT_REGISTER_BITS + bit_index;
                        break;
                }
        }

        sl_unlock(&current->lock);
        return first_interrupt;
}

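/*
 * Illustrative sketch only: how a guest might drain its pending virtual
 * interrupts, assuming vmapi wrappers hf_interrupt_enable() and
 * hf_interrupt_get() mirror the functions above (an assumption, not
 * guaranteed by this file). MY_DEVICE_INTID and handle_interrupt() are
 * hypothetical guest-side names.
 *
 *   hf_interrupt_enable(MY_DEVICE_INTID, true);
 *   ...
 *   // In the guest's IRQ handler:
 *   uint32_t intid;
 *
 *   while ((intid = hf_interrupt_get()) != HF_INVALID_INTID) {
 *           handle_interrupt(intid);
 *   }
 */
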
/**
 * Returns whether the current vCPU is allowed to inject an interrupt into the
 * given VM and vCPU.
 */
static inline bool is_injection_allowed(uint32_t target_vm_id,
                                        struct vcpu *current)
{
        uint32_t current_vm_id = current->vm->id;

        /*
         * The primary VM is allowed to inject interrupts into any VM. Secondary
         * VMs are only allowed to inject interrupts into their own vCPUs.
         */
        return current_vm_id == HF_PRIMARY_VM_ID ||
               current_vm_id == target_vm_id;
}

/**
 * Injects a virtual interrupt of the given ID into the given target vCPU.
 * This doesn't cause the vCPU to actually be run immediately; it will be taken
 * when the vCPU is next run, which is up to the scheduler.
 *
 * Returns:
 *  - -1 on failure because the target VM or vCPU doesn't exist, the interrupt
 *    ID is invalid, or the current VM is not allowed to inject interrupts into
 *    the target VM.
 *  - 0 on success if no further action is needed.
 *  - 1 if it was called by the primary VM and the primary VM now needs to wake
 *    up or kick the target vCPU.
 */
int64_t api_interrupt_inject(uint32_t target_vm_id, uint32_t target_vcpu_idx,
                             uint32_t intid, struct vcpu *current,
                             struct vcpu **next)
{
        struct vcpu *target_vcpu;
        struct vm *target_vm = vm_get(target_vm_id);

        if (intid >= HF_NUM_INTIDS) {
                return -1;
        }

        if (target_vm == NULL) {
                return -1;
        }

        if (target_vcpu_idx >= target_vm->vcpu_count) {
                /* The requested vcpu must exist. */
                return -1;
        }

        if (!is_injection_allowed(target_vm_id, current)) {
                return -1;
        }

        target_vcpu = &target_vm->vcpus[target_vcpu_idx];

        dlog("Injecting IRQ %d for VM %d VCPU %d from VM %d VCPU %d\n", intid,
             target_vm_id, target_vcpu_idx, current->vm->id, current->cpu->id);
        return internal_interrupt_inject(target_vm, target_vcpu, intid, current,
                                         next);
}

/**
 * Clears a region of physical memory by overwriting it with zeros. The data is
 * flushed from the cache so the memory has been cleared across the system.
 */
static bool api_clear_memory(paddr_t begin, paddr_t end, struct mpool *ppool)
{
        /*
         * TODO: change this to a cpu local single page window rather than a
         *       global mapping of the whole range. Such an approach will limit
         *       the changes to stage-1 tables and will allow only local
         *       invalidation.
         */
        void *ptr = mm_identity_map(begin, end, MM_MODE_W, ppool);
        size_t size = pa_addr(end) - pa_addr(begin);

        if (!ptr) {
                /* TODO: partial defrag of failed range. */
                /* Recover any memory consumed in failed mapping. */
                mm_defrag(ppool);
                return false;
        }

        memset(ptr, 0, size);
        arch_mm_write_back_dcache(ptr, size);
        mm_unmap(begin, end, ppool);

        return true;
}

/**
 * Shares memory from the calling VM with another. The memory can be shared in
 * different modes.
 *
 * TODO: the interface for sharing memory will need to be enhanced to allow
 *       sharing with different modes e.g. read-only, informing the recipient
 *       of the memory they have been given, opting to not wipe the memory and
 *       possibly allowing multiple blocks to be transferred. What this will
 *       look like is TBD.
 */
int64_t api_share_memory(uint32_t vm_id, ipaddr_t addr, size_t size,
                         enum hf_share share, struct vcpu *current)
{
        struct vm *from = current->vm;
        struct vm *to;
        int orig_from_mode;
        int from_mode;
        int to_mode;
        ipaddr_t begin;
        ipaddr_t end;
        paddr_t pa_begin;
        paddr_t pa_end;
        struct mpool local_page_pool;
        int64_t ret;

        /* Disallow reflexive shares as this suggests an error in the VM. */
        if (vm_id == from->id) {
                return -1;
        }

        /* Ensure the target VM exists. */
        to = vm_get(vm_id);
        if (to == NULL) {
                return -1;
        }

        begin = addr;
        end = ipa_add(addr, size);

        /* Fail if addresses are not page-aligned. */
        if ((ipa_addr(begin) & (PAGE_SIZE - 1)) ||
            (ipa_addr(end) & (PAGE_SIZE - 1))) {
                return -1;
        }

        /* Convert the sharing request to memory management modes. */
        switch (share) {
        case HF_MEMORY_GIVE:
                from_mode = MM_MODE_INVALID | MM_MODE_UNOWNED;
                to_mode = MM_MODE_R | MM_MODE_W | MM_MODE_X;
                break;

        case HF_MEMORY_LEND:
                from_mode = MM_MODE_INVALID;
                to_mode = MM_MODE_R | MM_MODE_W | MM_MODE_X | MM_MODE_UNOWNED;
                break;

        case HF_MEMORY_SHARE:
                from_mode = MM_MODE_R | MM_MODE_W | MM_MODE_X | MM_MODE_SHARED;
                to_mode = MM_MODE_R | MM_MODE_W | MM_MODE_X | MM_MODE_UNOWNED |
                          MM_MODE_SHARED;
                break;

        default:
                /* The input is untrusted so might not be a valid value. */
                return -1;
        }

        /*
         * Create a local pool so any freed memory can't be used by another
         * thread. This is to ensure the original mapping can be restored if any
         * stage of the process fails.
         */
        mpool_init_with_fallback(&local_page_pool, &api_page_pool);

        sl_lock_both(&from->lock, &to->lock);

        /*
         * Ensure that the memory range is mapped with the same mode so that
         * changes can be reverted if the process fails.
         */
        if (!mm_vm_get_mode(&from->ptable, begin, end, &orig_from_mode)) {
                goto fail;
        }

        /*
         * Ensure the memory range is valid for the sender. If it isn't, the
         * sender has either shared it with another VM already or has no claim
         * to the memory.
         */
        if (orig_from_mode & MM_MODE_INVALID) {
                goto fail;
        }

        /*
         * The sender must own the memory and have exclusive access to it in
         * order to share it. Alternatively, it is giving memory back to the
         * owning VM.
         */
        if (orig_from_mode & MM_MODE_UNOWNED) {
                int orig_to_mode;

                if (share != HF_MEMORY_GIVE ||
                    !mm_vm_get_mode(&to->ptable, begin, end, &orig_to_mode) ||
                    orig_to_mode & MM_MODE_UNOWNED) {
                        goto fail;
                }
        } else if (orig_from_mode & MM_MODE_SHARED) {
                goto fail;
        }

        pa_begin = pa_from_ipa(begin);
        pa_end = pa_from_ipa(end);

        /*
         * First update the mapping for the sender so there is no overlap with
         * the recipient.
         */
        if (!mm_vm_identity_map(&from->ptable, pa_begin, pa_end, from_mode,
                                NULL, &local_page_pool)) {
                goto fail;
        }

        /* Clear the memory so no VM or device can see the previous contents. */
        if (!api_clear_memory(pa_begin, pa_end, &local_page_pool)) {
                goto fail_return_to_sender;
        }

        /* Complete the transfer by mapping the memory into the recipient. */
        if (!mm_vm_identity_map(&to->ptable, pa_begin, pa_end, to_mode, NULL,
                                &local_page_pool)) {
                /* TODO: partial defrag of failed range. */
                /* Recover any memory consumed in failed mapping. */
                mm_vm_defrag(&from->ptable, &local_page_pool);
                goto fail_return_to_sender;
        }

        ret = 0;
        goto out;

fail_return_to_sender:
        mm_vm_identity_map(&from->ptable, pa_begin, pa_end, orig_from_mode,
                           NULL, &local_page_pool);

fail:
        ret = -1;

out:
        sl_unlock(&from->lock);
        sl_unlock(&to->lock);

        mpool_fini(&local_page_pool);

        return ret;
}
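
/*
 * Illustrative sketch only: how a VM might hand a page to a peer, assuming a
 * vmapi wrapper hf_share_memory(vm_id, addr, size, share) with the same
 * semantics as api_share_memory above (an assumption, not guaranteed by this
 * file); peer_id and page_addr are hypothetical names.
 *
 *   // Give a single page away: the contents are wiped before the recipient
 *   // can see them, and the caller loses access and ownership.
 *   if (hf_share_memory(peer_id, page_addr, PAGE_SIZE, HF_MEMORY_GIVE) != 0) {
 *           // The page was not owned exclusively by the caller, or the
 *           // arguments were not page-aligned.
 *   }
 */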