Implement SPCI pull model for memory sharing.
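
Memory send calls (SPCI_MEM_DONATE, SPCI_MEM_LEND and SPCI_MEM_SHARE) now
only update the sender's page table and record the transaction in a share
state table, returning a handle to the sender. The receiver must call
SPCI_MEM_RETRIEVE_REQ to have the memory mapped into its own page table,
and SPCI_MEM_RELINQUISH to give up access again; the sender can then call
SPCI_MEM_RECLAIM to take the memory back. The Hafnium-specific
HF_SPCI_MEM_RELINQUISH call is no longer needed and is removed.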

Bug: 132420445
Change-Id: Iac21d7949bc54da13a4f25a317f5e83b2727638c
diff --git a/src/BUILD.gn b/src/BUILD.gn
index 04e0923..3220b4e 100644
--- a/src/BUILD.gn
+++ b/src/BUILD.gn
@@ -71,6 +71,7 @@
     ":std",
     ":string",
     "//src/arch/${plat_arch}/hypervisor",
+    "//vmlib",
     plat_boot_flow,
     plat_console,
     plat_iommu,
diff --git a/src/api.c b/src/api.c
index 39da4d8..981fd5d 100644
--- a/src/api.c
+++ b/src/api.c
@@ -49,6 +49,11 @@
 	      "Currently, a page is mapped for the send and receive buffers so "
 	      "the maximum request is the size of a page.");
 
+static_assert(MM_PPOOL_ENTRY_SIZE >= HF_MAILBOX_SIZE,
+	      "The page pool entry size must be at least as big as the mailbox "
+	      "size, so that memory region descriptors can be copied from the "
+	      "mailbox for memory sharing.");
+
 static struct mpool api_page_pool;
 
 /**
@@ -380,7 +385,6 @@
 	case SPCI_MEM_DONATE_32:
 	case SPCI_MEM_LEND_32:
 	case SPCI_MEM_SHARE_32:
-	case HF_SPCI_MEM_RELINQUISH:
 		return (struct spci_value){.func = receiver->mailbox.recv_func,
 					   .arg3 = receiver->mailbox.recv_size,
 					   .arg4 = receiver->mailbox.recv_size};
@@ -1453,9 +1457,7 @@
 	struct vm *from = current->vm;
 	struct vm *to;
 	const void *from_msg;
-	uint32_t message_buffer_size;
 	struct spci_memory_region *memory_region;
-	struct two_vm_locked vm_to_from_lock;
 	struct spci_value ret;
 
 	if (ipa_addr(address) != 0 || page_count != 0) {
@@ -1486,25 +1488,36 @@
 	}
 
 	/*
-	 * Copy the memory region descriptor to an internal buffer, so that the
-	 * sender can't change it underneath us.
+	 * Copy the memory region descriptor to a fresh page from the memory
+	 * pool. This prevents the sender from changing it underneath us, and
+	 * also lets us keep it around in the share state table if needed.
 	 */
-	memory_region =
-		(struct spci_memory_region *)cpu_get_buffer(current->cpu);
-	message_buffer_size = cpu_get_buffer_size(current->cpu);
-	if (length > HF_MAILBOX_SIZE || length > message_buffer_size) {
+	if (length > HF_MAILBOX_SIZE || length > MM_PPOOL_ENTRY_SIZE) {
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
-	memcpy_s(memory_region, message_buffer_size, from_msg, length);
+	memory_region =
+		(struct spci_memory_region *)mpool_alloc(&api_page_pool);
+	if (memory_region == NULL) {
+		dlog_verbose("Failed to allocate memory region copy.\n");
+		return spci_error(SPCI_NO_MEMORY);
+	}
+	memcpy_s(memory_region, MM_PPOOL_ENTRY_SIZE, from_msg, length);
 
 	/* The sender must match the caller. */
 	if (memory_region->sender != from->id) {
-		return spci_error(SPCI_INVALID_PARAMETERS);
+		dlog_verbose("Memory region sender doesn't match caller.\n");
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
 	}
 
 	if (memory_region->attribute_count != 1) {
 		/* Hafnium doesn't support multi-way memory sharing for now. */
-		return spci_error(SPCI_NOT_SUPPORTED);
+		dlog_verbose(
+			"Multi-way memory sharing not supported (got %d "
+			"attribute descriptors, expected 1).\n",
+			memory_region->attribute_count);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
 	}
 
 	/*
@@ -1512,41 +1525,219 @@
 	 */
 	to = vm_find(memory_region->attributes[0].receiver);
 	if (to == NULL || to == from) {
+		dlog_verbose("Invalid receiver.\n");
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (to->id == HF_TEE_VM_ID) {
+		/*
+		 * The 'to' VM lock is only needed in the case that it is the
+		 * TEE VM.
+		 */
+		struct two_vm_locked vm_to_from_lock = vm_lock_both(to, from);
+
+		if (msg_receiver_busy(vm_to_from_lock.vm1, from, false)) {
+			ret = spci_error(SPCI_BUSY);
+			goto out_unlock;
+		}
+
+		ret = spci_memory_send(to, vm_to_from_lock.vm2, memory_region,
+				       length, share_func, &api_page_pool);
+
+		if (ret.func == SPCI_SUCCESS_32) {
+			/*
+			 * Forward memory send message on to TEE. The TEE
+			 * doesn't use the share state table, so free our copy
+			 * of the memory region once it has been forwarded.
+			 */
+			memcpy_s(to->mailbox.recv, SPCI_MSG_PAYLOAD_MAX,
+				 memory_region, length);
+			mpool_free(&api_page_pool, memory_region);
+			to->mailbox.recv_size = length;
+			to->mailbox.recv_sender = from->id;
+			to->mailbox.recv_func = share_func;
+			ret = deliver_msg(vm_to_from_lock.vm1, from->id,
+					  current, next);
+		}
+		/*
+		 * Either the memory region was forwarded and freed above, or
+		 * spci_memory_send took ownership of it on failure; either
+		 * way, make sure it isn't freed again at `out`.
+		 */
+		memory_region = NULL;
+
+	out_unlock:
+		vm_unlock(&vm_to_from_lock.vm1);
+		vm_unlock(&vm_to_from_lock.vm2);
+	} else {
+		struct vm_locked from_locked = vm_lock(from);
+
+		ret = spci_memory_send(to, from_locked, memory_region, length,
+				       share_func, &api_page_pool);
+		/*
+		 * spci_memory_send takes ownership of the memory_region, so
+		 * make sure we don't free it.
+		 */
+		memory_region = NULL;
+
+		vm_unlock(&from_locked);
+	}
+
+out:
+	if (memory_region != NULL) {
+		mpool_free(&api_page_pool, memory_region);
+	}
+
+	return ret;
+}
+
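+/**
+ * Retrieves a memory region that was previously sent to the caller with one of
+ * the SPCI memory send calls, mapping it into the caller's page table. The
+ * retrieve request descriptor is read from the caller's TX buffer.
+ */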
+struct spci_value api_spci_mem_retrieve_req(ipaddr_t address,
+					    uint32_t page_count,
+					    uint32_t fragment_length,
+					    uint32_t length, uint32_t cookie,
+					    struct vcpu *current)
+{
+	struct vm *to = current->vm;
+	struct vm_locked to_locked;
+	const void *to_msg;
+	struct spci_memory_retrieve_request *retrieve_request;
+	uint32_t message_buffer_size;
+	struct spci_value ret;
+
+	if (ipa_addr(address) != 0 || page_count != 0) {
+		/*
+		 * Hafnium only supports passing the descriptor in the TX
+		 * mailbox.
+		 */
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
 
-	vm_to_from_lock = vm_lock_both(to, from);
+	if (fragment_length == length && cookie != 0) {
+		/* Cookie is only allowed if there are multiple fragments. */
+		dlog_verbose("Unexpected cookie %d.\n", cookie);
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
 
-	if (msg_receiver_busy(vm_to_from_lock.vm1, from, false)) {
+	retrieve_request =
+		(struct spci_memory_retrieve_request *)cpu_get_buffer(
+			current->cpu);
+	message_buffer_size = cpu_get_buffer_size(current->cpu);
+	if (length > HF_MAILBOX_SIZE || length > message_buffer_size) {
+		dlog_verbose("Retrieve request too long.\n");
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	to_locked = vm_lock(to);
+	to_msg = to->mailbox.send;
+
+	if (to_msg == NULL) {
+		dlog_verbose("TX buffer not setup.\n");
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	/*
+	 * Copy the retrieve request descriptor to an internal buffer, so that
+	 * the caller can't change it underneath us.
+	 */
+	memcpy_s(retrieve_request, message_buffer_size, to_msg, length);
+
+	if (msg_receiver_busy(to_locked, NULL, false)) {
+		/*
+		 * Can't retrieve memory information if the mailbox is not
+		 * available.
+		 */
+		dlog_verbose("RX buffer not ready.\n");
 		ret = spci_error(SPCI_BUSY);
 		goto out;
 	}
 
-	ret = spci_memory_send(vm_to_from_lock.vm1, vm_to_from_lock.vm2,
-			       memory_region, length, share_func,
-			       &api_page_pool);
-
-	if (ret.func == SPCI_SUCCESS_32) {
-		/* Copy data to the destination Rx. */
-		/*
-		 * TODO: Translate the <from> IPA addresses to <to> IPA
-		 * addresses. Currently we assume identity mapping of the stage
-		 * 2 translation. Removing this assumption relies on a mechanism
-		 * to handle scenarios where the memory region fits in the
-		 * source Tx buffer but cannot fit in the destination Rx buffer.
-		 * This mechanism will be defined at the spec level.
-		 */
-		memcpy_s(to->mailbox.recv, SPCI_MSG_PAYLOAD_MAX, memory_region,
-			 length);
-		to->mailbox.recv_size = length;
-		to->mailbox.recv_sender = from->id;
-		to->mailbox.recv_func = share_func;
-		ret = deliver_msg(vm_to_from_lock.vm1, from->id, current, next);
-	}
+	ret = spci_memory_retrieve(to_locked, retrieve_request, length,
+				   &api_page_pool);
 
 out:
-	vm_unlock(&vm_to_from_lock.vm1);
-	vm_unlock(&vm_to_from_lock.vm2);
+	vm_unlock(&to_locked);
+	return ret;
+}
+
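+/**
+ * Relinquishes the caller's access to a memory region that it previously
+ * retrieved, unmapping it from the caller's page table. The relinquish
+ * descriptor is read from the caller's TX buffer.
+ */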
+struct spci_value api_spci_mem_relinquish(struct vcpu *current)
+{
+	struct vm *from = current->vm;
+	struct vm_locked from_locked;
+	const void *from_msg;
+	struct spci_mem_relinquish *relinquish_request;
+	uint32_t message_buffer_size;
+	struct spci_value ret;
+	uint32_t length;
+
+	from_locked = vm_lock(from);
+	from_msg = from->mailbox.send;
+
+	if (from_msg == NULL) {
+		dlog_verbose("TX buffer not setup.\n");
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	/*
+	 * Calculate length from relinquish descriptor before copying. We will
+	 * check again later to make sure it hasn't changed.
+	 */
+	length = sizeof(struct spci_mem_relinquish) +
+		 ((struct spci_mem_relinquish *)from_msg)->endpoint_count *
+			 sizeof(spci_vm_id_t);
+	/*
+	 * Copy the relinquish descriptor to an internal buffer, so that the
+	 * caller can't change it underneath us.
+	 */
+	relinquish_request =
+		(struct spci_mem_relinquish *)cpu_get_buffer(current->cpu);
+	message_buffer_size = cpu_get_buffer_size(current->cpu);
+	if (length > HF_MAILBOX_SIZE || length > message_buffer_size) {
+		dlog_verbose("Relinquish message too long.\n");
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+	memcpy_s(relinquish_request, message_buffer_size, from_msg, length);
+
+	if (sizeof(struct spci_mem_relinquish) +
+		    relinquish_request->endpoint_count * sizeof(spci_vm_id_t) !=
+	    length) {
+		dlog_verbose(
+			"Endpoint count changed while copying to internal "
+			"buffer.\n");
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	ret = spci_memory_relinquish(from_locked, relinquish_request,
+				     &api_page_pool);
+
+out:
+	vm_unlock(&from_locked);
+	return ret;
+}
+
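+/**
+ * Reclaims a memory region which the caller originally sent and which is not
+ * currently retrieved by the receiver, restoring the caller's access to it and
+ * freeing the associated share state.
+ */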
+struct spci_value api_spci_mem_reclaim(uint32_t handle, uint32_t flags,
+				       struct vcpu *current)
+{
+	struct vm *to = current->vm;
+	struct vm_locked to_locked;
+	struct spci_value ret;
+
+	to_locked = vm_lock(to);
+
+	if ((handle & SPCI_MEMORY_HANDLE_ALLOCATOR_MASK) ==
+	    SPCI_MEMORY_HANDLE_ALLOCATOR_HYPERVISOR) {
+		ret = spci_memory_reclaim(to_locked, handle,
+					  flags & SPCI_MEM_RECLAIM_CLEAR,
+					  &api_page_pool);
+	} else {
+		dlog_verbose(
+			"Tried to reclaim handle %#x not allocated by "
+			"hypervisor.\n",
+			handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	vm_unlock(&to_locked);
 
 	return ret;
 }
diff --git a/src/arch/aarch64/hypervisor/handler.c b/src/arch/aarch64/hypervisor/handler.c
index b4646d3..4a4f035 100644
--- a/src/arch/aarch64/hypervisor/handler.c
+++ b/src/arch/aarch64/hypervisor/handler.c
@@ -366,11 +366,21 @@
 	case SPCI_MEM_DONATE_32:
 	case SPCI_MEM_LEND_32:
 	case SPCI_MEM_SHARE_32:
-	case HF_SPCI_MEM_RELINQUISH:
 		*args = api_spci_mem_send(func, ipa_init(args->arg1),
 					  args->arg2, args->arg3, args->arg4,
 					  args->arg5, current(), next);
 		return true;
+	case SPCI_MEM_RETRIEVE_REQ_32:
+		*args = api_spci_mem_retrieve_req(
+			ipa_init(args->arg1), args->arg2, args->arg3,
+			args->arg4, args->arg5, current());
+		return true;
+	case SPCI_MEM_RELINQUISH_32:
+		*args = api_spci_mem_relinquish(current());
+		return true;
+	case SPCI_MEM_RECLAIM_32:
+		*args = api_spci_mem_reclaim(args->arg1, args->arg2, current());
+		return true;
 	}
 
 	return false;
diff --git a/src/init.c b/src/init.c
index f944461..2f241b8 100644
--- a/src/init.c
+++ b/src/init.c
@@ -38,9 +38,7 @@
 
 #include "vmapi/hf/call.h"
 
-alignas(alignof(
-	struct mm_page_table)) char ptable_buf[sizeof(struct mm_page_table) *
-					       HEAP_PAGES];
+alignas(MM_PPOOL_ENTRY_SIZE) char ptable_buf[MM_PPOOL_ENTRY_SIZE * HEAP_PAGES];
 
 static struct mpool ppool;
 
@@ -58,7 +56,7 @@
 
 	dlog_notice("Initialising hafnium\n");
 
-	mpool_init(&ppool, sizeof(struct mm_page_table));
+	mpool_init(&ppool, MM_PPOOL_ENTRY_SIZE);
 	mpool_add_chunk(&ppool, ptable_buf, sizeof(ptable_buf));
 
 	if (!mm_init(&ppool)) {
diff --git a/src/spci_memory.c b/src/spci_memory.c
index afd6315..eeafc05 100644
--- a/src/spci_memory.c
+++ b/src/spci_memory.c
@@ -24,6 +24,16 @@
 #include "hf/std.h"
 #include "hf/vm.h"
 
+/** The maximum number of recipients a memory region may be sent to. */
+#define MAX_MEM_SHARE_RECIPIENTS 1
+
+/**
+ * The maximum number of memory sharing handles which may be active at once. A
+ * DONATE handle is active from when it is sent to when it is retrieved; a SHARE
+ * or LEND handle is active from when it is sent to when it is reclaimed.
+ */
+#define MAX_MEM_SHARES 100
+
 static_assert(sizeof(struct spci_memory_region_constituent) % 16 == 0,
 	      "struct spci_memory_region_constituent must be a multiple of 16 "
 	      "bytes long.");
@@ -32,14 +42,227 @@
 	      "bytes long.");
 static_assert(sizeof(struct spci_memory_region) % 16 == 0,
 	      "struct spci_memory_region must be a multiple of 16 bytes long.");
+static_assert(sizeof(struct spci_receiver_address_range) % 16 == 0,
+	      "struct spci_receiver_address_range must be a multiple of 16 "
+	      "bytes long.");
+static_assert(sizeof(struct spci_retrieved_memory_region) % 16 == 0,
+	      "struct spci_retrieved_memory_region must be a multiple of 16 "
+	      "bytes long.");
+static_assert(sizeof(struct spci_memory_retrieve_properties) % 16 == 0,
+	      "struct spci_memory_retrieve_properties must be a multiple of 16 "
+	      "bytes long.");
+static_assert(sizeof(struct spci_memory_retrieve_request) % 16 == 0,
+	      "struct spci_memory_retrieve_request must be a multiple of 16 "
+	      "bytes long.");
 
-struct spci_mem_transitions {
-	uint32_t orig_from_mode;
-	uint32_t orig_to_mode;
-	uint32_t from_mode;
-	uint32_t to_mode;
+struct spci_memory_share_state {
+	/**
+	 * The memory region being shared, or NULL if this share state is
+	 * unallocated.
+	 */
+	struct spci_memory_region *memory_region;
+
+	/**
+	 * The SPCI function used for sharing the memory. Must be one of
+	 * SPCI_MEM_DONATE_32, SPCI_MEM_LEND_32 or SPCI_MEM_SHARE_32 if the
+	 * share state is allocated, or 0.
+	 */
+	uint32_t share_func;
+
+	/**
+	 * Whether each recipient has retrieved the memory region yet. The order
+	 * of this array matches the order of the attribute descriptors in the
+	 * memory region descriptor. Any entries beyond the attribute_count will
+	 * always be false.
+	 */
+	bool retrieved[MAX_MEM_SHARE_RECIPIENTS];
 };
 
+/**
+ * Encapsulates the set of share states while the `share_states_lock` is held.
+ */
+struct share_states_locked {
+	struct spci_memory_share_state *share_states;
+};
+
+/**
+ * All access to members of a `struct spci_memory_share_state` must be guarded
+ * by this lock.
+ */
+static struct spinlock share_states_lock_instance = SPINLOCK_INIT;
+static struct spci_memory_share_state share_states[MAX_MEM_SHARES];
+
+/**
+ * Initialises the next available `struct spci_memory_share_state` and sets
+ * `handle` to its handle. Returns true on success or false if none are
+ * available.
+ */
+static bool allocate_share_state(uint32_t share_func,
+				 struct spci_memory_region *memory_region,
+				 spci_memory_handle_t *handle)
+{
+	uint32_t i;
+
+	CHECK(memory_region != NULL);
+
+	sl_lock(&share_states_lock_instance);
+	for (i = 0; i < MAX_MEM_SHARES; ++i) {
+		if (share_states[i].share_func == 0) {
+			uint32_t j;
+			struct spci_memory_share_state *allocated_state =
+				&share_states[i];
+			allocated_state->share_func = share_func;
+			allocated_state->memory_region = memory_region;
+			for (j = 0; j < MAX_MEM_SHARE_RECIPIENTS; ++j) {
+				allocated_state->retrieved[j] = false;
+			}
+			*handle = i | SPCI_MEMORY_HANDLE_ALLOCATOR_HYPERVISOR;
+			sl_unlock(&share_states_lock_instance);
+			return true;
+		}
+	}
+
+	sl_unlock(&share_states_lock_instance);
+	return false;
+}
+
+/** Locks the share states lock. */
+struct share_states_locked share_states_lock(void)
+{
+	sl_lock(&share_states_lock_instance);
+
+	return (struct share_states_locked){.share_states = share_states};
+}
+
+/** Unlocks the share states lock. */
+static void share_states_unlock(struct share_states_locked *share_states)
+{
+	CHECK(share_states->share_states != NULL);
+	share_states->share_states = NULL;
+	sl_unlock(&share_states_lock_instance);
+}
+
+/**
+ * If the given handle is a valid handle for an allocated share state then
+ * initialises `share_state_ret` to point to the share state and returns true.
+ * Otherwise returns false. The share states lock must already be held.
+ */
+static bool get_share_state(struct share_states_locked share_states,
+			    spci_memory_handle_t handle,
+			    struct spci_memory_share_state **share_state_ret)
+{
+	struct spci_memory_share_state *share_state;
+	uint32_t index = handle & ~SPCI_MEMORY_HANDLE_ALLOCATOR_MASK;
+
+	if (index >= MAX_MEM_SHARES) {
+		return false;
+	}
+
+	share_state = &share_states.share_states[index];
+
+	if (share_state->share_func == 0) {
+		return false;
+	}
+
+	*share_state_ret = share_state;
+	return true;
+}
+
+/** Marks a share state as unallocated. */
+static void share_state_free(struct share_states_locked share_states,
+			     struct spci_memory_share_state *share_state,
+			     struct mpool *page_pool)
+{
+	CHECK(share_states.share_states != NULL);
+	share_state->share_func = 0;
+	mpool_free(page_pool, share_state->memory_region);
+	share_state->memory_region = NULL;
+}
+
+/**
+ * Marks the share state with the given handle as unallocated, or returns false
+ * if the handle was invalid.
+ */
+static bool share_state_free_handle(spci_memory_handle_t handle,
+				    struct mpool *page_pool)
+{
+	struct share_states_locked share_states = share_states_lock();
+	struct spci_memory_share_state *share_state;
+
+	if (!get_share_state(share_states, handle, &share_state)) {
+		share_states_unlock(&share_states);
+		return false;
+	}
+
+	share_state_free(share_states, share_state, page_pool);
+	share_states_unlock(&share_states);
+
+	return true;
+}
+
+static void dump_memory_region(struct spci_memory_region *memory_region)
+{
+	uint32_t i;
+
+	if (LOG_LEVEL < LOG_LEVEL_VERBOSE) {
+		return;
+	}
+
+	dlog("from VM %d, tag %d, flags %#x, %d total pages in %d constituents "
+	     "to %d recipients [",
+	     memory_region->sender, memory_region->tag, memory_region->flags,
+	     memory_region->page_count, memory_region->constituent_count,
+	     memory_region->attribute_count);
+	for (i = 0; i < memory_region->attribute_count; ++i) {
+		if (i != 0) {
+			dlog(", ");
+		}
+		dlog("VM %d: %#x", memory_region->attributes[i].receiver,
+		     memory_region->attributes[i].memory_attributes);
+	}
+	dlog("]");
+}
+
+static void dump_share_states(void)
+{
+	uint32_t i;
+
+	if (LOG_LEVEL < LOG_LEVEL_VERBOSE) {
+		return;
+	}
+
+	dlog("Current share states:\n");
+	sl_lock(&share_states_lock_instance);
+	for (i = 0; i < MAX_MEM_SHARES; ++i) {
+		if (share_states[i].share_func != 0) {
+			dlog("%d: ", i);
+			switch (share_states[i].share_func) {
+			case SPCI_MEM_SHARE_32:
+				dlog("SHARE");
+				break;
+			case SPCI_MEM_LEND_32:
+				dlog("LEND");
+				break;
+			case SPCI_MEM_DONATE_32:
+				dlog("DONATE");
+				break;
+			default:
+				dlog("invalid share_func %#x",
+				     share_states[i].share_func);
+			}
+			dlog(" (");
+			dump_memory_region(share_states[i].memory_region);
+			if (share_states[i].retrieved[0]) {
+				dlog("): retrieved\n");
+			} else {
+				dlog("): not retrieved\n");
+			}
+		}
+	}
+	sl_unlock(&share_states_lock_instance);
+}
+
 /* TODO: Add device attributes: GRE, cacheability, shareability. */
 static inline uint32_t spci_memory_attrs_to_mode(uint16_t memory_attributes)
 {
@@ -64,188 +287,21 @@
 }
 
 /**
- * Obtain the next mode to apply to the two VMs.
- *
- * Returns true iff a state transition was found.
+ * Get the current mode in the stage-2 page table of the given vm of all the
+ * pages in the given constituents, if they all have the same mode, or return
+ * false if not.
  */
-static bool spci_msg_get_next_state(
-	const struct spci_mem_transitions *transitions,
-	uint32_t transition_count, uint32_t memory_to_attributes,
-	uint32_t orig_from_mode, uint32_t orig_to_mode, uint32_t *from_mode,
-	uint32_t *to_mode)
-{
-	const uint32_t state_mask =
-		MM_MODE_INVALID | MM_MODE_UNOWNED | MM_MODE_SHARED;
-	const uint32_t orig_from_state = orig_from_mode & state_mask;
-
-	for (uint32_t index = 0; index < transition_count; index++) {
-		uint32_t table_orig_from_mode =
-			transitions[index].orig_from_mode;
-		uint32_t table_orig_to_mode = transitions[index].orig_to_mode;
-
-		if (((orig_from_state) == table_orig_from_mode) &&
-		    ((orig_to_mode & state_mask) == table_orig_to_mode)) {
-			*to_mode = transitions[index].to_mode |
-				   memory_to_attributes;
-
-			*from_mode = transitions[index].from_mode |
-				     (~state_mask & orig_from_mode);
-
-			return true;
-		}
-	}
-	return false;
-}
-
-/**
- * Verify that all pages have the same mode, that the starting mode
- * constitutes a valid state and obtain the next mode to apply
- * to the two VMs.
- *
- * Returns:
- *  The error code false indicates that:
- *   1) a state transition was not found;
- *   2) the pages being shared do not have the same mode within the <to>
- *     or <from> VMs;
- *   3) The beginning and end IPAs are not page aligned;
- *   4) The requested share type was not handled.
- *  Success is indicated by true.
- */
-static bool spci_msg_check_transition(
-	struct vm *to, struct vm *from, uint32_t share_func,
-	uint32_t *orig_from_mode,
+static bool constituents_get_mode(
+	struct vm_locked vm, uint32_t *orig_mode,
 	struct spci_memory_region_constituent *constituents,
-	uint32_t constituent_count, uint32_t memory_to_attributes,
-	uint32_t *from_mode, uint32_t *to_mode)
+	uint32_t constituent_count)
 {
-	uint32_t orig_to_mode;
-	const struct spci_mem_transitions *mem_transition_table;
-	uint32_t transition_table_size;
 	uint32_t i;
 
-	/*
-	 * TODO: Transition table does not currently consider the multiple
-	 * shared case.
-	 */
-	static const struct spci_mem_transitions donate_transitions[] = {
-		{
-			/* 1) {O-EA, !O-NA} -> {!O-NA, O-EA} */
-			.orig_from_mode = 0,
-			.orig_to_mode = MM_MODE_INVALID | MM_MODE_UNOWNED,
-			.from_mode = MM_MODE_INVALID | MM_MODE_UNOWNED,
-			.to_mode = 0,
-		},
-		{
-			/*
-			 * Duplicate of 1) in order to cater for an alternative
-			 * representation of !O-NA:
-			 * (INVALID | UNOWNED | SHARED) and (INVALID | UNOWNED)
-			 * are both alternate representations of !O-NA.
-			 */
-			/* 4) {O-EA, !O-NA} -> {!O-NA, O-EA} */
-			.orig_from_mode = 0,
-			.orig_to_mode = MM_MODE_INVALID | MM_MODE_UNOWNED |
-					MM_MODE_SHARED,
-			.from_mode = MM_MODE_INVALID | MM_MODE_UNOWNED |
-				     MM_MODE_SHARED,
-			.to_mode = 0,
-		},
-	};
-
-	static const uint32_t size_donate_transitions =
-		ARRAY_SIZE(donate_transitions);
-
-	/*
-	 * This data structure holds the allowed state transitions for the
-	 * "lend" state machine. In this state machine the owner keeps ownership
-	 * but loses access to the lent pages.
-	 */
-	static const struct spci_mem_transitions lend_transitions[] = {
-		{
-			/* 1) {O-EA, !O-NA} -> {O-NA, !O-EA} */
-			.orig_from_mode = 0,
-			.orig_to_mode = MM_MODE_INVALID | MM_MODE_UNOWNED |
-					MM_MODE_SHARED,
-			.from_mode = MM_MODE_INVALID,
-			.to_mode = MM_MODE_UNOWNED,
-		},
-		{
-			/*
-			 * Duplicate of 1) in order to cater for an alternative
-			 * representation of !O-NA:
-			 * (INVALID | UNOWNED | SHARED) and (INVALID | UNOWNED)
-			 * are both alternate representations of !O-NA.
-			 */
-			/* 2) {O-EA, !O-NA} -> {O-NA, !O-EA} */
-			.orig_from_mode = 0,
-			.orig_to_mode = MM_MODE_INVALID | MM_MODE_UNOWNED,
-			.from_mode = MM_MODE_INVALID,
-			.to_mode = MM_MODE_UNOWNED,
-		},
-	};
-
-	static const uint32_t size_lend_transitions =
-		ARRAY_SIZE(lend_transitions);
-
-	/*
-	 * This data structure holds the allowed state transitions for the
-	 * "share" state machine. In this state machine the owner keeps the
-	 * shared pages mapped on its stage2 table and keeps access as well.
-	 */
-	static const struct spci_mem_transitions share_transitions[] = {
-		{
-			/* 1) {O-EA, !O-NA} -> {O-SA, !O-SA} */
-			.orig_from_mode = 0,
-			.orig_to_mode = MM_MODE_INVALID | MM_MODE_UNOWNED |
-					MM_MODE_SHARED,
-			.from_mode = MM_MODE_SHARED,
-			.to_mode = MM_MODE_UNOWNED | MM_MODE_SHARED,
-		},
-		{
-			/*
-			 * Duplicate of 1) in order to cater for an alternative
-			 * representation of !O-NA:
-			 * (INVALID | UNOWNED | SHARED) and (INVALID | UNOWNED)
-			 * are both alternate representations of !O-NA.
-			 */
-			/* 2) {O-EA, !O-NA} -> {O-SA, !O-SA} */
-			.orig_from_mode = 0,
-			.orig_to_mode = MM_MODE_INVALID | MM_MODE_UNOWNED,
-			.from_mode = MM_MODE_SHARED,
-			.to_mode = MM_MODE_UNOWNED | MM_MODE_SHARED,
-		},
-	};
-
-	static const uint32_t size_share_transitions =
-		ARRAY_SIZE(share_transitions);
-
-	static const struct spci_mem_transitions relinquish_transitions[] = {
-		{
-			/* 1) {!O-EA, O-NA} -> {!O-NA, O-EA} */
-			.orig_from_mode = MM_MODE_UNOWNED,
-			.orig_to_mode = MM_MODE_INVALID,
-			.from_mode = MM_MODE_INVALID | MM_MODE_UNOWNED |
-				     MM_MODE_SHARED,
-			.to_mode = 0,
-		},
-		{
-			/* 2) {!O-SA, O-SA} -> {!O-NA, O-EA} */
-			.orig_from_mode = MM_MODE_UNOWNED | MM_MODE_SHARED,
-			.orig_to_mode = MM_MODE_SHARED,
-			.from_mode = MM_MODE_INVALID | MM_MODE_UNOWNED |
-				     MM_MODE_SHARED,
-			.to_mode = 0,
-		},
-	};
-
-	static const uint32_t size_relinquish_transitions =
-		ARRAY_SIZE(relinquish_transitions);
-
 	if (constituent_count == 0) {
 		/*
-		 * Fail if there are no constituents. Otherwise
-		 * spci_msg_get_next_state would get an unitialised
-		 * *orig_from_mode and orig_to_mode.
+		 * Fail if there are no constituents. Otherwise we would get an
+		 * uninitialised *orig_mode.
 		 */
 		return false;
 	}
@@ -256,8 +312,7 @@
 				&constituents[i]));
 		size_t size = constituents[i].page_count * PAGE_SIZE;
 		ipaddr_t end = ipa_add(begin, size);
-		uint32_t current_from_mode;
-		uint32_t current_to_mode;
+		uint32_t current_mode;
 
 		/* Fail if addresses are not page-aligned. */
 		if (!is_aligned(ipa_addr(begin), PAGE_SIZE) ||
@@ -269,10 +324,8 @@
 		 * Ensure that this constituent memory range is all mapped with
 		 * the same mode.
 		 */
-		if (!mm_vm_get_mode(&from->ptable, begin, end,
-				    &current_from_mode) ||
-		    !mm_vm_get_mode(&to->ptable, begin, end,
-				    &current_to_mode)) {
+		if (!mm_vm_get_mode(&vm.vm->ptable, begin, end,
+				    &current_mode)) {
 			return false;
 		}
 
@@ -280,48 +333,191 @@
 		 * Ensure that all constituents are mapped with the same mode.
 		 */
 		if (i == 0) {
-			*orig_from_mode = current_from_mode;
-			orig_to_mode = current_to_mode;
-		} else if (current_from_mode != *orig_from_mode ||
-			   current_to_mode != orig_to_mode) {
+			*orig_mode = current_mode;
+		} else if (current_mode != *orig_mode) {
 			return false;
 		}
 	}
 
-	/* Ensure the address range is normal memory and not a device. */
-	if (*orig_from_mode & MM_MODE_D) {
+	return true;
+}
+
+/**
+ * Verify that all pages have the same mode, that the starting mode
+ * constitutes a valid state and obtain the next mode to apply
+ * to the sending VM.
+ *
+ * Returns:
+ *  The error code false indicates that:
+ *   1) a state transition was not found;
+ *   2) the pages being shared do not have the same mode within the <from> VM;
+ *   3) The beginning and end IPAs are not page aligned;
+ *   4) The requested share type was not handled.
+ *  Success is indicated by true.
+ */
+static bool spci_send_check_transition(
+	struct vm_locked from, uint32_t share_func, uint32_t *orig_from_mode,
+	struct spci_memory_region_constituent *constituents,
+	uint32_t constituent_count, uint32_t *from_mode)
+{
+	const uint32_t state_mask =
+		MM_MODE_INVALID | MM_MODE_UNOWNED | MM_MODE_SHARED;
+
+	if (!constituents_get_mode(from, orig_from_mode, constituents,
+				   constituent_count)) {
 		return false;
 	}
 
+	/* Ensure the address range is normal memory and not a device. */
+	if (*orig_from_mode & MM_MODE_D) {
+		dlog_verbose("Can't share device memory (mode is %#x).\n",
+			     *orig_from_mode);
+		return false;
+	}
+
+	/*
+	 * Ensure the sender is the owner and has exclusive access to the
+	 * memory.
+	 */
+	if ((*orig_from_mode & state_mask) != 0) {
+		return false;
+	}
+
+	/* Find the appropriate new mode. */
+	*from_mode = ~state_mask & *orig_from_mode;
 	switch (share_func) {
 	case SPCI_MEM_DONATE_32:
-		mem_transition_table = donate_transitions;
-		transition_table_size = size_donate_transitions;
+		*from_mode |= MM_MODE_INVALID | MM_MODE_UNOWNED;
 		break;
 
 	case SPCI_MEM_LEND_32:
-		mem_transition_table = lend_transitions;
-		transition_table_size = size_lend_transitions;
+		*from_mode |= MM_MODE_INVALID;
 		break;
 
 	case SPCI_MEM_SHARE_32:
-		mem_transition_table = share_transitions;
-		transition_table_size = size_share_transitions;
-		break;
-
-	case HF_SPCI_MEM_RELINQUISH:
-		mem_transition_table = relinquish_transitions;
-		transition_table_size = size_relinquish_transitions;
+		*from_mode |= MM_MODE_SHARED;
 		break;
 
 	default:
 		return false;
 	}
 
-	return spci_msg_get_next_state(mem_transition_table,
-				       transition_table_size,
-				       memory_to_attributes, *orig_from_mode,
-				       orig_to_mode, from_mode, to_mode);
+	return true;
+}
+
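+/**
+ * Verify that all pages have the same mode, that the caller has access to but
+ * does not own the memory, and obtain the next mode to apply to the
+ * relinquishing VM (which unmaps the pages).
+ */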
+static bool spci_relinquish_check_transition(
+	struct vm_locked from, uint32_t *orig_from_mode,
+	struct spci_memory_region_constituent *constituents,
+	uint32_t constituent_count, uint32_t *from_mode)
+{
+	const uint32_t state_mask =
+		MM_MODE_INVALID | MM_MODE_UNOWNED | MM_MODE_SHARED;
+	uint32_t orig_from_state;
+
+	if (!constituents_get_mode(from, orig_from_mode, constituents,
+				   constituent_count)) {
+		return false;
+	}
+
+	/* Ensure the address range is normal memory and not a device. */
+	if (*orig_from_mode & MM_MODE_D) {
+		dlog_verbose("Can't relinquish device memory (mode is %#x).\n",
+			     *orig_from_mode);
+		return false;
+	}
+
+	/*
+	 * Ensure the relinquishing VM is not the owner but has access to the
+	 * memory.
+	 */
+	orig_from_state = *orig_from_mode & state_mask;
+	if ((orig_from_state & ~MM_MODE_SHARED) != MM_MODE_UNOWNED) {
+		dlog_verbose(
+			"Tried to relinquish memory in state %#x (masked %#x "
+			"but should be %#x).\n",
+			*orig_from_mode, orig_from_state, MM_MODE_UNOWNED);
+		return false;
+	}
+
+	/* Find the appropriate new mode. */
+	*from_mode = (~state_mask & *orig_from_mode) | MM_MODE_UNMAPPED_MASK;
+
+	return true;
+}
+
+/**
+ * Verify that all pages have the same mode, that the starting mode
+ * constitutes a valid state and obtain the next mode to apply
+ * to the retrieving VM.
+ *
+ * Returns:
+ *  The error code false indicates that:
+ *   1) a state transition was not found;
+ *   2) the pages being shared do not have the same mode within the <to> VM;
+ *   3) The beginning and end IPAs are not page aligned;
+ *   4) The requested share type was not handled.
+ *  Success is indicated by true.
+ */
+static bool spci_retrieve_check_transition(
+	struct vm_locked to, uint32_t share_func,
+	struct spci_memory_region_constituent *constituents,
+	uint32_t constituent_count, uint32_t memory_to_attributes,
+	uint32_t *to_mode)
+{
+	uint32_t orig_to_mode;
+
+	if (!constituents_get_mode(to, &orig_to_mode, constituents,
+				   constituent_count)) {
+		return false;
+	}
+
+	if (share_func == SPCI_MEM_RECLAIM_32) {
+		const uint32_t state_mask =
+			MM_MODE_INVALID | MM_MODE_UNOWNED | MM_MODE_SHARED;
+		uint32_t orig_to_state = orig_to_mode & state_mask;
+
+		if (orig_to_state != MM_MODE_INVALID &&
+		    orig_to_state != MM_MODE_SHARED) {
+			return false;
+		}
+	} else {
+		/*
+		 * Ensure the retriever has the expected state. We don't care
+		 * about the MM_MODE_SHARED bit; either with or without it set
+		 * are both valid representations of the !O-NA state.
+		 */
+		if ((orig_to_mode & MM_MODE_UNMAPPED_MASK) !=
+		    MM_MODE_UNMAPPED_MASK) {
+			return false;
+		}
+	}
+
+	/* Find the appropriate new mode. */
+	*to_mode = memory_to_attributes;
+	switch (share_func) {
+	case SPCI_MEM_DONATE_32:
+		*to_mode |= 0;
+		break;
+
+	case SPCI_MEM_LEND_32:
+		*to_mode |= MM_MODE_UNOWNED;
+		break;
+
+	case SPCI_MEM_SHARE_32:
+		*to_mode |= MM_MODE_UNOWNED | MM_MODE_SHARED;
+		break;
+
+	case SPCI_MEM_RECLAIM_32:
+		*to_mode |= 0;
+		break;
+
+	default:
+		return false;
+	}
+
+	return true;
 }
 
 /**
@@ -460,30 +656,27 @@
 }
 
 /**
- * Shares memory from the calling VM with another. The memory can be shared in
- * different modes.
+ * Validates and prepares memory to be sent from the calling VM to another.
  *
- * This function requires the calling context to hold the <to> and <from> locks.
+ * This function requires the calling context to hold the <from> VM lock.
  *
  * Returns:
- *  In case of error one of the following values is returned:
+ *  In case of error, one of the following values is returned:
  *   1) SPCI_INVALID_PARAMETERS - The endpoint provided parameters were
  *     erroneous;
  *   2) SPCI_NO_MEMORY - Hafnium did not have sufficient memory to complete
  *     the request.
  *  Success is indicated by SPCI_SUCCESS.
  */
-static struct spci_value spci_share_memory(
-	struct vm_locked to_locked, struct vm_locked from_locked,
+static struct spci_value spci_send_memory(
+	struct vm_locked from_locked,
 	struct spci_memory_region_constituent *constituents,
-	uint32_t constituent_count, uint32_t memory_to_attributes,
-	uint32_t share_func, struct mpool *page_pool, bool clear)
+	uint32_t constituent_count, uint32_t share_func,
+	struct mpool *page_pool, bool clear)
 {
-	struct vm *to = to_locked.vm;
 	struct vm *from = from_locked.vm;
 	uint32_t orig_from_mode;
 	uint32_t from_mode;
-	uint32_t to_mode;
 	struct mpool local_page_pool;
 	struct spci_value ret;
 
@@ -492,26 +685,17 @@
 	 * not we would get alignment faults trying to read (32-bit) values.
 	 */
 	if (!is_aligned(constituents, 4)) {
-		dlog_verbose("Constituents not aligned.\n");
-		return spci_error(SPCI_INVALID_PARAMETERS);
-	}
-
-	/* Disallow reflexive shares as this suggests an error in the VM. */
-	if (to == from) {
-		dlog_verbose("Reflexive share.\n");
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
 
 	/*
-	 * Check if the state transition is lawful for both VMs involved
-	 * in the memory exchange, ensure that all constituents of a memory
-	 * region being shared are at the same state.
+	 * Check if the state transition is lawful for the sender, ensure that
+	 * all constituents of a memory region being shared are at the same
+	 * state.
 	 */
-	if (!spci_msg_check_transition(to, from, share_func, &orig_from_mode,
-				       constituents, constituent_count,
-				       memory_to_attributes, &from_mode,
-				       &to_mode)) {
-		dlog_verbose("Invalid transition.\n");
+	if (!spci_send_check_transition(from_locked, share_func,
+					&orig_from_mode, constituents,
+					constituent_count, &from_mode)) {
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
 
@@ -523,16 +707,12 @@
 	mpool_init_with_fallback(&local_page_pool, page_pool);
 
 	/*
-	 * First reserve all required memory for the new page table entries in
-	 * both sender and recipient page tables without committing, to make
-	 * sure the entire operation will succeed without exhausting the page
-	 * pool.
+	 * First reserve all required memory for the new page table entries
+	 * without committing, to make sure the entire operation will succeed
+	 * without exhausting the page pool.
 	 */
 	if (!spci_region_group_identity_map(from_locked, constituents,
 					    constituent_count, from_mode,
-					    page_pool, false) ||
-	    !spci_region_group_identity_map(to_locked, constituents,
-					    constituent_count, to_mode,
 					    page_pool, false)) {
 		/* TODO: partial defrag of failed range. */
 		ret = spci_error(SPCI_NO_MEMORY);
@@ -540,10 +720,10 @@
 	}
 
 	/*
-	 * First update the mapping for the sender so there is no overlap with
-	 * the recipient. This won't allocate because the transaction was
-	 * already prepared above, but may free pages in the case that a whole
-	 * block is being unmapped that was previously partially mapped.
+	 * Update the mapping for the sender. This won't allocate because the
+	 * transaction was already prepared above, but may free pages in the
+	 * case that a whole block is being unmapped that was previously
+	 * partially mapped.
 	 */
 	CHECK(spci_region_group_identity_map(from_locked, constituents,
 					     constituent_count, from_mode,
@@ -566,6 +746,95 @@
 		goto out;
 	}
 
+	ret = (struct spci_value){.func = SPCI_SUCCESS_32};
+
+out:
+	mpool_fini(&local_page_pool);
+
+	/*
+	 * Tidy up the page table by reclaiming failed mappings (if there was an
+	 * error) or merging entries into blocks where possible (on success).
+	 */
+	mm_vm_defrag(&from->ptable, page_pool);
+
+	return ret;
+}
+
+/**
+ * Validates and maps memory shared from one VM to another.
+ *
+ * This function requires the calling context to hold the <to> lock.
+ *
+ * Returns:
+ *  In case of error, one of the following values is returned:
+ *   1) SPCI_INVALID_PARAMETERS - The endpoint provided parameters were
+ *     erroneous;
+ *   2) SPCI_NO_MEMORY - Hafnium did not have sufficient memory to complete
+ *     the request.
+ *  Success is indicated by SPCI_SUCCESS.
+ */
+static struct spci_value spci_retrieve_memory(
+	struct vm_locked to_locked,
+	struct spci_memory_region_constituent *constituents,
+	uint32_t constituent_count, uint32_t memory_to_attributes,
+	uint32_t share_func, bool clear, struct mpool *page_pool)
+{
+	struct vm *to = to_locked.vm;
+	uint32_t to_mode;
+	struct mpool local_page_pool;
+	struct spci_value ret;
+
+	/*
+	 * Make sure constituents are properly aligned to a 32-bit boundary. If
+	 * not we would get alignment faults trying to read (32-bit) values.
+	 */
+	if (!is_aligned(constituents, 4)) {
+		dlog_verbose("Constituents not aligned.\n");
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	/*
+	 * Check if the state transition is lawful for the recipient, and ensure
+	 * that all constituents of the memory region being retrieved are at the
+	 * same state.
+	 */
+	if (!spci_retrieve_check_transition(to_locked, share_func, constituents,
+					    constituent_count,
+					    memory_to_attributes, &to_mode)) {
+		dlog_verbose("Invalid transition.\n");
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	/*
+	 * Create a local pool so any freed memory can't be used by another
+	 * thread. This is to ensure the original mapping can be restored if the
+	 * clear fails.
+	 */
+	mpool_init_with_fallback(&local_page_pool, page_pool);
+
+	/*
+	 * First reserve all required memory for the new page table entries in
+	 * the recipient page tables without committing, to make sure the entire
+	 * operation will succeed without exhausting the page pool.
+	 */
+	if (!spci_region_group_identity_map(to_locked, constituents,
+					    constituent_count, to_mode,
+					    page_pool, false)) {
+		/* TODO: partial defrag of failed range. */
+		dlog_verbose(
+			"Insufficient memory to update recipient page "
+			"table.\n");
+		ret = spci_error(SPCI_NO_MEMORY);
+		goto out;
+	}
+
+	/* Clear the memory so no VM or device can see the previous contents. */
+	if (clear && !spci_clear_memory_constituents(
+			     constituents, constituent_count, page_pool)) {
+		ret = spci_error(SPCI_NO_MEMORY);
+		goto out;
+	}
+
 	/*
 	 * Complete the transfer by mapping the memory into the recipient. This
 	 * won't allocate because the transaction was already prepared above, so
@@ -581,11 +850,88 @@
 	mpool_fini(&local_page_pool);
 
 	/*
-	 * Tidy up the page tables by reclaiming failed mappings (if there was
+	 * Tidy up the page table by reclaiming failed mappings (if there was
 	 * an error) or merging entries into blocks where possible (on success).
 	 */
 	mm_vm_defrag(&to->ptable, page_pool);
-	mm_vm_defrag(&from->ptable, page_pool);
+
+	return ret;
+}
+
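+/**
+ * Unmaps the given constituents from the calling VM's page table, optionally
+ * clearing the memory first, so that the region can later be reclaimed by its
+ * sender or retrieved again.
+ *
+ * This function requires the calling context to hold the <from> lock.
+ */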
+static struct spci_value spci_relinquish_memory(
+	struct vm_locked from_locked,
+	struct spci_memory_region_constituent *constituents,
+	uint32_t constituent_count, struct mpool *page_pool, bool clear)
+{
+	uint32_t orig_from_mode;
+	uint32_t from_mode;
+	struct mpool local_page_pool;
+	struct spci_value ret;
+
+	if (!spci_relinquish_check_transition(from_locked, &orig_from_mode,
+					      constituents, constituent_count,
+					      &from_mode)) {
+		dlog_verbose("Invalid transition.\n");
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	/*
+	 * Create a local pool so any freed memory can't be used by another
+	 * thread. This is to ensure the original mapping can be restored if the
+	 * clear fails.
+	 */
+	mpool_init_with_fallback(&local_page_pool, page_pool);
+
+	/*
+	 * First reserve all required memory for the new page table entries
+	 * without committing, to make sure the entire operation will succeed
+	 * without exhausting the page pool.
+	 */
+	if (!spci_region_group_identity_map(from_locked, constituents,
+					    constituent_count, from_mode,
+					    page_pool, false)) {
+		/* TODO: partial defrag of failed range. */
+		ret = spci_error(SPCI_NO_MEMORY);
+		goto out;
+	}
+
+	/*
+	 * Update the mapping for the sender. This won't allocate because the
+	 * transaction was already prepared above, but may free pages in the
+	 * case that a whole block is being unmapped that was previously
+	 * partially mapped.
+	 */
+	CHECK(spci_region_group_identity_map(from_locked, constituents,
+					     constituent_count, from_mode,
+					     &local_page_pool, true));
+
+	/* Clear the memory so no VM or device can see the previous contents. */
+	if (clear && !spci_clear_memory_constituents(
+			     constituents, constituent_count, page_pool)) {
+		/*
+		 * On failure, roll back by returning memory to the sender. This
+		 * may allocate pages which were previously freed into
+		 * `local_page_pool` by the call above, but will never allocate
+		 * more pages than that so can never fail.
+		 */
+		CHECK(spci_region_group_identity_map(
+			from_locked, constituents, constituent_count,
+			orig_from_mode, &local_page_pool, true));
+
+		ret = spci_error(SPCI_NO_MEMORY);
+		goto out;
+	}
+
+	ret = (struct spci_value){.func = SPCI_SUCCESS_32};
+
+out:
+	mpool_fini(&local_page_pool);
+
+	/*
+	 * Tidy up the page table by reclaiming failed mappings (if there was an
+	 * error) or merging entries into blocks where possible (on success).
+	 */
+	mm_vm_defrag(&from_locked.vm->ptable, page_pool);
 
 	return ret;
 }
@@ -598,19 +944,25 @@
  *
  * Assumes that the caller has already found and locked both VMs and ensured
  * that the destination RX buffer is available, and copied the memory region
- * descriptor from the sender's TX buffer to a trusted internal buffer.
+ * descriptor from the sender's TX buffer to a freshly allocated page from
+ * Hafnium's internal pool.
+ *
+ * This function takes ownership of the `memory_region` passed in; it must not
+ * be freed by the caller.
  */
-struct spci_value spci_memory_send(struct vm_locked to_locked,
-				   struct vm_locked from_locked,
+struct spci_value spci_memory_send(struct vm *to, struct vm_locked from_locked,
 				   struct spci_memory_region *memory_region,
 				   uint32_t memory_share_size,
 				   uint32_t share_func, struct mpool *page_pool)
 {
-	uint32_t memory_to_attributes;
+	struct spci_memory_region_constituent *constituents =
+		spci_memory_region_get_constituents(memory_region);
+	uint32_t constituent_count = memory_region->constituent_count;
 	uint32_t attributes_size;
 	uint32_t constituents_size;
-	struct spci_memory_region_constituent *constituents;
-	uint32_t constituent_count = memory_region->constituent_count;
+	bool clear;
+	struct spci_value ret;
+	spci_memory_handle_t handle;
 
 	/*
 	 * Ensure the number of constituents are within the memory
@@ -624,42 +976,416 @@
 		    sizeof(struct spci_memory_region) + attributes_size ||
 	    memory_share_size !=
 		    memory_region->constituent_offset + constituents_size) {
+		dlog_verbose("Invalid size %d or constituent offset %d.\n",
+			     memory_share_size,
+			     memory_region->constituent_offset);
+		mpool_free(page_pool, memory_region);
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
 
 	/* The sender must match the message sender. */
 	if (memory_region->sender != from_locked.vm->id) {
+		dlog_verbose("Invalid sender %d.\n", memory_region->sender);
+		mpool_free(page_pool, memory_region);
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
 
 	/* We only support a single recipient. */
 	if (memory_region->attribute_count != 1) {
+		dlog_verbose("Multiple recipients not supported.\n");
+		mpool_free(page_pool, memory_region);
 		return spci_error(SPCI_NOT_SUPPORTED);
 	}
 
 	/* The recipient must match the message recipient. */
-	if (memory_region->attributes[0].receiver != to_locked.vm->id) {
+	if (memory_region->attributes[0].receiver != to->id) {
+		mpool_free(page_pool, memory_region);
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
 
-	switch (share_func) {
-	case SPCI_MEM_DONATE_32:
-	case SPCI_MEM_LEND_32:
-	case SPCI_MEM_SHARE_32:
-		memory_to_attributes = spci_memory_attrs_to_mode(
-			memory_region->attributes[0].memory_attributes);
-		break;
-	case HF_SPCI_MEM_RELINQUISH:
-		memory_to_attributes = MM_MODE_R | MM_MODE_W | MM_MODE_X;
-		break;
-	default:
-		dlog_error("Invalid memory sharing message.\n");
+	clear = memory_region->flags & SPCI_MEMORY_REGION_FLAG_CLEAR;
+	/*
+	 * Clear is not allowed for memory sharing, as the sender still has
+	 * access to the memory.
+	 */
+	if (clear && share_func == SPCI_MEM_SHARE_32) {
+		dlog_verbose("Memory can't be cleared while being shared.\n");
 		return spci_error(SPCI_INVALID_PARAMETERS);
 	}
 
+	/*
+	 * Allocate a share state before updating the page table. Otherwise if
+	 * updating the page table succeeded but allocating the share state
+	 * failed then it would leave the memory in a state where nobody could
+	 * get it back.
+	 */
+	if (to->id != HF_TEE_VM_ID &&
+	    !allocate_share_state(share_func, memory_region, &handle)) {
+		dlog_verbose("Failed to allocate share state.\n");
+		mpool_free(page_pool, memory_region);
+		return spci_error(SPCI_NO_MEMORY);
+	}
+
+	dump_share_states();
+
+	/* Check that state is valid in sender page table and update. */
+	ret = spci_send_memory(from_locked, constituents, constituent_count,
+			       share_func, page_pool, clear);
+	if (ret.func != SPCI_SUCCESS_32) {
+		if (to->id != HF_TEE_VM_ID) {
+			/* Free share state. */
+			bool freed = share_state_free_handle(handle, page_pool);
+
+			CHECK(freed);
+		}
+
+		return ret;
+	}
+
+	if (to->id == HF_TEE_VM_ID) {
+		/* Return directly, no need to allocate share state. */
+		return (struct spci_value){.func = SPCI_SUCCESS_32};
+	}
+
+	return (struct spci_value){.func = SPCI_SUCCESS_32, .arg2 = handle};
+}
+
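+/**
+ * Validates a SPCI_MEM_RETRIEVE_REQ against the share state with the given
+ * handle, maps the memory into the caller's page table and writes an
+ * SPCI_MEM_RETRIEVE_RESP describing it to the caller's RX buffer.
+ *
+ * Assumes that the caller has already found and locked the receiving VM and
+ * copied the retrieve request from its TX buffer to a trusted internal buffer.
+ */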
+struct spci_value spci_memory_retrieve(
+	struct vm_locked to_locked,
+	struct spci_memory_retrieve_request *retrieve_request,
+	uint32_t retrieve_request_size, struct mpool *page_pool)
+{
+	uint32_t expected_retrieve_request_size =
+		sizeof(struct spci_memory_retrieve_request) +
+		retrieve_request->retrieve_properties_count *
+			sizeof(struct spci_memory_retrieve_properties);
+	spci_memory_handle_t handle = retrieve_request->handle;
+	struct spci_memory_region *memory_region;
+	struct spci_memory_retrieve_properties *retrieve_properties;
+	uint32_t memory_to_attributes;
+	struct spci_memory_region_constituent *constituents;
+	uint32_t constituent_count;
+	struct share_states_locked share_states;
+	struct spci_memory_share_state *share_state;
+	struct spci_value ret;
+	uint32_t response_size;
+
+	dump_share_states();
+
+	if (retrieve_request_size != expected_retrieve_request_size) {
+		dlog_verbose(
+			"Invalid length for SPCI_MEM_RETRIEVE_REQ, expected %d "
+			"but was %d.\n",
+			expected_retrieve_request_size, retrieve_request_size);
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	share_states = share_states_lock();
+	if (!get_share_state(share_states, handle, &share_state)) {
+		dlog_verbose("Invalid handle %#x for SPCI_MEM_RETRIEVE_REQ.\n",
+			     handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (retrieve_request->share_func != share_state->share_func) {
+		dlog_verbose(
+			"Incorrect transaction type %#x for "
+			"SPCI_MEM_RETRIEVE_REQ, expected %#x for handle %#x.\n",
+			retrieve_request->share_func, share_state->share_func,
+			handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	memory_region = share_state->memory_region;
+	CHECK(memory_region != NULL);
+
+	if (retrieve_request->sender != memory_region->sender) {
+		dlog_verbose(
+			"Incorrect sender ID %d for SPCI_MEM_RETRIEVE_REQ, "
+			"expected %d for handle %#x.\n",
+			retrieve_request->sender, memory_region->sender,
+			handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (retrieve_request->tag != memory_region->tag) {
+		dlog_verbose(
+			"Incorrect tag %d for SPCI_MEM_RETRIEVE_REQ, expected "
+			"%d for handle %#x.\n",
+			retrieve_request->tag, memory_region->tag, handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (memory_region->attributes[0].receiver != to_locked.vm->id) {
+		dlog_verbose(
+			"Incorrect receiver VM ID %d for "
+			"SPCI_MEM_RETRIEVE_REQ, expected %d for handle %#x.\n",
+			to_locked.vm->id, memory_region->attributes[0].receiver,
+			handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (share_state->retrieved[0]) {
+		dlog_verbose("Memory with handle %#x already retrieved.\n",
+			     handle);
+		ret = spci_error(SPCI_DENIED);
+		goto out;
+	}
+
+	if (retrieve_request->attribute_count != 0) {
+		dlog_verbose(
+			"Multi-way memory sharing not supported (got %d "
+			"attribute descriptors on SPCI_MEM_RETRIEVE_REQ, "
+			"expected 0).\n",
+			retrieve_request->attribute_count);
+		ret = spci_error(SPCI_NOT_SUPPORTED);
+		goto out;
+	}
+
+	if (retrieve_request->retrieve_properties_count != 1) {
+		dlog_verbose(
+			"Stream endpoints not supported (got %d retrieve "
+			"properties descriptors on SPCI_MEM_RETRIEVE_REQ, "
+			"expected 1).\n",
+			retrieve_request->retrieve_properties_count);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	retrieve_properties =
+		spci_memory_retrieve_request_first_retrieve_properties(
+			retrieve_request);
+
+	if (retrieve_properties->attributes.receiver != to_locked.vm->id) {
+		dlog_verbose(
+			"Retrieve properties receiver VM ID %d didn't match "
+			"caller of SPCI_MEM_RETRIEVE_REQ.\n",
+			retrieve_properties->attributes.receiver);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (retrieve_properties->page_count != memory_region->page_count) {
+		dlog_verbose(
+			"Incorrect page count %d for "
+			"SPCI_MEM_RETRIEVE_REQ, expected %d for handle %#x.\n",
+			retrieve_properties->page_count,
+			memory_region->page_count, handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (retrieve_properties->constituent_count != 0) {
+		dlog_verbose(
+			"Retriever specified address ranges not supported (got "
+			"%d).\n",
+			retrieve_properties->constituent_count);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	memory_to_attributes = spci_memory_attrs_to_mode(
+		memory_region->attributes[0].memory_attributes);
+
+	constituents = spci_memory_region_get_constituents(memory_region);
+	constituent_count = memory_region->constituent_count;
+	ret = spci_retrieve_memory(to_locked, constituents, constituent_count,
+				   memory_to_attributes,
+				   share_state->share_func, false, page_pool);
+	if (ret.func != SPCI_SUCCESS_32) {
+		goto out;
+	}
+
+	/*
+	 * Copy response to RX buffer of caller and deliver the message. This
+	 * must be done before the share_state is (possibly) freed.
+	 */
+	response_size = spci_retrieved_memory_region_init(
+		to_locked.vm->mailbox.recv, HF_MAILBOX_SIZE, to_locked.vm->id,
+		constituents, constituent_count, memory_region->page_count);
+	to_locked.vm->mailbox.recv_size = response_size;
+	to_locked.vm->mailbox.recv_sender = HF_HYPERVISOR_VM_ID;
+	to_locked.vm->mailbox.recv_func = SPCI_MEM_RETRIEVE_RESP_32;
+	to_locked.vm->mailbox.state = MAILBOX_STATE_READ;
+
+	if (share_state->share_func == SPCI_MEM_DONATE_32) {
+		/*
+		 * Memory that has been donated can't be relinquished, so no
+		 * need to keep the share state around.
+		 */
+		share_state_free(share_states, share_state, page_pool);
+		dlog_verbose("Freed share state for donate.\n");
+	} else {
+		share_state->retrieved[0] = true;
+	}
+
+	ret = (struct spci_value){.func = SPCI_MEM_RETRIEVE_RESP_32,
+				  .arg3 = response_size,
+				  .arg4 = response_size};
+
+out:
+	share_states_unlock(&share_states);
+	dump_share_states();
+	return ret;
+}
+
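+/**
+ * Validates a SPCI_MEM_RELINQUISH request against the share state with the
+ * given handle and unmaps the memory from the calling VM's page table so that
+ * it can later be reclaimed by the sender or retrieved again.
+ *
+ * Assumes that the caller has already found and locked the relinquishing VM
+ * and copied the relinquish descriptor from its TX buffer to a trusted
+ * internal buffer.
+ */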
+struct spci_value spci_memory_relinquish(
+	struct vm_locked from_locked,
+	struct spci_mem_relinquish *relinquish_request, struct mpool *page_pool)
+{
+	spci_memory_handle_t handle = relinquish_request->handle;
+	struct share_states_locked share_states;
+	struct spci_memory_share_state *share_state;
+	struct spci_memory_region *memory_region;
+	bool clear;
+	struct spci_memory_region_constituent *constituents;
+	uint32_t constituent_count;
+	struct spci_value ret;
+
+	if (relinquish_request->endpoint_count != 0) {
+		dlog_verbose(
+			"Stream endpoints not supported (got %d extra "
+			"endpoints on SPCI_MEM_RELINQUISH, expected 0).\n",
+			relinquish_request->endpoint_count);
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	if (relinquish_request->sender != from_locked.vm->id) {
+		dlog_verbose(
+			"VM ID %d in relinquish message doesn't match calling "
+			"VM ID %d.\n",
+			relinquish_request->sender, from_locked.vm->id);
+		return spci_error(SPCI_INVALID_PARAMETERS);
+	}
+
+	dump_share_states();
+
+	share_states = share_states_lock();
+	if (!get_share_state(share_states, handle, &share_state)) {
+		dlog_verbose("Invalid handle %#x for SPCI_MEM_RELINQUISH.\n",
+			     handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	memory_region = share_state->memory_region;
+	CHECK(memory_region != NULL);
+
+	if (memory_region->attributes[0].receiver != from_locked.vm->id) {
+		dlog_verbose(
+			"VM ID %d tried to relinquish memory region with "
+			"handle %#x but receiver was %d.\n",
+			from_locked.vm->id, handle,
+			memory_region->attributes[0].receiver);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (!share_state->retrieved[0]) {
+		dlog_verbose(
+			"Memory with handle %#x not yet retrieved, can't "
+			"relinquish.\n",
+			handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	clear = relinquish_request->flags & SPCI_MEMORY_REGION_FLAG_CLEAR;
+
+	/*
+	 * Clear is not allowed for memory that was shared, as the original
+	 * sender still has access to the memory.
+	 */
+	if (clear && share_state->share_func == SPCI_MEM_SHARE_32) {
+		dlog_verbose("Memory which was shared can't be cleared.\n");
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
 	constituents = spci_memory_region_get_constituents(memory_region);
-	return spci_share_memory(
-		to_locked, from_locked, constituents, constituent_count,
-		memory_to_attributes, share_func, page_pool,
-		memory_region->flags & SPCI_MEMORY_REGION_FLAG_CLEAR);
+	constituent_count = memory_region->constituent_count;
+	ret = spci_relinquish_memory(from_locked, constituents,
+				     constituent_count, page_pool, clear);
+
+	if (ret.func == SPCI_SUCCESS_32) {
+		/*
+		 * Mark memory handle as not retrieved, so it can be reclaimed
+		 * (or retrieved again).
+		 */
+		share_state->retrieved[0] = false;
+	}
+
+out:
+	share_states_unlock(&share_states);
+	dump_share_states();
+	return ret;
+}
+
+/**
+ * Validates that the reclaim transition is allowed for the given handle,
+ * updates the page table of the reclaiming VM, and frees the internal state
+ * associated with the handle.
+ */
+struct spci_value spci_memory_reclaim(struct vm_locked to_locked,
+				      spci_memory_handle_t handle, bool clear,
+				      struct mpool *page_pool)
+{
+	struct share_states_locked share_states;
+	struct spci_memory_share_state *share_state;
+	struct spci_memory_region *memory_region;
+	struct spci_memory_region_constituent *constituents;
+	uint32_t constituent_count;
+	uint32_t memory_to_attributes = MM_MODE_R | MM_MODE_W | MM_MODE_X;
+	struct spci_value ret;
+
+	dump_share_states();
+
+	share_states = share_states_lock();
+	if (!get_share_state(share_states, handle, &share_state)) {
+		dlog_verbose("Invalid handle %#x for SPCI_MEM_RECLAIM.\n",
+			     handle);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	memory_region = share_state->memory_region;
+	CHECK(memory_region != NULL);
+
+	if (to_locked.vm->id != memory_region->sender) {
+		dlog_verbose(
+			"VM %d attempted to reclaim memory handle %#x "
+			"originally sent by VM %d.\n",
+			to_locked.vm->id, handle, memory_region->sender);
+		ret = spci_error(SPCI_INVALID_PARAMETERS);
+		goto out;
+	}
+
+	if (share_state->retrieved[0]) {
+		dlog_verbose(
+			"Tried to reclaim memory handle %#x that has not been "
+			"relinquished.\n",
+			handle);
+		ret = spci_error(SPCI_DENIED);
+		goto out;
+	}
+
+	constituents = spci_memory_region_get_constituents(memory_region);
+	constituent_count = memory_region->constituent_count;
+	ret = spci_retrieve_memory(to_locked, constituents, constituent_count,
+				   memory_to_attributes, SPCI_MEM_RECLAIM_32,
+				   clear, page_pool);
+
+	if (ret.func == SPCI_SUCCESS_32) {
+		share_state_free(share_states, share_state, page_pool);
+		dlog_verbose("Freed share state after successful reclaim.\n");
+	}
+
+out:
+	share_states_unlock(&share_states);
+	return ret;
 }