fix(ffa): add RX buffer release to FFA_MSG_WAIT

According to the FF-A specification (since FF-A v1.0), a call to
FFA_MSG_WAIT by an SP should release the SP's RX buffer if it was
previously full. Hafnium's implementation did not previously perform
this release, i.e. the transfer of the buffer back to its producer.

Signed-off-by: Kathleen Capella <kathleen.capella@arm.com>
Change-Id: If0e49e7f20e48ae5a48b161e92a67a96ac0680e7
diff --git a/src/api.c b/src/api.c
index 4c4e8fa..18d54e4 100644
--- a/src/api.c
+++ b/src/api.c
@@ -1109,6 +1109,24 @@
 	return true;
 }
 
+static void api_ffa_msg_wait_rx_release(struct vcpu *current)
+{
+	struct vm_locked vm_locked;
+	/* Find the VM of the current vCPU; bail out if there is none. */
+	vm_locked = plat_ffa_vm_find_locked(current->vm->id);
+	if (vm_locked.vm == NULL) {
+		return;
+	}
+	/* Release the VM's RX buffer (mailbox); VM is unlocked below. */
+	api_release_mailbox(vm_locked);
+	/* Best effort: a failed release is only logged, not reported. */
+	if (vm_locked.vm->mailbox.state != MAILBOX_STATE_EMPTY) {
+		dlog_warning("Mailbox not released to producer\n");
+	}
+
+	vm_unlock(&vm_locked);
+}
+
 struct ffa_value api_ffa_msg_wait(struct vcpu *current, struct vcpu **next,
 				  struct ffa_value *args)
 {
@@ -1142,8 +1160,18 @@
 	       next_state == VCPU_STATE_WAITING);
 
 	ret = plat_ffa_msg_wait_prepare(current_locked, next);
+
+	/*
+	 * To maintain partial ordering of locks, release vCPU lock before
+	 * releasing the VM's RX buffer, a process which requires locking the
+	 * VM.
+	 */
 out:
 	vcpu_unlock(&current_locked);
+
+	if (ret.func != FFA_ERROR_32) {
+		api_ffa_msg_wait_rx_release(current);
+	}
 	return ret;
 }