feat: disable alignment check for EL0 partitions

Relax the hardware alignment check specifically for (S-)EL0 partitions
when Hafnium runs with VHE enabled. EL1 partitions already have a
dedicated control (SCTLR_EL1.A) for the EL1&0 regime with respect to
alignment checking.

Create a hyp_state structure (grouping the HCR_EL2 and TTBR0_EL2
fields already defined in the vCPU context) to hold the Hypervisor EL2
static configuration applied while a vCPU runs. This state is switched
back and forth when running the Hypervisor or the VM.

Add SCTLR_EL2 to this context. An EL0 partition context is initialized
with SCTLR_EL2.A=0 such that the alignment check is disabled when EL0
runs in the EL2&0 translation regime. SCTLR_EL2.A is set again when
returning to the Hypervisor, such that Hypervisor execution runs with
the alignment check enabled at EL2.

Remove HCR_EL2 saving from the vCPU exit path, given that this
register state is static and doesn't change while a vCPU runs.

The rationale for this change is to permit running upstream SW stacks
such as EDKII/StandaloneMm [1], whose default build assumes unaligned
accesses are permitted. A similar request exists for running Trusted
Services on top of Hafnium [2].

[1] https://github.com/tianocore/edk2/tree/master/StandaloneMmPkg
[2] https://trusted-services.readthedocs.io/en/integration/

Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
Change-Id: I2906f4c712425fcfb31adbf89e2e3b9ca293f181
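
To illustrate the underlying problem, a minimal sketch follows
(hypothetical code, not part of this patch) of the kind of access a
default EDKII/StandaloneMm build can emit. With SCTLR_EL2.A=1 the
compiler-generated LDR below takes an alignment fault when the EL0
partition runs in the EL2&0 translation regime; with SCTLR_EL2.A=0 it
completes as an ordinary unaligned load:

    #include <stddef.h>
    #include <stdint.h>

    uint32_t parse_le32(const uint8_t *buf, size_t offset)
    {
            /* buf + offset is not necessarily 4-byte aligned; a
             * default build assumes unaligned accesses are permitted
             * and may emit a single LDR for this cast. */
            return *(const uint32_t *)(buf + offset);
    }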
diff --git a/src/arch/aarch64/hypervisor/cpu.c b/src/arch/aarch64/hypervisor/cpu.c
index c09c74c..5e025b5 100644
--- a/src/arch/aarch64/hypervisor/cpu.c
+++ b/src/arch/aarch64/hypervisor/cpu.c
@@ -108,7 +108,9 @@
 		}
 	}
 
-	r->hcr_el2 = get_hcr_el2_value(vm_id, vcpu->vm->el0_partition);
+	r->hyp_state.hcr_el2 =
+		get_hcr_el2_value(vm_id, vcpu->vm->el0_partition);
+	r->hyp_state.sctlr_el2 = get_sctlr_el2_value(vcpu->vm->el0_partition);
 	r->lazy.cnthctl_el2 = cnthctl;
 	if (vcpu->vm->el0_partition) {
 		CHECK(has_vhe_support());
@@ -118,10 +120,11 @@
 		 * are ignored and treated as 0. There is no need to mask the
 		 * VMID (used as asid) to only 8 bits.
 		 */
-		r->ttbr0_el2 = pa_addr(table) | ((uint64_t)vm_id << 48);
+		r->hyp_state.ttbr0_el2 =
+			pa_addr(table) | ((uint64_t)vm_id << 48);
 		r->spsr = PSR_PE_MODE_EL0T;
 	} else {
-		r->ttbr0_el2 = read_msr(ttbr0_el2);
+		r->hyp_state.ttbr0_el2 = read_msr(ttbr0_el2);
 		r->lazy.vtcr_el2 = arch_mm_get_vtcr_el2();
 		r->lazy.vttbr_el2 = pa_addr(table) | ((uint64_t)vm_id << 48);
 #if SECURE_WORLD == 1
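
As the cpu.c hunk above shows, for an EL0 partition the VM ID doubles
as the ASID in TTBR0_EL2[63:48]. With hypothetical values, a partition
with vm_id 0x8001 and its stage-1 root table at PA 0x6600000 gets:

    /* Illustration only; the values are made up. */
    r->hyp_state.ttbr0_el2 = 0x6600000ULL | ((uint64_t)0x8001 << 48);
    /* == 0x8001000006600000 */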
diff --git a/src/arch/aarch64/hypervisor/exceptions.S b/src/arch/aarch64/hypervisor/exceptions.S
index 642222d..aa24b6d 100644
--- a/src/arch/aarch64/hypervisor/exceptions.S
+++ b/src/arch/aarch64/hypervisor/exceptions.S
@@ -45,8 +45,6 @@
 	mrs x1, elr_el2
 	mrs x2, spsr_el2
 	stp x1, x2, [x18, #VCPU_REGS + 8 * 31]
-	mrs x1, hcr_el2
-	str x1, [x18, #VCPU_REGS + 8 * 33]
 .endm
 
 /**
@@ -286,7 +284,7 @@
 #if ENABLE_VHE
 	/* Check if VHE support is enabled, equivalent to has_vhe_support(). */
 	mrs x19, id_aa64mmfr1_el1
-	tst x19, #0xf00
+	tst x19, #(ID_AA64MMFR1_EL1_VH_MASK << ID_AA64MMFR1_EL1_VH_SHIFT)
 	b.ne vhe_save
 #endif
 
@@ -653,7 +651,7 @@
 #if ENABLE_VHE
 	/* Check if VHE support is enabled, equivalent to has_vhe_support(). */
 	mrs x19, id_aa64mmfr1_el1
-	tst x19, #0xf00
+	tst x19, #(ID_AA64MMFR1_EL1_VH_MASK << ID_AA64MMFR1_EL1_VH_SHIFT)
 	b.ne vhe_restore
 #endif
 
@@ -871,12 +869,13 @@
 	msr elr_el2, x1
 	msr spsr_el2, x2
 
-	ldr x1, [x0, #VCPU_REGS + 8 * 33]
+	ldp x1, x2, [x0, #VCPU_REGS + 8 * 33]
+	ldr x3, [x0, #VCPU_REGS + 8 * 35]
 	msr hcr_el2, x1
 	isb
 
-	ldr x1, [x0, #VCPU_REGS + 8 * 34]
-	msr ttbr0_el2, x1
+	msr ttbr0_el2, x2
+	msr sctlr_el2, x3
 	isb
 
 	/* Restore x0..x3, which we have used as scratch before. */
@@ -886,25 +885,27 @@
 
 #if ENABLE_VHE
 enable_vhe_tge:
-	/**
-	 * Switch to host mode ({E2H, TGE} = {1,1}) when VHE is enabled.
-	 * Note that E2H is always set when VHE is enabled.
-	 */
 	mrs x0, id_aa64mmfr1_el1
-	tst x0, #0xf00
+	tst x0, #(ID_AA64MMFR1_EL1_VH_MASK << ID_AA64MMFR1_EL1_VH_SHIFT)
 	b.eq 1f
-	orr x1, x1, #(1 << 27)
-	msr hcr_el2, x1
-	isb
 
 	/**
-	 * Switch to host page tables(ASID 0 tables).
+	 * Apply Hypervisor/SPMC configuration when VHE is enabled:
+	 * Switch to host mode (HCR_EL2.{E2H, TGE} = {1,1}).
+	 * Note that E2H is always set when VHE is enabled.
+	 * Enable alignment check (SCTLR_EL2.A).
+	 * Switch TTBR0_EL2 to host page tables (ASID 0 tables).
 	 */
 	adrp x0, arch_mm_config
 	add x0, x0, :lo12:arch_mm_config
-	ldr x0, [x0]
-	msr ttbr0_el2, x0
+	ldr x3, [x0]
+	ldp x1, x2, [x0, #0x18]
+	msr hcr_el2, x2
 	isb
-1:
-	ret
+
+	msr sctlr_el2, x1
+	msr ttbr0_el2, x3
+	isb
+
+1:	ret
 #endif
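
At C level, the restore sequence above is roughly equivalent to the
following sketch (the write_msr()/isb() helpers are assumed from
Hafnium's msr.h and barriers.h; this is an illustration, not code from
this patch):

    static void vcpu_restore_hyp_state(const struct arch_regs *r)
    {
            write_msr(hcr_el2, r->hyp_state.hcr_el2);
            /* HCR_EL2.{E2H, TGE} select the translation regime, so
             * synchronize before writing registers interpreted in
             * that regime. */
            isb();
            write_msr(ttbr0_el2, r->hyp_state.ttbr0_el2);
            write_msr(sctlr_el2, r->hyp_state.sctlr_el2);
            isb();
    }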
diff --git a/src/arch/aarch64/hypervisor/feature_id.c b/src/arch/aarch64/hypervisor/feature_id.c
index ed3bf8f..57f3262 100644
--- a/src/arch/aarch64/hypervisor/feature_id.c
+++ b/src/arch/aarch64/hypervisor/feature_id.c
@@ -175,7 +175,7 @@
 		~(ID_AA64MMFR1_EL1_VH_MASK << ID_AA64MMFR1_EL1_VH_SHIFT);
 
 	if (features & HF_FEATURE_RAS) {
-		regs->hcr_el2 |= HCR_EL2_TERR;
+		regs->hyp_state.hcr_el2 |= HCR_EL2_TERR;
 		vm->arch.tid3_masks.id_aa64mmfr1_el1 &=
 			~ID_AA64MMFR1_EL1_SPEC_SEI;
 		vm->arch.tid3_masks.id_aa64pfr0_el1 &= ~ID_AA64PFR0_EL1_RAS;
@@ -221,14 +221,14 @@
 	}
 
 	if (features & HF_FEATURE_LOR) {
-		regs->hcr_el2 |= HCR_EL2_TLOR;
+		regs->hyp_state.hcr_el2 |= HCR_EL2_TLOR;
 
 		vm->arch.tid3_masks.id_aa64mmfr1_el1 &= ~ID_AA64MMFR1_EL1_LO;
 	}
 
 	if (features & HF_FEATURE_PAUTH) {
 		/* APK and API bits *enable* trapping when cleared. */
-		regs->hcr_el2 &= ~(HCR_EL2_APK | HCR_EL2_API);
+		regs->hyp_state.hcr_el2 &= ~(HCR_EL2_APK | HCR_EL2_API);
 
 		vm->arch.tid3_masks.id_aa64isar1_el1 &= ~ID_AA64ISAR1_EL1_GPI;
 		vm->arch.tid3_masks.id_aa64isar1_el1 &= ~ID_AA64ISAR1_EL1_GPA;
diff --git a/src/arch/aarch64/hypervisor/handler.c b/src/arch/aarch64/hypervisor/handler.c
index 68a1777..fb7e549 100644
--- a/src/arch/aarch64/hypervisor/handler.c
+++ b/src/arch/aarch64/hypervisor/handler.c
@@ -271,9 +271,9 @@
 static void set_virtual_irq(struct arch_regs *r, bool enable)
 {
 	if (enable) {
-		r->hcr_el2 |= HCR_EL2_VI;
+		r->hyp_state.hcr_el2 |= HCR_EL2_VI;
 	} else {
-		r->hcr_el2 &= ~HCR_EL2_VI;
+		r->hyp_state.hcr_el2 &= ~HCR_EL2_VI;
 	}
 }
 
@@ -282,14 +282,15 @@
  */
 static void set_virtual_irq_current(bool enable)
 {
-	uintreg_t hcr_el2 = current()->regs.hcr_el2;
+	struct vcpu *vcpu = current();
+	uintreg_t hcr_el2 = vcpu->regs.hyp_state.hcr_el2;
 
 	if (enable) {
 		hcr_el2 |= HCR_EL2_VI;
 	} else {
 		hcr_el2 &= ~HCR_EL2_VI;
 	}
-	current()->regs.hcr_el2 = hcr_el2;
+	vcpu->regs.hyp_state.hcr_el2 = hcr_el2;
 }
 
 /**
@@ -299,9 +300,9 @@
 static void set_virtual_fiq(struct arch_regs *r, bool enable)
 {
 	if (enable) {
-		r->hcr_el2 |= HCR_EL2_VF;
+		r->hyp_state.hcr_el2 |= HCR_EL2_VF;
 	} else {
-		r->hcr_el2 &= ~HCR_EL2_VF;
+		r->hyp_state.hcr_el2 &= ~HCR_EL2_VF;
 	}
 }
 
@@ -310,14 +311,15 @@
  */
 static void set_virtual_fiq_current(bool enable)
 {
-	uintreg_t hcr_el2 = current()->regs.hcr_el2;
+	struct vcpu *vcpu = current();
+	uintreg_t hcr_el2 = vcpu->regs.hyp_state.hcr_el2;
 
 	if (enable) {
 		hcr_el2 |= HCR_EL2_VF;
 	} else {
 		hcr_el2 &= ~HCR_EL2_VF;
 	}
-	current()->regs.hcr_el2 = hcr_el2;
+	vcpu->regs.hyp_state.hcr_el2 = hcr_el2;
 }
 
 #if SECURE_WORLD == 1
diff --git a/src/arch/aarch64/inc/hf/arch/types.h b/src/arch/aarch64/inc/hf/arch/types.h
index 17d1a2f..64824ba 100644
--- a/src/arch/aarch64/inc/hf/arch/types.h
+++ b/src/arch/aarch64/inc/hf/arch/types.h
@@ -79,8 +79,13 @@
 	uintreg_t r[NUM_GP_REGS];
 	uintreg_t pc;
 	uintreg_t spsr;
-	uintreg_t hcr_el2;
-	uintreg_t ttbr0_el2;
+
+	/* Hypervisor configuration while a vCPU runs. */
+	struct {
+		uintreg_t hcr_el2;
+		uintreg_t ttbr0_el2;
+		uintreg_t sctlr_el2;
+	} hyp_state;
 
 	/*
 	 * System registers.
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index 8ee65ca..487ae35 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -886,7 +886,7 @@
 #endif
 				    (0xff << (8 * STAGE1_NORMALINDX)),
 
-		.sctlr_el2 = get_sctlr_el2_value(),
+		.sctlr_el2 = get_sctlr_el2_value(false),
 		.vstcr_el2 = (1U << 31) |	    /* RES1. */
 			     (0 << 30) |	    /* SA. */
 			     (0 << 29) |	    /* SW. */
diff --git a/src/arch/aarch64/sysregs.c b/src/arch/aarch64/sysregs.c
index 64d0bba..9c18e5d 100644
--- a/src/arch/aarch64/sysregs.c
+++ b/src/arch/aarch64/sysregs.c
@@ -168,7 +168,7 @@
 /**
  * Returns the value for SCTLR_EL2 for the CPU.
  */
-uintreg_t get_sctlr_el2_value(void)
+uintreg_t get_sctlr_el2_value(bool is_el0_partition)
 {
 	uintreg_t sctlr_el2_value = 0;
 
@@ -182,7 +182,14 @@
 
 	/* MMU-related bits. */
 	sctlr_el2_value |= SCTLR_EL2_M;
-	sctlr_el2_value |= SCTLR_EL2_A;
+
+	/*
+	 * Enable the alignment check, except in the case of an EL0
+	 * partition with VHE enabled.
+	 */
+	if (!(has_vhe_support() && is_el0_partition)) {
+		sctlr_el2_value |= SCTLR_EL2_A;
+	}
 	sctlr_el2_value |= SCTLR_EL2_C;
 	sctlr_el2_value |= SCTLR_EL2_SA;
 	sctlr_el2_value |= SCTLR_EL2_I;
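
The SCTLR_EL2.A policy implemented by get_sctlr_el2_value() can be
summarized as follows (derived from the hunk above):

    has_vhe_support() | is_el0_partition | SCTLR_EL2.A
    ------------------+------------------+------------
    false             | any              | 1
    true              | false            | 1
    true              | true             | 0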
diff --git a/src/arch/aarch64/sysregs.h b/src/arch/aarch64/sysregs.h
index 273c33e..47014a0 100644
--- a/src/arch/aarch64/sysregs.h
+++ b/src/arch/aarch64/sysregs.h
@@ -24,7 +24,7 @@
 uintreg_t get_cptr_el2_value(void);
 
 /** SCTLR_EL2 */
-uintreg_t get_sctlr_el2_value(void);
+uintreg_t get_sctlr_el2_value(bool is_el0_partition);
 
 /**
  * Branch Target Identification mechanism support in AArch64 state.