Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index 4a46fab..2580519 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
relocs
vmlinux.bin.all
vmlinux.relocs
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 292b5bc..bf91e0a 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -17,49 +17,60 @@
# (see scripts/Makefile.lib size_append)
# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
+# Sanitizer runtimes are unavailable and cannot be linked for early boot code.
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
KCOV_INSTRUMENT := n
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
- vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
+ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst
-KBUILD_CFLAGS := -m$(BITS) -O2
-KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
+# CLANG_FLAGS must come before any cc-disable-warning or cc-option calls in
+# case of cross compiling, as it has the '--target=' flag, which is needed to
+# avoid errors with '-march=i386', and future flags may depend on the target to
+# be valid.
+KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS)
+KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
-cflags-$(CONFIG_X86_64) := -mcmodel=small
+cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-mmx -mno-sse
-KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
-KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
+KBUILD_CFLAGS += -ffreestanding
+KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
KBUILD_CFLAGS += -Wno-pointer-sign
+KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS
# Disable relocation relaxation in case the link is not PIE.
KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
+KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
+
+# sev-es.c indirectly inludes inat-table.h which is generated during
+# compilation and stored in $(objtree). Add the directory to the includes so
+# that the compiler finds it even with out-of-tree builds (make O=/some/path).
+CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
UBSAN_SANITIZE :=n
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
# Compressed kernel should be built as PIE since it may be loaded at any
# address by the bootloader.
-ifeq ($(CONFIG_X86_32),y)
-KBUILD_LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
-else
-# To build 64-bit compressed kernel as PIE, we disable relocation
-# overflow check to avoid relocation overflow error with a new linker
-# command-line option, -z noreloc-overflow.
-KBUILD_LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
- && echo "-z noreloc-overflow -pie --no-dynamic-linker")
+LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
endif
-LDFLAGS_vmlinux := -T
+LDFLAGS_vmlinux += -T
-hostprogs-y := mkpiggy
+hostprogs := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
@@ -74,50 +85,27 @@
$(obj)/misc.o: $(obj)/../voffset.h
-vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
- $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
+vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \
+ $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
$(obj)/piggy.o $(obj)/cpuflags.o
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
- vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
+ vmlinux-objs-y += $(obj)/ident_map_64.o
+ vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o
endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
-$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
-
-vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
- $(objtree)/drivers/firmware/efi/libstub/lib.a
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
-# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
-# can place it anywhere in memory and it will still run. However, since
-# it is executed as-is without any ELF relocation processing performed
-# (and has already had all relocation sections stripped from the binary),
-# none of the code can use data relocations (e.g. static assignments of
-# pointer values), since they will be meaningless at runtime. This check
-# will refuse to link the vmlinux if any of these relocations are found.
-quiet_cmd_check_data_rel = DATAREL $@
-define cmd_check_data_rel
- for obj in $(filter %.o,$^); do \
- $(READELF) -S $$obj | grep -qF .rel.local && { \
- echo "error: $$obj has data relocations!" >&2; \
- exit 1; \
- } || true; \
- done
-endef
-
-# We need to run two commands under "if_changed", so merge them into a
-# single invocation.
-quiet_cmd_check-and-link-vmlinux = LD $@
- cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
-
-$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
- $(call if_changed,check-and-link-vmlinux)
+$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
+ $(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
@@ -146,6 +134,8 @@
$(call if_changed,lzo)
$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lz4)
+$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,zstd22)
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
@@ -153,6 +143,7 @@
suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_LZ4) := lz4
+suffix-$(CONFIG_KERNEL_ZSTD) := zst
quiet_cmd_mkpiggy = MKPIGGY $@
cmd_mkpiggy = $(obj)/mkpiggy $< > $@
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index ef2ad72..8bcbcee 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -280,9 +280,9 @@
*/
#define MAX_ADDR_LEN 19
-static acpi_physical_address get_cmdline_acpi_rsdp(void)
+static unsigned long get_cmdline_acpi_rsdp(void)
{
- acpi_physical_address addr = 0;
+ unsigned long addr = 0;
#ifdef CONFIG_KEXEC
char val[MAX_ADDR_LEN] = { };
@@ -292,7 +292,7 @@
if (ret < 0)
return 0;
- if (kstrtoull(val, 16, &addr))
+ if (boot_kstrtoul(val, 16, &addr))
return 0;
#endif
return addr;
@@ -314,7 +314,6 @@
* different ideas about whether to trust a command-line parameter.
*/
rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
-
if (!rsdp)
rsdp = (struct acpi_table_rsdp *)(long)
boot_params->acpi_rsdp_addr;
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
index 6448a81..0cc1323 100644
--- a/arch/x86/boot/compressed/cpuflags.c
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#ifdef CONFIG_RANDOMIZE_BASE
-
#include "../cpuflags.c"
bool has_cpuflag(int flag)
@@ -9,5 +7,3 @@
return test_bit(flag, cpu.flags);
}
-
-#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
deleted file mode 100644
index 82bc60c..0000000
--- a/arch/x86/boot/compressed/eboot.c
+++ /dev/null
@@ -1,926 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-/* -----------------------------------------------------------------------
- *
- * Copyright 2011 Intel Corporation; author Matt Fleming
- *
- * ----------------------------------------------------------------------- */
-
-#include <linux/efi.h>
-#include <linux/pci.h>
-
-#include <asm/efi.h>
-#include <asm/e820/types.h>
-#include <asm/setup.h>
-#include <asm/desc.h>
-#include <asm/boot.h>
-
-#include "../string.h"
-#include "eboot.h"
-
-static efi_system_table_t *sys_table;
-
-static struct efi_config *efi_early;
-
-__pure const struct efi_config *__efi_early(void)
-{
- return efi_early;
-}
-
-#define BOOT_SERVICES(bits) \
-static void setup_boot_services##bits(struct efi_config *c) \
-{ \
- efi_system_table_##bits##_t *table; \
- \
- table = (typeof(table))sys_table; \
- \
- c->runtime_services = table->runtime; \
- c->boot_services = table->boottime; \
- c->text_output = table->con_out; \
-}
-BOOT_SERVICES(32);
-BOOT_SERVICES(64);
-
-void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
-{
- efi_call_proto(efi_simple_text_output_protocol, output_string,
- efi_early->text_output, str);
-}
-
-static efi_status_t
-preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
-{
- struct pci_setup_rom *rom = NULL;
- efi_status_t status;
- unsigned long size;
- uint64_t romsize;
- void *romimage;
-
- /*
- * Some firmware images contain EFI function pointers at the place where
- * the romimage and romsize fields are supposed to be. Typically the EFI
- * code is mapped at high addresses, translating to an unrealistically
- * large romsize. The UEFI spec limits the size of option ROMs to 16
- * MiB so we reject any ROMs over 16 MiB in size to catch this.
- */
- romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol,
- romimage, pci);
- romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci);
- if (!romimage || !romsize || romsize > SZ_16M)
- return EFI_INVALID_PARAMETER;
-
- size = romsize + sizeof(*rom);
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
- return status;
- }
-
- memset(rom, 0, sizeof(*rom));
-
- rom->data.type = SETUP_PCI;
- rom->data.len = size - sizeof(struct setup_data);
- rom->data.next = 0;
- rom->pcilen = pci->romsize;
- *__rom = rom;
-
- status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
- EfiPciIoWidthUint16, PCI_VENDOR_ID, 1,
- &rom->vendor);
-
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->vendor\n");
- goto free_struct;
- }
-
- status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
- EfiPciIoWidthUint16, PCI_DEVICE_ID, 1,
- &rom->devid);
-
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->devid\n");
- goto free_struct;
- }
-
- status = efi_call_proto(efi_pci_io_protocol, get_location, pci,
- &rom->segment, &rom->bus, &rom->device,
- &rom->function);
-
- if (status != EFI_SUCCESS)
- goto free_struct;
-
- memcpy(rom->romdata, romimage, romsize);
- return status;
-
-free_struct:
- efi_call_early(free_pool, rom);
- return status;
-}
-
-/*
- * There's no way to return an informative status from this function,
- * because any analysis (and printing of error messages) needs to be
- * done directly at the EFI function call-site.
- *
- * For example, EFI_INVALID_PARAMETER could indicate a bug or maybe we
- * just didn't find any PCI devices, but there's no way to tell outside
- * the context of the call.
- */
-static void setup_efi_pci(struct boot_params *params)
-{
- efi_status_t status;
- void **pci_handle = NULL;
- efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
- unsigned long size = 0;
- unsigned long nr_pci;
- struct setup_data *data;
- int i;
-
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- &pci_proto, NULL, &size, pci_handle);
-
- if (status == EFI_BUFFER_TOO_SMALL) {
- status = efi_call_early(allocate_pool,
- EFI_LOADER_DATA,
- size, (void **)&pci_handle);
-
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
- return;
- }
-
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL, &pci_proto,
- NULL, &size, pci_handle);
- }
-
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
- for (i = 0; i < nr_pci; i++) {
- efi_pci_io_protocol_t *pci = NULL;
- struct pci_setup_rom *rom;
-
- status = efi_call_early(handle_protocol,
- efi_is_64bit() ? ((u64 *)pci_handle)[i]
- : ((u32 *)pci_handle)[i],
- &pci_proto, (void **)&pci);
- if (status != EFI_SUCCESS || !pci)
- continue;
-
- status = preserve_pci_rom_image(pci, &rom);
- if (status != EFI_SUCCESS)
- continue;
-
- if (data)
- data->next = (unsigned long)rom;
- else
- params->hdr.setup_data = (unsigned long)rom;
-
- data = (struct setup_data *)rom;
- }
-
-free_handle:
- efi_call_early(free_pool, pci_handle);
-}
-
-static void retrieve_apple_device_properties(struct boot_params *boot_params)
-{
- efi_guid_t guid = APPLE_PROPERTIES_PROTOCOL_GUID;
- struct setup_data *data, *new;
- efi_status_t status;
- u32 size = 0;
- void *p;
-
- status = efi_call_early(locate_protocol, &guid, NULL, &p);
- if (status != EFI_SUCCESS)
- return;
-
- if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) {
- efi_printk(sys_table, "Unsupported properties proto version\n");
- return;
- }
-
- efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size);
- if (!size)
- return;
-
- do {
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size + sizeof(struct setup_data), &new);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'properties'\n");
- return;
- }
-
- status = efi_call_proto(apple_properties_protocol, get_all, p,
- new->data, &size);
-
- if (status == EFI_BUFFER_TOO_SMALL)
- efi_call_early(free_pool, new);
- } while (status == EFI_BUFFER_TOO_SMALL);
-
- new->type = SETUP_APPLE_PROPERTIES;
- new->len = size;
- new->next = 0;
-
- data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
- if (!data) {
- boot_params->hdr.setup_data = (unsigned long)new;
- } else {
- while (data->next)
- data = (struct setup_data *)(unsigned long)data->next;
- data->next = (unsigned long)new;
- }
-}
-
-static const efi_char16_t apple[] = L"Apple";
-
-static void setup_quirks(struct boot_params *boot_params)
-{
- efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
- efi_table_attr(efi_system_table, fw_vendor, sys_table);
-
- if (!memcmp(fw_vendor, apple, sizeof(apple))) {
- if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
- retrieve_apple_device_properties(boot_params);
- }
-}
-
-/*
- * See if we have Universal Graphics Adapter (UGA) protocol
- */
-static efi_status_t
-setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
-{
- efi_status_t status;
- u32 width, height;
- void **uga_handle = NULL;
- efi_uga_draw_protocol_t *uga = NULL, *first_uga;
- unsigned long nr_ugas;
- int i;
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)&uga_handle);
- if (status != EFI_SUCCESS)
- return status;
-
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- uga_proto, NULL, &size, uga_handle);
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- height = 0;
- width = 0;
-
- first_uga = NULL;
- nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
- for (i = 0; i < nr_ugas; i++) {
- efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u32 w, h, depth, refresh;
- void *pciio;
- unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i]
- : ((u32 *)uga_handle)[i];
-
- status = efi_call_early(handle_protocol, handle,
- uga_proto, (void **)&uga);
- if (status != EFI_SUCCESS)
- continue;
-
- pciio = NULL;
- efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
-
- status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga,
- &w, &h, &depth, &refresh);
- if (status == EFI_SUCCESS && (!first_uga || pciio)) {
- width = w;
- height = h;
-
- /*
- * Once we've found a UGA supporting PCIIO,
- * don't bother looking any further.
- */
- if (pciio)
- break;
-
- first_uga = uga;
- }
- }
-
- if (!width && !height)
- goto free_handle;
-
- /* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
- si->lfb_depth = 32;
- si->lfb_width = width;
- si->lfb_height = height;
-
- si->red_size = 8;
- si->red_pos = 16;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 0;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
-
-free_handle:
- efi_call_early(free_pool, uga_handle);
-
- return status;
-}
-
-void setup_graphics(struct boot_params *boot_params)
-{
- efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
- struct screen_info *si;
- efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
- efi_status_t status;
- unsigned long size;
- void **gop_handle = NULL;
- void **uga_handle = NULL;
-
- si = &boot_params->screen_info;
- memset(si, 0, sizeof(*si));
-
- size = 0;
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- &graphics_proto, NULL, &size, gop_handle);
- if (status == EFI_BUFFER_TOO_SMALL)
- status = efi_setup_gop(NULL, si, &graphics_proto, size);
-
- if (status != EFI_SUCCESS) {
- size = 0;
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- &uga_proto, NULL, &size, uga_handle);
- if (status == EFI_BUFFER_TOO_SMALL)
- setup_uga(si, &uga_proto, size);
- }
-}
-
-/*
- * Because the x86 boot code expects to be passed a boot_params we
- * need to create one ourselves (usually the bootloader would create
- * one for us).
- *
- * The caller is responsible for filling out ->code32_start in the
- * returned boot_params.
- */
-struct boot_params *make_boot_params(struct efi_config *c)
-{
- struct boot_params *boot_params;
- struct apm_bios_info *bi;
- struct setup_header *hdr;
- efi_loaded_image_t *image;
- void *handle;
- efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
- int options_size = 0;
- efi_status_t status;
- char *cmdline_ptr;
- unsigned long ramdisk_addr;
- unsigned long ramdisk_size;
-
- efi_early = c;
- sys_table = (efi_system_table_t *)(unsigned long)efi_early->table;
- handle = (void *)(unsigned long)efi_early->image_handle;
-
- /* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- return NULL;
-
- if (efi_is_64bit())
- setup_boot_services64(efi_early);
- else
- setup_boot_services32(efi_early);
-
- status = efi_call_early(handle_protocol, handle,
- &proto, (void *)&image);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
- return NULL;
- }
-
- status = efi_low_alloc(sys_table, 0x4000, 1,
- (unsigned long *)&boot_params);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate lowmem for boot params\n");
- return NULL;
- }
-
- memset(boot_params, 0x0, 0x4000);
-
- hdr = &boot_params->hdr;
- bi = &boot_params->apm_bios_info;
-
- /* Copy the second sector to boot_params */
- memcpy(&hdr->jump, image->image_base + 512, 512);
-
- /*
- * Fill out some of the header fields ourselves because the
- * EFI firmware loader doesn't load the first sector.
- */
- hdr->root_flags = 1;
- hdr->vid_mode = 0xffff;
- hdr->boot_flag = 0xAA55;
-
- hdr->type_of_loader = 0x21;
-
- /* Convert unicode cmdline to ascii */
- cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
- if (!cmdline_ptr)
- goto fail;
-
- hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
- /* Fill in upper bits of command line address, NOP on 32 bit */
- boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
-
- hdr->ramdisk_image = 0;
- hdr->ramdisk_size = 0;
-
- /* Clear APM BIOS info */
- memset(bi, 0, sizeof(*bi));
-
- status = efi_parse_options(cmdline_ptr);
- if (status != EFI_SUCCESS)
- goto fail2;
-
- status = handle_cmdline_files(sys_table, image,
- (char *)(unsigned long)hdr->cmd_line_ptr,
- "initrd=", hdr->initrd_addr_max,
- &ramdisk_addr, &ramdisk_size);
-
- if (status != EFI_SUCCESS &&
- hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) {
- efi_printk(sys_table, "Trying to load files to higher address\n");
- status = handle_cmdline_files(sys_table, image,
- (char *)(unsigned long)hdr->cmd_line_ptr,
- "initrd=", -1UL,
- &ramdisk_addr, &ramdisk_size);
- }
-
- if (status != EFI_SUCCESS)
- goto fail2;
- hdr->ramdisk_image = ramdisk_addr & 0xffffffff;
- hdr->ramdisk_size = ramdisk_size & 0xffffffff;
- boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32;
- boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
-
- return boot_params;
-
-fail2:
- efi_free(sys_table, options_size, hdr->cmd_line_ptr);
-fail:
- efi_free(sys_table, 0x4000, (unsigned long)boot_params);
-
- return NULL;
-}
-
-static void add_e820ext(struct boot_params *params,
- struct setup_data *e820ext, u32 nr_entries)
-{
- struct setup_data *data;
-
- e820ext->type = SETUP_E820_EXT;
- e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
- e820ext->next = 0;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- if (data)
- data->next = (unsigned long)e820ext;
- else
- params->hdr.setup_data = (unsigned long)e820ext;
-}
-
-static efi_status_t
-setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_size)
-{
- struct boot_e820_entry *entry = params->e820_table;
- struct efi_info *efi = ¶ms->efi_info;
- struct boot_e820_entry *prev = NULL;
- u32 nr_entries;
- u32 nr_desc;
- int i;
-
- nr_entries = 0;
- nr_desc = efi->efi_memmap_size / efi->efi_memdesc_size;
-
- for (i = 0; i < nr_desc; i++) {
- efi_memory_desc_t *d;
- unsigned int e820_type = 0;
- unsigned long m = efi->efi_memmap;
-
-#ifdef CONFIG_X86_64
- m |= (u64)efi->efi_memmap_hi << 32;
-#endif
-
- d = efi_early_memdesc_ptr(m, efi->efi_memdesc_size, i);
- switch (d->type) {
- case EFI_RESERVED_TYPE:
- case EFI_RUNTIME_SERVICES_CODE:
- case EFI_RUNTIME_SERVICES_DATA:
- case EFI_MEMORY_MAPPED_IO:
- case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
- case EFI_PAL_CODE:
- e820_type = E820_TYPE_RESERVED;
- break;
-
- case EFI_UNUSABLE_MEMORY:
- e820_type = E820_TYPE_UNUSABLE;
- break;
-
- case EFI_ACPI_RECLAIM_MEMORY:
- e820_type = E820_TYPE_ACPI;
- break;
-
- case EFI_LOADER_CODE:
- case EFI_LOADER_DATA:
- case EFI_BOOT_SERVICES_CODE:
- case EFI_BOOT_SERVICES_DATA:
- case EFI_CONVENTIONAL_MEMORY:
- e820_type = E820_TYPE_RAM;
- break;
-
- case EFI_ACPI_MEMORY_NVS:
- e820_type = E820_TYPE_NVS;
- break;
-
- case EFI_PERSISTENT_MEMORY:
- e820_type = E820_TYPE_PMEM;
- break;
-
- default:
- continue;
- }
-
- /* Merge adjacent mappings */
- if (prev && prev->type == e820_type &&
- (prev->addr + prev->size) == d->phys_addr) {
- prev->size += d->num_pages << 12;
- continue;
- }
-
- if (nr_entries == ARRAY_SIZE(params->e820_table)) {
- u32 need = (nr_desc - i) * sizeof(struct e820_entry) +
- sizeof(struct setup_data);
-
- if (!e820ext || e820ext_size < need)
- return EFI_BUFFER_TOO_SMALL;
-
- /* boot_params map full, switch to e820 extended */
- entry = (struct boot_e820_entry *)e820ext->data;
- }
-
- entry->addr = d->phys_addr;
- entry->size = d->num_pages << PAGE_SHIFT;
- entry->type = e820_type;
- prev = entry++;
- nr_entries++;
- }
-
- if (nr_entries > ARRAY_SIZE(params->e820_table)) {
- u32 nr_e820ext = nr_entries - ARRAY_SIZE(params->e820_table);
-
- add_e820ext(params, e820ext, nr_e820ext);
- nr_entries -= nr_e820ext;
- }
-
- params->e820_entries = (u8)nr_entries;
-
- return EFI_SUCCESS;
-}
-
-static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
- u32 *e820ext_size)
-{
- efi_status_t status;
- unsigned long size;
-
- size = sizeof(struct setup_data) +
- sizeof(struct e820_entry) * nr_desc;
-
- if (*e820ext) {
- efi_call_early(free_pool, *e820ext);
- *e820ext = NULL;
- *e820ext_size = 0;
- }
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)e820ext);
- if (status == EFI_SUCCESS)
- *e820ext_size = size;
-
- return status;
-}
-
-static efi_status_t allocate_e820(struct boot_params *params,
- struct setup_data **e820ext,
- u32 *e820ext_size)
-{
- unsigned long map_size, desc_size, buff_size;
- struct efi_boot_memmap boot_map;
- efi_memory_desc_t *map;
- efi_status_t status;
- __u32 nr_desc;
-
- boot_map.map = ↦
- boot_map.map_size = &map_size;
- boot_map.desc_size = &desc_size;
- boot_map.desc_ver = NULL;
- boot_map.key_ptr = NULL;
- boot_map.buff_size = &buff_size;
-
- status = efi_get_memory_map(sys_table, &boot_map);
- if (status != EFI_SUCCESS)
- return status;
-
- nr_desc = buff_size / desc_size;
-
- if (nr_desc > ARRAY_SIZE(params->e820_table)) {
- u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table);
-
- status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size);
- if (status != EFI_SUCCESS)
- return status;
- }
-
- return EFI_SUCCESS;
-}
-
-struct exit_boot_struct {
- struct boot_params *boot_params;
- struct efi_info *efi;
-};
-
-static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
- struct efi_boot_memmap *map,
- void *priv)
-{
- const char *signature;
- struct exit_boot_struct *p = priv;
-
- signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
- : EFI32_LOADER_SIGNATURE;
- memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
-
- p->efi->efi_systab = (unsigned long)sys_table_arg;
- p->efi->efi_memdesc_size = *map->desc_size;
- p->efi->efi_memdesc_version = *map->desc_ver;
- p->efi->efi_memmap = (unsigned long)*map->map;
- p->efi->efi_memmap_size = *map->map_size;
-
-#ifdef CONFIG_X86_64
- p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
- p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
-#endif
-
- return EFI_SUCCESS;
-}
-
-static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
-{
- unsigned long map_sz, key, desc_size, buff_size;
- efi_memory_desc_t *mem_map;
- struct setup_data *e820ext = NULL;
- __u32 e820ext_size = 0;
- efi_status_t status;
- __u32 desc_version;
- struct efi_boot_memmap map;
- struct exit_boot_struct priv;
-
- map.map = &mem_map;
- map.map_size = &map_sz;
- map.desc_size = &desc_size;
- map.desc_ver = &desc_version;
- map.key_ptr = &key;
- map.buff_size = &buff_size;
- priv.boot_params = boot_params;
- priv.efi = &boot_params->efi_info;
-
- status = allocate_e820(boot_params, &e820ext, &e820ext_size);
- if (status != EFI_SUCCESS)
- return status;
-
- /* Might as well exit boot services now */
- status = efi_exit_boot_services(sys_table, handle, &map, &priv,
- exit_boot_func);
- if (status != EFI_SUCCESS)
- return status;
-
- /* Historic? */
- boot_params->alt_mem_k = 32 * 1024;
-
- status = setup_e820(boot_params, e820ext, e820ext_size);
- if (status != EFI_SUCCESS)
- return status;
-
- return EFI_SUCCESS;
-}
-
-/*
- * On success we return a pointer to a boot_params structure, and NULL
- * on failure.
- */
-struct boot_params *
-efi_main(struct efi_config *c, struct boot_params *boot_params)
-{
- struct desc_ptr *gdt = NULL;
- struct setup_header *hdr = &boot_params->hdr;
- efi_status_t status;
- struct desc_struct *desc;
- void *handle;
- efi_system_table_t *_table;
- unsigned long cmdline_paddr;
-
- efi_early = c;
-
- _table = (efi_system_table_t *)(unsigned long)efi_early->table;
- handle = (void *)(unsigned long)efi_early->image_handle;
-
- sys_table = _table;
-
- /* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- goto fail;
-
- if (efi_is_64bit())
- setup_boot_services64(efi_early);
- else
- setup_boot_services32(efi_early);
-
- /*
- * make_boot_params() may have been called before efi_main(), in which
- * case this is the second time we parse the cmdline. This is ok,
- * parsing the cmdline multiple times does not have side-effects.
- */
- cmdline_paddr = ((u64)hdr->cmd_line_ptr |
- ((u64)boot_params->ext_cmd_line_ptr << 32));
- efi_parse_options((char *)cmdline_paddr);
-
- /*
- * If the boot loader gave us a value for secure_boot then we use that,
- * otherwise we ask the BIOS.
- */
- if (boot_params->secure_boot == efi_secureboot_mode_unset)
- boot_params->secure_boot = efi_get_secureboot(sys_table);
-
- /* Ask the firmware to clear memory on unclean shutdown */
- efi_enable_reset_attack_mitigation(sys_table);
- efi_retrieve_tpm2_eventlog(sys_table);
-
- setup_graphics(boot_params);
-
- setup_efi_pci(boot_params);
-
- setup_quirks(boot_params);
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- sizeof(*gdt), (void **)&gdt);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n");
- goto fail;
- }
-
- gdt->size = 0x800;
- status = efi_low_alloc(sys_table, gdt->size, 8,
- (unsigned long *)&gdt->address);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n");
- goto fail;
- }
-
- /*
- * If the kernel isn't already loaded at the preferred load
- * address, relocate it.
- */
- if (hdr->pref_address != hdr->code32_start) {
- unsigned long bzimage_addr = hdr->code32_start;
- status = efi_relocate_kernel(sys_table, &bzimage_addr,
- hdr->init_size, hdr->init_size,
- hdr->pref_address,
- hdr->kernel_alignment,
- LOAD_PHYSICAL_ADDR);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
- goto fail;
- }
-
- hdr->pref_address = hdr->code32_start;
- hdr->code32_start = bzimage_addr;
- }
-
- status = exit_boot(boot_params, handle);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "exit_boot() failed!\n");
- goto fail;
- }
-
- memset((char *)gdt->address, 0x0, gdt->size);
- desc = (struct desc_struct *)gdt->address;
-
- /* The first GDT is a dummy. */
- desc++;
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- /* __KERNEL32_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
-
- desc++;
- } else {
- /* Second entry is unused on 32-bit */
- desc++;
- }
-
- /* __KERNEL_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- desc->l = 1;
- desc->d = 0;
- } else {
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- }
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
- desc++;
-
- /* __KERNEL_DS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
- desc++;
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- /* Task segment value */
- desc->limit0 = 0x0000;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_TSS;
- desc->s = 0;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0x0;
- desc->avl = 0;
- desc->l = 0;
- desc->d = 0;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
- desc++;
- }
-
- asm volatile("cli");
- asm volatile ("lgdt %0" : : "m" (*gdt));
-
- return boot_params;
-fail:
- efi_printk(sys_table, "efi_main() failed!\n");
-
- return NULL;
-}
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
deleted file mode 100644
index 8297387..0000000
--- a/arch/x86/boot/compressed/eboot.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BOOT_COMPRESSED_EBOOT_H
-#define BOOT_COMPRESSED_EBOOT_H
-
-#define SEG_TYPE_DATA (0 << 3)
-#define SEG_TYPE_READ_WRITE (1 << 1)
-#define SEG_TYPE_CODE (1 << 3)
-#define SEG_TYPE_EXEC_READ (1 << 1)
-#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
-#define SEG_OP_SIZE_32BIT (1 << 0)
-#define SEG_GRANULARITY_4KB (1 << 0)
-
-#define DESC_TYPE_CODE_DATA (1 << 0)
-
-typedef struct {
- u32 get_mode;
- u32 set_mode;
- u32 blt;
-} efi_uga_draw_protocol_32_t;
-
-typedef struct {
- u64 get_mode;
- u64 set_mode;
- u64 blt;
-} efi_uga_draw_protocol_64_t;
-
-typedef struct {
- void *get_mode;
- void *set_mode;
- void *blt;
-} efi_uga_draw_protocol_t;
-
-#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
deleted file mode 100644
index 257e341..0000000
--- a/arch/x86/boot/compressed/efi_stub_32.S
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * EFI call stub for IA32.
- *
- * This stub allows us to make EFI calls in physical mode with interrupts
- * turned off. Note that this implementation is different from the one in
- * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
- * mode at this point.
- */
-
-#include <linux/linkage.h>
-#include <asm/page_types.h>
-
-/*
- * efi_call_phys(void *, ...) is a function with variable parameters.
- * All the callers of this function assure that all the parameters are 4-bytes.
- */
-
-/*
- * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
- * So we'd better save all of them at the beginning of this function and restore
- * at the end no matter how many we use, because we can not assure EFI runtime
- * service functions will comply with gcc calling convention, too.
- */
-
-.text
-ENTRY(efi_call_phys)
- /*
- * 0. The function can only be called in Linux kernel. So CS has been
- * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
- * the values of these registers are the same. And, the corresponding
- * GDT entries are identical. So I will do nothing about segment reg
- * and GDT, but change GDT base register in prelog and epilog.
- */
-
- /*
- * 1. Because we haven't been relocated by this point we need to
- * use relative addressing.
- */
- call 1f
-1: popl %edx
- subl $1b, %edx
-
- /*
- * 2. Now on the top of stack is the return
- * address in the caller of efi_call_phys(), then parameter 1,
- * parameter 2, ..., param n. To make things easy, we save the return
- * address of efi_call_phys in a global variable.
- */
- popl %ecx
- movl %ecx, saved_return_addr(%edx)
- /* get the function pointer into ECX*/
- popl %ecx
- movl %ecx, efi_rt_function_ptr(%edx)
-
- /*
- * 3. Call the physical function.
- */
- call *%ecx
-
- /*
- * 4. Balance the stack. And because EAX contain the return value,
- * we'd better not clobber it. We need to calculate our address
- * again because %ecx and %edx are not preserved across EFI function
- * calls.
- */
- call 1f
-1: popl %edx
- subl $1b, %edx
-
- movl efi_rt_function_ptr(%edx), %ecx
- pushl %ecx
-
- /*
- * 10. Push the saved return address onto the stack and return.
- */
- movl saved_return_addr(%edx), %ecx
- pushl %ecx
- ret
-ENDPROC(efi_call_phys)
-.previous
-
-.data
-saved_return_addr:
- .long 0
-efi_rt_function_ptr:
- .long 0
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
deleted file mode 100644
index 99494df..0000000
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#include <asm/segment.h>
-#include <asm/msr.h>
-#include <asm/processor-flags.h>
-
-#include "../../platform/efi/efi_stub_64.S"
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index bff9ab7..c4bb0f9 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -10,7 +10,7 @@
* needs to be able to service interrupts.
*
* On the plus side, we don't have to worry about mangling 64-bit
- * addresses into 32-bits because we're executing with an identify
+ * addresses into 32-bits because we're executing with an identity
* mapped pagetable and haven't transitioned to 64-bit virtual addresses
* yet.
*/
@@ -23,16 +23,11 @@
.code64
.text
-ENTRY(efi64_thunk)
+SYM_FUNC_START(__efi64_thunk)
push %rbp
push %rbx
- subq $8, %rsp
- leaq efi_exit32(%rip), %rax
- movl %eax, 4(%rsp)
- leaq efi_gdt64(%rip), %rax
- movl %eax, (%rsp)
- movl %eax, 2(%rax) /* Fixup the gdt base address */
+ leaq 1f(%rip), %rbp
movl %ds, %eax
push %rax
@@ -48,32 +43,32 @@
movl %esi, 0x0(%rsp)
movl %edx, 0x4(%rsp)
movl %ecx, 0x8(%rsp)
- movq %r8, %rsi
- movl %esi, 0xc(%rsp)
- movq %r9, %rsi
- movl %esi, 0x10(%rsp)
+ movl %r8d, 0xc(%rsp)
+ movl %r9d, 0x10(%rsp)
- sgdt save_gdt(%rip)
-
- leaq 1f(%rip), %rbx
- movq %rbx, func_rt_ptr(%rip)
+ leaq 0x14(%rsp), %rbx
+ sgdt (%rbx)
/*
* Switch to gdt with 32-bit segments. This is the firmware GDT
* that was installed when the kernel started executing. This
* pointer was saved at the EFI stub entry point in head_64.S.
+ *
+ * Pass the saved DS selector to the 32-bit code, and use far return to
+ * restore the saved CS selector.
*/
leaq efi32_boot_gdt(%rip), %rax
lgdt (%rax)
- pushq $__KERNEL_CS
+ movzwl efi32_boot_ds(%rip), %edx
+ movzwq efi32_boot_cs(%rip), %rax
+ pushq %rax
leaq efi_enter32(%rip), %rax
pushq %rax
lretq
1: addq $32, %rsp
-
- lgdt save_gdt(%rip)
+ movq %rdi, %rax
pop %rbx
movl %ebx, %ss
@@ -81,30 +76,21 @@
movl %ebx, %es
pop %rbx
movl %ebx, %ds
+ /* Clear out 32-bit selector from FS and GS */
+ xorl %ebx, %ebx
+ movl %ebx, %fs
+ movl %ebx, %gs
/*
* Convert 32-bit status code into 64-bit.
*/
- test %rax, %rax
- jz 1f
- movl %eax, %ecx
- andl $0x0fffffff, %ecx
- andl $0xf0000000, %eax
- shl $32, %rax
- or %rcx, %rax
-1:
- addq $8, %rsp
+ roll $1, %eax
+ rorq $1, %rax
+
pop %rbx
pop %rbp
ret
-ENDPROC(efi64_thunk)
-
-ENTRY(efi_exit32)
- movq func_rt_ptr(%rip), %rax
- push %rax
- mov %rdi, %rax
- ret
-ENDPROC(efi_exit32)
+SYM_FUNC_END(__efi64_thunk)
.code32
/*
@@ -112,11 +98,13 @@
*
* The stack should represent the 32-bit calling convention.
*/
-ENTRY(efi_enter32)
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
+SYM_FUNC_START_LOCAL(efi_enter32)
+ /* Load firmware selector into data and stack segment registers */
+ movl %edx, %ds
+ movl %edx, %es
+ movl %edx, %fs
+ movl %edx, %gs
+ movl %edx, %ss
/* Reload pgtables */
movl %cr3, %eax
@@ -144,9 +132,7 @@
*/
cli
- movl 56(%esp), %eax
- movl %eax, 2(%eax)
- lgdtl (%eax)
+ lgdtl (%ebx)
movl %cr4, %eax
btsl $(X86_CR4_PAE_BIT), %eax
@@ -163,35 +149,27 @@
xorl %eax, %eax
lldt %ax
- movl 60(%esp), %eax
pushl $__KERNEL_CS
- pushl %eax
+ pushl %ebp
/* Enable paging */
movl %cr0, %eax
btsl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
lret
-ENDPROC(efi_enter32)
+SYM_FUNC_END(efi_enter32)
.data
.balign 8
- .global efi32_boot_gdt
-efi32_boot_gdt: .word 0
- .quad 0
-
-save_gdt: .word 0
- .quad 0
-func_rt_ptr: .quad 0
-
- .global efi_gdt64
-efi_gdt64:
- .word efi_gdt64_end - efi_gdt64
- .long 0 /* Filled out by user */
+SYM_DATA_START(efi32_boot_gdt)
.word 0
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x0080890000000000 /* TS descriptor */
- .quad 0x0000000000000000 /* TS continued */
-efi_gdt64_end:
+ .quad 0
+SYM_DATA_END(efi32_boot_gdt)
+
+SYM_DATA_START(efi32_boot_cs)
+ .word 0
+SYM_DATA_END(efi32_boot_cs)
+
+SYM_DATA_START(efi32_boot_ds)
+ .word 0
+SYM_DATA_END(efi32_boot_ds)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index d7c0fcc..659fad5 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -33,52 +33,19 @@
#include <asm/bootparam.h>
/*
- * The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X
- * relocation to get the symbol address in PIC. When the compressed x86
- * kernel isn't built as PIC, the linker optimizes R_386_GOT32X
- * relocations to their fixed symbol addresses. However, when the
- * compressed x86 kernel is loaded at a different address, it leads
- * to the following load failure:
- *
- * Failed to allocate space for phdrs
- *
- * during the decompression stage.
- *
- * If the compressed x86 kernel is relocatable at run-time, it should be
- * compiled with -fPIE, instead of -fPIC, if possible and should be built as
- * Position Independent Executable (PIE) so that linker won't optimize
- * R_386_GOT32X relocation to its fixed symbol address. Older
- * linkers generate R_386_32 relocations against locally defined symbols,
- * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less
- * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
- * R_386_32 relocations when relocating the kernel. To generate
- * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as
- * hidden:
+ * These symbols needed to be marked as .hidden to prevent the BFD linker from
+ * generating R_386_32 (rather than R_386_RELATIVE) relocations for them when
+ * the 32-bit compressed kernel is linked as PIE. This is no longer necessary,
+ * but it doesn't hurt to keep them .hidden.
*/
.hidden _bss
.hidden _ebss
- .hidden _got
- .hidden _egot
.hidden _end
__HEAD
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
cld
- /*
- * Test KEEP_SEGMENTS flag to see if the bootloader is asking
- * us to not reload segments
- */
- testb $KEEP_SEGMENTS, BP_loadflags(%esi)
- jnz 1f
-
cli
- movl $__BOOT_DS, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %fs
- movl %eax, %gs
- movl %eax, %ss
-1:
/*
* Calculate the delta between where we were compiled to run
@@ -90,17 +57,46 @@
*/
leal (BP_scratch+4)(%esi), %esp
call 1f
-1: popl %ebp
- subl $1b, %ebp
+1: popl %edx
+ addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %edx
+
+ /* Load new GDT */
+ leal gdt@GOTOFF(%edx), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
/*
- * %ebp contains the address we are loaded at by the boot loader and %ebx
- * contains the address where we should move the kernel image temporarily
- * for safe in-place decompression.
+ * %edx contains the address we are loaded at by the boot loader (plus the
+ * offset to the GOT). The below code calculates %ebx to be the address where
+ * we should move the kernel image temporarily for safe in-place decompression
+ * (again, plus the offset to the GOT).
+ *
+ * %ebp is calculated to be the address that the kernel will be decompressed to.
*/
#ifdef CONFIG_RELOCATABLE
- movl %ebp, %ebx
+ leal startup_32@GOTOFF(%edx), %ebx
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32() will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry() will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ subl image_offset@GOTOFF(%edx), %ebx
+#endif
+
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
@@ -112,13 +108,13 @@
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
+ movl %ebx, %ebp // Save the output address for later
/* Target address to relocate to for decompression */
- movl BP_init_size(%esi), %eax
- subl $_end, %eax
- addl %eax, %ebx
+ addl BP_init_size(%esi), %ebx
+ subl $_end@GOTOFF, %ebx
/* Set up the stack */
- leal boot_stack_end(%ebx), %esp
+ leal boot_stack_end@GOTOFF(%ebx), %esp
/* Zero EFLAGS */
pushl $0
@@ -129,8 +125,8 @@
* where decompression in place becomes safe.
*/
pushl %esi
- leal (_bss-4)(%ebp), %esi
- leal (_bss-4)(%ebx), %edi
+ leal (_bss@GOTOFF-4)(%edx), %esi
+ leal (_bss@GOTOFF-4)(%ebx), %edi
movl $(_bss - startup_32), %ecx
shrl $2, %ecx
std
@@ -138,122 +134,61 @@
cld
popl %esi
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leal gdt@GOTOFF(%ebx), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
/*
* Jump to the relocated address.
*/
- leal .Lrelocated(%ebx), %eax
+ leal .Lrelocated@GOTOFF(%ebx), %eax
jmp *%eax
-ENDPROC(startup_32)
+SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_STUB
-/*
- * We don't need the return address, so set up the stack so efi_main() can find
- * its arguments.
- */
-ENTRY(efi_pe_entry)
+SYM_FUNC_START(efi32_stub_entry)
+SYM_FUNC_START_ALIAS(efi_stub_entry)
add $0x4, %esp
-
- call 1f
-1: popl %esi
- subl $1b, %esi
-
- popl %ecx
- movl %ecx, efi32_config(%esi) /* Handle */
- popl %ecx
- movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */
-
- /* Relocate efi_config->call() */
- leal efi32_config(%esi), %eax
- add %esi, 40(%eax)
- pushl %eax
-
- call make_boot_params
- cmpl $0, %eax
- je fail
- movl %esi, BP_code32_start(%eax)
- popl %ecx
- pushl %eax
- pushl %ecx
- jmp 2f /* Skip efi_config initialization */
-ENDPROC(efi_pe_entry)
-
-ENTRY(efi32_stub_entry)
- add $0x4, %esp
- popl %ecx
- popl %edx
-
- call 1f
-1: popl %esi
- subl $1b, %esi
-
- movl %ecx, efi32_config(%esi) /* Handle */
- movl %edx, efi32_config+8(%esi) /* EFI System table pointer */
-
- /* Relocate efi_config->call() */
- leal efi32_config(%esi), %eax
- add %esi, 40(%eax)
- pushl %eax
-2:
+ movl 8(%esp), %esi /* save boot_params pointer */
call efi_main
- cmpl $0, %eax
- movl %eax, %esi
- jne 2f
-fail:
- /* EFI init failed, so hang. */
- hlt
- jmp fail
-2:
- movl BP_code32_start(%esi), %eax
- leal startup_32(%eax), %eax
+ /* efi_main returns the possibly relocated address of startup_32 */
jmp *%eax
-ENDPROC(efi32_stub_entry)
+SYM_FUNC_END(efi32_stub_entry)
+SYM_FUNC_END_ALIAS(efi_stub_entry)
#endif
.text
-.Lrelocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
/*
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
- leal _bss(%ebx), %edi
- leal _ebss(%ebx), %ecx
+ leal _bss@GOTOFF(%ebx), %edi
+ leal _ebss@GOTOFF(%ebx), %ecx
subl %edi, %ecx
shrl $2, %ecx
rep stosl
/*
- * Adjust our own GOT
- */
- leal _got(%ebx), %edx
- leal _egot(%ebx), %ecx
-1:
- cmpl %ecx, %edx
- jae 2f
- addl %ebx, (%edx)
- addl $4, %edx
- jmp 1b
-2:
-
-/*
* Do the extraction, and jump to the new kernel..
*/
- /* push arguments for extract_kernel: */
- pushl $z_output_len /* decompressed length, end of relocs */
+ /* push arguments for extract_kernel: */
- movl BP_init_size(%esi), %eax
- subl $_end, %eax
- movl %ebx, %ebp
- subl %eax, %ebp
- pushl %ebp /* output address */
-
- pushl $z_input_len /* input_len */
- leal input_data(%ebx), %eax
- pushl %eax /* input_data */
- leal boot_heap(%ebx), %eax
- pushl %eax /* heap area */
- pushl %esi /* real mode pointer */
- call extract_kernel /* returns kernel location in %eax */
+ pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
+ pushl %ebp /* output address */
+ pushl input_len@GOTOFF(%ebx) /* input_len */
+ leal input_data@GOTOFF(%ebx), %eax
+ pushl %eax /* input_data */
+ leal boot_heap@GOTOFF(%ebx), %eax
+ pushl %eax /* heap area */
+ pushl %esi /* real mode pointer */
+ call extract_kernel /* returns kernel location in %eax */
addl $24, %esp
/*
@@ -261,14 +196,21 @@
*/
xorl %ebx, %ebx
jmp *%eax
+SYM_FUNC_END(.Lrelocated)
+
+ .data
+ .balign 8
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt - 1
+ .long 0
+ .word 0
+ .quad 0x0000000000000000 /* Reserved */
+ .quad 0x00cf9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
#ifdef CONFIG_EFI_STUB
- .data
-efi32_config:
- .fill 5,8,0
- .long efi_call_phys
- .long 0
- .byte 0
+SYM_DATA(image_offset, .long 0)
#endif
/*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 50c9eeb..72f655c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
#include "pgtable.h"
/*
@@ -40,13 +41,37 @@
*/
.hidden _bss
.hidden _ebss
- .hidden _got
- .hidden _egot
.hidden _end
__HEAD
+
+/*
+ * This macro gives the relative virtual address of X, i.e. the offset of X
+ * from startup_32. This is the same as the link-time virtual address of X,
+ * since startup_32 is at 0, but defining it this way tells the
+ * assembler/linker that we do not want the actual run-time address of X. This
+ * prevents the linker from trying to create unwanted run-time relocation
+ * entries for the reference when the compressed kernel is linked as PIE.
+ *
+ * A reference X(%reg) will result in the link-time VA of X being stored with
+ * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
+ * adds the 64-bit base address where the kernel is loaded.
+ *
+ * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
+ * and no run-time relocation.
+ *
+ * The macro should be used as a displacement with a base register containing
+ * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
+ * [$ rva(X)].
+ *
+ * This macro can only be used from within the .head.text section, since the
+ * expression requires startup_32 to be in the same section as the code being
+ * assembled.
+ */
+#define rva(X) ((X) - startup_32)
+
.code32
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
/*
* 32bit entry is 0 and it is ABI so immutable!
* If we come here directly from a bootloader,
@@ -54,19 +79,7 @@
* all need to be under the 4G limit.
*/
cld
- /*
- * Test KEEP_SEGMENTS flag to see if the bootloader is asking
- * us to not reload segments
- */
- testb $KEEP_SEGMENTS, BP_loadflags(%esi)
- jnz 1f
-
cli
- movl $(__BOOT_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
-1:
/*
* Calculate the delta between where we were compiled to run
@@ -79,12 +92,23 @@
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
- subl $1b, %ebp
+ subl $ rva(1b), %ebp
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal rva(gdt)(%ebp), %eax
+ movl %eax, 2(%eax)
+ lgdt (%eax)
+
+ /* Load segment registers with our descriptors */
+ movl $__BOOT_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
/* setup a stack and make sure cpu supports long mode. */
- movl $boot_stack_end, %eax
- addl %ebp, %eax
- movl %eax, %esp
+ leal rva(boot_stack_end)(%ebp), %esp
call verify_cpu
testl %eax, %eax
@@ -101,6 +125,19 @@
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ subl rva(image_offset)(%ebp), %ebx
+#endif
+
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
@@ -113,18 +150,13 @@
1:
/* Target address to relocate to for decompression */
- movl BP_init_size(%esi), %eax
- subl $_end, %eax
- addl %eax, %ebx
+ addl BP_init_size(%esi), %ebx
+ subl $ rva(_end), %ebx
/*
* Prepare for entering 64 bit mode
*/
- /* Load new GDT with the 64bit segments using 32bit descriptor */
- addl %ebp, gdt+2(%ebp)
- lgdt gdt(%ebp)
-
/* Enable PAE mode */
movl %cr4, %eax
orl $X86_CR4_PAE, %eax
@@ -140,26 +172,36 @@
*/
call get_sev_encryption_bit
xorl %edx, %edx
+#ifdef CONFIG_AMD_MEM_ENCRYPT
testl %eax, %eax
jz 1f
subl $32, %eax /* Encryption bit is always above bit 31 */
bts %eax, %edx /* Set encryption mask for page tables */
+ /*
+ * Mark SEV as active in sev_status so that startup32_check_sev_cbit()
+ * will do a check. The sev_status memory will be fully initialized
+ * with the contents of MSR_AMD_SEV_STATUS later in
+ * set_sev_encryption_mask(). For now it is sufficient to know that SEV
+ * is active.
+ */
+ movl $1, rva(sev_status)(%ebp)
1:
+#endif
/* Initialize Page tables to 0 */
- leal pgtable(%ebx), %edi
+ leal rva(pgtable)(%ebx), %edi
xorl %eax, %eax
movl $(BOOT_INIT_PGT_SIZE/4), %ecx
rep stosl
/* Build Level 4 */
- leal pgtable + 0(%ebx), %edi
+ leal rva(pgtable + 0)(%ebx), %edi
leal 0x1007 (%edi), %eax
movl %eax, 0(%edi)
addl %edx, 4(%edi)
/* Build Level 3 */
- leal pgtable + 0x1000(%ebx), %edi
+ leal rva(pgtable + 0x1000)(%ebx), %edi
leal 0x1007(%edi), %eax
movl $4, %ecx
1: movl %eax, 0x00(%edi)
@@ -170,7 +212,7 @@
jnz 1b
/* Build Level 2 */
- leal pgtable + 0x2000(%ebx), %edi
+ leal rva(pgtable + 0x2000)(%ebx), %edi
movl $0x00000183, %eax
movl $2048, %ecx
1: movl %eax, 0(%edi)
@@ -181,7 +223,7 @@
jnz 1b
/* Enable the boot page tables */
- leal pgtable(%ebx), %eax
+ leal rva(pgtable)(%ebx), %eax
movl %eax, %cr3
/* Enable Long mode in EFER (Extended Feature Enable Register) */
@@ -206,15 +248,33 @@
* We place all of the values on our mini stack so lret can
* used to perform that far jump.
*/
- pushl $__KERNEL_CS
- leal startup_64(%ebp), %eax
+ leal rva(startup_64)(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
- movl efi32_config(%ebp), %ebx
- cmp $0, %ebx
+ movl rva(efi32_boot_args)(%ebp), %edi
+ cmp $0, %edi
jz 1f
- leal handover_entry(%ebp), %eax
+ leal rva(efi64_stub_entry)(%ebp), %eax
+ movl rva(efi32_boot_args+4)(%ebp), %esi
+ movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
+ cmpl $0, %edx
+ jnz 1f
+ /*
+ * efi_pe_entry uses MS calling convention, which requires 32 bytes of
+ * shadow space on the stack even if all arguments are passed in
+ * registers. We also need an additional 8 bytes for the space that
+ * would be occupied by the return address, and this also results in
+ * the correct stack alignment for entry.
+ */
+ subl $40, %esp
+ leal rva(efi_pe_entry)(%ebp), %eax
+ movl %edi, %ecx // MS calling convention
+ movl %esi, %edx
1:
#endif
+ /* Check if the C-bit position is correct when SEV is active */
+ call startup32_check_sev_cbit
+
+ pushl $__KERNEL_CS
pushl %eax
/* Enter paged protected Mode, activating Long Mode */
@@ -223,27 +283,30 @@
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
-ENDPROC(startup_32)
+SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_MIXED
.org 0x190
-ENTRY(efi32_stub_entry)
+SYM_FUNC_START(efi32_stub_entry)
add $0x4, %esp /* Discard return address */
popl %ecx
popl %edx
popl %esi
- leal (BP_scratch+4)(%esi), %esp
call 1f
1: pop %ebp
- subl $1b, %ebp
+ subl $ rva(1b), %ebp
- movl %ecx, efi32_config(%ebp)
- movl %edx, efi32_config+8(%ebp)
- sgdtl efi32_boot_gdt(%ebp)
+ movl %esi, rva(efi32_boot_args+8)(%ebp)
+SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
+ movl %ecx, rva(efi32_boot_args)(%ebp)
+ movl %edx, rva(efi32_boot_args+4)(%ebp)
+ movb $0, rva(efi_is64)(%ebp)
- leal efi32_config(%ebp), %eax
- movl %eax, efi_config(%ebp)
+ /* Save firmware GDTR and code/data selectors */
+ sgdtl rva(efi32_boot_gdt)(%ebp)
+ movw %cs, rva(efi32_boot_cs)(%ebp)
+ movw %ds, rva(efi32_boot_ds)(%ebp)
/* Disable paging */
movl %cr0, %eax
@@ -251,12 +314,12 @@
movl %eax, %cr0
jmp startup_32
-ENDPROC(efi32_stub_entry)
+SYM_FUNC_END(efi32_stub_entry)
#endif
.code64
.org 0x200
-ENTRY(startup_64)
+SYM_CODE_START(startup_64)
/*
* 64bit entry is 0x200 and it is ABI so immutable!
* We come here either from startup_32 or directly from a
@@ -268,6 +331,9 @@
* and command line.
*/
+ cld
+ cli
+
/* Setup data segments. */
xorl %eax, %eax
movl %eax, %ds
@@ -292,6 +358,20 @@
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
+
+#ifdef CONFIG_EFI_STUB
+/*
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
+ * offset to the start of the space allocated for the image. efi_pe_entry will
+ * set up image_offset to tell us where the image actually starts, so that we
+ * can use the full available buffer.
+ * image_offset = startup_32 - image_base
+ * Otherwise image_offset will be zero and has no effect on the calculations.
+ */
+ movl image_offset(%rip), %eax
+ subq %rax, %rbp
+#endif
+
movl BP_kernel_alignment(%rsi), %eax
decl %eax
addq %rax, %rbp
@@ -305,30 +385,11 @@
/* Target address to relocate to for decompression */
movl BP_init_size(%rsi), %ebx
- subl $_end, %ebx
+ subl $ rva(_end), %ebx
addq %rbp, %rbx
/* Set up the stack */
- leaq boot_stack_end(%rbx), %rsp
-
- /*
- * paging_prepare() and cleanup_trampoline() below can have GOT
- * references. Adjust the table with address we are running at.
- *
- * Zero RAX for adjust_got: the GOT was not adjusted before;
- * there's no adjustment to undo.
- */
- xorq %rax, %rax
-
- /*
- * Calculate the address the binary is loaded at and use it as
- * a GOT adjustment.
- */
- call 1f
-1: popq %rdi
- subq $1b, %rdi
-
- call .Ladjust_got
+ leaq rva(boot_stack_end)(%rbx), %rsp
/*
* At this point we are in long mode with 4-level paging enabled,
@@ -356,9 +417,21 @@
*/
/* Make sure we have GDT with 32-bit code segment */
- leaq gdt(%rip), %rax
- movq %rax, gdt64+2(%rip)
- lgdt gdt64(%rip)
+ leaq gdt64(%rip), %rax
+ addq %rax, 2(%rax)
+ lgdt (%rax)
+
+ /* Reload CS so IRET returns to a CS actually in the GDT */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lon_kernel_cs:
+
+ pushq %rsi
+ call load_stage1_idt
+ popq %rsi
/*
* paging_prepare() sets up the trampoline and checks if we need to
@@ -394,7 +467,7 @@
lretq
trampoline_return:
/* Restore the stack, the 32-bit trampoline uses its own stack */
- leaq boot_stack_end(%rbx), %rsp
+ leaq rva(boot_stack_end)(%rbx), %rsp
/*
* cleanup_trampoline() would restore trampoline memory.
@@ -406,7 +479,7 @@
* this function call.
*/
pushq %rsi
- leaq top_pgtable(%rbx), %rdi
+ leaq rva(top_pgtable)(%rbx), %rdi
call cleanup_trampoline
popq %rsi
@@ -414,110 +487,53 @@
pushq $0
popfq
- /*
- * Previously we've adjusted the GOT with address the binary was
- * loaded at. Now we need to re-adjust for relocation address.
- *
- * Calculate the address the binary is loaded at, so that we can
- * undo the previous GOT adjustment.
- */
- call 1f
-1: popq %rax
- subq $1b, %rax
-
- /* The new adjustment is the relocation address */
- movq %rbx, %rdi
- call .Ladjust_got
-
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
pushq %rsi
leaq (_bss-8)(%rip), %rsi
- leaq (_bss-8)(%rbx), %rdi
- movq $_bss /* - $startup_32 */, %rcx
- shrq $3, %rcx
+ leaq rva(_bss-8)(%rbx), %rdi
+ movl $(_bss - startup_32), %ecx
+ shrl $3, %ecx
std
rep movsq
cld
popq %rsi
+ /*
+ * The GDT may get overwritten either during the copy we just did or
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
+ * to the new copy of the GDT.
+ */
+ leaq rva(gdt64)(%rbx), %rax
+ leaq rva(gdt)(%rbx), %rdx
+ movq %rdx, 2(%rax)
+ lgdt (%rax)
+
/*
* Jump to the relocated address.
*/
- leaq .Lrelocated(%rbx), %rax
+ leaq rva(.Lrelocated)(%rbx), %rax
jmp *%rax
+SYM_CODE_END(startup_64)
#ifdef CONFIG_EFI_STUB
-
-/* The entry point for the PE/COFF executable is efi_pe_entry. */
-ENTRY(efi_pe_entry)
- movq %rcx, efi64_config(%rip) /* Handle */
- movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
-
- leaq efi64_config(%rip), %rax
- movq %rax, efi_config(%rip)
-
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- addq %rbp, efi64_config+40(%rip)
-
- movq %rax, %rdi
- call make_boot_params
- cmpq $0,%rax
- je fail
- mov %rax, %rsi
- leaq startup_32(%rip), %rax
- movl %eax, BP_code32_start(%rsi)
- jmp 2f /* Skip the relocation */
-
-handover_entry:
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- movq efi_config(%rip), %rax
- addq %rbp, 40(%rax)
-2:
- movq efi_config(%rip), %rdi
- call efi_main
- movq %rax,%rsi
- cmpq $0,%rax
- jne 2f
-fail:
- /* EFI init failed, so hang. */
- hlt
- jmp fail
-2:
- movl BP_code32_start(%esi), %eax
- leaq startup_64(%rax), %rax
- jmp *%rax
-ENDPROC(efi_pe_entry)
-
.org 0x390
-ENTRY(efi64_stub_entry)
- movq %rdi, efi64_config(%rip) /* Handle */
- movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
-
- leaq efi64_config(%rip), %rax
- movq %rax, efi_config(%rip)
-
- movq %rdx, %rsi
- jmp handover_entry
-ENDPROC(efi64_stub_entry)
+SYM_FUNC_START(efi64_stub_entry)
+SYM_FUNC_START_ALIAS(efi_stub_entry)
+ and $~0xf, %rsp /* realign the stack */
+ movq %rdx, %rbx /* save boot_params pointer */
+ call efi_main
+ movq %rbx,%rsi
+ leaq rva(startup_64)(%rax), %rax
+ jmp *%rax
+SYM_FUNC_END(efi64_stub_entry)
+SYM_FUNC_END_ALIAS(efi_stub_entry)
#endif
.text
-.Lrelocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
/*
* Clear BSS (stack is currently empty)
@@ -530,15 +546,33 @@
rep stosq
/*
+ * If running as an SEV guest, the encryption mask is required in the
+ * page-table setup code below. When the guest also has SEV-ES enabled
+ * set_sev_encryption_mask() will cause #VC exceptions, but the stage2
+ * handler can't map its GHCB because the page-table is not set up yet.
+ * So set up the encryption mask here while still on the stage1 #VC
+ * handler. Then load stage2 IDT and switch to the kernel's own
+ * page-table.
+ */
+ pushq %rsi
+ call set_sev_encryption_mask
+ call load_stage2_idt
+
+ /* Pass boot_params to initialize_identity_maps() */
+ movq (%rsp), %rdi
+ call initialize_identity_maps
+ popq %rsi
+
+/*
* Do the extraction, and jump to the new kernel..
*/
pushq %rsi /* Save the real mode argument */
movq %rsi, %rdi /* real mode address */
leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
leaq input_data(%rip), %rdx /* input_data */
- movl $z_input_len, %ecx /* input_len */
+ movl input_len(%rip), %ecx /* input_len */
movq %rbp, %r8 /* output target address */
- movq $z_output_len, %r9 /* decompressed length, end of relocs */
+ movl output_len(%rip), %r9d /* decompressed length, end of relocs */
call extract_kernel /* returns kernel location in %rax */
popq %rsi
@@ -546,27 +580,7 @@
* Jump to the decompressed kernel.
*/
jmp *%rax
-
-/*
- * Adjust the global offset table
- *
- * RAX is the previous adjustment of the table to undo (use 0 if it's the
- * first time we touch GOT).
- * RDI is the new adjustment to apply.
- */
-.Ladjust_got:
- /* Walk through the GOT adding the address to the entries */
- leaq _got(%rip), %rdx
- leaq _egot(%rip), %rcx
-1:
- cmpq %rcx, %rdx
- jae 2f
- subq %rax, (%rdx) /* Undo previous adjustment */
- addq %rdi, (%rdx) /* Apply the new adjustment */
- addq $8, %rdx
- jmp 1b
-2:
- ret
+SYM_FUNC_END(.Lrelocated)
.code32
/*
@@ -576,7 +590,7 @@
* ECX contains the base address of the trampoline memory.
* Non zero RDX means trampoline needs to enable 5-level paging.
*/
-ENTRY(trampoline_32bit_src)
+SYM_CODE_START(trampoline_32bit_src)
/* Set up data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
@@ -639,11 +653,13 @@
movl %eax, %cr0
lret
+SYM_CODE_END(trampoline_32bit_src)
.code64
-.Lpaging_enabled:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
/* Return from the trampoline */
jmp *%rdi
+SYM_FUNC_END(.Lpaging_enabled)
/*
* The trampoline code has a size limit.
@@ -653,71 +669,229 @@
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
.code32
-.Lno_longmode:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
hlt
jmp 1b
+SYM_FUNC_END(.Lno_longmode)
#include "../../kernel/verify_cpu.S"
.data
-gdt64:
- .word gdt_end - gdt
- .quad 0
+SYM_DATA_START_LOCAL(gdt64)
+ .word gdt_end - gdt - 1
+ .quad gdt - gdt64
+SYM_DATA_END(gdt64)
.balign 8
-gdt:
- .word gdt_end - gdt
- .long gdt
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt - 1
+ .long 0
.word 0
.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
-gdt_end:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+SYM_DATA_START(boot_idt_desc)
+ .word boot_idt_end - boot_idt - 1
+ .quad 0
+SYM_DATA_END(boot_idt_desc)
+ .balign 8
+SYM_DATA_START(boot_idt)
+ .rept BOOT_IDT_ENTRIES
+ .quad 0
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
#ifdef CONFIG_EFI_STUB
-efi_config:
- .quad 0
-
+SYM_DATA(image_offset, .long 0)
+#endif
#ifdef CONFIG_EFI_MIXED
- .global efi32_config
-efi32_config:
- .fill 5,8,0
- .quad efi64_thunk
- .byte 0
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
+SYM_DATA(efi_is64, .byte 1)
+
+#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
+#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
+#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
+
+ __HEAD
+ .code32
+SYM_FUNC_START(efi32_pe_entry)
+/*
+ * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
+ * efi_system_table_32_t *sys_table)
+ */
+
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %eax // dummy push to allocate loaded_image
+
+ pushl %ebx // save callee-save registers
+ pushl %edi
+
+ call verify_cpu // check for long mode support
+ testl %eax, %eax
+ movl $0x80000003, %eax // EFI_UNSUPPORTED
+ jnz 2f
+
+ call 1f
+1: pop %ebx
+ subl $ rva(1b), %ebx
+
+ /* Get the loaded image protocol pointer from the image handle */
+ leal -4(%ebp), %eax
+ pushl %eax // &loaded_image
+ leal rva(loaded_image_proto)(%ebx), %eax
+ pushl %eax // pass the GUID address
+ pushl 8(%ebp) // pass the image handle
+
+ /*
+ * Note the alignment of the stack frame.
+ * sys_table
+ * handle <-- 16-byte aligned on entry by ABI
+ * return address
+ * frame pointer
+ * loaded_image <-- local variable
+ * saved %ebx <-- 16-byte aligned here
+ * saved %edi
+ * &loaded_image
+ * &loaded_image_proto
+ * handle <-- 16-byte aligned for call to handle_protocol
+ */
+
+ movl 12(%ebp), %eax // sys_table
+ movl ST32_boottime(%eax), %eax // sys_table->boottime
+ call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
+ addl $12, %esp // restore argument space
+ testl %eax, %eax
+ jnz 2f
+
+ movl 8(%ebp), %ecx // image_handle
+ movl 12(%ebp), %edx // sys_table
+ movl -4(%ebp), %esi // loaded_image
+ movl LI32_image_base(%esi), %esi // loaded_image->image_base
+ movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
+ /*
+ * We need to set the image_offset variable here since startup_32() will
+ * use it before we get to the 64-bit efi_pe_entry() in C code.
+ */
+ subl %esi, %ebx
+ movl %ebx, rva(image_offset)(%ebp) // save image_offset
+ jmp efi32_pe_stub_entry
+
+2: popl %edi // restore callee-save registers
+ popl %ebx
+ leave
+ ret
+SYM_FUNC_END(efi32_pe_entry)
+
+ .section ".rodata"
+ /* EFI loaded image protocol GUID */
+ .balign 4
+SYM_DATA_START_LOCAL(loaded_image_proto)
+ .long 0x5b1b31a1
+ .word 0x9562, 0x11d2
+ .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
+SYM_DATA_END(loaded_image_proto)
#endif
- .global efi64_config
-efi64_config:
- .fill 5,8,0
- .quad efi_call
- .byte 1
-#endif /* CONFIG_EFI_STUB */
+/*
+ * Check for the correct C-bit position when the startup_32 boot-path is used.
+ *
+ * The check makes use of the fact that all memory is encrypted when paging is
+ * disabled. The function creates 64 bits of random data using the RDRAND
+ * instruction. RDRAND is mandatory for SEV guests, so always available. If the
+ * hypervisor violates that the kernel will crash right here.
+ *
+ * The 64 bits of random data are stored to a memory location and at the same
+ * time kept in the %eax and %ebx registers. Since encryption is always active
+ * when paging is off the random data will be stored encrypted in main memory.
+ *
+ * Then paging is enabled. When the C-bit position is correct all memory is
+ * still mapped encrypted and comparing the register values with memory will
+ * succeed. An incorrect C-bit position will map all memory unencrypted, so that
+ * the compare will use the encrypted random data and fail.
+ */
+ __HEAD
+ .code32
+SYM_FUNC_START(startup32_check_sev_cbit)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+
+ /* Check for non-zero sev_status */
+ movl rva(sev_status)(%ebp), %eax
+ testl %eax, %eax
+ jz 4f
+
+ /*
+ * Get two 32-bit random values - Don't bail out if RDRAND fails
+ * because it is better to prevent forward progress if no random value
+ * can be gathered.
+ */
+1: rdrand %eax
+ jnc 1b
+2: rdrand %ebx
+ jnc 2b
+
+ /* Store to memory and keep it in the registers */
+ movl %eax, rva(sev_check_data)(%ebp)
+ movl %ebx, rva(sev_check_data+4)(%ebp)
+
+ /* Enable paging to see if encryption is active */
+ movl %cr0, %edx /* Backup %cr0 in %edx */
+ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
+ movl %ecx, %cr0
+
+ cmpl %eax, rva(sev_check_data)(%ebp)
+ jne 3f
+ cmpl %ebx, rva(sev_check_data+4)(%ebp)
+ jne 3f
+
+ movl %edx, %cr0 /* Restore previous %cr0 */
+
+ jmp 4f
+
+3: /* Check failed - hlt the machine */
+ hlt
+ jmp 3b
+
+4:
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+#endif
+ ret
+SYM_FUNC_END(startup32_check_sev_cbit)
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
-boot_heap:
- .fill BOOT_HEAP_SIZE, 1, 0
-boot_stack:
+SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
+
+SYM_DATA_START_LOCAL(boot_stack)
.fill BOOT_STACK_SIZE, 1, 0
-boot_stack_end:
+ .balign 16
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
/*
* Space for page tables (not in .bss so not zeroed)
*/
- .section ".pgtable","a",@nobits
+ .section ".pgtable","aw",@nobits
.balign 4096
-pgtable:
- .fill BOOT_PGT_SIZE, 1, 0
+SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
/*
* The page table is going to be used instead of page table in the trampoline
* memory.
*/
-top_pgtable:
- .fill PAGE_SIZE, 1, 0
+SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
new file mode 100644
index 0000000..39b2ede
--- /dev/null
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code is used on x86_64 to create page table identity mappings on
+ * demand by building up a new set of page tables (or appending to the
+ * existing ones), and then switching over to them when ready.
+ *
+ * Copyright (C) 2015-2016 Yinghai Lu
+ * Copyright (C) 2016 Kees Cook
+ */
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
+#include "error.h"
+#include "misc.h"
+
+/* These actually do the work of building the kernel identity maps. */
+#include <linux/pgtable.h>
+#include <asm/cmpxchg.h>
+#include <asm/trap_pf.h>
+#include <asm/trapnr.h>
+#include <asm/init.h>
+/* Use the static base for this part of the boot process */
+#undef __PAGE_OFFSET
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
+#include "../../mm/ident_map.c"
+
+#define _SETUP
+#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
+#undef _SETUP
+
+extern unsigned long get_cmd_line_ptr(void);
+
+/* Used by PAGE_KERN* macros: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
+
+/* Used to track our page table allocation area. */
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+/*
+ * Allocates space for a page table entry, using struct alloc_pgt_data
+ * above. Besides the local callers, this is used as the allocation
+ * callback in mapping_info below.
+ */
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
+ unsigned char *entry;
+
+ /* Validate there is space available for a new page. */
+ if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
+ debug_putaddr(pages->pgt_buf_offset);
+ debug_putaddr(pages->pgt_buf_size);
+ return NULL;
+ }
+
+ entry = pages->pgt_buf + pages->pgt_buf_offset;
+ pages->pgt_buf_offset += PAGE_SIZE;
+
+ return entry;
+}
+
+/* Used to track our allocated page tables. */
+static struct alloc_pgt_data pgt_data;
+
+/* The top level page table entry pointer. */
+static unsigned long top_level_pgt;
+
+phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
+
+/*
+ * Mapping information structure passed to kernel_ident_mapping_init().
+ * Due to relocation, pointers must be assigned at run time not build time.
+ */
+static struct x86_mapping_info mapping_info;
+
+/*
+ * Adds the specified range to the identity mappings.
+ */
+static void add_identity_map(unsigned long start, unsigned long end)
+{
+ int ret;
+
+ /* Align boundary to 2M. */
+ start = round_down(start, PMD_SIZE);
+ end = round_up(end, PMD_SIZE);
+ if (start >= end)
+ return;
+
+ /* Build the mapping. */
+ ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end);
+ if (ret)
+ error("Error: kernel_ident_mapping_init() failed\n");
+}
+
+/* Locates and clears a region for a new top level page table. */
+void initialize_identity_maps(void *rmode)
+{
+ unsigned long cmdline;
+
+ /* Exclude the encryption mask from __PHYSICAL_MASK */
+ physical_mask &= ~sme_me_mask;
+
+ /* Init mapping_info with run-time function/buffer pointers. */
+ mapping_info.alloc_pgt_page = alloc_pgt_page;
+ mapping_info.context = &pgt_data;
+ mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
+ mapping_info.kernpg_flag = _KERNPG_TABLE;
+
+ /*
+ * It should be impossible for this not to already be true,
+ * but since calling this a second time would rewind the other
+ * counters, let's just make sure this is reset too.
+ */
+ pgt_data.pgt_buf_offset = 0;
+
+ /*
+ * If we came here via startup_32(), cr3 will be _pgtable already
+ * and we must append to the existing area instead of entirely
+ * overwriting it.
+ *
+ * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+ * the top-level page table is allocated separately.
+ *
+ * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+ * cases. On 4-level paging it's equal to 'top_level_pgt'.
+ */
+ top_level_pgt = read_cr3_pa();
+ if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
+ pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ } else {
+ pgt_data.pgt_buf = _pgtable;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
+ }
+
+ /*
+ * New page-table is set up - map the kernel image, boot_params and the
+ * command line. The uncompressed kernel requires boot_params and the
+ * command line to be mapped in the identity mapping. Map them
+ * explicitly here in case the compressed kernel does not touch them,
+ * or does not touch all the pages covering them.
+ */
+ add_identity_map((unsigned long)_head, (unsigned long)_end);
+ boot_params = rmode;
+ add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+ cmdline = get_cmd_line_ptr();
+ add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+
+ /* Load the new page-table. */
+ sev_verify_cbit(top_level_pgt);
+ write_cr3(top_level_pgt);
+}
+
+/*
+ * This switches the page tables to the new level4 that has been built
+ * via calls to add_identity_map() above. If booted via startup_32(),
+ * this is effectively a no-op.
+ */
+void finalize_identity_maps(void)
+{
+ write_cr3(top_level_pgt);
+}
+
+static pte_t *split_large_pmd(struct x86_mapping_info *info,
+ pmd_t *pmdp, unsigned long __address)
+{
+ unsigned long page_flags;
+ unsigned long address;
+ pte_t *pte;
+ pmd_t pmd;
+ int i;
+
+ pte = (pte_t *)info->alloc_pgt_page(info->context);
+ if (!pte)
+ return NULL;
+
+ address = __address & PMD_MASK;
+ /* No large page - clear PSE flag */
+ page_flags = info->page_flag & ~_PAGE_PSE;
+
+ /* Populate the PTEs */
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ set_pte(&pte[i], __pte(address | page_flags));
+ address += PAGE_SIZE;
+ }
+
+ /*
+ * Ideally we need to clear the large PMD first and do a TLB
+ * flush before we write the new PMD. But the 2M range of the
+ * PMD might contain the code we execute and/or the stack
+ * we are on, so we can't do that. But that should be safe here
+ * because we are going from large to small mappings and we are
+ * also the only user of the page-table, so there is no chance
+ * of a TLB multihit.
+ */
+ pmd = __pmd((unsigned long)pte | info->kernpg_flag);
+ set_pmd(pmdp, pmd);
+ /* Flush TLB to establish the new PMD */
+ write_cr3(top_level_pgt);
+
+ return pte + pte_index(__address);
+}
+
+static void clflush_page(unsigned long address)
+{
+ unsigned int flush_size;
+ char *cl, *start, *end;
+
+ /*
+ * Hardcode cl-size to 64 - CPUID can't be used here because that might
+ * cause another #VC exception and the GHCB is not ready to use yet.
+ */
+ flush_size = 64;
+ start = (char *)(address & PAGE_MASK);
+ end = start + PAGE_SIZE;
+
+ /*
+ * First make sure there are no pending writes on the cache-lines to
+ * flush.
+ */
+ asm volatile("mfence" : : : "memory");
+
+ for (cl = start; cl != end; cl += flush_size)
+ clflush(cl);
+}
+
+static int set_clr_page_flags(struct x86_mapping_info *info,
+ unsigned long address,
+ pteval_t set, pteval_t clr)
+{
+ pgd_t *pgdp = (pgd_t *)top_level_pgt;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep, pte;
+
+ /*
+ * First make sure there is a PMD mapping for 'address'.
+ * It should already exist, but keep things generic.
+ *
+ * To map the page just read from it and fault it in if there is no
+ * mapping yet. add_identity_map() can't be called here because that
+ * would unconditionally map the address on PMD level, destroying any
+ * PTE-level mappings that might already exist. Use assembly here so
+ * the access won't be optimized away.
+ */
+ asm volatile("mov %[address], %%r9"
+ :: [address] "g" (*(unsigned long *)address)
+ : "r9", "memory");
+
+ /*
+ * The page is mapped at least with PMD size - so skip checks and walk
+ * directly to the PMD.
+ */
+ p4dp = p4d_offset(pgdp, address);
+ pudp = pud_offset(p4dp, address);
+ pmdp = pmd_offset(pudp, address);
+
+ if (pmd_large(*pmdp))
+ ptep = split_large_pmd(info, pmdp, address);
+ else
+ ptep = pte_offset_kernel(pmdp, address);
+
+ if (!ptep)
+ return -ENOMEM;
+
+ /*
+ * Changing encryption attributes of a page requires to flush it from
+ * the caches.
+ */
+ if ((set | clr) & _PAGE_ENC)
+ clflush_page(address);
+
+ /* Update PTE */
+ pte = *ptep;
+ pte = pte_set_flags(pte, set);
+ pte = pte_clear_flags(pte, clr);
+ set_pte(ptep, pte);
+
+ /* Flush TLB after changing encryption attribute */
+ write_cr3(top_level_pgt);
+
+ return 0;
+}
+
+int set_page_decrypted(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC);
+}
+
+int set_page_encrypted(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0);
+}
+
+int set_page_non_present(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT);
+}
+
+static void do_pf_error(const char *msg, unsigned long error_code,
+ unsigned long address, unsigned long ip)
+{
+ error_putstr(msg);
+
+ error_putstr("\nError Code: ");
+ error_puthex(error_code);
+ error_putstr("\nCR2: 0x");
+ error_puthex(address);
+ error_putstr("\nRIP relative to _head: 0x");
+ error_puthex(ip - (unsigned long)_head);
+ error_putstr("\n");
+
+ error("Stopping.\n");
+}
+
+void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ unsigned long address = native_read_cr2();
+ unsigned long end;
+ bool ghcb_fault;
+
+ ghcb_fault = sev_es_check_ghcb_fault(address);
+
+ address &= PMD_MASK;
+ end = address + PMD_SIZE;
+
+ /*
+ * Check for unexpected error codes. Unexpected are:
+ * - Faults on present pages
+ * - User faults
+ * - Reserved bits set
+ */
+ if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD))
+ do_pf_error("Unexpected page-fault:", error_code, address, regs->ip);
+ else if (ghcb_fault)
+ do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip);
+
+ /*
+ * Error code is sane - now identity map the 2M region around
+ * the faulting address.
+ */
+ add_identity_map(address, end);
+}
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
new file mode 100644
index 0000000..804a502
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/trap_pf.h>
+#include <asm/segment.h>
+#include <asm/trapnr.h>
+#include "misc.h"
+
+static void set_idt_entry(int vector, void (*handler)(void))
+{
+ unsigned long address = (unsigned long)handler;
+ gate_desc entry;
+
+ memset(&entry, 0, sizeof(entry));
+
+ entry.offset_low = (u16)(address & 0xffff);
+ entry.segment = __KERNEL_CS;
+ entry.bits.type = GATE_TRAP;
+ entry.bits.p = 1;
+ entry.offset_middle = (u16)((address >> 16) & 0xffff);
+ entry.offset_high = (u32)(address >> 32);
+
+ memcpy(&boot_idt[vector], &entry, sizeof(entry));
+}
+
+/* Have this here so we don't need to include <asm/desc.h> */
+static void load_boot_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+/* Setup IDT before kernel jumping to .Lrelocated */
+void load_stage1_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ set_idt_entry(X86_TRAP_VC, boot_stage1_vc);
+
+ load_boot_idt(&boot_idt_desc);
+}
+
+/* Setup IDT after kernel jumping to .Lrelocated */
+void load_stage2_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+ set_idt_entry(X86_TRAP_PF, boot_page_fault);
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+#endif
+
+ load_boot_idt(&boot_idt_desc);
+}
diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S
new file mode 100644
index 0000000..22890e1
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_handlers_64.S
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Early IDT handler entry points
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#include <asm/segment.h>
+
+/* For ORIG_RAX */
+#include "../../entry/calling.h"
+
+.macro EXCEPTION_HANDLER name function error_code=0
+SYM_FUNC_START(\name)
+
+ /* Build pt_regs */
+ .if \error_code == 0
+ pushq $0
+ .endif
+
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ /* Call handler with pt_regs */
+ movq %rsp, %rdi
+ /* Error code is second parameter */
+ movq ORIG_RAX(%rsp), %rsi
+ call \function
+
+ /* Restore regs */
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+
+ /* Remove error code and return */
+ addq $8, %rsp
+
+ iretq
+SYM_FUNC_END(\name)
+ .endm
+
+ .text
+ .code64
+
+EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1
+EXCEPTION_HANDLER boot_stage2_vc do_boot_stage2_vc error_code=1
+#endif
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 2e53c05..b92fffb 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -19,13 +19,6 @@
*/
#define BOOT_CTYPE_H
-/*
- * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
- * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
- * which is meaningless and will cause compiling error in some cases.
- */
-#define __DISABLE_EXPORTS
-
#include "misc.h"
#include "error.h"
#include "../string.h"
@@ -43,17 +36,12 @@
#define STATIC
#include <linux/decompress/mm.h>
-#ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled;
-unsigned int pgdir_shift __ro_after_init = 39;
-unsigned int ptrs_per_p4d __ro_after_init = 1;
-#endif
+#define _SETUP
+#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
+#undef _SETUP
extern unsigned long get_cmd_line_ptr(void);
-/* Used by PAGE_KERN* macros: */
-pteval_t __default_kernel_pte_mask __read_mostly = ~0;
-
/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
@@ -94,8 +82,11 @@
static bool memmap_too_large;
-/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
-static unsigned long long mem_limit = ULLONG_MAX;
+/*
+ * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
+ * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
+ */
+static u64 mem_limit;
/* Number of immovable memory regions */
static int num_immovable_mem;
@@ -132,8 +123,13 @@
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"
+enum parse_mode {
+ PARSE_MEMMAP,
+ PARSE_EFI,
+};
+
static int
-parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
{
char *oldp;
@@ -156,9 +152,30 @@
*start = memparse(p + 1, &p);
return 0;
case '@':
- /* memmap=nn@ss specifies usable region, should be skipped */
- *size = 0;
- /* Fall through */
+ if (mode == PARSE_MEMMAP) {
+ /*
+ * memmap=nn@ss specifies usable region, should
+ * be skipped
+ */
+ *size = 0;
+ } else {
+ u64 flags;
+
+ /*
+ * efi_fake_mem=nn@ss:attr the attr specifies
+ * flags that might imply a soft-reservation.
+ */
+ *start = memparse(p + 1, &p);
+ if (p && *p == ':') {
+ p++;
+ if (kstrtoull(p, 0, &flags) < 0)
+ *size = 0;
+ else if (flags & EFI_MEMORY_SP)
+ return 0;
+ }
+ *size = 0;
+ }
+ fallthrough;
default:
/*
* If w/o offset, only size specified, memmap=nn[KMG] has the
@@ -172,7 +189,7 @@
return -EINVAL;
}
-static void mem_avoid_memmap(char *str)
+static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
static int i;
@@ -181,20 +198,20 @@
while (str && (i < MAX_MEMMAP_REGIONS)) {
int rc;
- unsigned long long start, size;
+ u64 start, size;
char *k = strchr(str, ',');
if (k)
*k++ = 0;
- rc = parse_memmap(str, &start, &size);
+ rc = parse_memmap(str, &start, &size, mode);
if (rc < 0)
break;
str = k;
if (start == 0) {
/* Store the specified memory limit if size > 0 */
- if (size > 0)
+ if (size > 0 && size < mem_limit)
mem_limit = size;
continue;
@@ -238,19 +255,18 @@
}
}
-
static void handle_mem_options(void)
{
char *args = (char *)get_cmd_line_ptr();
- size_t len = strlen((char *)args);
+ size_t len;
char *tmp_cmdline;
char *param, *val;
u64 mem_size;
- if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
- !strstr(args, "hugepages"))
+ if (!args)
return;
+ len = strnlen(args, COMMAND_LINE_SIZE-1);
tmp_cmdline = malloc(len + 1);
if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline");
@@ -265,14 +281,12 @@
while (*args) {
args = next_arg(args, ¶m, &val);
/* Stop at -- */
- if (!val && strcmp(param, "--") == 0) {
- warn("Only '--' specified in cmdline");
- goto out;
- }
+ if (!val && strcmp(param, "--") == 0)
+ break;
if (!strcmp(param, "memmap")) {
- mem_avoid_memmap(val);
- } else if (strstr(param, "hugepages")) {
+ mem_avoid_memmap(PARSE_MEMMAP, val);
+ } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
char *p = val;
@@ -281,19 +295,23 @@
continue;
mem_size = memparse(p, &p);
if (mem_size == 0)
- goto out;
+ break;
- mem_limit = mem_size;
+ if (mem_size < mem_limit)
+ mem_limit = mem_size;
+ } else if (!strcmp(param, "efi_fake_mem")) {
+ mem_avoid_memmap(PARSE_EFI, val);
}
}
-out:
free(tmp_cmdline);
return;
}
/*
- * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
+ * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
+ * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
+ *
* The mem_avoid array is used to store the ranges that need to be avoided
* when KASLR searches for an appropriate random address. We must avoid any
* regions that are unsafe to overlap with during decompression, and other
@@ -371,8 +389,7 @@
{
unsigned long init_size = boot_params->hdr.init_size;
u64 initrd_start, initrd_size;
- u64 cmd_line, cmd_line_size;
- char *ptr;
+ unsigned long cmd_line, cmd_line_size;
/*
* Avoid the region that is unsafe to overlap during
@@ -380,8 +397,6 @@
*/
mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
- add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
- mem_avoid[MEM_AVOID_ZO_RANGE].size);
/* Avoid initrd. */
initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
@@ -393,22 +408,17 @@
/* No need to set mapping for initrd, it will be handled in VO. */
/* Avoid kernel command line. */
- cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32;
- cmd_line |= boot_params->hdr.cmd_line_ptr;
+ cmd_line = get_cmd_line_ptr();
/* Calculate size of cmd_line. */
- ptr = (char *)(unsigned long)cmd_line;
- for (cmd_line_size = 0; ptr[cmd_line_size++];)
- ;
- mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
- mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
- add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
- mem_avoid[MEM_AVOID_CMDLINE].size);
+ if (cmd_line) {
+ cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1;
+ mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
+ mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
+ }
/* Avoid boot parameters. */
mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
- add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
- mem_avoid[MEM_AVOID_BOOTPARAMS].size);
/* We don't need to set a mapping for setup_data. */
@@ -417,11 +427,6 @@
/* Enumerate the immovable memory regions */
num_immovable_mem = count_immovable_mem_regions();
-
-#ifdef CONFIG_X86_VERBOSE_BOOTUP
- /* Make sure video RAM can be used. */
- add_identity_map(0, PMD_SIZE);
-#endif
}
/*
@@ -433,7 +438,7 @@
{
int i;
struct setup_data *ptr;
- unsigned long earliest = img->start + img->size;
+ u64 earliest = img->start + img->size;
bool is_overlapping = false;
for (i = 0; i < MEM_AVOID_MAX; i++) {
@@ -459,6 +464,18 @@
is_overlapping = true;
}
+ if (ptr->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
+ avoid.start = ((struct setup_indirect *)ptr->data)->addr;
+ avoid.size = ((struct setup_indirect *)ptr->data)->len;
+
+ if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+ *overlap = avoid;
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+ }
+
ptr = (struct setup_data *)(unsigned long)ptr->next;
}
@@ -466,18 +483,16 @@
}
struct slot_area {
- unsigned long addr;
- int num;
+ u64 addr;
+ unsigned long num;
};
#define MAX_SLOT_AREA 100
static struct slot_area slot_areas[MAX_SLOT_AREA];
-
+static unsigned int slot_area_index;
static unsigned long slot_max;
-static unsigned long slot_area_index;
-
static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
struct slot_area slot_area;
@@ -486,13 +501,10 @@
return;
slot_area.addr = region->start;
- slot_area.num = (region->size - image_size) /
- CONFIG_PHYSICAL_ALIGN + 1;
+ slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;
- if (slot_area.num > 0) {
- slot_areas[slot_area_index++] = slot_area;
- slot_max += slot_area.num;
- }
+ slot_areas[slot_area_index++] = slot_area;
+ slot_max += slot_area.num;
}
/*
@@ -502,57 +514,53 @@
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
- unsigned long addr, size = 0;
+ u64 pud_start, pud_end;
+ unsigned long gb_huge_pages;
struct mem_vector tmp;
- int i = 0;
- if (!max_gb_huge_pages) {
+ if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
store_slot_info(region, image_size);
return;
}
- addr = ALIGN(region->start, PUD_SIZE);
- /* Did we raise the address above the passed in memory entry? */
- if (addr < region->start + region->size)
- size = region->size - (addr - region->start);
-
- /* Check how many 1GB huge pages can be filtered out: */
- while (size > PUD_SIZE && max_gb_huge_pages) {
- size -= PUD_SIZE;
- max_gb_huge_pages--;
- i++;
- }
+ /* Are there any 1GB pages in the region? */
+ pud_start = ALIGN(region->start, PUD_SIZE);
+ pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);
/* No good 1GB huge pages found: */
- if (!i) {
+ if (pud_start >= pud_end) {
store_slot_info(region, image_size);
return;
}
- /*
- * Skip those 'i'*1GB good huge pages, and continue checking and
- * processing the remaining head or tail part of the passed region
- * if available.
- */
-
- if (addr >= region->start + image_size) {
+ /* Check if the head part of the region is usable. */
+ if (pud_start >= region->start + image_size) {
tmp.start = region->start;
- tmp.size = addr - region->start;
+ tmp.size = pud_start - region->start;
store_slot_info(&tmp, image_size);
}
- size = region->size - (addr - region->start) - i * PUD_SIZE;
- if (size >= image_size) {
- tmp.start = addr + i * PUD_SIZE;
- tmp.size = size;
+ /* Skip the good 1GB pages. */
+ gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
+ if (gb_huge_pages > max_gb_huge_pages) {
+ pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
+ max_gb_huge_pages = 0;
+ } else {
+ max_gb_huge_pages -= gb_huge_pages;
+ }
+
+ /* Check if the tail part of the region is usable. */
+ if (region->start + region->size >= pud_end + image_size) {
+ tmp.start = pud_end;
+ tmp.size = region->start + region->size - pud_end;
store_slot_info(&tmp, image_size);
}
}
-static unsigned long slots_fetch_random(void)
+static u64 slots_fetch_random(void)
{
unsigned long slot;
- int i;
+ unsigned int i;
/* Handle case of no slots stored. */
if (slot_max == 0)
@@ -565,7 +573,7 @@
slot -= slot_areas[i].num;
continue;
}
- return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
+ return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
}
if (i == slot_area_index)
@@ -578,49 +586,23 @@
unsigned long image_size)
{
struct mem_vector region, overlap;
- unsigned long start_orig, end;
- struct mem_vector cur_entry;
+ u64 region_end;
- /* On 32-bit, ignore entries entirely above our maximum. */
- if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
- return;
-
- /* Ignore entries entirely below our minimum. */
- if (entry->start + entry->size < minimum)
- return;
-
- /* Ignore entries above memory limit */
- end = min(entry->size + entry->start, mem_limit);
- if (entry->start >= end)
- return;
- cur_entry.start = entry->start;
- cur_entry.size = end - entry->start;
-
- region.start = cur_entry.start;
- region.size = cur_entry.size;
+ /* Enforce minimum and memory limit. */
+ region.start = max_t(u64, entry->start, minimum);
+ region_end = min(entry->start + entry->size, mem_limit);
/* Give up if slot area array is full. */
while (slot_area_index < MAX_SLOT_AREA) {
- start_orig = region.start;
-
- /* Potentially raise address to minimum location. */
- if (region.start < minimum)
- region.start = minimum;
-
/* Potentially raise address to meet alignment needs. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Did we raise the address above the passed in memory entry? */
- if (region.start > cur_entry.start + cur_entry.size)
+ if (region.start > region_end)
return;
/* Reduce size by any delta from the original address. */
- region.size -= region.start - start_orig;
-
- /* On 32-bit, reduce region size to fit within max size. */
- if (IS_ENABLED(CONFIG_X86_32) &&
- region.start + region.size > KERNEL_IMAGE_SIZE)
- region.size = KERNEL_IMAGE_SIZE - region.start;
+ region.size = region_end - region.start;
/* Return if region can't contain decompressed kernel */
if (region.size < image_size)
@@ -633,27 +615,19 @@
}
/* Store beginning of region if holds at least image_size. */
- if (overlap.start > region.start + image_size) {
- struct mem_vector beginning;
-
- beginning.start = region.start;
- beginning.size = overlap.start - region.start;
- process_gb_huge_pages(&beginning, image_size);
+ if (overlap.start >= region.start + image_size) {
+ region.size = overlap.start - region.start;
+ process_gb_huge_pages(®ion, image_size);
}
- /* Return if overlap extends to or past end of region. */
- if (overlap.start + overlap.size >= region.start + region.size)
- return;
-
/* Clip off the overlapping region and start over. */
- region.size -= overlap.start - region.start + overlap.size;
region.start = overlap.start + overlap.size;
}
}
static bool process_mem_region(struct mem_vector *region,
- unsigned long long minimum,
- unsigned long long image_size)
+ unsigned long minimum,
+ unsigned long image_size)
{
int i;
/*
@@ -676,7 +650,7 @@
* immovable memory and @region.
*/
for (i = 0; i < num_immovable_mem; i++) {
- unsigned long long start, end, entry_end, region_end;
+ u64 start, end, entry_end, region_end;
struct mem_vector entry;
if (!mem_overlaps(region, &immovable_mem[i]))
@@ -703,8 +677,8 @@
#ifdef CONFIG_EFI
/*
- * Returns true if mirror region found (and must have been processed
- * for slots adding)
+ * Returns true if we processed the EFI memmap, which we prefer over the E820
+ * table if it is available.
*/
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
@@ -760,6 +734,10 @@
if (md->type != EFI_CONVENTIONAL_MEMORY)
continue;
+ if (efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ continue;
+
if (efi_mirror_found &&
!(md->attribute & EFI_MEMORY_MORE_RELIABLE))
continue;
@@ -802,20 +780,30 @@
static unsigned long find_random_phys_addr(unsigned long minimum,
unsigned long image_size)
{
+ u64 phys_addr;
+
+ /* Bail out early if it's impossible to succeed. */
+ if (minimum + image_size > mem_limit)
+ return 0;
+
/* Check if we had too many memmaps. */
if (memmap_too_large) {
debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
return 0;
}
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+ if (!process_efi_entries(minimum, image_size))
+ process_e820_entries(minimum, image_size);
- if (process_efi_entries(minimum, image_size))
- return slots_fetch_random();
+ phys_addr = slots_fetch_random();
- process_e820_entries(minimum, image_size);
- return slots_fetch_random();
+ /* Perform a final check to make sure the address is in range. */
+ if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
+ warn("Invalid physical address chosen!\n");
+ return 0;
+ }
+
+ return (unsigned long)phys_addr;
}
static unsigned long find_random_virt_addr(unsigned long minimum,
@@ -823,18 +811,12 @@
{
unsigned long slots, random_addr;
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
- /* Align image_size for easy slot calculations. */
- image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
-
/*
* There are how many CONFIG_PHYSICAL_ALIGN-sized slots
* that can hold image_size within the range of minimum to
* KERNEL_IMAGE_SIZE?
*/
- slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
- CONFIG_PHYSICAL_ALIGN + 1;
+ slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;
random_addr = kaslr_get_random_long("Virtual") % slots;
@@ -858,18 +840,12 @@
return;
}
-#ifdef CONFIG_X86_5LEVEL
- if (__read_cr4() & X86_CR4_LA57) {
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
- }
-#endif
-
boot_params->hdr.loadflags |= KASLR_FLAG;
- /* Prepare to add new identity pagetables on demand. */
- initialize_identity_maps();
+ if (IS_ENABLED(CONFIG_X86_32))
+ mem_limit = KERNEL_IMAGE_SIZE;
+ else
+ mem_limit = MAXMEM;
/* Record the various known unsafe memory ranges. */
mem_avoid_init(input, input_size, *output);
@@ -880,6 +856,8 @@
* location:
*/
min_addr = min(*output, 512UL << 20);
+ /* Make sure minimum is aligned. */
+ min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);
/* Walk available memory entries to find a random address. */
random_addr = find_random_phys_addr(min_addr, output_size);
@@ -887,19 +865,8 @@
warn("Physical KASLR disabled: no suitable memory region!");
} else {
/* Update the new physical address location. */
- if (*output != random_addr) {
- add_identity_map(random_addr, output_size);
+ if (*output != random_addr)
*output = random_addr;
- }
-
- /*
- * This loads the identity mapping page table.
- * This should only be done if a new physical address
- * is found for the kernel, otherwise we should keep
- * the old page table to make it be like the "nokaslr"
- * case.
- */
- finalize_identity_maps();
}
diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c
deleted file mode 100644
index 9557c5a..0000000
--- a/arch/x86/boot/compressed/kaslr_64.c
+++ /dev/null
@@ -1,153 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This code is used on x86_64 to create page table identity mappings on
- * demand by building up a new set of page tables (or appending to the
- * existing ones), and then switching over to them when ready.
- *
- * Copyright (C) 2015-2016 Yinghai Lu
- * Copyright (C) 2016 Kees Cook
- */
-
-/*
- * Since we're dealing with identity mappings, physical and virtual
- * addresses are the same, so override these defines which are ultimately
- * used by the headers in misc.h.
- */
-#define __pa(x) ((unsigned long)(x))
-#define __va(x) ((void *)((unsigned long)(x)))
-
-/* No PAGE_TABLE_ISOLATION support needed either: */
-#undef CONFIG_PAGE_TABLE_ISOLATION
-
-#include "misc.h"
-
-/* These actually do the work of building the kernel identity maps. */
-#include <asm/init.h>
-#include <asm/pgtable.h>
-/* Use the static base for this part of the boot process */
-#undef __PAGE_OFFSET
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE
-#include "../../mm/ident_map.c"
-
-/* Used to track our page table allocation area. */
-struct alloc_pgt_data {
- unsigned char *pgt_buf;
- unsigned long pgt_buf_size;
- unsigned long pgt_buf_offset;
-};
-
-/*
- * Allocates space for a page table entry, using struct alloc_pgt_data
- * above. Besides the local callers, this is used as the allocation
- * callback in mapping_info below.
- */
-static void *alloc_pgt_page(void *context)
-{
- struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
- unsigned char *entry;
-
- /* Validate there is space available for a new page. */
- if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
- debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
- debug_putaddr(pages->pgt_buf_offset);
- debug_putaddr(pages->pgt_buf_size);
- return NULL;
- }
-
- entry = pages->pgt_buf + pages->pgt_buf_offset;
- pages->pgt_buf_offset += PAGE_SIZE;
-
- return entry;
-}
-
-/* Used to track our allocated page tables. */
-static struct alloc_pgt_data pgt_data;
-
-/* The top level page table entry pointer. */
-static unsigned long top_level_pgt;
-
-phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
-
-/*
- * Mapping information structure passed to kernel_ident_mapping_init().
- * Due to relocation, pointers must be assigned at run time not build time.
- */
-static struct x86_mapping_info mapping_info;
-
-/* Locates and clears a region for a new top level page table. */
-void initialize_identity_maps(void)
-{
- /* If running as an SEV guest, the encryption mask is required. */
- set_sev_encryption_mask();
-
- /* Exclude the encryption mask from __PHYSICAL_MASK */
- physical_mask &= ~sme_me_mask;
-
- /* Init mapping_info with run-time function/buffer pointers. */
- mapping_info.alloc_pgt_page = alloc_pgt_page;
- mapping_info.context = &pgt_data;
- mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
- mapping_info.kernpg_flag = _KERNPG_TABLE;
-
- /*
- * It should be impossible for this not to already be true,
- * but since calling this a second time would rewind the other
- * counters, let's just make sure this is reset too.
- */
- pgt_data.pgt_buf_offset = 0;
-
- /*
- * If we came here via startup_32(), cr3 will be _pgtable already
- * and we must append to the existing area instead of entirely
- * overwriting it.
- *
- * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
- * the top-level page table is allocated separately.
- *
- * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
- * cases. On 4-level paging it's equal to 'top_level_pgt'.
- */
- top_level_pgt = read_cr3_pa();
- if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
- debug_putstr("booted via startup_32()\n");
- pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
- pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
- memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
- } else {
- debug_putstr("booted via startup_64()\n");
- pgt_data.pgt_buf = _pgtable;
- pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
- memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
- top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
- }
-}
-
-/*
- * Adds the specified range to what will become the new identity mappings.
- * Once all ranges have been added, the new mapping is activated by calling
- * finalize_identity_maps() below.
- */
-void add_identity_map(unsigned long start, unsigned long size)
-{
- unsigned long end = start + size;
-
- /* Align boundary to 2M. */
- start = round_down(start, PMD_SIZE);
- end = round_up(end, PMD_SIZE);
- if (start >= end)
- return;
-
- /* Build the mapping. */
- kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
- start, end);
-}
-
-/*
- * This switches the page tables to the new level4 that has been built
- * via calls to add_identity_map() above. If booted via startup_32(),
- * this is effectively a no-op.
- */
-void finalize_identity_maps(void)
-{
- write_cr3(top_level_pgt);
-}
diff --git a/arch/x86/boot/compressed/kernel_info.S b/arch/x86/boot/compressed/kernel_info.S
new file mode 100644
index 0000000..f818ee8
--- /dev/null
+++ b/arch/x86/boot/compressed/kernel_info.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/bootparam.h>
+
+ .section ".rodata.kernel_info", "a"
+
+ .global kernel_info
+
+kernel_info:
+ /* Header, Linux top (structure). */
+ .ascii "LToP"
+ /* Size. */
+ .long kernel_info_var_len_data - kernel_info
+ /* Size total. */
+ .long kernel_info_end - kernel_info
+
+ /* Maximal allowed type for setup_data and setup_indirect structs. */
+ .long SETUP_TYPE_MAX
+
+kernel_info_var_len_data:
+ /* Empty for time being... */
+kernel_info_end:
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index 6afb713..a6dea4e 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -15,7 +15,7 @@
.text
.code32
-ENTRY(get_sev_encryption_bit)
+SYM_FUNC_START(get_sev_encryption_bit)
xor %eax, %eax
#ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -23,12 +23,6 @@
push %ecx
push %edx
- /* Check if running under a hypervisor */
- movl $1, %eax
- cpuid
- bt $31, %ecx /* Check the hypervisor bit */
- jnc .Lno_sev
-
movl $0x80000000, %eax /* CPUID to check the highest leaf */
cpuid
cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
@@ -65,10 +59,13 @@
#endif /* CONFIG_AMD_MEM_ENCRYPT */
ret
-ENDPROC(get_sev_encryption_bit)
+SYM_FUNC_END(get_sev_encryption_bit)
.code64
-ENTRY(set_sev_encryption_mask)
+
+#include "../../kernel/sev_verify_cbit.S"
+
+SYM_FUNC_START(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT
push %rbp
push %rdx
@@ -81,6 +78,19 @@
bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
+ /*
+ * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in
+ * get_sev_encryption_bit() because this function is 32-bit code and
+ * shared between 64-bit and 32-bit boot path.
+ */
+ movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */
+ rdmsr
+
+ /* Store MSR value in sev_status */
+ shlq $32, %rdx
+ orq %rdx, %rax
+ movq %rax, sev_status(%rip)
+
.Lno_sev_mask:
movq %rbp, %rsp /* Restore original stack pointer */
@@ -90,12 +100,13 @@
xor %rax, %rax
ret
-ENDPROC(set_sev_encryption_mask)
+SYM_FUNC_END(set_sev_encryption_mask)
.data
#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
-GLOBAL(sme_me_mask)
- .quad 0
+SYM_DATA(sme_me_mask, .quad 0)
+SYM_DATA(sev_status, .quad 0)
+SYM_DATA(sev_check_data, .quad 0)
#endif
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9652d5c..267e7f9 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -30,12 +30,9 @@
#define STATIC static
/*
- * Use normal definitions of mem*() from string.c. There are already
- * included header files which expect a definition of memset() and by
- * the time we define memset macro, it is too late.
+ * Provide definitions of memzero and memmove as some of the decompressors will
+ * try to define their own functions if these are not defined as macros.
*/
-#undef memcpy
-#undef memset
#define memzero(s, n) memset((s), 0, (n))
#define memmove memmove
@@ -77,6 +74,10 @@
#ifdef CONFIG_KERNEL_LZ4
#include "../../../../lib/decompress_unlz4.c"
#endif
+
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
/*
* NOTE: When adding a new decompressor, please update the analysis in
* ../header.S.
@@ -441,6 +442,13 @@
parse_elf(output);
handle_relocations(output, output_len, virt_addr);
debug_putstr("done.\nBooting the kernel.\n");
+
+ /*
+ * Flush GHCB from cache and map it encrypted again when running as
+ * SEV-ES guest.
+ */
+ sev_es_shutdown_ghcb();
+
return output;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index c818139..d9a631c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,6 +23,7 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
#define BOOT_CTYPE_H
#include <linux/acpi.h>
@@ -36,6 +37,9 @@
#define memptr unsigned
#endif
+/* boot/compressed/vmlinux start and end markers */
+extern char _head[], _end[];
+
/* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
@@ -59,7 +63,7 @@
static inline void debug_putstr(const char *s)
{ }
-static inline void debug_puthex(const char *s)
+static inline void debug_puthex(unsigned long value)
{ }
#define debug_putaddr(x) /* */
@@ -70,8 +74,8 @@
int cmdline_find_option_bool(const char *option);
struct mem_vector {
- unsigned long long start;
- unsigned long long size;
+ u64 start;
+ u64 size;
};
#if CONFIG_RANDOMIZE_BASE
@@ -81,8 +85,6 @@
unsigned long *output,
unsigned long output_size,
unsigned long *virt_addr);
-/* cpuflags.c */
-bool has_cpuflag(int flag);
#else
static inline void choose_random_location(unsigned long input,
unsigned long input_size,
@@ -93,18 +95,14 @@
}
#endif
+/* cpuflags.c */
+bool has_cpuflag(int flag);
+
#ifdef CONFIG_X86_64
-void initialize_identity_maps(void);
-void add_identity_map(unsigned long start, unsigned long size);
-void finalize_identity_maps(void);
+extern int set_page_decrypted(unsigned long address);
+extern int set_page_encrypted(unsigned long address);
+extern int set_page_non_present(unsigned long address);
extern unsigned char _pgtable[];
-#else
-static inline void initialize_identity_maps(void)
-{ }
-static inline void add_identity_map(unsigned long start, unsigned long size)
-{ }
-static inline void finalize_identity_maps(void)
-{ }
#endif
#ifdef CONFIG_EARLY_PRINTK
@@ -119,6 +117,17 @@
void set_sev_encryption_mask(void);
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+void sev_es_shutdown_ghcb(void);
+extern bool sev_es_check_ghcb_fault(unsigned long address);
+#else
+static inline void sev_es_shutdown_ghcb(void) { }
+static inline bool sev_es_check_ghcb_fault(unsigned long address)
+{
+ return false;
+}
+#endif
+
/* acpi.c */
#ifdef CONFIG_ACPI
acpi_physical_address get_rsdp_addr(void);
@@ -133,4 +142,23 @@
static inline int count_immovable_mem_regions(void) { return 0; }
#endif
+/* ident_map_64.c */
+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
+#endif
+
+/* Used by PAGE_KERN* macros: */
+extern pteval_t __default_kernel_pte_mask;
+
+/* idt_64.c */
+extern gate_desc boot_idt[BOOT_IDT_ENTRIES];
+extern struct desc_ptr boot_idt_desc;
+
+/* IDT Entry Points */
+void boot_page_fault(void);
+void boot_stage1_vc(void);
+void boot_stage2_vc(void);
+
+unsigned long sev_verify_cbit(unsigned long cr3);
+
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 7e01248..52aa56c 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -60,6 +60,12 @@
printf(".incbin \"%s\"\n", argv[1]);
printf("input_data_end:\n");
+ printf(".section \".rodata\",\"a\",@progbits\n");
+ printf(".globl input_len\n");
+ printf("input_len:\n\t.long %lu\n", ilen);
+ printf(".globl output_len\n");
+ printf("output_len:\n\t.long %lu\n", (unsigned long)olen);
+
retval = 0;
bail:
if (f)
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 7d0394f..2a78746 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -8,6 +8,13 @@
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
+#ifdef CONFIG_X86_5LEVEL
+/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
+unsigned int __section(".data") __pgtable_l5_enabled;
+unsigned int __section(".data") pgdir_shift = 39;
+unsigned int __section(".data") ptrs_per_p4d = 1;
+#endif
+
struct paging_config {
unsigned long trampoline_start;
unsigned long l5_required;
@@ -23,7 +30,7 @@
* Avoid putting the pointer into .bss as it will be cleared between
* paging_prepare() and extract_kernel().
*/
-unsigned long *trampoline_32bit __section(.data);
+unsigned long *trampoline_32bit __section(".data");
extern struct boot_params *boot_params;
int cmdline_find_option_bool(const char *option);
@@ -198,4 +205,13 @@
/* Restore trampoline memory */
memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
+
+ /* Initialize variables for 5-level paging */
+#ifdef CONFIG_X86_5LEVEL
+ if (__read_cr4() & X86_CR4_LA57) {
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ }
+#endif
}
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
new file mode 100644
index 0000000..27826c2
--- /dev/null
+++ b/arch/x86/boot/compressed/sev-es.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+/*
+ * misc.h needs to be first because it knows how to include the other kernel
+ * headers in the pre-decompression code in a way that does not break
+ * compilation.
+ */
+#include "misc.h"
+
+#include <asm/pgtable_types.h>
+#include <asm/sev-es.h>
+#include <asm/trapnr.h>
+#include <asm/trap_pf.h>
+#include <asm/msr-index.h>
+#include <asm/fpu/xcr.h>
+#include <asm/ptrace.h>
+#include <asm/svm.h>
+
+#include "error.h"
+
+struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
+struct ghcb *boot_ghcb;
+
+/*
+ * Copy a version of this function here - insn-eval.c can't be used in
+ * pre-decompression code.
+ */
+static bool insn_has_rep_prefix(struct insn *insn)
+{
+ insn_byte_t p;
+ int i;
+
+ insn_get_prefixes(insn);
+
+ for_each_insn_prefix(insn, i, p) {
+ if (p == 0xf2 || p == 0xf3)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
+ * doesn't use segments.
+ */
+static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
+{
+ return 0UL;
+}
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+ unsigned long low, high;
+
+ asm volatile("rdmsr" : "=a" (low), "=d" (high) :
+ "c" (MSR_AMD64_SEV_ES_GHCB));
+
+ return ((high << 32) | low);
+}
+
+static inline void sev_es_wr_ghcb_msr(u64 val)
+{
+ u32 low, high;
+
+ low = val & 0xffffffffUL;
+ high = val >> 32;
+
+ asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
+ "a"(low), "d" (high) : "memory");
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ enum es_result ret;
+
+ memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+
+ insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
+ insn_get_length(&ctxt->insn);
+
+ ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+
+ return ret;
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ void *dst, char *buf, size_t size)
+{
+ memcpy(dst, buf, size);
+
+ return ES_OK;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ void *src, char *buf, size_t size)
+{
+ memcpy(buf, src, size);
+
+ return ES_OK;
+}
+
+#undef __init
+#undef __pa
+#define __init
+#define __pa(x) ((unsigned long)(x))
+
+#define __BOOT_COMPRESSED
+
+/* Basic instruction decoding support needed */
+#include "../../lib/inat.c"
+#include "../../lib/insn.c"
+
+/* Include code for early handlers */
+#include "../../kernel/sev-es-shared.c"
+
+static bool early_setup_sev_es(void)
+{
+ if (!sev_es_negotiate_protocol())
+ sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED);
+
+ if (set_page_decrypted((unsigned long)&boot_ghcb_page))
+ return false;
+
+ /* Page is now mapped decrypted, clear it */
+ memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
+
+ boot_ghcb = &boot_ghcb_page;
+
+ /* Initialize lookup tables for the instruction decoder */
+ inat_init_tables();
+
+ return true;
+}
+
+void sev_es_shutdown_ghcb(void)
+{
+ if (!boot_ghcb)
+ return;
+
+ if (!sev_es_check_cpu_features())
+ error("SEV-ES CPU Features missing.");
+
+ /*
+ * GHCB Page must be flushed from the cache and mapped encrypted again.
+ * Otherwise the running kernel will see strange cache effects when
+ * trying to use that page.
+ */
+ if (set_page_encrypted((unsigned long)&boot_ghcb_page))
+ error("Can't map GHCB page encrypted");
+
+ /*
+ * GHCB page is mapped encrypted again and flushed from the cache.
+ * Mark it non-present now to catch bugs when #VC exceptions trigger
+ * after this point.
+ */
+ if (set_page_non_present((unsigned long)&boot_ghcb_page))
+ error("Can't unmap GHCB page");
+}
+
+bool sev_es_check_ghcb_fault(unsigned long address)
+{
+ /* Check whether the fault was on the GHCB page */
+ return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
+}
+
+void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
+{
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+
+ if (!boot_ghcb && !early_setup_sev_es())
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+ vc_ghcb_invalidate(boot_ghcb);
+ result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+ if (result != ES_OK)
+ goto finish;
+
+ switch (exit_code) {
+ case SVM_EXIT_RDTSC:
+ case SVM_EXIT_RDTSCP:
+ result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
+ break;
+ case SVM_EXIT_IOIO:
+ result = vc_handle_ioio(boot_ghcb, &ctxt);
+ break;
+ case SVM_EXIT_CPUID:
+ result = vc_handle_cpuid(boot_ghcb, &ctxt);
+ break;
+ default:
+ result = ES_UNSUPPORTED;
+ break;
+ }
+
+finish:
+ if (result == ES_OK) {
+ vc_finish_insn(&ctxt);
+ } else if (result != ES_RETRY) {
+ /*
+ * For now, just halt the machine. That makes debugging easier,
+ * later we just call sev_es_terminate() here.
+ */
+ while (true)
+ asm volatile("hlt\n");
+ }
+}
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 508cfa6..112b237 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -42,16 +42,11 @@
*(.rodata.*)
_erodata = . ;
}
- .got : {
- _got = .;
- KEEP(*(.got.plt))
- KEEP(*(.got))
- _egot = .;
- }
.data : {
_data = . ;
*(.data)
*(.data.*)
+ *(.bss.efistub)
_edata = . ;
}
. = ALIGN(L1_CACHE_BYTES);
@@ -73,4 +68,50 @@
#endif
. = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */
_end = .;
+
+ STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
+
+ DISCARDS
+ /DISCARD/ : {
+ *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
+ *(.hash) *(.gnu.hash)
+ *(.note.*)
+ }
+
+ .got.plt (INFO) : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 ||
+#ifdef CONFIG_X86_64
+ SIZEOF(.got.plt) == 0x18,
+#else
+ SIZEOF(.got.plt) == 0xc,
+#endif
+ "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .got : {
+ *(.got)
+ }
+ ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!")
+
+ .plt : {
+ *(.plt) *(.plt.*)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+
+ .rel.dyn : {
+ *(.rel.*) *(.rel_*)
+ }
+ ASSERT(SIZEOF(.rel.dyn) == 0, "Unexpected run-time relocations (.rel) detected!")
+
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
}